misc updates

2018-11-12 14:18:18 +01:00 · 2018-11-12 14:18:18 +01:00 · 665231c00c
commit 665231c00c
parent 67ef51356e
7 changed files with 147 additions and 49 deletions
--- a/designpattern/lecture/designpattern.tex
+++ b/designpattern/lecture/designpattern.tex
@ -20,7 +20,7 @@ for i=1:length(x)  % For loop over the indices of the vector.
                   % is assigned to the variable a.
  % Use the value of the i-th vector element by passing it
  % as an argument to a function:
-  do_something( x(i) );
+  do_something(x(i));
 end
 \end{lstlisting}

@ -34,7 +34,7 @@ y = zeros(length(x),1);
 for i=1:length(x)
  % Write the result of the computation at 
  % the i-th position in the y vector:
-  y(i) = get_something( x(i) );
+  y(i) = get_something(x(i));
 end
 % Now the result vector can be further processed:
 mean(y);
@ -52,7 +52,7 @@ y = zeros(length(x), 10);
 for i=1:length(x)
  % Write the return value of the function get_some_more - now a 
  % column vector with 10 elements - into the i-th row of the matrix y:
-  y(i, :) = get_something(x(i));
+  y(i, :) = get_some_more(x(i));
 end
 % Process the results stored in matrix y:
 mean(y, 1)
@ -65,7 +65,7 @@ x = [2:3:20];        % Some vector.
 y = [];              % Empty vector for storing the results.
 for i=1:length(x)
  % The function get_somehow_more() returns a vector of unspecified size:
-  z = get_something(x(i));
+  z = get_somehow_more(x(i));
  % The content of z is appended to the result vector y:
  y = [y; z(:)];
  % The z(:) syntax ensures that we append column-vectors.
@ -150,7 +150,7 @@ The \mcode{histogram()} function does this automatically with the appropriate ar
 x = randn(100, 1);         % Some real-valued data.
 histogram(x, 'Normalization', 'pdf');
 \end{lstlisting}
-\begin{lstlisting}[caption={Probability mit der \varcode{histogram()}-function}]
+\begin{lstlisting}[caption={Probability with the \varcode{histogram()}-function}]
 x = randi(6, 100, 1);      % Some integer-valued data.
 histogram(x, 'Normalization', 'probability');
 \end{lstlisting}
--- a/header.tex
+++ b/header.tex
@ -5,7 +5,7 @@
 \author{{\LARGE Jan Grewe \& Jan Benda}\\[5ex]Abteilung Neuroethologie\\[2ex]%
        \includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}\vspace{3ex}}

-\date{WS 2017/2018\\\vfill%
+\date{WS 2018/2019\\\vfill%
      \centerline{\includegraphics[width=0.7\textwidth]{announcements/correlationcartoon}%
      \rotatebox{90}{\footnotesize\url{www.xkcd.com}}}}

@ -294,7 +294,7 @@
  chapterlistsgaps=on,
 ]{exercisef}
 \newboolean{showexercisesolutions}
-\setboolean{showexercisesolutions}{false}
+\setboolean{showexercisesolutions}{true}
 \newenvironment{exercise}[2]%
  { \newcommand{\exercisesource}{#1}%
    \newcommand{\exercisefile}{\protect\StrSubstitute{#1}{_}{\_}}%
--- a/plotting/lecture/plotting.tex
+++ b/plotting/lecture/plotting.tex
@ -608,13 +608,13 @@ Plot should help/enable the interested reader to get a grasp of the
 data and to understand the performed analysis and to critically assess
 the presented results. The most important rule is the correct and
 complete annotation of the plots. This starts with axis labels and
-units and and extends to legends. Incomplete annotation can have
+units and extends to legends. Incomplete annotation can have
 terrible consequences (\figref{xkcdplotting}).

-The principle of \emph{ink minimization} may be used a a guiding
+The principle of \emph{ink minimization} may be used as a guiding
 principle for appealing plots. It requires that the relation of amount
 of ink spent on the data and that spent on other parts of the plot
-should be strongly in favor of the data. Ornamental of otherwise
+should be strongly in favor of the data. Ornamental or otherwise
 unnecessary gimmicks should not be used in scientific contexts. An
 exception can be made if the particular figure was designed for
 didactic purposes and sometimes for presentations.
--- a/programmingstyle/lecture/programmingstyle.tex
+++ b/programmingstyle/lecture/programmingstyle.tex
@ -5,13 +5,14 @@
  else.}{Eagleson's law}

 %\selectlanguage{ngerman}
-Cultivating a good code style not a matter of good taste but is
-a key ingredient for understandability, maintainability and, in the end,
-facilitates reproducibility of scientific results. Programs should be
-written and structured in a way that supports outsiders as well the
-author himself --- a few weeks or months after it was written --- to
-understand the programs' rationale. Clean code pays off for the
-original author as well as others that are supposed to use the code.
+Cultivating a good code style is not just a matter of good taste but
+rather is a key ingredient for readability and maintainability of code
+and, in the end, facilitates reproducibility of scientific
+results. Programs should be written and structured in a way that
+supports outsiders as well as the author himself --- a few weeks or
+months after it was written --- to understand the programs'
+rationale. Clean code pays off for the original author as well as
+others that are supposed to use the code.

 Clean code addresses several issues:
 \begin{enumerate}
@ -35,7 +36,7 @@ of the program but two questions remain: (i) How to organize the files
 on the file system and (ii) how to name them that the controlling
 script is easily identified among the other \codeterm{m-files}.

-Upon installation ``MATLAB'' creates a folder called \emph{MATLAB} in
+Upon installation ``MATLAB'' creates a folder called \file{MATLAB} in
 the user space (Windows: My files, Linux: Documents, MacOS:
 Documents). Since this folder is already appended to the Matlab search
 path (Box~\ref{matlabpathbox}), it is easiest to stick to it for the
@ -47,12 +48,12 @@ task (analysis) and to store all related \codeterm{m-files}
 (screenshot \ref{fileorganizationfig}). In these task-related folders
 one may consider to create a further sub-folder to store results
 (created figures, result data). On the project level a single script
-(analysis.m) controls the whole process. In parallel to the project
-folder we suggest to create an additional folder for functions that
-are or may be relevant across different projects.
+(\file{analysis.m}) controls the whole process. In parallel to the
+project folder we suggest to create an additional folder for functions
+that are or may be relevant across different projects.

 Within such a structure it is quite likely that programs in different
-projects share the same name (e.g. a ``load\_data.m''
+projects share the same name (e.g. a \varcode{load\_data.m}
 function). Usually this will not lead to conflicts due to the way
 matlab searches for matching functions which always starts in the
 current folder (more information on the \matlab-path in
@ -63,7 +64,7 @@ Box~\ref{matlabpathbox}).
  \titlecaption{\label{fileorganizationfig} Possible folder structure
    for maintaining program code on the file system.}{For each project
    one maintains an individual folder in which analyses or tasks may
-    be structured in sub-folders. Within each analysis a ``main.m''
+    be structured in sub-folders. Within each analysis a \file{main.m}
    script is the entry point for the analyses. On the project level
    there could be a single script that triggers and controls all
    analyses and tasks in the sub-folders. Functions that are of
@ -129,8 +130,8 @@ patterns:

 There are other common patterns such as the \emph{camelCase} in which
 the first character of compound words is capitalized. Other
-conventions use the underscore to separate the individual words (
-\emph{snake\_case}). A function that counts the number of action
+conventions use the underscore to separate the individual words
+(\emph{snake\_case}). A function that counts the number of action
 potentials could be named \file{spikeCount.m} or
 \file{spike\_count.m}.

--- a/statistics/exercises/exercises01.tex
+++ b/statistics/exercises/exercises01.tex
@ -15,7 +15,7 @@
 \else
 \newcommand{\stitle}{}
 \fi
-\header{{\bfseries\large Exercise 6\stitle}}{{\bfseries\large Statistics}}{{\bfseries\large November 14th, 2017}}
+\header{{\bfseries\large Exercise 7\stitle}}{{\bfseries\large Statistics}}{{\bfseries\large November 13th, 2018}}
 \firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
 jan.benda@uni-tuebingen.de}
 \runningfooter{}{\thepage}{}
@ -104,7 +104,7 @@ jan.benda@uni-tuebingen.de}
  addition, the internet offers a lot of material and suggestions for
  any question you have regarding your code !
 \item Please upload your solution to the exercises to ILIAS as a zip-archive with the name
-  ``probabilities\_\{last name\}\_\{first name\}.zip''.
+  ``statistics\_\{last name\}\_\{first name\}.zip''.
 \end{itemize}

 \fi
@ -116,7 +116,7 @@ jan.benda@uni-tuebingen.de}
 \question \textbf{Read chapter 4 of the script on ``programming style''!}

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\question \qt{Probabilities of a die I}
+\question \qt{Probabilities of a die}
 The computer can roll dice with more than 6 faces!
 \begin{parts}
  \part Simulate 10000 times rolling a die with eight faces by
@ -152,22 +152,6 @@ The computer can roll dice with more than 6 faces!


 \continue
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\question \qt{Probabilities of a die II}
-Now we analyze several dice at once.
-\begin{parts}
-  \part Simulate 20 dice, each of which is rolled 100 times 
-  (each die is simulated with the same random number generator).
-  \part Compute for this data set for each die a normalized histogram.
-  \part Calculate the mean and the standard deviation for each face
-  value averaged over the dice.
-  \part Visualize the result in a bar plot with error bars
-  (\code{bar()} and \code{errorbar()} functions).
-\end{parts}
-\begin{solution}
-  \lstinputlisting{die2.m}
-  \includegraphics[width=0.5\textwidth]{die2}
-\end{solution}


 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -209,6 +193,102 @@ Now we analyze several dice at once.
 \end{solution}


+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\question \qt{Probabilities of a normal distribution}
+Which fraction of a normally distributed data set is contained in ranges
+that are symmetric around the mean?
+\begin{parts}
+  \part Generate a data set $X = (x_1, x_2, \ldots, x_n)$ of
+  $n=10000$ normally distributed numbers with mean $\mu=0$ and
+  standard deviation $\sigma=1$ (\code{randn()} function).
+  % \part Estimate and plot the probability density of this data set (normalized histogram).
+  % For a comparison plot the normal distribution
+  % \[ p_g(x) = \frac{1}{\sqrt{2\pi\sigma^2}}e^{-\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^2} \]
+  % into the same plot.
+
+  \part \label{onesigma} How many data values are at maximum one standard deviation 
+  away from the mean?\\
+  That is, how many data values $x_i$ have the value $-\sigma < x_i < +\sigma$?\\
+  Compute the probability $P_{\pm\sigma}$ to get a value in this range
+  by counting how many data points fall into this range.
+
+  \part \label{probintegral} Compute the probability of 
+  $-\sigma < x_i < +\sigma$ by numerically integrating over the
+  probability density of the normal distribution
+  \[ P_{\pm\sigma}=\int_{x=\mu-\sigma}^{x=\mu+\sigma} p_g(x) \, dx \; .\]
+  First check whether
+  \[ \int_{-\infty}^{+\infty} p_g(x) \, dx = 1 \; . \]
+  Why is this the case?
+
+  \part What fraction of the data is contained in the intervals $\pm 2\sigma$
+  and $\pm 3\sigma$? 
+
+  Compare the results with the corresponding integrals over the normal
+  distribution.
+
+  \part \label{givenfraction} Find out which intervals, that are
+  symmetric with respect to the mean, contain 50\,\%, 90\,\%, 95\,\% and 99\,\%
+  of the data by means of numeric integration of the normal
+  distribution.
+
+  % \part \extra Modify the code of questions \pref{onesigma} -- \pref{givenfraction} such
+  % that it works for data sets with arbitrary mean and arbitrary standard deviation.\\
+  % Check your code with different sets of random numbers.\\
+  % How do you generate random numbers of a given mean and standard
+  % deviation using the \code{randn()} function?
+\end{parts}
+\begin{solution}
+  \lstinputlisting{normprobs.m}
+\end{solution}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\question \qt{Central limit theorem} 
+According to the central limit theorem the sum of independent and
+identically distributed (i.i.d.)  random variables converges towards a
+normal distribution, although the distribution of the random
+variables might not be normally distributed.
+
+With the following questions we want to illustrate the central limit theorem.
+\begin{parts}
+  \part Before you continue reading, try to figure out yourself what
+  the central limit theorem means and what you would need to do for
+  illustrating this theorem.
+
+  \part Draw 10000 random numbers that are uniformly distributed between 0 and 1
+  (\code{rand} function).
+
+  \part Plot their probability density (normalized histogram).
+
+  \part Draw another set of 10000 uniformly distributed random numbers
+  and add them to the first set of numbers.
+
+  \part Plot the probability density of the summed up random numbers.
+
+  \part Repeat steps (d) and (e) many times.
+
+  \part Compare in a plot the probability density of the summed up
+  numbers with the normal distribution
+  \[ p_g(x) =
+  \frac{1}{\sqrt{2\pi\sigma^2}}e^{-\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^2}\]
+  with mean $\mu$ and standard deviation $\sigma$ of the summed up random numbers.
+
+  \part How do the mean and the standard deviation change with the
+  number of summed up data sets?
+
+  \part \extra Check the central limit theorem in the same way using
+  exponentially distributed random numbers (\code{rande} function).
+\end{parts}
+\begin{solution}
+  \lstinputlisting{centrallimit.m}
+  \includegraphics[width=0.5\textwidth]{centrallimit-hist01}
+  \includegraphics[width=0.5\textwidth]{centrallimit-hist02}
+  \includegraphics[width=0.5\textwidth]{centrallimit-hist03}
+  \includegraphics[width=0.5\textwidth]{centrallimit-hist05}
+  \includegraphics[width=0.5\textwidth]{centrallimit-samples}
+\end{solution}
+
+
 \end{questions}

 \end{document}
--- a/statistics/exercises/exercises02.tex
+++ b/statistics/exercises/exercises02.tex
@ -87,6 +87,23 @@ jan.benda@uni-tuebingen.de}

 \begin{questions}

+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\question \qt{Probabilities of a die II}
+Now we analyze several dice at once.
+\begin{parts}
+  \part Simulate 20 dice, each of which is rolled 100 times 
+  (each die is simulated with the same random number generator).
+  \part Compute for this data set for each die a normalized histogram.
+  \part Calculate the mean and the standard deviation for each face
+  value averaged over the dice.
+  \part Visualize the result in a bar plot with error bars
+  (\code{bar()} and \code{errorbar()} functions).
+\end{parts}
+\begin{solution}
+  \lstinputlisting{die2.m}
+  \includegraphics[width=0.5\textwidth]{die2}
+\end{solution}
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \question \qt{Probabilities of a normal distribution}
 Which fraction of a normally distributed data set is contained in ranges
@ -94,7 +111,7 @@ that are symmetric around the mean?
 \begin{parts}
  \part Generate a data set $X = (x_1, x_2, ... x_n)$ of
  $n=10000$ normally distributed numbers with mean $\mu=0$ and
-  standard deviation $\sigma=1$ (\code{randn() Funktion}).
+  standard deviation $\sigma=1$ (\code{randn()} function).
  \part Estimate and plot the probability density of this data set (normalized histogram).
  For a comparison plot the normal distribution
  \[ p_g(x) = \frac{1}{\sqrt{2\pi\sigma^2}}e^{-\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^2} \]
--- a/statistics/lecture/statistics.tex
+++ b/statistics/lecture/statistics.tex
@ -214,10 +214,10 @@ category $i$, i.e. of getting a data value in the $i$-th bin.
 \end{exercise}

 \begin{exercise}{diehistograms.m}{}
-  Plotte Histogramme von 20, 100, und 1000-mal W\"urfeln.  Benutze
-  \code[hist()]{hist(x)}, erzwinge sechs Bins mit
-  \code[hist()]{hist(x,6)}, oder setze selbst sinnvolle Bins. Normiere
-  anschliessend das Histogram.
+  Plot histograms for 20, 100, and 1000 times rolling a die.  Use
+  \code[hist()]{hist(x)}, enforce six bins with
+  \code[hist()]{hist(x,6)}, or set useful bins yourself. Normalize the
+  histograms appropriately.
 \end{exercise}