misc updates

2018-11-12 14:18:18 +01:00
parent 67ef51356e
commit 665231c00c
7 changed files with 147 additions and 49 deletions
--- a/statistics/exercises/exercises01.tex
+++ b/statistics/exercises/exercises01.tex
@@ -15,7 +15,7 @@
 \else
 \newcommand{\stitle}{}
 \fi
-\header{{\bfseries\large Exercise 6\stitle}}{{\bfseries\large Statistics}}{{\bfseries\large November 14th, 2017}}
+\header{{\bfseries\large Exercise 7\stitle}}{{\bfseries\large Statistics}}{{\bfseries\large November 13th, 2018}}
 \firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
 jan.benda@uni-tuebingen.de}
 \runningfooter{}{\thepage}{}
@@ -104,7 +104,7 @@ jan.benda@uni-tuebingen.de}
  addition, the internet offers a lot of material and suggestions for
  any question you have regarding your code !
 \item Please upload your solution to the exercises to ILIAS as a zip-archive with the name
-  ``probabilities\_\{last name\}\_\{first name\}.zip''.
+  ``statistics\_\{last name\}\_\{first name\}.zip''.
 \end{itemize}

 \fi
@@ -116,7 +116,7 @@ jan.benda@uni-tuebingen.de}
 \question \textbf{Read chapter 4 of the script on ``programming style''!}

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\question \qt{Probabilities of a die I}
+\question \qt{Probabilities of a die}
 The computer can roll dice with more than 6 faces!
 \begin{parts}
  \part Simulate 10000 times rolling a die with eight faces by
@@ -152,22 +152,6 @@ The computer can roll dice with more than 6 faces!


 \continue
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\question \qt{Probabilities of a die II}
-Now we analyze several dice at once.
-\begin{parts}
-  \part Simulate 20 dice, each of which is rolled 100 times 
-  (each die is simulated with the same random number generator).
-  \part Compute for this data set for each die a normalized histogram.
-  \part Calculate the mean and the standard deviation for each face
-  value averaged over the dice.
-  \part Visualize the result in a bar plot with error bars
-  (\code{bar()} and \code{errorbar()} functions).
-\end{parts}
-\begin{solution}
-  \lstinputlisting{die2.m}
-  \includegraphics[width=0.5\textwidth]{die2}
-\end{solution}


 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -209,6 +193,102 @@ Now we analyze several dice at once.
 \end{solution}


+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\question \qt{Probabilities of a normal distribution}
+Which fraction of a normally distributed data set is contained in ranges
+that are symmetric around the mean?
+\begin{parts}
+  \part Generate a data set $X = (x_1, x_2, \ldots, x_n)$ of
+  $n=10000$ normally distributed numbers with mean $\mu=0$ and
+  standard deviation $\sigma=1$ (\code{randn()} function).
+  % \part Estimate and plot the probability density of this data set (normalized histogram).
+  % For a comparison plot the normal distribution
+  % \[ p_g(x) = \frac{1}{\sqrt{2\pi\sigma^2}}e^{-\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^2} \]
+  % into the same plot.
+
+  \part \label{onesigma} How many data values are at most one standard deviation
+  away from the mean?\\
+  That is, how many data values $x_i$ satisfy $-\sigma < x_i < +\sigma$?\\
+  Compute the probability $P_{\pm\sigma}$ to get a value in this range
+  by counting how many data points fall into this range.
+
+  \part \label{probintegral} Compute the probability of 
+  $-\sigma < x_i < +\sigma$ by numerically integrating over the
+  probability density of the normal distribution
+  \[ P_{\pm\sigma}=\int_{x=\mu-\sigma}^{x=\mu+\sigma} p_g(x) \, dx \; .\]
+  First check whether
+  \[ \int_{-\infty}^{+\infty} p_g(x) \, dx = 1 \; . \]
+  Why is this the case?
+
+  \part What fraction of the data is contained in the intervals $\pm 2\sigma$
+  and $\pm 3\sigma$? 
+
+  Compare the results with the corresponding integrals over the normal
+  distribution.
+
+  \part \label{givenfraction} Find out which intervals, that are
+  symmetric with respect to the mean, contain 50\,\%, 90\,\%, 95\,\% and 99\,\%
+  of the data by means of numeric integration of the normal
+  distribution.
+
+  % \part \extra Modify the code of questions \pref{onesigma} -- \pref{givenfraction} such
+  % that it works for data sets with arbitrary mean and arbitrary standard deviation.\\
+  % Check your code with different sets of random numbers.\\
+  % How do you generate random numbers of a given mean and standard
+  % deviation using the \code{randn()} function?
+\end{parts}
+\begin{solution}
+  \lstinputlisting{normprobs.m}
+\end{solution}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\question \qt{Central limit theorem} 
+According to the central limit theorem the sum of independent and
+identically distributed (i.i.d.)  random variables converges towards a
+normal distribution, although the distribution of the random
+variables might not be normally distributed.
+
+With the following questions we want to illustrate the central limit theorem.
+\begin{parts}
+  \part Before you continue reading, try to figure out yourself what
+  the central limit theorem means and what you would need to do for
+  illustrating this theorem.
+
+  \part Draw 10000 random numbers that are uniformly distributed between 0 and 1
+  (\code{rand} function).
+
+  \part Plot their probability density (normalized histogram).
+
+  \part Draw another set of 10000 uniformly distributed random numbers
+  and add them to the first set of numbers.
+
+  \part Plot the probability density of the summed up random numbers.
+
+  \part Repeat steps (d) and (e) many times.
+
+  \part Compare in a plot the probability density of the summed up
+  numbers with the normal distribution
+  \[ p_g(x) =
+  \frac{1}{\sqrt{2\pi\sigma^2}}e^{-\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^2}\]
+  with mean $\mu$ and standard deviation $\sigma$ of the summed up random numbers.
+
+  \part How do the mean and the standard deviation change with the
+  number of summed up data sets?
+
+  \part \extra Check the central limit theorem in the same way using
+  exponentially distributed random numbers (\code{rande} function).
+\end{parts}
+\begin{solution}
+  \lstinputlisting{centrallimit.m}
+  \includegraphics[width=0.5\textwidth]{centrallimit-hist01}
+  \includegraphics[width=0.5\textwidth]{centrallimit-hist02}
+  \includegraphics[width=0.5\textwidth]{centrallimit-hist03}
+  \includegraphics[width=0.5\textwidth]{centrallimit-hist05}
+  \includegraphics[width=0.5\textwidth]{centrallimit-samples}
+\end{solution}
+
+
 \end{questions}

 \end{document}
--- a/statistics/exercises/exercises02.tex
+++ b/statistics/exercises/exercises02.tex
@@ -87,6 +87,23 @@ jan.benda@uni-tuebingen.de}

 \begin{questions}

+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\question \qt{Probabilities of a die II}
+Now we analyze several dice at once.
+\begin{parts}
+  \part Simulate 20 dice, each of which is rolled 100 times 
+  (each die is simulated with the same random number generator).
+  \part Compute for this data set for each die a normalized histogram.
+  \part Calculate the mean and the standard deviation for each face
+  value averaged over the dice.
+  \part Visualize the result in a bar plot with error bars
+  (\code{bar()} and \code{errorbar()} functions).
+\end{parts}
+\begin{solution}
+  \lstinputlisting{die2.m}
+  \includegraphics[width=0.5\textwidth]{die2}
+\end{solution}
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \question \qt{Probabilities of a normal distribution}
 Which fraction of a normally distributed data set is contained in ranges
@@ -94,7 +111,7 @@ that are symmetric around the mean?
 \begin{parts}
  \part Generate a data set $X = (x_1, x_2, ... x_n)$ of
  $n=10000$ normally distributed numbers with mean $\mu=0$ and
-  standard deviation $\sigma=1$ (\code{randn() Funktion}).
+  standard deviation $\sigma=1$ (\code{randn()} function).
  \part Estimate and plot the probability density of this data set (normalized histogram).
  For a comparison plot the normal distribution
  \[ p_g(x) = \frac{1}{\sqrt{2\pi\sigma^2}}e^{-\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^2} \]
--- a/statistics/lecture/statistics.tex
+++ b/statistics/lecture/statistics.tex
@@ -214,10 +214,10 @@ category $i$, i.e. of getting a data value in the $i$-th bin.
 \end{exercise}

 \begin{exercise}{diehistograms.m}{}
-  Plotte Histogramme von 20, 100, und 1000-mal W\"urfeln.  Benutze
-  \code[hist()]{hist(x)}, erzwinge sechs Bins mit
-  \code[hist()]{hist(x,6)}, oder setze selbst sinnvolle Bins. Normiere
-  anschliessend das Histogram.
+  Plot histograms for rolling a die 20, 100, and 1000 times.  Use
+  \code[hist()]{hist(x)}, enforce six bins with
+  \code[hist()]{hist(x,6)}, or set useful bins yourself. Normalize the
+  histograms appropriately.
 \end{exercise}