From 4b9d1134fe176803a5ce9749973204c85bec97c6 Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Mon, 25 Nov 2019 22:49:03 +0100
Subject: [PATCH] [statistics] added new exercise univariatedata.m

---
 statistics/code/univariatedata.m  |  9 +++++++++
 statistics/lecture/statistics.tex | 27 +++++++++++++++++++--------
 2 files changed, 28 insertions(+), 8 deletions(-)
 create mode 100644 statistics/code/univariatedata.m

diff --git a/statistics/code/univariatedata.m b/statistics/code/univariatedata.m
new file mode 100644
index 0000000..a8c6d1d
--- /dev/null
+++ b/statistics/code/univariatedata.m
@@ -0,0 +1,9 @@
+data = 2.0 + randn(40, 1);  % 40 normally distributed random numbers with mean 2
+bw = 0.8;                   % width of the jitter band; the bar is half as wide
+boxplot(data)               % box-whisker plot of the data
+hold on;                    % draw the following plots into the same axes
+bar(2.0, mean(data), 0.5*bw);          % bar at x=2 showing the mean
+errorbar(2.0, mean(data), std(data));  % errorbar at x=2: +/- one standard deviation
+scatter(2.5+bw*rand(length(data), 1), data);  % data at random x in [2.5, 2.5+bw]
+hold off;
+xlim([0.2, 4.0])            % common x-axis range for the three plots
diff --git a/statistics/lecture/statistics.tex b/statistics/lecture/statistics.tex
index fd66e5f..d2abdaf 100644
--- a/statistics/lecture/statistics.tex
+++ b/statistics/lecture/statistics.tex
@@ -3,7 +3,6 @@
 \chapter{Descriptive statistics}
 
 Descriptive statistics characterizes data sets by means of a few measures.
-
 In addition to histograms that estimate the full distribution of the data,
 the following measures are used for characterizing univariate data:
 \begin{description}
@@ -20,7 +19,7 @@ For bivariate and multivariate data sets we can also analyse their
   Spearman's rank correlation coefficient.
 \end{description}
 
-The following is not a complete introduction to descriptive
+The following is in no way a complete introduction to descriptive
 statistics, but summarizes a few concepts that are most important in
 daily data-analysis problems.
 
@@ -63,10 +62,12 @@ used to illustrate the standard deviation of the data
     uniformly distributed random numbers \matlabfun{rand()}. (2) With
     a bar plot \matlabfun{bar()} one usually shows the mean of the
     data. The additional errorbar illustrates the deviation of the
-    data from the mean by $\pm$ one standard deviation. (3) A
+    data from the mean by $\pm$ one standard deviation. For non-normal
+    data, however, mean and standard deviation only poorly
+    characterize the distribution of the data values. (3) A
     box-whisker plot \matlabfun{boxplot()} shows more details of the
     distribution of the data values. The box extends from the 1. to
-    the 3. quartile, a horizontal ine within the box marks the median
+    the 3. quartile, a horizontal line within the box marks the median
     value, and the whiskers extend to the minum and the maximum data
     values. (4) The probability density $p(x)$ estimated from a
     normalized histogram shows the entire distribution of the
@@ -151,12 +152,22 @@ that extends from the 1$^{\rm st}$ to the 3$^{\rm rd}$ quartile. The
 whiskers mark the minimum and maximum value of the data set
 (\figref{displayunivariatedatafig} (3)).
 
-\begin{exercise}{boxwhisker.m}{}
-  Generate eine $40 \times 10$ matrix of random numbers and
-  illustrate their distribution in a box-whicker plot
-  (\code{boxplot()} function). How to interpret the plot?
+\begin{exercise}{univariatedata.m}{}
+  Generate 40 normally distributed random numbers with a mean of 2 and
+  illustrate their distribution in a box-whisker plot
+  (\code{boxplot()} function), with a bar and errorbar illustrating
+  the mean and standard deviation (\code{bar()}, \code{errorbar()}),
+  and the data themselves jittered randomly (as in
+  \figref{displayunivariatedatafig}). How do you interpret the
+  different plots?
 \end{exercise}
 
+% \begin{exercise}{boxwhisker.m}{}
+%   Generate a $40 \times 10$ matrix of random numbers and
+%   illustrate their distribution in a box-whisker plot
+%   (\code{boxplot()} function). How to interpret the plot?
+% \end{exercise}
+
 \section{Distributions}
 The distribution of values in a data set is estimated by histograms
 (\figref{displayunivariatedatafig} (4)).