From effc38f96f318b068688c49d81004abf92dd24e1 Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Sun, 25 Oct 2015 11:25:56 +0100
Subject: [PATCH] Added matlab code to mle chapter

---
 statistics/code/mlemeanstd.m                 |  51 +++
 statistics/code/mlepdffit.m                  |  27 ++
 statistics/code/mlepropfit.m                 |  29 ++
 statistics/code/mleslope.m                   |   6 +
 statistics/lecture/descriptivestatistics.tex | 357 +++++++++++--------
 5 files changed, 323 insertions(+), 147 deletions(-)
 create mode 100644 statistics/code/mlemeanstd.m
 create mode 100644 statistics/code/mlepdffit.m
 create mode 100644 statistics/code/mlepropfit.m
 create mode 100644 statistics/code/mleslope.m

diff --git a/statistics/code/mlemeanstd.m b/statistics/code/mlemeanstd.m
new file mode 100644
index 0000000..6bb25a6
--- /dev/null
+++ b/statistics/code/mlemeanstd.m
@@ -0,0 +1,51 @@
+% Likelihood and log-likelihood of the mean and of the standard deviation
+% of normally distributed data, evaluated on a grid of parameter values.
+n = 50;       % number of data values; with several hundred values the
+              % product of the probabilities (prod below) underflows to 0
+mu = 3.0;     % mean of the random numbers
+sigma = 2.0;  % standard deviation of the random numbers
+x = randn(n,1)*sigma+mu;
+fprintf('              mean of the data is %.2f\n', mean(x))
+fprintf('standard deviation of the data is %.2f\n', std(x))
+
+% mean as parameter (standard deviation fixed at sigma):
+pmus = 2.0:0.01:4.0;
+% matrix with the probabilities for each x (rows) and pmus (columns):
+lms = zeros(length(x), length(pmus));
+for i=1:length(pmus)
+    pmu = pmus(i);
+    lms(:,i) = exp(-0.5*((x-pmu)/sigma).^2.0)/sqrt(2.0*pi)/sigma;
+end
+lm = prod(lms, 1);          % likelihood: product over all data values
+loglm = sum(log(lms), 1);   % log likelihood: sum of the logarithms
+
+% plot likelihood (left) and log likelihood (right) of the mean:
+subplot(2, 2, 1);
+plot(pmus, lm );
+xlabel('mean')
+ylabel('likelihood')
+subplot(2, 2, 2);
+plot(pmus, loglm );
+xlabel('mean')
+ylabel('log likelihood')
+
+% standard deviation as parameter (mean fixed at mu):
+psigs = 1.0:0.01:3.0;
+% matrix with the probabilities for each x (rows) and psigs (columns):
+lms = zeros(length(x), length(psigs));
+for i=1:length(psigs)
+    psig = psigs(i);
+    lms(:,i) = exp(-0.5*((x-mu)/psig).^2.0)/sqrt(2.0*pi)/psig;
+end
+lm = prod(lms, 1);          % likelihood: product over all data values
+loglm = sum(log(lms), 1);   % log likelihood: sum of the logarithms
+
+% plot likelihood (left) and log likelihood (right) of the standard deviation:
+subplot(2, 2, 3);
+plot(psigs, lm );
+xlabel('standard deviation')
+ylabel('likelihood')
+subplot(2, 2, 4);
+plot(psigs, loglm);
+xlabel('standard deviation')
+ylabel('log likelihood')
diff --git a/statistics/code/mlepdffit.m b/statistics/code/mlepdffit.m
new file mode 100644
index 0000000..900fe22
--- /dev/null
+++ b/statistics/code/mlepdffit.m
@@ -0,0 +1,27 @@
+% gamma pdfs for several shape parameters (scale parameter set to one):
+xx = 0.0:0.1:10.0;
+shapes = [1.0 2.0 3.0 5.0];
+cc = jet(numel(shapes));
+for i = 1:numel(shapes)
+    label = sprintf('s=%.0f', shapes(i));
+    plot(xx, gampdf(xx, shapes(i), 1.0), '-', 'linewidth', 3, ...
+        'color', cc(i,:), 'DisplayName', label);
+    hold on;
+end
+
+% draw n gamma distributed random numbers:
+n = 50;
+x = gamrnd(3.0, 1.0, n, 1);
+
+% histogram with 15 bins, divided by total count and bin width:
+[h,b] = hist(x, 15);
+h = (h/sum(h))/(b(2)-b(1));
+bar(b, h, 1.0, 'DisplayName', 'data');
+
+% maximum likelihood fit of the gamma distribution to the data:
+p = mle(x, 'distribution', 'gamma');
+yy = gampdf(xx, p(1), p(2));
+plot(xx, yy, '-k', 'linewidth', 5, 'DisplayName', 'mle');
+
+hold off
+legend show
diff --git a/statistics/code/mlepropfit.m b/statistics/code/mlepropfit.m
new file mode 100644
index 0000000..8197146
--- /dev/null
+++ b/statistics/code/mlepropfit.m
@@ -0,0 +1,29 @@
+% Demo: maximum likelihood fit of the slope of a line through the origin.
+m = 2.0;      % slope of the original line
+sigma = 1.0;  % standard deviation of the noise added to y
+n = 100;      % number of data pairs
+
+% data pairs: x is uniformly distributed between 0 and 5, y scatters around m*x:
+x = 5.0*rand(n, 1);
+y = m*x + sigma*randn(n, 1);
+
+% fit the slope and compare it with the original one:
+slope = mleslope(x, y);
+fprintf('slopes:\n');
+fprintf('original = %.2f\n', m);
+fprintf('     fit = %.2f\n', slope);
+
+xx = 0.0:0.1:5.0;     % x-axis values for drawing the lines
+yorg = m*xx;          % original line
+yfit = slope*xx;      % fitted line
+
+% plot; the legend labels have to be listed in the order of the plot commands:
+plot(xx, yorg, '-r', 'linewidth', 5);
+hold on;
+plot(xx, yfit, '-g', 'linewidth', 2);
+plot(x, y, 'ob');
+hold off;
+legend('original', 'fit', 'data', 'Location', 'NorthWest');
+legend('boxoff')
+xlabel('x');
+ylabel('y');
diff --git a/statistics/code/mleslope.m b/statistics/code/mleslope.m
new file mode 100644
index 0000000..58f8ed2
--- /dev/null
+++ b/statistics/code/mleslope.m
@@ -0,0 +1,6 @@
+function slope = mleslope(x, y)
+% MLESLOPE  Maximum likelihood estimate of the slope of a line through
+% the origin, slope = sum(x.*y)/sum(x.^2), for the data pairs in the vectors
+% x and y. Both are flattened to columns, so row and column vectors may be mixed.
+    slope = sum(x(:).*y(:))/sum(x(:).^2);
+end
diff --git a/statistics/lecture/descriptivestatistics.tex b/statistics/lecture/descriptivestatistics.tex
index 4888ada..5661917 100644
--- a/statistics/lecture/descriptivestatistics.tex
+++ b/statistics/lecture/descriptivestatistics.tex
@@ -145,10 +145,10 @@
 
 %%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %\newcommand{\eqref}[1]{(\ref{#1})}
-\newcommand{\eqn}{Eq.}
-\newcommand{\Eqn}{Eq.}
-\newcommand{\eqns}{Eqs.}
-\newcommand{\Eqns}{Eqs.}
+\newcommand{\eqn}{\tr{Eq}{Gl}.}
+\newcommand{\Eqn}{\tr{Eq}{Gl}.}
+\newcommand{\eqns}{\tr{Eqs}{Gln}.}
+\newcommand{\Eqns}{\tr{Eqs}{Gln}.}
 \newcommand{\eqnref}[1]{\eqn~\eqref{#1}}
 \newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}}
 \newcommand{\eqnsref}[1]{\eqns~\eqref{#1}}
@@ -205,13 +205,13 @@
 \newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
   {\medskip}
 
-\newcommand{\showlisting}{yes}
-%\newcommand{\showlisting}{no}
+\newcounter{maxexercise} 
+\setcounter{maxexercise}{9}  % show listings up to exercise maxexercise
 \newcounter{theexercise} 
 \setcounter{theexercise}{1}
 \newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
-  \arabic{theexercise}:} \stepcounter{theexercise}\newline \newcommand{\exercisesource}{#1}}%
-  {\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\equal{\showlisting}{yes}}{\medskip\lstinputlisting{\exercisesource}}{}}\medskip}
+  \arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}%
+  {\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}}
 
 \graphicspath{{figures/}}
 
@@ -455,126 +455,6 @@ Korrelationskoeffizienten nahe 0 (\figrefb{correlationfig}).
 \end{figure}
 
 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Data types}
-
-\subsection{Nominal scale}
-\begin{itemize}
-\item Binary
-  \begin{itemize}
-  \item ``yes/no'',
-  \item ``true/false'',
-  \item ``success/failure'', etc.
-  \end{itemize}
-\item Categorial
-  \begin{itemize}
-  \item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
-  \item blood type (``A/B/AB/0''),
-  \item parts of speech (``noun/veerb/preposition/article/...''),
-  \item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
-  \end{itemize}
-\item Each observation/measurement/sample is put into one category
-\item There is no reasonable order among the categories.\\
-  example: [rods, cones] vs. [cones, rods]
-\item Statistics: mode, i.e. the most common item
-\end{itemize}
-
-\subsection{Ordinal scale}
-\begin{itemize}
-\item Like nominal scale, but with an order
-\item Examples: ranks, ratings
-  \begin{itemize}
-  \item ``bad/ok/good'',
-  \item ``cold/warm/hot'',
-  \item ``young/old'', etc.
-  \end{itemize}
-\item {\bf But:} there is no reasonable measure of {\em distance}
-  between the classes
-\item Statistics: mode, median
-\end{itemize}
-
-\subsection{Interval scale}
-\begin{itemize}
-\item Quantitative/metric values
-\item Reasonable measure of distance between values, but no absolute zero
-\item Examples: 
-  \begin{itemize}
-  \item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
-  \item Direction measured in degrees from magnetic or true north
-  \end{itemize}
-\item Statistics:
-  \begin{itemize}
-  \item Central tendency: mode, median, arithmetic mean
-  \item Dispersion: range, standard deviation
-  \end{itemize}
-\end{itemize}
-
-\subsection{Absolute/ratio scale}
-\begin{itemize}
-\item Like interval scale, but with absolute origin/zero
-\item Examples: 
-  \begin{itemize}
-  \item Temperature in $^\circ$K
-  \item Length, mass, duration, electric charge, ...
-  \item Plane angle, etc.
-  \item Count (e.g. number of spikes in response to a stimulus)
-  \end{itemize}
-\item Statistics:
-  \begin{itemize}
-  \item Central tendency: mode, median, arithmetic, geometric, harmonic mean
-  \item Dispersion: range, standard deviation
-  \item Coefficient of variation (ratio standard deviation/mean)
-  \item All other statistical measures
-  \end{itemize}
-\end{itemize}
-
-\subsection{Data types}
-\begin{itemize}
-\item Data type selects
-  \begin{itemize}
-  \item statistics 
-  \item type of plots (bar graph versus x-y plot)
-  \item correct tests
-  \end{itemize}
-\item Scales exhibit increasing information content from nominal
-  to absolute.\\
-  Conversion  ,,downwards'' is always possible
-\item For example: size measured in meter (ratio scale) $\rightarrow$
-  categories ``small/medium/large'' (ordinal scale)
-\end{itemize}
-
-\subsection{Examples from neuroscience}
-\begin{itemize}
-\item {\bf absolute:}
-  \begin{itemize}
-  \item size of neuron/brain
-  \item length of axon
-  \item ion concentration
-  \item membrane potential
-  \item firing rate
-  \end{itemize}
-
-\item {\bf interval:}
-  \begin{itemize}
-  \item edge orientation
-  \end{itemize}
-
-\item {\bf ordinal:}
-  \begin{itemize}
-  \item stages of a disease
-  \item ratings
-  \end{itemize}
-
-\item {\bf nominal:}
-  \begin{itemize}
-  \item cell type
-  \item odor
-  \item states of an ion channel
-  \end{itemize}
-
-\end{itemize}
-
-
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \chapter{\tr{Bootstrap Methods}{Bootstrap Methoden}}
@@ -669,16 +549,21 @@ Wenn nun den $n$ unabh\"angigen Beobachtungen $x_1, x_2, \ldots x_n$
 die Wahrscheinlichkeitsverteilung $p(x|\theta)$ zugrundeliegt, dann
 ist die Verbundwahrscheinlichkeit $p(x_1,x_2, \ldots x_n|\theta)$ des
 Auftretens der Werte $x_1, x_2, \ldots x_n$ gegeben ein bestimmtes $\theta$
-\[ p(x_1,x_2, \ldots x_n|\theta) = p(x_1|\theta) \cdot p(x_2|\theta)
-\ldots p(x_n|\theta) = \prod_{i=1}^n p(x_i|\theta) \; .\]
+\begin{equation}
+  p(x_1,x_2, \ldots x_n|\theta) = p(x_1|\theta) \cdot p(x_2|\theta)
+  \ldots p(x_n|\theta) = \prod_{i=1}^n p(x_i|\theta) \; .
+\end{equation}
 Andersherum gesehen ist das die Likelihood (deutsch immer noch ``Wahrscheinlichleit'')
 den Parameter $\theta$ zu haben, gegeben die Me{\ss}werte $x_1, x_2, \ldots x_n$,
-\[ {\cal L}(\theta|x_1,x_2, \ldots x_n) = p(x_1,x_2, \ldots x_n|\theta) \]
+\begin{equation}
+  {\cal L}(\theta|x_1,x_2, \ldots x_n) = p(x_1,x_2, \ldots x_n|\theta)
+\end{equation}
 
 Wir sind nun an dem Wert des Parameters $\theta_{mle}$ interessiert, der die
 Likelihood maximiert (``mle'': Maximum-Likelihood Estimate):
-\[ \theta_{mle} = \text{argmax}_{\theta} {\cal L}(\theta|x_1,x_2,
-\ldots x_n) \] 
+\begin{equation}
+  \theta_{mle} = \text{argmax}_{\theta} {\cal L}(\theta|x_1,x_2, \ldots x_n)
+\end{equation}
 $\text{argmax}_xf(x)$ bezeichnet den Wert des Arguments $x$ der Funktion $f(x)$, bei
 dem $f(x)$ ihr globales Maximum annimmt. Wir suchen also den Wert von $\theta$
 bei dem die Likelihood ${\cal L}(\theta)$ ihr Maximum hat.
@@ -731,6 +616,19 @@ Der Maximum-Likelihood-Estimator ist das arithmetische Mittel der Daten. D.h.
 das arithmetische Mittel maximiert die Wahrscheinlichkeit, dass die Daten aus einer
 Normalverteilung mit diesem Mittelwert gezogen worden sind.
 
+\begin{exercise}[mlemeanstd.m]
+  Ziehe $n=50$ normalverteilte Zufallsvariablen mit einem Mittelwert $\ne 0$
+  und einer Standardabweichung $\ne 1$.
+
+  Plotte die Likelihood (aus dem Produkt der Wahrscheinlichkeiten) und
+  die Log-Likelihood (aus der Summe der logarithmierten
+  Wahrscheinlichkeiten) f\"ur (1) den Mittelwert und (2) die
+  Standardabweichung. Vergleiche die Position der Maxima mit den
+  aus den Daten berechneten Mittelwerten und Standardabweichungen.
+
+  Erh\"ohe $n$ auf 1000. Was passiert mit der Likelihood, was mit der Log-Likelihood?
+\end{exercise}
+
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{Kurvenfit als Maximum Likelihood Estimation}
@@ -758,9 +656,9 @@ Maximum weggelassen werden.
 \end{eqnarray*}
 Anstatt nach dem Maximum zu suchen, k\"onnen wir auch das Vorzeichen der Log-Likelihood
 umdrehen und nach dem Minimum suchen. Dabei k\"onnen wir auch den Faktor $1/2$ vor der Summe vernachl\"assigen --- auch das \"andert nichts an der Position des Minimums.
-\begin{eqnarray*}
-  \theta_{mle} & = & \text{argmin}_{\theta} \; \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 \;\; = \;\; \text{argmin}_{\theta} \; \chi^2
-\end{eqnarray*}
+\begin{equation}
+  \theta_{mle} = \text{argmin}_{\theta} \; \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 \;\; = \;\; \text{argmin}_{\theta} \; \chi^2
+\end{equation}
 Die Summer der quadratischen Abst\"ande normiert auf die jeweiligen
 Standardabweichungen wird auch mit $\chi^2$ bezeichnet. Der Wert des
 Parameters $\theta$ welcher den quadratischen Abstand minimiert ist
@@ -782,24 +680,42 @@ mit Steigung $\theta$. Die $\chi^2$-Summe lautet damit
 \[ \chi^2 = \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \; . \]
 Zur Bestimmung des Minimums berechnen wir wieder die erste Ableitung nach $\theta$
 und setzen diese gleich Null:
-\begin{eqnarray*}
-  \frac{\text{d}}{\text{d}\theta}\chi^2 & = & \frac{\text{d}}{\text{d}\theta} \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \\
-  & = & \sum_{i=1}^n \frac{\text{d}}{\text{d}\theta} \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \\
-  & = & -2 \sum_{i=1}^n  \frac{x_i}{\sigma_i} \left( \frac{y_i-\theta x_i}{\sigma_i} \right) \\
-  & = & -2 \sum_{i=1}^n \left( \frac{x_iy_i}{\sigma_i^2} - \theta \frac{x_i^2}{\sigma_i^2} \right) \;\; = \;\; 0 \\
-\Leftrightarrow \quad  \theta \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2} & = & \sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2} \\
-\Leftrightarrow \quad  \theta & = & \frac{\sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2}}{ \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2}}
-\end{eqnarray*}
+\begin{eqnarray}
+  \frac{\text{d}}{\text{d}\theta}\chi^2 & = & \frac{\text{d}}{\text{d}\theta} \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\
+  & = & \sum_{i=1}^n \frac{\text{d}}{\text{d}\theta} \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\
+  & = & -2 \sum_{i=1}^n  \frac{x_i}{\sigma_i} \left( \frac{y_i-\theta x_i}{\sigma_i} \right) \nonumber \\
+  & = & -2 \sum_{i=1}^n \left( \frac{x_iy_i}{\sigma_i^2} - \theta \frac{x_i^2}{\sigma_i^2} \right) \;\; = \;\; 0 \nonumber \\
+\Leftrightarrow \quad  \theta \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2} & = & \sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2} \nonumber \\
+\Leftrightarrow \quad  \theta & = & \frac{\sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2}}{ \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2}} \label{mleslope}
+\end{eqnarray}
 Damit haben wir nun einen anlytischen Ausdruck f\"ur die Bestimmung
 der Steigung $\theta$ des Regressionsgeraden gewonnen. Ein
 Gradientenabstieg ist f\"ur das Fitten der Geradensteigung also gar nicht
-n\"otig. Das gilt allgemein f\"ur das fitten von Koeffizienten von
+n\"otig. Das gilt allgemein f\"ur das Fitten von Koeffizienten von
 linear kombinierten Basisfunktionen. Parameter die nichtlinear in
 einer Funktion enthalten sind k\"onnen aber nicht analytisch aus den
 Daten berechnet werden. Da bleibt dann nur auf numerische Verfahren
 zur Optimierung der Kostenfunktion, wie z.B. der Gradientenabstieg,
 zur\"uckzugreifen.
 
+\begin{exercise}[mleslope.m]
+  Schreibe eine Funktion, die in einem $x$ und einem $y$ Vektor die
+  Datenpaare \"uberreicht bekommt und die Steigung der
+  Ursprungsgeraden \eqnref{mleslope}, die die Likelihood maximiert,
+  zur\"uckgibt ($\sigma=1$).
+\end{exercise}
+
+\begin{exercise}[mlepropfit.m]
+  Schreibe ein Skript, das Datenpaare erzeugt, die um eine
+  Ursprungsgerade mit vorgegebener Steigung streuen. Berechne mit der
+  Funktion die Steigung aus den Daten, vergleiche mit der wahren
+  Steigung, und plotte die urspr\"ungliche sowie die gefittete Gerade
+  zusammen mit den Daten.
+
+  Ver\"andere die Anzahl der Datenpunkte, die Steigung, sowie die
+  Streuung der Daten um die Gerade.
+\end{exercise}
+
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{Fits von Wahrscheinlichkeitsverteilungen}
@@ -817,7 +733,8 @@ wie es normalverteilte Daten machen sollten. (ii) Die Datenwerte sind
 nicht unabh\"angig, da das normierte Histogram sich zu Eins
 aufintegriert. Die beiden Annahmen normalverteilte und unabh\"angige Daten
 die die Minimierung des quadratischen Abstands zu einem Maximum
-Likelihood Estimator machen sind also verletzt.
+Likelihood Estimator machen sind also verletzt. (iii) Das Histogramm
+h\"angt von der Wahl der Klassenbreite ab.
 
 Den direkten Weg, eine Wahrscheinlichkeitsdichtefunktion an ein
 Datenset zu fitten, haben wir oben schon bei dem Beispiel zur
@@ -834,10 +751,36 @@ z.B. dem Gradientenabstieg, gel\"ost wird.
     Wahrscheinlichkeitsdichtefunktion. Links: die 100 Datenpunkte, die aus der Gammaverteilung
     2. Ordnung (rot) gezogen worden sind. Der Maximum-Likelihood-Fit ist orange dargestellt.
     Rechts: das normierte Histogramm der Daten zusammen mit der \"uber Minimierung
-    des quadratischen Abstands zum Histogramm berechneten Fits.}
+    des quadratischen Abstands zum Histogramm berechneten Fits. Dieser Fit ist potentiell schlechter als der Maximum-Likelihood-Fit.}
 \end{figure}
 
 
+\begin{exercise}[mlepdffit.m]
+  Zur Abwechslung ziehen wir uns diesmal Zufallszahlen, die nicht
+  einer Normalverteilung entstammen, sondern der Gamma-Verteilung.
+
+  Finde heraus welche Funktion die Wahrscheinlichkeitsdichtefunktion
+  (probability density function) der Gamma-Verteilung in \code{matlab}
+  berechnet.
+
+  Plotte mit Hilfe dieser Funktion die  Wahrscheinlichkeitsdichtefunktion
+  der Gamma-Verteilung f\"ur verschiedene Werte des (positiven) ``shape'' Parameters.
+  Den ``scale'' Parameter setzen wir auf Eins.
+
+  Finde heraus mit welcher Funktion Gamma-verteilte Zufallszahlen in
+  \code{matlab} gezogen werden k\"onnen. Erzeuge mit dieser Funktion
+  50 Zufallszahlen mit einem der oben geplotteten ``shape'' Parameter.
+
+  Berechne und plotte ein normiertes Histogramm dieser Zufallszahlen.
+
+  Finde heraus mit welcher \code{matlab}-Funktion die Gammaverteilung
+  an die Zufallszahlen nach der Maximum-Likelihood Methode gefittet
+  werden kann.  Bestimme mit dieser Funktion die Parameter der
+  Gammaverteilung aus den Zufallszahlen. Plotte anschlie{\ss}end
+  die Gammaverteilung mit den gefitteten Parametern.
+\end{exercise}
+
+
 \end{document}
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -852,3 +795,123 @@ What is "a statistic"? % dt. Sch\"atzfunktion
   \source{http://en.wikipedia.org/wiki/Statistic}
 \end{definition}
 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Data types}
+
+\subsection{Nominal scale}
+\begin{itemize}
+\item Binary
+  \begin{itemize}
+  \item ``yes/no'',
+  \item ``true/false'',
+  \item ``success/failure'', etc.
+  \end{itemize}
+\item Categorial
+  \begin{itemize}
+  \item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
+  \item blood type (``A/B/AB/0''),
+  \item parts of speech (``noun/verb/preposition/article/...''),
+  \item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
+  \end{itemize}
+\item Each observation/measurement/sample is put into one category
+\item There is no reasonable order among the categories.\\
+  example: [rods, cones] vs. [cones, rods]
+\item Statistics: mode, i.e. the most common item
+\end{itemize}
+
+\subsection{Ordinal scale}
+\begin{itemize}
+\item Like nominal scale, but with an order
+\item Examples: ranks, ratings
+  \begin{itemize}
+  \item ``bad/ok/good'',
+  \item ``cold/warm/hot'',
+  \item ``young/old'', etc.
+  \end{itemize}
+\item {\bf But:} there is no reasonable measure of {\em distance}
+  between the classes
+\item Statistics: mode, median
+\end{itemize}
+
+\subsection{Interval scale}
+\begin{itemize}
+\item Quantitative/metric values
+\item Reasonable measure of distance between values, but no absolute zero
+\item Examples: 
+  \begin{itemize}
+  \item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
+  \item Direction measured in degrees from magnetic or true north
+  \end{itemize}
+\item Statistics:
+  \begin{itemize}
+  \item Central tendency: mode, median, arithmetic mean
+  \item Dispersion: range, standard deviation
+  \end{itemize}
+\end{itemize}
+
+\subsection{Absolute/ratio scale}
+\begin{itemize}
+\item Like interval scale, but with absolute origin/zero
+\item Examples: 
+  \begin{itemize}
+  \item Temperature in K (kelvin)
+  \item Length, mass, duration, electric charge, ...
+  \item Plane angle, etc.
+  \item Count (e.g. number of spikes in response to a stimulus)
+  \end{itemize}
+\item Statistics:
+  \begin{itemize}
+  \item Central tendency: mode, median, arithmetic, geometric, harmonic mean
+  \item Dispersion: range, standard deviation
+  \item Coefficient of variation (ratio standard deviation/mean)
+  \item All other statistical measures
+  \end{itemize}
+\end{itemize}
+
+\subsection{Data types}
+\begin{itemize}
+\item Data type selects
+  \begin{itemize}
+  \item statistics 
+  \item type of plots (bar graph versus x-y plot)
+  \item correct tests
+  \end{itemize}
+\item Scales exhibit increasing information content from nominal
+  to absolute.\\
+  Conversion ``downwards'' is always possible
+\item For example: size measured in meter (ratio scale) $\rightarrow$
+  categories ``small/medium/large'' (ordinal scale)
+\end{itemize}
+
+\subsection{Examples from neuroscience}
+\begin{itemize}
+\item {\bf absolute:}
+  \begin{itemize}
+  \item size of neuron/brain
+  \item length of axon
+  \item ion concentration
+  \item membrane potential
+  \item firing rate
+  \end{itemize}
+
+\item {\bf interval:}
+  \begin{itemize}
+  \item edge orientation
+  \end{itemize}
+
+\item {\bf ordinal:}
+  \begin{itemize}
+  \item stages of a disease
+  \item ratings
+  \end{itemize}
+
+\item {\bf nominal:}
+  \begin{itemize}
+  \item cell type
+  \item odor
+  \item states of an ion channel
+  \end{itemize}
+
+\end{itemize}
+