First lecture on descriptive statistics

2015-10-19 01:15:37 +02:00 · 2015-10-19 01:15:37 +02:00 · fb9008f571
commit fb9008f571
parent 1264b4749a
17 changed files with 693 additions and 229 deletions
--- a/programming/lectures/Makefile
+++ b/programming/lectures/Makefile
@ -0,0 +1,20 @@
 # Build one PDF per lecture .tex file with pdflatex.
 # Only the lectures listed here are built.  A preceding
 # TEXFILES=$(wildcard *.tex) assignment was always overridden by this
 # list and has been removed as dead code.
 TEXFILES := boolean_logical_indexing.tex control_structures.tex data_structures.tex plotting_spike_trains.tex programming_basics.tex scripts_functions.tex sta_stc.tex variables_datatypes.tex vectors_matrices.tex
 PDFFILES := $(TEXFILES:.tex=.pdf)
 # These targets are commands, not files: never treat them as up to date.
 .PHONY : pdf clean cleanall watch
 # Default target: all lecture PDFs.
 pdf : $(PDFFILES)
 # Run pdflatex once, echoing its output to stderr; run it a second time
 # when the output asks for a rerun to resolve cross-references.
 # The trailing "|| true" keeps the build going when grep finds no match.
 $(PDFFILES) : %.pdf : %.tex
 	pdflatex -interaction=scrollmode $< | tee /dev/stderr | grep -F -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
 # Remove editor backups and LaTeX by-products.
 clean :
 	rm -f *~ $(TEXFILES:.tex=.aux) $(TEXFILES:.tex=.log) $(TEXFILES:.tex=.out) $(TEXFILES:.tex=.nav) $(TEXFILES:.tex=.snm) $(TEXFILES:.tex=.toc) $(TEXFILES:.tex=.vrb)
 # Additionally remove the generated PDFs.
 cleanall : clean
 	rm -f $(PDFFILES)
 # Poll twice a second and rebuild whenever a PDF is out of date.
 # $(MAKE) instead of a literal "make" propagates flags and the jobserver.
 watch :
 	while true; do ! $(MAKE) -q pdf && $(MAKE) pdf; sleep 0.5; done
--- a/statistics/code/checkmymedian.m
+++ b/statistics/code/checkmymedian.m
@ -0,0 +1,12 @@
 % Check whether the median returned by mymedian
 % really separates a vector into two halves: the number of elements
 % above the median must equal the number of elements below it.
 % Prints 'error!' for every random vector that violates this.
 for i = 1:140                                    % loop over different vector lengths
  for k = 1:10                                   % try several random vectors per length
    a = randn( i, 1 );                           % generate some data
    m = mymedian( a );                           % compute median (semicolon: do not echo every value)
    if sum( a > m ) ~= sum( a < m )              % counts above and below must match
      disp( 'error!' )
    end
  end
 end
--- a/statistics/code/diehistograms.m
+++ b/statistics/code/diehistograms.m
@ -0,0 +1,24 @@
 % Dependence of the histogram on the number of rolls:
 % for each entry of nrolls, roll the die that many times and show the
 % normalized histogram together with the expected probability 1/6.
 % The script waits for a key press after each figure.
 nrolls = [ 20, 100, 1000 ];
 for i = 1:length(nrolls)
  d = rollthedie( nrolls(i) );
  % plain hist:
  % hist( d )
  % check bin counts of plain hist:
  % h = hist( d )
  % force 6 bins:
  % hist( d, 6 )
  % set the right bin centers:
  bins = 1:6;
  %hist( d, bins )
  % normalize histogram and compare to expectation:
  clf        % empty the figure first; otherwise 'hold on' stacks this plot onto the previous iteration's
  hold on
  plot( [0 7], [1/6 1/6], '-r', 'linewidth', 10 )   % expected probability of each face
  hist( d, bins, 1.0, 'facecolor', 'b' )            % third argument 1.0: bar heights sum to one
  hold off
  pause
 end
--- a/statistics/code/gaussianbins.m
+++ b/statistics/code/gaussianbins.m
@ -0,0 +1,17 @@
 % Histograms of 100 normally distributed numbers with two bin widths.
 % Left panel: plain counts; right panel: normalized by the bin width.
 x = randn( 100, 1 );
 bins1 = -4:2:4;      % wide bins (width 2)
 bins2 = -4:0.5:4;    % narrow bins (width 0.5)
 % left: absolute histograms
 subplot( 1, 2, 1 );
 hold on;
 hist( x, bins1 );
 hist( x, bins2 );
 xlabel('x')
 ylabel('Frequency')
 hold off;
 % right: the third argument of hist sets the sum of the bar heights to
 % 1/binwidth, so that the area of each histogram equals one
 subplot( 1, 2, 2 );
 hold on;
 hist( x, bins1, 1.0/(bins1(2)-bins1(1)) );
 hist( x, bins2, 1.0/(bins2(2)-bins2(1)) );
 xlabel('x')
 ylabel('Probability density')
 hold off;
--- a/statistics/code/gaussianpdf.m
+++ b/statistics/code/gaussianpdf.m
@ -0,0 +1,22 @@
 % Plot the pdf of the standard normal distribution, approximate the
 % probability of the interval [x1, x2) by a sum over the pdf, and compare
 % it with the fraction of random numbers falling into that interval.
 % Statements without a trailing semicolon echo their value.
 % plot Gaussian pdf:
 dx=0.1
 x = [-4.0:dx:4.0];
 % density with zero mean and unit standard deviation evaluated on x:
 p = exp(-0.5*x.^2)/sqrt(2.0*pi);
 hold on
 plot(x,p, 'linewidth', 10 )
 % compute integral between x1 and x2:
 x1=1.0
 x2=2.0
 % sum of the pdf values at grid points in [x1, x2) times the grid spacing:
 P = sum(p((x>=x1)&(x<x2)))*dx
 % draw random numbers:
 r = randn( 10000, 1 );
 % histogram with the grid points as bin centers
 % NOTE(review): the third argument is presumably Octave's normalization
 % (bar heights sum to 1/dx, i.e. unit area) - confirm for the interpreter used
 hist(r,x,1.0/dx)
 % check P:
 % fraction of the random numbers that fall into [x1, x2):
 Pr = sum((r>=x1)&(r<x2))/length(r)
 hold off
--- a/statistics/code/histogramquartiles.m
+++ b/statistics/code/histogramquartiles.m
@ -0,0 +1,24 @@
 % Normalized histogram of normally distributed data in which the four
 % ranges delimited by the quartiles are drawn in different colors.
 % draw the data:
 data = randn( 1, 100000 );
 % histogram with 100 bins (counts and bin centers):
 [counts, centers] = hist( data, 100 );
 % normalize by the total count and by the bin width:
 binwidth = centers(2) - centers(1);
 density = counts / sum( counts ) / binwidth;
 % plot the full histogram:
 bar( centers, density );
 xlabel( 'x' );
 % first, second (median), and third quartile:
 q = quartiles( data );
 %q = quantile( data, [0.25, 0.5, 0.75 ] );
 % select the bins belonging to each of the four ranges:
 lowest = centers < q(1);
 second = (centers >= q(1)) & (centers < q(2));
 third = (centers >= q(2)) & (centers < q(3));
 highest = centers >= q(3);
 % draw each range on top of the full histogram:
 hold on;
 bar( centers(lowest), density(lowest), 'FaceColor', [0.5 0 0.5] );
 bar( centers(second), density(second), 'FaceColor', [0.9 0 0] );
 bar( centers(third), density(third), 'FaceColor', [0 0 0.9] );
 bar( centers(highest), density(highest), 'FaceColor', [0.5 0 0.5] );
 hold off;
--- a/statistics/code/mymedian.m
+++ b/statistics/code/mymedian.m
@ -0,0 +1,13 @@
 function m = mymedian( x )
 % MYMEDIAN  Median of the vector x.
 %   Returns NaN for an empty x, the middle element of the sorted data
 %   for an odd number of elements, and the mean of the two middle
 %   elements for an even number of elements.
  sorted = sort( x );
  n = length( sorted );
  if n == 0
    m = NaN;
    return
  end
  half = floor( n/2 );
  if rem( n, 2 ) == 0
    % even count: average the two elements around the center
    m = (sorted( half ) + sorted( half+1 ))/2;
  else
    % odd count: the single central element
    m = sorted( half+1 );
  end
 end
--- a/statistics/code/quartiles.m
+++ b/statistics/code/quartiles.m
@ -1,25 +1,15 @@
-% generate data:
+function q = quartiles( x )
-x = randn( 1, 100000 );
+  % Returns a vector with the first, second, and third quartile of the vector x.
+  % The second quartile is the median (mean of the two middle elements for
+  % an even number of elements); the first and third quartile are the
+  % sorted elements at the rounded positions n/4 and 3n/4.
+  % Returns an empty vector for empty x.
-
+  xs = sort( x );
-% histogram:
+  if ( length( xs ) == 0 )
-[h,b] = hist( x, 100 );
+    q = [];
-% normalize:
+  elseif ( rem( length( xs ), 2 ) == 0 )
-bs = b(2)-b(1);
+    index = length( xs )/2;
-h = h/sum(h)/bs;
+    m = (xs( index ) + xs( index+1 ))/2;
-
+    % round the index, not the data value; max() keeps the index valid for short vectors
+    q = [ xs(max(1,round(length(xs)/4))), m, xs(round(3*length(xs)/4)) ];
-% plot:
+  else
-bar( b, h );
+    index = (length( xs ) + 1)/2;
-xlabel( 'x' );
+    m = xs( index );
-
+    % round the index, not the data value; max() keeps the index valid for short vectors
+    q = [ xs(max(1,round(length(xs)/4))), m, xs(round(3*length(xs)/4)) ];
-% median, quartile:
+  end
-xs = sort( x )
+end
 % NOTE(review): the statements below use xs, b and h, none of which is
 % assigned within these lines - presumably residue of the former script
 % version of this file; confirm whether they still belong here.
 % quartiles taken as the sorted elements at 1/4, 1/2 and 3/4 of the length:
 q = [ xs(length(xs)/4), xs(length(xs)/2), xs(3*length(xs)/4) ];
 %q = quantile( x, [0.25, 0.5, 0.75 ] );
 % plot:
 % one bar call for each of the four ranges delimited by q:
 bar( b(b<q(1)), h(b<q(1)), 'FaceColor', [0.5 0 0.5] );
 hold on;
 bar( b((b>=q(1)) & (b<q(2))), h((b>=q(1)) & (b<q(2))), 'FaceColor', [0.9 0 0] );
 bar( b((b>=q(2)) & (b<q(3))), h((b>=q(2)) & (b<q(3))), 'FaceColor', [0 0 0.9] );
 bar( b(b>=q(3)), h(b>=q(3)), 'FaceColor', [0.5 0 0.5] );
 hold off;
--- a/statistics/code/randomwalk.m
+++ b/statistics/code/randomwalk.m
@ -1,4 +1,6 @@
 function x = randomwalk(n,p)
 % returns a random walk with n steps and
 % probability p for positive steps.
    r = rand(n,1);
    r(r<p) = -1.0;
    r(r>=p) = +1.0;
--- a/statistics/code/rollthedie.m
+++ b/statistics/code/rollthedie.m
@ -0,0 +1,4 @@
 function x = rollthedie( n )
 % ROLLTHEDIE  Simulate rolling a die n times.
 %   x is an n-by-1 column vector of random integers between 1 and 6.
  faces = [1, 6];              % smallest and largest possible result
  x = randi( faces, n, 1 );
 end
--- a/statistics/lecture/Makefile
+++ b/statistics/lecture/Makefile
@ -0,0 +1,18 @@
 # Build one PDF per .tex file in this directory with pdflatex.
 # ":=" expands the wildcard once instead of on every reference.
 TEXFILES := $(wildcard *.tex)
 PDFFILES := $(TEXFILES:.tex=.pdf)
 # These targets are commands, not files: never treat them as up to date.
 .PHONY : pdf clean cleanall watch
 # Default target: all PDFs.
 pdf : $(PDFFILES)
 # Run pdflatex once, echoing its output to stderr; run it a second time
 # when the output asks for a rerun to resolve cross-references.
 # The trailing "|| true" keeps the build going when grep finds no match.
 $(PDFFILES) : %.pdf : %.tex
 	pdflatex -interaction=scrollmode $< | tee /dev/stderr | grep -F -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
 # Remove editor backups and LaTeX by-products.
 clean :
 	rm -f *~ $(TEXFILES:.tex=.aux) $(TEXFILES:.tex=.log) $(TEXFILES:.tex=.out) $(TEXFILES:.tex=.nav) $(TEXFILES:.tex=.snm) $(TEXFILES:.tex=.toc) $(TEXFILES:.tex=.vrb)
 # Additionally remove the generated PDFs.
 cleanall : clean
 	rm -f $(PDFFILES)
 # Poll twice a second and rebuild whenever a PDF is out of date.
 # $(MAKE) instead of a literal "make" propagates flags and the jobserver.
 watch :
 	while true; do ! $(MAKE) -q pdf && $(MAKE) pdf; sleep 0.5; done
--- a/statistics/lecture/descriptivestatistics.tex
+++ b/statistics/lecture/descriptivestatistics.tex
@ -1,43 +1,55 @@
-\documentclass{beamer}
+\documentclass[12pt]{report}
 %%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\title[]{Scientific Computing --- Descriptive Statistics}
+\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
-\author[]{Jan Benda}
+\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
-\institute[]{Neuroethology}
+\date{WS 15/16}
 \date[]{WS 15/16}
 \titlegraphic{\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
-%%%%% beamer %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>
+% \newcommand{\tr}[2]{#1}  % en
-{
+% \usepackage[english]{babel}
-  \usetheme{Singapore}
+\newcommand{\tr}[2]{#2}  % de
-  \setbeamercovered{opaque}
+\usepackage[german]{babel}
  \usecolortheme{tuebingen}
  \setbeamertemplate{navigation symbols}{}
  \usefonttheme{default}
  \useoutertheme{infolines}
  % \useoutertheme{miniframes}
 }
 %\AtBeginSection[]
 %{
 %  \begin{frame}<beamer>
 %    \begin{center}
 %      \Huge \insertsectionhead
 %    \end{center}
 %  \end{frame}
 %}
 \setbeamertemplate{blocks}[rounded][shadow=true]
 \setcounter{tocdepth}{1}
 %%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\usepackage[english]{babel}
+\usepackage{pslatex}   % nice font for pdf file
 \usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
 %%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
 \setcounter{tocdepth}{1}
 %%%% graphics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \usepackage{graphicx}
 \usepackage{xcolor}
 \newcommand{\texpicture}[1]{{\sffamily\small\input{#1.tex}}}
 %%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \usepackage{listings}
 \lstset{
  inputpath=../code,
  basicstyle=\ttfamily\footnotesize,
  numbers=left,
  showstringspaces=false,
  language=Matlab,
  commentstyle=\itshape\color{darkgray},
  keywordstyle=\color{blue},
  stringstyle=\color{green},
  backgroundcolor=\color{blue!10},
  breaklines=true,
  breakautoindent=true,
  columns=flexible,
  frame=single,
  caption={\protect\filename@parse{\lstname}\protect\filename@base},
  captionpos=t,
  xleftmargin=1em,
  xrightmargin=1em,
  aboveskip=10pt
 }
 %%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \usepackage{amsmath}
 \usepackage{bm} 
 \usepackage{pslatex}   % nice font for pdf file
 %\usepackage{multimedia}
 \usepackage{dsfont}
 \newcommand{\naZ}{\mathds{N}}
 \newcommand{\gaZ}{\mathds{Z}}
@ -47,59 +59,45 @@
 \newcommand{\reZpN}{\mathds{R^+_0}}
 \newcommand{\koZ}{\mathds{C}}
 %%%% graphics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \usepackage{graphicx}
 \newcommand{\texpicture}[1]{{\sffamily\small\input{#1.tex}}}
-%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\usepackage{listings}
+\usepackage{ifthen}
-\lstset{
+
- basicstyle=\ttfamily,
+\newcommand{\code}[1]{\texttt{#1}}
- numbers=left,
+
- showstringspaces=false,
+\newcommand{\source}[1]{    
- language=Matlab,
+  \begin{flushright}
- commentstyle=\itshape\color{darkgray},
+    \color{gray}\scriptsize \url{#1}
- keywordstyle=\color{blue},
+  \end{flushright}
- stringstyle=\color{green},
+}
- backgroundcolor=\color{blue!10},
+
- breaklines=true,
+\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
- breakautoindent=true,
+  {\medskip}
- columns=flexible,
+
- frame=single,
+%\newcommand{\showlisting}{yes}
- captionpos=b,
+\newcommand{\showlisting}{no}
- xleftmargin=1em,
+\newcounter{theexercise} 
- xrightmargin=1em,
+\setcounter{theexercise}{1}
- aboveskip=10pt
+\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
- }
+  \arabic{theexercise}:} \stepcounter{theexercise}\newline \newcommand{\exercisesource}{#1}}%
  {\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\equal{\showlisting}{yes}}{\medskip\lstinputlisting{\exercisesource}}{}}\medskip}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{document} 
-\begin{frame}[plain]
+\maketitle
-  \frametitle{}
+
-  \vspace{-1cm}
+%\tableofcontents
  \titlepage % erzeugt Titelseite
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-  \frametitle{Content}
+\chapter{\tr{Descriptive statistics}{Deskriptive Statistik}}
  \tableofcontents
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Descriptive statistics}
+\section{Statistics of real-valued data}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Statistics of ratio data}
 %-------------------------------------------------------------
 \begin{frame}
  \frametitle{Statistics of ratio data}
  \begin{itemize}
  \item Location, central tendency
    \begin{itemize}
@ -107,7 +105,6 @@
    \item median
    \item mode
    \end{itemize}
  \item Spread, dispersion
    \begin{itemize}
    \item variance
@ -116,163 +113,294 @@
    \item coefficient of variation
    \item minimum, maximum
    \end{itemize}
  \item Shape
    \begin{itemize}
    \item skewness
    \item kurtosis
    \end{itemize}
  \item Dependence
    \begin{itemize}
    \item Pearson correlation coefficient
    \item Spearman's rank correlation coefficient
    \end{itemize}
  \end{itemize}
-\end{frame}
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Median, Quartile, Percentile}
 \begin{figure}[t]
  \includegraphics[width=1\textwidth]{median}
  \caption{\label{medianfig} Median.}
 \end{figure}
 \begin{definition}[\tr{median}{Median}]
  \tr{Half of the observations $X=(x_1, x_2, \ldots, x_n)$ are
    larger than the median and half of them are smaller than the
    median.}  {Der Median teilt eine Liste von Messwerten so in zwei
    H\"alften, dass die eine H\"alfte der Daten nicht gr\"o{\ss}er
    und die andere H\"alfte nicht kleiner als der Median ist.}
 \end{definition}
 \begin{exercise}[mymedian.m]
  \tr{Write a function that computes the median of a vector.}
  {Schreibe eine Funktion, die den Median eines Vektors zur\"uckgibt.}
 \end{exercise}
 \code{matlab} stellt die Funktion \code{median()} zur Berechnung des Medians bereit.
 \begin{exercise}[checkmymedian.m]
  \tr{Write a script that tests whether your median function really
    returns a value with as many data points above it as below
    it. In particular, the script should test data vectors of
    different lengths.}  {Schreibe ein Skript, das testet, ob die
    \code{mymedian} Funktion wirklich die Zahl zur\"uckgibt, \"uber
    der genausoviele Datenwerte liegen wie darunter. Das Skript sollte
    insbesondere verschieden lange Datenvektoren testen.}
 \end{exercise}
 \begin{figure}[t]
  \includegraphics[width=1\textwidth]{quartile}
  \caption{\label{quartilefig} Median und Quartile.}
 \end{figure}
 \begin{definition}[\tr{quartile}{Quartile}]
  Die Quartile Q1, Q2 und Q3 unterteilen die Daten in vier gleich
  gro{\ss}e Gruppen, die jeweils ein Viertel der Daten enthalten.
  Das mittlere Quartil entspricht dem Median.
 \end{definition}
 \begin{exercise}[quartiles.m]
  \tr{Write a function that computes the first, second, and third quartile of a vector.}
  {Schreibe eine Funktion, die das erste, zweite und dritte Quartil als Vektor zur\"uckgibt.}
 \end{exercise}
 \subsection{Histogram}
 Histogramme z\"ahlen die H\"aufigkeit $n_i$ des Auftretens von
 $N=\sum_{i=1}^M n_i$ Messwerten in $M$ Messbereichsklassen $i$ (Bins).
 Die Klassen unterteilen den Wertebereich meist in angrenzende und
 gleich gro{\ss}e Intervalle.  Histogramme sch\"atzen die
 Wahrscheinlichkeitsverteilung der Messwerte ab.
 \begin{exercise}[rollthedie.m]
  \tr{Write a function that simulates rolling a die $n$ times.}
  {Schreibe eine Funktion, die das $n$-malige W\"urfeln mit einem W\"urfel simuliert.}
 \end{exercise}
 \begin{exercise}[diehistograms.m]
  \tr{Plot histograms from rolling the die 20, 100, 1000 times.  Use
    the plain hist(x) function, force 6 bins via hist( x, 6 ), and set
    meaningful bin positions.}  {Plotte Histogramme von 20, 100, und
    1000-mal w\"urfeln.  Benutze \code{hist(x)}, erzwinge sechs Bins
    mit \code{hist(x,6)}, und setze selbst sinnvolle Bins. Normiere
    anschlie{\ss}end das Histogramm auf geeignete Weise.}
 \end{exercise}
 \begin{figure}[t]
  \includegraphics[width=1\textwidth]{diehistograms}
  \caption{\label{diehistogramsfig} \tr{Histograms of rolling a die
      100 or 500 times.  Left: plain histograms counting the frequency
      of the six possible outcomes.  Right: the same data normalized
      to their sum.}{Histogramme des Ergebnisses von 100 oder 500 mal
      W\"urfeln. Links: das absolute Histogramm z\"ahlt die Anzahl des
      Auftretens jeder Augenzahl. Rechts: Normiert auf die Summe des
      Histogramms werden die beiden Messungen vergleichbar.}}
 \end{figure}
 Bei ganzzahligen Messdaten (z.B. die Augenzahl eines W\"urfels) 
 kann f\"ur jede auftretende Zahl eine Klasse definiert werden.
 Damit die H\"ohe der Histogrammbalken unabh\"angig von der Anzahl der Messwerte wird,
 normiert man das Histogramm auf die Anzahl der Messwerte.
 Die H\"ohe der Histogrammbalken gibt dann die Wahrscheinlichkeit $P(x_i)$
 des Auftretens der Gr\"o{\ss}e $x_i$ in der $i$-ten Klasse an
 \[ P_i = \frac{n_i}{N} = \frac{n_i}{\sum_{i=1}^M n_i} \; . \]
 \subsection{Probability density function}
 Meistens haben wir es jedoch mit reellen Messgr\"o{\ss}en zu tun.
 \begin{exercise}[gaussianbins.m]
  \tr{Draw 100 random data from a Gaussian distribution and plot
    histograms with different bin sizes of the data.}  {Ziehe 100
    normalverteilte Zufallszahlen und erzeuge Histogramme mit
    unterschiedlichen Klassenbreiten. Was f\"allt auf?}
 \end{exercise}
 \begin{figure}[t]
  \includegraphics[width=1\textwidth]{pdfhistogram}
  \caption{\label{pdfhistogramfig} \tr{Histograms of normally
      distributed data with different bin sizes.}{Histogramme mit
      verschiedenen Klassenbreiten eines Datensatzes von
      normalverteilten Messwerten. Links: Die H\"ohe des absoluten
      Histogramms h\"angt von der Klassenbreite ab. Rechts: Bei auf
      das Integral normierten Histogrammen werden auch
      unterschiedliche Klassenbreiten vergleichbar.}}
 \end{figure}
 Histogramme von reellen Messwerten m\"ussen auf das Integral 1 normiert werden, so dass
 das Integral (nicht die Summe) \"uber das Histogramm eins ergibt. Das Integral
 ist die Fl\"ache des Histogramms. Diese setzt sich zusammen aus der Fl\"ache der einzelnen
 Histogrammbalken. Diese haben die H\"ohe $n_i$ und die Breite $\Delta x$. Die Gesamtfl\"ache
 $A$ des Histogramms ist also
 \[ A = \sum_{i=1}^M ( n_i \cdot \Delta x ) = \Delta x \sum_{i=1}^M n_i \]
 und das normierte Histogramm hat die H\"ohe
 \[ p(x_i) = \frac{n_i}{\Delta x \sum_{i=1}^M n_i} \]
 Es muss also nicht nur durch die Summe, sondern auch durch die Breite der Klassen $\Delta x$
 geteilt werden.
 $p(x_i)$ kann keine Wahrscheinlichkeit sein, da $p(x_i)$ nun eine
 Einheit hat --- das Inverse der Einheit der Messgr\"o{\ss}e $x$. Man
 spricht von einer Wahrscheinlichkeitsdichte.
 \begin{figure}[t]
  \includegraphics[width=1\textwidth]{pdfprobabilities}
  \caption{\label{pdfprobabilitiesfig} Wahrscheinlichkeiten bei
  einer Wahrscheinlichkeitsdichtefunktion.}
 \end{figure}
 \begin{exercise}
  \tr{Plot the Gaussian probability density}{Plotte die Gauss'sche Wahrscheinlichkeitsdichte }
  \[ p_g(x) = \frac{1}{\sqrt{2\pi\sigma^2}}e^{-\frac{(x-\mu)^2}{2\sigma^2}}\]
  \tr{What does it mean?}{Was bedeutet die folgende Wahrscheinlichkeit?}
  \[ P(x_1 < x < x_2) = \int_{x_1}^{x_2} p(x) \, dx \]
  \tr{How large is}{Wie gro{\ss} ist}
  \[ \int_{-\infty}^{+\infty} p(x) \, dx \; ?\]
  \tr{Why?}{Warum?}
 \end{exercise}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Data types}
-%-------------------------------------------------------------
+\subsubsection{Nominal scale}
-\begin{frame}
+\begin{itemize}
-  \frametitle{Data types: nominal scale}
+\item Binary
  \begin{itemize}
-  \item Binary
+  \item ``yes/no'',
-    \begin{itemize}
+  \item ``true/false'',
-    \item ``yes/no'',
+  \item ``success/failure'', etc.
    \item ``true/false'',
    \item ``success/failure'', etc.
    \end{itemize}
  \item Categorial
    \begin{itemize}
    \item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
    \item blood type (``A/B/AB/0''),
    \item parts of speech (``noun/verb/preposition/article/...''),
    \item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
    \end{itemize}
  \item Each observation/measurement/sample is put into one category
  \item There is no reasonable order among the categories.\\
    example: [rods, cones] vs. [cones, rods]
  \pause
  \item Statistics: mode, i.e. the most common item
  \end{itemize}
-\end{frame}
+\item Categorial
 %-------------------------------------------------------------
 \begin{frame}
  \frametitle{Data types: ordinal scale}
  \begin{itemize}
-  \item Like nominal scale, but with an order
+  \item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
-  \item Examples: ranks, ratings
+  \item blood type (``A/B/AB/0''),
-    \begin{itemize}
+  \item parts of speech (``noun/verb/preposition/article/...''),
-    \item ``bad/ok/good'',
+  \item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
    \item ``cold/warm/hot'',
    \item ``young/old'', etc.
    \end{itemize}
  \item {\bf But:} there is no reasonable measure of {\em distance}
    between the classes
  \pause
  \item Statistics: mode, median
  \end{itemize}
-\end{frame}
+\item Each observation/measurement/sample is put into one category
 \item There is no reasonable order among the categories.\\
  example: [rods, cones] vs. [cones, rods]
 \item Statistics: mode, i.e. the most common item
 \end{itemize}
-%-------------------------------------------------------------
+\subsubsection{Ordinal scale}
-\begin{frame}
+\begin{itemize}
-  \frametitle{Data types: interval scale}
+\item Like nominal scale, but with an order
 \item Examples: ranks, ratings
  \begin{itemize}
-  \item Quantitative/metric values
+  \item ``bad/ok/good'',
-  \item Reasonable measure of distance between values, but no absolute zero
+  \item ``cold/warm/hot'',
-  \item Examples: 
+  \item ``young/old'', etc.
    \begin{itemize}
    \item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
    \item Direction measured in degrees from magnetic or true north
    \end{itemize}
  \pause
  \item Statistics:
    \begin{itemize}
    \item Central tendency: mode, median, arithmetic mean
    \item Dispersion: range, standard deviation
    \end{itemize}
  \end{itemize}
-\end{frame}
+\item {\bf But:} there is no reasonable measure of {\em distance}
  between the classes
 \item Statistics: mode, median
 \end{itemize}
-%-------------------------------------------------------------
+\subsubsection{Interval scale}
-\begin{frame}
+\begin{itemize}
-  \frametitle{Data types: absolute/ratio scale}
+\item Quantitative/metric values
 \item Reasonable measure of distance between values, but no absolute zero
 \item Examples: 
  \begin{itemize}
-  \item Like interval scale, but with absolute origin/zero
+  \item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
-  \item Examples: 
+  \item Direction measured in degrees from magnetic or true north
    \begin{itemize}
    \item Temperature in K
    \item Length, mass, duration, electric charge, ...
    \item Plane angle, etc.
    \item Count (e.g. number of spikes in response to a stimulus)
    \end{itemize}
  \pause
  \item Statistics:
    \begin{itemize}
    \item Central tendency: mode, median, arithmetic, geometric, harmonic mean
    \item Dispersion: range, standard deviation
    \item Coefficient of variation (ratio standard deviation/mean)
    \item All other statistical measures
    \end{itemize}
  \end{itemize}
-\end{frame}
+\item Statistics:
 %-------------------------------------------------------------
 \begin{frame}
  \frametitle{Data types}
  \begin{itemize}
-  \item Data type selects
+  \item Central tendency: mode, median, arithmetic mean
-    \begin{itemize}
+  \item Dispersion: range, standard deviation
    \item statistics 
    \item type of plots (bar graph versus x-y plot)
    \item correct tests
    \end{itemize}
  \item Scales exhibit increasing information content from nominal
    to absolute.\\
    Conversion  ,,downwards'' is always possible
  \item For example: size measured in meter (ratio scale) $\rightarrow$
    categories ``small/medium/large'' (ordinal scale)
  \end{itemize}
-\end{frame}
+\end{itemize}
-%-------------------------------------------------------------
+\subsubsection{Absolute/ratio scale}
-\begin{frame}
+\begin{itemize}
-  \frametitle{Examples from neuroscience}
+\item Like interval scale, but with absolute origin/zero
 \item Examples: 
  \begin{itemize}
-
+  \item Temperature in K
-  \item {\bf absolute:}\pause
+  \item Length, mass, duration, electric charge, ...
-    \begin{itemize}
+  \item Plane angle, etc.
-    \item size of neuron/brain
+  \item Count (e.g. number of spikes in response to a stimulus)
    \item length of axon
    \item ion concentration
    \item membrane potential
    \item firing rate
    \end{itemize}
  \item {\bf interval:}\pause
    \begin{itemize}
    \item edge orientation
    \end{itemize}
  \item {\bf ordinal:} \pause 
    \begin{itemize}
    \item stages of a disease
    \item ratings
    \end{itemize}
  \item {\bf nominal:}\pause 
    \begin{itemize}
    \item cell type
    \item odor
    \item states of an ion channel
    \end{itemize}
  \end{itemize}
-\end{frame}
+\item Statistics:
  \begin{itemize}
  \item Central tendency: mode, median, arithmetic, geometric, harmonic mean
  \item Dispersion: range, standard deviation
  \item Coefficient of variation (ratio standard deviation/mean)
  \item All other statistical measures
  \end{itemize}
 \end{itemize}
 \subsubsection{Data types}
 \begin{itemize}
 \item Data type selects
  \begin{itemize}
  \item statistics 
  \item type of plots (bar graph versus x-y plot)
  \item correct tests
  \end{itemize}
 \item Scales exhibit increasing information content from nominal
  to absolute.\\
  Conversion  ,,downwards'' is always possible
 \item For example: size measured in meter (ratio scale) $\rightarrow$
  categories ``small/medium/large'' (ordinal scale)
 \end{itemize}
 \subsubsection{Examples from neuroscience}
 \begin{itemize}
 \item {\bf absolute:}
  \begin{itemize}
  \item size of neuron/brain
  \item length of axon
  \item ion concentration
  \item membrane potential
  \item firing rate
  \end{itemize}
 \item {\bf interval:}
  \begin{itemize}
  \item edge orientation
  \end{itemize}
 \item {\bf ordinal:}
  \begin{itemize}
  \item stages of a disease
  \item ratings
  \end{itemize}
 \item {\bf nominal:}
  \begin{itemize}
  \item cell type
  \item odor
  \item states of an ion channel
  \end{itemize}
 \end{itemize}
 \end{document}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Statistics}
 What is ``a statistic''? % dt. Sch\"atzfunktion
 \begin{definition}[statistic]
  A statistic (singular) is a single measure of some attribute of a
  sample (e.g., its arithmetic mean value). It is calculated by
  applying a function (statistical algorithm) to the values of the
  items of the sample, which are known together as a set of data.
  \source{http://en.wikipedia.org/wiki/Statistic}
 \end{definition}
--- a/statistics/lecture/diehistograms.py
+++ b/statistics/lecture/diehistograms.py
@ -0,0 +1,32 @@
 import numpy as np
 import matplotlib.pyplot as plt
 # Figure with histograms of 100 and 500 rolls of a die:
 # left panel absolute counts, right panel normalized histograms.
 # roll the die (the upper bound of randint is exclusive, so 7 yields 1..6;
 # replaces the deprecated np.random.random_integers( 1, 6, n )):
 x1 = np.random.randint( 1, 7, 100 )
 x2 = np.random.randint( 1, 7, 500 )
 # bin edges centered on the integers 1 to 6:
 bins = np.arange(0.5, 7, 1.0)
 plt.xkcd()
 fig = plt.figure( figsize=(6,4) )
 # left panel: absolute counts
 ax = fig.add_subplot( 1, 2, 1 )
 ax.spines['right'].set_visible(False)
 ax.spines['top'].set_visible(False)
 ax.yaxis.set_ticks_position('left')
 ax.xaxis.set_ticks_position('bottom')
 ax.set_xlabel( 'x' )
 ax.set_ylabel( 'Frequency' )
 ax.hist([x2, x1], bins, color=['#FFCC00', '#FFFF66' ])
 # right panel: each data set normalized on its own; with bins of width
 # one the density equals the probability of each face
 ax = fig.add_subplot( 1, 2, 2 )
 ax.spines['right'].set_visible(False)
 ax.spines['top'].set_visible(False)
 ax.yaxis.set_ticks_position('left')
 ax.xaxis.set_ticks_position('bottom')
 ax.set_xlabel( 'x' )
 ax.set_ylabel( 'Probability' )
 # 'density' replaces the 'normed' keyword that matplotlib has removed
 ax.hist([x2, x1], bins, density=True, color=['#FFCC00', '#FFFF66' ])
 plt.tight_layout()
 fig.savefig( 'diehistograms.pdf' )
 plt.show()
--- a/statistics/lecture/median.py
+++ b/statistics/lecture/median.py
@ -0,0 +1,33 @@
 import numpy as np
 import matplotlib.pyplot as plt
 # Figure illustrating the median: a Gaussian density whose two halves
 # left and right of zero are filled in different colors.
 # density of the standard normal distribution on a fine grid:
 x = np.arange( -4.0, 4.0, 0.01 )
 g = np.exp(-0.5*x*x)/np.sqrt(2.0*np.pi)
 plt.xkcd()
 fig = plt.figure( figsize=(6,4) )
 ax = fig.add_subplot( 1, 1, 1 )
 # show only the left and bottom axes:
 for side in ('right', 'top'):
     ax.spines[side].set_visible(False)
 ax.yaxis.set_ticks_position('left')
 ax.xaxis.set_ticks_position('bottom')
 ax.set_xlabel( 'x' )
 ax.set_ylabel( 'Probability density p(x)' )
 ax.set_ylim( 0.0, 0.46 )
 ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )
 # label both halves:
 for xpos in (-1.0, +1.0):
     ax.text(xpos, 0.1, '50%', ha='center' )
 ax.annotate('Median',
            xy=(0.1, 0.3), xycoords='data',
            xytext=(1.6, 0.35), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
            connectionstyle="angle3,angleA=10,angleB=40") )
 # fill the area under the curve on either side of zero:
 lower = x < 0
 upper = x > 0
 ax.fill_between( x[lower], 0.0, g[lower], color='#ffcc00' )
 ax.fill_between( x[upper], 0.0, g[upper], color='#99ff00' )
 # the density itself and a vertical line at zero:
 ax.plot(x,g, 'b', lw=4)
 ax.plot([0.0, 0.0], [0.0, 0.45], 'k', lw=2 )
 plt.tight_layout()
 fig.savefig( 'median.pdf' )
 plt.show()
--- a/statistics/lecture/pdfhistogram.py
+++ b/statistics/lecture/pdfhistogram.py
@ -0,0 +1,39 @@
 import numpy as np
 import matplotlib.pyplot as plt
 # Figure comparing histograms of the same 100 normally distributed
 # numbers for 5 and for 20 bins: left absolute counts, right normalized.
 # normal distribution (x and g are not used further below):
 x = np.arange( -4.0, 4.0, 0.01 )
 g = np.exp(-0.5*x*x)/np.sqrt(2.0*np.pi)
 # the data:
 r = np.random.randn( 100 )
 plt.xkcd()
 fig = plt.figure( figsize=(6,4) )
 # left panel: absolute counts
 ax = fig.add_subplot( 1, 2, 1 )
 ax.spines['right'].set_visible(False)
 ax.spines['top'].set_visible(False)
 ax.yaxis.set_ticks_position('left')
 ax.xaxis.set_ticks_position('bottom')
 ax.set_xlabel( 'x' )
 ax.set_ylabel( 'Frequency' )
 #ax.set_ylim( 0.0, 0.46 )
 #ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )
 ax.hist(r, 5, color='#CC0000')
 ax.hist(r, 20, color='#FFCC00')
 # right panel: histograms normalized to unit area
 ax = fig.add_subplot( 1, 2, 2 )
 ax.spines['right'].set_visible(False)
 ax.spines['top'].set_visible(False)
 ax.yaxis.set_ticks_position('left')
 ax.xaxis.set_ticks_position('bottom')
 ax.set_xlabel( 'x' )
 ax.set_ylabel( 'Probability density p(x)' )
 #ax.set_ylim( 0.0, 0.46 )
 #ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )
 # 'density' replaces the 'normed' keyword that matplotlib has removed
 ax.hist(r, 5, density=True, color='#CC0000')
 ax.hist(r, 20, density=True, color='#FFCC00')
 plt.tight_layout()
 fig.savefig( 'pdfhistogram.pdf' )
 plt.show()
--- a/statistics/lecture/pdfprobabilities.py
+++ b/statistics/lecture/pdfprobabilities.py
@ -0,0 +1,36 @@
 import numpy as np
 import matplotlib.pyplot as plt
 # Figure illustrating a probability as the area under a Gaussian
 # density between x1 and x2.
 # normal distribution:
 x = np.arange( -3.0, 5.0, 0.01 )
 g = np.exp(-0.5*x*x)/np.sqrt(2.0*np.pi)
 # limits of the filled interval:
 x1=0.0
 x2=1.0
 plt.xkcd()
 fig = plt.figure( figsize=(6,4) )
 ax = fig.add_subplot( 1, 1, 1 )
 ax.spines['right'].set_visible(False)
 ax.spines['top'].set_visible(False)
 ax.yaxis.set_ticks_position('left')
 ax.xaxis.set_ticks_position('bottom')
 ax.set_xlabel( 'x' )
 ax.set_ylabel( 'Probability density p(x)' )
 ax.set_ylim( 0.0, 0.46 )
 ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )
 ax.annotate('Gaussian',
            xy=(-1.0, 0.28), xycoords='data',
            xytext=(-2.5, 0.35), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.5,0.0),
            connectionstyle="angle3,angleA=10,angleB=110") )
 # raw string: the label is unchanged, but \i and \, are no longer
 # treated as (invalid) string escape sequences
 ax.annotate(r'$P(0<x<1) = \int_0^1 p(x) \, dx$',
            xy=(0.6, 0.28), xycoords='data',
            xytext=(1.2, 0.4), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
            connectionstyle="angle3,angleA=10,angleB=80") )
 # fill the area under the curve between x1 and x2:
 ax.fill_between( x[(x>x1)&(x<x2)], 0.0, g[(x>x1)&(x<x2)], color='#cc0000' )
 ax.plot(x,g, 'b', lw=4)
 plt.tight_layout()
 fig.savefig( 'pdfprobabilities.pdf' )
 plt.show()
--- a/statistics/lecture/quartile.py
+++ b/statistics/lecture/quartile.py
@ -0,0 +1,50 @@
 import numpy as np
 import matplotlib.pyplot as plt
 # Figure illustrating the quartiles: a Gaussian density whose four
 # ranges delimited by the values in q are filled in different colors.
 # density of the standard normal distribution on a fine grid:
 x = np.arange( -4.0, 4.0, 0.01 )
 g = np.exp(-0.5*x*x)/np.sqrt(2.0*np.pi)
 # positions used as first quartile, median, and third quartile:
 q = [ -0.67488, 0.0, 0.67488 ]
 plt.xkcd()
 fig = plt.figure( figsize=(6,4) )
 ax = fig.add_subplot( 1, 1, 1 )
 # show only the left and bottom axes:
 for side in ('right', 'top'):
     ax.spines[side].set_visible(False)
 ax.yaxis.set_ticks_position('left')
 ax.xaxis.set_ticks_position('bottom')
 ax.set_xlabel( 'x' )
 ax.set_ylabel( 'Probability density p(x)' )
 ax.set_ylim( 0.0, 0.46 )
 ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )
 # label each of the four ranges:
 for xpos in (-1.2, -0.35, +0.35, +1.2):
     ax.text(xpos, 0.1, '25%', ha='center' )
 ax.annotate('1. quartile',
            xy=(-0.75, 0.2), xycoords='data',
            xytext=(-1.7, 0.25), textcoords='data', ha='right',
            arrowprops=dict(arrowstyle="->", relpos=(1.0,0.5),
            connectionstyle="angle3,angleA=170,angleB=120") )
 ax.annotate('3. quartile',
            xy=(0.75, 0.17), xycoords='data',
            xytext=(1.7, 0.22), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
            connectionstyle="angle3,angleA=10,angleB=70") )
 ax.annotate('Median',
            xy=(0.1, 0.3), xycoords='data',
            xytext=(1.6, 0.35), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
            connectionstyle="angle3,angleA=10,angleB=40") )
 # fill the area under the curve, one color per range:
 regions = [ (x<q[0], '#ffcc00'),
             ((x>q[0])&(x<q[1]), '#ff0000'),
             ((x>q[1])&(x<q[2]), '#ff9900'),
             (x>q[2], '#ffff66') ]
 for mask, shade in regions:
     ax.fill_between( x[mask], 0.0, g[mask], color=shade )
 # the density itself:
 ax.plot(x,g, 'b', lw=4)
 # vertical lines at zero and at the outer two values of q:
 for xline, top in ((0.0, 0.45), (q[0], 0.4), (q[2], 0.4)):
     ax.plot([xline, xline], [0.0, top], 'k', lw=2 )
 plt.tight_layout()
 fig.savefig( 'quartile.pdf' )
 plt.show()