First lecture on descriptive statistics

2015-10-19 01:15:37 +02:00 · 2015-10-19 01:15:37 +02:00 · fb9008f571
commit fb9008f571
parent 1264b4749a
17 changed files with 693 additions and 229 deletions
--- a/programming/lectures/Makefile
+++ b/programming/lectures/Makefile
@ -0,0 +1,20 @@
+# Lecture sources. The wildcard default is overridden by the explicit list
+# right below it, so only the listed lectures are built.
+TEXFILES=$(wildcard *.tex)
+TEXFILES=boolean_logical_indexing.tex control_structures.tex data_structures.tex plotting_spike_trains.tex programming_basics.tex scripts_functions.tex sta_stc.tex variables_datatypes.tex vectors_matrices.tex
+
+PDFFILES=$(TEXFILES:.tex=.pdf)
+
+# None of these targets creates a file of its own name.
+.PHONY : pdf clean cleanall watch
+
+# Default target: build the pdf of every lecture.
+pdf : $(PDFFILES)
+
+# Run pdflatex once, and a second time only if its output asks for a rerun.
+# The trailing '|| true' means this recipe never fails.
+$(PDFFILES) : %.pdf : %.tex
+	pdflatex -interaction=scrollmode $< | tee /dev/stderr | grep -qF "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
+
+# Remove editor backups and LaTeX by-products.
+clean :
+	rm -f *~ $(TEXFILES:.tex=.aux) $(TEXFILES:.tex=.log) $(TEXFILES:.tex=.out) $(TEXFILES:.tex=.nav) $(TEXFILES:.tex=.snm) $(TEXFILES:.tex=.toc) $(TEXFILES:.tex=.vrb)
+
+# Additionally remove the generated pdf files.
+cleanall : clean
+	rm -f $(PDFFILES)
+
+# Rebuild whenever a pdf is out of date, polling every half second.
+# $(MAKE) (instead of a literal 'make') reuses the running make and its flags.
+watch :
+	while true; do ! $(MAKE) -q pdf && $(MAKE) pdf; sleep 0.5; done
+
+
--- a/statistics/code/checkmymedian.m
+++ b/statistics/code/checkmymedian.m
@ -0,0 +1,12 @@
+% check whether the median returned by mymedian
+% really separates a vector into two halves:
+% for every vector length from 1 to 140, ten random vectors are drawn and
+% 'error!' is displayed whenever the number of elements above the median
+% differs from the number of elements below it.
+for i = 1:140                                    % loop over different length
+  for k = 1:10                                   % try several times
+    a = randn( i, 1 );                           % generate some data
+    m = mymedian( a );                           % compute median (semicolon: do not echo m on every pass)
+    if length( a(a>m) ) ~= length( a(a<m) )      % check
+      disp( 'error!' )
+    end
+  end
+end
+
--- a/statistics/code/diehistograms.m
+++ b/statistics/code/diehistograms.m
@ -0,0 +1,24 @@
+% dependence of histogram on number of rolls:
+% for each number of rolls the die is rolled, the normalized histogram is
+% drawn together with the expected value 1/6, and the script waits for a
+% key press before moving on to the next number of rolls.
+nrolls = [ 20, 100, 1000 ];
+for i = [1:length(nrolls)]
+  d = rollthedie( nrolls(i) );
+  % plain hist:
+  % hist( d )
+
+  % check bin counts of plain hist:
+  % h = hist( d )
+
+  % force 6 bins:
+  % hist( d, 6 )
+
+  % set the right bin centers:
+  bins = 1:6;
+  %hist( d, bins )
+
+  % normalize histogram and compare to expectation:
+  clf                 % empty the figure, otherwise 'hold on' keeps the previous histogram
+  hold on
+  plot( [0 7], [1/6 1/6], '-r', 'linewidth', 10 )
+  hist( d, bins, 1.0, 'facecolor', 'b' )
+  hold off
+  pause
+end
--- a/statistics/code/gaussianbins.m
+++ b/statistics/code/gaussianbins.m
@ -0,0 +1,17 @@
+% histograms of 100 normally distributed numbers with two different bin widths
+x = randn( 100, 1 );
+bins1 = -4:2:4;      % bin centers, bin width 2
+bins2 = -4:0.5:4;    % bin centers, bin width 0.5
+% left panel: plain counts for both sets of bins
+subplot( 1, 2, 1 );
+hold on;
+hist( x, bins1 );
+hist( x, bins2 );
+xlabel('x')
+ylabel('Frequency')
+hold off;
+% right panel: the same histograms, with one over the bin width passed
+% as third argument to hist
+subplot( 1, 2, 2 );
+hold on;
+hist( x, bins1, 1.0/(bins1(2)-bins1(1)) );
+hist( x, bins2, 1.0/(bins2(2)-bins2(1)) );
+xlabel('x')
+ylabel('Probability density')
+hold off;
--- a/statistics/code/gaussianpdf.m
+++ b/statistics/code/gaussianpdf.m
@ -0,0 +1,22 @@
+% plot Gaussian pdf:
+% p is exp(-x^2/2)/sqrt(2*pi) evaluated on a grid from -4 to 4 with spacing dx.
+% Assignments without a trailing semicolon are echoed when the script runs.
+dx=0.1
+x = [-4.0:dx:4.0];
+p = exp(-0.5*x.^2)/sqrt(2.0*pi);
+hold on
+plot(x,p, 'linewidth', 10 )
+
+% compute integral between x1 and x2:
+% sum of p over the grid points with x1 <= x < x2, multiplied by the grid spacing
+x1=1.0
+x2=2.0
+P = sum(p((x>=x1)&(x<x2)))*dx
+
+% draw random numbers:
+% the histogram uses the grid x as bin centers
+% NOTE(review): the third hist argument 1.0/dx presumably normalizes the
+% histogram to unit area (Octave-style norm argument) - confirm
+r = randn( 10000, 1 );
+hist(r,x,1.0/dx)
+
+% check P:
+% fraction of the random numbers with x1 <= r < x2
+Pr = sum((r>=x1)&(r<x2))/length(r)
+
+hold off
+
+
--- a/statistics/code/histogramquartiles.m
+++ b/statistics/code/histogramquartiles.m
@ -0,0 +1,24 @@
+% generate data:
+% 100000 normally distributed random numbers in a row vector
+x = randn( 1, 100000 );
+
+% histogram:
+% h: counts per bin, b: bin centers of 100 bins
+[h,b] = hist( x, 100 );
+% normalize:
+% bs is the distance between the first two bin centers (the bin width);
+% dividing the counts by their sum and by bs makes the bar areas sum to one
+bs = b(2)-b(1);
+h = h/sum(h)/bs;
+
+% plot:
+bar( b, h );
+xlabel( 'x' );
+
+% median, quartile:
+% q(1), q(2), q(3) are used below as the boundaries between the four groups
+q = quartiles( x );
+%q = quantile( x, [0.25, 0.5, 0.75 ] );
+
+% plot:
+% redraw the bars group by group, selected by comparing the bin centers
+% with the quartiles; the two outer groups share one color
+hold on;
+bar( b(b<q(1)), h(b<q(1)), 'FaceColor', [0.5 0 0.5] );
+bar( b((b>=q(1)) & (b<q(2))), h((b>=q(1)) & (b<q(2))), 'FaceColor', [0.9 0 0] );
+bar( b((b>=q(2)) & (b<q(3))), h((b>=q(2)) & (b<q(3))), 'FaceColor', [0 0 0.9] );
+bar( b(b>=q(3)), h(b>=q(3)), 'FaceColor', [0.5 0 0.5] );
+hold off;
--- a/statistics/code/mymedian.m
+++ b/statistics/code/mymedian.m
@ -0,0 +1,13 @@
+function m = mymedian( x )
+% MYMEDIAN  median of the values in the vector x.
+%   Returns NaN for an empty x. For an even number of elements the mean
+%   of the two middle values of the sorted data is returned, for an odd
+%   number the middle value itself.
+  sorted = sort( x );
+  n = length( sorted );
+  if n == 0
+    m = NaN;
+    return
+  end
+  half = floor( n/2 );
+  if 2*half == n
+    % even count: average the two central elements
+    m = (sorted( half ) + sorted( half+1 ))/2;
+  else
+    % odd count: the central element is at position (n+1)/2
+    m = sorted( half+1 );
+  end
+end
--- a/statistics/code/quartiles.m
+++ b/statistics/code/quartiles.m
@ -1,25 +1,15 @@
-% generate data:
-x = randn( 1, 100000 );
-
-% histogram:
-[h,b] = hist( x, 100 );
-% normalize:
-bs = b(2)-b(1);
-h = h/sum(h)/bs;
-
-% plot:
-bar( b, h );
-xlabel( 'x' );
-
-% median, quartile:
-xs = sort( x )
-q = [ xs(length(xs)/4), xs(length(xs)/2), xs(3*length(xs)/4) ];
-%q = quantile( x, [0.25, 0.5, 0.75 ] );
-
-% plot:
-bar( b(b<q(1)), h(b<q(1)), 'FaceColor', [0.5 0 0.5] );
-hold on;
-bar( b((b>=q(1)) & (b<q(2))), h((b>=q(1)) & (b<q(2))), 'FaceColor', [0.9 0 0] );
-bar( b((b>=q(2)) & (b<q(3))), h((b>=q(2)) & (b<q(3))), 'FaceColor', [0 0 0.9] );
-bar( b(b>=q(3)), h(b>=q(3)), 'FaceColor', [0.5 0 0.5] );
-hold off;
+function q = quartiles( x )
+  % returns a vector with the first, second, and third quartile of the vector x
+  % (an empty vector if x is empty).
+  % The second quartile is the median: the middle value of the sorted data, or
+  % the mean of the two middle values for an even number of elements.
+  % The first and third quartile are the sorted values at the rounded
+  % positions n/4 and 3n/4 (no interpolation).
+  xs = sort( x );
+  n = length( xs );
+  if ( n == 0 )
+    q = [];
+    return
+  end
+  if ( rem( n, 2 ) == 0 )
+    m = (xs( n/2 ) + xs( n/2+1 ))/2;
+  else
+    m = xs( (n + 1)/2 );
+  end
+  % round the index, not the data value; max() keeps the index valid for n < 2
+  q = [ xs( max( 1, round( n/4 ) ) ), m, xs( round( 3*n/4 ) ) ];
+end
--- a/statistics/code/randomwalk.m
+++ b/statistics/code/randomwalk.m
@ -1,4 +1,6 @@
 function x = randomwalk(n,p)
+% returns a random walk with n steps and
+% probability p for positive steps.
    r = rand(n,1);
    r(r<p) = -1.0;
    r(r>=p) = +1.0;
--- a/statistics/code/rollthedie.m
+++ b/statistics/code/rollthedie.m
@ -0,0 +1,4 @@
+function x = rollthedie( n )
+% ROLLTHEDIE  simulate rolling a six-sided die n times.
+%   x is an n-by-1 column vector of random integers between 1 and 6.
+  nfaces = 6;
+  x = randi( nfaces, n, 1 );
+end
--- a/statistics/lecture/Makefile
+++ b/statistics/lecture/Makefile
@ -0,0 +1,18 @@
+# All LaTeX sources in this directory and the pdf built from each of them.
+# ':=' expands the wildcard once instead of on every use.
+TEXFILES := $(wildcard *.tex)
+PDFFILES := $(TEXFILES:.tex=.pdf)
+
+# None of these targets creates a file of its own name.
+.PHONY : pdf clean cleanall watch
+
+# Default target: build the pdf of every tex file.
+pdf : $(PDFFILES)
+
+# Run pdflatex once, and a second time only if its output asks for a rerun.
+# The trailing '|| true' means this recipe never fails.
+$(PDFFILES) : %.pdf : %.tex
+	pdflatex -interaction=scrollmode $< | tee /dev/stderr | grep -qF "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
+
+# Remove editor backups and LaTeX by-products.
+clean :
+	rm -f *~ $(TEXFILES:.tex=.aux) $(TEXFILES:.tex=.log) $(TEXFILES:.tex=.out) $(TEXFILES:.tex=.nav) $(TEXFILES:.tex=.snm) $(TEXFILES:.tex=.toc) $(TEXFILES:.tex=.vrb)
+
+# Additionally remove the generated pdf files.
+cleanall : clean
+	rm -f $(PDFFILES)
+
+# Rebuild whenever a pdf is out of date, polling every half second.
+# $(MAKE) (instead of a literal 'make') reuses the running make and its flags.
+watch :
+	while true; do ! $(MAKE) -q pdf && $(MAKE) pdf; sleep 0.5; done
+
+
--- a/statistics/lecture/descriptivestatistics.tex
+++ b/statistics/lecture/descriptivestatistics.tex
@ -1,43 +1,55 @@
-\documentclass{beamer}
+\documentclass[12pt]{report}

 %%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\title[]{Scientific Computing --- Descriptive Statistics}
-\author[]{Jan Benda}
-\institute[]{Neuroethology}
-\date[]{WS 15/16}
-\titlegraphic{\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
+\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
+\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
+\date{WS 15/16}

-%%%%% beamer %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>
-{
-  \usetheme{Singapore}
-  \setbeamercovered{opaque}
-  \usecolortheme{tuebingen}
-  \setbeamertemplate{navigation symbols}{}
-  \usefonttheme{default}
-  \useoutertheme{infolines}
-  % \useoutertheme{miniframes}
-}
-
-%\AtBeginSection[]
-%{
-%  \begin{frame}<beamer>
-%    \begin{center}
-%      \Huge \insertsectionhead
-%    \end{center}
-%  \end{frame}
-%}
-
-\setbeamertemplate{blocks}[rounded][shadow=true]
-\setcounter{tocdepth}{1}
+%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% \newcommand{\tr}[2]{#1}  % en
+% \usepackage[english]{babel}
+\newcommand{\tr}[2]{#2}  % de
+\usepackage[german]{babel}

 %%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\usepackage[english]{babel}
+\usepackage{pslatex}   % nice font for pdf file
+\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
+
+%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
+\setcounter{tocdepth}{1}
+
+%%%% graphics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\usepackage{graphicx}
+\usepackage{xcolor}
+\newcommand{\texpicture}[1]{{\sffamily\small\input{#1.tex}}}
+
+%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\usepackage{listings}
+\lstset{
+  inputpath=../code,
+  basicstyle=\ttfamily\footnotesize,
+  numbers=left,
+  showstringspaces=false,
+  language=Matlab,
+  commentstyle=\itshape\color{darkgray},
+  keywordstyle=\color{blue},
+  stringstyle=\color{green},
+  backgroundcolor=\color{blue!10},
+  breaklines=true,
+  breakautoindent=true,
+  columns=flexible,
+  frame=single,
+  caption={\protect\filename@parse{\lstname}\protect\filename@base},
+  captionpos=t,
+  xleftmargin=1em,
+  xrightmargin=1em,
+  aboveskip=10pt
+}
+
+%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \usepackage{amsmath}
 \usepackage{bm} 
-\usepackage{pslatex}   % nice font for pdf file
-%\usepackage{multimedia}
-
 \usepackage{dsfont}
 \newcommand{\naZ}{\mathds{N}}
 \newcommand{\gaZ}{\mathds{Z}}
@ -47,59 +59,45 @@
 \newcommand{\reZpN}{\mathds{R^+_0}}
 \newcommand{\koZ}{\mathds{C}}

-%%%% graphics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\usepackage{graphicx}
-\newcommand{\texpicture}[1]{{\sffamily\small\input{#1.tex}}}

-%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\usepackage{listings}
-\lstset{
- basicstyle=\ttfamily,
- numbers=left,
- showstringspaces=false,
- language=Matlab,
- commentstyle=\itshape\color{darkgray},
- keywordstyle=\color{blue},
- stringstyle=\color{green},
- backgroundcolor=\color{blue!10},
- breaklines=true,
- breakautoindent=true,
- columns=flexible,
- frame=single,
- captionpos=b,
- xleftmargin=1em,
- xrightmargin=1em,
- aboveskip=10pt
- }
+%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\usepackage{ifthen}
+
+\newcommand{\code}[1]{\texttt{#1}}
+
+\newcommand{\source}[1]{    
+  \begin{flushright}
+    \color{gray}\scriptsize \url{#1}
+  \end{flushright}
+}
+
+\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
+  {\medskip}
+
+%\newcommand{\showlisting}{yes}
+\newcommand{\showlisting}{no}
+\newcounter{theexercise} 
+\setcounter{theexercise}{1}
+\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
+  \arabic{theexercise}:} \stepcounter{theexercise}\newline \newcommand{\exercisesource}{#1}}%
+  {\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\equal{\showlisting}{yes}}{\medskip\lstinputlisting{\exercisesource}}{}}\medskip}
+

- 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{document} 

-\begin{frame}[plain]
-  \frametitle{}
-  \vspace{-1cm}
-  \titlepage % erzeugt Titelseite
-\end{frame}
+\maketitle
+
+%\tableofcontents

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}
-  \frametitle{Content}
-  \tableofcontents
-\end{frame}
-
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\chapter{\tr{Descriptive statistics}{Deskriptive Statistik}}

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Descriptive statistics}
+\section{Statistics of real-valued data}

-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\subsection{Statistics of ratio data}
-
-%-------------------------------------------------------------
-\begin{frame}
-  \frametitle{Statistics of ratio data}
  \begin{itemize}
  \item Location, central tendency
    \begin{itemize}
@ -107,7 +105,6 @@
    \item median
    \item mode
    \end{itemize}
-
  \item Spread, dispersion
    \begin{itemize}
    \item variance
@ -116,163 +113,294 @@
    \item coefficient of variation
    \item minimum, maximum
    \end{itemize}
-
  \item Shape
    \begin{itemize}
    \item skewness
    \item kurtosis
    \end{itemize}
-
  \item Dependence
    \begin{itemize}
    \item Pearson correlation coefficient
    \item Spearman's rank correlation coefficient
    \end{itemize}
-
  \end{itemize}
-\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Median, Quartile, Percentile}
+
+\begin{figure}[t]
+  \includegraphics[width=1\textwidth]{median}
+  \caption{\label{medianfig} Median.}
+\end{figure}
+
+\begin{definition}[\tr{median}{Median}]
+  \tr{Half of the observations $X=(x_1, x_2, \ldots, x_n)$ are
+    larger than the median and half of them are smaller than the
+    median.}  {Der Median teilt eine Liste von Messwerten so in zwei
+    H\"alften, dass die eine H\"alfte der Daten nicht gr\"o{\ss}er
+    und die andere H\"alfte nicht kleiner als der Median ist.}
+\end{definition}
+
+\begin{exercise}[mymedian.m]
+  \tr{Write a function that computes the median of a vector.}
+  {Schreibe eine Funktion, die den Median eines Vektors zur\"uckgibt.}
+\end{exercise}
+
+\code{matlab} stellt die Funktion \code{median()} zur Berechnung des Medians bereit.
+
+\begin{exercise}[checkmymedian.m]
+  \tr{Write a script that tests whether your median function really
+    returns a value with as many data points above it as below it.
+    In particular, the script should test data vectors of
+    different lengths.}  {Schreibe ein Skript, das testet ob die
+    \code{mymedian} Funktion wirklich die Zahl zur\"uckgibt, \"uber
+    der genausoviele Datenwerte liegen wie darunter. Das Skript sollte
+    insbesondere verschieden lange Datenvektoren testen.}
+\end{exercise}
+
+\begin{figure}[t]
+  \includegraphics[width=1\textwidth]{quartile}
+  \caption{\label{quartilefig} Median und Quartile.}
+\end{figure}
+
+\begin{definition}[\tr{quartile}{Quartile}]
+  Die Quartile Q1, Q2 und Q3 unterteilen die Daten in vier gleich
+  gro{\ss}e Gruppen, die jeweils ein Viertel der Daten enthalten.
+  Das mittlere Quartil entspricht dem Median.
+\end{definition}
+
+\begin{exercise}[quartiles.m]
+  \tr{Write a function that computes the first, second, and third quartile of a vector.}
+  {Schreibe eine Funktion, die das erste, zweite und dritte Quartil als Vektor zur\"uckgibt.}
+\end{exercise}
+
+\subsection{Histogram}
+
+Histogramme z\"ahlen die H\"aufigkeit $n_i$ des Auftretens von
+$N=\sum_{i=1}^M n_i$ Messwerten in $M$ Messbereichsklassen $i$ (Bins).
+Die Klassen unterteilen den Wertebereich meist in angrenzende und
+gleich gro{\ss}e Intervalle.  Histogramme sch\"atzen die
+Wahrscheinlichkeitsverteilung der Messwerte ab.
+
+\begin{exercise}[rollthedie.m]
+  \tr{Write a function that simulates rolling a die $n$ times.}
+  {Schreibe eine Funktion, die das $n$-malige W\"urfeln mit einem W\"urfel simuliert.}
+\end{exercise}
+
+\begin{exercise}[diehistograms.m]
+  \tr{Plot histograms from rolling the die 20, 100, 1000 times.  Use
+    the plain hist(x) function, force 6 bins via hist( x, 6 ), and set
+    meaningful bin positions.}  {Plotte Histogramme von 20, 100, und
+    1000-mal w\"urfeln.  Benutze \code{hist(x)}, erzwinge sechs Bins
+    mit \code{hist(x,6)}, und setze selbst sinnvolle Bins. Normiere
+    anschlie{\ss}end das Histogramm auf geeignete Weise.}
+\end{exercise}
+
+\begin{figure}[t]
+  \includegraphics[width=1\textwidth]{diehistograms}
+  \caption{\label{diehistogramsfig} \tr{Histograms of rolling a die
+      100 or 500 times.  Left: plain histograms counting the frequency
+      of the six possible outcomes.  Right: the same data normalized
+      to their sum.}{Histogramme des Ergebnisses von 100 oder 500 mal
+      W\"urfeln. Links: das absolute Histogramm z\"ahlt die Anzahl des
+      Auftretens jeder Augenzahl. Rechts: Normiert auf die Summe des
+      Histogramms werden die beiden Messungen vergleichbar.}}
+\end{figure}
+
+Bei ganzzahligen Messdaten (z.B. die Augenzahl eines W\"urfels) 
+kann f\"ur jede auftretende Zahl eine Klasse definiert werden.
+Damit die H\"ohe der Histogrammbalken unabh\"angig von der Anzahl der Messwerte wird,
+normiert man das Histogramm auf die Anzahl der Messwerte.
+Die H\"ohe der Histogrammbalken gibt dann die Wahrscheinlichkeit $P(x_i)$
+des Auftretens der Gr\"o{\ss}e $x_i$ in der $i$-ten Klasse an
+\[ P_i = \frac{n_i}{N} = \frac{n_i}{\sum_{i=1}^M n_i} \; . \]
+
+
+\subsection{Probability density function}
+
+Meistens haben wir es jedoch mit reellen Messgr\"o{\ss}en zu tun.
+
+\begin{exercise}[gaussianbins.m]
+  \tr{Draw 100 random data from a Gaussian distribution and plot
+    histograms with different bin sizes of the data.}  {Ziehe 100
+    normalverteilte Zufallszahlen und erzeuge Histogramme mit
+    unterschiedlichen Klassenbreiten. Was f\"allt auf?}
+\end{exercise}
+
+\begin{figure}[t]
+  \includegraphics[width=1\textwidth]{pdfhistogram}
+  \caption{\label{pdfhistogramfig} \tr{Histograms of normally
+      distributed data with different bin sizes.}{Histogramme mit
+      verschiedenen Klassenbreiten eines Datensatzes von
+      normalverteilten Messwerten. Links: Die H\"ohe des absoluten
+      Histogramms h\"angt von der Klassenbreite ab. Rechts: Bei auf
+      das Integral normierten Histogrammen werden auch
+      unterschiedliche Klassenbreiten vergleichbar.}}
+\end{figure}
+
+Histogramme von reellen Messwerten m\"ussen auf das Integral 1 normiert werden, so dass
+das Integral (nicht die Summe) \"uber das Histogramm eins ergibt. Das Integral
+ist die Fl\"ache des Histogramms. Diese setzt sich zusammen aus der Fl\"ache der einzelnen
+Histogrammbalken. Diese haben die H\"ohe $n_i$ und die Breite $\Delta x$. Die Gesamtfl\"ache
+$A$ des Histogramms ist also
+\[ A = \sum_{i=1}^M ( n_i \cdot \Delta x ) = \Delta x \sum_{i=1}^M n_i \]
+und das normierte Histogramm hat die H\"ohe
+\[ p(x_i) = \frac{n_i}{\Delta x \sum_{i=1}^M n_i} \]
+Es muss also nicht nur durch die Summe, sondern auch durch die Breite der Klassen $\Delta x$
+geteilt werden.
+
+$p(x_i)$ kann keine Wahrscheinlichkeit sein, da $p(x_i)$ nun eine
+Einheit hat --- das Inverse der Einheit der Messgr\"o{\ss}e $x$. Man
+spricht von einer Wahrscheinlichkeitsdichte.
+
+\begin{figure}[t]
+  \includegraphics[width=1\textwidth]{pdfprobabilities}
+  \caption{\label{pdfprobabilitiesfig} Wahrscheinlichkeiten bei
+  einer Wahrscheinlichkeitsdichtefunktion.}
+\end{figure}
+  
+\begin{exercise}
+  \tr{Plot the Gaussian probability density}{Plotte die Gauss'sche Wahrscheinlichkeitsdichte }
+  \[ p_g(x) = 1/\sqrt{2\pi\sigma^2}e^{-\frac{(x-\mu)^2}{2\sigma^2}}\]
+  \tr{What does it mean?}{Was bedeutet die folgende Wahrscheinlichkeit?}
+  \[ P(x_1 < x < x_2) = \int_{x_1}^{x_2} p(x) \, dx \]
+  \tr{How large is}{Wie gro{\ss} ist}
+  \[ \int_{-\infty}^{+\infty} p(x) \, dx \; ?\]
+  \tr{Why?}{Warum?}
+\end{exercise}
+

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Data types}

-%-------------------------------------------------------------
-\begin{frame}
-  \frametitle{Data types: nominal scale}
+\subsubsection{Nominal scale}
+\begin{itemize}
+\item Binary
  \begin{itemize}
-  \item Binary
-    \begin{itemize}
-    \item ``yes/no'',
-    \item ``true/false'',
-    \item ``success/failure'', etc.
-    \end{itemize}
-  \item Categorial
-    \begin{itemize}
-    \item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
-    \item blood type (``A/B/AB/0''),
-    \item parts of speech (``noun/veerb/preposition/article/...''),
-    \item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
-    \end{itemize}
-  \item Each observation/measurement/sample is put into one category
-  \item There is no reasonable order among the categories.\\
-    example: [rods, cones] vs. [cones, rods]
-  \pause
-  \item Statistics: mode, i.e. the most common item
+  \item ``yes/no'',
+  \item ``true/false'',
+  \item ``success/failure'', etc.
  \end{itemize}
-\end{frame}
-
-%-------------------------------------------------------------
-\begin{frame}
-  \frametitle{Data types: ordinal scale}
+\item Categorial
  \begin{itemize}
-  \item Like nominal scale, but with an order
-  \item Examples: ranks, ratings
-    \begin{itemize}
-    \item ``bad/ok/good'',
-    \item ``cold/warm/hot'',
-    \item ``young/old'', etc.
-    \end{itemize}
-  \item {\bf But:} there is no reasonable measure of {\em distance}
-    between the classes
-  \pause
-  \item Statistics: mode, median
+  \item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
+  \item blood type (``A/B/AB/0''),
+  \item parts of speech (``noun/verb/preposition/article/...''),
+  \item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
  \end{itemize}
-\end{frame}
+\item Each observation/measurement/sample is put into one category
+\item There is no reasonable order among the categories.\\
+  example: [rods, cones] vs. [cones, rods]
+\item Statistics: mode, i.e. the most common item
+\end{itemize}

-%-------------------------------------------------------------
-\begin{frame}
-  \frametitle{Data types: interval scale}
+\subsubsection{Ordinal scale}
+\begin{itemize}
+\item Like nominal scale, but with an order
+\item Examples: ranks, ratings
  \begin{itemize}
-  \item Quantitative/metric values
-  \item Reasonable measure of distance between values, but no absolute zero
-  \item Examples: 
-    \begin{itemize}
-    \item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
-    \item Direction measured in degrees from magnetic or true north
-    \end{itemize}
-  \pause
-  \item Statistics:
-    \begin{itemize}
-    \item Central tendency: mode, median, arithmetic mean
-    \item Dispersion: range, standard deviation
-    \end{itemize}
+  \item ``bad/ok/good'',
+  \item ``cold/warm/hot'',
+  \item ``young/old'', etc.
  \end{itemize}
-\end{frame}
+\item {\bf But:} there is no reasonable measure of {\em distance}
+  between the classes
+\item Statistics: mode, median
+\end{itemize}

-%-------------------------------------------------------------
-\begin{frame}
-  \frametitle{Data types: absolute/ratio scale}
+\subsubsection{Interval scale}
+\begin{itemize}
+\item Quantitative/metric values
+\item Reasonable measure of distance between values, but no absolute zero
+\item Examples: 
  \begin{itemize}
-  \item Like interval scale, but with absolute origin/zero
-  \item Examples: 
-    \begin{itemize}
-    \item Temperature in $^\circ$K
-    \item Length, mass, duration, electric charge, ...
-    \item Plane angle, etc.
-    \item Count (e.g. number of spikes in response to a stimulus)
-    \end{itemize}
-  \pause
-  \item Statistics:
-    \begin{itemize}
-    \item Central tendency: mode, median, arithmetic, geometric, harmonic mean
-    \item Dispersion: range, standard deviation
-    \item Coefficient of variation (ratio standard deviation/mean)
-    \item All other statistical measures
-    \end{itemize}
+  \item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
+  \item Direction measured in degrees from magnetic or true north
  \end{itemize}
-\end{frame}
-
-%-------------------------------------------------------------
-\begin{frame}
-  \frametitle{Data types}
+\item Statistics:
  \begin{itemize}
-  \item Data type selects
-    \begin{itemize}
-    \item statistics 
-    \item type of plots (bar graph versus x-y plot)
-    \item correct tests
-    \end{itemize}
-  \item Scales exhibit increasing information content from nominal
-    to absolute.\\
-    Conversion  ,,downwards'' is always possible
-  \item For example: size measured in meter (ratio scale) $\rightarrow$
-    categories ``small/medium/large'' (ordinal scale)
+  \item Central tendency: mode, median, arithmetic mean
+  \item Dispersion: range, standard deviation
  \end{itemize}
-\end{frame}
+\end{itemize}

-%-------------------------------------------------------------
-\begin{frame}
-  \frametitle{Examples from neuroscience}
+\subsubsection{Absolute/ratio scale}
+\begin{itemize}
+\item Like interval scale, but with absolute origin/zero
+\item Examples: 
  \begin{itemize}
-
-  \item {\bf absolute:}\pause
-    \begin{itemize}
-    \item size of neuron/brain
-    \item length of axon
-    \item ion concentration
-    \item membrane potential
-    \item firing rate
-    \end{itemize}
-
-  \item {\bf interval:}\pause
-    \begin{itemize}
-    \item edge orientation
-    \end{itemize}
-
-  \item {\bf ordinal:} \pause 
-    \begin{itemize}
-    \item stages of a disease
-    \item ratings
-    \end{itemize}
-
-  \item {\bf nominal:}\pause 
-    \begin{itemize}
-    \item cell type
-    \item odor
-    \item states of an ion channel
-    \end{itemize}
-
+  \item Temperature in K (kelvin)
+  \item Length, mass, duration, electric charge, ...
+  \item Plane angle, etc.
+  \item Count (e.g. number of spikes in response to a stimulus)
  \end{itemize}
-\end{frame}
+\item Statistics:
+  \begin{itemize}
+  \item Central tendency: mode, median, arithmetic, geometric, harmonic mean
+  \item Dispersion: range, standard deviation
+  \item Coefficient of variation (ratio standard deviation/mean)
+  \item All other statistical measures
+  \end{itemize}
+\end{itemize}
+
+\subsubsection{Data types}
+\begin{itemize}
+\item Data type selects
+  \begin{itemize}
+  \item statistics 
+  \item type of plots (bar graph versus x-y plot)
+  \item correct tests
+  \end{itemize}
+\item Scales exhibit increasing information content from nominal
+  to absolute.\\
+  Conversion ``downwards'' is always possible
+\item For example: size measured in meter (ratio scale) $\rightarrow$
+  categories ``small/medium/large'' (ordinal scale)
+\end{itemize}
+
+\subsubsection{Examples from neuroscience}
+\begin{itemize}
+\item {\bf absolute:}
+  \begin{itemize}
+  \item size of neuron/brain
+  \item length of axon
+  \item ion concentration
+  \item membrane potential
+  \item firing rate
+  \end{itemize}
+
+\item {\bf interval:}
+  \begin{itemize}
+  \item edge orientation
+  \end{itemize}
+
+\item {\bf ordinal:}
+  \begin{itemize}
+  \item stages of a disease
+  \item ratings
+  \end{itemize}
+
+\item {\bf nominal:}
+  \begin{itemize}
+  \item cell type
+  \item odor
+  \item states of an ion channel
+  \end{itemize}
+
+\end{itemize}


-\end{document}
+\end{document}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Statistics}
+What is "a statistic"? % dt. Sch\"atzfunktion
+\begin{definition}[statistic]
+  A statistic (singular) is a single measure of some attribute of a
+  sample (e.g., its arithmetic mean value). It is calculated by
+  applying a function (statistical algorithm) to the values of the
+  items of the sample, which are known together as a set of data.
+  
+  \source{http://en.wikipedia.org/wiki/Statistic}
+\end{definition}
--- a/statistics/lecture/diehistograms.py
+++ b/statistics/lecture/diehistograms.py
@ -0,0 +1,32 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+# roll the die:
+x1 = np.random.random_integers( 1, 6, 100 )
+x2 = np.random.random_integers( 1, 6, 500 )
+bins = np.arange(0.5, 7, 1.0)
+
+plt.xkcd()
+
+fig = plt.figure( figsize=(6,4) )
+ax = fig.add_subplot( 1, 2, 1 )
+ax.spines['right'].set_visible(False)
+ax.spines['top'].set_visible(False)
+ax.yaxis.set_ticks_position('left')
+ax.xaxis.set_ticks_position('bottom')
+ax.set_xlabel( 'x' )
+ax.set_ylabel( 'Frequency' )
+ax.hist([x2, x1], bins, color=['#FFCC00', '#FFFF66' ])
+
+ax = fig.add_subplot( 1, 2, 2 )
+ax.spines['right'].set_visible(False)
+ax.spines['top'].set_visible(False)
+ax.yaxis.set_ticks_position('left')
+ax.xaxis.set_ticks_position('bottom')
+ax.set_xlabel( 'x' )
+ax.set_ylabel( 'Probability' )
+ax.hist([x2, x1], bins, normed=True, color=['#FFCC00', '#FFFF66' ])
+plt.tight_layout()
+fig.savefig( 'diehistograms.pdf' )
+plt.show()
+
--- a/statistics/lecture/median.py
+++ b/statistics/lecture/median.py
@ -0,0 +1,33 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+# normal distribution:
+# g is exp(-x^2/2)/sqrt(2 pi) on a grid from -4 to 4 with step 0.01
+x = np.arange( -4.0, 4.0, 0.01 )
+g = np.exp(-0.5*x*x)/np.sqrt(2.0*np.pi)
+
+plt.xkcd()
+fig = plt.figure( figsize=(6,4) )
+ax = fig.add_subplot( 1, 1, 1 )
+# show only the left and bottom axes
+ax.spines['right'].set_visible(False)
+ax.spines['top'].set_visible(False)
+ax.yaxis.set_ticks_position('left')
+ax.xaxis.set_ticks_position('bottom')
+ax.set_xlabel( 'x' )
+ax.set_ylabel( 'Probability density p(x)' )
+ax.set_ylim( 0.0, 0.46 )
+ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )
+# label the two halves left and right of x=0
+ax.text(-1.0, 0.1, '50%', ha='center' )
+ax.text(+1.0, 0.1, '50%', ha='center' )
+# arrow from the text 'Median' to a point next to the vertical line at x=0
+ax.annotate('Median',
+            xy=(0.1, 0.3), xycoords='data',
+            xytext=(1.6, 0.35), textcoords='data', ha='left',
+            arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
+            connectionstyle="angle3,angleA=10,angleB=40") )
+# shade the area under the curve separately for x<0 and x>0
+ax.fill_between( x[x<0], 0.0, g[x<0], color='#ffcc00' )
+ax.fill_between( x[x>0], 0.0, g[x>0], color='#99ff00' )
+ax.plot(x,g, 'b', lw=4)
+# vertical line at x=0
+ax.plot([0.0, 0.0], [0.0, 0.45], 'k', lw=2 )
+plt.tight_layout()
+fig.savefig( 'median.pdf' )
+plt.show()
+
--- a/statistics/lecture/pdfhistogram.py
+++ b/statistics/lecture/pdfhistogram.py
@ -0,0 +1,39 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+# normal distribution:
+x = np.arange( -4.0, 4.0, 0.01 )
+g = np.exp(-0.5*x*x)/np.sqrt(2.0*np.pi)
+r = np.random.randn( 100 )
+
+plt.xkcd()
+
+fig = plt.figure( figsize=(6,4) )
+ax = fig.add_subplot( 1, 2, 1 )
+ax.spines['right'].set_visible(False)
+ax.spines['top'].set_visible(False)
+ax.yaxis.set_ticks_position('left')
+ax.xaxis.set_ticks_position('bottom')
+ax.set_xlabel( 'x' )
+ax.set_ylabel( 'Frequency' )
+#ax.set_ylim( 0.0, 0.46 )
+#ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )
+ax.hist(r, 5, color='#CC0000')
+ax.hist(r, 20, color='#FFCC00')
+
+ax = fig.add_subplot( 1, 2, 2 )
+ax.spines['right'].set_visible(False)
+ax.spines['top'].set_visible(False)
+ax.yaxis.set_ticks_position('left')
+ax.xaxis.set_ticks_position('bottom')
+ax.set_xlabel( 'x' )
+ax.set_ylabel( 'Probability density p(x)' )
+#ax.set_ylim( 0.0, 0.46 )
+#ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )
+ax.hist(r, 5, normed=True, color='#CC0000')
+ax.hist(r, 20, normed=True, color='#FFCC00')
+
+plt.tight_layout()
+fig.savefig( 'pdfhistogram.pdf' )
+plt.show()
+
--- a/statistics/lecture/pdfprobabilities.py
+++ b/statistics/lecture/pdfprobabilities.py
@ -0,0 +1,36 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+# normal distribution:
+x = np.arange( -3.0, 5.0, 0.01 )
+g = np.exp(-0.5*x*x)/np.sqrt(2.0*np.pi)
+x1=0.0
+x2=1.0
+
+plt.xkcd()
+fig = plt.figure( figsize=(6,4) )
+ax = fig.add_subplot( 1, 1, 1 )
+ax.spines['right'].set_visible(False)
+ax.spines['top'].set_visible(False)
+ax.yaxis.set_ticks_position('left')
+ax.xaxis.set_ticks_position('bottom')
+ax.set_xlabel( 'x' )
+ax.set_ylabel( 'Probability density p(x)' )
+ax.set_ylim( 0.0, 0.46 )
+ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )
+ax.annotate('Gaussian',
+            xy=(-1.0, 0.28), xycoords='data',
+            xytext=(-2.5, 0.35), textcoords='data', ha='left',
+            arrowprops=dict(arrowstyle="->", relpos=(0.5,0.0),
+            connectionstyle="angle3,angleA=10,angleB=110") )
+ax.annotate('$P(0<x<1) = \int_0^1 p(x) \, dx$',
+            xy=(0.6, 0.28), xycoords='data',
+            xytext=(1.2, 0.4), textcoords='data', ha='left',
+            arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
+            connectionstyle="angle3,angleA=10,angleB=80") )
+ax.fill_between( x[(x>x1)&(x<x2)], 0.0, g[(x>x1)&(x<x2)], color='#cc0000' )
+ax.plot(x,g, 'b', lw=4)
+plt.tight_layout()
+fig.savefig( 'pdfprobabilities.pdf' )
+plt.show()
+
--- a/statistics/lecture/quartile.py
+++ b/statistics/lecture/quartile.py
@ -0,0 +1,50 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+# normal distribution:
+x = np.arange( -4.0, 4.0, 0.01 )
+g = np.exp(-0.5*x*x)/np.sqrt(2.0*np.pi)
+q = [ -0.67488, 0.0, 0.67488 ]
+
+plt.xkcd()
+fig = plt.figure( figsize=(6,4) )
+ax = fig.add_subplot( 1, 1, 1 )
+ax.spines['right'].set_visible(False)
+ax.spines['top'].set_visible(False)
+ax.yaxis.set_ticks_position('left')
+ax.xaxis.set_ticks_position('bottom')
+ax.set_xlabel( 'x' )
+ax.set_ylabel( 'Probability density p(x)' )
+ax.set_ylim( 0.0, 0.46 )
+ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )
+ax.text(-1.2, 0.1, '25%', ha='center' )
+ax.text(-0.35, 0.1, '25%', ha='center' )
+ax.text(+0.35, 0.1, '25%', ha='center' )
+ax.text(+1.2, 0.1, '25%', ha='center' )
+ax.annotate('1. quartile',
+            xy=(-0.75, 0.2), xycoords='data',
+            xytext=(-1.7, 0.25), textcoords='data', ha='right',
+            arrowprops=dict(arrowstyle="->", relpos=(1.0,0.5),
+            connectionstyle="angle3,angleA=170,angleB=120") )
+ax.annotate('3. quartile',
+            xy=(0.75, 0.17), xycoords='data',
+            xytext=(1.7, 0.22), textcoords='data', ha='left',
+            arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
+            connectionstyle="angle3,angleA=10,angleB=70") )
+ax.annotate('Median',
+            xy=(0.1, 0.3), xycoords='data',
+            xytext=(1.6, 0.35), textcoords='data', ha='left',
+            arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
+            connectionstyle="angle3,angleA=10,angleB=40") )
+ax.fill_between( x[x<q[0]], 0.0, g[x<q[0]], color='#ffcc00' )
+ax.fill_between( x[(x>q[0])&(x<q[1])], 0.0, g[(x>q[0])&(x<q[1])], color='#ff0000' )
+ax.fill_between( x[(x>q[1])&(x<q[2])], 0.0, g[(x>q[1])&(x<q[2])], color='#ff9900' )
+ax.fill_between( x[x>q[2]], 0.0, g[x>q[2]], color='#ffff66' )
+ax.plot(x,g, 'b', lw=4)
+ax.plot([0.0, 0.0], [0.0, 0.45], 'k', lw=2 )
+ax.plot([q[0], q[0]], [0.0, 0.4], 'k', lw=2 )
+ax.plot([q[2], q[2]], [0.0, 0.4], 'k', lw=2 )
+plt.tight_layout()
+fig.savefig( 'quartile.pdf' )
+plt.show()
+