Reorganized the folders and started a common script for the lectures.

This commit is contained in:
Jan Benda 2015-10-25 20:24:13 +01:00
parent dd50324683
commit 5791337dea
48 changed files with 1476 additions and 648 deletions

17
Makefile Normal file
View File

@ -0,0 +1,17 @@
BASENAME=scientificcomputing-script
pdf : $(BASENAME).pdf
$(BASENAME).pdf : $(BASENAME).tex
export TEXMFOUTPUT=.; pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
clean :
rm -f *~ $(BASENAME).aux $(BASENAME).log $(BASENAME).out $(BASENAME).toc
cleanall : clean
rm -f $(PDFFILE)
watch :
while true; do ! make -q pdf && make pdf; sleep 0.5; done

View File

@ -0,0 +1,22 @@
BASENAME=bootstrap
PYFILES=$(wildcard *.py)
PYPDFFILES=$(PYFILES:.py=.pdf)
pdf : $(BASENAME)-chapter.pdf $(PYPDFFILES)
$(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex
pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
$(PYPDFFILES) : %.pdf : %.py
python $<
clean :
rm -f *~ $(BASENAME)-chapter.aux $(BASENAME)-chapter.log $(BASENAME)-chapter.out $(BASENAME).aux $(BASENAME).log
cleanall : clean
rm -f $(BASENAME)-chapter.pdf
watch :
while true; do ! make -q pdf && make pdf; sleep 0.5; done

View File

@ -0,0 +1,225 @@
\documentclass[12pt]{report}
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
\date{WS 15/16}
%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \newcommand{\tr}[2]{#1} % en
% \usepackage[english]{babel}
\newcommand{\tr}[2]{#2} % de
\usepackage[german]{babel}
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{pslatex} % nice font for pdf file
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
\setcounter{tocdepth}{1}
%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[sf,bf,it,big,clearempty]{titlesec}
\setcounter{secnumdepth}{1}
%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{graphicx}
\usepackage{xcolor}
\pagecolor{white}
\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)%
\put(0,4){\line(1,0){170}}%
\multiput(0,2)(10,0){18}{\line(0,1){4}}%
\multiput(0,3)(1,0){170}{\line(0,1){2}}%
\put(0,0){\makebox(0,0){{\tiny 0}}}%
\put(10,0){\makebox(0,0){{\tiny 1}}}%
\put(20,0){\makebox(0,0){{\tiny 2}}}%
\put(30,0){\makebox(0,0){{\tiny 3}}}%
\put(40,0){\makebox(0,0){{\tiny 4}}}%
\put(50,0){\makebox(0,0){{\tiny 5}}}%
\put(60,0){\makebox(0,0){{\tiny 6}}}%
\put(70,0){\makebox(0,0){{\tiny 7}}}%
\put(80,0){\makebox(0,0){{\tiny 8}}}%
\put(90,0){\makebox(0,0){{\tiny 9}}}%
\put(100,0){\makebox(0,0){{\tiny 10}}}%
\put(110,0){\makebox(0,0){{\tiny 11}}}%
\put(120,0){\makebox(0,0){{\tiny 12}}}%
\put(130,0){\makebox(0,0){{\tiny 13}}}%
\put(140,0){\makebox(0,0){{\tiny 14}}}%
\put(150,0){\makebox(0,0){{\tiny 15}}}%
\put(160,0){\makebox(0,0){{\tiny 16}}}%
\put(170,0){\makebox(0,0){{\tiny 17}}}%
\end{picture}\par}
% figures:
\setlength{\fboxsep}{0pt}
\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}}
%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}}
%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}}
%\newcommand{\texpicture}[1]{}
\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}}
% maximum number of floats:
\setcounter{topnumber}{2}
\setcounter{bottomnumber}{0}
\setcounter{totalnumber}{2}
% float placement fractions:
\renewcommand{\textfraction}{0.2}
\renewcommand{\topfraction}{0.8}
\renewcommand{\bottomfraction}{0.0}
\renewcommand{\floatpagefraction}{0.5}
% spacing for floats:
\setlength{\floatsep}{12pt plus 2pt minus 2pt}
\setlength{\textfloatsep}{20pt plus 4pt minus 2pt}
\setlength{\intextsep}{12pt plus 2pt minus 2pt}
% spacing for a floating page:
\makeatletter
\setlength{\@fptop}{0pt}
\setlength{\@fpsep}{8pt plus 2.0fil}
\setlength{\@fpbot}{0pt plus 1.0fil}
\makeatother
% rules for floats:
\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}}
\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}}
% captions:
\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption}
% put caption on separate float:
\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]}
% references to panels of a figure within the caption:
\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}}
% references to figures:
\newcommand{\panel}[1]{\textsf{\uppercase{#1}}}
\newcommand{\fref}[1]{\textup{\ref{#1}}}
\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}}
% references to figures in normal text:
\newcommand{\fig}{Fig.}
\newcommand{\Fig}{Figure}
\newcommand{\figs}{Figs.}
\newcommand{\Figs}{Figures}
\newcommand{\figref}[1]{\fig~\fref{#1}}
\newcommand{\Figref}[1]{\Fig~\fref{#1}}
\newcommand{\figsref}[1]{\figs~\fref{#1}}
\newcommand{\Figsref}[1]{\Figs~\fref{#1}}
\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}}
\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}}
\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}}
\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}}
% references to figures within bracketed text:
\newcommand{\figb}{Fig.}
\newcommand{\figsb}{Figs.}
\newcommand{\figrefb}[1]{\figb~\fref{#1}}
\newcommand{\figsrefb}[1]{\figsb~\fref{#1}}
\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}}
\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}}
% references to tables:
\newcommand{\tref}[1]{\textup{\ref{#1}}}
% references to tables in normal text:
\newcommand{\tab}{Tab.}
\newcommand{\Tab}{Table}
\newcommand{\tabs}{Tabs.}
\newcommand{\Tabs}{Tables}
\newcommand{\tabref}[1]{\tab~\tref{#1}}
\newcommand{\Tabref}[1]{\Tab~\tref{#1}}
\newcommand{\tabsref}[1]{\tabs~\tref{#1}}
\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}}
% references to tables within bracketed text:
\newcommand{\tabb}{Tab.}
\newcommand{\tabsb}{Tab.}
\newcommand{\tabrefb}[1]{\tabb~\tref{#1}}
\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}}
%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\newcommand{\eqref}[1]{(\ref{#1})}
\newcommand{\eqn}{\tr{Eq}{Gl}.}
\newcommand{\Eqn}{\tr{Eq}{Gl}.}
\newcommand{\eqns}{\tr{Eqs}{Gln}.}
\newcommand{\Eqns}{\tr{Eqs}{Gln}.}
\newcommand{\eqnref}[1]{\eqn~\eqref{#1}}
\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}}
\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}}
\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}}
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{listings}
\lstset{
inputpath=../code,
basicstyle=\ttfamily\footnotesize,
numbers=left,
showstringspaces=false,
language=Matlab,
commentstyle=\itshape\color{darkgray},
keywordstyle=\color{blue},
stringstyle=\color{green},
backgroundcolor=\color{blue!10},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
caption={\protect\filename@parse{\lstname}\protect\filename@base},
captionpos=t,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{amsmath}
\usepackage{bm}
\usepackage{dsfont}
\newcommand{\naZ}{\mathds{N}}
\newcommand{\gaZ}{\mathds{Z}}
\newcommand{\raZ}{\mathds{Q}}
\newcommand{\reZ}{\mathds{R}}
\newcommand{\reZp}{\mathds{R^+}}
\newcommand{\reZpN}{\mathds{R^+_0}}
\newcommand{\koZ}{\mathds{C}}
%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{ifthen}
\newcommand{\code}[1]{\texttt{#1}}
\newcommand{\source}[1]{
\begin{flushright}
\color{gray}\scriptsize \url{#1}
\end{flushright}
}
\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
{\medskip}
\newcounter{maxexercise}
\setcounter{maxexercise}{9} % show listings up to exercise maxexercise
\newcounter{theexercise}
\setcounter{theexercise}{1}
\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
\arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}%
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}}
\graphicspath{{figures/}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
\include{bootstrap}
\end{document}

View File

@ -0,0 +1,64 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\chapter{\tr{Bootstrap Methods}{Bootstrap Methoden}}
Beim Bootstrap erzeugt man sich die Verteilung von Statistiken durch Resampling
aus der Stichprobe. Das hat mehrere Vorteile:
\begin{itemize}
\item Weniger Annahmen (z.B. muss eine Stichprobe nicht Normalverteilt sein).
\item H\"ohere Genauigkeit als klassische Methoden.
\item Allgemeing\"ultigkeit: Bootstrap Methoden sind sich sehr
\"ahnlich f\"ur viele verschiedene Statistiken und ben\"otigen nicht
f\"ur jede Statistik eine andere Formel.
\end{itemize}
\begin{figure}[t]
\includegraphics[width=0.8\textwidth]{2012-10-29_16-26-05_771}\\[2ex]
\includegraphics[width=0.8\textwidth]{2012-10-29_16-41-39_523}\\[2ex]
\includegraphics[width=0.8\textwidth]{2012-10-29_16-29-35_312}
\caption{\tr{Why can we only measure a sample of the
population?}{Warum k\"onnen wir nur eine Stichprobe der
Grundgesamtheit messen?}}
\end{figure}
\begin{figure}[t]
\includegraphics[height=0.2\textheight]{srs1}\\[2ex]
\includegraphics[height=0.2\textheight]{srs2}\\[2ex]
\includegraphics[height=0.2\textheight]{srs3}
\caption{Bootstrap der Stichprobenvertielung (a) Von der
Grundgesamtheit (population) mit unbekanntem Parameter
(z.B. Mittelwert $\mu$) zieht man Stichproben (SRS: simple random
samples). Die Statistik (hier Bestimmung von $\bar x$) kann f\"ur
jede Stichprobe berechnet werden. Die erhaltenen Werte entstammen
der Stichprobenverteilung. Meisten wird aber nur eine Stichprobe
gezogen! (b) Mit bestimmten Annahmen und Theorien kann man auf
die Stichprobenverteilung schlie{\ss}en ohne sie gemessen zu
haben. (c) Alternativ k\"onnen aus der einen Stichprobe viele
Bootstrap-Stichproben generiert werden (resampling) und so
Eigenschaften der Stichprobenverteilung empirisch bestimmt
werden. Aus Hesterberg et al. 2003, Bootstrap Methods and
Permuation Tests}
\end{figure}
\section{Bootstrap des Standardfehlers}
Beim Bootstrap erzeugen wir durch Resampling neue Stichproben und
benutzen diese um die Stichprobenverteilung einer Statistik zu
berechnen. Die Bootstrap Stichproben haben jeweils den gleichen Umfang
wie die urspr\"unglich gemessene Stichprobe und werden durch Ziehen
mit Zur\"ucklegen gewonnen. Jeder Wert der urspr\"unglichen Stichprobe
kann also einmal, mehrmals oder gar nicht in einer Bootstrap
Stichprobe vorkommen.
\begin{exercise}[bootstrapsem.m]
Ziehe 1000 normalverteilte Zufallszahlen und berechne deren Mittelwert,
Standardabweichung und Standardfehler ($\sigma/\sqrt{n}$).
Resample die Daten 1000 mal (Ziehen mit Zur\"ucklegen) und berechne jeweils
den Mittelwert.
Plotte ein Histogramm dieser Mittelwerte, sowie deren Mittelwert und
die Standardabweichung.
Was hat das mit dem Standardfehler zu tun?
\end{exercise}

View File

Before

Width:  |  Height:  |  Size: 724 KiB

After

Width:  |  Height:  |  Size: 724 KiB

View File

Before

Width:  |  Height:  |  Size: 386 KiB

After

Width:  |  Height:  |  Size: 386 KiB

View File

Before

Width:  |  Height:  |  Size: 461 KiB

After

Width:  |  Height:  |  Size: 461 KiB

View File

Before

Width:  |  Height:  |  Size: 59 KiB

After

Width:  |  Height:  |  Size: 59 KiB

View File

Before

Width:  |  Height:  |  Size: 55 KiB

After

Width:  |  Height:  |  Size: 55 KiB

View File

Before

Width:  |  Height:  |  Size: 73 KiB

After

Width:  |  Height:  |  Size: 73 KiB

View File

@ -0,0 +1,22 @@
BASENAME=likelihood
PYFILES=$(wildcard *.py)
PYPDFFILES=$(PYFILES:.py=.pdf)
pdf : $(BASENAME)-chapter.pdf $(PYPDFFILES)
$(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex
pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
$(PYPDFFILES) : %.pdf : %.py
python $<
clean :
rm -f *~ $(BASENAME)-chapter.aux $(BASENAME)-chapter.log $(BASENAME)-chapter.out $(BASENAME).aux $(BASENAME).log
cleanall : clean
rm -f $(BASENAME)-chapter.pdf
watch :
while true; do ! make -q pdf && make pdf; sleep 0.5; done

View File

@ -0,0 +1,225 @@
\documentclass[12pt]{report}
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
\date{WS 15/16}
%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \newcommand{\tr}[2]{#1} % en
% \usepackage[english]{babel}
\newcommand{\tr}[2]{#2} % de
\usepackage[german]{babel}
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{pslatex} % nice font for pdf file
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
\setcounter{tocdepth}{1}
%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[sf,bf,it,big,clearempty]{titlesec}
\setcounter{secnumdepth}{1}
%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{graphicx}
\usepackage{xcolor}
\pagecolor{white}
\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)%
\put(0,4){\line(1,0){170}}%
\multiput(0,2)(10,0){18}{\line(0,1){4}}%
\multiput(0,3)(1,0){170}{\line(0,1){2}}%
\put(0,0){\makebox(0,0){{\tiny 0}}}%
\put(10,0){\makebox(0,0){{\tiny 1}}}%
\put(20,0){\makebox(0,0){{\tiny 2}}}%
\put(30,0){\makebox(0,0){{\tiny 3}}}%
\put(40,0){\makebox(0,0){{\tiny 4}}}%
\put(50,0){\makebox(0,0){{\tiny 5}}}%
\put(60,0){\makebox(0,0){{\tiny 6}}}%
\put(70,0){\makebox(0,0){{\tiny 7}}}%
\put(80,0){\makebox(0,0){{\tiny 8}}}%
\put(90,0){\makebox(0,0){{\tiny 9}}}%
\put(100,0){\makebox(0,0){{\tiny 10}}}%
\put(110,0){\makebox(0,0){{\tiny 11}}}%
\put(120,0){\makebox(0,0){{\tiny 12}}}%
\put(130,0){\makebox(0,0){{\tiny 13}}}%
\put(140,0){\makebox(0,0){{\tiny 14}}}%
\put(150,0){\makebox(0,0){{\tiny 15}}}%
\put(160,0){\makebox(0,0){{\tiny 16}}}%
\put(170,0){\makebox(0,0){{\tiny 17}}}%
\end{picture}\par}
% figures:
\setlength{\fboxsep}{0pt}
\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}}
%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}}
%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}}
%\newcommand{\texpicture}[1]{}
\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}}
% maximum number of floats:
\setcounter{topnumber}{2}
\setcounter{bottomnumber}{0}
\setcounter{totalnumber}{2}
% float placement fractions:
\renewcommand{\textfraction}{0.2}
\renewcommand{\topfraction}{0.8}
\renewcommand{\bottomfraction}{0.0}
\renewcommand{\floatpagefraction}{0.5}
% spacing for floats:
\setlength{\floatsep}{12pt plus 2pt minus 2pt}
\setlength{\textfloatsep}{20pt plus 4pt minus 2pt}
\setlength{\intextsep}{12pt plus 2pt minus 2pt}
% spacing for a floating page:
\makeatletter
\setlength{\@fptop}{0pt}
\setlength{\@fpsep}{8pt plus 2.0fil}
\setlength{\@fpbot}{0pt plus 1.0fil}
\makeatother
% rules for floats:
\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}}
\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}}
% captions:
\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption}
% put caption on separate float:
\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]}
% references to panels of a figure within the caption:
\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}}
% references to figures:
\newcommand{\panel}[1]{\textsf{\uppercase{#1}}}
\newcommand{\fref}[1]{\textup{\ref{#1}}}
\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}}
% references to figures in normal text:
\newcommand{\fig}{Fig.}
\newcommand{\Fig}{Figure}
\newcommand{\figs}{Figs.}
\newcommand{\Figs}{Figures}
\newcommand{\figref}[1]{\fig~\fref{#1}}
\newcommand{\Figref}[1]{\Fig~\fref{#1}}
\newcommand{\figsref}[1]{\figs~\fref{#1}}
\newcommand{\Figsref}[1]{\Figs~\fref{#1}}
\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}}
\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}}
\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}}
\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}}
% references to figures within bracketed text:
\newcommand{\figb}{Fig.}
\newcommand{\figsb}{Figs.}
\newcommand{\figrefb}[1]{\figb~\fref{#1}}
\newcommand{\figsrefb}[1]{\figsb~\fref{#1}}
\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}}
\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}}
% references to tables:
\newcommand{\tref}[1]{\textup{\ref{#1}}}
% references to tables in normal text:
\newcommand{\tab}{Tab.}
\newcommand{\Tab}{Table}
\newcommand{\tabs}{Tabs.}
\newcommand{\Tabs}{Tables}
\newcommand{\tabref}[1]{\tab~\tref{#1}}
\newcommand{\Tabref}[1]{\Tab~\tref{#1}}
\newcommand{\tabsref}[1]{\tabs~\tref{#1}}
\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}}
% references to tables within bracketed text:
\newcommand{\tabb}{Tab.}
\newcommand{\tabsb}{Tab.}
\newcommand{\tabrefb}[1]{\tabb~\tref{#1}}
\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}}
%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\newcommand{\eqref}[1]{(\ref{#1})}
\newcommand{\eqn}{\tr{Eq}{Gl}.}
\newcommand{\Eqn}{\tr{Eq}{Gl}.}
\newcommand{\eqns}{\tr{Eqs}{Gln}.}
\newcommand{\Eqns}{\tr{Eqs}{Gln}.}
\newcommand{\eqnref}[1]{\eqn~\eqref{#1}}
\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}}
\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}}
\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}}
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{listings}
\lstset{
inputpath=../code,
basicstyle=\ttfamily\footnotesize,
numbers=left,
showstringspaces=false,
language=Matlab,
commentstyle=\itshape\color{darkgray},
keywordstyle=\color{blue},
stringstyle=\color{green},
backgroundcolor=\color{blue!10},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
caption={\protect\filename@parse{\lstname}\protect\filename@base},
captionpos=t,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{amsmath}
\usepackage{bm}
\usepackage{dsfont}
\newcommand{\naZ}{\mathds{N}}
\newcommand{\gaZ}{\mathds{Z}}
\newcommand{\raZ}{\mathds{Q}}
\newcommand{\reZ}{\mathds{R}}
\newcommand{\reZp}{\mathds{R^+}}
\newcommand{\reZpN}{\mathds{R^+_0}}
\newcommand{\koZ}{\mathds{C}}
%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{ifthen}
\newcommand{\code}[1]{\texttt{#1}}
\newcommand{\source}[1]{
\begin{flushright}
\color{gray}\scriptsize \url{#1}
\end{flushright}
}
\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
{\medskip}
\newcounter{maxexercise}
\setcounter{maxexercise}{9} % show listings up to exercise maxexercise
\newcounter{theexercise}
\setcounter{theexercise}{1}
\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
\arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}%
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}}
\graphicspath{{figures/}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
\include{likelihood}
\end{document}

View File

@ -0,0 +1,212 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\chapter{\tr{Maximum likelihood estimation}{Maximum-Likelihood Methode}}
In vielen Situationen wollen wir einen oder mehrere Parameter $\theta$
einer Wahrscheinlichkeitsverteilung sch\"atzen, so dass die Verteilung
die Daten $x_1, x_2, \ldots x_n$ am besten beschreibt. Bei der
Maximum-Likelihood-Methode w\"ahlen wir die Parameter so, dass die
Wahrscheinlichkeit, dass die Daten aus der Verteilung stammen, am
gr\"o{\ss}ten ist.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Maximum Likelihood}
Sei $p(x|\theta)$ (lies ``Wahrscheinlichkeit(sdichte) von $x$ gegeben
$\theta$'') die Wahrscheinlichkeits(dichte)verteilung von $x$ mit dem
Parameter(n) $\theta$. Das k\"onnte die Normalverteilung
\begin{equation}
\label{normpdfmean}
p(x|\theta) = \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x-\theta)^2}{2\sigma^2}}
\end{equation}
sein mit
fester Standardverteilung $\sigma$ und dem Mittelwert $\mu$ als
Parameter $\theta$.
Wenn nun den $n$ unabh\"angigen Beobachtungen $x_1, x_2, \ldots x_n$
die Wahrscheinlichkeitsverteilung $p(x|\theta)$ zugrundeliegt, dann
ist die Verbundwahrscheinlichkeit $p(x_1,x_2, \ldots x_n|\theta)$ des
Auftretens der Werte $x_1, x_2, \ldots x_n$ gegeben ein bestimmtes $\theta$
\begin{equation}
p(x_1,x_2, \ldots x_n|\theta) = p(x_1|\theta) \cdot p(x_2|\theta)
\ldots p(x_n|\theta) = \prod_{i=1}^n p(x_i|\theta) \; .
\end{equation}
Andersherum gesehen ist das die Likelihood (deutsch immer noch ``Wahrscheinlichleit'')
den Parameter $\theta$ zu haben, gegeben die Me{\ss}werte $x_1, x_2, \ldots x_n$,
\begin{equation}
{\cal L}(\theta|x_1,x_2, \ldots x_n) = p(x_1,x_2, \ldots x_n|\theta)
\end{equation}
Wir sind nun an dem Wert des Parameters $\theta_{mle}$ interessiert, der die
Likelihood maximiert (``mle'': Maximum-Likelihood Estimate):
\begin{equation}
\theta_{mle} = \text{argmax}_{\theta} {\cal L}(\theta|x_1,x_2, \ldots x_n)
\end{equation}
$\text{argmax}_xf(x)$ bezeichnet den Wert des Arguments $x$ der Funktion $f(x)$, bei
dem $f(x)$ ihr globales Maximum annimmt. Wir suchen also den Wert von $\theta$
bei dem die Likelihood ${\cal L}(\theta)$ ihr Maximum hat.
An der Stelle eines Maximums einer Funktion \"andert sich nichts, wenn
man die Funktionswerte mit einer streng monoton steigenden Funktion
transformiert. Aus gleich ersichtlichen mathematischen Gr\"unden wird meistens
das Maximum der logarithmierten Likelihood (``Log-Likelihood'') gesucht:
\begin{eqnarray}
\theta_{mle} & = & \text{argmax}_{\theta}\; {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\
& = & \text{argmax}_{\theta}\; \log {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\
& = & \text{argmax}_{\theta}\; \log \prod_{i=1}^n p(x_i|\theta) \nonumber \\
& = & \text{argmax}_{\theta}\; \sum_{i=1}^n \log p(x_i|\theta) \label{loglikelihood}
\end{eqnarray}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Beispiel: Das arithmetische Mittel}
Wenn die Me{\ss}daten $x_1, x_2, \ldots x_n$ der Normalverteilung \eqnref{normpdfmean}
entstammen, und wir den Mittelwert $\mu$ als einzigen Parameter der Verteilung betrachten,
welcher Wert von $\theta$ maximiert dessen Likelhood?
\begin{figure}[t]
\includegraphics[width=1\textwidth]{mlemean}
\caption{\label{mlemeanfig} Maximum Likelihood Estimation des
Mittelwerts. Oben: Die Daten zusammen mit drei m\"oglichen
Normalverteilungen mit unterschiedlichen Mittelwerten (Pfeile) aus
denen die Daten stammen k\"onnten. Unteln links: Die Likelihood
in Abh\"angigkeit des Mittelwerts als Parameter der
Normalverteilungen. Unten rechts: die entsprechende
Log-Likelihood. An der Position des Maximums bei $\theta=2$
\"andert sich nichts (Pfeil).}
\end{figure}
Die Log-Likelihood \eqnref{loglikelihood} ist
\begin{eqnarray*}
\log {\cal L}(\theta|x_1,x_2, \ldots x_n)
& = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x_i-\theta)^2}{2\sigma^2}} \\
& = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma^2} -\frac{(x_i-\theta)^2}{2\sigma^2}
\end{eqnarray*}
Zur Bestimmung des Maximums der Log-Likelihood berechnen wir deren Ableitung
nach dem Parameter $\theta$ und setzen diese gleich Null:
\begin{eqnarray*}
\frac{\text{d}}{\text{d}\theta} \log {\cal L}(\theta|x_1,x_2, \ldots x_n) & = & \sum_{i=1}^n \frac{2(x_i-\theta)}{2\sigma^2} \;\; = \;\; 0 \\
\Leftrightarrow \quad \sum_{i=1}^n x_i - \sum_{i=1}^n x_i \theta & = & 0 \\
\Leftrightarrow \quad n \theta & = & \sum_{i=1}^n x_i \\
\Leftrightarrow \quad \theta & = & \frac{1}{n} \sum_{i=1}^n x_i
\end{eqnarray*}
Der Maximum-Likelihood-Estimator ist das arithmetische Mittel der Daten. D.h.
das arithmetische Mittel maximiert die Wahrscheinlichkeit, dass die Daten aus einer
Normalverteilung mit diesem Mittelwert gezogen worden sind.
\begin{exercise}[mlemean.m]
Ziehe $n=50$ normalverteilte Zufallsvariablen mit einem Mittelwert $\ne 0$
und einer Standardabweichung $\ne 1$.
Plotte die Likelihood (aus dem Produkt der Wahrscheinlichkeiten) und
die Log-Likelihood (aus der Summe der logarithmierten
Wahrscheinlichkeiten) f\"ur den Mittelwert als Parameter. Vergleiche
die Position der Maxima mit den aus den Daten berechneten
Mittelwerte.
\end{exercise}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Kurvenfit als Maximum Likelihood Estimation}
Beim Kurvenfit soll eine Funktion $f(x;\theta)$ mit den Parametern
$\theta$ an die Datenpaare $(x_i|y_i)$ durch Anpassung der Parameter
$\theta$ gefittet werden. Wenn wir annehmen, dass die $y_i$ um die
entsprechenden Funktionswerte $f(x_i;\theta)$ mit einer
Standardabweichung $\sigma_i$ normalverteilt streuen, dann lautet die
Log-Likelihood
\begin{eqnarray*}
\log {\cal L}(\theta|x_1,x_2, \ldots x_n)
& = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma_i^2}}e^{-\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2}} \\
& = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma_i^2} -\frac{(x_i-f(y_i;\theta))^2}{2\sigma_i^2} \\
\end{eqnarray*}
Der einzige Unterschied zum vorherigen Beispiel ist, dass die
Mittelwerte der Normalverteilungen nun durch die Funktionswerte
gegeben sind.
Der Parameter $\theta$ soll so gew\"ahlt werden, dass die
Log-Likelihood maximal wird. Der erste Term der Summe ist
unabh\"angig von $\theta$ und kann deshalb bei der Suche nach dem
Maximum weggelassen werden.
\begin{eqnarray*}
& = & - \frac{1}{2} \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2
\end{eqnarray*}
Anstatt nach dem Maximum zu suchen, k\"onnen wir auch das Vorzeichen der Log-Likelihood
umdrehen und nach dem Minimum suchen. Dabei k\"onnen wir auch den Faktor $1/2$ vor der Summe vernachl\"assigen --- auch das \"andert nichts an der Position des Minimums.
\begin{equation}
\theta_{mle} = \text{argmin}_{\theta} \; \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 \;\; = \;\; \text{argmin}_{\theta} \; \chi^2
\end{equation}
Die Summer der quadratischen Abst\"ande normiert auf die jeweiligen
Standardabweichungen wird auch mit $\chi^2$ bezeichnet. Der Wert des
Parameters $\theta$ welcher den quadratischen Abstand minimiert ist
also identisch mit der Maximierung der Wahrscheinlichkeit, dass die
Daten tats\"achlich aus der Funktion stammen k\"onnen. Minimierung des
$\chi^2$ ist also ein Maximum-Likelihood Estimate.
\begin{figure}[t]
\includegraphics[width=1\textwidth]{mlepropline}
\caption{\label{mleproplinefig} Maximum Likelihood Estimation der
Steigung einer Ursprungsgeraden.}
\end{figure}
\subsection{Beispiel: einfache Proportionalit\"at}
Als Funktion nehmen wir die Ursprungsgerade
\[ f(x) = \theta x \]
mit Steigung $\theta$. Die $\chi^2$-Summe lautet damit
\[ \chi^2 = \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \; . \]
Zur Bestimmung des Minimums berechnen wir wieder die erste Ableitung nach $\theta$
und setzen diese gleich Null:
\begin{eqnarray}
\frac{\text{d}}{\text{d}\theta}\chi^2 & = & \frac{\text{d}}{\text{d}\theta} \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\
& = & \sum_{i=1}^n \frac{\text{d}}{\text{d}\theta} \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\
& = & -2 \sum_{i=1}^n \frac{x_i}{\sigma_i} \left( \frac{y_i-\theta x_i}{\sigma_i} \right) \nonumber \\
& = & -2 \sum_{i=1}^n \left( \frac{x_iy_i}{\sigma_i^2} - \theta \frac{x_i^2}{\sigma_i^2} \right) \;\; = \;\; 0 \nonumber \\
\Leftrightarrow \quad \theta \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2} & = & \sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2} \nonumber \\
\Leftrightarrow \quad \theta & = & \frac{\sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2}}{ \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2}} \label{mleslope}
\end{eqnarray}
Damit haben wir nun einen anlytischen Ausdruck f\"ur die Bestimmung
der Steigung $\theta$ des Regressionsgeraden gewonnen. Ein
Gradientenabstieg ist f\"ur das Fitten der Geradensteigung also gar nicht
n\"otig. Das gilt allgemein f\"ur das Fitten von Koeffizienten von
linear kombinierten Basisfunktionen. Parameter die nichtlinear in
einer Funktion enthalten sind k\"onnen aber nicht analytisch aus den
Daten berechnet werden. Da bleibt dann nur auf numerische Verfahren
zur Optimierung der Kostenfunktion, wie z.B. der Gradientenabstieg,
zur\"uckzugreifen.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Fits von Wahrscheinlichkeitsverteilungen}
Zum Abschluss betrachten wir noch den Fall, bei dem wir die Parameter
einer Wahrscheinlichkeitsdichtefunktion (z.B. Mittelwert und
Standardabweichung der Normalverteilung) an ein Datenset fitten wolle.
Ein erster Gedanke k\"onnte sein, die
Wahrscheinlichkeitsdichtefunktion durch Minimierung des quadratischen
Abstands an ein Histogram der Daten zu fitten. Das ist aber aus
folgenden Gr\"unden nicht die Methode der Wahl: (i)
Wahrscheinlichkeitsdichten k\"onnen nur positiv sein. Darum k\"onnen
insbesondere bei kleinen Werten die Daten nicht symmetrisch streuen,
wie es normalverteilte Daten machen sollten. (ii) Die Datenwerte sind
nicht unabh\"angig, da das normierte Histogram sich zu Eins
aufintegriert. Die beiden Annahmen normalverteilte und unabh\"angige Daten
die die Minimierung des quadratischen Abstands zu einem Maximum
Likelihood Estimator machen sind also verletzt. (iii) Das Histgramm
h\"angt von der Wahl der Klassenbreite ab.
Den direkten Weg, eine Wahrscheinlichkeitsdichtefunktion an ein
Datenset zu fitten, haben wir oben schon bei dem Beispiel zur
Absch\"atzung des Mittelwertes einer Normalverteilung gesehen ---
Maximum Likelihood! Wir suchen einfach die Parameter $\theta$ der
gesuchten Wahrscheinlichkeitsdichtefunktion bei der die Log-Likelihood
\eqnref{loglikelihood} maximal wird. Das ist im allgemeinen ein
nichtlinieares Optimierungsproblem, das mit numerischen Verfahren, wie
z.B. dem Gradientenabstieg, gel\"ost wird.
\begin{figure}[t]
\includegraphics[width=1\textwidth]{mlepdf}
\caption{\label{mlepdffig} Maximum Likelihood Estimation einer
Wahrscheinlichkeitsdichtefunktion. Links: die 100 Datenpunkte, die aus der Gammaverteilung
2. Ordnung (rot) gezogen worden sind. Der Maximum-Likelihood-Fit ist orange dargestellt.
Rechts: das normierte Histogramm der Daten zusammen mit der \"uber Minimierung
des quadratischen Abstands zum Histogramm berechneten Fits ist potentiell schlechter.}
\end{figure}

View File

@ -0,0 +1,22 @@
BASENAME=linear_regression
PYFILES=$(wildcard *.py)
PYPDFFILES=$(PYFILES:.py=.pdf)
pdf : $(BASENAME).pdf $(PYPDFFILES)
$(BASENAME).pdf : $(BASENAME).tex
pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
$(PYPDFFILES) : %.pdf : %.py
python $<
clean :
rm -f *~ $(BASENAME).aux $(BASENAME).log $(BASENAME).out $(BASENAME).toc $(BASENAME).nav $(BASENAME).snm $(BASENAME).vrb
cleanall : clean
rm -f $(BASENAME).pdf
watch :
while true; do ! make -q pdf && make pdf; sleep 0.5; done

View File

@ -0,0 +1,61 @@
% Copyright 2007 by Till Tantau
%
% This file may be distributed and/or modified
%
% 1. under the LaTeX Project Public License and/or
% 2. under the GNU Public License.
%
% See the file doc/licenses/LICENSE for more details.
\usepackage{color}
\definecolor{karminrot}{RGB}{165,30,55}
\definecolor{gold}{RGB}{180,160,105}
\definecolor{anthrazit}{RGB}{50 ,65 ,75 }
\mode<presentation>
\setbeamercolor*{normal text}{fg=anthrazit,bg=white}
\setbeamercolor*{alerted text}{fg=anthrazit}
\setbeamercolor*{example text}{fg=anthrazit}
\setbeamercolor*{structure}{fg=gold,bg=karminrot}
\providecommand*{\beamer@bftext@only}{%
\relax
\ifmmode
\expandafter\beamer@bftext@warning
\else
\expandafter\bfseries
\fi
}
\providecommand*{\beamer@bftext@warning}{%
\ClassWarning{beamer}
{Cannot use bold for alerted text in math mode}%
}
\setbeamerfont{alerted text}{series=\beamer@bftext@only}
\setbeamercolor{palette primary}{fg=karminrot,bg=white}
\setbeamercolor{palette secondary}{fg=gold,bg=white}
\setbeamercolor{palette tertiary}{fg=anthrazit,bg=white}
\setbeamercolor{palette quaternary}{fg=black,bg=white}
\setbeamercolor{sidebar}{bg=karminrot!100}
\setbeamercolor{palette sidebar primary}{fg=karminrot}
\setbeamercolor{palette sidebar secondary}{fg=karminrot}
\setbeamercolor{palette sidebar tertiary}{fg=karminrot}
\setbeamercolor{palette sidebar quaternary}{fg=karminrot}
\setbeamercolor{item projected}{fg=black,bg=black!20}
\setbeamercolor*{block body}{}
\setbeamercolor*{block body alerted}{}
\setbeamercolor*{block body example}{}
\setbeamercolor*{block title}{parent=structure}
\setbeamercolor*{block title alerted}{parent=alerted text}
\setbeamercolor*{block title example}{parent=example text}
\setbeamercolor*{titlelike}{parent=structure}
\mode
<all>

View File

@ -0,0 +1,236 @@
\documentclass[12pt]{report}
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
\author{Jan Grewe \& Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
\date{WS 15/16}
%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \newcommand{\tr}[2]{#1} % en
% \usepackage[english]{babel}
\newcommand{\tr}[2]{#2} % de
\usepackage[german]{babel}
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{pslatex} % nice font for pdf file
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
\setcounter{tocdepth}{1}
%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[sf,bf,it,big,clearempty]{titlesec}
\setcounter{secnumdepth}{1}
%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{graphicx}
\usepackage{xcolor}
\pagecolor{white}
\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)%
\put(0,4){\line(1,0){170}}%
\multiput(0,2)(10,0){18}{\line(0,1){4}}%
\multiput(0,3)(1,0){170}{\line(0,1){2}}%
\put(0,0){\makebox(0,0){{\tiny 0}}}%
\put(10,0){\makebox(0,0){{\tiny 1}}}%
\put(20,0){\makebox(0,0){{\tiny 2}}}%
\put(30,0){\makebox(0,0){{\tiny 3}}}%
\put(40,0){\makebox(0,0){{\tiny 4}}}%
\put(50,0){\makebox(0,0){{\tiny 5}}}%
\put(60,0){\makebox(0,0){{\tiny 6}}}%
\put(70,0){\makebox(0,0){{\tiny 7}}}%
\put(80,0){\makebox(0,0){{\tiny 8}}}%
\put(90,0){\makebox(0,0){{\tiny 9}}}%
\put(100,0){\makebox(0,0){{\tiny 10}}}%
\put(110,0){\makebox(0,0){{\tiny 11}}}%
\put(120,0){\makebox(0,0){{\tiny 12}}}%
\put(130,0){\makebox(0,0){{\tiny 13}}}%
\put(140,0){\makebox(0,0){{\tiny 14}}}%
\put(150,0){\makebox(0,0){{\tiny 15}}}%
\put(160,0){\makebox(0,0){{\tiny 16}}}%
\put(170,0){\makebox(0,0){{\tiny 17}}}%
\end{picture}\par}
% figures:
\setlength{\fboxsep}{0pt}
\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}}
%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}}
%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}}
%\newcommand{\texpicture}[1]{}
\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}}
% maximum number of floats:
\setcounter{topnumber}{2}
\setcounter{bottomnumber}{0}
\setcounter{totalnumber}{2}
% float placement fractions:
\renewcommand{\textfraction}{0.2}
\renewcommand{\topfraction}{0.8}
\renewcommand{\bottomfraction}{0.0}
\renewcommand{\floatpagefraction}{0.5}
% spacing for floats:
\setlength{\floatsep}{12pt plus 2pt minus 2pt}
\setlength{\textfloatsep}{20pt plus 4pt minus 2pt}
\setlength{\intextsep}{12pt plus 2pt minus 2pt}
% spacing for a floating page:
\makeatletter
\setlength{\@fptop}{0pt}
\setlength{\@fpsep}{8pt plus 2.0fil}
\setlength{\@fpbot}{0pt plus 1.0fil}
\makeatother
% rules for floats:
\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}}
\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}}
% captions:
\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption}
% put caption on separate float:
\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]}
% references to panels of a figure within the caption:
\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}}
% references to figures:
\newcommand{\panel}[1]{\textsf{\uppercase{#1}}}
\newcommand{\fref}[1]{\textup{\ref{#1}}}
\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}}
% references to figures in normal text:
\newcommand{\fig}{Fig.}
\newcommand{\Fig}{Figure}
\newcommand{\figs}{Figs.}
\newcommand{\Figs}{Figures}
\newcommand{\figref}[1]{\fig~\fref{#1}}
\newcommand{\Figref}[1]{\Fig~\fref{#1}}
\newcommand{\figsref}[1]{\figs~\fref{#1}}
\newcommand{\Figsref}[1]{\Figs~\fref{#1}}
\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}}
\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}}
\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}}
\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}}
% references to figures within bracketed text:
\newcommand{\figb}{Fig.}
\newcommand{\figsb}{Figs.}
\newcommand{\figrefb}[1]{\figb~\fref{#1}}
\newcommand{\figsrefb}[1]{\figsb~\fref{#1}}
\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}}
\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}}
% references to tables:
\newcommand{\tref}[1]{\textup{\ref{#1}}}
% references to tables in normal text:
\newcommand{\tab}{Tab.}
\newcommand{\Tab}{Table}
\newcommand{\tabs}{Tabs.}
\newcommand{\Tabs}{Tables}
\newcommand{\tabref}[1]{\tab~\tref{#1}}
\newcommand{\Tabref}[1]{\Tab~\tref{#1}}
\newcommand{\tabsref}[1]{\tabs~\tref{#1}}
\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}}
% references to tables within bracketed text:
\newcommand{\tabb}{Tab.}
\newcommand{\tabsb}{Tab.}
\newcommand{\tabrefb}[1]{\tabb~\tref{#1}}
\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}}
%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\newcommand{\eqref}[1]{(\ref{#1})}
\newcommand{\eqn}{\tr{Eq}{Gl}.}
\newcommand{\Eqn}{\tr{Eq}{Gl}.}
\newcommand{\eqns}{\tr{Eqs}{Gln}.}
\newcommand{\Eqns}{\tr{Eqs}{Gln}.}
\newcommand{\eqnref}[1]{\eqn~\eqref{#1}}
\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}}
\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}}
\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}}
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{listings}
\lstset{
basicstyle=\ttfamily\footnotesize,
numbers=left,
showstringspaces=false,
language=Matlab,
commentstyle=\itshape\color{darkgray},
keywordstyle=\color{blue},
stringstyle=\color{green},
backgroundcolor=\color{blue!10},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
caption={\protect\filename@parse{\lstname}\protect\filename@base},
captionpos=t,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{amsmath}
\usepackage{bm}
\usepackage{dsfont}
\newcommand{\naZ}{\mathds{N}}
\newcommand{\gaZ}{\mathds{Z}}
\newcommand{\raZ}{\mathds{Q}}
\newcommand{\reZ}{\mathds{R}}
\newcommand{\reZp}{\mathds{R^+}}
\newcommand{\reZpN}{\mathds{R^+_0}}
\newcommand{\koZ}{\mathds{C}}
%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{ifthen}
\newcommand{\code}[1]{\texttt{#1}}
\newcommand{\source}[1]{
\begin{flushright}
\color{gray}\scriptsize \url{#1}
\end{flushright}
}
\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
{\medskip}
\newcounter{maxexercise}
\setcounter{maxexercise}{9} % show listings up to exercise maxexercise
\newcounter{theexercise}
\setcounter{theexercise}{1}
\newcommand{\codepath}{}
\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
\arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}%
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\codepath\exercisesource}}}\medskip\stepcounter{theexercise}}
\graphicspath{{statistics/lecture/}{statistics/lecture/figures/}{bootstrap/lecture/}{bootstrap/lecture/figures/}{likelihood/lecture/}{likelihood/lecture/figures/}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
\maketitle
\tableofcontents
\renewcommand{\codepath}{statistics/code/}
\include{statistics/lecture/descriptivestatistics}
\renewcommand{\codepath}{bootstrap/code/}
\include{bootstrap/lecture/bootstrap}
\renewcommand{\codepath}{likelihood/code/}
\include{likelihood/lecture/likelihood}
\end{document}

View File

@ -24,4 +24,7 @@ yy = gampdf(xx, p(1), p(2));
plot(xx, yy, '-k', 'linewidth', 5, 'DisplayName', 'mle' );
hold off;
xlabel('x');
ylabel('pdf');
legend('show');
savefigpdf(gcf, 'mlepdffit.pdf', 12, 8)

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -183,7 +183,7 @@ Normalverteilung entstammen, sonder aus der Gamma-Verteilung.
\end{parts}
\begin{solution}
\lstinputlisting{mlepdffit.m}
%\includegraphics[width=1\textwidth]{mlepdffit}
\includegraphics[width=1\textwidth]{mlepdffit}
\end{solution}
\end{questions}

View File

@ -1,21 +1,20 @@
TEXFILES=descriptivestatistics.tex linear_regression.tex #$(wildcard *.tex)
PDFFILES=$(TEXFILES:.tex=.pdf)
BASENAME=descriptivestatistics
PYFILES=$(wildcard *.py)
PYPDFFILES=$(PYFILES:.py=.pdf)
pdf : $(PDFFILES) $(PYPDFFILES)
pdf : $(BASENAME)-chapter.pdf $(PYPDFFILES)
$(PDFFILES) : %.pdf : %.tex
$(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex
pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
$(PYPDFFILES) : %.pdf : %.py
python $<
clean :
rm -f *~ $(TEXFILES:.tex=.aux) $(TEXFILES:.tex=.log) $(TEXFILES:.tex=.out) $(TEXFILES:.tex=.nav) $(TEXFILES:.tex=.snm) $(TEXFILES:.tex=.toc) $(TEXFILES:.tex=.vrb)
rm -f *~ $(BASENAME)-chapter.aux $(BASENAME)-chapter.log $(BASENAME)-chapter.out $(BASENAME).aux $(BASENAME).log
cleanall : clean
rm -f $(PDFFILES)
rm -f $(BASENAME)-chapter.pdf
watch :
while true; do ! make -q pdf && make pdf; sleep 0.5; done

View File

@ -0,0 +1,361 @@
\documentclass[12pt]{report}
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
\date{WS 15/16}
%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \newcommand{\tr}[2]{#1} % en
% \usepackage[english]{babel}
\newcommand{\tr}[2]{#2} % de
\usepackage[german]{babel}
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{pslatex} % nice font for pdf file
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
\setcounter{tocdepth}{1}
%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[sf,bf,it,big,clearempty]{titlesec}
\setcounter{secnumdepth}{1}
%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{graphicx}
\usepackage{xcolor}
\pagecolor{white}
\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)%
\put(0,4){\line(1,0){170}}%
\multiput(0,2)(10,0){18}{\line(0,1){4}}%
\multiput(0,3)(1,0){170}{\line(0,1){2}}%
\put(0,0){\makebox(0,0){{\tiny 0}}}%
\put(10,0){\makebox(0,0){{\tiny 1}}}%
\put(20,0){\makebox(0,0){{\tiny 2}}}%
\put(30,0){\makebox(0,0){{\tiny 3}}}%
\put(40,0){\makebox(0,0){{\tiny 4}}}%
\put(50,0){\makebox(0,0){{\tiny 5}}}%
\put(60,0){\makebox(0,0){{\tiny 6}}}%
\put(70,0){\makebox(0,0){{\tiny 7}}}%
\put(80,0){\makebox(0,0){{\tiny 8}}}%
\put(90,0){\makebox(0,0){{\tiny 9}}}%
\put(100,0){\makebox(0,0){{\tiny 10}}}%
\put(110,0){\makebox(0,0){{\tiny 11}}}%
\put(120,0){\makebox(0,0){{\tiny 12}}}%
\put(130,0){\makebox(0,0){{\tiny 13}}}%
\put(140,0){\makebox(0,0){{\tiny 14}}}%
\put(150,0){\makebox(0,0){{\tiny 15}}}%
\put(160,0){\makebox(0,0){{\tiny 16}}}%
\put(170,0){\makebox(0,0){{\tiny 17}}}%
\end{picture}\par}
% figures:
\setlength{\fboxsep}{0pt}
\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}}
%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}}
%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}}
%\newcommand{\texpicture}[1]{}
\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}}
% maximum number of floats:
\setcounter{topnumber}{2}
\setcounter{bottomnumber}{0}
\setcounter{totalnumber}{2}
% float placement fractions:
\renewcommand{\textfraction}{0.2}
\renewcommand{\topfraction}{0.8}
\renewcommand{\bottomfraction}{0.0}
\renewcommand{\floatpagefraction}{0.5}
% spacing for floats:
\setlength{\floatsep}{12pt plus 2pt minus 2pt}
\setlength{\textfloatsep}{20pt plus 4pt minus 2pt}
\setlength{\intextsep}{12pt plus 2pt minus 2pt}
% spacing for a floating page:
\makeatletter
\setlength{\@fptop}{0pt}
\setlength{\@fpsep}{8pt plus 2.0fil}
\setlength{\@fpbot}{0pt plus 1.0fil}
\makeatother
% rules for floats:
\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}}
\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}}
% captions:
\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption}
% put caption on separate float:
\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]}
% references to panels of a figure within the caption:
\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}}
% references to figures:
\newcommand{\panel}[1]{\textsf{\uppercase{#1}}}
\newcommand{\fref}[1]{\textup{\ref{#1}}}
\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}}
% references to figures in normal text:
\newcommand{\fig}{Fig.}
\newcommand{\Fig}{Figure}
\newcommand{\figs}{Figs.}
\newcommand{\Figs}{Figures}
\newcommand{\figref}[1]{\fig~\fref{#1}}
\newcommand{\Figref}[1]{\Fig~\fref{#1}}
\newcommand{\figsref}[1]{\figs~\fref{#1}}
\newcommand{\Figsref}[1]{\Figs~\fref{#1}}
\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}}
\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}}
\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}}
\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}}
% references to figures within bracketed text:
\newcommand{\figb}{Fig.}
\newcommand{\figsb}{Figs.}
\newcommand{\figrefb}[1]{\figb~\fref{#1}}
\newcommand{\figsrefb}[1]{\figsb~\fref{#1}}
\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}}
\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}}
% references to tables:
\newcommand{\tref}[1]{\textup{\ref{#1}}}
% references to tables in normal text:
\newcommand{\tab}{Tab.}
\newcommand{\Tab}{Table}
\newcommand{\tabs}{Tabs.}
\newcommand{\Tabs}{Tables}
\newcommand{\tabref}[1]{\tab~\tref{#1}}
\newcommand{\Tabref}[1]{\Tab~\tref{#1}}
\newcommand{\tabsref}[1]{\tabs~\tref{#1}}
\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}}
% references to tables within bracketed text:
\newcommand{\tabb}{Tab.}
\newcommand{\tabsb}{Tab.}
\newcommand{\tabrefb}[1]{\tabb~\tref{#1}}
\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}}
%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\newcommand{\eqref}[1]{(\ref{#1})}
\newcommand{\eqn}{\tr{Eq}{Gl}.}
\newcommand{\Eqn}{\tr{Eq}{Gl}.}
\newcommand{\eqns}{\tr{Eqs}{Gln}.}
\newcommand{\Eqns}{\tr{Eqs}{Gln}.}
\newcommand{\eqnref}[1]{\eqn~\eqref{#1}}
\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}}
\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}}
\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}}
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{listings}
\lstset{
inputpath=../code,
basicstyle=\ttfamily\footnotesize,
numbers=left,
showstringspaces=false,
language=Matlab,
commentstyle=\itshape\color{darkgray},
keywordstyle=\color{blue},
stringstyle=\color{green},
backgroundcolor=\color{blue!10},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
caption={\protect\filename@parse{\lstname}\protect\filename@base},
captionpos=t,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{amsmath}
\usepackage{bm}
\usepackage{dsfont}
\newcommand{\naZ}{\mathds{N}}
\newcommand{\gaZ}{\mathds{Z}}
\newcommand{\raZ}{\mathds{Q}}
\newcommand{\reZ}{\mathds{R}}
\newcommand{\reZp}{\mathds{R^+}}
\newcommand{\reZpN}{\mathds{R^+_0}}
\newcommand{\koZ}{\mathds{C}}
%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{ifthen}
\newcommand{\code}[1]{\texttt{#1}}
\newcommand{\source}[1]{
\begin{flushright}
\color{gray}\scriptsize \url{#1}
\end{flushright}
}
\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
{\medskip}
\newcounter{maxexercise}
\setcounter{maxexercise}{9} % show listings up to exercise maxexercise
\newcounter{theexercise}
\setcounter{theexercise}{1}
\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
\arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}%
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}}
\graphicspath{{figures/}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
\include{descriptivestatistics}
\end{document}
\end{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Statistics}
What is "a statistic"? % dt. Sch\"atzfunktion
\begin{definition}[statistic]
A statistic (singular) is a single measure of some attribute of a
sample (e.g., its arithmetic mean value). It is calculated by
applying a function (statistical algorithm) to the values of the
items of the sample, which are known together as a set of data.
\source{http://en.wikipedia.org/wiki/Statistic}
\end{definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Data types}
\subsection{Nominal scale}
\begin{itemize}
\item Binary
\begin{itemize}
\item ``yes/no'',
\item ``true/false'',
\item ``success/failure'', etc.
\end{itemize}
\item Categorial
\begin{itemize}
\item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
\item blood type (``A/B/AB/0''),
\item parts of speech (``noun/veerb/preposition/article/...''),
\item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
\end{itemize}
\item Each observation/measurement/sample is put into one category
\item There is no reasonable order among the categories.\\
example: [rods, cones] vs. [cones, rods]
\item Statistics: mode, i.e. the most common item
\end{itemize}
\subsection{Ordinal scale}
\begin{itemize}
\item Like nominal scale, but with an order
\item Examples: ranks, ratings
\begin{itemize}
\item ``bad/ok/good'',
\item ``cold/warm/hot'',
\item ``young/old'', etc.
\end{itemize}
\item {\bf But:} there is no reasonable measure of {\em distance}
between the classes
\item Statistics: mode, median
\end{itemize}
\subsection{Interval scale}
\begin{itemize}
\item Quantitative/metric values
\item Reasonable measure of distance between values, but no absolute zero
\item Examples:
\begin{itemize}
\item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
\item Direction measured in degrees from magnetic or true north
\end{itemize}
\item Statistics:
\begin{itemize}
\item Central tendency: mode, median, arithmetic mean
\item Dispersion: range, standard deviation
\end{itemize}
\end{itemize}
\subsection{Absolute/ratio scale}
\begin{itemize}
\item Like interval scale, but with absolute origin/zero
\item Examples:
\begin{itemize}
\item Temperature in $^\circ$K
\item Length, mass, duration, electric charge, ...
\item Plane angle, etc.
\item Count (e.g. number of spikes in response to a stimulus)
\end{itemize}
\item Statistics:
\begin{itemize}
\item Central tendency: mode, median, arithmetic, geometric, harmonic mean
\item Dispersion: range, standard deviation
\item Coefficient of variation (ratio standard deviation/mean)
\item All other statistical measures
\end{itemize}
\end{itemize}
\subsection{Data types}
\begin{itemize}
\item Data type selects
\begin{itemize}
\item statistics
\item type of plots (bar graph versus x-y plot)
\item correct tests
\end{itemize}
\item Scales exhibit increasing information content from nominal
to absolute.\\
Conversion ,,downwards'' is always possible
\item For example: size measured in meter (ratio scale) $\rightarrow$
categories ``small/medium/large'' (ordinal scale)
\end{itemize}
\subsection{Examples from neuroscience}
\begin{itemize}
\item {\bf absolute:}
\begin{itemize}
\item size of neuron/brain
\item length of axon
\item ion concentration
\item membrane potential
\item firing rate
\end{itemize}
\item {\bf interval:}
\begin{itemize}
\item edge orientation
\end{itemize}
\item {\bf ordinal:}
\begin{itemize}
\item stages of a disease
\item ratings
\end{itemize}
\item {\bf nominal:}
\begin{itemize}
\item cell type
\item odor
\item states of an ion channel
\end{itemize}
\end{itemize}

View File

@ -1,229 +1,3 @@
\documentclass[12pt]{report}
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
\date{WS 15/16}
%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \newcommand{\tr}[2]{#1} % en
% \usepackage[english]{babel}
\newcommand{\tr}[2]{#2} % de
\usepackage[german]{babel}
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{pslatex} % nice font for pdf file
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
\setcounter{tocdepth}{1}
%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[sf,bf,it,big,clearempty]{titlesec}
\setcounter{secnumdepth}{1}
%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{graphicx}
\usepackage{xcolor}
\pagecolor{white}
\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)%
\put(0,4){\line(1,0){170}}%
\multiput(0,2)(10,0){18}{\line(0,1){4}}%
\multiput(0,3)(1,0){170}{\line(0,1){2}}%
\put(0,0){\makebox(0,0){{\tiny 0}}}%
\put(10,0){\makebox(0,0){{\tiny 1}}}%
\put(20,0){\makebox(0,0){{\tiny 2}}}%
\put(30,0){\makebox(0,0){{\tiny 3}}}%
\put(40,0){\makebox(0,0){{\tiny 4}}}%
\put(50,0){\makebox(0,0){{\tiny 5}}}%
\put(60,0){\makebox(0,0){{\tiny 6}}}%
\put(70,0){\makebox(0,0){{\tiny 7}}}%
\put(80,0){\makebox(0,0){{\tiny 8}}}%
\put(90,0){\makebox(0,0){{\tiny 9}}}%
\put(100,0){\makebox(0,0){{\tiny 10}}}%
\put(110,0){\makebox(0,0){{\tiny 11}}}%
\put(120,0){\makebox(0,0){{\tiny 12}}}%
\put(130,0){\makebox(0,0){{\tiny 13}}}%
\put(140,0){\makebox(0,0){{\tiny 14}}}%
\put(150,0){\makebox(0,0){{\tiny 15}}}%
\put(160,0){\makebox(0,0){{\tiny 16}}}%
\put(170,0){\makebox(0,0){{\tiny 17}}}%
\end{picture}\par}
% figures:
\setlength{\fboxsep}{0pt}
\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}}
%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}}
%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}}
%\newcommand{\texpicture}[1]{}
\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}}
% maximum number of floats:
\setcounter{topnumber}{2}
\setcounter{bottomnumber}{0}
\setcounter{totalnumber}{2}
% float placement fractions:
\renewcommand{\textfraction}{0.2}
\renewcommand{\topfraction}{0.8}
\renewcommand{\bottomfraction}{0.0}
\renewcommand{\floatpagefraction}{0.5}
% spacing for floats:
\setlength{\floatsep}{12pt plus 2pt minus 2pt}
\setlength{\textfloatsep}{20pt plus 4pt minus 2pt}
\setlength{\intextsep}{12pt plus 2pt minus 2pt}
% spacing for a floating page:
\makeatletter
\setlength{\@fptop}{0pt}
\setlength{\@fpsep}{8pt plus 2.0fil}
\setlength{\@fpbot}{0pt plus 1.0fil}
\makeatother
% rules for floats:
\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}}
\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}}
% captions:
\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption}
% put caption on separate float:
\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]}
% references to panels of a figure within the caption:
\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}}
% references to figures:
\newcommand{\panel}[1]{\textsf{\uppercase{#1}}}
\newcommand{\fref}[1]{\textup{\ref{#1}}}
\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}}
% references to figures in normal text:
\newcommand{\fig}{Fig.}
\newcommand{\Fig}{Figure}
\newcommand{\figs}{Figs.}
\newcommand{\Figs}{Figures}
\newcommand{\figref}[1]{\fig~\fref{#1}}
\newcommand{\Figref}[1]{\Fig~\fref{#1}}
\newcommand{\figsref}[1]{\figs~\fref{#1}}
\newcommand{\Figsref}[1]{\Figs~\fref{#1}}
\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}}
\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}}
\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}}
\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}}
% references to figures within bracketed text:
\newcommand{\figb}{Fig.}
\newcommand{\figsb}{Figs.}
\newcommand{\figrefb}[1]{\figb~\fref{#1}}
\newcommand{\figsrefb}[1]{\figsb~\fref{#1}}
\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}}
\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}}
% references to tables:
\newcommand{\tref}[1]{\textup{\ref{#1}}}
% references to tables in normal text:
\newcommand{\tab}{Tab.}
\newcommand{\Tab}{Table}
\newcommand{\tabs}{Tabs.}
\newcommand{\Tabs}{Tables}
\newcommand{\tabref}[1]{\tab~\tref{#1}}
\newcommand{\Tabref}[1]{\Tab~\tref{#1}}
\newcommand{\tabsref}[1]{\tabs~\tref{#1}}
\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}}
% references to tables within bracketed text:
\newcommand{\tabb}{Tab.}
\newcommand{\tabsb}{Tab.}
\newcommand{\tabrefb}[1]{\tabb~\tref{#1}}
\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}}
%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\newcommand{\eqref}[1]{(\ref{#1})}
\newcommand{\eqn}{\tr{Eq}{Gl}.}
\newcommand{\Eqn}{\tr{Eq}{Gl}.}
\newcommand{\eqns}{\tr{Eqs}{Gln}.}
\newcommand{\Eqns}{\tr{Eqs}{Gln}.}
\newcommand{\eqnref}[1]{\eqn~\eqref{#1}}
\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}}
\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}}
\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}}
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{listings}
\lstset{
inputpath=../code,
basicstyle=\ttfamily\footnotesize,
numbers=left,
showstringspaces=false,
language=Matlab,
commentstyle=\itshape\color{darkgray},
keywordstyle=\color{blue},
stringstyle=\color{green},
backgroundcolor=\color{blue!10},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
caption={\protect\filename@parse{\lstname}\protect\filename@base},
captionpos=t,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{amsmath}
\usepackage{bm}
\usepackage{dsfont}
\newcommand{\naZ}{\mathds{N}}
\newcommand{\gaZ}{\mathds{Z}}
\newcommand{\raZ}{\mathds{Q}}
\newcommand{\reZ}{\mathds{R}}
\newcommand{\reZp}{\mathds{R^+}}
\newcommand{\reZpN}{\mathds{R^+_0}}
\newcommand{\koZ}{\mathds{C}}
%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{ifthen}
\newcommand{\code}[1]{\texttt{#1}}
\newcommand{\source}[1]{
\begin{flushright}
\color{gray}\scriptsize \url{#1}
\end{flushright}
}
\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
{\medskip}
\newcounter{maxexercise}
\setcounter{maxexercise}{9} % show listings up to exercise maxexercise
\newcounter{theexercise}
\setcounter{theexercise}{1}
\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
\arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}%
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}}
\graphicspath{{figures/}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
\maketitle
%\tableofcontents
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\chapter{\tr{Descriptive statistics}{Deskriptive Statistik}}
@ -453,418 +227,3 @@ Korrelationskoeffizienten nahe 0 (\figrefb{correlationfig}).
$x$ abh\"angen, ergeben Korrelationskeffizienten nahe Null.
$\xi$ sind normalverteilte Zufallszahlen.}
\end{figure}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\chapter{\tr{Bootstrap Methods}{Bootstrap Methoden}}
Beim Bootstrap erzeugt man sich die Verteilung von Statistiken durch Resampling
aus der Stichprobe. Das hat mehrere Vorteile:
\begin{itemize}
\item Weniger Annahmen (z.B. muss eine Stichprobe nicht Normalverteilt sein).
\item H\"ohere Genauigkeit als klassische Methoden.
\item Allgemeing\"ultigkeit: Bootstrap Methoden sind sich sehr
\"ahnlich f\"ur viele verschiedene Statistiken und ben\"otigen nicht
f\"ur jede Statistik eine andere Formel.
\end{itemize}
\begin{figure}[t]
\includegraphics[width=0.8\textwidth]{2012-10-29_16-26-05_771}\\[2ex]
\includegraphics[width=0.8\textwidth]{2012-10-29_16-41-39_523}\\[2ex]
\includegraphics[width=0.8\textwidth]{2012-10-29_16-29-35_312}
\caption{\tr{Why can we only measure a sample of the
population?}{Warum k\"onnen wir nur eine Stichprobe der
Grundgesamtheit messen?}}
\end{figure}
\begin{figure}[t]
\includegraphics[height=0.2\textheight]{srs1}\\[2ex]
\includegraphics[height=0.2\textheight]{srs2}\\[2ex]
\includegraphics[height=0.2\textheight]{srs3}
\caption{Bootstrap der Stichprobenvertielung (a) Von der
Grundgesamtheit (population) mit unbekanntem Parameter
(z.B. Mittelwert $\mu$) zieht man Stichproben (SRS: simple random
samples). Die Statistik (hier Bestimmung von $\bar x$) kann f\"ur
jede Stichprobe berechnet werden. Die erhaltenen Werte entstammen
der Stichprobenverteilung. Meisten wird aber nur eine Stichprobe
gezogen! (b) Mit bestimmten Annahmen und Theorien kann man auf
die Stichprobenverteilung schlie{\ss}en ohne sie gemessen zu
haben. (c) Alternativ k\"onnen aus der einen Stichprobe viele
Bootstrap-Stichproben generiert werden (resampling) und so
Eigenschaften der Stichprobenverteilung empirisch bestimmt
werden. Aus Hesterberg et al. 2003, Bootstrap Methods and
Permuation Tests}
\end{figure}
\section{Bootstrap des Standardfehlers}
Beim Bootstrap erzeugen wir durch Resampling neue Stichproben und
benutzen diese um die Stichprobenverteilung einer Statistik zu
berechnen. Die Bootstrap Stichproben haben jeweils den gleichen Umfang
wie die urspr\"unglich gemessene Stichprobe und werden durch Ziehen
mit Zur\"ucklegen gewonnen. Jeder Wert der urspr\"unglichen Stichprobe
kann also einmal, mehrmals oder gar nicht in einer Bootstrap
Stichprobe vorkommen.
\begin{exercise}[bootstrapsem.m]
Ziehe 1000 normalverteilte Zufallszahlen und berechne deren Mittelwert,
Standardabweichung und Standardfehler ($\sigma/\sqrt{n}$).
Resample die Daten 1000 mal (Ziehen mit Zur\"ucklegen) und berechne jeweils
den Mittelwert.
Plotte ein Histogramm dieser Mittelwerte, sowie deren Mittelwert und
die Standardabweichung.
Was hat das mit dem Standardfehler zu tun?
\end{exercise}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\chapter{\tr{Maximum likelihood estimation}{Maximum-Likelihood Methode}}
In vielen Situationen wollen wir einen oder mehrere Parameter $\theta$
einer Wahrscheinlichkeitsverteilung sch\"atzen, so dass die Verteilung
die Daten $x_1, x_2, \ldots x_n$ am besten beschreibt. Bei der
Maximum-Likelihood-Methode w\"ahlen wir die Parameter so, dass die
Wahrscheinlichkeit, dass die Daten aus der Verteilung stammen, am
gr\"o{\ss}ten ist.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Maximum Likelihood}
Sei $p(x|\theta)$ (lies ``Wahrscheinlichkeit(sdichte) von $x$ gegeben
$\theta$'') die Wahrscheinlichkeits(dichte)verteilung von $x$ mit dem
Parameter(n) $\theta$. Das k\"onnte die Normalverteilung
\begin{equation}
\label{normpdfmean}
p(x|\theta) = \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x-\theta)^2}{2\sigma^2}}
\end{equation}
sein mit
fester Standardverteilung $\sigma$ und dem Mittelwert $\mu$ als
Parameter $\theta$.
Wenn nun den $n$ unabh\"angigen Beobachtungen $x_1, x_2, \ldots x_n$
die Wahrscheinlichkeitsverteilung $p(x|\theta)$ zugrundeliegt, dann
ist die Verbundwahrscheinlichkeit $p(x_1,x_2, \ldots x_n|\theta)$ des
Auftretens der Werte $x_1, x_2, \ldots x_n$ gegeben ein bestimmtes $\theta$
\begin{equation}
p(x_1,x_2, \ldots x_n|\theta) = p(x_1|\theta) \cdot p(x_2|\theta)
\ldots p(x_n|\theta) = \prod_{i=1}^n p(x_i|\theta) \; .
\end{equation}
Andersherum gesehen ist das die Likelihood (deutsch immer noch ``Wahrscheinlichleit'')
den Parameter $\theta$ zu haben, gegeben die Me{\ss}werte $x_1, x_2, \ldots x_n$,
\begin{equation}
{\cal L}(\theta|x_1,x_2, \ldots x_n) = p(x_1,x_2, \ldots x_n|\theta)
\end{equation}
Wir sind nun an dem Wert des Parameters $\theta_{mle}$ interessiert, der die
Likelihood maximiert (``mle'': Maximum-Likelihood Estimate):
\begin{equation}
\theta_{mle} = \text{argmax}_{\theta} {\cal L}(\theta|x_1,x_2, \ldots x_n)
\end{equation}
$\text{argmax}_xf(x)$ bezeichnet den Wert des Arguments $x$ der Funktion $f(x)$, bei
dem $f(x)$ ihr globales Maximum annimmt. Wir suchen also den Wert von $\theta$
bei dem die Likelihood ${\cal L}(\theta)$ ihr Maximum hat.
An der Stelle eines Maximums einer Funktion \"andert sich nichts, wenn
man die Funktionswerte mit einer streng monoton steigenden Funktion
transformiert. Aus gleich ersichtlichen mathematischen Gr\"unden wird meistens
das Maximum der logarithmierten Likelihood (``Log-Likelihood'') gesucht:
\begin{eqnarray}
\theta_{mle} & = & \text{argmax}_{\theta}\; {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\
& = & \text{argmax}_{\theta}\; \log {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\
& = & \text{argmax}_{\theta}\; \log \prod_{i=1}^n p(x_i|\theta) \nonumber \\
& = & \text{argmax}_{\theta}\; \sum_{i=1}^n \log p(x_i|\theta) \label{loglikelihood}
\end{eqnarray}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Beispiel: Das arithmetische Mittel}
Wenn die Me{\ss}daten $x_1, x_2, \ldots x_n$ der Normalverteilung \eqnref{normpdfmean}
entstammen, und wir den Mittelwert $\mu$ als einzigen Parameter der Verteilung betrachten,
welcher Wert von $\theta$ maximiert dessen Likelhood?
\begin{figure}[t]
\includegraphics[width=1\textwidth]{mlemean}
\caption{\label{mlemeanfig} Maximum Likelihood Estimation des
Mittelwerts. Oben: Die Daten zusammen mit drei m\"oglichen
Normalverteilungen mit unterschiedlichen Mittelwerten (Pfeile) aus
denen die Daten stammen k\"onnten. Unteln links: Die Likelihood
in Abh\"angigkeit des Mittelwerts als Parameter der
Normalverteilungen. Unten rechts: die entsprechende
Log-Likelihood. An der Position des Maximums bei $\theta=2$
\"andert sich nichts (Pfeil).}
\end{figure}
Die Log-Likelihood \eqnref{loglikelihood} ist
\begin{eqnarray*}
\log {\cal L}(\theta|x_1,x_2, \ldots x_n)
& = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x_i-\theta)^2}{2\sigma^2}} \\
& = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma^2} -\frac{(x_i-\theta)^2}{2\sigma^2}
\end{eqnarray*}
Zur Bestimmung des Maximums der Log-Likelihood berechnen wir deren Ableitung
nach dem Parameter $\theta$ und setzen diese gleich Null:
\begin{eqnarray*}
\frac{\text{d}}{\text{d}\theta} \log {\cal L}(\theta|x_1,x_2, \ldots x_n) & = & \sum_{i=1}^n \frac{2(x_i-\theta)}{2\sigma^2} \;\; = \;\; 0 \\
\Leftrightarrow \quad \sum_{i=1}^n x_i - \sum_{i=1}^n x_i \theta & = & 0 \\
\Leftrightarrow \quad n \theta & = & \sum_{i=1}^n x_i \\
\Leftrightarrow \quad \theta & = & \frac{1}{n} \sum_{i=1}^n x_i
\end{eqnarray*}
Der Maximum-Likelihood-Estimator ist das arithmetische Mittel der Daten. D.h.
das arithmetische Mittel maximiert die Wahrscheinlichkeit, dass die Daten aus einer
Normalverteilung mit diesem Mittelwert gezogen worden sind.
\begin{exercise}[mlemean.m]
Ziehe $n=50$ normalverteilte Zufallsvariablen mit einem Mittelwert $\ne 0$
und einer Standardabweichung $\ne 1$.
Plotte die Likelihood (aus dem Produkt der Wahrscheinlichkeiten) und
die Log-Likelihood (aus der Summe der logarithmierten
Wahrscheinlichkeiten) f\"ur den Mittelwert als Parameter. Vergleiche
die Position der Maxima mit den aus den Daten berechneten
Mittelwerte.
\end{exercise}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Kurvenfit als Maximum Likelihood Estimation}
Beim Kurvenfit soll eine Funktion $f(x;\theta)$ mit den Parametern
$\theta$ an die Datenpaare $(x_i|y_i)$ durch Anpassung der Parameter
$\theta$ gefittet werden. Wenn wir annehmen, dass die $y_i$ um die
entsprechenden Funktionswerte $f(x_i;\theta)$ mit einer
Standardabweichung $\sigma_i$ normalverteilt streuen, dann lautet die
Log-Likelihood
\begin{eqnarray*}
\log {\cal L}(\theta|x_1,x_2, \ldots x_n)
& = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma_i^2}}e^{-\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2}} \\
& = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma_i^2} -\frac{(x_i-f(y_i;\theta))^2}{2\sigma_i^2} \\
\end{eqnarray*}
Der einzige Unterschied zum vorherigen Beispiel ist, dass die
Mittelwerte der Normalverteilungen nun durch die Funktionswerte
gegeben sind.
Der Parameter $\theta$ soll so gew\"ahlt werden, dass die
Log-Likelihood maximal wird. Der erste Term der Summe ist
unabh\"angig von $\theta$ und kann deshalb bei der Suche nach dem
Maximum weggelassen werden.
\begin{eqnarray*}
& = & - \frac{1}{2} \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2
\end{eqnarray*}
Anstatt nach dem Maximum zu suchen, k\"onnen wir auch das Vorzeichen der Log-Likelihood
umdrehen und nach dem Minimum suchen. Dabei k\"onnen wir auch den Faktor $1/2$ vor der Summe vernachl\"assigen --- auch das \"andert nichts an der Position des Minimums.
\begin{equation}
\theta_{mle} = \text{argmin}_{\theta} \; \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 \;\; = \;\; \text{argmin}_{\theta} \; \chi^2
\end{equation}
Die Summer der quadratischen Abst\"ande normiert auf die jeweiligen
Standardabweichungen wird auch mit $\chi^2$ bezeichnet. Der Wert des
Parameters $\theta$ welcher den quadratischen Abstand minimiert ist
also identisch mit der Maximierung der Wahrscheinlichkeit, dass die
Daten tats\"achlich aus der Funktion stammen k\"onnen. Minimierung des
$\chi^2$ ist also ein Maximum-Likelihood Estimate.
\begin{figure}[t]
\includegraphics[width=1\textwidth]{mlepropline}
\caption{\label{mleproplinefig} Maximum Likelihood Estimation der
Steigung einer Ursprungsgeraden.}
\end{figure}
\subsection{Beispiel: einfache Proportionalit\"at}
Als Funktion nehmen wir die Ursprungsgerade
\[ f(x) = \theta x \]
mit Steigung $\theta$. Die $\chi^2$-Summe lautet damit
\[ \chi^2 = \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \; . \]
Zur Bestimmung des Minimums berechnen wir wieder die erste Ableitung nach $\theta$
und setzen diese gleich Null:
\begin{eqnarray}
\frac{\text{d}}{\text{d}\theta}\chi^2 & = & \frac{\text{d}}{\text{d}\theta} \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\
& = & \sum_{i=1}^n \frac{\text{d}}{\text{d}\theta} \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\
& = & -2 \sum_{i=1}^n \frac{x_i}{\sigma_i} \left( \frac{y_i-\theta x_i}{\sigma_i} \right) \nonumber \\
& = & -2 \sum_{i=1}^n \left( \frac{x_iy_i}{\sigma_i^2} - \theta \frac{x_i^2}{\sigma_i^2} \right) \;\; = \;\; 0 \nonumber \\
\Leftrightarrow \quad \theta \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2} & = & \sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2} \nonumber \\
\Leftrightarrow \quad \theta & = & \frac{\sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2}}{ \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2}} \label{mleslope}
\end{eqnarray}
Damit haben wir nun einen anlytischen Ausdruck f\"ur die Bestimmung
der Steigung $\theta$ des Regressionsgeraden gewonnen. Ein
Gradientenabstieg ist f\"ur das Fitten der Geradensteigung also gar nicht
n\"otig. Das gilt allgemein f\"ur das Fitten von Koeffizienten von
linear kombinierten Basisfunktionen. Parameter die nichtlinear in
einer Funktion enthalten sind k\"onnen aber nicht analytisch aus den
Daten berechnet werden. Da bleibt dann nur auf numerische Verfahren
zur Optimierung der Kostenfunktion, wie z.B. der Gradientenabstieg,
zur\"uckzugreifen.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Fits von Wahrscheinlichkeitsverteilungen}
Zum Abschluss betrachten wir noch den Fall, bei dem wir die Parameter
einer Wahrscheinlichkeitsdichtefunktion (z.B. Mittelwert und
Standardabweichung der Normalverteilung) an ein Datenset fitten wolle.
Ein erster Gedanke k\"onnte sein, die
Wahrscheinlichkeitsdichtefunktion durch Minimierung des quadratischen
Abstands an ein Histogram der Daten zu fitten. Das ist aber aus
folgenden Gr\"unden nicht die Methode der Wahl: (i)
Wahrscheinlichkeitsdichten k\"onnen nur positiv sein. Darum k\"onnen
insbesondere bei kleinen Werten die Daten nicht symmetrisch streuen,
wie es normalverteilte Daten machen sollten. (ii) Die Datenwerte sind
nicht unabh\"angig, da das normierte Histogram sich zu Eins
aufintegriert. Die beiden Annahmen normalverteilte und unabh\"angige Daten
die die Minimierung des quadratischen Abstands zu einem Maximum
Likelihood Estimator machen sind also verletzt. (iii) Das Histgramm
h\"angt von der Wahl der Klassenbreite ab.
Den direkten Weg, eine Wahrscheinlichkeitsdichtefunktion an ein
Datenset zu fitten, haben wir oben schon bei dem Beispiel zur
Absch\"atzung des Mittelwertes einer Normalverteilung gesehen ---
Maximum Likelihood! Wir suchen einfach die Parameter $\theta$ der
gesuchten Wahrscheinlichkeitsdichtefunktion bei der die Log-Likelihood
\eqnref{loglikelihood} maximal wird. Das ist im allgemeinen ein
nichtlinieares Optimierungsproblem, das mit numerischen Verfahren, wie
z.B. dem Gradientenabstieg, gel\"ost wird.
\begin{figure}[t]
\includegraphics[width=1\textwidth]{mlepdf}
\caption{\label{mlepdffig} Maximum Likelihood Estimation einer
Wahrscheinlichkeitsdichtefunktion. Links: die 100 Datenpunkte, die aus der Gammaverteilung
2. Ordnung (rot) gezogen worden sind. Der Maximum-Likelihood-Fit ist orange dargestellt.
Rechts: das normierte Histogramm der Daten zusammen mit der \"uber Minimierung
des quadratischen Abstands zum Histogramm berechneten Fits ist potentiell schlechter.}
\end{figure}
\end{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Statistics}
What is "a statistic"? % dt. Sch\"atzfunktion
\begin{definition}[statistic]
A statistic (singular) is a single measure of some attribute of a
sample (e.g., its arithmetic mean value). It is calculated by
applying a function (statistical algorithm) to the values of the
items of the sample, which are known together as a set of data.
\source{http://en.wikipedia.org/wiki/Statistic}
\end{definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Data types}
\subsection{Nominal scale}
\begin{itemize}
\item Binary
\begin{itemize}
\item ``yes/no'',
\item ``true/false'',
\item ``success/failure'', etc.
\end{itemize}
\item Categorial
\begin{itemize}
\item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
\item blood type (``A/B/AB/0''),
\item parts of speech (``noun/veerb/preposition/article/...''),
\item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
\end{itemize}
\item Each observation/measurement/sample is put into one category
\item There is no reasonable order among the categories.\\
example: [rods, cones] vs. [cones, rods]
\item Statistics: mode, i.e. the most common item
\end{itemize}
\subsection{Ordinal scale}
\begin{itemize}
\item Like nominal scale, but with an order
\item Examples: ranks, ratings
\begin{itemize}
\item ``bad/ok/good'',
\item ``cold/warm/hot'',
\item ``young/old'', etc.
\end{itemize}
\item {\bf But:} there is no reasonable measure of {\em distance}
between the classes
\item Statistics: mode, median
\end{itemize}
\subsection{Interval scale}
\begin{itemize}
\item Quantitative/metric values
\item Reasonable measure of distance between values, but no absolute zero
\item Examples:
\begin{itemize}
\item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
\item Direction measured in degrees from magnetic or true north
\end{itemize}
\item Statistics:
\begin{itemize}
\item Central tendency: mode, median, arithmetic mean
\item Dispersion: range, standard deviation
\end{itemize}
\end{itemize}
\subsection{Absolute/ratio scale}
\begin{itemize}
\item Like interval scale, but with absolute origin/zero
\item Examples:
\begin{itemize}
\item Temperature in $^\circ$K
\item Length, mass, duration, electric charge, ...
\item Plane angle, etc.
\item Count (e.g. number of spikes in response to a stimulus)
\end{itemize}
\item Statistics:
\begin{itemize}
\item Central tendency: mode, median, arithmetic, geometric, harmonic mean
\item Dispersion: range, standard deviation
\item Coefficient of variation (ratio standard deviation/mean)
\item All other statistical measures
\end{itemize}
\end{itemize}
\subsection{Data types}
\begin{itemize}
\item Data type selects
\begin{itemize}
\item statistics
\item type of plots (bar graph versus x-y plot)
\item correct tests
\end{itemize}
\item Scales exhibit increasing information content from nominal
to absolute.\\
Conversion ,,downwards'' is always possible
\item For example: size measured in meter (ratio scale) $\rightarrow$
categories ``small/medium/large'' (ordinal scale)
\end{itemize}
\subsection{Examples from neuroscience}
\begin{itemize}
\item {\bf absolute:}
\begin{itemize}
\item size of neuron/brain
\item length of axon
\item ion concentration
\item membrane potential
\item firing rate
\end{itemize}
\item {\bf interval:}
\begin{itemize}
\item edge orientation
\end{itemize}
\item {\bf ordinal:}
\begin{itemize}
\item stages of a disease
\item ratings
\end{itemize}
\item {\bf nominal:}
\begin{itemize}
\item cell type
\item odor
\item states of an ion channel
\end{itemize}
\end{itemize}