Reorganized the folders and started a common script for the lectures.
17
Makefile
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
BASENAME=scientificcomputing-script
|
||||||
|
|
||||||
|
# Default goal: build the lecture script as a PDF.
pdf : $(BASENAME).pdf

# Compile the LaTeX source. The output of the first pdflatex run is echoed to
# stderr and scanned for the cross-reference warning; pdflatex is run a second
# time only if that warning appears. The trailing `|| true` keeps the recipe
# best-effort: the exit status of the pipeline is that of grep, so without it
# the rule would fail whenever no rerun is needed.
$(BASENAME).pdf : $(BASENAME).tex
	export TEXMFOUTPUT=.; pdflatex -interaction=scrollmode $< | tee /dev/stderr | grep -F -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
|
||||||
|
|
||||||
|
# Remove editor backup files and the auxiliary files listed below.
# Declared phony so that a file named `clean` cannot shadow the target.
.PHONY : clean
clean :
	rm -f *~ $(BASENAME).aux $(BASENAME).log $(BASENAME).out $(BASENAME).toc
|
||||||
|
|
||||||
|
# Remove everything `clean` removes plus the generated PDF.
# The PDF is named after BASENAME (see the $(BASENAME).pdf rule); the
# previously referenced PDFFILE variable is not defined in this Makefile,
# so the PDF was never deleted.
.PHONY : cleanall
cleanall : clean
	rm -f $(BASENAME).pdf
|
||||||
|
|
||||||
|
# Rebuild the PDF continuously: every half second ask make (question mode, -q)
# whether `pdf` is out of date and rebuild it only if so. Runs until interrupted.
# $(MAKE) is used so the sub-make is the same program, with the same flags,
# as the make that was invoked.
.PHONY : watch
watch :
	while true; do ! $(MAKE) -q pdf && $(MAKE) pdf; sleep 0.5; done
|
||||||
|
|
||||||
|
|
22
bootstrap/lecture/Makefile
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
BASENAME=bootstrap
|
||||||
|
# All Python scripts in this directory, and the PDF expected for each of them.
# Simple (:=) assignment expands the wildcard once instead of on every use.
PYFILES := $(wildcard *.py)
PYPDFFILES := $(PYFILES:.py=.pdf)
|
||||||
|
|
||||||
|
# Default goal: build the chapter PDF and one PDF per Python script.
pdf : $(BASENAME)-chapter.pdf $(PYPDFFILES)

# Compile the chapter wrapper ($<); the chapter text $(BASENAME).tex is listed
# as a prerequisite so that editing it triggers a rebuild. pdflatex is run a
# second time only if the first run prints the cross-reference warning. The
# trailing `|| true` keeps the recipe best-effort: the exit status of the
# pipeline is that of grep, so without it the rule would fail whenever no
# rerun is needed.
$(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex
	pdflatex -interaction=scrollmode $< | tee /dev/stderr | grep -F -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
|
||||||
|
|
||||||
|
# Static pattern rule: run each Python script to bring the PDF of the same
# name up to date.
# NOTE(review): assumes every script writes <name>.pdf itself -- confirm.
$(PYPDFFILES) : %.pdf : %.py
	python $<
|
||||||
|
|
||||||
|
# Remove editor backup files and the auxiliary files listed below.
# Declared phony so that a file named `clean` cannot shadow the target.
.PHONY : clean
clean :
	rm -f *~ $(BASENAME)-chapter.aux $(BASENAME)-chapter.log $(BASENAME)-chapter.out $(BASENAME).aux $(BASENAME).log
|
||||||
|
|
||||||
|
# Remove everything `clean` removes plus the generated chapter PDF.
# The PDFs produced from the Python scripts are left in place.
.PHONY : cleanall
cleanall : clean
	rm -f $(BASENAME)-chapter.pdf
|
||||||
|
|
||||||
|
# Rebuild continuously: every half second ask make (question mode, -q)
# whether `pdf` is out of date and rebuild it only if so. Runs until interrupted.
# $(MAKE) is used so the sub-make is the same program, with the same flags,
# as the make that was invoked.
.PHONY : watch
watch :
	while true; do ! $(MAKE) -q pdf && $(MAKE) pdf; sleep 0.5; done
|
||||||
|
|
||||||
|
|
225
bootstrap/lecture/bootstrap-chapter.tex
Normal file
@ -0,0 +1,225 @@
|
|||||||
|
\documentclass[12pt]{report}
|
||||||
|
|
||||||
|
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
|
||||||
|
\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
|
||||||
|
\date{WS 15/16}
|
||||||
|
|
||||||
|
%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% \newcommand{\tr}[2]{#1} % en
|
||||||
|
% \usepackage[english]{babel}
|
||||||
|
\newcommand{\tr}[2]{#2} % de
|
||||||
|
\usepackage[german]{babel}
|
||||||
|
|
||||||
|
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{pslatex} % nice font for pdf file
|
||||||
|
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
||||||
|
|
||||||
|
%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
|
||||||
|
\setcounter{tocdepth}{1}
|
||||||
|
|
||||||
|
%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[sf,bf,it,big,clearempty]{titlesec}
|
||||||
|
\setcounter{secnumdepth}{1}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage{xcolor}
|
||||||
|
\pagecolor{white}
|
||||||
|
|
||||||
|
\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)%
|
||||||
|
\put(0,4){\line(1,0){170}}%
|
||||||
|
\multiput(0,2)(10,0){18}{\line(0,1){4}}%
|
||||||
|
\multiput(0,3)(1,0){170}{\line(0,1){2}}%
|
||||||
|
\put(0,0){\makebox(0,0){{\tiny 0}}}%
|
||||||
|
\put(10,0){\makebox(0,0){{\tiny 1}}}%
|
||||||
|
\put(20,0){\makebox(0,0){{\tiny 2}}}%
|
||||||
|
\put(30,0){\makebox(0,0){{\tiny 3}}}%
|
||||||
|
\put(40,0){\makebox(0,0){{\tiny 4}}}%
|
||||||
|
\put(50,0){\makebox(0,0){{\tiny 5}}}%
|
||||||
|
\put(60,0){\makebox(0,0){{\tiny 6}}}%
|
||||||
|
\put(70,0){\makebox(0,0){{\tiny 7}}}%
|
||||||
|
\put(80,0){\makebox(0,0){{\tiny 8}}}%
|
||||||
|
\put(90,0){\makebox(0,0){{\tiny 9}}}%
|
||||||
|
\put(100,0){\makebox(0,0){{\tiny 10}}}%
|
||||||
|
\put(110,0){\makebox(0,0){{\tiny 11}}}%
|
||||||
|
\put(120,0){\makebox(0,0){{\tiny 12}}}%
|
||||||
|
\put(130,0){\makebox(0,0){{\tiny 13}}}%
|
||||||
|
\put(140,0){\makebox(0,0){{\tiny 14}}}%
|
||||||
|
\put(150,0){\makebox(0,0){{\tiny 15}}}%
|
||||||
|
\put(160,0){\makebox(0,0){{\tiny 16}}}%
|
||||||
|
\put(170,0){\makebox(0,0){{\tiny 17}}}%
|
||||||
|
\end{picture}\par}
|
||||||
|
|
||||||
|
% figures:
|
||||||
|
\setlength{\fboxsep}{0pt}
|
||||||
|
\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}}
|
||||||
|
%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}}
|
||||||
|
%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}}
|
||||||
|
%\newcommand{\texpicture}[1]{}
|
||||||
|
\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}}
|
||||||
|
|
||||||
|
% maximum number of floats:
|
||||||
|
\setcounter{topnumber}{2}
|
||||||
|
\setcounter{bottomnumber}{0}
|
||||||
|
\setcounter{totalnumber}{2}
|
||||||
|
|
||||||
|
% float placement fractions:
|
||||||
|
\renewcommand{\textfraction}{0.2}
|
||||||
|
\renewcommand{\topfraction}{0.8}
|
||||||
|
\renewcommand{\bottomfraction}{0.0}
|
||||||
|
\renewcommand{\floatpagefraction}{0.5}
|
||||||
|
|
||||||
|
% spacing for floats:
|
||||||
|
\setlength{\floatsep}{12pt plus 2pt minus 2pt}
|
||||||
|
\setlength{\textfloatsep}{20pt plus 4pt minus 2pt}
|
||||||
|
\setlength{\intextsep}{12pt plus 2pt minus 2pt}
|
||||||
|
|
||||||
|
% spacing for a floating page:
|
||||||
|
\makeatletter
|
||||||
|
\setlength{\@fptop}{0pt}
|
||||||
|
\setlength{\@fpsep}{8pt plus 2.0fil}
|
||||||
|
\setlength{\@fpbot}{0pt plus 1.0fil}
|
||||||
|
\makeatother
|
||||||
|
|
||||||
|
% rules for floats:
|
||||||
|
\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}}
|
||||||
|
\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}}
|
||||||
|
|
||||||
|
% captions:
|
||||||
|
\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption}
|
||||||
|
|
||||||
|
% put caption on separate float:
|
||||||
|
\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]}
|
||||||
|
|
||||||
|
% references to panels of a figure within the caption:
|
||||||
|
\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}}
|
||||||
|
% references to figures:
|
||||||
|
\newcommand{\panel}[1]{\textsf{\uppercase{#1}}}
|
||||||
|
\newcommand{\fref}[1]{\textup{\ref{#1}}}
|
||||||
|
\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}}
|
||||||
|
% references to figures in normal text:
|
||||||
|
\newcommand{\fig}{Fig.}
|
||||||
|
\newcommand{\Fig}{Figure}
|
||||||
|
\newcommand{\figs}{Figs.}
|
||||||
|
\newcommand{\Figs}{Figures}
|
||||||
|
\newcommand{\figref}[1]{\fig~\fref{#1}}
|
||||||
|
\newcommand{\Figref}[1]{\Fig~\fref{#1}}
|
||||||
|
\newcommand{\figsref}[1]{\figs~\fref{#1}}
|
||||||
|
\newcommand{\Figsref}[1]{\Figs~\fref{#1}}
|
||||||
|
\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}}
|
||||||
|
% references to figures within bracketed text:
|
||||||
|
\newcommand{\figb}{Fig.}
|
||||||
|
\newcommand{\figsb}{Figs.}
|
||||||
|
\newcommand{\figrefb}[1]{\figb~\fref{#1}}
|
||||||
|
\newcommand{\figsrefb}[1]{\figsb~\fref{#1}}
|
||||||
|
\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}}
|
||||||
|
|
||||||
|
% references to tables:
|
||||||
|
\newcommand{\tref}[1]{\textup{\ref{#1}}}
|
||||||
|
% references to tables in normal text:
|
||||||
|
\newcommand{\tab}{Tab.}
|
||||||
|
\newcommand{\Tab}{Table}
|
||||||
|
\newcommand{\tabs}{Tabs.}
|
||||||
|
\newcommand{\Tabs}{Tables}
|
||||||
|
\newcommand{\tabref}[1]{\tab~\tref{#1}}
|
||||||
|
\newcommand{\Tabref}[1]{\Tab~\tref{#1}}
|
||||||
|
\newcommand{\tabsref}[1]{\tabs~\tref{#1}}
|
||||||
|
\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}}
|
||||||
|
% references to tables within bracketed text:
|
||||||
|
\newcommand{\tabb}{Tab.}
|
||||||
|
% plural abbreviation for bracketed text, consistent with \tabs and \figsb
\newcommand{\tabsb}{Tabs.}
|
||||||
|
\newcommand{\tabrefb}[1]{\tabb~\tref{#1}}
|
||||||
|
\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%\newcommand{\eqref}[1]{(\ref{#1})}
|
||||||
|
\newcommand{\eqn}{\tr{Eq}{Gl}.}
|
||||||
|
\newcommand{\Eqn}{\tr{Eq}{Gl}.}
|
||||||
|
\newcommand{\eqns}{\tr{Eqs}{Gln}.}
|
||||||
|
\newcommand{\Eqns}{\tr{Eqs}{Gln}.}
|
||||||
|
\newcommand{\eqnref}[1]{\eqn~\eqref{#1}}
|
||||||
|
\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}}
|
||||||
|
\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}}
|
||||||
|
\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{listings}
|
||||||
|
% Default settings for all code listings.
% The caption shows the base name of the listed file. It uses the LaTeX kernel
% macros \filename@parse and \filename@base, whose names contain an @, so the
% @ must be a letter while this \lstset is read; otherwise the names are
% tokenized as \filename followed by the characters "@parse" / "@base".
\makeatletter
\lstset{
  inputpath=../code,
  basicstyle=\ttfamily\footnotesize,
  numbers=left,
  showstringspaces=false,
  language=Matlab,
  commentstyle=\itshape\color{darkgray},
  keywordstyle=\color{blue},
  stringstyle=\color{green},
  backgroundcolor=\color{blue!10},
  breaklines=true,
  breakautoindent=true,
  columns=flexible,
  frame=single,
  caption={\protect\filename@parse{\lstname}\protect\filename@base},
  captionpos=t,
  xleftmargin=1em,
  xrightmargin=1em,
  aboveskip=10pt
}
\makeatother
|
||||||
|
|
||||||
|
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{amsmath}
|
||||||
|
\usepackage{bm}
|
||||||
|
\usepackage{dsfont}
|
||||||
|
\newcommand{\naZ}{\mathds{N}}
|
||||||
|
\newcommand{\gaZ}{\mathds{Z}}
|
||||||
|
\newcommand{\raZ}{\mathds{Q}}
|
||||||
|
\newcommand{\reZ}{\mathds{R}}
|
||||||
|
\newcommand{\reZp}{\mathds{R^+}}
|
||||||
|
\newcommand{\reZpN}{\mathds{R^+_0}}
|
||||||
|
\newcommand{\koZ}{\mathds{C}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{ifthen}
|
||||||
|
|
||||||
|
\newcommand{\code}[1]{\texttt{#1}}
|
||||||
|
|
||||||
|
\newcommand{\source}[1]{
|
||||||
|
\begin{flushright}
|
||||||
|
\color{gray}\scriptsize \url{#1}
|
||||||
|
\end{flushright}
|
||||||
|
}
|
||||||
|
|
||||||
|
\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
|
||||||
|
{\medskip}
|
||||||
|
|
||||||
|
\newcounter{maxexercise}
|
||||||
|
\setcounter{maxexercise}{9} % show listings up to exercise maxexercise
|
||||||
|
\newcounter{theexercise}
|
||||||
|
\setcounter{theexercise}{1}
|
||||||
|
\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
|
||||||
|
\arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}%
|
||||||
|
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}}
|
||||||
|
|
||||||
|
\graphicspath{{figures/}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\include{bootstrap}
|
||||||
|
|
||||||
|
\end{document}
|
64
bootstrap/lecture/bootstrap.tex
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\chapter{\tr{Bootstrap Methods}{Bootstrap Methoden}}
|
||||||
|
|
||||||
|
Beim Bootstrap erzeugt man sich die Verteilung von Statistiken durch Resampling
|
||||||
|
aus der Stichprobe. Das hat mehrere Vorteile:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Weniger Annahmen (z.B. muss eine Stichprobe nicht normalverteilt sein).
|
||||||
|
\item H\"ohere Genauigkeit als klassische Methoden.
|
||||||
|
\item Allgemeing\"ultigkeit: Bootstrap Methoden sind sich sehr
|
||||||
|
\"ahnlich f\"ur viele verschiedene Statistiken und ben\"otigen nicht
|
||||||
|
f\"ur jede Statistik eine andere Formel.
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\begin{figure}[t]
|
||||||
|
\includegraphics[width=0.8\textwidth]{2012-10-29_16-26-05_771}\\[2ex]
|
||||||
|
\includegraphics[width=0.8\textwidth]{2012-10-29_16-41-39_523}\\[2ex]
|
||||||
|
\includegraphics[width=0.8\textwidth]{2012-10-29_16-29-35_312}
|
||||||
|
\caption{\tr{Why can we only measure a sample of the
|
||||||
|
population?}{Warum k\"onnen wir nur eine Stichprobe der
|
||||||
|
Grundgesamtheit messen?}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{figure}[t]
|
||||||
|
\includegraphics[height=0.2\textheight]{srs1}\\[2ex]
|
||||||
|
\includegraphics[height=0.2\textheight]{srs2}\\[2ex]
|
||||||
|
\includegraphics[height=0.2\textheight]{srs3}
|
||||||
|
\caption{Bootstrap der Stichprobenverteilung. (a) Von der
|
||||||
|
Grundgesamtheit (population) mit unbekanntem Parameter
|
||||||
|
(z.B. Mittelwert $\mu$) zieht man Stichproben (SRS: simple random
|
||||||
|
samples). Die Statistik (hier Bestimmung von $\bar x$) kann f\"ur
|
||||||
|
jede Stichprobe berechnet werden. Die erhaltenen Werte entstammen
|
||||||
|
der Stichprobenverteilung. Meistens wird aber nur eine Stichprobe
|
||||||
|
gezogen! (b) Mit bestimmten Annahmen und Theorien kann man auf
|
||||||
|
die Stichprobenverteilung schlie{\ss}en ohne sie gemessen zu
|
||||||
|
haben. (c) Alternativ k\"onnen aus der einen Stichprobe viele
|
||||||
|
Bootstrap-Stichproben generiert werden (resampling) und so
|
||||||
|
Eigenschaften der Stichprobenverteilung empirisch bestimmt
|
||||||
|
werden. Aus Hesterberg et al. 2003, Bootstrap Methods and
|
||||||
|
Permutation Tests}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\section{Bootstrap des Standardfehlers}
|
||||||
|
|
||||||
|
Beim Bootstrap erzeugen wir durch Resampling neue Stichproben und
|
||||||
|
benutzen diese um die Stichprobenverteilung einer Statistik zu
|
||||||
|
berechnen. Die Bootstrap Stichproben haben jeweils den gleichen Umfang
|
||||||
|
wie die urspr\"unglich gemessene Stichprobe und werden durch Ziehen
|
||||||
|
mit Zur\"ucklegen gewonnen. Jeder Wert der urspr\"unglichen Stichprobe
|
||||||
|
kann also einmal, mehrmals oder gar nicht in einer Bootstrap
|
||||||
|
Stichprobe vorkommen.
|
||||||
|
|
||||||
|
\begin{exercise}[bootstrapsem.m]
|
||||||
|
Ziehe 1000 normalverteilte Zufallszahlen und berechne deren Mittelwert,
|
||||||
|
Standardabweichung und Standardfehler ($\sigma/\sqrt{n}$).
|
||||||
|
|
||||||
|
Resample die Daten 1000 mal (Ziehen mit Zur\"ucklegen) und berechne jeweils
|
||||||
|
den Mittelwert.
|
||||||
|
|
||||||
|
Plotte ein Histogramm dieser Mittelwerte, sowie deren Mittelwert und
|
||||||
|
die Standardabweichung.
|
||||||
|
|
||||||
|
Was hat das mit dem Standardfehler zu tun?
|
||||||
|
\end{exercise}
|
Before Width: | Height: | Size: 724 KiB After Width: | Height: | Size: 724 KiB |
Before Width: | Height: | Size: 386 KiB After Width: | Height: | Size: 386 KiB |
Before Width: | Height: | Size: 461 KiB After Width: | Height: | Size: 461 KiB |
Before Width: | Height: | Size: 59 KiB After Width: | Height: | Size: 59 KiB |
Before Width: | Height: | Size: 55 KiB After Width: | Height: | Size: 55 KiB |
Before Width: | Height: | Size: 73 KiB After Width: | Height: | Size: 73 KiB |
22
likelihood/lecture/Makefile
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
BASENAME=likelihood
|
||||||
|
# All Python scripts in this directory, and the PDF expected for each of them.
# Simple (:=) assignment expands the wildcard once instead of on every use.
PYFILES := $(wildcard *.py)
PYPDFFILES := $(PYFILES:.py=.pdf)
|
||||||
|
|
||||||
|
# Default goal: build the chapter PDF and one PDF per Python script.
pdf : $(BASENAME)-chapter.pdf $(PYPDFFILES)

# Compile the chapter wrapper ($<); the chapter text $(BASENAME).tex is listed
# as a prerequisite so that editing it triggers a rebuild. pdflatex is run a
# second time only if the first run prints the cross-reference warning. The
# trailing `|| true` keeps the recipe best-effort: the exit status of the
# pipeline is that of grep, so without it the rule would fail whenever no
# rerun is needed.
$(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex
	pdflatex -interaction=scrollmode $< | tee /dev/stderr | grep -F -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
|
||||||
|
|
||||||
|
# Static pattern rule: run each Python script to bring the PDF of the same
# name up to date.
# NOTE(review): assumes every script writes <name>.pdf itself -- confirm.
$(PYPDFFILES) : %.pdf : %.py
	python $<
|
||||||
|
|
||||||
|
# Remove editor backup files and the auxiliary files listed below.
# Declared phony so that a file named `clean` cannot shadow the target.
.PHONY : clean
clean :
	rm -f *~ $(BASENAME)-chapter.aux $(BASENAME)-chapter.log $(BASENAME)-chapter.out $(BASENAME).aux $(BASENAME).log
|
||||||
|
|
||||||
|
# Remove everything `clean` removes plus the generated chapter PDF.
# The PDFs produced from the Python scripts are left in place.
.PHONY : cleanall
cleanall : clean
	rm -f $(BASENAME)-chapter.pdf
|
||||||
|
|
||||||
|
# Rebuild continuously: every half second ask make (question mode, -q)
# whether `pdf` is out of date and rebuild it only if so. Runs until interrupted.
# $(MAKE) is used so the sub-make is the same program, with the same flags,
# as the make that was invoked.
.PHONY : watch
watch :
	while true; do ! $(MAKE) -q pdf && $(MAKE) pdf; sleep 0.5; done
|
||||||
|
|
||||||
|
|
225
likelihood/lecture/likelihood-chapter.tex
Normal file
@ -0,0 +1,225 @@
|
|||||||
|
\documentclass[12pt]{report}
|
||||||
|
|
||||||
|
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
|
||||||
|
\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
|
||||||
|
\date{WS 15/16}
|
||||||
|
|
||||||
|
%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% \newcommand{\tr}[2]{#1} % en
|
||||||
|
% \usepackage[english]{babel}
|
||||||
|
\newcommand{\tr}[2]{#2} % de
|
||||||
|
\usepackage[german]{babel}
|
||||||
|
|
||||||
|
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{pslatex} % nice font for pdf file
|
||||||
|
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
||||||
|
|
||||||
|
%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
|
||||||
|
\setcounter{tocdepth}{1}
|
||||||
|
|
||||||
|
%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[sf,bf,it,big,clearempty]{titlesec}
|
||||||
|
\setcounter{secnumdepth}{1}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage{xcolor}
|
||||||
|
\pagecolor{white}
|
||||||
|
|
||||||
|
\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)%
|
||||||
|
\put(0,4){\line(1,0){170}}%
|
||||||
|
\multiput(0,2)(10,0){18}{\line(0,1){4}}%
|
||||||
|
\multiput(0,3)(1,0){170}{\line(0,1){2}}%
|
||||||
|
\put(0,0){\makebox(0,0){{\tiny 0}}}%
|
||||||
|
\put(10,0){\makebox(0,0){{\tiny 1}}}%
|
||||||
|
\put(20,0){\makebox(0,0){{\tiny 2}}}%
|
||||||
|
\put(30,0){\makebox(0,0){{\tiny 3}}}%
|
||||||
|
\put(40,0){\makebox(0,0){{\tiny 4}}}%
|
||||||
|
\put(50,0){\makebox(0,0){{\tiny 5}}}%
|
||||||
|
\put(60,0){\makebox(0,0){{\tiny 6}}}%
|
||||||
|
\put(70,0){\makebox(0,0){{\tiny 7}}}%
|
||||||
|
\put(80,0){\makebox(0,0){{\tiny 8}}}%
|
||||||
|
\put(90,0){\makebox(0,0){{\tiny 9}}}%
|
||||||
|
\put(100,0){\makebox(0,0){{\tiny 10}}}%
|
||||||
|
\put(110,0){\makebox(0,0){{\tiny 11}}}%
|
||||||
|
\put(120,0){\makebox(0,0){{\tiny 12}}}%
|
||||||
|
\put(130,0){\makebox(0,0){{\tiny 13}}}%
|
||||||
|
\put(140,0){\makebox(0,0){{\tiny 14}}}%
|
||||||
|
\put(150,0){\makebox(0,0){{\tiny 15}}}%
|
||||||
|
\put(160,0){\makebox(0,0){{\tiny 16}}}%
|
||||||
|
\put(170,0){\makebox(0,0){{\tiny 17}}}%
|
||||||
|
\end{picture}\par}
|
||||||
|
|
||||||
|
% figures:
|
||||||
|
\setlength{\fboxsep}{0pt}
|
||||||
|
\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}}
|
||||||
|
%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}}
|
||||||
|
%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}}
|
||||||
|
%\newcommand{\texpicture}[1]{}
|
||||||
|
\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}}
|
||||||
|
|
||||||
|
% maximum number of floats:
|
||||||
|
\setcounter{topnumber}{2}
|
||||||
|
\setcounter{bottomnumber}{0}
|
||||||
|
\setcounter{totalnumber}{2}
|
||||||
|
|
||||||
|
% float placement fractions:
|
||||||
|
\renewcommand{\textfraction}{0.2}
|
||||||
|
\renewcommand{\topfraction}{0.8}
|
||||||
|
\renewcommand{\bottomfraction}{0.0}
|
||||||
|
\renewcommand{\floatpagefraction}{0.5}
|
||||||
|
|
||||||
|
% spacing for floats:
|
||||||
|
\setlength{\floatsep}{12pt plus 2pt minus 2pt}
|
||||||
|
\setlength{\textfloatsep}{20pt plus 4pt minus 2pt}
|
||||||
|
\setlength{\intextsep}{12pt plus 2pt minus 2pt}
|
||||||
|
|
||||||
|
% spacing for a floating page:
|
||||||
|
\makeatletter
|
||||||
|
\setlength{\@fptop}{0pt}
|
||||||
|
\setlength{\@fpsep}{8pt plus 2.0fil}
|
||||||
|
\setlength{\@fpbot}{0pt plus 1.0fil}
|
||||||
|
\makeatother
|
||||||
|
|
||||||
|
% rules for floats:
|
||||||
|
\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}}
|
||||||
|
\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}}
|
||||||
|
|
||||||
|
% captions:
|
||||||
|
\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption}
|
||||||
|
|
||||||
|
% put caption on separate float:
|
||||||
|
\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]}
|
||||||
|
|
||||||
|
% references to panels of a figure within the caption:
|
||||||
|
\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}}
|
||||||
|
% references to figures:
|
||||||
|
\newcommand{\panel}[1]{\textsf{\uppercase{#1}}}
|
||||||
|
\newcommand{\fref}[1]{\textup{\ref{#1}}}
|
||||||
|
\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}}
|
||||||
|
% references to figures in normal text:
|
||||||
|
\newcommand{\fig}{Fig.}
|
||||||
|
\newcommand{\Fig}{Figure}
|
||||||
|
\newcommand{\figs}{Figs.}
|
||||||
|
\newcommand{\Figs}{Figures}
|
||||||
|
\newcommand{\figref}[1]{\fig~\fref{#1}}
|
||||||
|
\newcommand{\Figref}[1]{\Fig~\fref{#1}}
|
||||||
|
\newcommand{\figsref}[1]{\figs~\fref{#1}}
|
||||||
|
\newcommand{\Figsref}[1]{\Figs~\fref{#1}}
|
||||||
|
\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}}
|
||||||
|
% references to figures within bracketed text:
|
||||||
|
\newcommand{\figb}{Fig.}
|
||||||
|
\newcommand{\figsb}{Figs.}
|
||||||
|
\newcommand{\figrefb}[1]{\figb~\fref{#1}}
|
||||||
|
\newcommand{\figsrefb}[1]{\figsb~\fref{#1}}
|
||||||
|
\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}}
|
||||||
|
|
||||||
|
% references to tables:
|
||||||
|
\newcommand{\tref}[1]{\textup{\ref{#1}}}
|
||||||
|
% references to tables in normal text:
|
||||||
|
\newcommand{\tab}{Tab.}
|
||||||
|
\newcommand{\Tab}{Table}
|
||||||
|
\newcommand{\tabs}{Tabs.}
|
||||||
|
\newcommand{\Tabs}{Tables}
|
||||||
|
\newcommand{\tabref}[1]{\tab~\tref{#1}}
|
||||||
|
\newcommand{\Tabref}[1]{\Tab~\tref{#1}}
|
||||||
|
\newcommand{\tabsref}[1]{\tabs~\tref{#1}}
|
||||||
|
\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}}
|
||||||
|
% references to tables within bracketed text:
|
||||||
|
\newcommand{\tabb}{Tab.}
|
||||||
|
% plural abbreviation for bracketed text, consistent with \tabs and \figsb
\newcommand{\tabsb}{Tabs.}
|
||||||
|
\newcommand{\tabrefb}[1]{\tabb~\tref{#1}}
|
||||||
|
\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%\newcommand{\eqref}[1]{(\ref{#1})}
|
||||||
|
\newcommand{\eqn}{\tr{Eq}{Gl}.}
|
||||||
|
\newcommand{\Eqn}{\tr{Eq}{Gl}.}
|
||||||
|
\newcommand{\eqns}{\tr{Eqs}{Gln}.}
|
||||||
|
\newcommand{\Eqns}{\tr{Eqs}{Gln}.}
|
||||||
|
\newcommand{\eqnref}[1]{\eqn~\eqref{#1}}
|
||||||
|
\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}}
|
||||||
|
\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}}
|
||||||
|
\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{listings}
|
||||||
|
% Default settings for all code listings.
% The caption shows the base name of the listed file. It uses the LaTeX kernel
% macros \filename@parse and \filename@base, whose names contain an @, so the
% @ must be a letter while this \lstset is read; otherwise the names are
% tokenized as \filename followed by the characters "@parse" / "@base".
\makeatletter
\lstset{
  inputpath=../code,
  basicstyle=\ttfamily\footnotesize,
  numbers=left,
  showstringspaces=false,
  language=Matlab,
  commentstyle=\itshape\color{darkgray},
  keywordstyle=\color{blue},
  stringstyle=\color{green},
  backgroundcolor=\color{blue!10},
  breaklines=true,
  breakautoindent=true,
  columns=flexible,
  frame=single,
  caption={\protect\filename@parse{\lstname}\protect\filename@base},
  captionpos=t,
  xleftmargin=1em,
  xrightmargin=1em,
  aboveskip=10pt
}
\makeatother
|
||||||
|
|
||||||
|
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{amsmath}
|
||||||
|
\usepackage{bm}
|
||||||
|
\usepackage{dsfont}
|
||||||
|
\newcommand{\naZ}{\mathds{N}}
|
||||||
|
\newcommand{\gaZ}{\mathds{Z}}
|
||||||
|
\newcommand{\raZ}{\mathds{Q}}
|
||||||
|
\newcommand{\reZ}{\mathds{R}}
|
||||||
|
\newcommand{\reZp}{\mathds{R^+}}
|
||||||
|
\newcommand{\reZpN}{\mathds{R^+_0}}
|
||||||
|
\newcommand{\koZ}{\mathds{C}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{ifthen}
|
||||||
|
|
||||||
|
\newcommand{\code}[1]{\texttt{#1}}
|
||||||
|
|
||||||
|
\newcommand{\source}[1]{
|
||||||
|
\begin{flushright}
|
||||||
|
\color{gray}\scriptsize \url{#1}
|
||||||
|
\end{flushright}
|
||||||
|
}
|
||||||
|
|
||||||
|
\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
|
||||||
|
{\medskip}
|
||||||
|
|
||||||
|
\newcounter{maxexercise}
|
||||||
|
\setcounter{maxexercise}{9} % show listings up to exercise maxexercise
|
||||||
|
\newcounter{theexercise}
|
||||||
|
\setcounter{theexercise}{1}
|
||||||
|
\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
|
||||||
|
\arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}%
|
||||||
|
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}}
|
||||||
|
|
||||||
|
\graphicspath{{figures/}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\include{likelihood}
|
||||||
|
|
||||||
|
\end{document}
|
212
likelihood/lecture/likelihood.tex
Normal file
@ -0,0 +1,212 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\chapter{\tr{Maximum likelihood estimation}{Maximum-Likelihood Methode}}
|
||||||
|
|
||||||
|
In vielen Situationen wollen wir einen oder mehrere Parameter $\theta$
|
||||||
|
einer Wahrscheinlichkeitsverteilung sch\"atzen, so dass die Verteilung
|
||||||
|
die Daten $x_1, x_2, \ldots x_n$ am besten beschreibt. Bei der
|
||||||
|
Maximum-Likelihood-Methode w\"ahlen wir die Parameter so, dass die
|
||||||
|
Wahrscheinlichkeit, dass die Daten aus der Verteilung stammen, am
|
||||||
|
gr\"o{\ss}ten ist.
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\section{Maximum Likelihood}
|
||||||
|
Sei $p(x|\theta)$ (lies ``Wahrscheinlichkeit(sdichte) von $x$ gegeben
|
||||||
|
$\theta$'') die Wahrscheinlichkeits(dichte)verteilung von $x$ mit dem
|
||||||
|
Parameter(n) $\theta$. Das k\"onnte die Normalverteilung
|
||||||
|
\begin{equation}
|
||||||
|
\label{normpdfmean}
|
||||||
|
p(x|\theta) = \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x-\theta)^2}{2\sigma^2}}
|
||||||
|
\end{equation}
|
||||||
|
sein mit
|
||||||
|
fester Standardabweichung $\sigma$ und dem Mittelwert $\mu$ als
|
||||||
|
Parameter $\theta$.
|
||||||
|
|
||||||
|
Wenn nun den $n$ unabh\"angigen Beobachtungen $x_1, x_2, \ldots x_n$
|
||||||
|
die Wahrscheinlichkeitsverteilung $p(x|\theta)$ zugrundeliegt, dann
|
||||||
|
ist die Verbundwahrscheinlichkeit $p(x_1,x_2, \ldots x_n|\theta)$ des
|
||||||
|
Auftretens der Werte $x_1, x_2, \ldots x_n$ gegeben ein bestimmtes $\theta$
|
||||||
|
\begin{equation}
|
||||||
|
p(x_1,x_2, \ldots x_n|\theta) = p(x_1|\theta) \cdot p(x_2|\theta)
|
||||||
|
\ldots p(x_n|\theta) = \prod_{i=1}^n p(x_i|\theta) \; .
|
||||||
|
\end{equation}
|
||||||
|
Andersherum gesehen ist das die Likelihood (deutsch immer noch ``Wahrscheinlichkeit'')
|
||||||
|
den Parameter $\theta$ zu haben, gegeben die Me{\ss}werte $x_1, x_2, \ldots x_n$,
|
||||||
|
\begin{equation}
|
||||||
|
{\cal L}(\theta|x_1,x_2, \ldots x_n) = p(x_1,x_2, \ldots x_n|\theta)
|
||||||
|
\end{equation}
|
||||||
|
|
||||||
|
Wir sind nun an dem Wert des Parameters $\theta_{mle}$ interessiert, der die
|
||||||
|
Likelihood maximiert (``mle'': Maximum-Likelihood Estimate):
|
||||||
|
\begin{equation}
|
||||||
|
\theta_{mle} = \text{argmax}_{\theta} {\cal L}(\theta|x_1,x_2, \ldots x_n)
|
||||||
|
\end{equation}
|
||||||
|
$\text{argmax}_xf(x)$ bezeichnet den Wert des Arguments $x$ der Funktion $f(x)$, bei
|
||||||
|
dem $f(x)$ ihr globales Maximum annimmt. Wir suchen also den Wert von $\theta$
|
||||||
|
bei dem die Likelihood ${\cal L}(\theta)$ ihr Maximum hat.
|
||||||
|
|
||||||
|
An der Stelle eines Maximums einer Funktion \"andert sich nichts, wenn
|
||||||
|
man die Funktionswerte mit einer streng monoton steigenden Funktion
|
||||||
|
transformiert. Aus gleich ersichtlichen mathematischen Gr\"unden wird meistens
|
||||||
|
das Maximum der logarithmierten Likelihood (``Log-Likelihood'') gesucht:
|
||||||
|
\begin{eqnarray}
|
||||||
|
\theta_{mle} & = & \text{argmax}_{\theta}\; {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\
|
||||||
|
& = & \text{argmax}_{\theta}\; \log {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\
|
||||||
|
& = & \text{argmax}_{\theta}\; \log \prod_{i=1}^n p(x_i|\theta) \nonumber \\
|
||||||
|
& = & \text{argmax}_{\theta}\; \sum_{i=1}^n \log p(x_i|\theta) \label{loglikelihood}
|
||||||
|
\end{eqnarray}
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\subsection{Beispiel: Das arithmetische Mittel}
|
||||||
|
|
||||||
|
Wenn die Me{\ss}daten $x_1, x_2, \ldots x_n$ der Normalverteilung \eqnref{normpdfmean}
|
||||||
|
entstammen, und wir den Mittelwert $\mu$ als einzigen Parameter der Verteilung betrachten,
|
||||||
|
welcher Wert von $\theta$ maximiert dessen Likelihood?
|
||||||
|
|
||||||
|
\begin{figure}[t]
|
||||||
|
\includegraphics[width=1\textwidth]{mlemean}
|
||||||
|
\caption{\label{mlemeanfig} Maximum Likelihood Estimation des
|
||||||
|
Mittelwerts. Oben: Die Daten zusammen mit drei m\"oglichen
|
||||||
|
Normalverteilungen mit unterschiedlichen Mittelwerten (Pfeile) aus
|
||||||
|
denen die Daten stammen k\"onnten. Unten links: Die Likelihood
|
||||||
|
in Abh\"angigkeit des Mittelwerts als Parameter der
|
||||||
|
Normalverteilungen. Unten rechts: die entsprechende
|
||||||
|
Log-Likelihood. An der Position des Maximums bei $\theta=2$
|
||||||
|
\"andert sich nichts (Pfeil).}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Die Log-Likelihood \eqnref{loglikelihood} ist
|
||||||
|
\begin{eqnarray*}
|
||||||
|
\log {\cal L}(\theta|x_1,x_2, \ldots x_n)
|
||||||
|
& = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x_i-\theta)^2}{2\sigma^2}} \\
|
||||||
|
& = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma^2} -\frac{(x_i-\theta)^2}{2\sigma^2}
|
||||||
|
\end{eqnarray*}
|
||||||
|
Zur Bestimmung des Maximums der Log-Likelihood berechnen wir deren Ableitung
|
||||||
|
nach dem Parameter $\theta$ und setzen diese gleich Null:
|
||||||
|
\begin{eqnarray*}
|
||||||
|
\frac{\text{d}}{\text{d}\theta} \log {\cal L}(\theta|x_1,x_2, \ldots x_n) & = & \sum_{i=1}^n \frac{2(x_i-\theta)}{2\sigma^2} \;\; = \;\; 0 \\
|
||||||
|
\Leftrightarrow \quad \sum_{i=1}^n x_i - \sum_{i=1}^n \theta & = & 0 \\
|
||||||
|
\Leftrightarrow \quad n \theta & = & \sum_{i=1}^n x_i \\
|
||||||
|
\Leftrightarrow \quad \theta & = & \frac{1}{n} \sum_{i=1}^n x_i
|
||||||
|
\end{eqnarray*}
|
||||||
|
Der Maximum-Likelihood-Estimator ist das arithmetische Mittel der Daten. D.h.
|
||||||
|
das arithmetische Mittel maximiert die Wahrscheinlichkeit, dass die Daten aus einer
|
||||||
|
Normalverteilung mit diesem Mittelwert gezogen worden sind.
|
||||||
|
|
||||||
|
\begin{exercise}[mlemean.m]
|
||||||
|
Ziehe $n=50$ normalverteilte Zufallsvariablen mit einem Mittelwert $\ne 0$
|
||||||
|
und einer Standardabweichung $\ne 1$.
|
||||||
|
|
||||||
|
Plotte die Likelihood (aus dem Produkt der Wahrscheinlichkeiten) und
|
||||||
|
die Log-Likelihood (aus der Summe der logarithmierten
|
||||||
|
Wahrscheinlichkeiten) f\"ur den Mittelwert als Parameter. Vergleiche
|
||||||
|
die Position der Maxima mit den aus den Daten berechneten
|
||||||
|
Mittelwerten.
|
||||||
|
\end{exercise}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\section{Kurvenfit als Maximum Likelihood Estimation}
|
||||||
|
Beim Kurvenfit soll eine Funktion $f(x;\theta)$ mit den Parametern
|
||||||
|
$\theta$ an die Datenpaare $(x_i|y_i)$ durch Anpassung der Parameter
|
||||||
|
$\theta$ gefittet werden. Wenn wir annehmen, dass die $y_i$ um die
|
||||||
|
entsprechenden Funktionswerte $f(x_i;\theta)$ mit einer
|
||||||
|
Standardabweichung $\sigma_i$ normalverteilt streuen, dann lautet die
|
||||||
|
Log-Likelihood
|
||||||
|
\begin{eqnarray*}
|
||||||
|
\log {\cal L}(\theta|x_1,x_2, \ldots x_n)
|
||||||
|
& = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma_i^2}}e^{-\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2}} \\
|
||||||
|
& = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma_i^2} -\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2}
|
||||||
|
\end{eqnarray*}
|
||||||
|
Der einzige Unterschied zum vorherigen Beispiel ist, dass die
|
||||||
|
Mittelwerte der Normalverteilungen nun durch die Funktionswerte
|
||||||
|
gegeben sind.
|
||||||
|
|
||||||
|
Der Parameter $\theta$ soll so gew\"ahlt werden, dass die
|
||||||
|
Log-Likelihood maximal wird. Der erste Term der Summe ist
|
||||||
|
unabh\"angig von $\theta$ und kann deshalb bei der Suche nach dem
|
||||||
|
Maximum weggelassen werden.
|
||||||
|
\begin{eqnarray*}
|
||||||
|
& = & - \frac{1}{2} \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2
|
||||||
|
\end{eqnarray*}
|
||||||
|
Anstatt nach dem Maximum zu suchen, k\"onnen wir auch das Vorzeichen der Log-Likelihood
|
||||||
|
umdrehen und nach dem Minimum suchen. Dabei k\"onnen wir auch den Faktor $1/2$ vor der Summe vernachl\"assigen --- auch das \"andert nichts an der Position des Minimums.
|
||||||
|
\begin{equation}
|
||||||
|
\theta_{mle} = \text{argmin}_{\theta} \; \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 \;\; = \;\; \text{argmin}_{\theta} \; \chi^2
|
||||||
|
\end{equation}
|
||||||
|
Die Summe der quadratischen Abst\"ande normiert auf die jeweiligen
|
||||||
|
Standardabweichungen wird auch mit $\chi^2$ bezeichnet. Der Wert des
|
||||||
|
Parameters $\theta$ welcher den quadratischen Abstand minimiert ist
|
||||||
|
also identisch mit der Maximierung der Wahrscheinlichkeit, dass die
|
||||||
|
Daten tats\"achlich aus der Funktion stammen k\"onnen. Minimierung des
|
||||||
|
$\chi^2$ ist also ein Maximum-Likelihood Estimate.
|
||||||
|
|
||||||
|
\begin{figure}[t]
|
||||||
|
\includegraphics[width=1\textwidth]{mlepropline}
|
||||||
|
\caption{\label{mleproplinefig} Maximum Likelihood Estimation der
|
||||||
|
Steigung einer Ursprungsgeraden.}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
|
||||||
|
\subsection{Beispiel: einfache Proportionalit\"at}
|
||||||
|
Als Funktion nehmen wir die Ursprungsgerade
|
||||||
|
\[ f(x) = \theta x \]
|
||||||
|
mit Steigung $\theta$. Die $\chi^2$-Summe lautet damit
|
||||||
|
\[ \chi^2 = \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \; . \]
|
||||||
|
Zur Bestimmung des Minimums berechnen wir wieder die erste Ableitung nach $\theta$
|
||||||
|
und setzen diese gleich Null:
|
||||||
|
\begin{eqnarray}
|
||||||
|
\frac{\text{d}}{\text{d}\theta}\chi^2 & = & \frac{\text{d}}{\text{d}\theta} \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\
|
||||||
|
& = & \sum_{i=1}^n \frac{\text{d}}{\text{d}\theta} \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\
|
||||||
|
& = & -2 \sum_{i=1}^n \frac{x_i}{\sigma_i} \left( \frac{y_i-\theta x_i}{\sigma_i} \right) \nonumber \\
|
||||||
|
& = & -2 \sum_{i=1}^n \left( \frac{x_iy_i}{\sigma_i^2} - \theta \frac{x_i^2}{\sigma_i^2} \right) \;\; = \;\; 0 \nonumber \\
|
||||||
|
\Leftrightarrow \quad \theta \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2} & = & \sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2} \nonumber \\
|
||||||
|
\Leftrightarrow \quad \theta & = & \frac{\sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2}}{ \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2}} \label{mleslope}
|
||||||
|
\end{eqnarray}
|
||||||
|
Damit haben wir nun einen analytischen Ausdruck f\"ur die Bestimmung
|
||||||
|
der Steigung $\theta$ der Regressionsgeraden gewonnen. Ein
|
||||||
|
Gradientenabstieg ist f\"ur das Fitten der Geradensteigung also gar nicht
|
||||||
|
n\"otig. Das gilt allgemein f\"ur das Fitten von Koeffizienten von
|
||||||
|
linear kombinierten Basisfunktionen. Parameter die nichtlinear in
|
||||||
|
einer Funktion enthalten sind k\"onnen aber nicht analytisch aus den
|
||||||
|
Daten berechnet werden. Da bleibt dann nur auf numerische Verfahren
|
||||||
|
zur Optimierung der Kostenfunktion, wie z.B. der Gradientenabstieg,
|
||||||
|
zur\"uckzugreifen.
|
||||||
|
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\section{Fits von Wahrscheinlichkeitsverteilungen}
|
||||||
|
Zum Abschluss betrachten wir noch den Fall, bei dem wir die Parameter
|
||||||
|
einer Wahrscheinlichkeitsdichtefunktion (z.B. Mittelwert und
|
||||||
|
Standardabweichung der Normalverteilung) an ein Datenset fitten wollen.
|
||||||
|
|
||||||
|
Ein erster Gedanke k\"onnte sein, die
|
||||||
|
Wahrscheinlichkeitsdichtefunktion durch Minimierung des quadratischen
|
||||||
|
Abstands an ein Histogramm der Daten zu fitten. Das ist aber aus
|
||||||
|
folgenden Gr\"unden nicht die Methode der Wahl: (i)
|
||||||
|
Wahrscheinlichkeitsdichten k\"onnen nur positiv sein. Darum k\"onnen
|
||||||
|
insbesondere bei kleinen Werten die Daten nicht symmetrisch streuen,
|
||||||
|
wie es normalverteilte Daten machen sollten. (ii) Die Datenwerte sind
|
||||||
|
nicht unabh\"angig, da das normierte Histogramm sich zu Eins
|
||||||
|
aufintegriert. Die beiden Annahmen normalverteilte und unabh\"angige Daten
|
||||||
|
die die Minimierung des quadratischen Abstands zu einem Maximum
|
||||||
|
Likelihood Estimator machen sind also verletzt. (iii) Das Histogramm
|
||||||
|
h\"angt von der Wahl der Klassenbreite ab.
|
||||||
|
|
||||||
|
Den direkten Weg, eine Wahrscheinlichkeitsdichtefunktion an ein
|
||||||
|
Datenset zu fitten, haben wir oben schon bei dem Beispiel zur
|
||||||
|
Absch\"atzung des Mittelwertes einer Normalverteilung gesehen ---
|
||||||
|
Maximum Likelihood! Wir suchen einfach die Parameter $\theta$ der
|
||||||
|
gesuchten Wahrscheinlichkeitsdichtefunktion bei der die Log-Likelihood
|
||||||
|
\eqnref{loglikelihood} maximal wird. Das ist im allgemeinen ein
|
||||||
|
nichtlineares Optimierungsproblem, das mit numerischen Verfahren, wie
|
||||||
|
z.B. dem Gradientenabstieg, gel\"ost wird.
|
||||||
|
|
||||||
|
\begin{figure}[t]
|
||||||
|
\includegraphics[width=1\textwidth]{mlepdf}
|
||||||
|
\caption{\label{mlepdffig} Maximum Likelihood Estimation einer
|
||||||
|
Wahrscheinlichkeitsdichtefunktion. Links: die 100 Datenpunkte, die aus der Gammaverteilung
|
||||||
|
2. Ordnung (rot) gezogen worden sind. Der Maximum-Likelihood-Fit ist orange dargestellt.
|
||||||
|
Rechts: das normierte Histogramm der Daten zusammen mit dem \"uber Minimierung
|
||||||
|
des quadratischen Abstands zum Histogramm berechneten Fit, der potentiell schlechter ist.}
|
||||||
|
\end{figure}
|
22
regression/lecture/Makefile
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
BASENAME=linear_regression
|
||||||
|
PYFILES=$(wildcard *.py)
|
||||||
|
PYPDFFILES=$(PYFILES:.py=.pdf)
|
||||||
|
|
||||||
|
pdf : $(BASENAME).pdf $(PYPDFFILES)
|
||||||
|
|
||||||
|
$(BASENAME).pdf : $(BASENAME).tex
|
||||||
|
pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
|
||||||
|
|
||||||
|
$(PYPDFFILES) : %.pdf : %.py
|
||||||
|
python $<
|
||||||
|
|
||||||
|
clean :
|
||||||
|
rm -f *~ $(BASENAME).aux $(BASENAME).log $(BASENAME).out $(BASENAME).toc $(BASENAME).nav $(BASENAME).snm $(BASENAME).vrb
|
||||||
|
|
||||||
|
cleanall : clean
|
||||||
|
rm -f $(BASENAME).pdf
|
||||||
|
|
||||||
|
# Poll twice a second and rebuild the pdf target whenever it is out of date.
# $(MAKE) is used instead of a literal `make` so that command-line flags and
# the jobserver are handed on to the sub-make. The target never creates a
# file called `watch`, hence it is declared phony.
.PHONY : watch
watch :
|
||||||
|
while true; do ! $(MAKE) -q pdf && $(MAKE) pdf; sleep 0.5; done
|
||||||
|
|
||||||
|
|
61
regression/lecture/beamercolorthemetuebingen.sty
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
% Copyright 2007 by Till Tantau
|
||||||
|
%
|
||||||
|
% This file may be distributed and/or modified
|
||||||
|
%
|
||||||
|
% 1. under the LaTeX Project Public License and/or
|
||||||
|
% 2. under the GNU Public License.
|
||||||
|
%
|
||||||
|
% See the file doc/licenses/LICENSE for more details.
|
||||||
|
|
||||||
|
\usepackage{color}
|
||||||
|
\definecolor{karminrot}{RGB}{165,30,55}
|
||||||
|
\definecolor{gold}{RGB}{180,160,105}
|
||||||
|
\definecolor{anthrazit}{RGB}{50 ,65 ,75 }
|
||||||
|
|
||||||
|
\mode<presentation>
|
||||||
|
|
||||||
|
\setbeamercolor*{normal text}{fg=anthrazit,bg=white}
|
||||||
|
\setbeamercolor*{alerted text}{fg=anthrazit}
|
||||||
|
\setbeamercolor*{example text}{fg=anthrazit}
|
||||||
|
\setbeamercolor*{structure}{fg=gold,bg=karminrot}
|
||||||
|
|
||||||
|
\providecommand*{\beamer@bftext@only}{%
|
||||||
|
\relax
|
||||||
|
\ifmmode
|
||||||
|
\expandafter\beamer@bftext@warning
|
||||||
|
\else
|
||||||
|
\expandafter\bfseries
|
||||||
|
\fi
|
||||||
|
}
|
||||||
|
\providecommand*{\beamer@bftext@warning}{%
|
||||||
|
\ClassWarning{beamer}
|
||||||
|
{Cannot use bold for alerted text in math mode}%
|
||||||
|
}
|
||||||
|
|
||||||
|
\setbeamerfont{alerted text}{series=\beamer@bftext@only}
|
||||||
|
|
||||||
|
\setbeamercolor{palette primary}{fg=karminrot,bg=white}
|
||||||
|
\setbeamercolor{palette secondary}{fg=gold,bg=white}
|
||||||
|
\setbeamercolor{palette tertiary}{fg=anthrazit,bg=white}
|
||||||
|
\setbeamercolor{palette quaternary}{fg=black,bg=white}
|
||||||
|
|
||||||
|
\setbeamercolor{sidebar}{bg=karminrot!100}
|
||||||
|
|
||||||
|
\setbeamercolor{palette sidebar primary}{fg=karminrot}
|
||||||
|
\setbeamercolor{palette sidebar secondary}{fg=karminrot}
|
||||||
|
\setbeamercolor{palette sidebar tertiary}{fg=karminrot}
|
||||||
|
\setbeamercolor{palette sidebar quaternary}{fg=karminrot}
|
||||||
|
|
||||||
|
\setbeamercolor{item projected}{fg=black,bg=black!20}
|
||||||
|
|
||||||
|
\setbeamercolor*{block body}{}
|
||||||
|
\setbeamercolor*{block body alerted}{}
|
||||||
|
\setbeamercolor*{block body example}{}
|
||||||
|
\setbeamercolor*{block title}{parent=structure}
|
||||||
|
\setbeamercolor*{block title alerted}{parent=alerted text}
|
||||||
|
\setbeamercolor*{block title example}{parent=example text}
|
||||||
|
|
||||||
|
\setbeamercolor*{titlelike}{parent=structure}
|
||||||
|
|
||||||
|
\mode
|
||||||
|
<all>
|
236
scientificcomputing-script.tex
Normal file
@ -0,0 +1,236 @@
|
|||||||
|
\documentclass[12pt]{report}
|
||||||
|
|
||||||
|
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
|
||||||
|
\author{Jan Grewe \& Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
|
||||||
|
\date{WS 15/16}
|
||||||
|
|
||||||
|
%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% \newcommand{\tr}[2]{#1} % en
|
||||||
|
% \usepackage[english]{babel}
|
||||||
|
\newcommand{\tr}[2]{#2} % de
|
||||||
|
\usepackage[german]{babel}
|
||||||
|
|
||||||
|
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{pslatex} % nice font for pdf file
|
||||||
|
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
||||||
|
|
||||||
|
%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
|
||||||
|
\setcounter{tocdepth}{1}
|
||||||
|
|
||||||
|
%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[sf,bf,it,big,clearempty]{titlesec}
|
||||||
|
\setcounter{secnumdepth}{1}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage{xcolor}
|
||||||
|
\pagecolor{white}
|
||||||
|
|
||||||
|
\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)%
|
||||||
|
\put(0,4){\line(1,0){170}}%
|
||||||
|
\multiput(0,2)(10,0){18}{\line(0,1){4}}%
|
||||||
|
\multiput(0,3)(1,0){170}{\line(0,1){2}}%
|
||||||
|
\put(0,0){\makebox(0,0){{\tiny 0}}}%
|
||||||
|
\put(10,0){\makebox(0,0){{\tiny 1}}}%
|
||||||
|
\put(20,0){\makebox(0,0){{\tiny 2}}}%
|
||||||
|
\put(30,0){\makebox(0,0){{\tiny 3}}}%
|
||||||
|
\put(40,0){\makebox(0,0){{\tiny 4}}}%
|
||||||
|
\put(50,0){\makebox(0,0){{\tiny 5}}}%
|
||||||
|
\put(60,0){\makebox(0,0){{\tiny 6}}}%
|
||||||
|
\put(70,0){\makebox(0,0){{\tiny 7}}}%
|
||||||
|
\put(80,0){\makebox(0,0){{\tiny 8}}}%
|
||||||
|
\put(90,0){\makebox(0,0){{\tiny 9}}}%
|
||||||
|
\put(100,0){\makebox(0,0){{\tiny 10}}}%
|
||||||
|
\put(110,0){\makebox(0,0){{\tiny 11}}}%
|
||||||
|
\put(120,0){\makebox(0,0){{\tiny 12}}}%
|
||||||
|
\put(130,0){\makebox(0,0){{\tiny 13}}}%
|
||||||
|
\put(140,0){\makebox(0,0){{\tiny 14}}}%
|
||||||
|
\put(150,0){\makebox(0,0){{\tiny 15}}}%
|
||||||
|
\put(160,0){\makebox(0,0){{\tiny 16}}}%
|
||||||
|
\put(170,0){\makebox(0,0){{\tiny 17}}}%
|
||||||
|
\end{picture}\par}
|
||||||
|
|
||||||
|
% figures:
|
||||||
|
\setlength{\fboxsep}{0pt}
|
||||||
|
\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}}
|
||||||
|
%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}}
|
||||||
|
%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}}
|
||||||
|
%\newcommand{\texpicture}[1]{}
|
||||||
|
\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}}
|
||||||
|
|
||||||
|
% maximum number of floats:
|
||||||
|
\setcounter{topnumber}{2}
|
||||||
|
\setcounter{bottomnumber}{0}
|
||||||
|
\setcounter{totalnumber}{2}
|
||||||
|
|
||||||
|
% float placement fractions:
|
||||||
|
\renewcommand{\textfraction}{0.2}
|
||||||
|
\renewcommand{\topfraction}{0.8}
|
||||||
|
\renewcommand{\bottomfraction}{0.0}
|
||||||
|
\renewcommand{\floatpagefraction}{0.5}
|
||||||
|
|
||||||
|
% spacing for floats:
|
||||||
|
\setlength{\floatsep}{12pt plus 2pt minus 2pt}
|
||||||
|
\setlength{\textfloatsep}{20pt plus 4pt minus 2pt}
|
||||||
|
\setlength{\intextsep}{12pt plus 2pt minus 2pt}
|
||||||
|
|
||||||
|
% spacing for a floating page:
|
||||||
|
\makeatletter
|
||||||
|
\setlength{\@fptop}{0pt}
|
||||||
|
\setlength{\@fpsep}{8pt plus 2.0fil}
|
||||||
|
\setlength{\@fpbot}{0pt plus 1.0fil}
|
||||||
|
\makeatother
|
||||||
|
|
||||||
|
% rules for floats:
|
||||||
|
\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}}
|
||||||
|
\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}}
|
||||||
|
|
||||||
|
% captions:
|
||||||
|
\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption}
|
||||||
|
|
||||||
|
% put caption on separate float:
|
||||||
|
\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]}
|
||||||
|
|
||||||
|
% references to panels of a figure within the caption:
|
||||||
|
\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}}
|
||||||
|
% references to figures:
|
||||||
|
\newcommand{\panel}[1]{\textsf{\uppercase{#1}}}
|
||||||
|
\newcommand{\fref}[1]{\textup{\ref{#1}}}
|
||||||
|
\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}}
|
||||||
|
% references to figures in normal text:
|
||||||
|
\newcommand{\fig}{Fig.}
|
||||||
|
\newcommand{\Fig}{Figure}
|
||||||
|
\newcommand{\figs}{Figs.}
|
||||||
|
\newcommand{\Figs}{Figures}
|
||||||
|
\newcommand{\figref}[1]{\fig~\fref{#1}}
|
||||||
|
\newcommand{\Figref}[1]{\Fig~\fref{#1}}
|
||||||
|
\newcommand{\figsref}[1]{\figs~\fref{#1}}
|
||||||
|
\newcommand{\Figsref}[1]{\Figs~\fref{#1}}
|
||||||
|
\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}}
|
||||||
|
% references to figures within bracketed text:
|
||||||
|
\newcommand{\figb}{Fig.}
|
||||||
|
\newcommand{\figsb}{Figs.}
|
||||||
|
\newcommand{\figrefb}[1]{\figb~\fref{#1}}
|
||||||
|
\newcommand{\figsrefb}[1]{\figsb~\fref{#1}}
|
||||||
|
\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}}
|
||||||
|
|
||||||
|
% references to tables:
|
||||||
|
\newcommand{\tref}[1]{\textup{\ref{#1}}}
|
||||||
|
% references to tables in normal text:
|
||||||
|
\newcommand{\tab}{Tab.}
|
||||||
|
\newcommand{\Tab}{Table}
|
||||||
|
\newcommand{\tabs}{Tabs.}
|
||||||
|
\newcommand{\Tabs}{Tables}
|
||||||
|
\newcommand{\tabref}[1]{\tab~\tref{#1}}
|
||||||
|
\newcommand{\Tabref}[1]{\Tab~\tref{#1}}
|
||||||
|
\newcommand{\tabsref}[1]{\tabs~\tref{#1}}
|
||||||
|
\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}}
|
||||||
|
% references to tables within bracketed text:
|
||||||
|
\newcommand{\tabb}{Tab.}
|
||||||
|
\newcommand{\tabsb}{Tabs.} % plural abbreviation, parallel to \tabs and \figsb
|
||||||
|
\newcommand{\tabrefb}[1]{\tabb~\tref{#1}}
|
||||||
|
\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%\newcommand{\eqref}[1]{(\ref{#1})}
|
||||||
|
\newcommand{\eqn}{\tr{Eq}{Gl}.}
|
||||||
|
\newcommand{\Eqn}{\tr{Eq}{Gl}.}
|
||||||
|
\newcommand{\eqns}{\tr{Eqs}{Gln}.}
|
||||||
|
\newcommand{\Eqns}{\tr{Eqs}{Gln}.}
|
||||||
|
\newcommand{\eqnref}[1]{\eqn~\eqref{#1}}
|
||||||
|
\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}}
|
||||||
|
\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}}
|
||||||
|
\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{listings}
|
||||||
|
% Global settings for all code listings. The caption shows the base name of
% the listed file: \filename@parse splits \lstname into its parts and
% \filename@base is the name without directory and extension. Both are
% internal LaTeX macros with an @ in their name, so the settings have to be
% read while @ is a letter (\makeatletter ... \makeatother).
\makeatletter
|
||||||
|
\lstset{
|
||||||
|
basicstyle=\ttfamily\footnotesize,
|
||||||
|
numbers=left,
|
||||||
|
showstringspaces=false,
|
||||||
|
language=Matlab,
|
||||||
|
commentstyle=\itshape\color{darkgray},
|
||||||
|
keywordstyle=\color{blue},
|
||||||
|
stringstyle=\color{green},
|
||||||
|
backgroundcolor=\color{blue!10},
|
||||||
|
breaklines=true,
|
||||||
|
breakautoindent=true,
|
||||||
|
columns=flexible,
|
||||||
|
frame=single,
|
||||||
|
caption={\protect\filename@parse{\lstname}\protect\filename@base},
|
||||||
|
captionpos=t,
|
||||||
|
xleftmargin=1em,
|
||||||
|
xrightmargin=1em,
|
||||||
|
aboveskip=10pt
|
||||||
|
}
|
||||||
|
\makeatother
|
||||||
|
|
||||||
|
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{amsmath}
|
||||||
|
\usepackage{bm}
|
||||||
|
\usepackage{dsfont}
|
||||||
|
\newcommand{\naZ}{\mathds{N}}
|
||||||
|
\newcommand{\gaZ}{\mathds{Z}}
|
||||||
|
\newcommand{\raZ}{\mathds{Q}}
|
||||||
|
\newcommand{\reZ}{\mathds{R}}
|
||||||
|
\newcommand{\reZp}{\mathds{R^+}}
|
||||||
|
\newcommand{\reZpN}{\mathds{R^+_0}}
|
||||||
|
\newcommand{\koZ}{\mathds{C}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{ifthen}
|
||||||
|
|
||||||
|
\newcommand{\code}[1]{\texttt{#1}}
|
||||||
|
|
||||||
|
\newcommand{\source}[1]{
|
||||||
|
\begin{flushright}
|
||||||
|
\color{gray}\scriptsize \url{#1}
|
||||||
|
\end{flushright}
|
||||||
|
}
|
||||||
|
|
||||||
|
\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
|
||||||
|
{\medskip}
|
||||||
|
|
||||||
|
\newcounter{maxexercise}
|
||||||
|
\setcounter{maxexercise}{9} % show listings up to exercise maxexercise
|
||||||
|
\newcounter{theexercise}
|
||||||
|
\setcounter{theexercise}{1}
|
||||||
|
% Directory prefix that is put in front of the source file name of an
% exercise listing. Empty by default; the document body redefines it with
% \renewcommand before each chapter is included.
\newcommand{\codepath}{}
|
||||||
|
% exercise environment: prints a bold, numbered heading. The optional
% argument names a source file; it is listed (from \codepath) at the end of
% the environment, but only while the exercise number does not exceed the
% maxexercise counter. The exercise counter is stepped at the end.
\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
|
||||||
|
\arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}%
|
||||||
|
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\codepath\exercisesource}}}\medskip\stepcounter{theexercise}}
|
||||||
|
|
||||||
|
\graphicspath{{statistics/lecture/}{statistics/lecture/figures/}{bootstrap/lecture/}{bootstrap/lecture/figures/}{likelihood/lecture/}{likelihood/lecture/figures/}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\maketitle
|
||||||
|
|
||||||
|
\tableofcontents
|
||||||
|
|
||||||
|
\renewcommand{\codepath}{statistics/code/}
|
||||||
|
\include{statistics/lecture/descriptivestatistics}
|
||||||
|
|
||||||
|
\renewcommand{\codepath}{bootstrap/code/}
|
||||||
|
\include{bootstrap/lecture/bootstrap}
|
||||||
|
|
||||||
|
\renewcommand{\codepath}{likelihood/code/}
|
||||||
|
\include{likelihood/lecture/likelihood}
|
||||||
|
|
||||||
|
\end{document}
|
@ -24,4 +24,7 @@ yy = gampdf(xx, p(1), p(2));
|
|||||||
plot(xx, yy, '-k', 'linewidth', 5, 'DisplayName', 'mle' );
|
plot(xx, yy, '-k', 'linewidth', 5, 'DisplayName', 'mle' );
|
||||||
|
|
||||||
hold off;
|
hold off;
|
||||||
|
xlabel('x');
|
||||||
|
ylabel('pdf');
|
||||||
legend('show');
|
legend('show');
|
||||||
|
savefigpdf(gcf, 'mlepdffit.pdf', 12, 8)
|
||||||
|
BIN
statistics/exercises/mlepdffit.pdf
Normal file
BIN
statistics/exercises/mlepropfit.pdf
Normal file
BIN
statistics/exercises/mlestd.pdf
Normal file
@ -183,7 +183,7 @@ Normalverteilung entstammen, sonder aus der Gamma-Verteilung.
|
|||||||
\end{parts}
|
\end{parts}
|
||||||
\begin{solution}
|
\begin{solution}
|
||||||
\lstinputlisting{mlepdffit.m}
|
\lstinputlisting{mlepdffit.m}
|
||||||
%\includegraphics[width=1\textwidth]{mlepdffit}
|
\includegraphics[width=1\textwidth]{mlepdffit}
|
||||||
\end{solution}
|
\end{solution}
|
||||||
|
|
||||||
\end{questions}
|
\end{questions}
|
||||||
|
@ -1,21 +1,20 @@
|
|||||||
TEXFILES=descriptivestatistics.tex linear_regression.tex #$(wildcard *.tex)
|
BASENAME=descriptivestatistics
|
||||||
PDFFILES=$(TEXFILES:.tex=.pdf)
|
|
||||||
PYFILES=$(wildcard *.py)
|
PYFILES=$(wildcard *.py)
|
||||||
PYPDFFILES=$(PYFILES:.py=.pdf)
|
PYPDFFILES=$(PYFILES:.py=.pdf)
|
||||||
|
|
||||||
pdf : $(PDFFILES) $(PYPDFFILES)
|
pdf : $(BASENAME)-chapter.pdf $(PYPDFFILES)
|
||||||
|
|
||||||
$(PDFFILES) : %.pdf : %.tex
|
$(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex
|
||||||
pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
|
pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
|
||||||
|
|
||||||
$(PYPDFFILES) : %.pdf : %.py
|
$(PYPDFFILES) : %.pdf : %.py
|
||||||
python $<
|
python $<
|
||||||
|
|
||||||
clean :
|
clean :
|
||||||
rm -f *~ $(TEXFILES:.tex=.aux) $(TEXFILES:.tex=.log) $(TEXFILES:.tex=.out) $(TEXFILES:.tex=.nav) $(TEXFILES:.tex=.snm) $(TEXFILES:.tex=.toc) $(TEXFILES:.tex=.vrb)
|
rm -f *~ $(BASENAME)-chapter.aux $(BASENAME)-chapter.log $(BASENAME)-chapter.out $(BASENAME).aux $(BASENAME).log
|
||||||
|
|
||||||
cleanall : clean
|
cleanall : clean
|
||||||
rm -f $(PDFFILES)
|
rm -f $(BASENAME)-chapter.pdf
|
||||||
|
|
||||||
watch :
|
watch :
|
||||||
while true; do ! make -q pdf && make pdf; sleep 0.5; done
|
while true; do ! make -q pdf && make pdf; sleep 0.5; done
|
||||||
|
361
statistics/lecture/descriptivestatistics-chapter.tex
Normal file
@ -0,0 +1,361 @@
|
|||||||
|
\documentclass[12pt]{report}
|
||||||
|
|
||||||
|
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
|
||||||
|
\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
|
||||||
|
\date{WS 15/16}
|
||||||
|
|
||||||
|
%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% \newcommand{\tr}[2]{#1} % en
|
||||||
|
% \usepackage[english]{babel}
|
||||||
|
\newcommand{\tr}[2]{#2} % de
|
||||||
|
\usepackage[german]{babel}
|
||||||
|
|
||||||
|
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{pslatex} % nice font for pdf file
|
||||||
|
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
||||||
|
|
||||||
|
%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
|
||||||
|
\setcounter{tocdepth}{1}
|
||||||
|
|
||||||
|
%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[sf,bf,it,big,clearempty]{titlesec}
|
||||||
|
\setcounter{secnumdepth}{1}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage{xcolor}
|
||||||
|
\pagecolor{white}
|
||||||
|
|
||||||
|
\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)%
|
||||||
|
\put(0,4){\line(1,0){170}}%
|
||||||
|
\multiput(0,2)(10,0){18}{\line(0,1){4}}%
|
||||||
|
\multiput(0,3)(1,0){170}{\line(0,1){2}}%
|
||||||
|
\put(0,0){\makebox(0,0){{\tiny 0}}}%
|
||||||
|
\put(10,0){\makebox(0,0){{\tiny 1}}}%
|
||||||
|
\put(20,0){\makebox(0,0){{\tiny 2}}}%
|
||||||
|
\put(30,0){\makebox(0,0){{\tiny 3}}}%
|
||||||
|
\put(40,0){\makebox(0,0){{\tiny 4}}}%
|
||||||
|
\put(50,0){\makebox(0,0){{\tiny 5}}}%
|
||||||
|
\put(60,0){\makebox(0,0){{\tiny 6}}}%
|
||||||
|
\put(70,0){\makebox(0,0){{\tiny 7}}}%
|
||||||
|
\put(80,0){\makebox(0,0){{\tiny 8}}}%
|
||||||
|
\put(90,0){\makebox(0,0){{\tiny 9}}}%
|
||||||
|
\put(100,0){\makebox(0,0){{\tiny 10}}}%
|
||||||
|
\put(110,0){\makebox(0,0){{\tiny 11}}}%
|
||||||
|
\put(120,0){\makebox(0,0){{\tiny 12}}}%
|
||||||
|
\put(130,0){\makebox(0,0){{\tiny 13}}}%
|
||||||
|
\put(140,0){\makebox(0,0){{\tiny 14}}}%
|
||||||
|
\put(150,0){\makebox(0,0){{\tiny 15}}}%
|
||||||
|
\put(160,0){\makebox(0,0){{\tiny 16}}}%
|
||||||
|
\put(170,0){\makebox(0,0){{\tiny 17}}}%
|
||||||
|
\end{picture}\par}
|
||||||
|
|
||||||
|
% figures:
|
||||||
|
\setlength{\fboxsep}{0pt}
|
||||||
|
\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}}
|
||||||
|
%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}}
|
||||||
|
%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}}
|
||||||
|
%\newcommand{\texpicture}[1]{}
|
||||||
|
\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}}
|
||||||
|
|
||||||
|
% maximum number of floats:
|
||||||
|
\setcounter{topnumber}{2}
|
||||||
|
\setcounter{bottomnumber}{0}
|
||||||
|
\setcounter{totalnumber}{2}
|
||||||
|
|
||||||
|
% float placement fractions:
|
||||||
|
\renewcommand{\textfraction}{0.2}
|
||||||
|
\renewcommand{\topfraction}{0.8}
|
||||||
|
\renewcommand{\bottomfraction}{0.0}
|
||||||
|
\renewcommand{\floatpagefraction}{0.5}
|
||||||
|
|
||||||
|
% spacing for floats:
|
||||||
|
\setlength{\floatsep}{12pt plus 2pt minus 2pt}
|
||||||
|
\setlength{\textfloatsep}{20pt plus 4pt minus 2pt}
|
||||||
|
\setlength{\intextsep}{12pt plus 2pt minus 2pt}
|
||||||
|
|
||||||
|
% spacing for a floating page:
|
||||||
|
\makeatletter
|
||||||
|
\setlength{\@fptop}{0pt}
|
||||||
|
\setlength{\@fpsep}{8pt plus 2.0fil}
|
||||||
|
\setlength{\@fpbot}{0pt plus 1.0fil}
|
||||||
|
\makeatother
|
||||||
|
|
||||||
|
% rules for floats:
|
||||||
|
\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}}
|
||||||
|
\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}}
|
||||||
|
|
||||||
|
% captions:
|
||||||
|
\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption}
|
||||||
|
|
||||||
|
% put caption on separate float:
|
||||||
|
\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]}
|
||||||
|
|
||||||
|
% references to panels of a figure within the caption:
|
||||||
|
\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}}
|
||||||
|
% references to figures:
|
||||||
|
\newcommand{\panel}[1]{\textsf{\uppercase{#1}}}
|
||||||
|
\newcommand{\fref}[1]{\textup{\ref{#1}}}
|
||||||
|
\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}}
|
||||||
|
% references to figures in normal text:
|
||||||
|
\newcommand{\fig}{Fig.}
|
||||||
|
\newcommand{\Fig}{Figure}
|
||||||
|
\newcommand{\figs}{Figs.}
|
||||||
|
\newcommand{\Figs}{Figures}
|
||||||
|
\newcommand{\figref}[1]{\fig~\fref{#1}}
|
||||||
|
\newcommand{\Figref}[1]{\Fig~\fref{#1}}
|
||||||
|
\newcommand{\figsref}[1]{\figs~\fref{#1}}
|
||||||
|
\newcommand{\Figsref}[1]{\Figs~\fref{#1}}
|
||||||
|
\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}}
|
||||||
|
% references to figures within bracketed text:
|
||||||
|
\newcommand{\figb}{Fig.}
|
||||||
|
\newcommand{\figsb}{Figs.}
|
||||||
|
\newcommand{\figrefb}[1]{\figb~\fref{#1}}
|
||||||
|
\newcommand{\figsrefb}[1]{\figsb~\fref{#1}}
|
||||||
|
\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}}
|
||||||
|
\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}}
|
||||||
|
|
||||||
|
% references to tables:
|
||||||
|
\newcommand{\tref}[1]{\textup{\ref{#1}}}
|
||||||
|
% references to tables in normal text:
|
||||||
|
\newcommand{\tab}{Tab.}
|
||||||
|
\newcommand{\Tab}{Table}
|
||||||
|
\newcommand{\tabs}{Tabs.}
|
||||||
|
\newcommand{\Tabs}{Tables}
|
||||||
|
\newcommand{\tabref}[1]{\tab~\tref{#1}}
|
||||||
|
\newcommand{\Tabref}[1]{\Tab~\tref{#1}}
|
||||||
|
\newcommand{\tabsref}[1]{\tabs~\tref{#1}}
|
||||||
|
\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}}
|
||||||
|
% references to tables within bracketed text:
|
||||||
|
\newcommand{\tabb}{Tab.}
|
||||||
|
% plural abbreviation for bracketed text, parallel to \tabs ("Tabs.") and \figsb ("Figs."):
\newcommand{\tabsb}{Tabs.}
|
||||||
|
\newcommand{\tabrefb}[1]{\tabb~\tref{#1}}
|
||||||
|
\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%\newcommand{\eqref}[1]{(\ref{#1})}
|
||||||
|
\newcommand{\eqn}{\tr{Eq}{Gl}.}
|
||||||
|
\newcommand{\Eqn}{\tr{Eq}{Gl}.}
|
||||||
|
\newcommand{\eqns}{\tr{Eqs}{Gln}.}
|
||||||
|
\newcommand{\Eqns}{\tr{Eqs}{Gln}.}
|
||||||
|
\newcommand{\eqnref}[1]{\eqn~\eqref{#1}}
|
||||||
|
\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}}
|
||||||
|
\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}}
|
||||||
|
\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{listings}
|
||||||
|
\lstset{
|
||||||
|
inputpath=../code,
|
||||||
|
basicstyle=\ttfamily\footnotesize,
|
||||||
|
numbers=left,
|
||||||
|
showstringspaces=false,
|
||||||
|
language=Matlab,
|
||||||
|
commentstyle=\itshape\color{darkgray},
|
||||||
|
keywordstyle=\color{blue},
|
||||||
|
stringstyle=\color{green},
|
||||||
|
backgroundcolor=\color{blue!10},
|
||||||
|
breaklines=true,
|
||||||
|
breakautoindent=true,
|
||||||
|
columns=flexible,
|
||||||
|
frame=single,
|
||||||
|
caption={\protect\filename@parse{\lstname}\protect\filename@base},
|
||||||
|
captionpos=t,
|
||||||
|
xleftmargin=1em,
|
||||||
|
xrightmargin=1em,
|
||||||
|
aboveskip=10pt
|
||||||
|
}
|
||||||
|
|
||||||
|
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{amsmath}
|
||||||
|
\usepackage{bm}
|
||||||
|
\usepackage{dsfont}
|
||||||
|
\newcommand{\naZ}{\mathds{N}}
|
||||||
|
\newcommand{\gaZ}{\mathds{Z}}
|
||||||
|
\newcommand{\raZ}{\mathds{Q}}
|
||||||
|
\newcommand{\reZ}{\mathds{R}}
|
||||||
|
\newcommand{\reZp}{\mathds{R^+}}
|
||||||
|
\newcommand{\reZpN}{\mathds{R^+_0}}
|
||||||
|
\newcommand{\koZ}{\mathds{C}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{ifthen}
|
||||||
|
|
||||||
|
\newcommand{\code}[1]{\texttt{#1}}
|
||||||
|
|
||||||
|
\newcommand{\source}[1]{
|
||||||
|
\begin{flushright}
|
||||||
|
\color{gray}\scriptsize \url{#1}
|
||||||
|
\end{flushright}
|
||||||
|
}
|
||||||
|
|
||||||
|
\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
|
||||||
|
{\medskip}
|
||||||
|
|
||||||
|
\newcounter{maxexercise}
|
||||||
|
\setcounter{maxexercise}{9} % show listings up to exercise maxexercise
|
||||||
|
\newcounter{theexercise}
|
||||||
|
\setcounter{theexercise}{1}
|
||||||
|
\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
|
||||||
|
\arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}%
|
||||||
|
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}}
|
||||||
|
|
||||||
|
\graphicspath{{figures/}}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\include{descriptivestatistics}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
|
|
||||||
|
|
||||||
|
\end{document}
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\section{Statistics}
|
||||||
|
What is ``a statistic''? % dt. Sch\"atzfunktion
|
||||||
|
\begin{definition}[statistic]
|
||||||
|
A statistic (singular) is a single measure of some attribute of a
|
||||||
|
sample (e.g., its arithmetic mean value). It is calculated by
|
||||||
|
applying a function (statistical algorithm) to the values of the
|
||||||
|
items of the sample, which are known together as a set of data.
|
||||||
|
|
||||||
|
\source{http://en.wikipedia.org/wiki/Statistic}
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\section{Data types}
|
||||||
|
|
||||||
|
\subsection{Nominal scale}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Binary
|
||||||
|
\begin{itemize}
|
||||||
|
\item ``yes/no'',
|
||||||
|
\item ``true/false'',
|
||||||
|
\item ``success/failure'', etc.
|
||||||
|
\end{itemize}
|
||||||
|
\item Categorical
|
||||||
|
\begin{itemize}
|
||||||
|
\item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
|
||||||
|
\item blood type (``A/B/AB/0''),
|
||||||
|
\item parts of speech (``noun/verb/preposition/article/...''),
|
||||||
|
\item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
|
||||||
|
\end{itemize}
|
||||||
|
\item Each observation/measurement/sample is put into one category
|
||||||
|
\item There is no reasonable order among the categories.\\
|
||||||
|
example: [rods, cones] vs. [cones, rods]
|
||||||
|
\item Statistics: mode, i.e. the most common item
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\subsection{Ordinal scale}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Like nominal scale, but with an order
|
||||||
|
\item Examples: ranks, ratings
|
||||||
|
\begin{itemize}
|
||||||
|
\item ``bad/ok/good'',
|
||||||
|
\item ``cold/warm/hot'',
|
||||||
|
\item ``young/old'', etc.
|
||||||
|
\end{itemize}
|
||||||
|
\item {\bf But:} there is no reasonable measure of {\em distance}
|
||||||
|
between the classes
|
||||||
|
\item Statistics: mode, median
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\subsection{Interval scale}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Quantitative/metric values
|
||||||
|
\item Reasonable measure of distance between values, but no absolute zero
|
||||||
|
\item Examples:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
|
||||||
|
\item Direction measured in degrees from magnetic or true north
|
||||||
|
\end{itemize}
|
||||||
|
\item Statistics:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Central tendency: mode, median, arithmetic mean
|
||||||
|
\item Dispersion: range, standard deviation
|
||||||
|
\end{itemize}
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\subsection{Absolute/ratio scale}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Like interval scale, but with absolute origin/zero
|
||||||
|
\item Examples:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Temperature in K
|
||||||
|
\item Length, mass, duration, electric charge, ...
|
||||||
|
\item Plane angle, etc.
|
||||||
|
\item Count (e.g. number of spikes in response to a stimulus)
|
||||||
|
\end{itemize}
|
||||||
|
\item Statistics:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Central tendency: mode, median, arithmetic, geometric, harmonic mean
|
||||||
|
\item Dispersion: range, standard deviation
|
||||||
|
\item Coefficient of variation (ratio standard deviation/mean)
|
||||||
|
\item All other statistical measures
|
||||||
|
\end{itemize}
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\subsection{Data types}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Data type selects
|
||||||
|
\begin{itemize}
|
||||||
|
\item statistics
|
||||||
|
\item type of plots (bar graph versus x-y plot)
|
||||||
|
\item correct tests
|
||||||
|
\end{itemize}
|
||||||
|
\item Scales exhibit increasing information content from nominal
|
||||||
|
to absolute.\\
|
||||||
|
Conversion ``downwards'' is always possible
|
||||||
|
\item For example: size measured in meters (ratio scale) $\rightarrow$
|
||||||
|
categories ``small/medium/large'' (ordinal scale)
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\subsection{Examples from neuroscience}
|
||||||
|
\begin{itemize}
|
||||||
|
\item {\bf absolute:}
|
||||||
|
\begin{itemize}
|
||||||
|
\item size of neuron/brain
|
||||||
|
\item length of axon
|
||||||
|
\item ion concentration
|
||||||
|
\item membrane potential
|
||||||
|
\item firing rate
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\item {\bf interval:}
|
||||||
|
\begin{itemize}
|
||||||
|
\item edge orientation
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\item {\bf ordinal:}
|
||||||
|
\begin{itemize}
|
||||||
|
\item stages of a disease
|
||||||
|
\item ratings
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\item {\bf nominal:}
|
||||||
|
\begin{itemize}
|
||||||
|
\item cell type
|
||||||
|
\item odor
|
||||||
|
\item states of an ion channel
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\end{itemize}
|
||||||
|
|
@ -1,229 +1,3 @@
|
|||||||
\documentclass[12pt]{report}
|
|
||||||
|
|
||||||
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
|
|
||||||
\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
|
|
||||||
\date{WS 15/16}
|
|
||||||
|
|
||||||
%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
% \newcommand{\tr}[2]{#1} % en
|
|
||||||
% \usepackage[english]{babel}
|
|
||||||
\newcommand{\tr}[2]{#2} % de
|
|
||||||
\usepackage[german]{babel}
|
|
||||||
|
|
||||||
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage{pslatex} % nice font for pdf file
|
|
||||||
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
|
||||||
|
|
||||||
%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
|
|
||||||
\setcounter{tocdepth}{1}
|
|
||||||
|
|
||||||
%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage[sf,bf,it,big,clearempty]{titlesec}
|
|
||||||
\setcounter{secnumdepth}{1}
|
|
||||||
|
|
||||||
|
|
||||||
%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
|
|
||||||
|
|
||||||
|
|
||||||
%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage{graphicx}
|
|
||||||
\usepackage{xcolor}
|
|
||||||
\pagecolor{white}
|
|
||||||
|
|
||||||
\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)%
|
|
||||||
\put(0,4){\line(1,0){170}}%
|
|
||||||
\multiput(0,2)(10,0){18}{\line(0,1){4}}%
|
|
||||||
\multiput(0,3)(1,0){170}{\line(0,1){2}}%
|
|
||||||
\put(0,0){\makebox(0,0){{\tiny 0}}}%
|
|
||||||
\put(10,0){\makebox(0,0){{\tiny 1}}}%
|
|
||||||
\put(20,0){\makebox(0,0){{\tiny 2}}}%
|
|
||||||
\put(30,0){\makebox(0,0){{\tiny 3}}}%
|
|
||||||
\put(40,0){\makebox(0,0){{\tiny 4}}}%
|
|
||||||
\put(50,0){\makebox(0,0){{\tiny 5}}}%
|
|
||||||
\put(60,0){\makebox(0,0){{\tiny 6}}}%
|
|
||||||
\put(70,0){\makebox(0,0){{\tiny 7}}}%
|
|
||||||
\put(80,0){\makebox(0,0){{\tiny 8}}}%
|
|
||||||
\put(90,0){\makebox(0,0){{\tiny 9}}}%
|
|
||||||
\put(100,0){\makebox(0,0){{\tiny 10}}}%
|
|
||||||
\put(110,0){\makebox(0,0){{\tiny 11}}}%
|
|
||||||
\put(120,0){\makebox(0,0){{\tiny 12}}}%
|
|
||||||
\put(130,0){\makebox(0,0){{\tiny 13}}}%
|
|
||||||
\put(140,0){\makebox(0,0){{\tiny 14}}}%
|
|
||||||
\put(150,0){\makebox(0,0){{\tiny 15}}}%
|
|
||||||
\put(160,0){\makebox(0,0){{\tiny 16}}}%
|
|
||||||
\put(170,0){\makebox(0,0){{\tiny 17}}}%
|
|
||||||
\end{picture}\par}
|
|
||||||
|
|
||||||
% figures:
|
|
||||||
\setlength{\fboxsep}{0pt}
|
|
||||||
\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}}
|
|
||||||
%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}}
|
|
||||||
%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}}
|
|
||||||
%\newcommand{\texpicture}[1]{}
|
|
||||||
\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}}
|
|
||||||
|
|
||||||
% maximum number of floats:
|
|
||||||
\setcounter{topnumber}{2}
|
|
||||||
\setcounter{bottomnumber}{0}
|
|
||||||
\setcounter{totalnumber}{2}
|
|
||||||
|
|
||||||
% float placement fractions:
|
|
||||||
\renewcommand{\textfraction}{0.2}
|
|
||||||
\renewcommand{\topfraction}{0.8}
|
|
||||||
\renewcommand{\bottomfraction}{0.0}
|
|
||||||
\renewcommand{\floatpagefraction}{0.5}
|
|
||||||
|
|
||||||
% spacing for floats:
|
|
||||||
\setlength{\floatsep}{12pt plus 2pt minus 2pt}
|
|
||||||
\setlength{\textfloatsep}{20pt plus 4pt minus 2pt}
|
|
||||||
\setlength{\intextsep}{12pt plus 2pt minus 2pt}
|
|
||||||
|
|
||||||
% spacing for a floating page:
|
|
||||||
\makeatletter
|
|
||||||
\setlength{\@fptop}{0pt}
|
|
||||||
\setlength{\@fpsep}{8pt plus 2.0fil}
|
|
||||||
\setlength{\@fpbot}{0pt plus 1.0fil}
|
|
||||||
\makeatother
|
|
||||||
|
|
||||||
% rules for floats:
|
|
||||||
\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}}
|
|
||||||
\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}}
|
|
||||||
|
|
||||||
% captions:
|
|
||||||
\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption}
|
|
||||||
|
|
||||||
% put caption on separate float:
|
|
||||||
\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]}
|
|
||||||
|
|
||||||
% references to panels of a figure within the caption:
|
|
||||||
\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}}
|
|
||||||
% references to figures:
|
|
||||||
\newcommand{\panel}[1]{\textsf{\uppercase{#1}}}
|
|
||||||
\newcommand{\fref}[1]{\textup{\ref{#1}}}
|
|
||||||
\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}}
|
|
||||||
% references to figures in normal text:
|
|
||||||
\newcommand{\fig}{Fig.}
|
|
||||||
\newcommand{\Fig}{Figure}
|
|
||||||
\newcommand{\figs}{Figs.}
|
|
||||||
\newcommand{\Figs}{Figures}
|
|
||||||
\newcommand{\figref}[1]{\fig~\fref{#1}}
|
|
||||||
\newcommand{\Figref}[1]{\Fig~\fref{#1}}
|
|
||||||
\newcommand{\figsref}[1]{\figs~\fref{#1}}
|
|
||||||
\newcommand{\Figsref}[1]{\Figs~\fref{#1}}
|
|
||||||
\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}}
|
|
||||||
\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}}
|
|
||||||
\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}}
|
|
||||||
\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}}
|
|
||||||
% references to figures within bracketed text:
|
|
||||||
\newcommand{\figb}{Fig.}
|
|
||||||
\newcommand{\figsb}{Figs.}
|
|
||||||
\newcommand{\figrefb}[1]{\figb~\fref{#1}}
|
|
||||||
\newcommand{\figsrefb}[1]{\figsb~\fref{#1}}
|
|
||||||
\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}}
|
|
||||||
\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}}
|
|
||||||
|
|
||||||
% references to tables:
|
|
||||||
\newcommand{\tref}[1]{\textup{\ref{#1}}}
|
|
||||||
% references to tables in normal text:
|
|
||||||
\newcommand{\tab}{Tab.}
|
|
||||||
\newcommand{\Tab}{Table}
|
|
||||||
\newcommand{\tabs}{Tabs.}
|
|
||||||
\newcommand{\Tabs}{Tables}
|
|
||||||
\newcommand{\tabref}[1]{\tab~\tref{#1}}
|
|
||||||
\newcommand{\Tabref}[1]{\Tab~\tref{#1}}
|
|
||||||
\newcommand{\tabsref}[1]{\tabs~\tref{#1}}
|
|
||||||
\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}}
|
|
||||||
% references to tables within bracketed text:
|
|
||||||
\newcommand{\tabb}{Tab.}
|
|
||||||
\newcommand{\tabsb}{Tab.}
|
|
||||||
\newcommand{\tabrefb}[1]{\tabb~\tref{#1}}
|
|
||||||
\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}}
|
|
||||||
|
|
||||||
|
|
||||||
%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
%\newcommand{\eqref}[1]{(\ref{#1})}
|
|
||||||
\newcommand{\eqn}{\tr{Eq}{Gl}.}
|
|
||||||
\newcommand{\Eqn}{\tr{Eq}{Gl}.}
|
|
||||||
\newcommand{\eqns}{\tr{Eqs}{Gln}.}
|
|
||||||
\newcommand{\Eqns}{\tr{Eqs}{Gln}.}
|
|
||||||
\newcommand{\eqnref}[1]{\eqn~\eqref{#1}}
|
|
||||||
\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}}
|
|
||||||
\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}}
|
|
||||||
\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}}
|
|
||||||
|
|
||||||
|
|
||||||
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage{listings}
|
|
||||||
\lstset{
|
|
||||||
inputpath=../code,
|
|
||||||
basicstyle=\ttfamily\footnotesize,
|
|
||||||
numbers=left,
|
|
||||||
showstringspaces=false,
|
|
||||||
language=Matlab,
|
|
||||||
commentstyle=\itshape\color{darkgray},
|
|
||||||
keywordstyle=\color{blue},
|
|
||||||
stringstyle=\color{green},
|
|
||||||
backgroundcolor=\color{blue!10},
|
|
||||||
breaklines=true,
|
|
||||||
breakautoindent=true,
|
|
||||||
columns=flexible,
|
|
||||||
frame=single,
|
|
||||||
caption={\protect\filename@parse{\lstname}\protect\filename@base},
|
|
||||||
captionpos=t,
|
|
||||||
xleftmargin=1em,
|
|
||||||
xrightmargin=1em,
|
|
||||||
aboveskip=10pt
|
|
||||||
}
|
|
||||||
|
|
||||||
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage{amsmath}
|
|
||||||
\usepackage{bm}
|
|
||||||
\usepackage{dsfont}
|
|
||||||
\newcommand{\naZ}{\mathds{N}}
|
|
||||||
\newcommand{\gaZ}{\mathds{Z}}
|
|
||||||
\newcommand{\raZ}{\mathds{Q}}
|
|
||||||
\newcommand{\reZ}{\mathds{R}}
|
|
||||||
\newcommand{\reZp}{\mathds{R^+}}
|
|
||||||
\newcommand{\reZpN}{\mathds{R^+_0}}
|
|
||||||
\newcommand{\koZ}{\mathds{C}}
|
|
||||||
|
|
||||||
|
|
||||||
%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage{ifthen}
|
|
||||||
|
|
||||||
\newcommand{\code}[1]{\texttt{#1}}
|
|
||||||
|
|
||||||
\newcommand{\source}[1]{
|
|
||||||
\begin{flushright}
|
|
||||||
\color{gray}\scriptsize \url{#1}
|
|
||||||
\end{flushright}
|
|
||||||
}
|
|
||||||
|
|
||||||
\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
|
|
||||||
{\medskip}
|
|
||||||
|
|
||||||
\newcounter{maxexercise}
|
|
||||||
\setcounter{maxexercise}{9} % show listings up to exercise maxexercise
|
|
||||||
\newcounter{theexercise}
|
|
||||||
\setcounter{theexercise}{1}
|
|
||||||
\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
|
|
||||||
\arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}%
|
|
||||||
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}}
|
|
||||||
|
|
||||||
\graphicspath{{figures/}}
|
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\begin{document}
|
|
||||||
|
|
||||||
\maketitle
|
|
||||||
|
|
||||||
%\tableofcontents
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\chapter{\tr{Descriptive statistics}{Deskriptive Statistik}}
|
\chapter{\tr{Descriptive statistics}{Deskriptive Statistik}}
|
||||||
@ -453,418 +227,3 @@ Korrelationskoeffizienten nahe 0 (\figrefb{correlationfig}).
|
|||||||
$x$ abh\"angen, ergeben Korrelationskoeffizienten nahe Null.
|
$x$ abh\"angen, ergeben Korrelationskoeffizienten nahe Null.
|
||||||
$\xi$ sind normalverteilte Zufallszahlen.}
|
$\xi$ sind normalverteilte Zufallszahlen.}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\chapter{\tr{Bootstrap Methods}{Bootstrap Methoden}}
|
|
||||||
|
|
||||||
Beim Bootstrap erzeugt man sich die Verteilung von Statistiken durch Resampling
|
|
||||||
aus der Stichprobe. Das hat mehrere Vorteile:
|
|
||||||
\begin{itemize}
|
|
||||||
\item Weniger Annahmen (z.B. muss eine Stichprobe nicht normalverteilt sein).
|
|
||||||
\item H\"ohere Genauigkeit als klassische Methoden.
|
|
||||||
\item Allgemeing\"ultigkeit: Bootstrap Methoden sind sich sehr
|
|
||||||
\"ahnlich f\"ur viele verschiedene Statistiken und ben\"otigen nicht
|
|
||||||
f\"ur jede Statistik eine andere Formel.
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\begin{figure}[t]
|
|
||||||
\includegraphics[width=0.8\textwidth]{2012-10-29_16-26-05_771}\\[2ex]
|
|
||||||
\includegraphics[width=0.8\textwidth]{2012-10-29_16-41-39_523}\\[2ex]
|
|
||||||
\includegraphics[width=0.8\textwidth]{2012-10-29_16-29-35_312}
|
|
||||||
\caption{\tr{Why can we only measure a sample of the
|
|
||||||
population?}{Warum k\"onnen wir nur eine Stichprobe der
|
|
||||||
Grundgesamtheit messen?}}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
\begin{figure}[t]
|
|
||||||
\includegraphics[height=0.2\textheight]{srs1}\\[2ex]
|
|
||||||
\includegraphics[height=0.2\textheight]{srs2}\\[2ex]
|
|
||||||
\includegraphics[height=0.2\textheight]{srs3}
|
|
||||||
\caption{Bootstrap der Stichprobenverteilung. (a) Von der
|
|
||||||
Grundgesamtheit (population) mit unbekanntem Parameter
|
|
||||||
(z.B. Mittelwert $\mu$) zieht man Stichproben (SRS: simple random
|
|
||||||
samples). Die Statistik (hier Bestimmung von $\bar x$) kann f\"ur
|
|
||||||
jede Stichprobe berechnet werden. Die erhaltenen Werte entstammen
|
|
||||||
der Stichprobenverteilung. Meistens wird aber nur eine Stichprobe
|
|
||||||
gezogen! (b) Mit bestimmten Annahmen und Theorien kann man auf
|
|
||||||
die Stichprobenverteilung schlie{\ss}en ohne sie gemessen zu
|
|
||||||
haben. (c) Alternativ k\"onnen aus der einen Stichprobe viele
|
|
||||||
Bootstrap-Stichproben generiert werden (resampling) und so
|
|
||||||
Eigenschaften der Stichprobenverteilung empirisch bestimmt
|
|
||||||
werden. Aus Hesterberg et al. 2003, Bootstrap Methods and
|
|
||||||
Permutation Tests.}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
\section{Bootstrap des Standardfehlers}
|
|
||||||
|
|
||||||
Beim Bootstrap erzeugen wir durch Resampling neue Stichproben und
|
|
||||||
benutzen diese um die Stichprobenverteilung einer Statistik zu
|
|
||||||
berechnen. Die Bootstrap Stichproben haben jeweils den gleichen Umfang
|
|
||||||
wie die urspr\"unglich gemessene Stichprobe und werden durch Ziehen
|
|
||||||
mit Zur\"ucklegen gewonnen. Jeder Wert der urspr\"unglichen Stichprobe
|
|
||||||
kann also einmal, mehrmals oder gar nicht in einer Bootstrap
|
|
||||||
Stichprobe vorkommen.
|
|
||||||
|
|
||||||
\begin{exercise}[bootstrapsem.m]
|
|
||||||
Ziehe 1000 normalverteilte Zufallszahlen und berechne deren Mittelwert,
|
|
||||||
Standardabweichung und Standardfehler ($\sigma/\sqrt{n}$).
|
|
||||||
|
|
||||||
Resample die Daten 1000 mal (Ziehen mit Zur\"ucklegen) und berechne jeweils
|
|
||||||
den Mittelwert.
|
|
||||||
|
|
||||||
Plotte ein Histogramm dieser Mittelwerte, sowie deren Mittelwert und
|
|
||||||
die Standardabweichung.
|
|
||||||
|
|
||||||
Was hat das mit dem Standardfehler zu tun?
|
|
||||||
\end{exercise}
|
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\chapter{\tr{Maximum likelihood estimation}{Maximum-Likelihood Methode}}
|
|
||||||
|
|
||||||
In vielen Situationen wollen wir einen oder mehrere Parameter $\theta$
|
|
||||||
einer Wahrscheinlichkeitsverteilung sch\"atzen, so dass die Verteilung
|
|
||||||
die Daten $x_1, x_2, \ldots x_n$ am besten beschreibt. Bei der
|
|
||||||
Maximum-Likelihood-Methode w\"ahlen wir die Parameter so, dass die
|
|
||||||
Wahrscheinlichkeit, dass die Daten aus der Verteilung stammen, am
|
|
||||||
gr\"o{\ss}ten ist.
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\section{Maximum Likelihood}
|
|
||||||
Sei $p(x|\theta)$ (lies ``Wahrscheinlichkeit(sdichte) von $x$ gegeben
|
|
||||||
$\theta$'') die Wahrscheinlichkeits(dichte)verteilung von $x$ mit dem
|
|
||||||
Parameter(n) $\theta$. Das k\"onnte die Normalverteilung
|
|
||||||
\begin{equation}
|
|
||||||
\label{normpdfmean}
|
|
||||||
p(x|\theta) = \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x-\theta)^2}{2\sigma^2}}
|
|
||||||
\end{equation}
|
|
||||||
sein mit
|
|
||||||
fester Standardabweichung $\sigma$ und dem Mittelwert $\mu$ als
|
|
||||||
Parameter $\theta$.
|
|
||||||
|
|
||||||
Wenn nun den $n$ unabh\"angigen Beobachtungen $x_1, x_2, \ldots x_n$
|
|
||||||
die Wahrscheinlichkeitsverteilung $p(x|\theta)$ zugrundeliegt, dann
|
|
||||||
ist die Verbundwahrscheinlichkeit $p(x_1,x_2, \ldots x_n|\theta)$ des
|
|
||||||
Auftretens der Werte $x_1, x_2, \ldots x_n$ gegeben ein bestimmtes $\theta$
|
|
||||||
\begin{equation}
|
|
||||||
p(x_1,x_2, \ldots x_n|\theta) = p(x_1|\theta) \cdot p(x_2|\theta)
|
|
||||||
\ldots p(x_n|\theta) = \prod_{i=1}^n p(x_i|\theta) \; .
|
|
||||||
\end{equation}
|
|
||||||
Andersherum gesehen ist das die Likelihood (deutsch immer noch ``Wahrscheinlichkeit'')
|
|
||||||
den Parameter $\theta$ zu haben, gegeben die Me{\ss}werte $x_1, x_2, \ldots x_n$,
|
|
||||||
\begin{equation}
|
|
||||||
{\cal L}(\theta|x_1,x_2, \ldots x_n) = p(x_1,x_2, \ldots x_n|\theta)
|
|
||||||
\end{equation}
|
|
||||||
|
|
||||||
Wir sind nun an dem Wert des Parameters $\theta_{mle}$ interessiert, der die
|
|
||||||
Likelihood maximiert (``mle'': Maximum-Likelihood Estimate):
|
|
||||||
\begin{equation}
|
|
||||||
\theta_{mle} = \text{argmax}_{\theta} {\cal L}(\theta|x_1,x_2, \ldots x_n)
|
|
||||||
\end{equation}
|
|
||||||
$\text{argmax}_xf(x)$ bezeichnet den Wert des Arguments $x$ der Funktion $f(x)$, bei
|
|
||||||
dem $f(x)$ ihr globales Maximum annimmt. Wir suchen also den Wert von $\theta$
|
|
||||||
bei dem die Likelihood ${\cal L}(\theta)$ ihr Maximum hat.
|
|
||||||
|
|
||||||
An der Stelle eines Maximums einer Funktion \"andert sich nichts, wenn
|
|
||||||
man die Funktionswerte mit einer streng monoton steigenden Funktion
|
|
||||||
transformiert. Aus gleich ersichtlichen mathematischen Gr\"unden wird meistens
|
|
||||||
das Maximum der logarithmierten Likelihood (``Log-Likelihood'') gesucht:
|
|
||||||
\begin{eqnarray}
|
|
||||||
\theta_{mle} & = & \text{argmax}_{\theta}\; {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\
|
|
||||||
& = & \text{argmax}_{\theta}\; \log {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\
|
|
||||||
& = & \text{argmax}_{\theta}\; \log \prod_{i=1}^n p(x_i|\theta) \nonumber \\
|
|
||||||
& = & \text{argmax}_{\theta}\; \sum_{i=1}^n \log p(x_i|\theta) \label{loglikelihood}
|
|
||||||
\end{eqnarray}
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\subsection{Beispiel: Das arithmetische Mittel}
|
|
||||||
|
|
||||||
Wenn die Me{\ss}daten $x_1, x_2, \ldots x_n$ der Normalverteilung \eqnref{normpdfmean}
|
|
||||||
entstammen, und wir den Mittelwert $\mu$ als einzigen Parameter der Verteilung betrachten,
|
|
||||||
welcher Wert von $\theta$ maximiert dessen Likelihood?
|
|
||||||
|
|
||||||
\begin{figure}[t]
|
|
||||||
\includegraphics[width=1\textwidth]{mlemean}
|
|
||||||
\caption{\label{mlemeanfig} Maximum Likelihood Estimation des
|
|
||||||
Mittelwerts. Oben: Die Daten zusammen mit drei m\"oglichen
|
|
||||||
Normalverteilungen mit unterschiedlichen Mittelwerten (Pfeile) aus
|
|
||||||
denen die Daten stammen k\"onnten. Unten links: Die Likelihood
|
|
||||||
in Abh\"angigkeit des Mittelwerts als Parameter der
|
|
||||||
Normalverteilungen. Unten rechts: die entsprechende
|
|
||||||
Log-Likelihood. An der Position des Maximums bei $\theta=2$
|
|
||||||
\"andert sich nichts (Pfeil).}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
Die Log-Likelihood \eqnref{loglikelihood} ist
|
|
||||||
\begin{eqnarray*}
|
|
||||||
\log {\cal L}(\theta|x_1,x_2, \ldots x_n)
|
|
||||||
& = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x_i-\theta)^2}{2\sigma^2}} \\
|
|
||||||
& = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma^2} -\frac{(x_i-\theta)^2}{2\sigma^2}
|
|
||||||
\end{eqnarray*}
|
|
||||||
Zur Bestimmung des Maximums der Log-Likelihood berechnen wir deren Ableitung
|
|
||||||
nach dem Parameter $\theta$ und setzen diese gleich Null:
|
|
||||||
\begin{eqnarray*}
|
|
||||||
\frac{\text{d}}{\text{d}\theta} \log {\cal L}(\theta|x_1,x_2, \ldots x_n) & = & \sum_{i=1}^n \frac{2(x_i-\theta)}{2\sigma^2} \;\; = \;\; 0 \\
|
|
||||||
\Leftrightarrow \quad \sum_{i=1}^n x_i - \sum_{i=1}^n \theta & = & 0 \\
|
|
||||||
\Leftrightarrow \quad n \theta & = & \sum_{i=1}^n x_i \\
|
|
||||||
\Leftrightarrow \quad \theta & = & \frac{1}{n} \sum_{i=1}^n x_i
|
|
||||||
\end{eqnarray*}
|
|
||||||
Der Maximum-Likelihood-Estimator ist das arithmetische Mittel der Daten. D.h.
|
|
||||||
das arithmetische Mittel maximiert die Wahrscheinlichkeit, dass die Daten aus einer
|
|
||||||
Normalverteilung mit diesem Mittelwert gezogen worden sind.
|
|
||||||
|
|
||||||
\begin{exercise}[mlemean.m]
|
|
||||||
Ziehe $n=50$ normalverteilte Zufallsvariablen mit einem Mittelwert $\ne 0$
|
|
||||||
und einer Standardabweichung $\ne 1$.
|
|
||||||
|
|
||||||
Plotte die Likelihood (aus dem Produkt der Wahrscheinlichkeiten) und
|
|
||||||
die Log-Likelihood (aus der Summe der logarithmierten
|
|
||||||
Wahrscheinlichkeiten) f\"ur den Mittelwert als Parameter. Vergleiche
|
|
||||||
die Position der Maxima mit den aus den Daten berechneten
|
|
||||||
Mittelwerten.
|
|
||||||
\end{exercise}
|
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\section{Kurvenfit als Maximum Likelihood Estimation}
|
|
||||||
Beim Kurvenfit soll eine Funktion $f(x;\theta)$ mit den Parametern
|
|
||||||
$\theta$ an die Datenpaare $(x_i|y_i)$ durch Anpassung der Parameter
|
|
||||||
$\theta$ gefittet werden. Wenn wir annehmen, dass die $y_i$ um die
|
|
||||||
entsprechenden Funktionswerte $f(x_i;\theta)$ mit einer
|
|
||||||
Standardabweichung $\sigma_i$ normalverteilt streuen, dann lautet die
|
|
||||||
Log-Likelihood
|
|
||||||
\begin{eqnarray*}
|
|
||||||
\log {\cal L}(\theta|x_1,x_2, \ldots x_n)
|
|
||||||
& = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma_i^2}}e^{-\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2}} \\
|
|
||||||
& = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma_i^2} -\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2}
|
|
||||||
\end{eqnarray*}
|
|
||||||
Der einzige Unterschied zum vorherigen Beispiel ist, dass die
|
|
||||||
Mittelwerte der Normalverteilungen nun durch die Funktionswerte
|
|
||||||
gegeben sind.
|
|
||||||
|
|
||||||
Der Parameter $\theta$ soll so gew\"ahlt werden, dass die
|
|
||||||
Log-Likelihood maximal wird. Der erste Term der Summe ist
|
|
||||||
unabh\"angig von $\theta$ und kann deshalb bei der Suche nach dem
|
|
||||||
Maximum weggelassen werden.
|
|
||||||
\begin{eqnarray*}
|
|
||||||
& = & - \frac{1}{2} \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2
|
|
||||||
\end{eqnarray*}
|
|
||||||
Anstatt nach dem Maximum zu suchen, k\"onnen wir auch das Vorzeichen der Log-Likelihood
|
|
||||||
umdrehen und nach dem Minimum suchen. Dabei k\"onnen wir auch den Faktor $1/2$ vor der Summe vernachl\"assigen --- auch das \"andert nichts an der Position des Minimums.
|
|
||||||
\begin{equation}
|
|
||||||
\theta_{mle} = \text{argmin}_{\theta} \; \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 \;\; = \;\; \text{argmin}_{\theta} \; \chi^2
|
|
||||||
\end{equation}
|
|
||||||
Die Summe der quadratischen Abst\"ande normiert auf die jeweiligen
|
|
||||||
Standardabweichungen wird auch mit $\chi^2$ bezeichnet. Der Wert des
|
|
||||||
Parameters $\theta$ welcher den quadratischen Abstand minimiert ist
|
|
||||||
also identisch mit der Maximierung der Wahrscheinlichkeit, dass die
|
|
||||||
Daten tats\"achlich aus der Funktion stammen k\"onnen. Minimierung des
|
|
||||||
$\chi^2$ ist also ein Maximum-Likelihood Estimate.
|
|
||||||
|
|
||||||
\begin{figure}[t]
|
|
||||||
\includegraphics[width=1\textwidth]{mlepropline}
|
|
||||||
\caption{\label{mleproplinefig} Maximum Likelihood Estimation der
|
|
||||||
Steigung einer Ursprungsgeraden.}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{Beispiel: einfache Proportionalit\"at}
|
|
||||||
Als Funktion nehmen wir die Ursprungsgerade
|
|
||||||
\[ f(x) = \theta x \]
|
|
||||||
mit Steigung $\theta$. Die $\chi^2$-Summe lautet damit
|
|
||||||
\[ \chi^2 = \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \; . \]
|
|
||||||
Zur Bestimmung des Minimums berechnen wir wieder die erste Ableitung nach $\theta$
|
|
||||||
und setzen diese gleich Null:
|
|
||||||
\begin{eqnarray}
|
|
||||||
\frac{\text{d}}{\text{d}\theta}\chi^2 & = & \frac{\text{d}}{\text{d}\theta} \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\
|
|
||||||
& = & \sum_{i=1}^n \frac{\text{d}}{\text{d}\theta} \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\
|
|
||||||
& = & -2 \sum_{i=1}^n \frac{x_i}{\sigma_i} \left( \frac{y_i-\theta x_i}{\sigma_i} \right) \nonumber \\
|
|
||||||
& = & -2 \sum_{i=1}^n \left( \frac{x_iy_i}{\sigma_i^2} - \theta \frac{x_i^2}{\sigma_i^2} \right) \;\; = \;\; 0 \nonumber \\
|
|
||||||
\Leftrightarrow \quad \theta \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2} & = & \sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2} \nonumber \\
|
|
||||||
\Leftrightarrow \quad \theta & = & \frac{\sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2}}{ \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2}} \label{mleslope}
|
|
||||||
\end{eqnarray}
|
|
||||||
Damit haben wir nun einen analytischen Ausdruck f\"ur die Bestimmung
|
|
||||||
der Steigung $\theta$ der Regressionsgeraden gewonnen. Ein
|
|
||||||
Gradientenabstieg ist f\"ur das Fitten der Geradensteigung also gar nicht
|
|
||||||
n\"otig. Das gilt allgemein f\"ur das Fitten von Koeffizienten von
|
|
||||||
linear kombinierten Basisfunktionen. Parameter die nichtlinear in
|
|
||||||
einer Funktion enthalten sind k\"onnen aber nicht analytisch aus den
|
|
||||||
Daten berechnet werden. Da bleibt dann nur auf numerische Verfahren
|
|
||||||
zur Optimierung der Kostenfunktion, wie z.B. der Gradientenabstieg,
|
|
||||||
zur\"uckzugreifen.
|
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\section{Fits von Wahrscheinlichkeitsverteilungen}
|
|
||||||
Zum Abschluss betrachten wir noch den Fall, bei dem wir die Parameter
|
|
||||||
einer Wahrscheinlichkeitsdichtefunktion (z.B. Mittelwert und
|
|
||||||
Standardabweichung der Normalverteilung) an ein Datenset fitten wollen.
|
|
||||||
|
|
||||||
Ein erster Gedanke k\"onnte sein, die
|
|
||||||
Wahrscheinlichkeitsdichtefunktion durch Minimierung des quadratischen
|
|
||||||
Abstands an ein Histogramm der Daten zu fitten. Das ist aber aus
|
|
||||||
folgenden Gr\"unden nicht die Methode der Wahl: (i)
|
|
||||||
Wahrscheinlichkeitsdichten k\"onnen nur positiv sein. Darum k\"onnen
|
|
||||||
insbesondere bei kleinen Werten die Daten nicht symmetrisch streuen,
|
|
||||||
wie es normalverteilte Daten machen sollten. (ii) Die Datenwerte sind
|
|
||||||
nicht unabh\"angig, da das normierte Histogramm sich zu Eins
|
|
||||||
aufintegriert. Die beiden Annahmen normalverteilte und unabh\"angige Daten
|
|
||||||
die die Minimierung des quadratischen Abstands zu einem Maximum
|
|
||||||
Likelihood Estimator machen sind also verletzt. (iii) Das Histogramm
|
|
||||||
h\"angt von der Wahl der Klassenbreite ab.
|
|
||||||
|
|
||||||
Den direkten Weg, eine Wahrscheinlichkeitsdichtefunktion an ein
|
|
||||||
Datenset zu fitten, haben wir oben schon bei dem Beispiel zur
|
|
||||||
Absch\"atzung des Mittelwertes einer Normalverteilung gesehen ---
|
|
||||||
Maximum Likelihood! Wir suchen einfach die Parameter $\theta$ der
|
|
||||||
gesuchten Wahrscheinlichkeitsdichtefunktion bei der die Log-Likelihood
|
|
||||||
\eqnref{loglikelihood} maximal wird. Das ist im allgemeinen ein
|
|
||||||
nichtlineares Optimierungsproblem, das mit numerischen Verfahren, wie
|
|
||||||
z.B. dem Gradientenabstieg, gel\"ost wird.
|
|
||||||
|
|
||||||
\begin{figure}[t]
|
|
||||||
\includegraphics[width=1\textwidth]{mlepdf}
|
|
||||||
\caption{\label{mlepdffig} Maximum Likelihood Estimation einer
|
|
||||||
Wahrscheinlichkeitsdichtefunktion. Links: die 100 Datenpunkte, die aus der Gammaverteilung
|
|
||||||
2. Ordnung (rot) gezogen worden sind. Der Maximum-Likelihood-Fit ist orange dargestellt.
|
|
||||||
Rechts: das normierte Histogramm der Daten zusammen mit der \"uber Minimierung
|
|
||||||
des quadratischen Abstands zum Histogramm berechneten Kurve. Dieser Fit ist potentiell schlechter.}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
\end{document}
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\section{Statistics}
|
|
||||||
What is ``a statistic''? % dt. Sch\"atzfunktion
|
|
||||||
\begin{definition}[statistic]
|
|
||||||
A statistic (singular) is a single measure of some attribute of a
|
|
||||||
sample (e.g., its arithmetic mean value). It is calculated by
|
|
||||||
applying a function (statistical algorithm) to the values of the
|
|
||||||
items of the sample, which are known together as a set of data.
|
|
||||||
|
|
||||||
\source{http://en.wikipedia.org/wiki/Statistic}
|
|
||||||
\end{definition}
|
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\section{Data types}
|
|
||||||
|
|
||||||
\subsection{Nominal scale}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Binary
|
|
||||||
\begin{itemize}
|
|
||||||
\item ``yes/no'',
|
|
||||||
\item ``true/false'',
|
|
||||||
\item ``success/failure'', etc.
|
|
||||||
\end{itemize}
|
|
||||||
\item Categorial
|
|
||||||
\begin{itemize}
|
|
||||||
\item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
|
|
||||||
\item blood type (``A/B/AB/0''),
|
|
||||||
\item parts of speech (``noun/verb/preposition/article/...''),
|
|
||||||
\item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
|
|
||||||
\end{itemize}
|
|
||||||
\item Each observation/measurement/sample is put into one category
|
|
||||||
\item There is no reasonable order among the categories.\\
|
|
||||||
example: [rods, cones] vs. [cones, rods]
|
|
||||||
\item Statistics: mode, i.e. the most common item
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\subsection{Ordinal scale}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Like nominal scale, but with an order
|
|
||||||
\item Examples: ranks, ratings
|
|
||||||
\begin{itemize}
|
|
||||||
\item ``bad/ok/good'',
|
|
||||||
\item ``cold/warm/hot'',
|
|
||||||
\item ``young/old'', etc.
|
|
||||||
\end{itemize}
|
|
||||||
\item {\bf But:} there is no reasonable measure of {\em distance}
|
|
||||||
between the classes
|
|
||||||
\item Statistics: mode, median
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\subsection{Interval scale}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Quantitative/metric values
|
|
||||||
\item Reasonable measure of distance between values, but no absolute zero
|
|
||||||
\item Examples:
|
|
||||||
\begin{itemize}
|
|
||||||
\item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
|
|
||||||
\item Direction measured in degrees from magnetic or true north
|
|
||||||
\end{itemize}
|
|
||||||
\item Statistics:
|
|
||||||
\begin{itemize}
|
|
||||||
\item Central tendency: mode, median, arithmetic mean
|
|
||||||
\item Dispersion: range, standard deviation
|
|
||||||
\end{itemize}
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\subsection{Absolute/ratio scale}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Like interval scale, but with absolute origin/zero
|
|
||||||
\item Examples:
|
|
||||||
\begin{itemize}
|
|
||||||
\item Temperature in K (Kelvin)
|
|
||||||
\item Length, mass, duration, electric charge, ...
|
|
||||||
\item Plane angle, etc.
|
|
||||||
\item Count (e.g. number of spikes in response to a stimulus)
|
|
||||||
\end{itemize}
|
|
||||||
\item Statistics:
|
|
||||||
\begin{itemize}
|
|
||||||
\item Central tendency: mode, median, arithmetic, geometric, harmonic mean
|
|
||||||
\item Dispersion: range, standard deviation
|
|
||||||
\item Coefficient of variation (ratio standard deviation/mean)
|
|
||||||
\item All other statistical measures
|
|
||||||
\end{itemize}
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\subsection{Data types}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Data type selects
|
|
||||||
\begin{itemize}
|
|
||||||
\item statistics
|
|
||||||
\item type of plots (bar graph versus x-y plot)
|
|
||||||
\item correct tests
|
|
||||||
\end{itemize}
|
|
||||||
\item Scales exhibit increasing information content from nominal
|
|
||||||
to absolute.\\
|
|
||||||
Conversion ``downwards'' is always possible
|
|
||||||
\item For example: size measured in meter (ratio scale) $\rightarrow$
|
|
||||||
categories ``small/medium/large'' (ordinal scale)
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\subsection{Examples from neuroscience}
|
|
||||||
\begin{itemize}
|
|
||||||
\item {\bf absolute:}
|
|
||||||
\begin{itemize}
|
|
||||||
\item size of neuron/brain
|
|
||||||
\item length of axon
|
|
||||||
\item ion concentration
|
|
||||||
\item membrane potential
|
|
||||||
\item firing rate
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\item {\bf interval:}
|
|
||||||
\begin{itemize}
|
|
||||||
\item edge orientation
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\item {\bf ordinal:}
|
|
||||||
\begin{itemize}
|
|
||||||
\item stages of a disease
|
|
||||||
\item ratings
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\item {\bf nominal:}
|
|
||||||
\begin{itemize}
|
|
||||||
\item cell type
|
|
||||||
\item odor
|
|
||||||
\item states of an ion channel
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
|