From 5791337deace5152694852997b32ffee2c14f544 Mon Sep 17 00:00:00 2001 From: Jan Benda Date: Sun, 25 Oct 2015 20:24:13 +0100 Subject: [PATCH] Reorganized the folders and started a common script for the lectures. --- Makefile | 17 + {statistics => bootstrap}/code/bootstrapsem.m | 0 bootstrap/lecture/Makefile | 22 + bootstrap/lecture/bootstrap-chapter.tex | 225 ++++++ bootstrap/lecture/bootstrap.tex | 64 ++ .../figures/2012-10-29_16-26-05_771.jpg | Bin .../figures/2012-10-29_16-29-35_312.jpg | Bin .../figures/2012-10-29_16-41-39_523.jpg | Bin .../lecture/figures/srs1.png | Bin .../lecture/figures/srs2.png | Bin .../lecture/figures/srs3.png | Bin {statistics => likelihood}/code/mlemean.m | 0 likelihood/lecture/Makefile | 22 + likelihood/lecture/likelihood-chapter.tex | 225 ++++++ likelihood/lecture/likelihood.tex | 212 ++++++ {statistics => likelihood}/lecture/mlemean.py | 0 {statistics => likelihood}/lecture/mlepdf.py | 0 .../lecture/mlepropline.py | 0 {statistics => regression}/code/iv_curve.mat | Bin .../code/lin_regression.mat | Bin {statistics => regression}/code/lsq_error.m | 0 .../code/lsq_gradient.m | 0 .../code/lsq_gradient_sigmoid.m | 0 .../code/lsq_sigmoid_error.m | 0 .../code/membraneVoltage.mat | Bin .../code/plot_error_surface.m | 0 .../code/sigmoidal_gradient_descent.m | 0 regression/lecture/Makefile | 22 + .../lecture/beamercolorthemetuebingen.sty | 61 ++ .../lecture/figures/charging_curve.pdf | Bin .../lecture/figures/lin_regress.pdf | Bin .../lecture/figures/lin_regress_abscissa.pdf | Bin .../lecture/figures/lin_regress_slope.pdf | Bin .../lecture/figures/linear_least_squares.pdf | Bin .../lecture/figures/one_d_problem_a.pdf | Bin .../lecture/figures/one_d_problem_b.pdf | Bin .../lecture/figures/one_d_problem_c.pdf | Bin .../lecture/figures/surface.pdf | Bin .../lecture/linear_regression.tex | 0 scientificcomputing-script.tex | 236 +++++++ statistics/exercises/mlepdffit.m | 3 + statistics/exercises/mlepdffit.pdf | Bin 0 -> 5918 bytes 
statistics/exercises/mlepropfit.pdf | Bin 0 -> 9574 bytes statistics/exercises/mlestd.pdf | Bin 0 -> 5166 bytes statistics/exercises/statistics04.tex | 2 +- statistics/lecture/Makefile | 11 +- .../lecture/descriptivestatistics-chapter.tex | 361 ++++++++++ statistics/lecture/descriptivestatistics.tex | 641 ------------------ 48 files changed, 1476 insertions(+), 648 deletions(-) create mode 100644 Makefile rename {statistics => bootstrap}/code/bootstrapsem.m (100%) create mode 100644 bootstrap/lecture/Makefile create mode 100644 bootstrap/lecture/bootstrap-chapter.tex create mode 100644 bootstrap/lecture/bootstrap.tex rename {statistics => bootstrap}/lecture/figures/2012-10-29_16-26-05_771.jpg (100%) rename {statistics => bootstrap}/lecture/figures/2012-10-29_16-29-35_312.jpg (100%) rename {statistics => bootstrap}/lecture/figures/2012-10-29_16-41-39_523.jpg (100%) rename {statistics => bootstrap}/lecture/figures/srs1.png (100%) rename {statistics => bootstrap}/lecture/figures/srs2.png (100%) rename {statistics => bootstrap}/lecture/figures/srs3.png (100%) rename {statistics => likelihood}/code/mlemean.m (100%) create mode 100644 likelihood/lecture/Makefile create mode 100644 likelihood/lecture/likelihood-chapter.tex create mode 100644 likelihood/lecture/likelihood.tex rename {statistics => likelihood}/lecture/mlemean.py (100%) rename {statistics => likelihood}/lecture/mlepdf.py (100%) rename {statistics => likelihood}/lecture/mlepropline.py (100%) rename {statistics => regression}/code/iv_curve.mat (100%) rename {statistics => regression}/code/lin_regression.mat (100%) rename {statistics => regression}/code/lsq_error.m (100%) rename {statistics => regression}/code/lsq_gradient.m (100%) rename {statistics => regression}/code/lsq_gradient_sigmoid.m (100%) rename {statistics => regression}/code/lsq_sigmoid_error.m (100%) rename {statistics => regression}/code/membraneVoltage.mat (100%) rename {statistics => regression}/code/plot_error_surface.m (100%) rename 
{statistics => regression}/code/sigmoidal_gradient_descent.m (100%) create mode 100644 regression/lecture/Makefile create mode 100644 regression/lecture/beamercolorthemetuebingen.sty rename {statistics => regression}/lecture/figures/charging_curve.pdf (100%) rename {statistics => regression}/lecture/figures/lin_regress.pdf (100%) rename {statistics => regression}/lecture/figures/lin_regress_abscissa.pdf (100%) rename {statistics => regression}/lecture/figures/lin_regress_slope.pdf (100%) rename {statistics => regression}/lecture/figures/linear_least_squares.pdf (100%) rename {statistics => regression}/lecture/figures/one_d_problem_a.pdf (100%) rename {statistics => regression}/lecture/figures/one_d_problem_b.pdf (100%) rename {statistics => regression}/lecture/figures/one_d_problem_c.pdf (100%) rename {statistics => regression}/lecture/figures/surface.pdf (100%) rename {statistics => regression}/lecture/linear_regression.tex (100%) create mode 100644 scientificcomputing-script.tex create mode 100644 statistics/exercises/mlepdffit.pdf create mode 100644 statistics/exercises/mlepropfit.pdf create mode 100644 statistics/exercises/mlestd.pdf create mode 100644 statistics/lecture/descriptivestatistics-chapter.tex diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..74de0a4 --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +BASENAME=scientificcomputing-script + +pdf : $(BASENAME).pdf + +$(BASENAME).pdf : $(BASENAME).tex + export TEXMFOUTPUT=.; pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true + +clean : + rm -f *~ $(BASENAME).aux $(BASENAME).log $(BASENAME).out $(BASENAME).toc + +cleanall : clean + rm -f $(BASENAME).pdf + +watch : + while true; do ! 
make -q pdf && make pdf; sleep 0.5; done + + diff --git a/statistics/code/bootstrapsem.m b/bootstrap/code/bootstrapsem.m similarity index 100% rename from statistics/code/bootstrapsem.m rename to bootstrap/code/bootstrapsem.m diff --git a/bootstrap/lecture/Makefile b/bootstrap/lecture/Makefile new file mode 100644 index 0000000..f7f02ba --- /dev/null +++ b/bootstrap/lecture/Makefile @@ -0,0 +1,22 @@ +BASENAME=bootstrap +PYFILES=$(wildcard *.py) +PYPDFFILES=$(PYFILES:.py=.pdf) + +pdf : $(BASENAME)-chapter.pdf $(PYPDFFILES) + +$(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex + pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true + +$(PYPDFFILES) : %.pdf : %.py + python $< + +clean : + rm -f *~ $(BASENAME)-chapter.aux $(BASENAME)-chapter.log $(BASENAME)-chapter.out $(BASENAME).aux $(BASENAME).log + +cleanall : clean + rm -f $(BASENAME)-chapter.pdf + +watch : + while true; do ! make -q pdf && make pdf; sleep 0.5; done + + diff --git a/bootstrap/lecture/bootstrap-chapter.tex b/bootstrap/lecture/bootstrap-chapter.tex new file mode 100644 index 0000000..d185cd4 --- /dev/null +++ b/bootstrap/lecture/bootstrap-chapter.tex @@ -0,0 +1,225 @@ +\documentclass[12pt]{report} + +%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}} +\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}} +\date{WS 15/16} + +%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% \newcommand{\tr}[2]{#1} % en +% \usepackage[english]{babel} +\newcommand{\tr}[2]{#2} % de +\usepackage[german]{babel} + +%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{pslatex} % nice font for pdf file 
+\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref} + +%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry} +\setcounter{tocdepth}{1} + +%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[sf,bf,it,big,clearempty]{titlesec} +\setcounter{secnumdepth}{1} + + +%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro + + +%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{graphicx} +\usepackage{xcolor} +\pagecolor{white} + +\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)% + \put(0,4){\line(1,0){170}}% + \multiput(0,2)(10,0){18}{\line(0,1){4}}% + \multiput(0,3)(1,0){170}{\line(0,1){2}}% + \put(0,0){\makebox(0,0){{\tiny 0}}}% + \put(10,0){\makebox(0,0){{\tiny 1}}}% + \put(20,0){\makebox(0,0){{\tiny 2}}}% + \put(30,0){\makebox(0,0){{\tiny 3}}}% + \put(40,0){\makebox(0,0){{\tiny 4}}}% + \put(50,0){\makebox(0,0){{\tiny 5}}}% + \put(60,0){\makebox(0,0){{\tiny 6}}}% + \put(70,0){\makebox(0,0){{\tiny 7}}}% + \put(80,0){\makebox(0,0){{\tiny 8}}}% + \put(90,0){\makebox(0,0){{\tiny 9}}}% + \put(100,0){\makebox(0,0){{\tiny 10}}}% + \put(110,0){\makebox(0,0){{\tiny 11}}}% + \put(120,0){\makebox(0,0){{\tiny 12}}}% + \put(130,0){\makebox(0,0){{\tiny 13}}}% + \put(140,0){\makebox(0,0){{\tiny 14}}}% + \put(150,0){\makebox(0,0){{\tiny 15}}}% + \put(160,0){\makebox(0,0){{\tiny 16}}}% + \put(170,0){\makebox(0,0){{\tiny 17}}}% + \end{picture}\par} + +% figures: +\setlength{\fboxsep}{0pt} +\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}} 
+%\newcommand{\texpicture}[1]{} +\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}} + +% maximum number of floats: +\setcounter{topnumber}{2} +\setcounter{bottomnumber}{0} +\setcounter{totalnumber}{2} + +% float placement fractions: +\renewcommand{\textfraction}{0.2} +\renewcommand{\topfraction}{0.8} +\renewcommand{\bottomfraction}{0.0} +\renewcommand{\floatpagefraction}{0.5} + +% spacing for floats: +\setlength{\floatsep}{12pt plus 2pt minus 2pt} +\setlength{\textfloatsep}{20pt plus 4pt minus 2pt} +\setlength{\intextsep}{12pt plus 2pt minus 2pt} + +% spacing for a floating page: +\makeatletter + \setlength{\@fptop}{0pt} + \setlength{\@fpsep}{8pt plus 2.0fil} + \setlength{\@fpbot}{0pt plus 1.0fil} +\makeatother + +% rules for floats: +\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}} +\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}} + +% captions: +\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption} + +% put caption on separate float: +\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]} + +% references to panels of a figure within the caption: +\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}} +% references to figures: +\newcommand{\panel}[1]{\textsf{\uppercase{#1}}} +\newcommand{\fref}[1]{\textup{\ref{#1}}} +\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}} +% references to figures in normal text: +\newcommand{\fig}{Fig.} +\newcommand{\Fig}{Figure} +\newcommand{\figs}{Figs.} +\newcommand{\Figs}{Figures} +\newcommand{\figref}[1]{\fig~\fref{#1}} +\newcommand{\Figref}[1]{\Fig~\fref{#1}} +\newcommand{\figsref}[1]{\figs~\fref{#1}} +\newcommand{\Figsref}[1]{\Figs~\fref{#1}} +\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}} +\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}} +\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}} +\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}} +% references to figures within bracketed text: 
+\newcommand{\figb}{Fig.} +\newcommand{\figsb}{Figs.} +\newcommand{\figrefb}[1]{\figb~\fref{#1}} +\newcommand{\figsrefb}[1]{\figsb~\fref{#1}} +\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}} +\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}} + +% references to tables: +\newcommand{\tref}[1]{\textup{\ref{#1}}} +% references to tables in normal text: +\newcommand{\tab}{Tab.} +\newcommand{\Tab}{Table} +\newcommand{\tabs}{Tabs.} +\newcommand{\Tabs}{Tables} +\newcommand{\tabref}[1]{\tab~\tref{#1}} +\newcommand{\Tabref}[1]{\Tab~\tref{#1}} +\newcommand{\tabsref}[1]{\tabs~\tref{#1}} +\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}} +% references to tables within bracketed text: +\newcommand{\tabb}{Tab.} +\newcommand{\tabsb}{Tab.} +\newcommand{\tabrefb}[1]{\tabb~\tref{#1}} +\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}} + + +%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%\newcommand{\eqref}[1]{(\ref{#1})} +\newcommand{\eqn}{\tr{Eq}{Gl}.} +\newcommand{\Eqn}{\tr{Eq}{Gl}.} +\newcommand{\eqns}{\tr{Eqs}{Gln}.} +\newcommand{\Eqns}{\tr{Eqs}{Gln}.} +\newcommand{\eqnref}[1]{\eqn~\eqref{#1}} +\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}} +\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}} +\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}} + + +%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{listings} +\lstset{ + inputpath=../code, + basicstyle=\ttfamily\footnotesize, + numbers=left, + showstringspaces=false, + language=Matlab, + commentstyle=\itshape\color{darkgray}, + keywordstyle=\color{blue}, + stringstyle=\color{green}, + backgroundcolor=\color{blue!10}, + breaklines=true, + breakautoindent=true, + columns=flexible, + frame=single, + caption={\protect\filename@parse{\lstname}\protect\filename@base}, + captionpos=t, + xleftmargin=1em, + xrightmargin=1em, + aboveskip=10pt +} + +%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{amsmath} +\usepackage{bm} +\usepackage{dsfont} +\newcommand{\naZ}{\mathds{N}} +\newcommand{\gaZ}{\mathds{Z}} 
+\newcommand{\raZ}{\mathds{Q}} +\newcommand{\reZ}{\mathds{R}} +\newcommand{\reZp}{\mathds{R^+}} +\newcommand{\reZpN}{\mathds{R^+_0}} +\newcommand{\koZ}{\mathds{C}} + + +%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{ifthen} + +\newcommand{\code}[1]{\texttt{#1}} + +\newcommand{\source}[1]{ + \begin{flushright} + \color{gray}\scriptsize \url{#1} + \end{flushright} +} + +\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}% + {\medskip} + +\newcounter{maxexercise} +\setcounter{maxexercise}{9} % show listings up to exercise maxexercise +\newcounter{theexercise} +\setcounter{theexercise}{1} +\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung} + \arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}% + {\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}} + +\graphicspath{{figures/}} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{document} + +\include{bootstrap} + +\end{document} diff --git a/bootstrap/lecture/bootstrap.tex b/bootstrap/lecture/bootstrap.tex new file mode 100644 index 0000000..e5d63aa --- /dev/null +++ b/bootstrap/lecture/bootstrap.tex @@ -0,0 +1,64 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\chapter{\tr{Bootstrap Methods}{Bootstrap Methoden}} + +Beim Bootstrap erzeugt man sich die Verteilung von Statistiken durch Resampling +aus der Stichprobe. Das hat mehrere Vorteile: +\begin{itemize} +\item Weniger Annahmen (z.B. muss eine Stichprobe nicht normalverteilt sein). +\item H\"ohere Genauigkeit als klassische Methoden. 
+\item Allgemeing\"ultigkeit: Bootstrap Methoden sind sich sehr + \"ahnlich f\"ur viele verschiedene Statistiken und ben\"otigen nicht + f\"ur jede Statistik eine andere Formel. +\end{itemize} + +\begin{figure}[t] + \includegraphics[width=0.8\textwidth]{2012-10-29_16-26-05_771}\\[2ex] + \includegraphics[width=0.8\textwidth]{2012-10-29_16-41-39_523}\\[2ex] + \includegraphics[width=0.8\textwidth]{2012-10-29_16-29-35_312} + \caption{\tr{Why can we only measure a sample of the + population?}{Warum k\"onnen wir nur eine Stichprobe der + Grundgesamtheit messen?}} +\end{figure} + +\begin{figure}[t] + \includegraphics[height=0.2\textheight]{srs1}\\[2ex] + \includegraphics[height=0.2\textheight]{srs2}\\[2ex] + \includegraphics[height=0.2\textheight]{srs3} + \caption{Bootstrap der Stichprobenverteilung. (a) Von der + Grundgesamtheit (population) mit unbekanntem Parameter + (z.B. Mittelwert $\mu$) zieht man Stichproben (SRS: simple random + samples). Die Statistik (hier Bestimmung von $\bar x$) kann f\"ur + jede Stichprobe berechnet werden. Die erhaltenen Werte entstammen + der Stichprobenverteilung. Meistens wird aber nur eine Stichprobe + gezogen! (b) Mit bestimmten Annahmen und Theorien kann man auf + die Stichprobenverteilung schlie{\ss}en ohne sie gemessen zu + haben. (c) Alternativ k\"onnen aus der einen Stichprobe viele + Bootstrap-Stichproben generiert werden (resampling) und so + Eigenschaften der Stichprobenverteilung empirisch bestimmt + werden. Aus Hesterberg et al. 2003, Bootstrap Methods and + Permutation Tests} +\end{figure} + +\section{Bootstrap des Standardfehlers} + +Beim Bootstrap erzeugen wir durch Resampling neue Stichproben und +benutzen diese um die Stichprobenverteilung einer Statistik zu +berechnen. Die Bootstrap Stichproben haben jeweils den gleichen Umfang +wie die urspr\"unglich gemessene Stichprobe und werden durch Ziehen +mit Zur\"ucklegen gewonnen. 
Jeder Wert der urspr\"unglichen Stichprobe +kann also einmal, mehrmals oder gar nicht in einer Bootstrap +Stichprobe vorkommen. + +\begin{exercise}[bootstrapsem.m] + Ziehe 1000 normalverteilte Zufallszahlen und berechne deren Mittelwert, + Standardabweichung und Standardfehler ($\sigma/\sqrt{n}$). + + Resample die Daten 1000 mal (Ziehen mit Zur\"ucklegen) und berechne jeweils + den Mittelwert. + + Plotte ein Histogramm dieser Mittelwerte, sowie deren Mittelwert und + die Standardabweichung. + + Was hat das mit dem Standardfehler zu tun? +\end{exercise} diff --git a/statistics/lecture/figures/2012-10-29_16-26-05_771.jpg b/bootstrap/lecture/figures/2012-10-29_16-26-05_771.jpg similarity index 100% rename from statistics/lecture/figures/2012-10-29_16-26-05_771.jpg rename to bootstrap/lecture/figures/2012-10-29_16-26-05_771.jpg diff --git a/statistics/lecture/figures/2012-10-29_16-29-35_312.jpg b/bootstrap/lecture/figures/2012-10-29_16-29-35_312.jpg similarity index 100% rename from statistics/lecture/figures/2012-10-29_16-29-35_312.jpg rename to bootstrap/lecture/figures/2012-10-29_16-29-35_312.jpg diff --git a/statistics/lecture/figures/2012-10-29_16-41-39_523.jpg b/bootstrap/lecture/figures/2012-10-29_16-41-39_523.jpg similarity index 100% rename from statistics/lecture/figures/2012-10-29_16-41-39_523.jpg rename to bootstrap/lecture/figures/2012-10-29_16-41-39_523.jpg diff --git a/statistics/lecture/figures/srs1.png b/bootstrap/lecture/figures/srs1.png similarity index 100% rename from statistics/lecture/figures/srs1.png rename to bootstrap/lecture/figures/srs1.png diff --git a/statistics/lecture/figures/srs2.png b/bootstrap/lecture/figures/srs2.png similarity index 100% rename from statistics/lecture/figures/srs2.png rename to bootstrap/lecture/figures/srs2.png diff --git a/statistics/lecture/figures/srs3.png b/bootstrap/lecture/figures/srs3.png similarity index 100% rename from statistics/lecture/figures/srs3.png rename to bootstrap/lecture/figures/srs3.png diff 
--git a/statistics/code/mlemean.m b/likelihood/code/mlemean.m similarity index 100% rename from statistics/code/mlemean.m rename to likelihood/code/mlemean.m diff --git a/likelihood/lecture/Makefile b/likelihood/lecture/Makefile new file mode 100644 index 0000000..4e6367b --- /dev/null +++ b/likelihood/lecture/Makefile @@ -0,0 +1,22 @@ +BASENAME=likelihood +PYFILES=$(wildcard *.py) +PYPDFFILES=$(PYFILES:.py=.pdf) + +pdf : $(BASENAME)-chapter.pdf $(PYPDFFILES) + +$(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex + pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true + +$(PYPDFFILES) : %.pdf : %.py + python $< + +clean : + rm -f *~ $(BASENAME)-chapter.aux $(BASENAME)-chapter.log $(BASENAME)-chapter.out $(BASENAME).aux $(BASENAME).log + +cleanall : clean + rm -f $(BASENAME)-chapter.pdf + +watch : + while true; do ! make -q pdf && make pdf; sleep 0.5; done + + diff --git a/likelihood/lecture/likelihood-chapter.tex b/likelihood/lecture/likelihood-chapter.tex new file mode 100644 index 0000000..732acbe --- /dev/null +++ b/likelihood/lecture/likelihood-chapter.tex @@ -0,0 +1,225 @@ +\documentclass[12pt]{report} + +%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}} +\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}} +\date{WS 15/16} + +%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% \newcommand{\tr}[2]{#1} % en +% \usepackage[english]{babel} +\newcommand{\tr}[2]{#2} % de +\usepackage[german]{babel} + +%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{pslatex} % nice font for pdf file +\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref} + 
+%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry} +\setcounter{tocdepth}{1} + +%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[sf,bf,it,big,clearempty]{titlesec} +\setcounter{secnumdepth}{1} + + +%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro + + +%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{graphicx} +\usepackage{xcolor} +\pagecolor{white} + +\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)% + \put(0,4){\line(1,0){170}}% + \multiput(0,2)(10,0){18}{\line(0,1){4}}% + \multiput(0,3)(1,0){170}{\line(0,1){2}}% + \put(0,0){\makebox(0,0){{\tiny 0}}}% + \put(10,0){\makebox(0,0){{\tiny 1}}}% + \put(20,0){\makebox(0,0){{\tiny 2}}}% + \put(30,0){\makebox(0,0){{\tiny 3}}}% + \put(40,0){\makebox(0,0){{\tiny 4}}}% + \put(50,0){\makebox(0,0){{\tiny 5}}}% + \put(60,0){\makebox(0,0){{\tiny 6}}}% + \put(70,0){\makebox(0,0){{\tiny 7}}}% + \put(80,0){\makebox(0,0){{\tiny 8}}}% + \put(90,0){\makebox(0,0){{\tiny 9}}}% + \put(100,0){\makebox(0,0){{\tiny 10}}}% + \put(110,0){\makebox(0,0){{\tiny 11}}}% + \put(120,0){\makebox(0,0){{\tiny 12}}}% + \put(130,0){\makebox(0,0){{\tiny 13}}}% + \put(140,0){\makebox(0,0){{\tiny 14}}}% + \put(150,0){\makebox(0,0){{\tiny 15}}}% + \put(160,0){\makebox(0,0){{\tiny 16}}}% + \put(170,0){\makebox(0,0){{\tiny 17}}}% + \end{picture}\par} + +% figures: +\setlength{\fboxsep}{0pt} +\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}} +%\newcommand{\texpicture}[1]{} +\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}} + +% maximum number of floats: +\setcounter{topnumber}{2} 
+\setcounter{bottomnumber}{0} +\setcounter{totalnumber}{2} + +% float placement fractions: +\renewcommand{\textfraction}{0.2} +\renewcommand{\topfraction}{0.8} +\renewcommand{\bottomfraction}{0.0} +\renewcommand{\floatpagefraction}{0.5} + +% spacing for floats: +\setlength{\floatsep}{12pt plus 2pt minus 2pt} +\setlength{\textfloatsep}{20pt plus 4pt minus 2pt} +\setlength{\intextsep}{12pt plus 2pt minus 2pt} + +% spacing for a floating page: +\makeatletter + \setlength{\@fptop}{0pt} + \setlength{\@fpsep}{8pt plus 2.0fil} + \setlength{\@fpbot}{0pt plus 1.0fil} +\makeatother + +% rules for floats: +\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}} +\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}} + +% captions: +\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption} + +% put caption on separate float: +\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]} + +% references to panels of a figure within the caption: +\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}} +% references to figures: +\newcommand{\panel}[1]{\textsf{\uppercase{#1}}} +\newcommand{\fref}[1]{\textup{\ref{#1}}} +\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}} +% references to figures in normal text: +\newcommand{\fig}{Fig.} +\newcommand{\Fig}{Figure} +\newcommand{\figs}{Figs.} +\newcommand{\Figs}{Figures} +\newcommand{\figref}[1]{\fig~\fref{#1}} +\newcommand{\Figref}[1]{\Fig~\fref{#1}} +\newcommand{\figsref}[1]{\figs~\fref{#1}} +\newcommand{\Figsref}[1]{\Figs~\fref{#1}} +\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}} +\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}} +\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}} +\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}} +% references to figures within bracketed text: +\newcommand{\figb}{Fig.} +\newcommand{\figsb}{Figs.} +\newcommand{\figrefb}[1]{\figb~\fref{#1}} +\newcommand{\figsrefb}[1]{\figsb~\fref{#1}} 
+\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}} +\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}} + +% references to tables: +\newcommand{\tref}[1]{\textup{\ref{#1}}} +% references to tables in normal text: +\newcommand{\tab}{Tab.} +\newcommand{\Tab}{Table} +\newcommand{\tabs}{Tabs.} +\newcommand{\Tabs}{Tables} +\newcommand{\tabref}[1]{\tab~\tref{#1}} +\newcommand{\Tabref}[1]{\Tab~\tref{#1}} +\newcommand{\tabsref}[1]{\tabs~\tref{#1}} +\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}} +% references to tables within bracketed text: +\newcommand{\tabb}{Tab.} +\newcommand{\tabsb}{Tab.} +\newcommand{\tabrefb}[1]{\tabb~\tref{#1}} +\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}} + + +%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%\newcommand{\eqref}[1]{(\ref{#1})} +\newcommand{\eqn}{\tr{Eq}{Gl}.} +\newcommand{\Eqn}{\tr{Eq}{Gl}.} +\newcommand{\eqns}{\tr{Eqs}{Gln}.} +\newcommand{\Eqns}{\tr{Eqs}{Gln}.} +\newcommand{\eqnref}[1]{\eqn~\eqref{#1}} +\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}} +\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}} +\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}} + + +%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{listings} +\lstset{ + inputpath=../code, + basicstyle=\ttfamily\footnotesize, + numbers=left, + showstringspaces=false, + language=Matlab, + commentstyle=\itshape\color{darkgray}, + keywordstyle=\color{blue}, + stringstyle=\color{green}, + backgroundcolor=\color{blue!10}, + breaklines=true, + breakautoindent=true, + columns=flexible, + frame=single, + caption={\protect\filename@parse{\lstname}\protect\filename@base}, + captionpos=t, + xleftmargin=1em, + xrightmargin=1em, + aboveskip=10pt +} + +%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{amsmath} +\usepackage{bm} +\usepackage{dsfont} +\newcommand{\naZ}{\mathds{N}} +\newcommand{\gaZ}{\mathds{Z}} +\newcommand{\raZ}{\mathds{Q}} +\newcommand{\reZ}{\mathds{R}} +\newcommand{\reZp}{\mathds{R^+}} +\newcommand{\reZpN}{\mathds{R^+_0}} 
+\newcommand{\koZ}{\mathds{C}} + + +%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{ifthen} + +\newcommand{\code}[1]{\texttt{#1}} + +\newcommand{\source}[1]{ + \begin{flushright} + \color{gray}\scriptsize \url{#1} + \end{flushright} +} + +\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}% + {\medskip} + +\newcounter{maxexercise} +\setcounter{maxexercise}{9} % show listings up to exercise maxexercise +\newcounter{theexercise} +\setcounter{theexercise}{1} +\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung} + \arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}% + {\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}} + +\graphicspath{{figures/}} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{document} + +\include{likelihood} + +\end{document} diff --git a/likelihood/lecture/likelihood.tex b/likelihood/lecture/likelihood.tex new file mode 100644 index 0000000..752d659 --- /dev/null +++ b/likelihood/lecture/likelihood.tex @@ -0,0 +1,212 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\chapter{\tr{Maximum likelihood estimation}{Maximum-Likelihood Methode}} + +In vielen Situationen wollen wir einen oder mehrere Parameter $\theta$ +einer Wahrscheinlichkeitsverteilung sch\"atzen, so dass die Verteilung +die Daten $x_1, x_2, \ldots x_n$ am besten beschreibt. Bei der +Maximum-Likelihood-Methode w\"ahlen wir die Parameter so, dass die +Wahrscheinlichkeit, dass die Daten aus der Verteilung stammen, am +gr\"o{\ss}ten ist. 
+ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Maximum Likelihood} +Sei $p(x|\theta)$ (lies ``Wahrscheinlichkeit(sdichte) von $x$ gegeben +$\theta$'') die Wahrscheinlichkeits(dichte)verteilung von $x$ mit dem +Parameter(n) $\theta$. Das k\"onnte die Normalverteilung +\begin{equation} + \label{normpdfmean} + p(x|\theta) = \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x-\theta)^2}{2\sigma^2}} +\end{equation} +sein mit +fester Standardabweichung $\sigma$ und dem Mittelwert $\mu$ als +Parameter $\theta$. + +Wenn nun den $n$ unabh\"angigen Beobachtungen $x_1, x_2, \ldots x_n$ +die Wahrscheinlichkeitsverteilung $p(x|\theta)$ zugrundeliegt, dann +ist die Verbundwahrscheinlichkeit $p(x_1,x_2, \ldots x_n|\theta)$ des +Auftretens der Werte $x_1, x_2, \ldots x_n$ gegeben ein bestimmtes $\theta$ +\begin{equation} + p(x_1,x_2, \ldots x_n|\theta) = p(x_1|\theta) \cdot p(x_2|\theta) + \ldots p(x_n|\theta) = \prod_{i=1}^n p(x_i|\theta) \; . +\end{equation} +Andersherum gesehen ist das die Likelihood (deutsch immer noch ``Wahrscheinlichkeit'') +den Parameter $\theta$ zu haben, gegeben die Me{\ss}werte $x_1, x_2, \ldots x_n$, +\begin{equation} + {\cal L}(\theta|x_1,x_2, \ldots x_n) = p(x_1,x_2, \ldots x_n|\theta) +\end{equation} + +Wir sind nun an dem Wert des Parameters $\theta_{mle}$ interessiert, der die +Likelihood maximiert (``mle'': Maximum-Likelihood Estimate): +\begin{equation} + \theta_{mle} = \text{argmax}_{\theta} {\cal L}(\theta|x_1,x_2, \ldots x_n) +\end{equation} +$\text{argmax}_xf(x)$ bezeichnet den Wert des Arguments $x$ der Funktion $f(x)$, bei +dem $f(x)$ ihr globales Maximum annimmt. Wir suchen also den Wert von $\theta$ +bei dem die Likelihood ${\cal L}(\theta)$ ihr Maximum hat. + +An der Stelle eines Maximums einer Funktion \"andert sich nichts, wenn +man die Funktionswerte mit einer streng monoton steigenden Funktion +transformiert. 
Aus gleich ersichtlichen mathematischen Gr\"unden wird meistens +das Maximum der logarithmierten Likelihood (``Log-Likelihood'') gesucht: +\begin{eqnarray} + \theta_{mle} & = & \text{argmax}_{\theta}\; {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\ + & = & \text{argmax}_{\theta}\; \log {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\ + & = & \text{argmax}_{\theta}\; \log \prod_{i=1}^n p(x_i|\theta) \nonumber \\ + & = & \text{argmax}_{\theta}\; \sum_{i=1}^n \log p(x_i|\theta) \label{loglikelihood} +\end{eqnarray} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Beispiel: Das arithmetische Mittel} + +Wenn die Me{\ss}daten $x_1, x_2, \ldots x_n$ der Normalverteilung \eqnref{normpdfmean} +entstammen, und wir den Mittelwert $\mu$ als einzigen Parameter der Verteilung betrachten, +welcher Wert von $\theta$ maximiert dessen Likelihood? + +\begin{figure}[t] + \includegraphics[width=1\textwidth]{mlemean} + \caption{\label{mlemeanfig} Maximum Likelihood Estimation des + Mittelwerts. Oben: Die Daten zusammen mit drei m\"oglichen + Normalverteilungen mit unterschiedlichen Mittelwerten (Pfeile) aus + denen die Daten stammen k\"onnten. Unten links: Die Likelihood + in Abh\"angigkeit des Mittelwerts als Parameter der + Normalverteilungen. Unten rechts: die entsprechende + Log-Likelihood. 
An der Position des Maximums bei $\theta=2$ + \"andert sich nichts (Pfeil).} +\end{figure} + +Die Log-Likelihood \eqnref{loglikelihood} ist +\begin{eqnarray*} + \log {\cal L}(\theta|x_1,x_2, \ldots x_n) + & = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x_i-\theta)^2}{2\sigma^2}} \\ + & = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma^2} -\frac{(x_i-\theta)^2}{2\sigma^2} +\end{eqnarray*} +Zur Bestimmung des Maximums der Log-Likelihood berechnen wir deren Ableitung +nach dem Parameter $\theta$ und setzen diese gleich Null: +\begin{eqnarray*} + \frac{\text{d}}{\text{d}\theta} \log {\cal L}(\theta|x_1,x_2, \ldots x_n) & = & \sum_{i=1}^n \frac{2(x_i-\theta)}{2\sigma^2} \;\; = \;\; 0 \\ + \Leftrightarrow \quad \sum_{i=1}^n x_i - \sum_{i=1}^n \theta & = & 0 \\ + \Leftrightarrow \quad n \theta & = & \sum_{i=1}^n x_i \\ + \Leftrightarrow \quad \theta & = & \frac{1}{n} \sum_{i=1}^n x_i +\end{eqnarray*} +Der Maximum-Likelihood-Estimator ist das arithmetische Mittel der Daten. D.h. +das arithmetische Mittel maximiert die Wahrscheinlichkeit, dass die Daten aus einer +Normalverteilung mit diesem Mittelwert gezogen worden sind. + +\begin{exercise}[mlemean.m] + Ziehe $n=50$ normalverteilte Zufallsvariablen mit einem Mittelwert $\ne 0$ + und einer Standardabweichung $\ne 1$. + + Plotte die Likelihood (aus dem Produkt der Wahrscheinlichkeiten) und + die Log-Likelihood (aus der Summe der logarithmierten + Wahrscheinlichkeiten) f\"ur den Mittelwert als Parameter. Vergleiche + die Position der Maxima mit dem aus den Daten berechneten + Mittelwert. +\end{exercise} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Kurvenfit als Maximum Likelihood Estimation} +Beim Kurvenfit soll eine Funktion $f(x;\theta)$ mit den Parametern +$\theta$ an die Datenpaare $(x_i|y_i)$ durch Anpassung der Parameter +$\theta$ gefittet werden.
Wenn wir annehmen, dass die $y_i$ um die +entsprechenden Funktionswerte $f(x_i;\theta)$ mit einer +Standardabweichung $\sigma_i$ normalverteilt streuen, dann lautet die +Log-Likelihood +\begin{eqnarray*} + \log {\cal L}(\theta|x_1,x_2, \ldots x_n) + & = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma_i^2}}e^{-\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2}} \\ + & = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma_i^2} -\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2} \\ +\end{eqnarray*} +Der einzige Unterschied zum vorherigen Beispiel ist, dass die +Mittelwerte der Normalverteilungen nun durch die Funktionswerte +gegeben sind. + +Der Parameter $\theta$ soll so gew\"ahlt werden, dass die +Log-Likelihood maximal wird. Der erste Term der Summe ist +unabh\"angig von $\theta$ und kann deshalb bei der Suche nach dem +Maximum weggelassen werden. +\begin{eqnarray*} + & = & - \frac{1}{2} \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 +\end{eqnarray*} +Anstatt nach dem Maximum zu suchen, k\"onnen wir auch das Vorzeichen der Log-Likelihood +umdrehen und nach dem Minimum suchen. Dabei k\"onnen wir auch den Faktor $1/2$ vor der Summe vernachl\"assigen --- auch das \"andert nichts an der Position des Minimums. +\begin{equation} + \theta_{mle} = \text{argmin}_{\theta} \; \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 \;\; = \;\; \text{argmin}_{\theta} \; \chi^2 +\end{equation} +Die Summe der quadratischen Abst\"ande normiert auf die jeweiligen +Standardabweichungen wird auch mit $\chi^2$ bezeichnet. Der Wert des +Parameters $\theta$ welcher den quadratischen Abstand minimiert ist +also identisch mit der Maximierung der Wahrscheinlichkeit, dass die +Daten tats\"achlich aus der Funktion stammen k\"onnen. Minimierung des +$\chi^2$ ist also ein Maximum-Likelihood Estimate.
+ +\begin{figure}[t] + \includegraphics[width=1\textwidth]{mlepropline} + \caption{\label{mleproplinefig} Maximum Likelihood Estimation der + Steigung einer Ursprungsgeraden.} +\end{figure} + + +\subsection{Beispiel: einfache Proportionalit\"at} +Als Funktion nehmen wir die Ursprungsgerade +\[ f(x) = \theta x \] +mit Steigung $\theta$. Die $\chi^2$-Summe lautet damit +\[ \chi^2 = \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \; . \] +Zur Bestimmung des Minimums berechnen wir wieder die erste Ableitung nach $\theta$ +und setzen diese gleich Null: +\begin{eqnarray} + \frac{\text{d}}{\text{d}\theta}\chi^2 & = & \frac{\text{d}}{\text{d}\theta} \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\ + & = & \sum_{i=1}^n \frac{\text{d}}{\text{d}\theta} \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\ + & = & -2 \sum_{i=1}^n \frac{x_i}{\sigma_i} \left( \frac{y_i-\theta x_i}{\sigma_i} \right) \nonumber \\ + & = & -2 \sum_{i=1}^n \left( \frac{x_iy_i}{\sigma_i^2} - \theta \frac{x_i^2}{\sigma_i^2} \right) \;\; = \;\; 0 \nonumber \\ +\Leftrightarrow \quad \theta \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2} & = & \sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2} \nonumber \\ +\Leftrightarrow \quad \theta & = & \frac{\sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2}}{ \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2}} \label{mleslope} +\end{eqnarray} +Damit haben wir nun einen analytischen Ausdruck f\"ur die Bestimmung +der Steigung $\theta$ der Regressionsgeraden gewonnen. Ein +Gradientenabstieg ist f\"ur das Fitten der Geradensteigung also gar nicht +n\"otig. Das gilt allgemein f\"ur das Fitten von Koeffizienten von +linear kombinierten Basisfunktionen. Parameter die nichtlinear in +einer Funktion enthalten sind k\"onnen aber nicht analytisch aus den +Daten berechnet werden. Da bleibt dann nur auf numerische Verfahren +zur Optimierung der Kostenfunktion, wie z.B. der Gradientenabstieg, +zur\"uckzugreifen.
+ + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Fits von Wahrscheinlichkeitsverteilungen} +Zum Abschluss betrachten wir noch den Fall, bei dem wir die Parameter +einer Wahrscheinlichkeitsdichtefunktion (z.B. Mittelwert und +Standardabweichung der Normalverteilung) an ein Datenset fitten wollen. + +Ein erster Gedanke k\"onnte sein, die +Wahrscheinlichkeitsdichtefunktion durch Minimierung des quadratischen +Abstands an ein Histogramm der Daten zu fitten. Das ist aber aus +folgenden Gr\"unden nicht die Methode der Wahl: (i) +Wahrscheinlichkeitsdichten k\"onnen nur positiv sein. Darum k\"onnen +insbesondere bei kleinen Werten die Daten nicht symmetrisch streuen, +wie es normalverteilte Daten machen sollten. (ii) Die Datenwerte sind +nicht unabh\"angig, da das normierte Histogramm sich zu Eins +aufintegriert. Die beiden Annahmen normalverteilte und unabh\"angige Daten +die die Minimierung des quadratischen Abstands zu einem Maximum +Likelihood Estimator machen sind also verletzt. (iii) Das Histogramm +h\"angt von der Wahl der Klassenbreite ab. + +Den direkten Weg, eine Wahrscheinlichkeitsdichtefunktion an ein +Datenset zu fitten, haben wir oben schon bei dem Beispiel zur +Absch\"atzung des Mittelwertes einer Normalverteilung gesehen --- +Maximum Likelihood! Wir suchen einfach die Parameter $\theta$ der +gesuchten Wahrscheinlichkeitsdichtefunktion bei der die Log-Likelihood +\eqnref{loglikelihood} maximal wird. Das ist im allgemeinen ein +nichtlineares Optimierungsproblem, das mit numerischen Verfahren, wie +z.B. dem Gradientenabstieg, gel\"ost wird. + +\begin{figure}[t] + \includegraphics[width=1\textwidth]{mlepdf} + \caption{\label{mlepdffig} Maximum Likelihood Estimation einer + Wahrscheinlichkeitsdichtefunktion. Links: die 100 Datenpunkte, die aus der Gammaverteilung + 2. Ordnung (rot) gezogen worden sind. Der Maximum-Likelihood-Fit ist orange dargestellt.
+ Rechts: das normierte Histogramm der Daten zusammen mit der \"uber Minimierung + des quadratischen Abstands zum Histogramm berechneten Fits ist potentiell schlechter.} +\end{figure} diff --git a/statistics/lecture/mlemean.py b/likelihood/lecture/mlemean.py similarity index 100% rename from statistics/lecture/mlemean.py rename to likelihood/lecture/mlemean.py diff --git a/statistics/lecture/mlepdf.py b/likelihood/lecture/mlepdf.py similarity index 100% rename from statistics/lecture/mlepdf.py rename to likelihood/lecture/mlepdf.py diff --git a/statistics/lecture/mlepropline.py b/likelihood/lecture/mlepropline.py similarity index 100% rename from statistics/lecture/mlepropline.py rename to likelihood/lecture/mlepropline.py diff --git a/statistics/code/iv_curve.mat b/regression/code/iv_curve.mat similarity index 100% rename from statistics/code/iv_curve.mat rename to regression/code/iv_curve.mat diff --git a/statistics/code/lin_regression.mat b/regression/code/lin_regression.mat similarity index 100% rename from statistics/code/lin_regression.mat rename to regression/code/lin_regression.mat diff --git a/statistics/code/lsq_error.m b/regression/code/lsq_error.m similarity index 100% rename from statistics/code/lsq_error.m rename to regression/code/lsq_error.m diff --git a/statistics/code/lsq_gradient.m b/regression/code/lsq_gradient.m similarity index 100% rename from statistics/code/lsq_gradient.m rename to regression/code/lsq_gradient.m diff --git a/statistics/code/lsq_gradient_sigmoid.m b/regression/code/lsq_gradient_sigmoid.m similarity index 100% rename from statistics/code/lsq_gradient_sigmoid.m rename to regression/code/lsq_gradient_sigmoid.m diff --git a/statistics/code/lsq_sigmoid_error.m b/regression/code/lsq_sigmoid_error.m similarity index 100% rename from statistics/code/lsq_sigmoid_error.m rename to regression/code/lsq_sigmoid_error.m diff --git a/statistics/code/membraneVoltage.mat b/regression/code/membraneVoltage.mat similarity index 100% rename from 
statistics/code/membraneVoltage.mat rename to regression/code/membraneVoltage.mat diff --git a/statistics/code/plot_error_surface.m b/regression/code/plot_error_surface.m similarity index 100% rename from statistics/code/plot_error_surface.m rename to regression/code/plot_error_surface.m diff --git a/statistics/code/sigmoidal_gradient_descent.m b/regression/code/sigmoidal_gradient_descent.m similarity index 100% rename from statistics/code/sigmoidal_gradient_descent.m rename to regression/code/sigmoidal_gradient_descent.m diff --git a/regression/lecture/Makefile b/regression/lecture/Makefile new file mode 100644 index 0000000..4486638 --- /dev/null +++ b/regression/lecture/Makefile @@ -0,0 +1,22 @@ +BASENAME=linear_regression +PYFILES=$(wildcard *.py) +PYPDFFILES=$(PYFILES:.py=.pdf) + +pdf : $(BASENAME).pdf $(PYPDFFILES) + +$(BASENAME).pdf : $(BASENAME).tex + pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true + +$(PYPDFFILES) : %.pdf : %.py + python $< + +clean : + rm -f *~ $(BASENAME).aux $(BASENAME).log $(BASENAME).out $(BASENAME).toc $(BASENAME).nav $(BASENAME).snm $(BASENAME).vrb + +cleanall : clean + rm -f $(BASENAME).pdf + +watch : + while true; do ! make -q pdf && make pdf; sleep 0.5; done + + diff --git a/regression/lecture/beamercolorthemetuebingen.sty b/regression/lecture/beamercolorthemetuebingen.sty new file mode 100644 index 0000000..c4a5da6 --- /dev/null +++ b/regression/lecture/beamercolorthemetuebingen.sty @@ -0,0 +1,61 @@ +% Copyright 2007 by Till Tantau +% +% This file may be distributed and/or modified +% +% 1. under the LaTeX Project Public License and/or +% 2. under the GNU Public License. +% +% See the file doc/licenses/LICENSE for more details. 
+ +\usepackage{color} +\definecolor{karminrot}{RGB}{165,30,55} +\definecolor{gold}{RGB}{180,160,105} +\definecolor{anthrazit}{RGB}{50 ,65 ,75 } + +\mode + +\setbeamercolor*{normal text}{fg=anthrazit,bg=white} +\setbeamercolor*{alerted text}{fg=anthrazit} +\setbeamercolor*{example text}{fg=anthrazit} +\setbeamercolor*{structure}{fg=gold,bg=karminrot} + +\providecommand*{\beamer@bftext@only}{% + \relax + \ifmmode + \expandafter\beamer@bftext@warning + \else + \expandafter\bfseries + \fi +} +\providecommand*{\beamer@bftext@warning}{% + \ClassWarning{beamer} + {Cannot use bold for alerted text in math mode}% +} + +\setbeamerfont{alerted text}{series=\beamer@bftext@only} + +\setbeamercolor{palette primary}{fg=karminrot,bg=white} +\setbeamercolor{palette secondary}{fg=gold,bg=white} +\setbeamercolor{palette tertiary}{fg=anthrazit,bg=white} +\setbeamercolor{palette quaternary}{fg=black,bg=white} + +\setbeamercolor{sidebar}{bg=karminrot!100} + +\setbeamercolor{palette sidebar primary}{fg=karminrot} +\setbeamercolor{palette sidebar secondary}{fg=karminrot} +\setbeamercolor{palette sidebar tertiary}{fg=karminrot} +\setbeamercolor{palette sidebar quaternary}{fg=karminrot} + +\setbeamercolor{item projected}{fg=black,bg=black!20} + +\setbeamercolor*{block body}{} +\setbeamercolor*{block body alerted}{} +\setbeamercolor*{block body example}{} +\setbeamercolor*{block title}{parent=structure} +\setbeamercolor*{block title alerted}{parent=alerted text} +\setbeamercolor*{block title example}{parent=example text} + +\setbeamercolor*{titlelike}{parent=structure} + +\mode + diff --git a/statistics/lecture/figures/charging_curve.pdf b/regression/lecture/figures/charging_curve.pdf similarity index 100% rename from statistics/lecture/figures/charging_curve.pdf rename to regression/lecture/figures/charging_curve.pdf diff --git a/statistics/lecture/figures/lin_regress.pdf b/regression/lecture/figures/lin_regress.pdf similarity index 100% rename from 
statistics/lecture/figures/lin_regress.pdf rename to regression/lecture/figures/lin_regress.pdf diff --git a/statistics/lecture/figures/lin_regress_abscissa.pdf b/regression/lecture/figures/lin_regress_abscissa.pdf similarity index 100% rename from statistics/lecture/figures/lin_regress_abscissa.pdf rename to regression/lecture/figures/lin_regress_abscissa.pdf diff --git a/statistics/lecture/figures/lin_regress_slope.pdf b/regression/lecture/figures/lin_regress_slope.pdf similarity index 100% rename from statistics/lecture/figures/lin_regress_slope.pdf rename to regression/lecture/figures/lin_regress_slope.pdf diff --git a/statistics/lecture/figures/linear_least_squares.pdf b/regression/lecture/figures/linear_least_squares.pdf similarity index 100% rename from statistics/lecture/figures/linear_least_squares.pdf rename to regression/lecture/figures/linear_least_squares.pdf diff --git a/statistics/lecture/figures/one_d_problem_a.pdf b/regression/lecture/figures/one_d_problem_a.pdf similarity index 100% rename from statistics/lecture/figures/one_d_problem_a.pdf rename to regression/lecture/figures/one_d_problem_a.pdf diff --git a/statistics/lecture/figures/one_d_problem_b.pdf b/regression/lecture/figures/one_d_problem_b.pdf similarity index 100% rename from statistics/lecture/figures/one_d_problem_b.pdf rename to regression/lecture/figures/one_d_problem_b.pdf diff --git a/statistics/lecture/figures/one_d_problem_c.pdf b/regression/lecture/figures/one_d_problem_c.pdf similarity index 100% rename from statistics/lecture/figures/one_d_problem_c.pdf rename to regression/lecture/figures/one_d_problem_c.pdf diff --git a/statistics/lecture/figures/surface.pdf b/regression/lecture/figures/surface.pdf similarity index 100% rename from statistics/lecture/figures/surface.pdf rename to regression/lecture/figures/surface.pdf diff --git a/statistics/lecture/linear_regression.tex b/regression/lecture/linear_regression.tex similarity index 100% rename from 
statistics/lecture/linear_regression.tex rename to regression/lecture/linear_regression.tex diff --git a/scientificcomputing-script.tex b/scientificcomputing-script.tex new file mode 100644 index 0000000..7627470 --- /dev/null +++ b/scientificcomputing-script.tex @@ -0,0 +1,236 @@ +\documentclass[12pt]{report} + +%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}} +\author{Jan Grewe \& Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}} +\date{WS 15/16} + +%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% \newcommand{\tr}[2]{#1} % en +% \usepackage[english]{babel} +\newcommand{\tr}[2]{#2} % de +\usepackage[german]{babel} + +%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{pslatex} % nice font for pdf file +\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref} + +%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry} +\setcounter{tocdepth}{1} + +%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[sf,bf,it,big,clearempty]{titlesec} +\setcounter{secnumdepth}{1} + + +%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro + + +%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{graphicx} +\usepackage{xcolor} +\pagecolor{white} + +\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)% + \put(0,4){\line(1,0){170}}% + \multiput(0,2)(10,0){18}{\line(0,1){4}}% + \multiput(0,3)(1,0){170}{\line(0,1){2}}% + \put(0,0){\makebox(0,0){{\tiny 0}}}% + \put(10,0){\makebox(0,0){{\tiny 1}}}% + 
\put(20,0){\makebox(0,0){{\tiny 2}}}% + \put(30,0){\makebox(0,0){{\tiny 3}}}% + \put(40,0){\makebox(0,0){{\tiny 4}}}% + \put(50,0){\makebox(0,0){{\tiny 5}}}% + \put(60,0){\makebox(0,0){{\tiny 6}}}% + \put(70,0){\makebox(0,0){{\tiny 7}}}% + \put(80,0){\makebox(0,0){{\tiny 8}}}% + \put(90,0){\makebox(0,0){{\tiny 9}}}% + \put(100,0){\makebox(0,0){{\tiny 10}}}% + \put(110,0){\makebox(0,0){{\tiny 11}}}% + \put(120,0){\makebox(0,0){{\tiny 12}}}% + \put(130,0){\makebox(0,0){{\tiny 13}}}% + \put(140,0){\makebox(0,0){{\tiny 14}}}% + \put(150,0){\makebox(0,0){{\tiny 15}}}% + \put(160,0){\makebox(0,0){{\tiny 16}}}% + \put(170,0){\makebox(0,0){{\tiny 17}}}% + \end{picture}\par} + +% figures: +\setlength{\fboxsep}{0pt} +\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}} +%\newcommand{\texpicture}[1]{} +\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}} + +% maximum number of floats: +\setcounter{topnumber}{2} +\setcounter{bottomnumber}{0} +\setcounter{totalnumber}{2} + +% float placement fractions: +\renewcommand{\textfraction}{0.2} +\renewcommand{\topfraction}{0.8} +\renewcommand{\bottomfraction}{0.0} +\renewcommand{\floatpagefraction}{0.5} + +% spacing for floats: +\setlength{\floatsep}{12pt plus 2pt minus 2pt} +\setlength{\textfloatsep}{20pt plus 4pt minus 2pt} +\setlength{\intextsep}{12pt plus 2pt minus 2pt} + +% spacing for a floating page: +\makeatletter + \setlength{\@fptop}{0pt} + \setlength{\@fpsep}{8pt plus 2.0fil} + \setlength{\@fpbot}{0pt plus 1.0fil} +\makeatother + +% rules for floats: +\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}} +\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}} + +% captions: +\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption} + +% put caption on separate float: 
+\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]} + +% references to panels of a figure within the caption: +\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}} +% references to figures: +\newcommand{\panel}[1]{\textsf{\uppercase{#1}}} +\newcommand{\fref}[1]{\textup{\ref{#1}}} +\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}} +% references to figures in normal text: +\newcommand{\fig}{Fig.} +\newcommand{\Fig}{Figure} +\newcommand{\figs}{Figs.} +\newcommand{\Figs}{Figures} +\newcommand{\figref}[1]{\fig~\fref{#1}} +\newcommand{\Figref}[1]{\Fig~\fref{#1}} +\newcommand{\figsref}[1]{\figs~\fref{#1}} +\newcommand{\Figsref}[1]{\Figs~\fref{#1}} +\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}} +\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}} +\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}} +\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}} +% references to figures within bracketed text: +\newcommand{\figb}{Fig.} +\newcommand{\figsb}{Figs.} +\newcommand{\figrefb}[1]{\figb~\fref{#1}} +\newcommand{\figsrefb}[1]{\figsb~\fref{#1}} +\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}} +\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}} + +% references to tables: +\newcommand{\tref}[1]{\textup{\ref{#1}}} +% references to tables in normal text: +\newcommand{\tab}{Tab.} +\newcommand{\Tab}{Table} +\newcommand{\tabs}{Tabs.} +\newcommand{\Tabs}{Tables} +\newcommand{\tabref}[1]{\tab~\tref{#1}} +\newcommand{\Tabref}[1]{\Tab~\tref{#1}} +\newcommand{\tabsref}[1]{\tabs~\tref{#1}} +\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}} +% references to tables within bracketed text: +\newcommand{\tabb}{Tab.} +\newcommand{\tabsb}{Tab.} +\newcommand{\tabrefb}[1]{\tabb~\tref{#1}} +\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}} + + +%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%\newcommand{\eqref}[1]{(\ref{#1})} +\newcommand{\eqn}{\tr{Eq}{Gl}.} +\newcommand{\Eqn}{\tr{Eq}{Gl}.} +\newcommand{\eqns}{\tr{Eqs}{Gln}.} 
+\newcommand{\Eqns}{\tr{Eqs}{Gln}.} +\newcommand{\eqnref}[1]{\eqn~\eqref{#1}} +\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}} +\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}} +\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}} + + +%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{listings} +\lstset{ + basicstyle=\ttfamily\footnotesize, + numbers=left, + showstringspaces=false, + language=Matlab, + commentstyle=\itshape\color{darkgray}, + keywordstyle=\color{blue}, + stringstyle=\color{green}, + backgroundcolor=\color{blue!10}, + breaklines=true, + breakautoindent=true, + columns=flexible, + frame=single, + caption={\protect\filename@parse{\lstname}\protect\filename@base}, + captionpos=t, + xleftmargin=1em, + xrightmargin=1em, + aboveskip=10pt +} + +%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{amsmath} +\usepackage{bm} +\usepackage{dsfont} +\newcommand{\naZ}{\mathds{N}} +\newcommand{\gaZ}{\mathds{Z}} +\newcommand{\raZ}{\mathds{Q}} +\newcommand{\reZ}{\mathds{R}} +\newcommand{\reZp}{\mathds{R^+}} +\newcommand{\reZpN}{\mathds{R^+_0}} +\newcommand{\koZ}{\mathds{C}} + + +%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{ifthen} + +\newcommand{\code}[1]{\texttt{#1}} + +\newcommand{\source}[1]{ + \begin{flushright} + \color{gray}\scriptsize \url{#1} + \end{flushright} +} + +\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}% + {\medskip} + +\newcounter{maxexercise} +\setcounter{maxexercise}{9} % show listings up to exercise maxexercise +\newcounter{theexercise} +\setcounter{theexercise}{1} +\newcommand{\codepath}{} +\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung} + \arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}% + {\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\codepath\exercisesource}}}\medskip\stepcounter{theexercise}} + 
+\graphicspath{{statistics/lecture/}{statistics/lecture/figures/}{bootstrap/lecture/}{bootstrap/lecture/figures/}{likelihood/lecture/}{likelihood/lecture/figures/}} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{document} + +\maketitle + +\tableofcontents + +\renewcommand{\codepath}{statistics/code/} +\include{statistics/lecture/descriptivestatistics} + +\renewcommand{\codepath}{bootstrap/code/} +\include{bootstrap/lecture/bootstrap} + +\renewcommand{\codepath}{likelihood/code/} +\include{likelihood/lecture/likelihood} + +\end{document} diff --git a/statistics/exercises/mlepdffit.m b/statistics/exercises/mlepdffit.m index 900fe22..f10128b 100644 --- a/statistics/exercises/mlepdffit.m +++ b/statistics/exercises/mlepdffit.m @@ -24,4 +24,7 @@ yy = gampdf(xx, p(1), p(2)); plot(xx, yy, '-k', 'linewidth', 5, 'DisplayName', 'mle' ); hold off; +xlabel('x'); +ylabel('pdf'); legend('show'); +savefigpdf(gcf, 'mlepdffit.pdf', 12, 8) diff --git a/statistics/exercises/mlepdffit.pdf b/statistics/exercises/mlepdffit.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b718c98771d789dc5fb5a215ab092687f710a66a GIT binary patch literal 5918 zcmb_=c|4SB`?!#pHnb_KhtdeM&oDEY5yM!rHMX(EY#4@_W??v$C5p0iiWW;G6h#Y; zSEtm`aunH;&M8Y&IEAu>gzqy_=XBop{r$e*&*wbz$K20#-PgX{_x0#HSx{jpI2NM& z>p*HA1P34iJ~IenVuG;aga(KM0V5DXLr}RqF-Hg>s62+4W5HqZ*&H$%A`%NZj1Y)4 z>#=8D%En5yj;<^H9Cct7xpN~`X1g$1AWU*Yt?4V%o4RKvUp`|zDnH=8{_DSYl8E*gV(=2Ui7b={tu`uJ?3 zGQ2Xw2h-?xPsgu3J{EN}W{L*j$ef#gDo5@sL?8{BYKG6vubqA~NH@uc8V<)Qt+p3v zn|3Q6S{`e8z+EqB60)^qWngx_^>J;pfsgAUUaOCmBauN7J2IlXw|lQE(mQ>s@o2P1 zW8=QO&K28;w}&+NFD8ERuzk$cKP8${#xxwA-nO%8Sdq81t=-%4#2p)JU|9;?AK!MQ zyFR&EdQBfCHTtccf0seJ7$}cV7ceEPtNY}8sBi^T_@|lHlMuIi=n*#q4Toy?uzn% zG%rsHIeG1I@%am_dIsZgij8B@4hPEmn^)Qy=D>wcwI&y9+O4DQHep;bZPiSR;g3{L z@p7TiYSGLs)gkXV%OmfjF{%UV56@=b+^v4=NA=2!idiqoUBysGy2`FHxlO0<-By&Z 
z-SqZ!*jP{3&&W2_)z`xNSJbuvsq5vHntECFs?qo{a?<^SdNAu}z`urFZmiGx*tPbD zKsCPHW~TE1)6_VRH_KelKRkR_4KuNkq~eix0aa$@Li>nHmXkX#UdJ`ta`9k_L9CI6 z0+&P`R5(UM%Q=5{7CV zC5nO?`@xH&f$>@TEy8L$`!xYU#r@kni+8Wh2`qYhU=6eQhMc)wL7?UX&(?#-oD;9( zh=esAx#kW%zWv*wB%YlkVZ>X1ziUSXvDdY{u zU8FZ96eit_FU(V0=bT>BlzXY)znDp=P^1M~BRgJ#tiu4uea=tGX`@fn9ZNJA zL_xRAMyIeT4>u_|mwDhvaXP3eA&_$eV5yZEF06hG+!9=ub}q- za^_94vr^+bwsK=%eqUAu8Ol2FJVD7*S{<_S)udM2PV5iMDHy(K_arvH;NIx=Mn>R^ zidDCv>8#!fw%L}^A9}{M2F|uW8lq>Tb~t$MoYv4)3fL2%!gR25t_Kl~Taa1QN)N+;4GXeGYcC7tx zSyd7C_N2y07+@l}SN3%3{ zwOY)gj<-I3a{e)ko+_wo)E>uMXH%x~cP)FHqMpcTU`LoOHT2liWV_T5aoX^jo<;ko z#g*+n-VSJb)oAk z%KGz<_S;=4>$ij7_^|bAz0Qb%+v0N>H&oWuHEzu^G<)*WBlfs1&XAI5pj)x)(o=$U zY#{-vQd4|~e(0)q(B>!T>l%zUGCiqDm;t8-w%lRH32Y2i+qYU?ysJfvTeNn&IiE(M z#Zje(R@Sb|PwHG;w_U!){G>Kv8709l%R)`N4(kyo=dLD?%+^tpZ}^2C3uL@JrfAmM zv-G!@^jJSynpxuUv?FpcR|&gR=>rIL7h{!{Se1uP51)O`^Bz5VS=A!ekf^HiXq`&S z-BNWn#(1kWYyTSUOq;dZ<;L{52WNB^9bdjhJ-^S2W4%Q=U7Pe#pR&97-RX*xMe?ho zyrO$=CR7swe}Qhgz3;|uReiqHQ4$_BQ|DNj8l~a8Wel;n6y|t9sr3Aqb>!@eAz0~q ziQ?${v*SRLhuwfEbZ-i@)z+q0??C10RD;;*w%7+YCUEjIqYIu#wt1zWXzpNkhG8d4 z)+7^Q%583a3Q?WS4V|!F{;%RL&*5cB0-MiQLL5f@7wc|*@bGsSgE?B z$}rXp$ql~Kfu8D81!33Um;SInL{e<|@uv#|ei2#w_<=*gTQ(2{j}tfgW@a&K#qZ?* z80mW*Iv#cVC#g6WvD_iDb@-)f`kTEY;gTp{ubaq{XVkvlL;BS(Mqh_77ItkQX}xXf zZTDGIE$4L>`b_imbenC1=t)4>O)UN?)9m_{vlfA6b`8dOQ2*ua-U!p2m0A^(@o7P2 zcBRHh5Lgv95HT~(+q-Nsc=OLW(`|3BZM$4@Iovt6Dfa02-jQ49rdr#3`yl5Eu^BboR%8nvX2C6RQefJ?a84FNAgLHpo*qaDfRw`@yK)O)B^v&H=ZS$ zH1h{IN?le#XrKnQLr|

_E~}s1iz4ktY1q(+y>BVNO1)jPzH~-ZaHV6>0fbB^N$Q zxc`Z^!|qfZEU)9OV9-fFhDIo-<+`kdojvvTTZtUdv?#-^7ls~QB9t=rbkw~i3V~Kl%|zOOlRI*n|-qRMbx10r#(0XjT<`o$X6nS|$t9nNlasAR&O0y41XAP8p(Q!I;*%n6|6Z$|#b z%7r->g+UpS$qSJ;F_j7!tYD59U~&Sup(N=0pKn6}E}H~($Jrz81r$yo*E&+jagKCw zVMPYB2yCb+8De6}0277-lQIM_|M`Jg1aL$wk`9GtN7a#SkP@XKf)LPkfHZ^`Dk4fr z(0LLfxR!wtP+)$GIG6;b+dBbbKA#uN6$5BE0gi&9FouA?kP*TO=L>@Y6x;|3vJ|rY zi8L^2zAyyeNYFsBSU^M|!o$Pi;TSky7=S<#2m}NYjX(f!a_w`KzZ|u zaOvO3LPhX-o!~5f2!cyTpx{WvS3wq_IYO>L%;kpyGQkWcUm_+!SvWKj{4rrn3=RoH zq5P3BB>2G43^txcz~WiPSk_lbg1>0^ohT3i%EEvy09Qn?AlMQXM@WL23B_E0jufy8 zmOkO7BdNoMxt;q6lfz3v<$>i zz)vCIr;)$o$R^*Ae4z^%PV#@V2auJ%yO0ZhXE8zioyY_sbN62?{YP!LxBq`z9}*%< zOoqVTo@l{mNkYJ$V`K5Z4EsMAB{u7ungWTCH=o&T7J|d$$TSjxfkJ_LvROobzA%IV z<|RWQ;DHT6maYgesi0&N5Gm%0c^vY5-I(#jUlu&BtPY4gMrZ&DDuuB*{tO9E3?(DP zAp(S0z{KIe_P`+c8v7feeX+(UV_yObYwU}{u^D(Ifr9~CAzUE(5+oj%{ADrE9r35c z{}94?5LmrnG!fr=0~rGRL(f0Q=+A}-Hg6I%T-NTu)%OP_`p>TXwS4vdTefcpPcK=; zfT5AqjGhHiR!}G;8iz7Q5pZY_4_4cML(P-&g?jd8E_P<-hJZ^T2e4;|1Kq*KC^7_W zLRoOYjUyDv`V=0Hfg|BK3>xs(qk%%O4Dao5z)`>vg3?0+0Otsim=i(-?6{#4DS&~a z!0j+J78bw>wcabHT-v2FqwtqVrr9zHB1PP$f5Lp}j@&HCS90mvY0}C*W5fVJaWDgMf6^26* zz{|?_Fbob0#^rA?3?4kjzlR|~_x~M+!5ID9K8%skzsoTGPcS5Sq<=31V~qKCT090k z>c8$23mIG|q6q?qBUC<{E!!V+hMMPkVR{~bQE>yJpx5Q^s=kHevH5M5nMM=Ioh0E1{F Aod5s; literal 0 HcmV?d00001 diff --git a/statistics/exercises/mlepropfit.pdf b/statistics/exercises/mlepropfit.pdf new file mode 100644 index 0000000000000000000000000000000000000000..da9856e452781a3b66d6de91729d4343c2014a23 GIT binary patch literal 9574 zcmb_?c|4SD_qcr-S+W+Yk$o96!^~i^Z)M;2b!N;k48zQjB}*crLUvLqkwlh|5+TZ7 zvJ*-PkxD9Szk7Omp7(v8-~0V7pU->$V5l((L&F>5abzF7wl;`Lqu?=tpzzC`?!8rz zrof`LyW<@`a{`Z!ykt22^xNtY*hNf-@l_VfeNvm-$!}e=?8gFWQT$&6FI%KQ9ZUp` zY&LRF{zzT}|9rjne(Zx+%+ze`&RwVIpWhW*em3v@+^ERf$)HBQPuE`F?tQIpy5qOI zJFl2zqmBJ3c3^(&SPU!RV7dD_elip6TT&ivf!h&(=j=;d~jPV-8A=gD(xomu#7kDI)&BV2pB!b*(;ni>7yTaFx-%XNu!F>)tu>ShksRR1cC?8x* zu|&mY2j@qm ztf=yWsxcdn4h4Sg;&U6RNG*TxY3Fqe(B7i*T+<`i@`C7guD;0)P>G#oQuiV z2+x 
z9ln0`aO=<64=&sf&Zn-n?rwa)5$U+GerR+=<=&&?+uQRMiPHXBgI84H^@{=@69Fq(9YsEClhu|#1s8nv6WlRxa0$t zHLOMY*fv z+d=-DJ{{AbOhgnVqf5r=&HZoPi6H^h3as#|m>D5rX77rc`eys~+d*~d!xkVF(g z$n8kQfI_}gfaA?lW~akAqn!_VXdTk})iHaWJ0KRFKN5L*-ZK@;khT_Mlh!wSV* z16q|7vFQE1BbB!tc1}JY*PF6CKk7d>Bs3a{T$kaP_mRyi5>MVR9nGAcUKO4_v9@Jm zt-LoCxnw=q!1fhW(J|Jgl$-hX(cNrB?^LBb@deM1^^u$a8&hxBR%vz3%V#R)DK}h_ z?wVAy;y!?zyAIq5PB~>3{>@;kK|C??iOq!CQ=d9g)5R}vEoINDjsk(i#WO3r*W`E^ z-`PrtKc=bnNuN);SfkO$!T;PC6#i8|3MDCQbU)y@RA@uUS?oN6Vs+;VOYXpnGZ&=z zhf}Y5heU!4?oal=B7gqc?54xvcco@_G3D>ztf}p-Ph*I;EOx zCbP_iGc}Y<9~QLAPjva5hX0b^{-*&o+6P)3m)CmK^a8T+c$tTJ`-Eb63iThCOjzx4qkO>xXTdq91%pnOq$%C_Q=H-7 z7}vRKd^ckD+x1ly;~rBourfFYwYo_dC55t{Yj%1-*&DrO#5iA!T{-TH+84IN|7qn6 zy4`c6((m?EjELJ(RE?3A>CT8V=*c6zK_PyvftH4ALzcj$?0U_%smQ zBXiHkps_JG_2G2wm^yY%KmO`Wo_9sOR#CaO?d6+VI_&|lapoD8r3I^8reayZ+8U7( zvE>C22N$zvy50GtxX;!SA*doPE1hJy8%HNcHJoT}O<`a^2W#v+HZx~+v#rSLv*=ak zng$C|9g!-}+*8cRZ(Uz}``6pF$e&fRCl7vSFdxTasfpKQMox8UmSY1J`d`20_BCXx zp6JRtwJZ3rv1YhuA*{M#?TvWUacMScJDX`&X>!`-Zz@Snpama)0ThC0}S!uu&`=Ir`^Z5b&>_~~cKRgB<%aZo#Z5jg?-#a;h5fppu8iK^d>MZO`JyRJ#+d&B z|ENrKV($yEAGhD(eblE}HS;VtymnZMb+x!*-R;;^ZpWJn2Ju@pX;vpiG)L`z1pnyI%9={7v1(bLA-eXS0Vv1**?kY`-SO?O99g zUh<7M{3O!mlhz;=r8`b2J31)**fuvw%M2~sXPTBA@Z-HHF9;Zr- zl%~=($-1IS+c2N6Cq_sEBJ23Zs`n!*J(sjE+wElINHw>O&WLGz$qY-qpV|@d>Ou~y zY-wNWY?~e%mev#yP<)m_qg%s)&r_j~P4*Tq<3_o@KrVXt>`L=JuZt$(F(~Wfu=wN< zbLUGb=pHq_rp?})x-WCizR;C=)D3uKk`TvPaxGEt)8d(yyGX*SuA_=E(@1?<`ctQ* z3$v4z4sQs1d&34E2e)Smcn6yC%BH8UR}K33N2I=8GvY{`L95McO21ny7&tq7Vd}g}b^9|$eUc|1{wa~fNN+jb{$w8w z0{`+I;xuVKU4Gl3m*so=`5fFO{LR~}`G;;gAa7`>WZU1s-z_?-$&%ofND@3c8M}~ z;?9{{6BU#m3;ThUT%7pOMlGzGk;Pga#MEZfJK0YV`(#{-XH?{9MeU#YFr4mn3qX_d zVDsfo-2-v?FLTfw&VFp6-){Z5hwOZ)cIRcW38Vm8ZIBt=!?3$(CV$5IsrMckB zJsI)1Ot9z5=d7xOItF>gg;6taq7Mb{N-e6zw!XWorHkukjz+?x3wn!2+FOHIRO?dW zKT>22(@Qvb8DBfrs3+I3u(CRrPdMs@!}GW;p9k%Ivj@Ld;9goi_iPd_IklP6Ex4w@ zQsW!Irj#UUvi{nVFK91V{`r}MC3kUkl36~RpTylCFzLHyaE>*6n+xFWezP~KucmmC zlkK`x3EpFzP&+zq8?5VYYaR8dX`ughYcv$&-B>-d*(D!Fs(HTLzwx%R%)K0)QP2om 
z;@Y=@O2Y*_dpN0d-nN`=?j`80(pt&_?Mo@dsp5FaJ>}&UX)p7n@x_5e*GCcJcjmJc zUb|r06zifdE)`U|x!HxJ-*N4hGaV0wP=@m_KFC2R$FP^FGDs4|Oh#>VO&e35)bUTw zvAh!ovM^$a&HXFwAPnRBpyIZ9<)eUt@tV<^Ixj+wT|%LbI|yij&XYt~ytfw=7`T!q z=6(f@{o*$PGacUVhwLCIm%}gO6S0CHw49?}+iXqh_*Vm*>CF?3+5dn{^qm8e;X{dv zI(;RamU;qOiR;$aCR|t+a)OwAn6*rTtMno-Oi3nBN-E&4=<(^Zbn ztNTn6FIK!P4Xdj+C|&5bev8hK#q}$G@yNlrDIe!|^$R$W=@L0O*>mPDsir`-S1t>G zr7ygGEGt!(DLWjjHcPCm$|&f|80l=j+xE(`>g+;TK~>BU>0M=lwwJ-WDZVi`v+43B z8{68P`qH4RJGCo;F93|a#l97m?}7CTj5IFAL_c9)riLQYT3DQ9=+e;iZqB7n8vprJ z_{5ot8<*YORIK}ZPj#{b~sg9^2K=bgg_%wJt{guN_P6F?cYZf+Z#o?c9r>xQ0Z z7tg%IgIcuFSFw8|-Yn5^;d+AUa^VRZps5u01$W(f#k6H$^nJ)}{JG67F(|#6-3B$l z`P33B<>2GtEK^NpXycOwZ4K~*+#%?Fif5|ejdw#R%bi-#1AC>qVZXqQHYcl^ zS*cn-lziSuh`Ic&H%2$nDapBBb>I5-nY#13mhR_lzt74SG@xs~CdZ6oTrb81aaXs= z_Za9#+;Y|uQ@_WfX)H^yl)`V_)0)v8>I%!}Ni7R}lcJ^jy#Kw@W-Ri?a-_&OpEYdEVYv^JTKZ&#Ey^=&7Pa6=RTGRC# zj6s)rds^#8-STU>-NwduKamee_Z-qu_F}HiyL`@{xP4PHVeZ0+)mH7 zHO$iL`ZFkxPiT< z**n$IdB}3Y_WteQ`t5@QQ_2(l<>iH|t8aoDNiJ3X4ckXFgJ|Z7R>DatAt&9wjdmBL zZhx&$`A{Tan-IYTZl-QO@wVe;RGn5Jj4KSEwXi1dZCQh#UMSTkv&SCtZd@-K4K>Rz z6#wyBffy7f?o5*P58Q0aZFJT`8%CSnlRW$-hxSZJ*U#c6vY&lcvomI$`HS{Nxpr9p z4US{EfRTx35O`d;I}CfX-w53PRqhQ?2@@aXC2&VI zh#0`$FtVFLSv1~t+MNpnIUO6~nj8(%PFfWoX!GYCkC>7dfr=|^M3)t=-5d2hznT{w zwH<&<5gPH+F!`)F_o3L8O0YODe6wlsl*V&Fk7@D*W18nU z+#jo<{Lut`bn?Q}gEq>Vs|#;@9AIP=kK#dN>cgf(-%`^J1wCW#)xM52QL`Sk_dEVj zjQ!(O}=VWVAcC!Yx*;rHK}SgRHKZ^=AUi`9O|rDb}S>584vs_m0A zwT5E4#Yb*Mr@YUc-dWH2xhfsf>A*&SakZgs&E+-vd>f+`SeVg1AiIakiyr zCKH(U=1l7WFJ)GJD|0=5P!gwOtO096=*&EfQSevhdG``X)<+w8!7ubm>&*MJdH zW3lMXYdHdKDS;Eud4!os{Nrnc8#68sdY4DF6f)TCc!e9e)an&PN}&2eTpN!sTs$Az z{H1Z`d3!VuVj&a$(EL%ML(P-rPNnL8gN2J?*V02eyVZH~DbBZgAD$ZKO!}ZZx1(<4 zHYC}@a2- zz7S%DYeHXiv#ttaw%3k5W3=z7q8e4xhYLAZj(e4lQ^$*ygnB4BH7>-cem84SKRV1C zN;QfLZVRmab|PoEdo3)F3-js{s6`BEYG5;X=Swdn4EzAy=3!u@vVo9yWc)?wXVx7=CEuFU` zq>p(CoTxP|m<8S9J2L8ed}H?N-AQYYfzbyV@6_<7^@NeTnRj6E=8s4jugqj_r5`}{ zAFxt{e7>8jcx;f&eNS`8&zYQFdL*)G$O_9YtU|uX9x!dIc0L<6jm;nru5qVjsJna{iRhdP8m(_}WP2!6H7?vGygU 
zn6OO(-*-OiYz2D+D4iAy1AlpP_RvWXwGpCv#qpzi_^R#Ef(U%Zbj(wD`sL&&^%EZr z7tA6tx8`ycUR)sqb)^6Hmw;oLO(&kx%%r!zpA`LgXigE8@Q&I3$3;H*Wk?&Q)x@f= zdPos;V98O>nZMS&Q}nE{KJaz%Q>W|rImOcq`y&RzUge*!8~)y6+tj=qI(54=1{am+I< z`yTj~TC5gOy7&wRZ7u`dds(L4?@x6BOIj6ks)GZE1RNY{9w=`P+_)z)9J+t+lu%ZV zh7s`dveFyQXScFA#ddO2P8_oU6sSBTgM0vVOdbtHtfwia+aG2O(Zn&WGQ3v@-7Me3ADe zZjEd;VyC?I3S+8WCx1Fk?%w#&ThYyqmM+K`#Ddz~I97OqGpYK-h@YRL5Id$a8Qw8M z^*8-quEl;Io86R6ShM;z+3l6(7O_<`cRFou|N1<1^PGJXj~-g=clsPk9${-#I4w#xL{Qub14l( zj@$hr)%b(Lm<&+E>CA0Gz@QCk0K5YXsbKnm?ic8{dUOeY4D0}AfPUa#3R1yPGolX_ z>;|H%ZwvAO0~|u>Ea}{SlfwTZ{Y`0rpBc6z);O!zU?QxL-I1L|J9c_@N4h9H4JR}?w3?cj)vQoi#Do*RDfvvgmQTm2( zba-HJAm9XWcpxc=iVoM3{Y8QX)^spb7W``qEkH}w*}@u3Ba=x1L>gE{Swk5PfvYNk z2^35q{v??a0ER20WC4~G9|GDI2){oV0&iNfel%Jz8VWsm@}%-fRb?{87Yf(V(15~J zpeia5USmw^iM&CfH@RmFpWqK z0@DR!u;fshmMqQ(1xF$AChl7v%k*Tyl#Dmq9kqBAnZ|naHN&z1G&-Md|KMnv4 ztk4!fR1zW#fwG__s{(@~A#fN(1qoON;0%EAIWSKDCr(@QFC>{_54cnNf3gQmFCtD9 zA~2iRg#MFA6G}hsf3x(zYP*HS|JnM$Kzd+ef-Nl2hGbl5AW*$b4F4b9{$KQx5AH8D zgF`8#Uy<#DgW^eex<*vMQE)&{9~_!MrUYVuxWohplYr7f4_7DD^YVN_KJK6n@bfmKH+2UGuW5{an&$Kn@v=)WZX2N(VV0oe<96Z%(e&<273OV59g z(Z32Ja1Ch5o}`yMVD-4zbl{G$i=fi*foQNfa7ToLRh8kuc8CfB;)@T$Q;0aQBti}1 zfCVnxP_P|Pl8q@uKvV=+1+1cqR)wKqFtCHY0hrDh9IlS?LLrm^-Ra41EvOt0e!>Y_n|Zp zK=1K)7z{Z6f5K22D*qcyO+^K`$Nx^NrurXXDkz{;_{TmKjsKk%0s9Xy6wto?NlT+( zh$K7(_&+8v)Q%X52hIfybZ2Cs1EO~aV5mtDfeiSA-W)+q48d-iMtU%$9s-3x8X6#s w4E0nHMn(omI8t8&r4BPt(}QXM-*n=2{}L!te0h6Wr32T4mCSs8==7c-Vmw*UYD literal 0 HcmV?d00001 diff --git a/statistics/exercises/mlestd.pdf b/statistics/exercises/mlestd.pdf new file mode 100644 index 0000000000000000000000000000000000000000..dad420c487808522a054f48ea53c5b43afd38afb GIT binary patch literal 5166 zcmb_gdt6Lg`*$zHaa=+PZInrx*>i7ZPEBg28>OpC3DeA;X=<7oGka>9Q>0VT<&aM9 zg_KJtU0tg~l6!h3Dmg-QK_&D?M1E^04&HNqzu)`$*nia8&;41?v!3ZKq`h1u;#lil;WlZ}%)7b!niL7@ zO}qp>ryJ$+X8n_-4~^YMtA4!yuI9v#>tYA1g;In7qulj+FPG%bbZ>(TR1C^ywM`nm&(wnKX!b$S0a0=R$VYW?9Mq_ zZj!)pz0H}q)|yAYBPw4T*)-69KE5OQ?w+;m!-PlP7mK8CcC>3<3pNxyT(@T}E3zN$ 
z8z3bH7ad!ZAbBKc5IJpQybh*3)@OGM4~;LOSS0hWrW=ao`6m}nSzz?%l?vmA*2JB) z2X3ysc`9zoQQXTM}D)3Ucw9ZHnk+F#2~tF}qfXSMu#F(GO|>Co&?k18?R(09G3Irl>kuV*)l zyLZaOqTzj2j3{%5;Z`kSmDK~YCb^z`Ul|_R_@{dqN4#r$`O?mG$|(CyiM|tWpK&p^ z-RIkuZYhIOCi=xJcAsvTm-yS#X!p0rJkqAN>n1y1nmb}*%y^wF`l4ihjp@uS`iv7L z+VT7t@vsoK-iqX$E4R#3=53smduigK@Z{m~rjxiMZC)F2i{2OW3dzP4-32CnGmURI zPZK3XT3l5m6gv2&WM}W2KQ$pmdx;&f_VIx`8RGSBT90uXc?)dD+Bw(U@jITDxAnMa zi)YZWVapaUYzY2cA+OAy`zaVMXX%X_D<+jP)^97i9B}qR=dCL5mww(&1;+1{L4`?4 zmgo03cEYJSJlW`poDWV{J2t+d#inJP4|bWnbAW7SLR_S`ct%Jnvm&QJeJjb+-`K;$ zjwDLZEH^98xLmeaQoZ9meoB`y4j(6uFUY%e1<{yxYQ-qGYhu@L}yrCl_ihtf~fMvV3A$%!=@Us}SclLZ{F)b&kY(h+~*HoAH3 z=sUy2PH9<7Ye(wWl1xFDKTA$)g+~@Q+Jr*_Ej?3Je=y01zT-hQ*kJ3RKgJFDwq>%e?&Ui`(8<(p-!f0fS-i|~no3+gYq zFFqWxyO$pvvwgvioQ1wd(Z$|zKdtA?4UgsP8g3sJO*ium)Sb>-bVOA#ImEQnxZXYG z>fWlt!u}(xyNQLo3cl~v18-Wa3}R}Xt458XcwK2mb?w*N8|YTezJbD*BdJ<}SdiIV(V=65U2V^xH9y{W0T2o~2CcLotEH`U$jK!~Jl7I%qF?vo? z+(Dd4RAAgwc7}J%iUz~XQhS4$K4ykGI?Kj6=6esnLSJD2@@R2GplN>6>1la#6P4HWPopQpp!oMmX)A9K`DpE5z9Z`oh3qdy|Z=D}_CxU+(a zY1b}MPBe{p`}6IvqkHzzCq+opW0&oIJ!`s6)` ztK9p;>`14o;q}|eo}BOpI@wHl6n^TO3- zEj~M4Xynntw z@5q#tyv>i>Zfn$azt%O{^MzP0GpmVnRT&t@iy0#&;g?;x{tk(vMu?HSkR6LU< zE<9n}!uruQ!Q4udwnX@3&YSdY&x%*vi(S$Ff;p!N5&M?j?Ax2tskk+^WWu^l{Deu7 zd-rrqe3jbH9+eM-s_J**f4}ZdaHUy~GiJMXI!(?v_&$~=j$%w3_Uw{L!oS9CiRJ0! 
z;!GFHw6l+hd>Jr(ZYyu)QVQ>$-v7_Ozbql~;BLeh70~^LnwK zw{2X}b$fYNRQ0m~DRQ@QVAncZe)z*IeN9Js6#-l;Sdr7D-6A1 zm<{_+p;0NgAsX#3Waky3Kwvk%7=c|7p@eTM*FgRxz^2kj5Sc~^z`-7J6yQ9F1f-OR z427r|2n1wAC08p2hzh_%H>F(QiJ<;40FV&ug=kQqXQx3OJO@=-A2sadK%0yi_rL+b z2y?+ZFsOnspgBky3J>$}5qJV)AUQn~n(iJ{-5IXj-z+q}0Uxa~W z`H9QKF~8Od5$7q%%N zlFvzhll5mkm?sg55KuKxCa`UEI8-82s{+2z#L^!MWeDSwsbmI+Gt@(t)*1z05Q3mk zAR?B?tnl4s<#& zj|CNks03DXxgO5;b1?x8TNA1X1x0`~p;DQOt+B!ns<6QtBg1%TPy`LJ!uz_oL8x3V z4UwP_g~%e331q4{BvSH2k#M;(1R@g|cwkZ~6tO)(`+YPB-mLIJD5_w?aCmq)F`P=2 zE5$IG#bUuE3QVC8fCNDmAw&5Zf=o3J_!|sF;`>=mrXmi;L=?zFVTmtHCX(P!j(8v) zr9^>B#~ zz?Sl5Vk^9cAVfrbwG_p3U^G+#qY64h7|0@1`Ktn%LjEcmi7Hsd;mlmJ%|5Cx)8*;Epn zM1q!iaUskwq+v2vF=#{(yBF4ErvF|0EckrKYLtiw2XF`#hmFyX55%C;sdPvL4bdnT zbbueQ2a?yfylRVR1zIvg-^Puk}r`WO7MFe1ba%f2-p`0pffo@Ko~lJU?-VK4(fq{ zBiM-t`CHoA@F*-g%Z9;ZP^q>wmW3UU#j~L^sSFFcjRlXy`M)IsFmR\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}} + +\graphicspath{{figures/}} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{document} + +\include{descriptivestatistics} + +\end{document} + + +\end{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Statistics} +What is "a statistic"? % dt. Sch\"atzfunktion +\begin{definition}[statistic] + A statistic (singular) is a single measure of some attribute of a + sample (e.g., its arithmetic mean value). It is calculated by + applying a function (statistical algorithm) to the values of the + items of the sample, which are known together as a set of data. + + \source{http://en.wikipedia.org/wiki/Statistic} +\end{definition} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Data types} + +\subsection{Nominal scale} +\begin{itemize} +\item Binary + \begin{itemize} + \item ``yes/no'', + \item ``true/false'', + \item ``success/failure'', etc. 
+ \end{itemize} +\item Categorical + \begin{itemize} + \item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''), + \item blood type (``A/B/AB/0''), + \item parts of speech (``noun/verb/preposition/article/...''), + \item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc. + \end{itemize} +\item Each observation/measurement/sample is put into one category +\item There is no reasonable order among the categories.\\ + example: [rods, cones] vs. [cones, rods] +\item Statistics: mode, i.e. the most common item +\end{itemize} + +\subsection{Ordinal scale} +\begin{itemize} +\item Like nominal scale, but with an order +\item Examples: ranks, ratings + \begin{itemize} + \item ``bad/ok/good'', + \item ``cold/warm/hot'', + \item ``young/old'', etc. + \end{itemize} +\item {\bf But:} there is no reasonable measure of {\em distance} + between the classes +\item Statistics: mode, median +\end{itemize} + +\subsection{Interval scale} +\begin{itemize} +\item Quantitative/metric values +\item Reasonable measure of distance between values, but no absolute zero +\item Examples: + \begin{itemize} + \item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C) + \item Direction measured in degrees from magnetic or true north + \end{itemize} +\item Statistics: + \begin{itemize} + \item Central tendency: mode, median, arithmetic mean + \item Dispersion: range, standard deviation + \end{itemize} +\end{itemize} + +\subsection{Absolute/ratio scale} +\begin{itemize} +\item Like interval scale, but with absolute origin/zero +\item Examples: + \begin{itemize} + \item Temperature in K (kelvin) + \item Length, mass, duration, electric charge, ... + \item Plane angle, etc. + \item Count (e.g.
number of spikes in response to a stimulus) + \end{itemize} +\item Statistics: + \begin{itemize} + \item Central tendency: mode, median, arithmetic, geometric, harmonic mean + \item Dispersion: range, standard deviation + \item Coefficient of variation (ratio standard deviation/mean) + \item All other statistical measures + \end{itemize} +\end{itemize} + +\subsection{Data types} +\begin{itemize} +\item Data type selects + \begin{itemize} + \item statistics + \item type of plots (bar graph versus x-y plot) + \item correct tests + \end{itemize} +\item Scales exhibit increasing information content from nominal + to absolute.\\ + Conversion ,,downwards'' is always possible +\item For example: size measured in meter (ratio scale) $\rightarrow$ + categories ``small/medium/large'' (ordinal scale) +\end{itemize} + +\subsection{Examples from neuroscience} +\begin{itemize} +\item {\bf absolute:} + \begin{itemize} + \item size of neuron/brain + \item length of axon + \item ion concentration + \item membrane potential + \item firing rate + \end{itemize} + +\item {\bf interval:} + \begin{itemize} + \item edge orientation + \end{itemize} + +\item {\bf ordinal:} + \begin{itemize} + \item stages of a disease + \item ratings + \end{itemize} + +\item {\bf nominal:} + \begin{itemize} + \item cell type + \item odor + \item states of an ion channel + \end{itemize} + +\end{itemize} + diff --git a/statistics/lecture/descriptivestatistics.tex b/statistics/lecture/descriptivestatistics.tex index f0e0f08..85ce58d 100644 --- a/statistics/lecture/descriptivestatistics.tex +++ b/statistics/lecture/descriptivestatistics.tex @@ -1,229 +1,3 @@ -\documentclass[12pt]{report} - -%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}} -\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}} -\date{WS 15/16} - -%%%% language 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% \newcommand{\tr}[2]{#1} % en -% \usepackage[english]{babel} -\newcommand{\tr}[2]{#2} % de -\usepackage[german]{babel} - -%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{pslatex} % nice font for pdf file -\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref} - -%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry} -\setcounter{tocdepth}{1} - -%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage[sf,bf,it,big,clearempty]{titlesec} -\setcounter{secnumdepth}{1} - - -%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro - - -%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{graphicx} -\usepackage{xcolor} -\pagecolor{white} - -\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)% - \put(0,4){\line(1,0){170}}% - \multiput(0,2)(10,0){18}{\line(0,1){4}}% - \multiput(0,3)(1,0){170}{\line(0,1){2}}% - \put(0,0){\makebox(0,0){{\tiny 0}}}% - \put(10,0){\makebox(0,0){{\tiny 1}}}% - \put(20,0){\makebox(0,0){{\tiny 2}}}% - \put(30,0){\makebox(0,0){{\tiny 3}}}% - \put(40,0){\makebox(0,0){{\tiny 4}}}% - \put(50,0){\makebox(0,0){{\tiny 5}}}% - \put(60,0){\makebox(0,0){{\tiny 6}}}% - \put(70,0){\makebox(0,0){{\tiny 7}}}% - \put(80,0){\makebox(0,0){{\tiny 8}}}% - \put(90,0){\makebox(0,0){{\tiny 9}}}% - \put(100,0){\makebox(0,0){{\tiny 10}}}% - \put(110,0){\makebox(0,0){{\tiny 11}}}% - \put(120,0){\makebox(0,0){{\tiny 12}}}% - \put(130,0){\makebox(0,0){{\tiny 13}}}% - \put(140,0){\makebox(0,0){{\tiny 14}}}% - \put(150,0){\makebox(0,0){{\tiny 15}}}% - \put(160,0){\makebox(0,0){{\tiny 16}}}% - \put(170,0){\makebox(0,0){{\tiny 17}}}% - 
\end{picture}\par} - -% figures: -\setlength{\fboxsep}{0pt} -\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}} -%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}} -%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}} -%\newcommand{\texpicture}[1]{} -\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}} - -% maximum number of floats: -\setcounter{topnumber}{2} -\setcounter{bottomnumber}{0} -\setcounter{totalnumber}{2} - -% float placement fractions: -\renewcommand{\textfraction}{0.2} -\renewcommand{\topfraction}{0.8} -\renewcommand{\bottomfraction}{0.0} -\renewcommand{\floatpagefraction}{0.5} - -% spacing for floats: -\setlength{\floatsep}{12pt plus 2pt minus 2pt} -\setlength{\textfloatsep}{20pt plus 4pt minus 2pt} -\setlength{\intextsep}{12pt plus 2pt minus 2pt} - -% spacing for a floating page: -\makeatletter - \setlength{\@fptop}{0pt} - \setlength{\@fpsep}{8pt plus 2.0fil} - \setlength{\@fpbot}{0pt plus 1.0fil} -\makeatother - -% rules for floats: -\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}} -\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}} - -% captions: -\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption} - -% put caption on separate float: -\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]} - -% references to panels of a figure within the caption: -\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}} -% references to figures: -\newcommand{\panel}[1]{\textsf{\uppercase{#1}}} -\newcommand{\fref}[1]{\textup{\ref{#1}}} -\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}} -% references to figures in normal text: -\newcommand{\fig}{Fig.} -\newcommand{\Fig}{Figure} -\newcommand{\figs}{Figs.} -\newcommand{\Figs}{Figures} -\newcommand{\figref}[1]{\fig~\fref{#1}} -\newcommand{\Figref}[1]{\Fig~\fref{#1}} -\newcommand{\figsref}[1]{\figs~\fref{#1}} -\newcommand{\Figsref}[1]{\Figs~\fref{#1}} 
-\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}} -\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}} -\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}} -\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}} -% references to figures within bracketed text: -\newcommand{\figb}{Fig.} -\newcommand{\figsb}{Figs.} -\newcommand{\figrefb}[1]{\figb~\fref{#1}} -\newcommand{\figsrefb}[1]{\figsb~\fref{#1}} -\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}} -\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}} - -% references to tables: -\newcommand{\tref}[1]{\textup{\ref{#1}}} -% references to tables in normal text: -\newcommand{\tab}{Tab.} -\newcommand{\Tab}{Table} -\newcommand{\tabs}{Tabs.} -\newcommand{\Tabs}{Tables} -\newcommand{\tabref}[1]{\tab~\tref{#1}} -\newcommand{\Tabref}[1]{\Tab~\tref{#1}} -\newcommand{\tabsref}[1]{\tabs~\tref{#1}} -\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}} -% references to tables within bracketed text: -\newcommand{\tabb}{Tab.} -\newcommand{\tabsb}{Tab.} -\newcommand{\tabrefb}[1]{\tabb~\tref{#1}} -\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}} - - -%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%\newcommand{\eqref}[1]{(\ref{#1})} -\newcommand{\eqn}{\tr{Eq}{Gl}.} -\newcommand{\Eqn}{\tr{Eq}{Gl}.} -\newcommand{\eqns}{\tr{Eqs}{Gln}.} -\newcommand{\Eqns}{\tr{Eqs}{Gln}.} -\newcommand{\eqnref}[1]{\eqn~\eqref{#1}} -\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}} -\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}} -\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}} - - -%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{listings} -\lstset{ - inputpath=../code, - basicstyle=\ttfamily\footnotesize, - numbers=left, - showstringspaces=false, - language=Matlab, - commentstyle=\itshape\color{darkgray}, - keywordstyle=\color{blue}, - stringstyle=\color{green}, - backgroundcolor=\color{blue!10}, - breaklines=true, - breakautoindent=true, - columns=flexible, - frame=single, - caption={\protect\filename@parse{\lstname}\protect\filename@base}, 
- captionpos=t, - xleftmargin=1em, - xrightmargin=1em, - aboveskip=10pt -} - -%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{amsmath} -\usepackage{bm} -\usepackage{dsfont} -\newcommand{\naZ}{\mathds{N}} -\newcommand{\gaZ}{\mathds{Z}} -\newcommand{\raZ}{\mathds{Q}} -\newcommand{\reZ}{\mathds{R}} -\newcommand{\reZp}{\mathds{R^+}} -\newcommand{\reZpN}{\mathds{R^+_0}} -\newcommand{\koZ}{\mathds{C}} - - -%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{ifthen} - -\newcommand{\code}[1]{\texttt{#1}} - -\newcommand{\source}[1]{ - \begin{flushright} - \color{gray}\scriptsize \url{#1} - \end{flushright} -} - -\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}% - {\medskip} - -\newcounter{maxexercise} -\setcounter{maxexercise}{9} % show listings up to exercise maxexercise -\newcounter{theexercise} -\setcounter{theexercise}{1} -\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung} - \arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}% - {\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}} - -\graphicspath{{figures/}} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{document} - -\maketitle - -%\tableofcontents - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \chapter{\tr{Descriptive statistics}{Deskriptive Statistik}} @@ -453,418 +227,3 @@ Korrelationskoeffizienten nahe 0 (\figrefb{correlationfig}). $x$ abh\"angen, ergeben Korrelationskeffizienten nahe Null. 
$\xi$ sind normalverteilte Zufallszahlen.} \end{figure} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\chapter{\tr{Bootstrap Methods}{Bootstrap Methoden}} - -Beim Bootstrap erzeugt man sich die Verteilung von Statistiken durch Resampling -aus der Stichprobe. Das hat mehrere Vorteile: -\begin{itemize} -\item Weniger Annahmen (z.B. muss eine Stichprobe nicht Normalverteilt sein). -\item H\"ohere Genauigkeit als klassische Methoden. -\item Allgemeing\"ultigkeit: Bootstrap Methoden sind sich sehr - \"ahnlich f\"ur viele verschiedene Statistiken und ben\"otigen nicht - f\"ur jede Statistik eine andere Formel. -\end{itemize} - -\begin{figure}[t] - \includegraphics[width=0.8\textwidth]{2012-10-29_16-26-05_771}\\[2ex] - \includegraphics[width=0.8\textwidth]{2012-10-29_16-41-39_523}\\[2ex] - \includegraphics[width=0.8\textwidth]{2012-10-29_16-29-35_312} - \caption{\tr{Why can we only measure a sample of the - population?}{Warum k\"onnen wir nur eine Stichprobe der - Grundgesamtheit messen?}} -\end{figure} - -\begin{figure}[t] - \includegraphics[height=0.2\textheight]{srs1}\\[2ex] - \includegraphics[height=0.2\textheight]{srs2}\\[2ex] - \includegraphics[height=0.2\textheight]{srs3} - \caption{Bootstrap der Stichprobenvertielung (a) Von der - Grundgesamtheit (population) mit unbekanntem Parameter - (z.B. Mittelwert $\mu$) zieht man Stichproben (SRS: simple random - samples). Die Statistik (hier Bestimmung von $\bar x$) kann f\"ur - jede Stichprobe berechnet werden. Die erhaltenen Werte entstammen - der Stichprobenverteilung. Meisten wird aber nur eine Stichprobe - gezogen! (b) Mit bestimmten Annahmen und Theorien kann man auf - die Stichprobenverteilung schlie{\ss}en ohne sie gemessen zu - haben. 
(c) Alternativ k\"onnen aus der einen Stichprobe viele - Bootstrap-Stichproben generiert werden (resampling) und so - Eigenschaften der Stichprobenverteilung empirisch bestimmt - werden. Aus Hesterberg et al. 2003, Bootstrap Methods and - Permuation Tests} -\end{figure} - -\section{Bootstrap des Standardfehlers} - -Beim Bootstrap erzeugen wir durch Resampling neue Stichproben und -benutzen diese um die Stichprobenverteilung einer Statistik zu -berechnen. Die Bootstrap Stichproben haben jeweils den gleichen Umfang -wie die urspr\"unglich gemessene Stichprobe und werden durch Ziehen -mit Zur\"ucklegen gewonnen. Jeder Wert der urspr\"unglichen Stichprobe -kann also einmal, mehrmals oder gar nicht in einer Bootstrap -Stichprobe vorkommen. - -\begin{exercise}[bootstrapsem.m] - Ziehe 1000 normalverteilte Zufallszahlen und berechne deren Mittelwert, - Standardabweichung und Standardfehler ($\sigma/\sqrt{n}$). - - Resample die Daten 1000 mal (Ziehen mit Zur\"ucklegen) und berechne jeweils - den Mittelwert. - - Plotte ein Histogramm dieser Mittelwerte, sowie deren Mittelwert und - die Standardabweichung. - - Was hat das mit dem Standardfehler zu tun? -\end{exercise} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\chapter{\tr{Maximum likelihood estimation}{Maximum-Likelihood Methode}} - -In vielen Situationen wollen wir einen oder mehrere Parameter $\theta$ -einer Wahrscheinlichkeitsverteilung sch\"atzen, so dass die Verteilung -die Daten $x_1, x_2, \ldots x_n$ am besten beschreibt. Bei der -Maximum-Likelihood-Methode w\"ahlen wir die Parameter so, dass die -Wahrscheinlichkeit, dass die Daten aus der Verteilung stammen, am -gr\"o{\ss}ten ist. 
- -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Maximum Likelihood} -Sei $p(x|\theta)$ (lies ``Wahrscheinlichkeit(sdichte) von $x$ gegeben -$\theta$'') die Wahrscheinlichkeits(dichte)verteilung von $x$ mit dem -Parameter(n) $\theta$. Das k\"onnte die Normalverteilung -\begin{equation} - \label{normpdfmean} - p(x|\theta) = \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x-\theta)^2}{2\sigma^2}} -\end{equation} -sein mit -fester Standardverteilung $\sigma$ und dem Mittelwert $\mu$ als -Parameter $\theta$. - -Wenn nun den $n$ unabh\"angigen Beobachtungen $x_1, x_2, \ldots x_n$ -die Wahrscheinlichkeitsverteilung $p(x|\theta)$ zugrundeliegt, dann -ist die Verbundwahrscheinlichkeit $p(x_1,x_2, \ldots x_n|\theta)$ des -Auftretens der Werte $x_1, x_2, \ldots x_n$ gegeben ein bestimmtes $\theta$ -\begin{equation} - p(x_1,x_2, \ldots x_n|\theta) = p(x_1|\theta) \cdot p(x_2|\theta) - \ldots p(x_n|\theta) = \prod_{i=1}^n p(x_i|\theta) \; . -\end{equation} -Andersherum gesehen ist das die Likelihood (deutsch immer noch ``Wahrscheinlichleit'') -den Parameter $\theta$ zu haben, gegeben die Me{\ss}werte $x_1, x_2, \ldots x_n$, -\begin{equation} - {\cal L}(\theta|x_1,x_2, \ldots x_n) = p(x_1,x_2, \ldots x_n|\theta) -\end{equation} - -Wir sind nun an dem Wert des Parameters $\theta_{mle}$ interessiert, der die -Likelihood maximiert (``mle'': Maximum-Likelihood Estimate): -\begin{equation} - \theta_{mle} = \text{argmax}_{\theta} {\cal L}(\theta|x_1,x_2, \ldots x_n) -\end{equation} -$\text{argmax}_xf(x)$ bezeichnet den Wert des Arguments $x$ der Funktion $f(x)$, bei -dem $f(x)$ ihr globales Maximum annimmt. Wir suchen also den Wert von $\theta$ -bei dem die Likelihood ${\cal L}(\theta)$ ihr Maximum hat. - -An der Stelle eines Maximums einer Funktion \"andert sich nichts, wenn -man die Funktionswerte mit einer streng monoton steigenden Funktion -transformiert. 
Aus gleich ersichtlichen mathematischen Gr\"unden wird meistens -das Maximum der logarithmierten Likelihood (``Log-Likelihood'') gesucht: -\begin{eqnarray} - \theta_{mle} & = & \text{argmax}_{\theta}\; {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\ - & = & \text{argmax}_{\theta}\; \log {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\ - & = & \text{argmax}_{\theta}\; \log \prod_{i=1}^n p(x_i|\theta) \nonumber \\ - & = & \text{argmax}_{\theta}\; \sum_{i=1}^n \log p(x_i|\theta) \label{loglikelihood} -\end{eqnarray} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Beispiel: Das arithmetische Mittel} - -Wenn die Me{\ss}daten $x_1, x_2, \ldots x_n$ der Normalverteilung \eqnref{normpdfmean} -entstammen, und wir den Mittelwert $\mu$ als einzigen Parameter der Verteilung betrachten, -welcher Wert von $\theta$ maximiert dessen Likelhood? - -\begin{figure}[t] - \includegraphics[width=1\textwidth]{mlemean} - \caption{\label{mlemeanfig} Maximum Likelihood Estimation des - Mittelwerts. Oben: Die Daten zusammen mit drei m\"oglichen - Normalverteilungen mit unterschiedlichen Mittelwerten (Pfeile) aus - denen die Daten stammen k\"onnten. Unteln links: Die Likelihood - in Abh\"angigkeit des Mittelwerts als Parameter der - Normalverteilungen. Unten rechts: die entsprechende - Log-Likelihood. 
An der Position des Maximums bei $\theta=2$ - \"andert sich nichts (Pfeil).} -\end{figure} - -Die Log-Likelihood \eqnref{loglikelihood} ist -\begin{eqnarray*} - \log {\cal L}(\theta|x_1,x_2, \ldots x_n) - & = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x_i-\theta)^2}{2\sigma^2}} \\ - & = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma^2} -\frac{(x_i-\theta)^2}{2\sigma^2} -\end{eqnarray*} -Zur Bestimmung des Maximums der Log-Likelihood berechnen wir deren Ableitung -nach dem Parameter $\theta$ und setzen diese gleich Null: -\begin{eqnarray*} - \frac{\text{d}}{\text{d}\theta} \log {\cal L}(\theta|x_1,x_2, \ldots x_n) & = & \sum_{i=1}^n \frac{2(x_i-\theta)}{2\sigma^2} \;\; = \;\; 0 \\ - \Leftrightarrow \quad \sum_{i=1}^n x_i - \sum_{i=1}^n x_i \theta & = & 0 \\ - \Leftrightarrow \quad n \theta & = & \sum_{i=1}^n x_i \\ - \Leftrightarrow \quad \theta & = & \frac{1}{n} \sum_{i=1}^n x_i -\end{eqnarray*} -Der Maximum-Likelihood-Estimator ist das arithmetische Mittel der Daten. D.h. -das arithmetische Mittel maximiert die Wahrscheinlichkeit, dass die Daten aus einer -Normalverteilung mit diesem Mittelwert gezogen worden sind. - -\begin{exercise}[mlemean.m] - Ziehe $n=50$ normalverteilte Zufallsvariablen mit einem Mittelwert $\ne 0$ - und einer Standardabweichung $\ne 1$. - - Plotte die Likelihood (aus dem Produkt der Wahrscheinlichkeiten) und - die Log-Likelihood (aus der Summe der logarithmierten - Wahrscheinlichkeiten) f\"ur den Mittelwert als Parameter. Vergleiche - die Position der Maxima mit den aus den Daten berechneten - Mittelwerte. -\end{exercise} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Kurvenfit als Maximum Likelihood Estimation} -Beim Kurvenfit soll eine Funktion $f(x;\theta)$ mit den Parametern -$\theta$ an die Datenpaare $(x_i|y_i)$ durch Anpassung der Parameter -$\theta$ gefittet werden. 
Wenn wir annehmen, dass die $y_i$ um die -entsprechenden Funktionswerte $f(x_i;\theta)$ mit einer -Standardabweichung $\sigma_i$ normalverteilt streuen, dann lautet die -Log-Likelihood -\begin{eqnarray*} - \log {\cal L}(\theta|x_1,x_2, \ldots x_n) - & = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma_i^2}}e^{-\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2}} \\ - & = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma_i^2} -\frac{(x_i-f(y_i;\theta))^2}{2\sigma_i^2} \\ -\end{eqnarray*} -Der einzige Unterschied zum vorherigen Beispiel ist, dass die -Mittelwerte der Normalverteilungen nun durch die Funktionswerte -gegeben sind. - -Der Parameter $\theta$ soll so gew\"ahlt werden, dass die -Log-Likelihood maximal wird. Der erste Term der Summe ist -unabh\"angig von $\theta$ und kann deshalb bei der Suche nach dem -Maximum weggelassen werden. -\begin{eqnarray*} - & = & - \frac{1}{2} \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 -\end{eqnarray*} -Anstatt nach dem Maximum zu suchen, k\"onnen wir auch das Vorzeichen der Log-Likelihood -umdrehen und nach dem Minimum suchen. Dabei k\"onnen wir auch den Faktor $1/2$ vor der Summe vernachl\"assigen --- auch das \"andert nichts an der Position des Minimums. -\begin{equation} - \theta_{mle} = \text{argmin}_{\theta} \; \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 \;\; = \;\; \text{argmin}_{\theta} \; \chi^2 -\end{equation} -Die Summer der quadratischen Abst\"ande normiert auf die jeweiligen -Standardabweichungen wird auch mit $\chi^2$ bezeichnet. Der Wert des -Parameters $\theta$ welcher den quadratischen Abstand minimiert ist -also identisch mit der Maximierung der Wahrscheinlichkeit, dass die -Daten tats\"achlich aus der Funktion stammen k\"onnen. Minimierung des -$\chi^2$ ist also ein Maximum-Likelihood Estimate. 
- -\begin{figure}[t] - \includegraphics[width=1\textwidth]{mlepropline} - \caption{\label{mleproplinefig} Maximum Likelihood Estimation der - Steigung einer Ursprungsgeraden.} -\end{figure} - - -\subsection{Beispiel: einfache Proportionalit\"at} -Als Funktion nehmen wir die Ursprungsgerade -\[ f(x) = \theta x \] -mit Steigung $\theta$. Die $\chi^2$-Summe lautet damit -\[ \chi^2 = \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \; . \] -Zur Bestimmung des Minimums berechnen wir wieder die erste Ableitung nach $\theta$ -und setzen diese gleich Null: -\begin{eqnarray} - \frac{\text{d}}{\text{d}\theta}\chi^2 & = & \frac{\text{d}}{\text{d}\theta} \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\ - & = & \sum_{i=1}^n \frac{\text{d}}{\text{d}\theta} \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\ - & = & -2 \sum_{i=1}^n \frac{x_i}{\sigma_i} \left( \frac{y_i-\theta x_i}{\sigma_i} \right) \nonumber \\ - & = & -2 \sum_{i=1}^n \left( \frac{x_iy_i}{\sigma_i^2} - \theta \frac{x_i^2}{\sigma_i^2} \right) \;\; = \;\; 0 \nonumber \\ -\Leftrightarrow \quad \theta \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2} & = & \sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2} \nonumber \\ -\Leftrightarrow \quad \theta & = & \frac{\sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2}}{ \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2}} \label{mleslope} -\end{eqnarray} -Damit haben wir nun einen anlytischen Ausdruck f\"ur die Bestimmung -der Steigung $\theta$ des Regressionsgeraden gewonnen. Ein -Gradientenabstieg ist f\"ur das Fitten der Geradensteigung also gar nicht -n\"otig. Das gilt allgemein f\"ur das Fitten von Koeffizienten von -linear kombinierten Basisfunktionen. Parameter die nichtlinear in -einer Funktion enthalten sind k\"onnen aber nicht analytisch aus den -Daten berechnet werden. Da bleibt dann nur auf numerische Verfahren -zur Optimierung der Kostenfunktion, wie z.B. der Gradientenabstieg, -zur\"uckzugreifen. 
- - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Fits von Wahrscheinlichkeitsverteilungen} -Zum Abschluss betrachten wir noch den Fall, bei dem wir die Parameter -einer Wahrscheinlichkeitsdichtefunktion (z.B. Mittelwert und -Standardabweichung der Normalverteilung) an ein Datenset fitten wolle. - -Ein erster Gedanke k\"onnte sein, die -Wahrscheinlichkeitsdichtefunktion durch Minimierung des quadratischen -Abstands an ein Histogram der Daten zu fitten. Das ist aber aus -folgenden Gr\"unden nicht die Methode der Wahl: (i) -Wahrscheinlichkeitsdichten k\"onnen nur positiv sein. Darum k\"onnen -insbesondere bei kleinen Werten die Daten nicht symmetrisch streuen, -wie es normalverteilte Daten machen sollten. (ii) Die Datenwerte sind -nicht unabh\"angig, da das normierte Histogram sich zu Eins -aufintegriert. Die beiden Annahmen normalverteilte und unabh\"angige Daten -die die Minimierung des quadratischen Abstands zu einem Maximum -Likelihood Estimator machen sind also verletzt. (iii) Das Histgramm -h\"angt von der Wahl der Klassenbreite ab. - -Den direkten Weg, eine Wahrscheinlichkeitsdichtefunktion an ein -Datenset zu fitten, haben wir oben schon bei dem Beispiel zur -Absch\"atzung des Mittelwertes einer Normalverteilung gesehen --- -Maximum Likelihood! Wir suchen einfach die Parameter $\theta$ der -gesuchten Wahrscheinlichkeitsdichtefunktion bei der die Log-Likelihood -\eqnref{loglikelihood} maximal wird. Das ist im allgemeinen ein -nichtlinieares Optimierungsproblem, das mit numerischen Verfahren, wie -z.B. dem Gradientenabstieg, gel\"ost wird. - -\begin{figure}[t] - \includegraphics[width=1\textwidth]{mlepdf} - \caption{\label{mlepdffig} Maximum Likelihood Estimation einer - Wahrscheinlichkeitsdichtefunktion. Links: die 100 Datenpunkte, die aus der Gammaverteilung - 2. Ordnung (rot) gezogen worden sind. Der Maximum-Likelihood-Fit ist orange dargestellt. 
- Rechts: das normierte Histogramm der Daten zusammen mit der \"uber Minimierung - des quadratischen Abstands zum Histogramm berechneten Fits ist potentiell schlechter.} -\end{figure} - -\end{document} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Statistics} -What is "a statistic"? % dt. Sch\"atzfunktion -\begin{definition}[statistic] - A statistic (singular) is a single measure of some attribute of a - sample (e.g., its arithmetic mean value). It is calculated by - applying a function (statistical algorithm) to the values of the - items of the sample, which are known together as a set of data. - - \source{http://en.wikipedia.org/wiki/Statistic} -\end{definition} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Data types} - -\subsection{Nominal scale} -\begin{itemize} -\item Binary - \begin{itemize} - \item ``yes/no'', - \item ``true/false'', - \item ``success/failure'', etc. - \end{itemize} -\item Categorial - \begin{itemize} - \item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''), - \item blood type (``A/B/AB/0''), - \item parts of speech (``noun/veerb/preposition/article/...''), - \item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc. - \end{itemize} -\item Each observation/measurement/sample is put into one category -\item There is no reasonable order among the categories.\\ - example: [rods, cones] vs. [cones, rods] -\item Statistics: mode, i.e. the most common item -\end{itemize} - -\subsection{Ordinal scale} -\begin{itemize} -\item Like nominal scale, but with an order -\item Examples: ranks, ratings - \begin{itemize} - \item ``bad/ok/good'', - \item ``cold/warm/hot'', - \item ``young/old'', etc. 
- \end{itemize} -\item {\bf But:} there is no reasonable measure of {\em distance} - between the classes -\item Statistics: mode, median -\end{itemize} - -\subsection{Interval scale} -\begin{itemize} -\item Quantitative/metric values -\item Reasonable measure of distance between values, but no absolute zero -\item Examples: - \begin{itemize} - \item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C) - \item Direction measured in degrees from magnetic or true north - \end{itemize} -\item Statistics: - \begin{itemize} - \item Central tendency: mode, median, arithmetic mean - \item Dispersion: range, standard deviation - \end{itemize} -\end{itemize} - -\subsection{Absolute/ratio scale} -\begin{itemize} -\item Like interval scale, but with absolute origin/zero -\item Examples: - \begin{itemize} - \item Temperature in kelvin (K) - \item Length, mass, duration, electric charge, ... - \item Plane angle, etc. - \item Count (e.g. number of spikes in response to a stimulus) - \end{itemize} -\item Statistics: - \begin{itemize} - \item Central tendency: mode, median, arithmetic, geometric, harmonic mean - \item Dispersion: range, standard deviation - \item Coefficient of variation (ratio standard deviation/mean) - \item All other statistical measures - \end{itemize} -\end{itemize} - -\subsection{Data types} -\begin{itemize} -\item Data type selects - \begin{itemize} - \item statistics - \item type of plots (bar graph versus x-y plot) - \item correct tests - \end{itemize} -\item Scales exhibit increasing information content from nominal - to absolute.\\ - Conversion ``downwards'' is always possible -\item For example: size measured in meters (ratio scale) $\rightarrow$ - categories ``small/medium/large'' (ordinal scale) -\end{itemize} - -\subsection{Examples from neuroscience} -\begin{itemize} -\item {\bf absolute:} - \begin{itemize} - \item size of neuron/brain - \item length of axon - \item ion concentration - \item membrane potential - \item firing rate 
- \end{itemize} - -\item {\bf interval:} - \begin{itemize} - \item edge orientation - \end{itemize} - -\item {\bf ordinal:} - \begin{itemize} - \item stages of a disease - \item ratings - \end{itemize} - -\item {\bf nominal:} - \begin{itemize} - \item cell type - \item odor - \item states of an ion channel - \end{itemize} - -\end{itemize} -