diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..74de0a4 --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +BASENAME=scientificcomputing-script + +pdf : $(BASENAME).pdf + +$(BASENAME).pdf : $(BASENAME).tex + export TEXMFOUTPUT=.; pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true + +clean : + rm -f *~ $(BASENAME).aux $(BASENAME).log $(BASENAME).out $(BASENAME).toc + +cleanall : clean # additionally removes the PDF built by the pdf target + rm -f $(BASENAME).pdf + +watch : + while true; do ! make -q pdf && make pdf; sleep 0.5; done + + diff --git a/statistics/code/bootstrapsem.m b/bootstrap/code/bootstrapsem.m similarity index 100% rename from statistics/code/bootstrapsem.m rename to bootstrap/code/bootstrapsem.m diff --git a/bootstrap/lecture/Makefile b/bootstrap/lecture/Makefile new file mode 100644 index 0000000..f7f02ba --- /dev/null +++ b/bootstrap/lecture/Makefile @@ -0,0 +1,22 @@ +BASENAME=bootstrap +PYFILES=$(wildcard *.py) +PYPDFFILES=$(PYFILES:.py=.pdf) + +pdf : $(BASENAME)-chapter.pdf $(PYPDFFILES) + +$(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex + pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true + +$(PYPDFFILES) : %.pdf : %.py + python $< + +clean : + rm -f *~ $(BASENAME)-chapter.aux $(BASENAME)-chapter.log $(BASENAME)-chapter.out $(BASENAME).aux $(BASENAME).log + +cleanall : clean + rm -f $(BASENAME)-chapter.pdf + +watch : + while true; do ! 
make -q pdf && make pdf; sleep 0.5; done + + diff --git a/bootstrap/lecture/bootstrap-chapter.tex b/bootstrap/lecture/bootstrap-chapter.tex new file mode 100644 index 0000000..d185cd4 --- /dev/null +++ b/bootstrap/lecture/bootstrap-chapter.tex @@ -0,0 +1,225 @@ +\documentclass[12pt]{report} + +%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}} +\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}} +\date{WS 15/16} + +%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% \newcommand{\tr}[2]{#1} % en +% \usepackage[english]{babel} +\newcommand{\tr}[2]{#2} % de +\usepackage[german]{babel} + +%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{pslatex} % nice font for pdf file +\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref} + +%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry} +\setcounter{tocdepth}{1} + +%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[sf,bf,it,big,clearempty]{titlesec} +\setcounter{secnumdepth}{1} + + +%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro + + +%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{graphicx} +\usepackage{xcolor} +\pagecolor{white} + +\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)% + \put(0,4){\line(1,0){170}}% + \multiput(0,2)(10,0){18}{\line(0,1){4}}% + \multiput(0,3)(1,0){170}{\line(0,1){2}}% + \put(0,0){\makebox(0,0){{\tiny 0}}}% + \put(10,0){\makebox(0,0){{\tiny 1}}}% + \put(20,0){\makebox(0,0){{\tiny 2}}}% + 
\put(30,0){\makebox(0,0){{\tiny 3}}}% + \put(40,0){\makebox(0,0){{\tiny 4}}}% + \put(50,0){\makebox(0,0){{\tiny 5}}}% + \put(60,0){\makebox(0,0){{\tiny 6}}}% + \put(70,0){\makebox(0,0){{\tiny 7}}}% + \put(80,0){\makebox(0,0){{\tiny 8}}}% + \put(90,0){\makebox(0,0){{\tiny 9}}}% + \put(100,0){\makebox(0,0){{\tiny 10}}}% + \put(110,0){\makebox(0,0){{\tiny 11}}}% + \put(120,0){\makebox(0,0){{\tiny 12}}}% + \put(130,0){\makebox(0,0){{\tiny 13}}}% + \put(140,0){\makebox(0,0){{\tiny 14}}}% + \put(150,0){\makebox(0,0){{\tiny 15}}}% + \put(160,0){\makebox(0,0){{\tiny 16}}}% + \put(170,0){\makebox(0,0){{\tiny 17}}}% + \end{picture}\par} + +% figures: +\setlength{\fboxsep}{0pt} +\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}} +%\newcommand{\texpicture}[1]{} +\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}} + +% maximum number of floats: +\setcounter{topnumber}{2} +\setcounter{bottomnumber}{0} +\setcounter{totalnumber}{2} + +% float placement fractions: +\renewcommand{\textfraction}{0.2} +\renewcommand{\topfraction}{0.8} +\renewcommand{\bottomfraction}{0.0} +\renewcommand{\floatpagefraction}{0.5} + +% spacing for floats: +\setlength{\floatsep}{12pt plus 2pt minus 2pt} +\setlength{\textfloatsep}{20pt plus 4pt minus 2pt} +\setlength{\intextsep}{12pt plus 2pt minus 2pt} + +% spacing for a floating page: +\makeatletter + \setlength{\@fptop}{0pt} + \setlength{\@fpsep}{8pt plus 2.0fil} + \setlength{\@fpbot}{0pt plus 1.0fil} +\makeatother + +% rules for floats: +\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}} +\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}} + +% captions: +\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption} + +% put caption on separate float: +\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]} 
+ +% references to panels of a figure within the caption: +\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}} +% references to figures: +\newcommand{\panel}[1]{\textsf{\uppercase{#1}}} +\newcommand{\fref}[1]{\textup{\ref{#1}}} +\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}} +% references to figures in normal text: +\newcommand{\fig}{Fig.} +\newcommand{\Fig}{Figure} +\newcommand{\figs}{Figs.} +\newcommand{\Figs}{Figures} +\newcommand{\figref}[1]{\fig~\fref{#1}} +\newcommand{\Figref}[1]{\Fig~\fref{#1}} +\newcommand{\figsref}[1]{\figs~\fref{#1}} +\newcommand{\Figsref}[1]{\Figs~\fref{#1}} +\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}} +\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}} +\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}} +\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}} +% references to figures within bracketed text: +\newcommand{\figb}{Fig.} +\newcommand{\figsb}{Figs.} +\newcommand{\figrefb}[1]{\figb~\fref{#1}} +\newcommand{\figsrefb}[1]{\figsb~\fref{#1}} +\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}} +\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}} + +% references to tables: +\newcommand{\tref}[1]{\textup{\ref{#1}}} +% references to tables in normal text: +\newcommand{\tab}{Tab.} +\newcommand{\Tab}{Table} +\newcommand{\tabs}{Tabs.} +\newcommand{\Tabs}{Tables} +\newcommand{\tabref}[1]{\tab~\tref{#1}} +\newcommand{\Tabref}[1]{\Tab~\tref{#1}} +\newcommand{\tabsref}[1]{\tabs~\tref{#1}} +\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}} +% references to tables within bracketed text: +\newcommand{\tabb}{Tab.} +\newcommand{\tabsb}{Tab.} +\newcommand{\tabrefb}[1]{\tabb~\tref{#1}} +\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}} + + +%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%\newcommand{\eqref}[1]{(\ref{#1})} +\newcommand{\eqn}{\tr{Eq}{Gl}.} +\newcommand{\Eqn}{\tr{Eq}{Gl}.} +\newcommand{\eqns}{\tr{Eqs}{Gln}.} +\newcommand{\Eqns}{\tr{Eqs}{Gln}.} +\newcommand{\eqnref}[1]{\eqn~\eqref{#1}} 
+\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}} +\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}} +\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}} + + +%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{listings} +\lstset{ + inputpath=../code, + basicstyle=\ttfamily\footnotesize, + numbers=left, + showstringspaces=false, + language=Matlab, + commentstyle=\itshape\color{darkgray}, + keywordstyle=\color{blue}, + stringstyle=\color{green}, + backgroundcolor=\color{blue!10}, + breaklines=true, + breakautoindent=true, + columns=flexible, + frame=single, + caption={\protect\filename@parse{\lstname}\protect\filename@base}, + captionpos=t, + xleftmargin=1em, + xrightmargin=1em, + aboveskip=10pt +} + +%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{amsmath} +\usepackage{bm} +\usepackage{dsfont} +\newcommand{\naZ}{\mathds{N}} +\newcommand{\gaZ}{\mathds{Z}} +\newcommand{\raZ}{\mathds{Q}} +\newcommand{\reZ}{\mathds{R}} +\newcommand{\reZp}{\mathds{R^+}} +\newcommand{\reZpN}{\mathds{R^+_0}} +\newcommand{\koZ}{\mathds{C}} + + +%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{ifthen} + +\newcommand{\code}[1]{\texttt{#1}} + +\newcommand{\source}[1]{ + \begin{flushright} + \color{gray}\scriptsize \url{#1} + \end{flushright} +} + +\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}% + {\medskip} + +\newcounter{maxexercise} +\setcounter{maxexercise}{9} % show listings up to exercise maxexercise +\newcounter{theexercise} +\setcounter{theexercise}{1} +\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung} + \arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}% + {\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}} + +\graphicspath{{figures/}} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{document} + +\include{bootstrap} + +\end{document} diff --git a/bootstrap/lecture/bootstrap.tex b/bootstrap/lecture/bootstrap.tex new file mode 100644 index 0000000..e5d63aa --- /dev/null +++ b/bootstrap/lecture/bootstrap.tex @@ -0,0 +1,64 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\chapter{\tr{Bootstrap Methods}{Bootstrap Methoden}} + +Beim Bootstrap erzeugt man sich die Verteilung von Statistiken durch Resampling +aus der Stichprobe. Das hat mehrere Vorteile: +\begin{itemize} +\item Weniger Annahmen (z.B. muss eine Stichprobe nicht normalverteilt sein). +\item H\"ohere Genauigkeit als klassische Methoden. +\item Allgemeing\"ultigkeit: Bootstrap Methoden sind sich sehr + \"ahnlich f\"ur viele verschiedene Statistiken und ben\"otigen nicht + f\"ur jede Statistik eine andere Formel. +\end{itemize} + +\begin{figure}[t] + \includegraphics[width=0.8\textwidth]{2012-10-29_16-26-05_771}\\[2ex] + \includegraphics[width=0.8\textwidth]{2012-10-29_16-41-39_523}\\[2ex] + \includegraphics[width=0.8\textwidth]{2012-10-29_16-29-35_312} + \caption{\tr{Why can we only measure a sample of the + population?}{Warum k\"onnen wir nur eine Stichprobe der + Grundgesamtheit messen?}} +\end{figure} + +\begin{figure}[t] + \includegraphics[height=0.2\textheight]{srs1}\\[2ex] + \includegraphics[height=0.2\textheight]{srs2}\\[2ex] + \includegraphics[height=0.2\textheight]{srs3} + \caption{Bootstrap der Stichprobenverteilung. (a) Von der + Grundgesamtheit (population) mit unbekanntem Parameter + (z.B. Mittelwert $\mu$) zieht man Stichproben (SRS: simple random + samples). Die Statistik (hier Bestimmung von $\bar x$) kann f\"ur + jede Stichprobe berechnet werden. Die erhaltenen Werte entstammen + der Stichprobenverteilung. Meistens wird aber nur eine Stichprobe + gezogen! 
(b) Mit bestimmten Annahmen und Theorien kann man auf + die Stichprobenverteilung schlie{\ss}en, ohne sie gemessen zu + haben. (c) Alternativ k\"onnen aus der einen Stichprobe viele + Bootstrap-Stichproben generiert werden (resampling) und so + Eigenschaften der Stichprobenverteilung empirisch bestimmt + werden. Aus Hesterberg et al. 2003, Bootstrap Methods and + Permutation Tests} +\end{figure} + +\section{Bootstrap des Standardfehlers} + +Beim Bootstrap erzeugen wir durch Resampling neue Stichproben und +benutzen diese, um die Stichprobenverteilung einer Statistik zu +berechnen. Die Bootstrap Stichproben haben jeweils den gleichen Umfang +wie die urspr\"unglich gemessene Stichprobe und werden durch Ziehen +mit Zur\"ucklegen gewonnen. Jeder Wert der urspr\"unglichen Stichprobe +kann also einmal, mehrmals oder gar nicht in einer Bootstrap +Stichprobe vorkommen. + +\begin{exercise}[bootstrapsem.m] + Ziehe 1000 normalverteilte Zufallszahlen und berechne deren Mittelwert, + Standardabweichung und Standardfehler ($\sigma/\sqrt{n}$). + + Resample die Daten 1000 mal (Ziehen mit Zur\"ucklegen) und berechne jeweils + den Mittelwert. + + Plotte ein Histogramm dieser Mittelwerte, sowie deren Mittelwert und + die Standardabweichung. + + Was hat das mit dem Standardfehler zu tun? 
+\end{exercise} diff --git a/statistics/lecture/figures/2012-10-29_16-26-05_771.jpg b/bootstrap/lecture/figures/2012-10-29_16-26-05_771.jpg similarity index 100% rename from statistics/lecture/figures/2012-10-29_16-26-05_771.jpg rename to bootstrap/lecture/figures/2012-10-29_16-26-05_771.jpg diff --git a/statistics/lecture/figures/2012-10-29_16-29-35_312.jpg b/bootstrap/lecture/figures/2012-10-29_16-29-35_312.jpg similarity index 100% rename from statistics/lecture/figures/2012-10-29_16-29-35_312.jpg rename to bootstrap/lecture/figures/2012-10-29_16-29-35_312.jpg diff --git a/statistics/lecture/figures/2012-10-29_16-41-39_523.jpg b/bootstrap/lecture/figures/2012-10-29_16-41-39_523.jpg similarity index 100% rename from statistics/lecture/figures/2012-10-29_16-41-39_523.jpg rename to bootstrap/lecture/figures/2012-10-29_16-41-39_523.jpg diff --git a/statistics/lecture/figures/srs1.png b/bootstrap/lecture/figures/srs1.png similarity index 100% rename from statistics/lecture/figures/srs1.png rename to bootstrap/lecture/figures/srs1.png diff --git a/statistics/lecture/figures/srs2.png b/bootstrap/lecture/figures/srs2.png similarity index 100% rename from statistics/lecture/figures/srs2.png rename to bootstrap/lecture/figures/srs2.png diff --git a/statistics/lecture/figures/srs3.png b/bootstrap/lecture/figures/srs3.png similarity index 100% rename from statistics/lecture/figures/srs3.png rename to bootstrap/lecture/figures/srs3.png diff --git a/statistics/code/mlemean.m b/likelihood/code/mlemean.m similarity index 100% rename from statistics/code/mlemean.m rename to likelihood/code/mlemean.m diff --git a/likelihood/lecture/Makefile b/likelihood/lecture/Makefile new file mode 100644 index 0000000..4e6367b --- /dev/null +++ b/likelihood/lecture/Makefile @@ -0,0 +1,22 @@ +BASENAME=likelihood +PYFILES=$(wildcard *.py) +PYPDFFILES=$(PYFILES:.py=.pdf) + +pdf : $(BASENAME)-chapter.pdf $(PYPDFFILES) + +$(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex + pdflatex 
-interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true + +$(PYPDFFILES) : %.pdf : %.py + python $< + +clean : + rm -f *~ $(BASENAME)-chapter.aux $(BASENAME)-chapter.log $(BASENAME)-chapter.out $(BASENAME).aux $(BASENAME).log + +cleanall : clean + rm -f $(BASENAME)-chapter.pdf + +watch : + while true; do ! make -q pdf && make pdf; sleep 0.5; done + + diff --git a/likelihood/lecture/likelihood-chapter.tex b/likelihood/lecture/likelihood-chapter.tex new file mode 100644 index 0000000..732acbe --- /dev/null +++ b/likelihood/lecture/likelihood-chapter.tex @@ -0,0 +1,225 @@ +\documentclass[12pt]{report} + +%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}} +\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}} +\date{WS 15/16} + +%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% \newcommand{\tr}[2]{#1} % en +% \usepackage[english]{babel} +\newcommand{\tr}[2]{#2} % de +\usepackage[german]{babel} + +%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{pslatex} % nice font for pdf file +\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref} + +%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry} +\setcounter{tocdepth}{1} + +%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[sf,bf,it,big,clearempty]{titlesec} +\setcounter{secnumdepth}{1} + + +%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro + + +%%%%% figures 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{graphicx} +\usepackage{xcolor} +\pagecolor{white} + +\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)% + \put(0,4){\line(1,0){170}}% + \multiput(0,2)(10,0){18}{\line(0,1){4}}% + \multiput(0,3)(1,0){170}{\line(0,1){2}}% + \put(0,0){\makebox(0,0){{\tiny 0}}}% + \put(10,0){\makebox(0,0){{\tiny 1}}}% + \put(20,0){\makebox(0,0){{\tiny 2}}}% + \put(30,0){\makebox(0,0){{\tiny 3}}}% + \put(40,0){\makebox(0,0){{\tiny 4}}}% + \put(50,0){\makebox(0,0){{\tiny 5}}}% + \put(60,0){\makebox(0,0){{\tiny 6}}}% + \put(70,0){\makebox(0,0){{\tiny 7}}}% + \put(80,0){\makebox(0,0){{\tiny 8}}}% + \put(90,0){\makebox(0,0){{\tiny 9}}}% + \put(100,0){\makebox(0,0){{\tiny 10}}}% + \put(110,0){\makebox(0,0){{\tiny 11}}}% + \put(120,0){\makebox(0,0){{\tiny 12}}}% + \put(130,0){\makebox(0,0){{\tiny 13}}}% + \put(140,0){\makebox(0,0){{\tiny 14}}}% + \put(150,0){\makebox(0,0){{\tiny 15}}}% + \put(160,0){\makebox(0,0){{\tiny 16}}}% + \put(170,0){\makebox(0,0){{\tiny 17}}}% + \end{picture}\par} + +% figures: +\setlength{\fboxsep}{0pt} +\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}} +%\newcommand{\texpicture}[1]{} +\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}} + +% maximum number of floats: +\setcounter{topnumber}{2} +\setcounter{bottomnumber}{0} +\setcounter{totalnumber}{2} + +% float placement fractions: +\renewcommand{\textfraction}{0.2} +\renewcommand{\topfraction}{0.8} +\renewcommand{\bottomfraction}{0.0} +\renewcommand{\floatpagefraction}{0.5} + +% spacing for floats: +\setlength{\floatsep}{12pt plus 2pt minus 2pt} +\setlength{\textfloatsep}{20pt plus 4pt minus 2pt} +\setlength{\intextsep}{12pt plus 2pt minus 2pt} + +% spacing for a floating page: +\makeatletter + \setlength{\@fptop}{0pt} + 
\setlength{\@fpsep}{8pt plus 2.0fil} + \setlength{\@fpbot}{0pt plus 1.0fil} +\makeatother + +% rules for floats: +\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}} +\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}} + +% captions: +\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption} + +% put caption on separate float: +\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]} + +% references to panels of a figure within the caption: +\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}} +% references to figures: +\newcommand{\panel}[1]{\textsf{\uppercase{#1}}} +\newcommand{\fref}[1]{\textup{\ref{#1}}} +\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}} +% references to figures in normal text: +\newcommand{\fig}{Fig.} +\newcommand{\Fig}{Figure} +\newcommand{\figs}{Figs.} +\newcommand{\Figs}{Figures} +\newcommand{\figref}[1]{\fig~\fref{#1}} +\newcommand{\Figref}[1]{\Fig~\fref{#1}} +\newcommand{\figsref}[1]{\figs~\fref{#1}} +\newcommand{\Figsref}[1]{\Figs~\fref{#1}} +\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}} +\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}} +\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}} +\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}} +% references to figures within bracketed text: +\newcommand{\figb}{Fig.} +\newcommand{\figsb}{Figs.} +\newcommand{\figrefb}[1]{\figb~\fref{#1}} +\newcommand{\figsrefb}[1]{\figsb~\fref{#1}} +\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}} +\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}} + +% references to tables: +\newcommand{\tref}[1]{\textup{\ref{#1}}} +% references to tables in normal text: +\newcommand{\tab}{Tab.} +\newcommand{\Tab}{Table} +\newcommand{\tabs}{Tabs.} +\newcommand{\Tabs}{Tables} +\newcommand{\tabref}[1]{\tab~\tref{#1}} +\newcommand{\Tabref}[1]{\Tab~\tref{#1}} +\newcommand{\tabsref}[1]{\tabs~\tref{#1}} +\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}} +% references to tables within 
bracketed text: +\newcommand{\tabb}{Tab.} +\newcommand{\tabsb}{Tab.} +\newcommand{\tabrefb}[1]{\tabb~\tref{#1}} +\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}} + + +%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%\newcommand{\eqref}[1]{(\ref{#1})} +\newcommand{\eqn}{\tr{Eq}{Gl}.} +\newcommand{\Eqn}{\tr{Eq}{Gl}.} +\newcommand{\eqns}{\tr{Eqs}{Gln}.} +\newcommand{\Eqns}{\tr{Eqs}{Gln}.} +\newcommand{\eqnref}[1]{\eqn~\eqref{#1}} +\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}} +\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}} +\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}} + + +%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{listings} +\lstset{ + inputpath=../code, + basicstyle=\ttfamily\footnotesize, + numbers=left, + showstringspaces=false, + language=Matlab, + commentstyle=\itshape\color{darkgray}, + keywordstyle=\color{blue}, + stringstyle=\color{green}, + backgroundcolor=\color{blue!10}, + breaklines=true, + breakautoindent=true, + columns=flexible, + frame=single, + caption={\protect\filename@parse{\lstname}\protect\filename@base}, + captionpos=t, + xleftmargin=1em, + xrightmargin=1em, + aboveskip=10pt +} + +%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{amsmath} +\usepackage{bm} +\usepackage{dsfont} +\newcommand{\naZ}{\mathds{N}} +\newcommand{\gaZ}{\mathds{Z}} +\newcommand{\raZ}{\mathds{Q}} +\newcommand{\reZ}{\mathds{R}} +\newcommand{\reZp}{\mathds{R^+}} +\newcommand{\reZpN}{\mathds{R^+_0}} +\newcommand{\koZ}{\mathds{C}} + + +%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{ifthen} + +\newcommand{\code}[1]{\texttt{#1}} + +\newcommand{\source}[1]{ + \begin{flushright} + \color{gray}\scriptsize \url{#1} + \end{flushright} +} + +\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}% + {\medskip} + +\newcounter{maxexercise} +\setcounter{maxexercise}{9} % show listings up to exercise maxexercise +\newcounter{theexercise} 
+\setcounter{theexercise}{1} +\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung} + \arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}% + {\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}} + +\graphicspath{{figures/}} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{document} + +\include{likelihood} + +\end{document} diff --git a/likelihood/lecture/likelihood.tex b/likelihood/lecture/likelihood.tex new file mode 100644 index 0000000..752d659 --- /dev/null +++ b/likelihood/lecture/likelihood.tex @@ -0,0 +1,212 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\chapter{\tr{Maximum likelihood estimation}{Maximum-Likelihood Methode}} + +In vielen Situationen wollen wir einen oder mehrere Parameter $\theta$ +einer Wahrscheinlichkeitsverteilung sch\"atzen, so dass die Verteilung +die Daten $x_1, x_2, \ldots x_n$ am besten beschreibt. Bei der +Maximum-Likelihood-Methode w\"ahlen wir die Parameter so, dass die +Wahrscheinlichkeit, dass die Daten aus der Verteilung stammen, am +gr\"o{\ss}ten ist. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Maximum Likelihood} +Sei $p(x|\theta)$ (lies ``Wahrscheinlichkeit(sdichte) von $x$ gegeben +$\theta$'') die Wahrscheinlichkeits(dichte)verteilung von $x$ mit dem +Parameter(n) $\theta$. Das k\"onnte die Normalverteilung +\begin{equation} + \label{normpdfmean} + p(x|\theta) = \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x-\theta)^2}{2\sigma^2}} +\end{equation} +sein mit +fester Standardabweichung $\sigma$ und dem Mittelwert $\mu$ als +Parameter $\theta$. 
+ +Wenn nun den $n$ unabh\"angigen Beobachtungen $x_1, x_2, \ldots x_n$ +die Wahrscheinlichkeitsverteilung $p(x|\theta)$ zugrundeliegt, dann +ist die Verbundwahrscheinlichkeit $p(x_1,x_2, \ldots x_n|\theta)$ des +Auftretens der Werte $x_1, x_2, \ldots x_n$ gegeben ein bestimmtes $\theta$ +\begin{equation} + p(x_1,x_2, \ldots x_n|\theta) = p(x_1|\theta) \cdot p(x_2|\theta) + \ldots p(x_n|\theta) = \prod_{i=1}^n p(x_i|\theta) \; . +\end{equation} +Andersherum gesehen ist das die Likelihood (deutsch immer noch ``Wahrscheinlichkeit'') +den Parameter $\theta$ zu haben, gegeben die Me{\ss}werte $x_1, x_2, \ldots x_n$, +\begin{equation} + {\cal L}(\theta|x_1,x_2, \ldots x_n) = p(x_1,x_2, \ldots x_n|\theta) +\end{equation} + +Wir sind nun an dem Wert des Parameters $\theta_{mle}$ interessiert, der die +Likelihood maximiert (``mle'': Maximum-Likelihood Estimate): +\begin{equation} + \theta_{mle} = \text{argmax}_{\theta} {\cal L}(\theta|x_1,x_2, \ldots x_n) +\end{equation} +$\text{argmax}_xf(x)$ bezeichnet den Wert des Arguments $x$ der Funktion $f(x)$, bei +dem $f(x)$ ihr globales Maximum annimmt. Wir suchen also den Wert von $\theta$, +bei dem die Likelihood ${\cal L}(\theta)$ ihr Maximum hat. + +An der Stelle eines Maximums einer Funktion \"andert sich nichts, wenn +man die Funktionswerte mit einer streng monoton steigenden Funktion +transformiert. 
Aus gleich ersichtlichen mathematischen Gr\"unden wird meistens +das Maximum der logarithmierten Likelihood (``Log-Likelihood'') gesucht: +\begin{eqnarray} + \theta_{mle} & = & \text{argmax}_{\theta}\; {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\ + & = & \text{argmax}_{\theta}\; \log {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\ + & = & \text{argmax}_{\theta}\; \log \prod_{i=1}^n p(x_i|\theta) \nonumber \\ + & = & \text{argmax}_{\theta}\; \sum_{i=1}^n \log p(x_i|\theta) \label{loglikelihood} +\end{eqnarray} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Beispiel: Das arithmetische Mittel} + +Wenn die Me{\ss}daten $x_1, x_2, \ldots x_n$ der Normalverteilung \eqnref{normpdfmean} +entstammen, und wir den Mittelwert $\mu$ als einzigen Parameter der Verteilung betrachten, +welcher Wert von $\theta$ maximiert dessen Likelihood? + +\begin{figure}[t] + \includegraphics[width=1\textwidth]{mlemean} + \caption{\label{mlemeanfig} Maximum Likelihood Estimation des + Mittelwerts. Oben: Die Daten zusammen mit drei m\"oglichen + Normalverteilungen mit unterschiedlichen Mittelwerten (Pfeile) aus + denen die Daten stammen k\"onnten. Unten links: Die Likelihood + in Abh\"angigkeit des Mittelwerts als Parameter der + Normalverteilungen. Unten rechts: die entsprechende + Log-Likelihood. 
An der Position des Maximums bei $\theta=2$ + \"andert sich nichts (Pfeil).} +\end{figure} + +Die Log-Likelihood \eqnref{loglikelihood} ist +\begin{eqnarray*} + \log {\cal L}(\theta|x_1,x_2, \ldots x_n) + & = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x_i-\theta)^2}{2\sigma^2}} \\ + & = & \sum_{i=1}^n \left( - \log \sqrt{2\pi \sigma^2} -\frac{(x_i-\theta)^2}{2\sigma^2} \right) +\end{eqnarray*} +Zur Bestimmung des Maximums der Log-Likelihood berechnen wir deren Ableitung +nach dem Parameter $\theta$ und setzen diese gleich Null: +\begin{eqnarray*} + \frac{\text{d}}{\text{d}\theta} \log {\cal L}(\theta|x_1,x_2, \ldots x_n) & = & \sum_{i=1}^n \frac{2(x_i-\theta)}{2\sigma^2} \;\; = \;\; 0 \\ + \Leftrightarrow \quad \sum_{i=1}^n x_i - \sum_{i=1}^n \theta & = & 0 \\ + \Leftrightarrow \quad n \theta & = & \sum_{i=1}^n x_i \\ + \Leftrightarrow \quad \theta & = & \frac{1}{n} \sum_{i=1}^n x_i +\end{eqnarray*} +Der Maximum-Likelihood-Estimator ist das arithmetische Mittel der Daten. D.h. +das arithmetische Mittel maximiert die Wahrscheinlichkeit, dass die Daten aus einer +Normalverteilung mit diesem Mittelwert gezogen worden sind. + +\begin{exercise}[mlemean.m] + Ziehe $n=50$ normalverteilte Zufallsvariablen mit einem Mittelwert $\ne 0$ + und einer Standardabweichung $\ne 1$. + + Plotte die Likelihood (aus dem Produkt der Wahrscheinlichkeiten) und + die Log-Likelihood (aus der Summe der logarithmierten + Wahrscheinlichkeiten) f\"ur den Mittelwert als Parameter. Vergleiche + die Position der Maxima mit dem aus den Daten berechneten + Mittelwert. +\end{exercise} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Kurvenfit als Maximum Likelihood Estimation} +Beim Kurvenfit soll eine Funktion $f(x;\theta)$ mit den Parametern +$\theta$ an die Datenpaare $(x_i|y_i)$ durch Anpassung der Parameter +$\theta$ gefittet werden. 
Wenn wir annehmen, dass die $y_i$ um die +entsprechenden Funktionswerte $f(x_i;\theta)$ mit einer +Standardabweichung $\sigma_i$ normalverteilt streuen, dann lautet die +Log-Likelihood +\begin{eqnarray*} + \log {\cal L}(\theta|x_1,x_2, \ldots x_n) + & = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma_i^2}}e^{-\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2}} \\ + & = & \sum_{i=1}^n \left( - \log \sqrt{2\pi \sigma_i^2} -\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2} \right) \\ +\end{eqnarray*} +Der einzige Unterschied zum vorherigen Beispiel ist, dass die +Mittelwerte der Normalverteilungen nun durch die Funktionswerte +gegeben sind. + +Der Parameter $\theta$ soll so gew\"ahlt werden, dass die +Log-Likelihood maximal wird. Der erste Term der Summe ist +unabh\"angig von $\theta$ und kann deshalb bei der Suche nach dem +Maximum weggelassen werden. +\begin{eqnarray*} + & = & - \frac{1}{2} \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 +\end{eqnarray*} +Anstatt nach dem Maximum zu suchen, k\"onnen wir auch das Vorzeichen der Log-Likelihood +umdrehen und nach dem Minimum suchen. Dabei k\"onnen wir auch den Faktor $1/2$ vor der Summe vernachl\"assigen --- auch das \"andert nichts an der Position des Minimums. +\begin{equation} + \theta_{mle} = \text{argmin}_{\theta} \; \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 \;\; = \;\; \text{argmin}_{\theta} \; \chi^2 +\end{equation} +Die Summe der quadratischen Abst\"ande normiert auf die jeweiligen +Standardabweichungen wird auch mit $\chi^2$ bezeichnet. Der Wert des +Parameters $\theta$, welcher den quadratischen Abstand minimiert, ist +also identisch mit der Maximierung der Wahrscheinlichkeit, dass die +Daten tats\"achlich aus der Funktion stammen k\"onnen. Minimierung des +$\chi^2$ ist also ein Maximum-Likelihood Estimate. 
+ +\begin{figure}[t] + \includegraphics[width=1\textwidth]{mlepropline} + \caption{\label{mleproplinefig} Maximum Likelihood Estimation der + Steigung einer Ursprungsgeraden.} +\end{figure} + + +\subsection{Beispiel: einfache Proportionalit\"at} +Als Funktion nehmen wir die Ursprungsgerade +\[ f(x) = \theta x \] +mit Steigung $\theta$. Die $\chi^2$-Summe lautet damit +\[ \chi^2 = \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \; . \] +Zur Bestimmung des Minimums berechnen wir wieder die erste Ableitung nach $\theta$ +und setzen diese gleich Null: +\begin{eqnarray} + \frac{\text{d}}{\text{d}\theta}\chi^2 & = & \frac{\text{d}}{\text{d}\theta} \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\ + & = & \sum_{i=1}^n \frac{\text{d}}{\text{d}\theta} \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\ + & = & -2 \sum_{i=1}^n \frac{x_i}{\sigma_i} \left( \frac{y_i-\theta x_i}{\sigma_i} \right) \nonumber \\ + & = & -2 \sum_{i=1}^n \left( \frac{x_iy_i}{\sigma_i^2} - \theta \frac{x_i^2}{\sigma_i^2} \right) \;\; = \;\; 0 \nonumber \\ +\Leftrightarrow \quad \theta \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2} & = & \sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2} \nonumber \\ +\Leftrightarrow \quad \theta & = & \frac{\sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2}}{ \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2}} \label{mleslope} +\end{eqnarray} +Damit haben wir nun einen analytischen Ausdruck f\"ur die Bestimmung +der Steigung $\theta$ der Regressionsgeraden gewonnen. Ein +Gradientenabstieg ist f\"ur das Fitten der Geradensteigung also gar nicht +n\"otig. Das gilt allgemein f\"ur das Fitten von Koeffizienten von +linear kombinierten Basisfunktionen. Parameter, die nichtlinear in +einer Funktion enthalten sind, k\"onnen aber nicht analytisch aus den +Daten berechnet werden. Da bleibt dann nur auf numerische Verfahren +zur Optimierung der Kostenfunktion, wie z.B. den Gradientenabstieg, +zur\"uckzugreifen. 
+ + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Fits von Wahrscheinlichkeitsverteilungen} +Zum Abschluss betrachten wir noch den Fall, bei dem wir die Parameter +einer Wahrscheinlichkeitsdichtefunktion (z.B. Mittelwert und +Standardabweichung der Normalverteilung) an ein Datenset fitten wollen. + +Ein erster Gedanke k\"onnte sein, die +Wahrscheinlichkeitsdichtefunktion durch Minimierung des quadratischen +Abstands an ein Histogramm der Daten zu fitten. Das ist aber aus +folgenden Gr\"unden nicht die Methode der Wahl: (i) +Wahrscheinlichkeitsdichten k\"onnen nur positiv sein. Darum k\"onnen +insbesondere bei kleinen Werten die Daten nicht symmetrisch streuen, +wie es normalverteilte Daten machen sollten. (ii) Die Datenwerte sind +nicht unabh\"angig, da das normierte Histogramm sich zu Eins +aufintegriert. Die beiden Annahmen normalverteilte und unabh\"angige Daten, +die die Minimierung des quadratischen Abstands zu einem Maximum +Likelihood Estimator machen, sind also verletzt. (iii) Das Histogramm +h\"angt von der Wahl der Klassenbreite ab. + +Den direkten Weg, eine Wahrscheinlichkeitsdichtefunktion an ein +Datenset zu fitten, haben wir oben schon bei dem Beispiel zur +Absch\"atzung des Mittelwertes einer Normalverteilung gesehen --- +Maximum Likelihood! Wir suchen einfach die Parameter $\theta$ der +gesuchten Wahrscheinlichkeitsdichtefunktion, bei der die Log-Likelihood +\eqnref{loglikelihood} maximal wird. Das ist im Allgemeinen ein +nichtlineares Optimierungsproblem, das mit numerischen Verfahren, wie +z.B. dem Gradientenabstieg, gel\"ost wird. + +\begin{figure}[t] + \includegraphics[width=1\textwidth]{mlepdf} + \caption{\label{mlepdffig} Maximum Likelihood Estimation einer + Wahrscheinlichkeitsdichtefunktion. Links: die 100 Datenpunkte, die aus der Gammaverteilung + 2. Ordnung (rot) gezogen worden sind. Der Maximum-Likelihood-Fit ist orange dargestellt.
+ Rechts: das normierte Histogramm der Daten. Der \"uber Minimierung + des quadratischen Abstands zum Histogramm berechnete Fit ist potentiell schlechter.} +\end{figure} diff --git a/statistics/lecture/mlemean.py b/likelihood/lecture/mlemean.py similarity index 100% rename from statistics/lecture/mlemean.py rename to likelihood/lecture/mlemean.py diff --git a/statistics/lecture/mlepdf.py b/likelihood/lecture/mlepdf.py similarity index 100% rename from statistics/lecture/mlepdf.py rename to likelihood/lecture/mlepdf.py diff --git a/statistics/lecture/mlepropline.py b/likelihood/lecture/mlepropline.py similarity index 100% rename from statistics/lecture/mlepropline.py rename to likelihood/lecture/mlepropline.py diff --git a/statistics/code/iv_curve.mat b/regression/code/iv_curve.mat similarity index 100% rename from statistics/code/iv_curve.mat rename to regression/code/iv_curve.mat diff --git a/statistics/code/lin_regression.mat b/regression/code/lin_regression.mat similarity index 100% rename from statistics/code/lin_regression.mat rename to regression/code/lin_regression.mat diff --git a/statistics/code/lsq_error.m b/regression/code/lsq_error.m similarity index 100% rename from statistics/code/lsq_error.m rename to regression/code/lsq_error.m diff --git a/statistics/code/lsq_gradient.m b/regression/code/lsq_gradient.m similarity index 100% rename from statistics/code/lsq_gradient.m rename to regression/code/lsq_gradient.m diff --git a/statistics/code/lsq_gradient_sigmoid.m b/regression/code/lsq_gradient_sigmoid.m similarity index 100% rename from statistics/code/lsq_gradient_sigmoid.m rename to regression/code/lsq_gradient_sigmoid.m diff --git a/statistics/code/lsq_sigmoid_error.m b/regression/code/lsq_sigmoid_error.m similarity index 100% rename from statistics/code/lsq_sigmoid_error.m rename to regression/code/lsq_sigmoid_error.m diff --git a/statistics/code/membraneVoltage.mat b/regression/code/membraneVoltage.mat similarity index 100% rename from
statistics/code/membraneVoltage.mat rename to regression/code/membraneVoltage.mat diff --git a/statistics/code/plot_error_surface.m b/regression/code/plot_error_surface.m similarity index 100% rename from statistics/code/plot_error_surface.m rename to regression/code/plot_error_surface.m diff --git a/statistics/code/sigmoidal_gradient_descent.m b/regression/code/sigmoidal_gradient_descent.m similarity index 100% rename from statistics/code/sigmoidal_gradient_descent.m rename to regression/code/sigmoidal_gradient_descent.m diff --git a/regression/lecture/Makefile b/regression/lecture/Makefile new file mode 100644 index 0000000..4486638 --- /dev/null +++ b/regression/lecture/Makefile @@ -0,0 +1,22 @@ +BASENAME=linear_regression +PYFILES=$(wildcard *.py) +PYPDFFILES=$(PYFILES:.py=.pdf) + +pdf : $(BASENAME).pdf $(PYPDFFILES) + +$(BASENAME).pdf : $(BASENAME).tex + pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true + +$(PYPDFFILES) : %.pdf : %.py + python $< + +clean : + rm -f *~ $(BASENAME).aux $(BASENAME).log $(BASENAME).out $(BASENAME).toc $(BASENAME).nav $(BASENAME).snm $(BASENAME).vrb + +cleanall : clean + rm -f $(BASENAME).pdf + +watch : + while true; do ! make -q pdf && make pdf; sleep 0.5; done + + diff --git a/regression/lecture/beamercolorthemetuebingen.sty b/regression/lecture/beamercolorthemetuebingen.sty new file mode 100644 index 0000000..c4a5da6 --- /dev/null +++ b/regression/lecture/beamercolorthemetuebingen.sty @@ -0,0 +1,61 @@ +% Copyright 2007 by Till Tantau +% +% This file may be distributed and/or modified +% +% 1. under the LaTeX Project Public License and/or +% 2. under the GNU Public License. +% +% See the file doc/licenses/LICENSE for more details. 
+ +\usepackage{color} +\definecolor{karminrot}{RGB}{165,30,55} +\definecolor{gold}{RGB}{180,160,105} +\definecolor{anthrazit}{RGB}{50 ,65 ,75 } + +\mode + +\setbeamercolor*{normal text}{fg=anthrazit,bg=white} +\setbeamercolor*{alerted text}{fg=anthrazit} +\setbeamercolor*{example text}{fg=anthrazit} +\setbeamercolor*{structure}{fg=gold,bg=karminrot} + +\providecommand*{\beamer@bftext@only}{% + \relax + \ifmmode + \expandafter\beamer@bftext@warning + \else + \expandafter\bfseries + \fi +} +\providecommand*{\beamer@bftext@warning}{% + \ClassWarning{beamer} + {Cannot use bold for alerted text in math mode}% +} + +\setbeamerfont{alerted text}{series=\beamer@bftext@only} + +\setbeamercolor{palette primary}{fg=karminrot,bg=white} +\setbeamercolor{palette secondary}{fg=gold,bg=white} +\setbeamercolor{palette tertiary}{fg=anthrazit,bg=white} +\setbeamercolor{palette quaternary}{fg=black,bg=white} + +\setbeamercolor{sidebar}{bg=karminrot!100} + +\setbeamercolor{palette sidebar primary}{fg=karminrot} +\setbeamercolor{palette sidebar secondary}{fg=karminrot} +\setbeamercolor{palette sidebar tertiary}{fg=karminrot} +\setbeamercolor{palette sidebar quaternary}{fg=karminrot} + +\setbeamercolor{item projected}{fg=black,bg=black!20} + +\setbeamercolor*{block body}{} +\setbeamercolor*{block body alerted}{} +\setbeamercolor*{block body example}{} +\setbeamercolor*{block title}{parent=structure} +\setbeamercolor*{block title alerted}{parent=alerted text} +\setbeamercolor*{block title example}{parent=example text} + +\setbeamercolor*{titlelike}{parent=structure} + +\mode + diff --git a/statistics/lecture/figures/charging_curve.pdf b/regression/lecture/figures/charging_curve.pdf similarity index 100% rename from statistics/lecture/figures/charging_curve.pdf rename to regression/lecture/figures/charging_curve.pdf diff --git a/statistics/lecture/figures/lin_regress.pdf b/regression/lecture/figures/lin_regress.pdf similarity index 100% rename from 
statistics/lecture/figures/lin_regress.pdf rename to regression/lecture/figures/lin_regress.pdf diff --git a/statistics/lecture/figures/lin_regress_abscissa.pdf b/regression/lecture/figures/lin_regress_abscissa.pdf similarity index 100% rename from statistics/lecture/figures/lin_regress_abscissa.pdf rename to regression/lecture/figures/lin_regress_abscissa.pdf diff --git a/statistics/lecture/figures/lin_regress_slope.pdf b/regression/lecture/figures/lin_regress_slope.pdf similarity index 100% rename from statistics/lecture/figures/lin_regress_slope.pdf rename to regression/lecture/figures/lin_regress_slope.pdf diff --git a/statistics/lecture/figures/linear_least_squares.pdf b/regression/lecture/figures/linear_least_squares.pdf similarity index 100% rename from statistics/lecture/figures/linear_least_squares.pdf rename to regression/lecture/figures/linear_least_squares.pdf diff --git a/statistics/lecture/figures/one_d_problem_a.pdf b/regression/lecture/figures/one_d_problem_a.pdf similarity index 100% rename from statistics/lecture/figures/one_d_problem_a.pdf rename to regression/lecture/figures/one_d_problem_a.pdf diff --git a/statistics/lecture/figures/one_d_problem_b.pdf b/regression/lecture/figures/one_d_problem_b.pdf similarity index 100% rename from statistics/lecture/figures/one_d_problem_b.pdf rename to regression/lecture/figures/one_d_problem_b.pdf diff --git a/statistics/lecture/figures/one_d_problem_c.pdf b/regression/lecture/figures/one_d_problem_c.pdf similarity index 100% rename from statistics/lecture/figures/one_d_problem_c.pdf rename to regression/lecture/figures/one_d_problem_c.pdf diff --git a/statistics/lecture/figures/surface.pdf b/regression/lecture/figures/surface.pdf similarity index 100% rename from statistics/lecture/figures/surface.pdf rename to regression/lecture/figures/surface.pdf diff --git a/statistics/lecture/linear_regression.tex b/regression/lecture/linear_regression.tex similarity index 100% rename from 
statistics/lecture/linear_regression.tex rename to regression/lecture/linear_regression.tex diff --git a/scientificcomputing-script.tex b/scientificcomputing-script.tex new file mode 100644 index 0000000..7627470 --- /dev/null +++ b/scientificcomputing-script.tex @@ -0,0 +1,236 @@ +\documentclass[12pt]{report} + +%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}} +\author{Jan Grewe \& Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}} +\date{WS 15/16} + +%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% \newcommand{\tr}[2]{#1} % en +% \usepackage[english]{babel} +\newcommand{\tr}[2]{#2} % de +\usepackage[german]{babel} + +%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{pslatex} % nice font for pdf file +\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref} + +%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry} +\setcounter{tocdepth}{1} + +%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[sf,bf,it,big,clearempty]{titlesec} +\setcounter{secnumdepth}{1} + + +%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro + + +%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{graphicx} +\usepackage{xcolor} +\pagecolor{white} + +\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)% + \put(0,4){\line(1,0){170}}% + \multiput(0,2)(10,0){18}{\line(0,1){4}}% + \multiput(0,3)(1,0){170}{\line(0,1){2}}% + \put(0,0){\makebox(0,0){{\tiny 0}}}% + \put(10,0){\makebox(0,0){{\tiny 1}}}% + 
\put(20,0){\makebox(0,0){{\tiny 2}}}% + \put(30,0){\makebox(0,0){{\tiny 3}}}% + \put(40,0){\makebox(0,0){{\tiny 4}}}% + \put(50,0){\makebox(0,0){{\tiny 5}}}% + \put(60,0){\makebox(0,0){{\tiny 6}}}% + \put(70,0){\makebox(0,0){{\tiny 7}}}% + \put(80,0){\makebox(0,0){{\tiny 8}}}% + \put(90,0){\makebox(0,0){{\tiny 9}}}% + \put(100,0){\makebox(0,0){{\tiny 10}}}% + \put(110,0){\makebox(0,0){{\tiny 11}}}% + \put(120,0){\makebox(0,0){{\tiny 12}}}% + \put(130,0){\makebox(0,0){{\tiny 13}}}% + \put(140,0){\makebox(0,0){{\tiny 14}}}% + \put(150,0){\makebox(0,0){{\tiny 15}}}% + \put(160,0){\makebox(0,0){{\tiny 16}}}% + \put(170,0){\makebox(0,0){{\tiny 17}}}% + \end{picture}\par} + +% figures: +\setlength{\fboxsep}{0pt} +\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}} +%\newcommand{\texpicture}[1]{} +\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}} + +% maximum number of floats: +\setcounter{topnumber}{2} +\setcounter{bottomnumber}{0} +\setcounter{totalnumber}{2} + +% float placement fractions: +\renewcommand{\textfraction}{0.2} +\renewcommand{\topfraction}{0.8} +\renewcommand{\bottomfraction}{0.0} +\renewcommand{\floatpagefraction}{0.5} + +% spacing for floats: +\setlength{\floatsep}{12pt plus 2pt minus 2pt} +\setlength{\textfloatsep}{20pt plus 4pt minus 2pt} +\setlength{\intextsep}{12pt plus 2pt minus 2pt} + +% spacing for a floating page: +\makeatletter + \setlength{\@fptop}{0pt} + \setlength{\@fpsep}{8pt plus 2.0fil} + \setlength{\@fpbot}{0pt plus 1.0fil} +\makeatother + +% rules for floats: +\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}} +\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}} + +% captions: +\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption} + +% put caption on separate float: 
+\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]} + +% references to panels of a figure within the caption: +\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}} +% references to figures: +\newcommand{\panel}[1]{\textsf{\uppercase{#1}}} +\newcommand{\fref}[1]{\textup{\ref{#1}}} +\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}} +% references to figures in normal text: +\newcommand{\fig}{Fig.} +\newcommand{\Fig}{Figure} +\newcommand{\figs}{Figs.} +\newcommand{\Figs}{Figures} +\newcommand{\figref}[1]{\fig~\fref{#1}} +\newcommand{\Figref}[1]{\Fig~\fref{#1}} +\newcommand{\figsref}[1]{\figs~\fref{#1}} +\newcommand{\Figsref}[1]{\Figs~\fref{#1}} +\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}} +\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}} +\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}} +\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}} +% references to figures within bracketed text: +\newcommand{\figb}{Fig.} +\newcommand{\figsb}{Figs.} +\newcommand{\figrefb}[1]{\figb~\fref{#1}} +\newcommand{\figsrefb}[1]{\figsb~\fref{#1}} +\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}} +\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}} + +% references to tables: +\newcommand{\tref}[1]{\textup{\ref{#1}}} +% references to tables in normal text: +\newcommand{\tab}{Tab.} +\newcommand{\Tab}{Table} +\newcommand{\tabs}{Tabs.} +\newcommand{\Tabs}{Tables} +\newcommand{\tabref}[1]{\tab~\tref{#1}} +\newcommand{\Tabref}[1]{\Tab~\tref{#1}} +\newcommand{\tabsref}[1]{\tabs~\tref{#1}} +\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}} +% references to tables within bracketed text: +\newcommand{\tabb}{Tab.} +\newcommand{\tabsb}{Tab.} +\newcommand{\tabrefb}[1]{\tabb~\tref{#1}} +\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}} + + +%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%\newcommand{\eqref}[1]{(\ref{#1})} +\newcommand{\eqn}{\tr{Eq}{Gl}.} +\newcommand{\Eqn}{\tr{Eq}{Gl}.} +\newcommand{\eqns}{\tr{Eqs}{Gln}.} 
+\newcommand{\Eqns}{\tr{Eqs}{Gln}.} +\newcommand{\eqnref}[1]{\eqn~\eqref{#1}} +\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}} +\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}} +\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}} + + +%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{listings} +\lstset{ + basicstyle=\ttfamily\footnotesize, + numbers=left, + showstringspaces=false, + language=Matlab, + commentstyle=\itshape\color{darkgray}, + keywordstyle=\color{blue}, + stringstyle=\color{green}, + backgroundcolor=\color{blue!10}, + breaklines=true, + breakautoindent=true, + columns=flexible, + frame=single, + caption={\protect\filename@parse{\lstname}\protect\filename@base}, + captionpos=t, + xleftmargin=1em, + xrightmargin=1em, + aboveskip=10pt +} + +%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{amsmath} +\usepackage{bm} +\usepackage{dsfont} +\newcommand{\naZ}{\mathds{N}} +\newcommand{\gaZ}{\mathds{Z}} +\newcommand{\raZ}{\mathds{Q}} +\newcommand{\reZ}{\mathds{R}} +\newcommand{\reZp}{\mathds{R^+}} +\newcommand{\reZpN}{\mathds{R^+_0}} +\newcommand{\koZ}{\mathds{C}} + + +%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{ifthen} + +\newcommand{\code}[1]{\texttt{#1}} + +\newcommand{\source}[1]{ + \begin{flushright} + \color{gray}\scriptsize \url{#1} + \end{flushright} +} + +\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}% + {\medskip} + +\newcounter{maxexercise} +\setcounter{maxexercise}{9} % show listings up to exercise maxexercise +\newcounter{theexercise} +\setcounter{theexercise}{1} +\newcommand{\codepath}{} +\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung} + \arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}% + {\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\codepath\exercisesource}}}\medskip\stepcounter{theexercise}} + 
+\graphicspath{{statistics/lecture/}{statistics/lecture/figures/}{bootstrap/lecture/}{bootstrap/lecture/figures/}{likelihood/lecture/}{likelihood/lecture/figures/}} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{document} + +\maketitle + +\tableofcontents + +\renewcommand{\codepath}{statistics/code/} +\include{statistics/lecture/descriptivestatistics} + +\renewcommand{\codepath}{bootstrap/code/} +\include{bootstrap/lecture/bootstrap} + +\renewcommand{\codepath}{likelihood/code/} +\include{likelihood/lecture/likelihood} + +\end{document} diff --git a/statistics/exercises/mlepdffit.m b/statistics/exercises/mlepdffit.m index 900fe22..f10128b 100644 --- a/statistics/exercises/mlepdffit.m +++ b/statistics/exercises/mlepdffit.m @@ -24,4 +24,7 @@ yy = gampdf(xx, p(1), p(2)); plot(xx, yy, '-k', 'linewidth', 5, 'DisplayName', 'mle' ); hold off; +xlabel('x'); +ylabel('pdf'); legend('show'); +savefigpdf(gcf, 'mlepdffit.pdf', 12, 8) diff --git a/statistics/exercises/mlepdffit.pdf b/statistics/exercises/mlepdffit.pdf new file mode 100644 index 0000000..b718c98 Binary files /dev/null and b/statistics/exercises/mlepdffit.pdf differ diff --git a/statistics/exercises/mlepropfit.pdf b/statistics/exercises/mlepropfit.pdf new file mode 100644 index 0000000..da9856e Binary files /dev/null and b/statistics/exercises/mlepropfit.pdf differ diff --git a/statistics/exercises/mlestd.pdf b/statistics/exercises/mlestd.pdf new file mode 100644 index 0000000..dad420c Binary files /dev/null and b/statistics/exercises/mlestd.pdf differ diff --git a/statistics/exercises/statistics04.tex b/statistics/exercises/statistics04.tex index 60d4e27..276f7ea 100644 --- a/statistics/exercises/statistics04.tex +++ b/statistics/exercises/statistics04.tex @@ -183,7 +183,7 @@ Normalverteilung entstammen, sonder aus der Gamma-Verteilung. 
\end{parts} \begin{solution} \lstinputlisting{mlepdffit.m} - %\includegraphics[width=1\textwidth]{mlepdffit} + \includegraphics[width=1\textwidth]{mlepdffit} \end{solution} \end{questions} diff --git a/statistics/lecture/Makefile b/statistics/lecture/Makefile index 6f5131d..0c28bef 100644 --- a/statistics/lecture/Makefile +++ b/statistics/lecture/Makefile @@ -1,21 +1,20 @@ -TEXFILES=descriptivestatistics.tex linear_regression.tex #$(wildcard *.tex) -PDFFILES=$(TEXFILES:.tex=.pdf) +BASENAME=descriptivestatistics PYFILES=$(wildcard *.py) PYPDFFILES=$(PYFILES:.py=.pdf) -pdf : $(PDFFILES) $(PYPDFFILES) +pdf : $(BASENAME)-chapter.pdf $(PYPDFFILES) -$(PDFFILES) : %.pdf : %.tex +$(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true $(PYPDFFILES) : %.pdf : %.py python $< clean : - rm -f *~ $(TEXFILES:.tex=.aux) $(TEXFILES:.tex=.log) $(TEXFILES:.tex=.out) $(TEXFILES:.tex=.nav) $(TEXFILES:.tex=.snm) $(TEXFILES:.tex=.toc) $(TEXFILES:.tex=.vrb) + rm -f *~ $(BASENAME)-chapter.aux $(BASENAME)-chapter.log $(BASENAME)-chapter.out $(BASENAME).aux $(BASENAME).log cleanall : clean - rm -f $(PDFFILES) + rm -f $(BASENAME)-chapter.pdf watch : while true; do ! 
make -q pdf && make pdf; sleep 0.5; done diff --git a/statistics/lecture/descriptivestatistics-chapter.tex b/statistics/lecture/descriptivestatistics-chapter.tex new file mode 100644 index 0000000..6d498e2 --- /dev/null +++ b/statistics/lecture/descriptivestatistics-chapter.tex @@ -0,0 +1,361 @@ +\documentclass[12pt]{report} + +%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}} +\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}} +\date{WS 15/16} + +%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% \newcommand{\tr}[2]{#1} % en +% \usepackage[english]{babel} +\newcommand{\tr}[2]{#2} % de +\usepackage[german]{babel} + +%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{pslatex} % nice font for pdf file +\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref} + +%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry} +\setcounter{tocdepth}{1} + +%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[sf,bf,it,big,clearempty]{titlesec} +\setcounter{secnumdepth}{1} + + +%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro + + +%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{graphicx} +\usepackage{xcolor} +\pagecolor{white} + +\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)% + \put(0,4){\line(1,0){170}}% + \multiput(0,2)(10,0){18}{\line(0,1){4}}% + \multiput(0,3)(1,0){170}{\line(0,1){2}}% + \put(0,0){\makebox(0,0){{\tiny 0}}}% + \put(10,0){\makebox(0,0){{\tiny 1}}}% + 
\put(20,0){\makebox(0,0){{\tiny 2}}}% + \put(30,0){\makebox(0,0){{\tiny 3}}}% + \put(40,0){\makebox(0,0){{\tiny 4}}}% + \put(50,0){\makebox(0,0){{\tiny 5}}}% + \put(60,0){\makebox(0,0){{\tiny 6}}}% + \put(70,0){\makebox(0,0){{\tiny 7}}}% + \put(80,0){\makebox(0,0){{\tiny 8}}}% + \put(90,0){\makebox(0,0){{\tiny 9}}}% + \put(100,0){\makebox(0,0){{\tiny 10}}}% + \put(110,0){\makebox(0,0){{\tiny 11}}}% + \put(120,0){\makebox(0,0){{\tiny 12}}}% + \put(130,0){\makebox(0,0){{\tiny 13}}}% + \put(140,0){\makebox(0,0){{\tiny 14}}}% + \put(150,0){\makebox(0,0){{\tiny 15}}}% + \put(160,0){\makebox(0,0){{\tiny 16}}}% + \put(170,0){\makebox(0,0){{\tiny 17}}}% + \end{picture}\par} + +% figures: +\setlength{\fboxsep}{0pt} +\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}} +%\newcommand{\texpicture}[1]{} +\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}} + +% maximum number of floats: +\setcounter{topnumber}{2} +\setcounter{bottomnumber}{0} +\setcounter{totalnumber}{2} + +% float placement fractions: +\renewcommand{\textfraction}{0.2} +\renewcommand{\topfraction}{0.8} +\renewcommand{\bottomfraction}{0.0} +\renewcommand{\floatpagefraction}{0.5} + +% spacing for floats: +\setlength{\floatsep}{12pt plus 2pt minus 2pt} +\setlength{\textfloatsep}{20pt plus 4pt minus 2pt} +\setlength{\intextsep}{12pt plus 2pt minus 2pt} + +% spacing for a floating page: +\makeatletter + \setlength{\@fptop}{0pt} + \setlength{\@fpsep}{8pt plus 2.0fil} + \setlength{\@fpbot}{0pt plus 1.0fil} +\makeatother + +% rules for floats: +\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}} +\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}} + +% captions: +\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption} + +% put caption on separate float: 
+\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]} + +% references to panels of a figure within the caption: +\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}} +% references to figures: +\newcommand{\panel}[1]{\textsf{\uppercase{#1}}} +\newcommand{\fref}[1]{\textup{\ref{#1}}} +\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}} +% references to figures in normal text: +\newcommand{\fig}{Fig.} +\newcommand{\Fig}{Figure} +\newcommand{\figs}{Figs.} +\newcommand{\Figs}{Figures} +\newcommand{\figref}[1]{\fig~\fref{#1}} +\newcommand{\Figref}[1]{\Fig~\fref{#1}} +\newcommand{\figsref}[1]{\figs~\fref{#1}} +\newcommand{\Figsref}[1]{\Figs~\fref{#1}} +\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}} +\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}} +\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}} +\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}} +% references to figures within bracketed text: +\newcommand{\figb}{Fig.} +\newcommand{\figsb}{Figs.} +\newcommand{\figrefb}[1]{\figb~\fref{#1}} +\newcommand{\figsrefb}[1]{\figsb~\fref{#1}} +\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}} +\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}} + +% references to tables: +\newcommand{\tref}[1]{\textup{\ref{#1}}} +% references to tables in normal text: +\newcommand{\tab}{Tab.} +\newcommand{\Tab}{Table} +\newcommand{\tabs}{Tabs.} +\newcommand{\Tabs}{Tables} +\newcommand{\tabref}[1]{\tab~\tref{#1}} +\newcommand{\Tabref}[1]{\Tab~\tref{#1}} +\newcommand{\tabsref}[1]{\tabs~\tref{#1}} +\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}} +% references to tables within bracketed text: +\newcommand{\tabb}{Tab.} +\newcommand{\tabsb}{Tab.} +\newcommand{\tabrefb}[1]{\tabb~\tref{#1}} +\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}} + + +%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%\newcommand{\eqref}[1]{(\ref{#1})} +\newcommand{\eqn}{\tr{Eq}{Gl}.} +\newcommand{\Eqn}{\tr{Eq}{Gl}.} +\newcommand{\eqns}{\tr{Eqs}{Gln}.} 
+\newcommand{\Eqns}{\tr{Eqs}{Gln}.} +\newcommand{\eqnref}[1]{\eqn~\eqref{#1}} +\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}} +\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}} +\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}} + + +%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{listings} +\lstset{ + inputpath=../code, + basicstyle=\ttfamily\footnotesize, + numbers=left, + showstringspaces=false, + language=Matlab, + commentstyle=\itshape\color{darkgray}, + keywordstyle=\color{blue}, + stringstyle=\color{green}, + backgroundcolor=\color{blue!10}, + breaklines=true, + breakautoindent=true, + columns=flexible, + frame=single, + caption={\protect\filename@parse{\lstname}\protect\filename@base}, + captionpos=t, + xleftmargin=1em, + xrightmargin=1em, + aboveskip=10pt +} + +%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{amsmath} +\usepackage{bm} +\usepackage{dsfont} +\newcommand{\naZ}{\mathds{N}} +\newcommand{\gaZ}{\mathds{Z}} +\newcommand{\raZ}{\mathds{Q}} +\newcommand{\reZ}{\mathds{R}} +\newcommand{\reZp}{\mathds{R^+}} +\newcommand{\reZpN}{\mathds{R^+_0}} +\newcommand{\koZ}{\mathds{C}} + + +%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{ifthen} + +\newcommand{\code}[1]{\texttt{#1}} + +\newcommand{\source}[1]{ + \begin{flushright} + \color{gray}\scriptsize \url{#1} + \end{flushright} +} + +\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}% + {\medskip} + +\newcounter{maxexercise} +\setcounter{maxexercise}{9} % show listings up to exercise maxexercise +\newcounter{theexercise} +\setcounter{theexercise}{1} +\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung} + \arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}% + {\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}} + +\graphicspath{{figures/}} + + 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{document} + +\include{descriptivestatistics} + +\end{document} + + +\end{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Statistics} +What is "a statistic"? % dt. Sch\"atzfunktion +\begin{definition}[statistic] + A statistic (singular) is a single measure of some attribute of a + sample (e.g., its arithmetic mean value). It is calculated by + applying a function (statistical algorithm) to the values of the + items of the sample, which are known together as a set of data. + + \source{http://en.wikipedia.org/wiki/Statistic} +\end{definition} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Data types} + +\subsection{Nominal scale} +\begin{itemize} +\item Binary + \begin{itemize} + \item ``yes/no'', + \item ``true/false'', + \item ``success/failure'', etc. + \end{itemize} +\item Categorical + \begin{itemize} + \item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''), + \item blood type (``A/B/AB/0''), + \item parts of speech (``noun/verb/preposition/article/...''), + \item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc. + \end{itemize} +\item Each observation/measurement/sample is put into one category +\item There is no reasonable order among the categories.\\ + example: [rods, cones] vs. [cones, rods] +\item Statistics: mode, i.e. the most common item +\end{itemize} + +\subsection{Ordinal scale} +\begin{itemize} +\item Like nominal scale, but with an order +\item Examples: ranks, ratings + \begin{itemize} + \item ``bad/ok/good'', + \item ``cold/warm/hot'', + \item ``young/old'', etc.
+ \end{itemize} +\item {\bf But:} there is no reasonable measure of {\em distance} + between the classes +\item Statistics: mode, median +\end{itemize} + +\subsection{Interval scale} +\begin{itemize} +\item Quantitative/metric values +\item Reasonable measure of distance between values, but no absolute zero +\item Examples: + \begin{itemize} + \item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C) + \item Direction measured in degrees from magnetic or true north + \end{itemize} +\item Statistics: + \begin{itemize} + \item Central tendency: mode, median, arithmetic mean + \item Dispersion: range, standard deviation + \end{itemize} +\end{itemize} + +\subsection{Absolute/ratio scale} +\begin{itemize} +\item Like interval scale, but with absolute origin/zero +\item Examples: + \begin{itemize} + \item Temperature in K (kelvin) + \item Length, mass, duration, electric charge, ... + \item Plane angle, etc. + \item Count (e.g. number of spikes in response to a stimulus) + \end{itemize} +\item Statistics: + \begin{itemize} + \item Central tendency: mode, median, arithmetic, geometric, harmonic mean + \item Dispersion: range, standard deviation + \item Coefficient of variation (ratio standard deviation/mean) + \item All other statistical measures + \end{itemize} +\end{itemize} + +\subsection{Data types} +\begin{itemize} +\item Data type selects + \begin{itemize} + \item statistics + \item type of plots (bar graph versus x-y plot) + \item correct tests + \end{itemize} +\item Scales exhibit increasing information content from nominal + to absolute.\\ + Conversion ``downwards'' is always possible +\item For example: size measured in meters (ratio scale) $\rightarrow$ + categories ``small/medium/large'' (ordinal scale) +\end{itemize} + +\subsection{Examples from neuroscience} +\begin{itemize} +\item {\bf absolute:} + \begin{itemize} + \item size of neuron/brain + \item length of axon + \item ion concentration + \item membrane potential + \item firing rate
+ \end{itemize} + +\item {\bf interval:} + \begin{itemize} + \item edge orientation + \end{itemize} + +\item {\bf ordinal:} + \begin{itemize} + \item stages of a disease + \item ratings + \end{itemize} + +\item {\bf nominal:} + \begin{itemize} + \item cell type + \item odor + \item states of an ion channel + \end{itemize} + +\end{itemize} + diff --git a/statistics/lecture/descriptivestatistics.tex b/statistics/lecture/descriptivestatistics.tex index f0e0f08..85ce58d 100644 --- a/statistics/lecture/descriptivestatistics.tex +++ b/statistics/lecture/descriptivestatistics.tex @@ -1,229 +1,3 @@ -\documentclass[12pt]{report} - -%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}} -\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}} -\date{WS 15/16} - -%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% \newcommand{\tr}[2]{#1} % en -% \usepackage[english]{babel} -\newcommand{\tr}[2]{#2} % de -\usepackage[german]{babel} - -%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{pslatex} % nice font for pdf file -\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref} - -%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry} -\setcounter{tocdepth}{1} - -%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage[sf,bf,it,big,clearempty]{titlesec} -\setcounter{secnumdepth}{1} - - -%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro - - -%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{graphicx} -\usepackage{xcolor} 
-\pagecolor{white} - -\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)% - \put(0,4){\line(1,0){170}}% - \multiput(0,2)(10,0){18}{\line(0,1){4}}% - \multiput(0,3)(1,0){170}{\line(0,1){2}}% - \put(0,0){\makebox(0,0){{\tiny 0}}}% - \put(10,0){\makebox(0,0){{\tiny 1}}}% - \put(20,0){\makebox(0,0){{\tiny 2}}}% - \put(30,0){\makebox(0,0){{\tiny 3}}}% - \put(40,0){\makebox(0,0){{\tiny 4}}}% - \put(50,0){\makebox(0,0){{\tiny 5}}}% - \put(60,0){\makebox(0,0){{\tiny 6}}}% - \put(70,0){\makebox(0,0){{\tiny 7}}}% - \put(80,0){\makebox(0,0){{\tiny 8}}}% - \put(90,0){\makebox(0,0){{\tiny 9}}}% - \put(100,0){\makebox(0,0){{\tiny 10}}}% - \put(110,0){\makebox(0,0){{\tiny 11}}}% - \put(120,0){\makebox(0,0){{\tiny 12}}}% - \put(130,0){\makebox(0,0){{\tiny 13}}}% - \put(140,0){\makebox(0,0){{\tiny 14}}}% - \put(150,0){\makebox(0,0){{\tiny 15}}}% - \put(160,0){\makebox(0,0){{\tiny 16}}}% - \put(170,0){\makebox(0,0){{\tiny 17}}}% - \end{picture}\par} - -% figures: -\setlength{\fboxsep}{0pt} -\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}} -%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}} -%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}} -%\newcommand{\texpicture}[1]{} -\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}} - -% maximum number of floats: -\setcounter{topnumber}{2} -\setcounter{bottomnumber}{0} -\setcounter{totalnumber}{2} - -% float placement fractions: -\renewcommand{\textfraction}{0.2} -\renewcommand{\topfraction}{0.8} -\renewcommand{\bottomfraction}{0.0} -\renewcommand{\floatpagefraction}{0.5} - -% spacing for floats: -\setlength{\floatsep}{12pt plus 2pt minus 2pt} -\setlength{\textfloatsep}{20pt plus 4pt minus 2pt} -\setlength{\intextsep}{12pt plus 2pt minus 2pt} - -% spacing for a floating page: -\makeatletter - \setlength{\@fptop}{0pt} - \setlength{\@fpsep}{8pt plus 2.0fil} - \setlength{\@fpbot}{0pt plus 1.0fil} -\makeatother - -% rules for floats: 
-\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}} -\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}} - -% captions: -\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption} - -% put caption on separate float: -\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]} - -% references to panels of a figure within the caption: -\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}} -% references to figures: -\newcommand{\panel}[1]{\textsf{\uppercase{#1}}} -\newcommand{\fref}[1]{\textup{\ref{#1}}} -\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}} -% references to figures in normal text: -\newcommand{\fig}{Fig.} -\newcommand{\Fig}{Figure} -\newcommand{\figs}{Figs.} -\newcommand{\Figs}{Figures} -\newcommand{\figref}[1]{\fig~\fref{#1}} -\newcommand{\Figref}[1]{\Fig~\fref{#1}} -\newcommand{\figsref}[1]{\figs~\fref{#1}} -\newcommand{\Figsref}[1]{\Figs~\fref{#1}} -\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}} -\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}} -\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}} -\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}} -% references to figures within bracketed text: -\newcommand{\figb}{Fig.} -\newcommand{\figsb}{Figs.} -\newcommand{\figrefb}[1]{\figb~\fref{#1}} -\newcommand{\figsrefb}[1]{\figsb~\fref{#1}} -\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}} -\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}} - -% references to tables: -\newcommand{\tref}[1]{\textup{\ref{#1}}} -% references to tables in normal text: -\newcommand{\tab}{Tab.} -\newcommand{\Tab}{Table} -\newcommand{\tabs}{Tabs.} -\newcommand{\Tabs}{Tables} -\newcommand{\tabref}[1]{\tab~\tref{#1}} -\newcommand{\Tabref}[1]{\Tab~\tref{#1}} -\newcommand{\tabsref}[1]{\tabs~\tref{#1}} -\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}} -% references to tables within bracketed text: -\newcommand{\tabb}{Tab.} -\newcommand{\tabsb}{Tab.} -\newcommand{\tabrefb}[1]{\tabb~\tref{#1}} 
-\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}} - - -%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%\newcommand{\eqref}[1]{(\ref{#1})} -\newcommand{\eqn}{\tr{Eq}{Gl}.} -\newcommand{\Eqn}{\tr{Eq}{Gl}.} -\newcommand{\eqns}{\tr{Eqs}{Gln}.} -\newcommand{\Eqns}{\tr{Eqs}{Gln}.} -\newcommand{\eqnref}[1]{\eqn~\eqref{#1}} -\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}} -\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}} -\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}} - - -%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{listings} -\lstset{ - inputpath=../code, - basicstyle=\ttfamily\footnotesize, - numbers=left, - showstringspaces=false, - language=Matlab, - commentstyle=\itshape\color{darkgray}, - keywordstyle=\color{blue}, - stringstyle=\color{green}, - backgroundcolor=\color{blue!10}, - breaklines=true, - breakautoindent=true, - columns=flexible, - frame=single, - caption={\protect\filename@parse{\lstname}\protect\filename@base}, - captionpos=t, - xleftmargin=1em, - xrightmargin=1em, - aboveskip=10pt -} - -%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{amsmath} -\usepackage{bm} -\usepackage{dsfont} -\newcommand{\naZ}{\mathds{N}} -\newcommand{\gaZ}{\mathds{Z}} -\newcommand{\raZ}{\mathds{Q}} -\newcommand{\reZ}{\mathds{R}} -\newcommand{\reZp}{\mathds{R^+}} -\newcommand{\reZpN}{\mathds{R^+_0}} -\newcommand{\koZ}{\mathds{C}} - - -%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{ifthen} - -\newcommand{\code}[1]{\texttt{#1}} - -\newcommand{\source}[1]{ - \begin{flushright} - \color{gray}\scriptsize \url{#1} - \end{flushright} -} - -\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}% - {\medskip} - -\newcounter{maxexercise} -\setcounter{maxexercise}{9} % show listings up to exercise maxexercise -\newcounter{theexercise} -\setcounter{theexercise}{1} -\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung} - 
\arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}% - {\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}} - -\graphicspath{{figures/}} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{document} - -\maketitle - -%\tableofcontents - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \chapter{\tr{Descriptive statistics}{Deskriptive Statistik}} @@ -453,418 +227,3 @@ Korrelationskoeffizienten nahe 0 (\figrefb{correlationfig}). $x$ abh\"angen, ergeben Korrelationskeffizienten nahe Null. $\xi$ sind normalverteilte Zufallszahlen.} \end{figure} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\chapter{\tr{Bootstrap Methods}{Bootstrap Methoden}} - -Beim Bootstrap erzeugt man sich die Verteilung von Statistiken durch Resampling -aus der Stichprobe. Das hat mehrere Vorteile: -\begin{itemize} -\item Weniger Annahmen (z.B. muss eine Stichprobe nicht Normalverteilt sein). -\item H\"ohere Genauigkeit als klassische Methoden. -\item Allgemeing\"ultigkeit: Bootstrap Methoden sind sich sehr - \"ahnlich f\"ur viele verschiedene Statistiken und ben\"otigen nicht - f\"ur jede Statistik eine andere Formel. 
-\end{itemize} - -\begin{figure}[t] - \includegraphics[width=0.8\textwidth]{2012-10-29_16-26-05_771}\\[2ex] - \includegraphics[width=0.8\textwidth]{2012-10-29_16-41-39_523}\\[2ex] - \includegraphics[width=0.8\textwidth]{2012-10-29_16-29-35_312} - \caption{\tr{Why can we only measure a sample of the - population?}{Warum k\"onnen wir nur eine Stichprobe der - Grundgesamtheit messen?}} -\end{figure} - -\begin{figure}[t] - \includegraphics[height=0.2\textheight]{srs1}\\[2ex] - \includegraphics[height=0.2\textheight]{srs2}\\[2ex] - \includegraphics[height=0.2\textheight]{srs3} - \caption{Bootstrap der Stichprobenvertielung (a) Von der - Grundgesamtheit (population) mit unbekanntem Parameter - (z.B. Mittelwert $\mu$) zieht man Stichproben (SRS: simple random - samples). Die Statistik (hier Bestimmung von $\bar x$) kann f\"ur - jede Stichprobe berechnet werden. Die erhaltenen Werte entstammen - der Stichprobenverteilung. Meisten wird aber nur eine Stichprobe - gezogen! (b) Mit bestimmten Annahmen und Theorien kann man auf - die Stichprobenverteilung schlie{\ss}en ohne sie gemessen zu - haben. (c) Alternativ k\"onnen aus der einen Stichprobe viele - Bootstrap-Stichproben generiert werden (resampling) und so - Eigenschaften der Stichprobenverteilung empirisch bestimmt - werden. Aus Hesterberg et al. 2003, Bootstrap Methods and - Permuation Tests} -\end{figure} - -\section{Bootstrap des Standardfehlers} - -Beim Bootstrap erzeugen wir durch Resampling neue Stichproben und -benutzen diese um die Stichprobenverteilung einer Statistik zu -berechnen. Die Bootstrap Stichproben haben jeweils den gleichen Umfang -wie die urspr\"unglich gemessene Stichprobe und werden durch Ziehen -mit Zur\"ucklegen gewonnen. Jeder Wert der urspr\"unglichen Stichprobe -kann also einmal, mehrmals oder gar nicht in einer Bootstrap -Stichprobe vorkommen. 
- -\begin{exercise}[bootstrapsem.m] - Ziehe 1000 normalverteilte Zufallszahlen und berechne deren Mittelwert, - Standardabweichung und Standardfehler ($\sigma/\sqrt{n}$). - - Resample die Daten 1000 mal (Ziehen mit Zur\"ucklegen) und berechne jeweils - den Mittelwert. - - Plotte ein Histogramm dieser Mittelwerte, sowie deren Mittelwert und - die Standardabweichung. - - Was hat das mit dem Standardfehler zu tun? -\end{exercise} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\chapter{\tr{Maximum likelihood estimation}{Maximum-Likelihood Methode}} - -In vielen Situationen wollen wir einen oder mehrere Parameter $\theta$ -einer Wahrscheinlichkeitsverteilung sch\"atzen, so dass die Verteilung -die Daten $x_1, x_2, \ldots x_n$ am besten beschreibt. Bei der -Maximum-Likelihood-Methode w\"ahlen wir die Parameter so, dass die -Wahrscheinlichkeit, dass die Daten aus der Verteilung stammen, am -gr\"o{\ss}ten ist. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Maximum Likelihood} -Sei $p(x|\theta)$ (lies ``Wahrscheinlichkeit(sdichte) von $x$ gegeben -$\theta$'') die Wahrscheinlichkeits(dichte)verteilung von $x$ mit dem -Parameter(n) $\theta$. Das k\"onnte die Normalverteilung -\begin{equation} - \label{normpdfmean} - p(x|\theta) = \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x-\theta)^2}{2\sigma^2}} -\end{equation} -sein mit -fester Standardverteilung $\sigma$ und dem Mittelwert $\mu$ als -Parameter $\theta$. 
- -Wenn nun den $n$ unabh\"angigen Beobachtungen $x_1, x_2, \ldots x_n$ -die Wahrscheinlichkeitsverteilung $p(x|\theta)$ zugrundeliegt, dann -ist die Verbundwahrscheinlichkeit $p(x_1,x_2, \ldots x_n|\theta)$ des -Auftretens der Werte $x_1, x_2, \ldots x_n$ gegeben ein bestimmtes $\theta$ -\begin{equation} - p(x_1,x_2, \ldots x_n|\theta) = p(x_1|\theta) \cdot p(x_2|\theta) - \ldots p(x_n|\theta) = \prod_{i=1}^n p(x_i|\theta) \; . -\end{equation} -Andersherum gesehen ist das die Likelihood (deutsch immer noch ``Wahrscheinlichleit'') -den Parameter $\theta$ zu haben, gegeben die Me{\ss}werte $x_1, x_2, \ldots x_n$, -\begin{equation} - {\cal L}(\theta|x_1,x_2, \ldots x_n) = p(x_1,x_2, \ldots x_n|\theta) -\end{equation} - -Wir sind nun an dem Wert des Parameters $\theta_{mle}$ interessiert, der die -Likelihood maximiert (``mle'': Maximum-Likelihood Estimate): -\begin{equation} - \theta_{mle} = \text{argmax}_{\theta} {\cal L}(\theta|x_1,x_2, \ldots x_n) -\end{equation} -$\text{argmax}_xf(x)$ bezeichnet den Wert des Arguments $x$ der Funktion $f(x)$, bei -dem $f(x)$ ihr globales Maximum annimmt. Wir suchen also den Wert von $\theta$ -bei dem die Likelihood ${\cal L}(\theta)$ ihr Maximum hat. - -An der Stelle eines Maximums einer Funktion \"andert sich nichts, wenn -man die Funktionswerte mit einer streng monoton steigenden Funktion -transformiert. 
Aus gleich ersichtlichen mathematischen Gr\"unden wird meistens -das Maximum der logarithmierten Likelihood (``Log-Likelihood'') gesucht: -\begin{eqnarray} - \theta_{mle} & = & \text{argmax}_{\theta}\; {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\ - & = & \text{argmax}_{\theta}\; \log {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\ - & = & \text{argmax}_{\theta}\; \log \prod_{i=1}^n p(x_i|\theta) \nonumber \\ - & = & \text{argmax}_{\theta}\; \sum_{i=1}^n \log p(x_i|\theta) \label{loglikelihood} -\end{eqnarray} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Beispiel: Das arithmetische Mittel} - -Wenn die Me{\ss}daten $x_1, x_2, \ldots x_n$ der Normalverteilung \eqnref{normpdfmean} -entstammen, und wir den Mittelwert $\mu$ als einzigen Parameter der Verteilung betrachten, -welcher Wert von $\theta$ maximiert dessen Likelhood? - -\begin{figure}[t] - \includegraphics[width=1\textwidth]{mlemean} - \caption{\label{mlemeanfig} Maximum Likelihood Estimation des - Mittelwerts. Oben: Die Daten zusammen mit drei m\"oglichen - Normalverteilungen mit unterschiedlichen Mittelwerten (Pfeile) aus - denen die Daten stammen k\"onnten. Unteln links: Die Likelihood - in Abh\"angigkeit des Mittelwerts als Parameter der - Normalverteilungen. Unten rechts: die entsprechende - Log-Likelihood. 
An der Position des Maximums bei $\theta=2$ - \"andert sich nichts (Pfeil).} -\end{figure} - -Die Log-Likelihood \eqnref{loglikelihood} ist -\begin{eqnarray*} - \log {\cal L}(\theta|x_1,x_2, \ldots x_n) - & = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x_i-\theta)^2}{2\sigma^2}} \\ - & = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma^2} -\frac{(x_i-\theta)^2}{2\sigma^2} -\end{eqnarray*} -Zur Bestimmung des Maximums der Log-Likelihood berechnen wir deren Ableitung -nach dem Parameter $\theta$ und setzen diese gleich Null: -\begin{eqnarray*} - \frac{\text{d}}{\text{d}\theta} \log {\cal L}(\theta|x_1,x_2, \ldots x_n) & = & \sum_{i=1}^n \frac{2(x_i-\theta)}{2\sigma^2} \;\; = \;\; 0 \\ - \Leftrightarrow \quad \sum_{i=1}^n x_i - \sum_{i=1}^n x_i \theta & = & 0 \\ - \Leftrightarrow \quad n \theta & = & \sum_{i=1}^n x_i \\ - \Leftrightarrow \quad \theta & = & \frac{1}{n} \sum_{i=1}^n x_i -\end{eqnarray*} -Der Maximum-Likelihood-Estimator ist das arithmetische Mittel der Daten. D.h. -das arithmetische Mittel maximiert die Wahrscheinlichkeit, dass die Daten aus einer -Normalverteilung mit diesem Mittelwert gezogen worden sind. - -\begin{exercise}[mlemean.m] - Ziehe $n=50$ normalverteilte Zufallsvariablen mit einem Mittelwert $\ne 0$ - und einer Standardabweichung $\ne 1$. - - Plotte die Likelihood (aus dem Produkt der Wahrscheinlichkeiten) und - die Log-Likelihood (aus der Summe der logarithmierten - Wahrscheinlichkeiten) f\"ur den Mittelwert als Parameter. Vergleiche - die Position der Maxima mit den aus den Daten berechneten - Mittelwerte. -\end{exercise} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Kurvenfit als Maximum Likelihood Estimation} -Beim Kurvenfit soll eine Funktion $f(x;\theta)$ mit den Parametern -$\theta$ an die Datenpaare $(x_i|y_i)$ durch Anpassung der Parameter -$\theta$ gefittet werden. 
Wenn wir annehmen, dass die $y_i$ um die -entsprechenden Funktionswerte $f(x_i;\theta)$ mit einer -Standardabweichung $\sigma_i$ normalverteilt streuen, dann lautet die -Log-Likelihood -\begin{eqnarray*} - \log {\cal L}(\theta|x_1,x_2, \ldots x_n) - & = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma_i^2}}e^{-\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2}} \\ - & = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma_i^2} -\frac{(x_i-f(y_i;\theta))^2}{2\sigma_i^2} \\ -\end{eqnarray*} -Der einzige Unterschied zum vorherigen Beispiel ist, dass die -Mittelwerte der Normalverteilungen nun durch die Funktionswerte -gegeben sind. - -Der Parameter $\theta$ soll so gew\"ahlt werden, dass die -Log-Likelihood maximal wird. Der erste Term der Summe ist -unabh\"angig von $\theta$ und kann deshalb bei der Suche nach dem -Maximum weggelassen werden. -\begin{eqnarray*} - & = & - \frac{1}{2} \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 -\end{eqnarray*} -Anstatt nach dem Maximum zu suchen, k\"onnen wir auch das Vorzeichen der Log-Likelihood -umdrehen und nach dem Minimum suchen. Dabei k\"onnen wir auch den Faktor $1/2$ vor der Summe vernachl\"assigen --- auch das \"andert nichts an der Position des Minimums. -\begin{equation} - \theta_{mle} = \text{argmin}_{\theta} \; \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 \;\; = \;\; \text{argmin}_{\theta} \; \chi^2 -\end{equation} -Die Summer der quadratischen Abst\"ande normiert auf die jeweiligen -Standardabweichungen wird auch mit $\chi^2$ bezeichnet. Der Wert des -Parameters $\theta$ welcher den quadratischen Abstand minimiert ist -also identisch mit der Maximierung der Wahrscheinlichkeit, dass die -Daten tats\"achlich aus der Funktion stammen k\"onnen. Minimierung des -$\chi^2$ ist also ein Maximum-Likelihood Estimate. 
- -\begin{figure}[t] - \includegraphics[width=1\textwidth]{mlepropline} - \caption{\label{mleproplinefig} Maximum Likelihood Estimation der - Steigung einer Ursprungsgeraden.} -\end{figure} - - -\subsection{Beispiel: einfache Proportionalit\"at} -Als Funktion nehmen wir die Ursprungsgerade -\[ f(x) = \theta x \] -mit Steigung $\theta$. Die $\chi^2$-Summe lautet damit -\[ \chi^2 = \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \; . \] -Zur Bestimmung des Minimums berechnen wir wieder die erste Ableitung nach $\theta$ -und setzen diese gleich Null: -\begin{eqnarray} - \frac{\text{d}}{\text{d}\theta}\chi^2 & = & \frac{\text{d}}{\text{d}\theta} \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\ - & = & \sum_{i=1}^n \frac{\text{d}}{\text{d}\theta} \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\ - & = & -2 \sum_{i=1}^n \frac{x_i}{\sigma_i} \left( \frac{y_i-\theta x_i}{\sigma_i} \right) \nonumber \\ - & = & -2 \sum_{i=1}^n \left( \frac{x_iy_i}{\sigma_i^2} - \theta \frac{x_i^2}{\sigma_i^2} \right) \;\; = \;\; 0 \nonumber \\ -\Leftrightarrow \quad \theta \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2} & = & \sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2} \nonumber \\ -\Leftrightarrow \quad \theta & = & \frac{\sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2}}{ \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2}} \label{mleslope} -\end{eqnarray} -Damit haben wir nun einen anlytischen Ausdruck f\"ur die Bestimmung -der Steigung $\theta$ des Regressionsgeraden gewonnen. Ein -Gradientenabstieg ist f\"ur das Fitten der Geradensteigung also gar nicht -n\"otig. Das gilt allgemein f\"ur das Fitten von Koeffizienten von -linear kombinierten Basisfunktionen. Parameter die nichtlinear in -einer Funktion enthalten sind k\"onnen aber nicht analytisch aus den -Daten berechnet werden. Da bleibt dann nur auf numerische Verfahren -zur Optimierung der Kostenfunktion, wie z.B. der Gradientenabstieg, -zur\"uckzugreifen. 
- - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Fits von Wahrscheinlichkeitsverteilungen} -Zum Abschluss betrachten wir noch den Fall, bei dem wir die Parameter -einer Wahrscheinlichkeitsdichtefunktion (z.B. Mittelwert und -Standardabweichung der Normalverteilung) an ein Datenset fitten wolle. - -Ein erster Gedanke k\"onnte sein, die -Wahrscheinlichkeitsdichtefunktion durch Minimierung des quadratischen -Abstands an ein Histogram der Daten zu fitten. Das ist aber aus -folgenden Gr\"unden nicht die Methode der Wahl: (i) -Wahrscheinlichkeitsdichten k\"onnen nur positiv sein. Darum k\"onnen -insbesondere bei kleinen Werten die Daten nicht symmetrisch streuen, -wie es normalverteilte Daten machen sollten. (ii) Die Datenwerte sind -nicht unabh\"angig, da das normierte Histogram sich zu Eins -aufintegriert. Die beiden Annahmen normalverteilte und unabh\"angige Daten -die die Minimierung des quadratischen Abstands zu einem Maximum -Likelihood Estimator machen sind also verletzt. (iii) Das Histgramm -h\"angt von der Wahl der Klassenbreite ab. - -Den direkten Weg, eine Wahrscheinlichkeitsdichtefunktion an ein -Datenset zu fitten, haben wir oben schon bei dem Beispiel zur -Absch\"atzung des Mittelwertes einer Normalverteilung gesehen --- -Maximum Likelihood! Wir suchen einfach die Parameter $\theta$ der -gesuchten Wahrscheinlichkeitsdichtefunktion bei der die Log-Likelihood -\eqnref{loglikelihood} maximal wird. Das ist im allgemeinen ein -nichtlinieares Optimierungsproblem, das mit numerischen Verfahren, wie -z.B. dem Gradientenabstieg, gel\"ost wird. - -\begin{figure}[t] - \includegraphics[width=1\textwidth]{mlepdf} - \caption{\label{mlepdffig} Maximum Likelihood Estimation einer - Wahrscheinlichkeitsdichtefunktion. Links: die 100 Datenpunkte, die aus der Gammaverteilung - 2. Ordnung (rot) gezogen worden sind. Der Maximum-Likelihood-Fit ist orange dargestellt. 
- Rechts: das normierte Histogramm der Daten zusammen mit der \"uber Minimierung - des quadratischen Abstands zum Histogramm berechneten Fits ist potentiell schlechter.} -\end{figure} - -\end{document} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Statistics} -What is "a statistic"? % dt. Sch\"atzfunktion -\begin{definition}[statistic] - A statistic (singular) is a single measure of some attribute of a - sample (e.g., its arithmetic mean value). It is calculated by - applying a function (statistical algorithm) to the values of the - items of the sample, which are known together as a set of data. - - \source{http://en.wikipedia.org/wiki/Statistic} -\end{definition} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Data types} - -\subsection{Nominal scale} -\begin{itemize} -\item Binary - \begin{itemize} - \item ``yes/no'', - \item ``true/false'', - \item ``success/failure'', etc. - \end{itemize} -\item Categorial - \begin{itemize} - \item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''), - \item blood type (``A/B/AB/0''), - \item parts of speech (``noun/veerb/preposition/article/...''), - \item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc. - \end{itemize} -\item Each observation/measurement/sample is put into one category -\item There is no reasonable order among the categories.\\ - example: [rods, cones] vs. [cones, rods] -\item Statistics: mode, i.e. the most common item -\end{itemize} - -\subsection{Ordinal scale} -\begin{itemize} -\item Like nominal scale, but with an order -\item Examples: ranks, ratings - \begin{itemize} - \item ``bad/ok/good'', - \item ``cold/warm/hot'', - \item ``young/old'', etc. 
- \end{itemize} -\item {\bf But:} there is no reasonable measure of {\em distance} - between the classes -\item Statistics: mode, median -\end{itemize} - -\subsection{Interval scale} -\begin{itemize} -\item Quantitative/metric values -\item Reasonable measure of distance between values, but no absolute zero -\item Examples: - \begin{itemize} - \item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C) - \item Direction measured in degrees from magnetic or true north - \end{itemize} -\item Statistics: - \begin{itemize} - \item Central tendency: mode, median, arithmetic mean - \item Dispersion: range, standard deviation - \end{itemize} -\end{itemize} - -\subsection{Absolute/ratio scale} -\begin{itemize} -\item Like interval scale, but with absolute origin/zero -\item Examples: - \begin{itemize} - \item Temperature in $^\circ$K - \item Length, mass, duration, electric charge, ... - \item Plane angle, etc. - \item Count (e.g. number of spikes in response to a stimulus) - \end{itemize} -\item Statistics: - \begin{itemize} - \item Central tendency: mode, median, arithmetic, geometric, harmonic mean - \item Dispersion: range, standard deviation - \item Coefficient of variation (ratio standard deviation/mean) - \item All other statistical measures - \end{itemize} -\end{itemize} - -\subsection{Data types} -\begin{itemize} -\item Data type selects - \begin{itemize} - \item statistics - \item type of plots (bar graph versus x-y plot) - \item correct tests - \end{itemize} -\item Scales exhibit increasing information content from nominal - to absolute.\\ - Conversion ,,downwards'' is always possible -\item For example: size measured in meter (ratio scale) $\rightarrow$ - categories ``small/medium/large'' (ordinal scale) -\end{itemize} - -\subsection{Examples from neuroscience} -\begin{itemize} -\item {\bf absolute:} - \begin{itemize} - \item size of neuron/brain - \item length of axon - \item ion concentration - \item membrane potential - \item firing rate 
- \end{itemize} - -\item {\bf interval:} - \begin{itemize} - \item edge orientation - \end{itemize} - -\item {\bf ordinal:} - \begin{itemize} - \item stages of a disease - \item ratings - \end{itemize} - -\item {\bf nominal:} - \begin{itemize} - \item cell type - \item odor - \item states of an ion channel - \end{itemize} - -\end{itemize} -