New bootstrap exercises
This commit is contained in:
47
statistics/lecture/boxwhisker.py
Normal file
47
statistics/lecture/boxwhisker.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# The plotted data come from a previously saved array file. The two
# commented-out lines below show how such a file can be created
# (a 40 x 10 array of normally distributed random numbers) and stored.
#x = np.random.randn( 40, 10 )
|
||||
#np.save('boxwhiskerdata', x )
|
||||
# Read the stored array; the path is relative to the working directory.
x = np.load('boxwhiskerdata.npy')
|
||||
|
||||
# Enable matplotlib's hand-drawn "xkcd" sketch style for everything below.
plt.xkcd()
|
||||
# One figure holding a single axes.
fig = plt.figure( figsize=(6,4) )
|
||||
ax = fig.add_subplot( 1, 1, 1 )
|
||||
# Hide the right and top frame lines and keep ticks only on the
# remaining left and bottom axes.
ax.spines['right'].set_visible(False)
|
||||
ax.spines['top'].set_visible(False)
|
||||
ax.yaxis.set_ticks_position('left')
|
||||
ax.xaxis.set_ticks_position('bottom')
|
||||
# Axis labels and a fixed y-range; the annotation coordinates used later
# are hand-placed data coordinates inside this range.
ax.set_xlabel('Experiment')
|
||||
ax.set_ylabel('x')
|
||||
ax.set_ylim( -4.0, 4.0)
|
||||
# Text labels with arrows explaining the elements of a box-whisker plot.
# For every label, xy is the arrow tip and xytext the text position, both
# in data coordinates; all values are hard-coded for the loaded data set.
# NOTE(review): x near 4 and near 6 presumably targets the 4th and 6th
# box (boxplot's default positions 1..N) -- confirm against the figure.
ax.annotate('Median',
|
||||
xy=(3.9, 0.1), xycoords='data',
|
||||
xytext=(3.5, -2.5), textcoords='data', ha='right',
|
||||
arrowprops=dict(arrowstyle="->", relpos=(0.8,1.0),
|
||||
connectionstyle="angle3,angleA=-110,angleB=60") )
|
||||
# Label for the lower edge of the box.
ax.annotate('1. quartile',
|
||||
xy=(5.8, -0.7), xycoords='data',
|
||||
xytext=(5.5, -3.5), textcoords='data', ha='right',
|
||||
arrowprops=dict(arrowstyle="->", relpos=(0.5,1.0),
|
||||
connectionstyle="angle3,angleA=30,angleB=70") )
|
||||
# Label for the upper edge of the box.
ax.annotate('3. quartile',
|
||||
xy=(6.1, 0.6), xycoords='data',
|
||||
xytext=(6.5, 3.0), textcoords='data', ha='left',
|
||||
arrowprops=dict(arrowstyle="->", relpos=(0.0,0.0),
|
||||
connectionstyle="angle3,angleA=30,angleB=70") )
|
||||
# Label for the end of the lower whisker.
ax.annotate('minimum',
|
||||
xy=(6.1, -2.3), xycoords='data',
|
||||
xytext=(7.2, -3.3), textcoords='data', ha='left',
|
||||
arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
|
||||
connectionstyle="angle3,angleA=10,angleB=100") )
|
||||
# Label for the end of the upper whisker.
ax.annotate('maximum',
|
||||
xy=(5.9, 2.8), xycoords='data',
|
||||
xytext=(4.9, 3.5), textcoords='data', ha='right',
|
||||
arrowprops=dict(arrowstyle="->", relpos=(1.0,0.5),
|
||||
connectionstyle="angle3,angleA=0,angleB=120") )
|
||||
# Draw the box-whisker plot of the loaded array.
# NOTE(review): whis=100.0 is presumably chosen so large that the whiskers
# reach the smallest and largest data value (matching the 'minimum' and
# 'maximum' labels) and no outliers are drawn -- confirm against the
# whis semantics of the installed matplotlib version.
ax.boxplot( x, whis=100.0 )
|
||||
# Fit the layout to the figure, write the PDF into the working directory
# and finally open the interactive window.
plt.tight_layout()
|
||||
plt.savefig('boxwhisker.pdf')
|
||||
plt.show()
|
||||
|
||||
BIN
statistics/lecture/boxwhiskerdata.npy
Normal file
BIN
statistics/lecture/boxwhiskerdata.npy
Normal file
Binary file not shown.
@@ -82,6 +82,8 @@
|
||||
\arabic{theexercise}:} \stepcounter{theexercise}\newline \newcommand{\exercisesource}{#1}}%
|
||||
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\equal{\showlisting}{yes}}{\medskip\lstinputlisting{\exercisesource}}{}}\medskip}
|
||||
|
||||
\graphicspath{{figures/}}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
@@ -269,6 +271,24 @@ spricht von einer Wahrscheinlichkeitsdichte.
|
||||
\tr{Why?}{Warum?}
|
||||
\end{exercise}
|
||||
|
||||
\begin{exercise}[boxwhisker.m]
|
||||
\tr{Generate a $40 \times 10$ matrix of random numbers and
|
||||
illustrate their distribution in a box-whisker plot
|
||||
(\code{boxplot()} function). How to interpret the plot?}
|
||||
{Erzeuge eine $40 \times 10$ Matrix
|
||||
von Zufallszahlen und illustriere ihre Verteilungen in einem
|
||||
Box-Whisker Plot (\code{boxplot()} Funktion, lies die Hilfe!). Wie ist der
|
||||
Box-Whisker Plot zu interpretieren? Was hat es mit den Ausrei{\ss}ern auf sich?
|
||||
Wie kann man erreichen, dass die Whisker den kleinsten und den gr\"o{\ss}ten
|
||||
Datenwert anzeigen? Warum sind die unterschiedlichen Box-Whiskers nicht alle gleich,
|
||||
obwohl sie aus derselben Verteilung gezogen worden sind?}
|
||||
\end{exercise}
|
||||
|
||||
\begin{figure}[t]
|
||||
\includegraphics[width=1\textwidth]{boxwhisker}
|
||||
\caption{\label{boxwhiskerfig} Box-whisker plots illustrate distributions.}
|
||||
\end{figure}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Data types}
|
||||
@@ -390,6 +410,71 @@ spricht von einer Wahrscheinlichkeitsdichte.
|
||||
\end{itemize}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\chapter{\tr{Bootstrap Methods}{Bootstrap Methoden}}
|
||||
|
||||
Beim Bootstrap erzeugt man sich die Verteilung von Statistiken durch Resampling
|
||||
aus der Stichprobe. Das hat mehrere Vorteile:
|
||||
\begin{itemize}
|
||||
\item Weniger Annahmen (z.B. muss eine Stichprobe nicht normalverteilt sein).
|
||||
\item H\"ohere Genauigkeit als klassische Methoden.
|
||||
\item Allgemeing\"ultigkeit: Bootstrap Methoden sind sich sehr
|
||||
\"ahnlich f\"ur viele verschiedene Statistiken und ben\"otigen nicht
|
||||
f\"ur jede Statistik eine andere Formel.
|
||||
\end{itemize}
|
||||
|
||||
\begin{figure}[t]
|
||||
\includegraphics[width=0.8\textwidth]{2012-10-29_16-26-05_771}\\[2ex]
|
||||
\includegraphics[width=0.8\textwidth]{2012-10-29_16-41-39_523}\\[2ex]
|
||||
\includegraphics[width=0.8\textwidth]{2012-10-29_16-29-35_312}
|
||||
\caption{\tr{Why can we only measure a sample of the
|
||||
population?}{Warum k\"onnen wir nur eine Stichprobe der
|
||||
Grundgesamtheit messen?}}
|
||||
\end{figure}
|
||||
|
||||
\begin{figure}[t]
|
||||
\includegraphics[height=0.2\textheight]{srs1}\\[2ex]
|
||||
\includegraphics[height=0.2\textheight]{srs2}\\[2ex]
|
||||
\includegraphics[height=0.2\textheight]{srs3}
|
||||
\caption{Bootstrap der Stichprobenverteilung (a) Von der
|
||||
Grundgesamtheit (population) mit unbekanntem Parameter
|
||||
(z.B. Mittelwert $\mu$) zieht man Stichproben (SRS: simple random
|
||||
samples). Die Statistik (hier Bestimmung von $\bar x$) kann f\"ur
|
||||
jede Stichprobe berechnet werden. Die erhaltenen Werte entstammen
|
||||
der Stichprobenverteilung. Meistens wird aber nur eine Stichprobe
|
||||
gezogen! (b) Mit bestimmten Annahmen und Theorien kann man auf
|
||||
die Stichprobenverteilung schlie{\ss}en ohne sie gemessen zu
|
||||
haben. (c) Alternativ k\"onnen aus der einen Stichprobe viele
|
||||
Bootstrap-Stichproben generiert werden (resampling) und so
|
||||
Eigenschaften der Stichprobenverteilung empirisch bestimmt
|
||||
werden. Aus Hesterberg et al. 2003, Bootstrap Methods and
|
||||
Permutation Tests}
|
||||
\end{figure}
|
||||
|
||||
\section{Bootstrap des Standardfehlers}
|
||||
|
||||
Beim Bootstrap erzeugen wir durch resampling neue Stichproben und
|
||||
benutzen diese um die Stichprobenverteilung einer Statistik zu
|
||||
berechnen. Die Bootstrap Stichproben haben jeweils den gleichen Umfang
|
||||
wie die urspr\"unglich gemessene Stichprobe und werden durch Ziehen
|
||||
mit Zur\"ucklegen gewonnen. Jeder Wert der urspr\"unglichen Stichprobe
|
||||
kann also einmal, mehrmals oder gar nicht in einer Bootstrap
|
||||
Stichprobe vorkommen.
|
||||
|
||||
\begin{exercise}[bootstrapsem.m]
|
||||
Ziehe 1000 normalverteilte Zufallszahlen und berechne deren Mittelwert,
|
||||
Standardabweichung und Standardfehler ($\sigma/\sqrt{n}$).
|
||||
|
||||
Resample die Daten 1000 mal (Ziehen mit Zur\"ucklegen) und berechne jeweils
|
||||
den Mittelwert.
|
||||
|
||||
Plotte ein Histogramm dieser Mittelwerte, sowie deren Mittelwert und
|
||||
die Standardabweichung.
|
||||
|
||||
Was hat das mit dem Standardfehler zu tun?
|
||||
\end{exercise}
|
||||
|
||||
\end{document}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
BIN
statistics/lecture/figures/2012-10-29_16-26-05_771.jpg
Executable file
BIN
statistics/lecture/figures/2012-10-29_16-26-05_771.jpg
Executable file
Binary file not shown.
|
After Width: | Height: | Size: 724 KiB |
BIN
statistics/lecture/figures/2012-10-29_16-29-35_312.jpg
Executable file
BIN
statistics/lecture/figures/2012-10-29_16-29-35_312.jpg
Executable file
Binary file not shown.
|
After Width: | Height: | Size: 386 KiB |
BIN
statistics/lecture/figures/2012-10-29_16-41-39_523.jpg
Executable file
BIN
statistics/lecture/figures/2012-10-29_16-41-39_523.jpg
Executable file
Binary file not shown.
|
After Width: | Height: | Size: 461 KiB |
BIN
statistics/lecture/figures/srs1.png
Normal file
BIN
statistics/lecture/figures/srs1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 59 KiB |
BIN
statistics/lecture/figures/srs2.png
Normal file
BIN
statistics/lecture/figures/srs2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 55 KiB |
BIN
statistics/lecture/figures/srs3.png
Normal file
BIN
statistics/lecture/figures/srs3.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 73 KiB |
Reference in New Issue
Block a user