translated bootstrap exercises

2017-12-04 22:41:58 +01:00 · 2017-12-04 22:41:58 +01:00 · 9abe1c43f4
commit 9abe1c43f4
parent e39f29847f
3 changed files with 229 additions and 49 deletions
--- a/bootstrap/exercises/exercises01-de.tex
+++ b/bootstrap/exercises/exercises01-de.tex
@ -0,0 +1,173 @@
+\documentclass[12pt,a4paper,pdftex]{exam}
+
+\usepackage[german]{babel}
+\usepackage{pslatex}
+\usepackage[mediumspace,mediumqspace,Gray]{SIunits}      % \ohm, \micro
+\usepackage{xcolor}
+\usepackage{graphicx}
+\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
+
+%%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
+\pagestyle{headandfoot}
+\ifprintanswers
+\newcommand{\stitle}{: L\"osungen}
+\else
+\newcommand{\stitle}{}
+\fi
+\header{{\bfseries\large \"Ubung\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large 17. Januar, 2017}}
+\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
+jan.benda@uni-tuebingen.de}
+\runningfooter{}{\thepage}{}
+
+\setlength{\baselineskip}{15pt}
+\setlength{\parindent}{0.0cm}
+\setlength{\parskip}{0.3cm}
+\renewcommand{\baselinestretch}{1.15}
+
+%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\usepackage{listings}
+\lstset{
+  language=Matlab,
+  basicstyle=\ttfamily\footnotesize,
+  numbers=left,
+  numberstyle=\tiny,
+  title=\lstname,
+  showstringspaces=false,
+  commentstyle=\itshape\color{darkgray},
+  breaklines=true,
+  breakautoindent=true,
+  columns=flexible,
+  frame=single,
+  xleftmargin=1em,
+  xrightmargin=1em,
+  aboveskip=10pt
+}
+
+%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\usepackage{amsmath}
+\usepackage{amssymb}
+\usepackage{bm} 
+\usepackage{dsfont}
+\newcommand{\naZ}{\mathds{N}}
+\newcommand{\gaZ}{\mathds{Z}}
+\newcommand{\raZ}{\mathds{Q}}
+\newcommand{\reZ}{\mathds{R}}
+\newcommand{\reZp}{\mathds{R^+}}
+\newcommand{\reZpN}{\mathds{R^+_0}}
+\newcommand{\koZ}{\mathds{C}}
+
+%%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\newcommand{\continue}{\ifprintanswers%
+\else
+\vfill\hspace*{\fill}$\rightarrow$\newpage%
+\fi}
+\newcommand{\continuepage}{\ifprintanswers%
+\newpage
+\else
+\vfill\hspace*{\fill}$\rightarrow$\newpage%
+\fi}
+\newcommand{\newsolutionpage}{\ifprintanswers%
+\newpage%
+\else
+\fi}
+
+%%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\newcommand{\qt}[1]{\textbf{#1}\\}
+\newcommand{\pref}[1]{(\ref{#1})}
+\newcommand{\extra}{--- Zusatzaufgabe ---\ \mbox{}}
+\newcommand{\code}[1]{\texttt{#1}}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{document}
+
+\input{instructions}
+
+\begin{questions}
+
+\question \qt{Bootstrap des Standardfehlers}
+Wir wollen den Standardfehler, die Standardabweichung des Mittelwerts,
+eines Datensatzes mit Hilfe der Bootstrapmethode berechnen und mit der
+Formel ``Standardabweichung geteilt durch Wurzel aus $n$''
+vergleichen.
+\begin{parts}
+  \part Lade von Ilias die Datei \code{thymusglandweights.dat} herunter.
+  Darin befindet sich ein Datensatz vom Gewicht der Thymusdr\"use in 14 Tage alten
+  H\"uhnerembryos in mg.
+  \part Lade diese Daten in Matlab (\code{load} Funktion).
+  \part Bestimme Histogramm, Mittelwert und Standardfehler aus den ersten 80 Datenpunkten.
+  \part Bestimme den Standardfehler aus den ersten 80 Datenpunkten durch 500-mal Bootstrappen.
+  \part Bestimme das 95\,\% Konfidenzintervall f\"ur den Mittelwert
+  aus der Bootstrap Verteilung (\code{quantile()} Funktion) --- also
+  das Intervall, innerhalb dessen mit 95\,\% Wahrscheinlichkeit der
+  wahre Mittelwert liegen wird.
+  \part Benutze den ganzen Datensatz und die Bootstrapping Technik, um die Abh\"angigkeit
+  des Standardfehlers von der Stichprobengr\"o{\ss}e zu bestimmen.
+  \part Vergleiche mit der bekannten Formel f\"ur den Standardfehler $\sigma/\sqrt{n}$.
+\end{parts}
+\begin{solution}
+  \lstinputlisting{bootstrapmean.m}
+  \lstinputlisting{bootstraptymus.m}
+  \includegraphics[width=0.5\textwidth]{bootstraptymus-datahist}
+  \includegraphics[width=0.5\textwidth]{bootstraptymus-meanhist}
+  \includegraphics[width=0.5\textwidth]{bootstraptymus-samples}
+\end{solution}
+
+
+\question \qt{Student t-Verteilung}
+Durch Standardabweichungen normierte Mittelwerte sind nicht gau{\ss}verteilt,
+wenn beide aus normalverteilten Daten abgesch\"atzt werden.
+Die Verteilung von $t=\bar x/(\sigma_x/\sqrt{m})$ folgt vielmehr
+der Student t-Verteilung.
+\begin{parts}
+\part Erzeuge 100000 normalverteilte Zufallszahlen.
+\part Ziehe daraus 1000 Stichproben vom Umfang $m=3$, 5, 10, oder 50.
+\part Berechne den Mittelwert $\bar x$ der Stichproben und plotte die Wahrscheinlichkeitsdichte
+dieser Mittelwerte.
+\part Vergleiche diese Wahrscheinlichkeitsdichte mit der Gau{\ss}kurve.
+\part Berechne au{\ss}erdem die Gr\"o{\ss}e $t=\bar x/(\sigma_x/\sqrt{m})$
+(Standardabweichung $\sigma_x$) und vergleiche diese mit der Normalverteilung mit Standardabweichung Eins. Ist $t$ normalverteilt, bzw. unter welchen Bedingungen ist $t$ normalverteilt?
+\end{parts}
+\newsolutionpage
+\begin{solution}
+  \lstinputlisting{tdistribution.m}
+  \includegraphics[width=1\textwidth]{tdistribution-n03}\\
+  \includegraphics[width=1\textwidth]{tdistribution-n05}\\
+  \includegraphics[width=1\textwidth]{tdistribution-n10}\\
+  \includegraphics[width=1\textwidth]{tdistribution-n50}
+\end{solution}
+
+
+\continue
+\question \qt{Permutationstest}
+Wir wollen die Signifikanz einer Korrelation durch einen
+Permutationstest bestimmen.
+\begin{parts}
+\part Erzeuge 1000 korrelierte Zufallszahlen $x$, $y$ durch
+\begin{verbatim}
+n = 1000
+a = 0.2;
+x = randn(n, 1);
+y = randn(n, 1) + a*x;
+\end{verbatim}
+\part Erstelle einen Scatterplot der beiden Variablen.
+\part Warum ist $y$ mit $x$ korreliert?
+\part Berechne den Korrelationskoeffizienten zwischen $x$ und $y$.
+\part Was m\"usste man tun, um die Korrelationen zwischen den $x$-$y$
+Paaren zu zerst\"oren?
+\part Mach genau dies 1000 mal und berechne jedes Mal den Korrelationskoeffizienten.
+\part Bestimme die Wahrscheinlichkeitsdichte dieser Korrelationskoeffizienten.
+\part Ist die Korrelation der urspr\"unglichen Daten signifikant?
+\part Variiere die Stichprobengr\"o{\ss}e \code{n} und \"uberpr\"ufe
+auf gleiche Weise die Signifikanz.
+\end{parts}
+\begin{solution}
+  \lstinputlisting{correlationsignificance.m}
+  \includegraphics[width=1\textwidth]{correlationsignificance}
+\end{solution}
+
+
+\end{questions}
+
+\end{document}
--- a/bootstrap/exercises/exercises01.tex
+++ b/bootstrap/exercises/exercises01.tex
@ -1,6 +1,6 @@
 \documentclass[12pt,a4paper,pdftex]{exam}

-\usepackage[german]{babel}
+\usepackage[english]{babel}
 \usepackage{pslatex}
 \usepackage[mediumspace,mediumqspace,Gray]{SIunits}      % \ohm, \micro
 \usepackage{xcolor}
@ -11,11 +11,11 @@
 \usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
 \pagestyle{headandfoot}
 \ifprintanswers
-\newcommand{\stitle}{: L\"osungen}
+\newcommand{\stitle}{: Solutions}
 \else
 \newcommand{\stitle}{}
 \fi
-\header{{\bfseries\large \"Ubung\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large 17. Januar, 2017}}
+\header{{\bfseries\large Exercise 9\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large December 5th, 2017}}
 \firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
 jan.benda@uni-tuebingen.de}
 \runningfooter{}{\thepage}{}
@ -87,24 +87,27 @@ jan.benda@uni-tuebingen.de}
 \begin{questions}

 \question \qt{Bootstrap des Standardfehlers}
-Wir wollen den Standardfehler, die Standardabweichung des Mittelwerts,
-eines Datensatze mit Hilfe der Bootstrapmethode berechnen und mit der
-Formel ``Standardabweichung geteilt durch Wurzel aus $n$''
-vergleichen.
+We want to compute the standard error of the mean of a data set by
+means of the bootstrap method and compare the result with the formula
+``standard deviation divided by the square-root of $n$''.
 \begin{parts}
-  \part Lade von Ilias die Datei \code{thymusglandweights.dat} herunter.
-  Darin befindet sich ein Datensatz vom Gewicht der Thymus Dr\"use in 14-Tage alten
-  H\"uhnerembryos in mg.
-  \part Lade diese Daten in Matlab (\code{load} Funktion).
-  \part Bestimme Histogramm, Mittelwert und Standardfehler aus den ersten 80 Datenpunkten.
-  \part Bestimme den Standardfehler aus den ersten 80 Datenpunkten durch 500-mal Bootstrappen.
-  \part Bestimme das 95\,\% Konfidenzintervall f\"ur den Mittelwert
-  aus der Bootstrap Verteilung (\code{quantile()} Funktion) --- also
-  das Interval innerhalb dessen mit 95\,\% Wahrscheinlichkeit der
-  wahre Mittelwert liegen wird.
-  \part Benutze den ganzen Datensatz und die Bootstrapping Technik, um die Abh\"angigkeit
-  des Standardfehlers von der Stichprobengr\"o{\ss}e zu bestimmen.
-  \part Vergleiche mit der bekannten Formel f\"ur den Standardfehler $\sigma/\sqrt{n}$.
+  \part Download the file \code{thymusglandweights.dat} from Ilias.
+  This is a data set of the weights of the thymus glands of 14-day-old chicken embryos,
+  measured in milligrams.
+  \part Load the data into Matlab (\code{load} function).
+  \part Compute histogram, mean, and standard error of the mean of the first 80 data points.
+  \part Compute the standard error of the mean of the first 80 data
+  points by bootstrapping 500 times. Write a function that
+  bootstraps the standard error of the mean of a given data set. The
+  function should also return a vector with the bootstrapped means.
+  \part Compute the 95\,\% confidence interval for the mean from the
+  bootstrap distribution (\code{quantile()} function) --- the
+  interval that contains the true mean with 95\,\% probability.
+  \part Use the whole data set and the bootstrap method to compute
+  the dependence of the standard error of the mean on the sample
+  size $n$.
+  \part Compare your result with the formula for the standard error
+  $\sigma/\sqrt{n}$.
 \end{parts}
 \begin{solution}
  \lstinputlisting{bootstrapmean.m}
@ -115,19 +118,24 @@ vergleichen.
 \end{solution}


-\question \qt{Student t-Verteilung}
-Durch Standardabweichungen normierte Mittelwerte sind nicht Gaussverteilt,
-wenn beide aus Normalverteilten Daten abgesch\"atzt werden.
-Die Verteilung von $t=\bar x/(\sigma_x/\sqrt{m})$ folgt vielmehr
-der Student t-Verteilung.
+\question \qt{Student's t-distribution}
+The distribution of Student's t, $t=\bar x/(\sigma_x/\sqrt{n})$, the
+estimated mean of a data set divided by the estimated standard error
+of the mean, is not a normal distribution but Student's t-distribution.
+We want to compute Student's t-distribution and compare it with the
+normal distribution.
 \begin{parts}
-\part Erzeuge 100000 normalverteilte Zufallszahlen.
-\part Ziehe daraus 1000 Stichproben vom Umfang $m=3$, 5, 10, oder 50.
-\part Berechne den Mittelwert $\bar x$ der Stichproben und plotte die Wahrscheinlichkeitsdichte
-dieser Mittelwerte.
-\part Vergleiche diese Wahrscheinlichkeitsdichte mit der Gausskurve.
-\part Berechne ausserdem die Gr\"o{\ss}e $t=\bar x/(\sigma_x/\sqrt{m})$
-(Standardabweichung $\sigma_x$) und vergleiche diese mit der Normalverteilung mit Standardabweichung Eins. Ist $t$ normalverteilt, bzw. unter welchen Bedingungen ist $t$ normalverteilt?
+\part Generate 100000 normally distributed random numbers.
+\part Draw from these data 1000 samples of size $n=3$, 5, 10, and 50.
+\part Compute the mean $\bar x$ of the samples and plot the
+probability density of these means.
+\part Compare the resulting probability densities with corresponding
+normal distributions.
+\part Compute in addition $t=\bar x/(\sigma_x/\sqrt{n})$ (standard
+deviation of the samples $\sigma_x$) and compare its distribution
+with the normal distribution with a standard deviation of one. Is $t$
+normally distributed? Under which conditions is $t$ normally
+distributed?
 \end{parts}
 \newsolutionpage
 \begin{solution}
@ -140,27 +148,26 @@ dieser Mittelwerte.


 \continue
-\question \qt{Permutationstest}
-Wir wollen die Signifikanz einer Korrelation durch einen
-Permutationstest bestimmen.
+\question \qt{Permutation test}
+We want to compute the significance of a correlation by means of a permutation test.
 \begin{parts}
-\part Erzeuge 1000 korrelierte Zufallszahlen $x$, $y$ durch
+\part Generate 1000 correlated pairs $x$, $y$ of random numbers according to:
 \begin{verbatim}
 n = 1000
 a = 0.2;
 x = randn(n, 1);
 y = randn(n, 1) + a*x;
 \end{verbatim}
-\part Erstelle einen Scatterplot der beiden Variablen.
-\part Warum ist $y$ mit $x$ korreliert?
-\part Berechne den Korrelationskoeffizienten zwischen $x$ und $y$.
-\part Was m\"usste man tun, um die Korrelationen zwischen den $x$-$y$
-Paaren zu zerst\"oren?
-\part Mach genau dies 1000 mal und berechne jedes Mal den Korrelationskoeffizienten.
-\part Bestimme die Wahrscheinlichkeitsdichte dieser Korrelationskoeffizienten.
-\part Ist die Korrelation der urspr\"unglichen Daten signifikant?
-\part Variiere die Stichprobengr\"o{\ss}e \code{n} und \"uberpr\"ufe
-auf gleiche Weise die Signifikanz.
+\part Generate a scatter plot of the two variables.
+\part Why is $y$ correlated with $x$?
+\part Compute the correlation coefficient between $x$ and $y$.
+\part What do you need to do in order to destroy the correlations between the $x$-$y$ pairs?
+\part Do exactly this 1000 times and compute the correlation coefficient each time.
+\part Compute the probability density of these correlation coefficients.
+\part Is the correlation of the original data set significant?
+\part What does significance of the correlation mean?
+\part Vary the sample size \code{n} and compute the significance
+of the correlation in the same way.
 \end{parts}
 \begin{solution}
  \lstinputlisting{correlationsignificance.m}
--- a/bootstrap/exercises/instructions.tex
+++ b/bootstrap/exercises/instructions.tex
@ -1,6 +1,6 @@
-\vspace*{-6.5ex}
+\vspace*{-7.8ex}
 \begin{center}
-\textbf{\Large Einf\"uhrung in die wissenschaftliche Datenverarbeitung}\\[1ex]
+\textbf{\Large Introduction to Scientific Computing}\\[2.3ex]
 {\large Jan Grewe, Jan Benda}\\[-3ex]
-Abteilung Neuroethologie \hfill --- \hfill Institut f\"ur Neurobiologie \hfill --- \hfill \includegraphics[width=0.28\textwidth]{UT_WBMW_Black_RGB} \\
+Neuroethology Lab \hfill --- \hfill Institute for Neurobiology \hfill --- \hfill \includegraphics[width=0.28\textwidth]{UT_WBMW_Black_RGB} \\
 \end{center}