translated bootstrap exercises

2017-12-04 22:41:58 +01:00 · 2017-12-04 22:41:58 +01:00 · 9abe1c43f4
commit 9abe1c43f4
parent e39f29847f
3 changed files with 229 additions and 49 deletions
--- a/bootstrap/exercises/exercises01-de.tex
+++ b/bootstrap/exercises/exercises01-de.tex
@ -0,0 +1,173 @@
 \documentclass[12pt,a4paper,pdftex]{exam}
 \usepackage[german]{babel}
 \usepackage{pslatex}
 \usepackage[mediumspace,mediumqspace,Gray]{SIunits}      % \ohm, \micro
 \usepackage{xcolor}
 \usepackage{graphicx}
 \usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
 %%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
 \pagestyle{headandfoot}
 \ifprintanswers
 \newcommand{\stitle}{: L\"osungen}
 \else
 \newcommand{\stitle}{}
 \fi
 \header{{\bfseries\large \"Ubung\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large 17. Januar, 2017}}
 \firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
 jan.benda@uni-tuebingen.de}
 \runningfooter{}{\thepage}{}
 \setlength{\baselineskip}{15pt}
 \setlength{\parindent}{0.0cm}
 \setlength{\parskip}{0.3cm}
 \renewcommand{\baselinestretch}{1.15}
 %%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \usepackage{listings}
 \lstset{
  language=Matlab,
  basicstyle=\ttfamily\footnotesize,
  numbers=left,
  numberstyle=\tiny,
  title=\lstname,
  showstringspaces=false,
  commentstyle=\itshape\color{darkgray},
  breaklines=true,
  breakautoindent=true,
  columns=flexible,
  frame=single,
  xleftmargin=1em,
  xrightmargin=1em,
  aboveskip=10pt
 }
 %%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \usepackage{amsmath}
 \usepackage{amssymb}
 \usepackage{bm} 
 \usepackage{dsfont}
 \newcommand{\naZ}{\mathds{N}}
 \newcommand{\gaZ}{\mathds{Z}}
 \newcommand{\raZ}{\mathds{Q}}
 \newcommand{\reZ}{\mathds{R}}
 \newcommand{\reZp}{\mathds{R^+}}
 \newcommand{\reZpN}{\mathds{R^+_0}}
 \newcommand{\koZ}{\mathds{C}}
 %%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \newcommand{\continue}{\ifprintanswers%
 \else
 \vfill\hspace*{\fill}$\rightarrow$\newpage%
 \fi}
 \newcommand{\continuepage}{\ifprintanswers%
 \newpage
 \else
 \vfill\hspace*{\fill}$\rightarrow$\newpage%
 \fi}
 \newcommand{\newsolutionpage}{\ifprintanswers%
 \newpage%
 \else
 \fi}
 %%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \newcommand{\qt}[1]{\textbf{#1}\\}
 \newcommand{\pref}[1]{(\ref{#1})}
 \newcommand{\extra}{--- Zusatzaufgabe ---\ \mbox{}}
 \newcommand{\code}[1]{\texttt{#1}}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{document}
 \input{instructions}
 \begin{questions}
 \question \qt{Bootstrap des Standardfehlers}
 Wir wollen den Standardfehler, die Standardabweichung des Mittelwerts,
 eines Datensatzes mit Hilfe der Bootstrapmethode berechnen und mit der
 Formel ``Standardabweichung geteilt durch Wurzel aus $n$''
 vergleichen.
 \begin{parts}
  \part Lade von Ilias die Datei \code{thymusglandweights.dat} herunter.
  Darin befindet sich ein Datensatz vom Gewicht der Thymus Dr\"use in 14-Tage alten
  H\"uhnerembryos in mg.
  \part Lade diese Daten in Matlab (\code{load} Funktion).
  \part Bestimme Histogramm, Mittelwert und Standardfehler aus den ersten 80 Datenpunkten.
  \part Bestimme den Standardfehler aus den ersten 80 Datenpunkten durch 500-mal Bootstrappen.
  \part Bestimme das 95\,\% Konfidenzintervall f\"ur den Mittelwert
  aus der Bootstrap Verteilung (\code{quantile()} Funktion) --- also
  das Intervall innerhalb dessen mit 95\,\% Wahrscheinlichkeit der
  wahre Mittelwert liegen wird.
  \part Benutze den ganzen Datensatz und die Bootstrapping Technik, um die Abh\"angigkeit
  des Standardfehlers von der Stichprobengr\"o{\ss}e zu bestimmen.
  \part Vergleiche mit der bekannten Formel f\"ur den Standardfehler $\sigma/\sqrt{n}$.
 \end{parts}
 \begin{solution}
  \lstinputlisting{bootstrapmean.m}
  \lstinputlisting{bootstraptymus.m}
  \includegraphics[width=0.5\textwidth]{bootstraptymus-datahist}
  \includegraphics[width=0.5\textwidth]{bootstraptymus-meanhist}
  \includegraphics[width=0.5\textwidth]{bootstraptymus-samples}
 \end{solution}
 \question \qt{Student t-Verteilung}
 Durch Standardabweichungen normierte Mittelwerte sind nicht gaussverteilt,
 wenn beide aus normalverteilten Daten abgesch\"atzt werden.
 Die Verteilung von $t=\bar x/(\sigma_x/\sqrt{m})$ folgt vielmehr
 der Student t-Verteilung.
 \begin{parts}
 \part Erzeuge 100000 normalverteilte Zufallszahlen.
 \part Ziehe daraus 1000 Stichproben vom Umfang $m=3$, 5, 10, oder 50.
 \part Berechne den Mittelwert $\bar x$ der Stichproben und plotte die Wahrscheinlichkeitsdichte
 dieser Mittelwerte.
 \part Vergleiche diese Wahrscheinlichkeitsdichte mit der Gausskurve.
 \part Berechne ausserdem die Gr\"o{\ss}e $t=\bar x/(\sigma_x/\sqrt{m})$
 (Standardabweichung $\sigma_x$) und vergleiche diese mit der Normalverteilung mit Standardabweichung Eins. Ist $t$ normalverteilt, bzw. unter welchen Bedingungen ist $t$ normalverteilt?
 \end{parts}
 \newsolutionpage
 \begin{solution}
  \lstinputlisting{tdistribution.m}
  \includegraphics[width=1\textwidth]{tdistribution-n03}\\
  \includegraphics[width=1\textwidth]{tdistribution-n05}\\
  \includegraphics[width=1\textwidth]{tdistribution-n10}\\
  \includegraphics[width=1\textwidth]{tdistribution-n50}
 \end{solution}
 \continue
 \question \qt{Permutationstest}
 Wir wollen die Signifikanz einer Korrelation durch einen
 Permutationstest bestimmen.
 \begin{parts}
 \part Erzeuge 1000 korrelierte Zufallszahlen $x$, $y$ durch
 \begin{verbatim}
 n = 1000
 a = 0.2;
 x = randn(n, 1);
 y = randn(n, 1) + a*x;
 \end{verbatim}
 \part Erstelle einen Scatterplot der beiden Variablen.
 \part Warum ist $y$ mit $x$ korreliert?
 \part Berechne den Korrelationskoeffizienten zwischen $x$ und $y$.
 \part Was m\"usste man tun, um die Korrelationen zwischen den $x$-$y$
 Paaren zu zerst\"oren?
 \part Mach genau dies 1000 mal und berechne jedes Mal den Korrelationskoeffizienten.
 \part Bestimme die Wahrscheinlichkeitsdichte dieser Korrelationskoeffizienten.
 \part Ist die Korrelation der urspr\"unglichen Daten signifikant?
 \part Variiere die Stichprobengr\"o{\ss}e \code{n} und \"uberpr\"ufe
 auf gleiche Weise die Signifikanz.
 \end{parts}
 \begin{solution}
  \lstinputlisting{correlationsignificance.m}
  \includegraphics[width=1\textwidth]{correlationsignificance}
 \end{solution}
 \end{questions}
 \end{document}
--- a/bootstrap/exercises/exercises01.tex
+++ b/bootstrap/exercises/exercises01.tex
@ -1,6 +1,6 @@
 \documentclass[12pt,a4paper,pdftex]{exam}
-\usepackage[german]{babel}
+\usepackage[english]{babel}
 \usepackage{pslatex}
 \usepackage[mediumspace,mediumqspace,Gray]{SIunits}      % \ohm, \micro
 \usepackage{xcolor}
@ -11,11 +11,11 @@
 \usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
 \pagestyle{headandfoot}
 \ifprintanswers
-\newcommand{\stitle}{: L\"osungen}
+\newcommand{\stitle}{: Solutions}
 \else
 \newcommand{\stitle}{}
 \fi
-\header{{\bfseries\large \"Ubung\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large 17. Januar, 2017}}
+\header{{\bfseries\large Exercise 9\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large December 5th, 2017}}
 \firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
 jan.benda@uni-tuebingen.de}
 \runningfooter{}{\thepage}{}
@ -87,24 +87,27 @@ jan.benda@uni-tuebingen.de}
 \begin{questions}
 \question \qt{Bootstrap des Standardfehlers}
-Wir wollen den Standardfehler, die Standardabweichung des Mittelwerts,
+We want to compute the standard error of the mean of a data set by
-eines Datensatze mit Hilfe der Bootstrapmethode berechnen und mit der
+means of the bootstrap method and compare the result with the formula
-Formel ``Standardabweichung geteilt durch Wurzel aus $n$''
+``standard deviation divided by the square-root of $n$''.
-vergleichen.
 \begin{parts}
-  \part Lade von Ilias die Datei \code{thymusglandweights.dat} herunter.
+  \part Download the file \code{thymusglandweights.dat} from Ilias.
-  Darin befindet sich ein Datensatz vom Gewicht der Thymus Dr\"use in 14-Tage alten
+  This is a data set of the weights of the thymus glands of 14-day-old chicken embryos
-  H\"uhnerembryos in mg.
+  measured in milligram.
-  \part Lade diese Daten in Matlab (\code{load} Funktion).
+  \part Load the data into Matlab (\code{load} function).
-  \part Bestimme Histogramm, Mittelwert und Standardfehler aus den ersten 80 Datenpunkten.
+  \part Compute histogram, mean, and standard error of the mean of the first 80 data points.
-  \part Bestimme den Standardfehler aus den ersten 80 Datenpunkten durch 500-mal Bootstrappen.
+  \part Compute the standard error of the mean of the first 80 data
-  \part Bestimme das 95\,\% Konfidenzintervall f\"ur den Mittelwert
+  points by bootstrapping 500 times. Write a function that
-  aus der Bootstrap Verteilung (\code{quantile()} Funktion) --- also
+  bootstraps the standard error of the mean of a given data set. The
-  das Interval innerhalb dessen mit 95\,\% Wahrscheinlichkeit der
+  function should also return a vector with the bootstrapped means.
-  wahre Mittelwert liegen wird.
+  \part Compute the 95\,\% confidence interval for the mean from the
-  \part Benutze den ganzen Datensatz und die Bootstrapping Technik, um die Abh\"angigkeit
+  bootstrap distribution (\code{quantile()} function) --- the
-  des Standardfehlers von der Stichprobengr\"o{\ss}e zu bestimmen.
+  interval that contains the true mean with 95\,\% probability.
-  \part Vergleiche mit der bekannten Formel f\"ur den Standardfehler $\sigma/\sqrt{n}$.
+  \part Use the whole data set and the bootstrap method for computing
+  the dependence of the standard error of the mean on the sample
+  size $n$.
+  \part Compare your result with the formula for the standard error
+  $\sigma/\sqrt{n}$.
 \end{parts}
 \begin{solution}
  \lstinputlisting{bootstrapmean.m}
@ -115,19 +118,24 @@ vergleichen.
 \end{solution}
-\question \qt{Student t-Verteilung}
+\question \qt{Student t-distribution} 
-Durch Standardabweichungen normierte Mittelwerte sind nicht Gaussverteilt,
+The distribution of Student's t, $t=\bar x/(\sigma_x/\sqrt{n})$, the
-wenn beide aus Normalverteilten Daten abgesch\"atzt werden.
+estimated mean of a data set divided by the estimated standard error
-Die Verteilung von $t=\bar x/(\sigma_x/\sqrt{m})$ folgt vielmehr
+of the mean, is not a normal distribution but a Student-t distribution.
-der Student t-Verteilung.
+We want to compute the Student-t distribution and compare it with the 
+normal distribution.
 \begin{parts}
-\part Erzeuge 100000 normalverteilte Zufallszahlen.
+\part Generate 100000 normally distributed random numbers.
-\part Ziehe daraus 1000 Stichproben vom Umfang $m=3$, 5, 10, oder 50.
+\part Draw from these data 1000 samples of size $n=3$, 5, 10, and 50.
-\part Berechne den Mittelwert $\bar x$ der Stichproben und plotte die Wahrscheinlichkeitsdichte
+\part Compute the mean $\bar x$ of the samples and plot the
-dieser Mittelwerte.
+probability density of these means.
-\part Vergleiche diese Wahrscheinlichkeitsdichte mit der Gausskurve.
+\part Compare the resulting probability densities with corresponding
-\part Berechne ausserdem die Gr\"o{\ss}e $t=\bar x/(\sigma_x/\sqrt{m})$
+normal distributions.
-(Standardabweichung $\sigma_x$) und vergleiche diese mit der Normalverteilung mit Standardabweichung Eins. Ist $t$ normalverteilt, bzw. unter welchen Bedingungen ist $t$ normalverteilt?
+\part Compute in addition $t=\bar x/(\sigma_x/\sqrt{n})$ (standard
+deviation of the samples $\sigma_x$) and compare their distribution
+with the normal distribution with standard deviation of one. Is $t$
+normally distributed? Under which conditions is $t$ normally
+distributed?
 \end{parts}
 \newsolutionpage
 \begin{solution}
@ -140,27 +148,26 @@ dieser Mittelwerte.
 \continue
-\question \qt{Permutationstest}
+\question \qt{Permutation test}
-Wir wollen die Signifikanz einer Korrelation durch einen
+We want to compute the significance of a correlation by means of a permutation test.
-Permutationstest bestimmen.
 \begin{parts}
-\part Erzeuge 1000 korrelierte Zufallszahlen $x$, $y$ durch
+\part Generate 1000 correlated pairs $x$, $y$ of random numbers according to:
 \begin{verbatim}
 n = 1000
 a = 0.2;
 x = randn(n, 1);
 y = randn(n, 1) + a*x;
 \end{verbatim}
-\part Erstelle einen Scatterplot der beiden Variablen.
+\part Generate a scatter plot of the two variables.
-\part Warum ist $y$ mit $x$ korreliert?
+\part Why is $y$ correlated with $x$?
-\part Berechne den Korrelationskoeffizienten zwischen $x$ und $y$.
+\part Compute the correlation coefficient between $x$ and $y$.
-\part Was m\"usste man tun, um die Korrelationen zwischen den $x$-$y$
+\part What do you need to do in order to destroy the correlations between the $x$-$y$ pairs?
-Paaren zu zerst\"oren?
+\part Do exactly this 1000 times and compute each time the correlation coefficient.
-\part Mach genau dies 1000 mal und berechne jedes Mal den Korrelationskoeffizienten.
+\part Compute the probability density of these correlation coefficients.
-\part Bestimme die Wahrscheinlichkeitsdichte dieser Korrelationskoeffizienten.
+\part Is the correlation of the original data set significant?
-\part Ist die Korrelation der urspr\"unglichen Daten signifikant?
+\part What does significance of the correlation mean?
-\part Variiere die Stichprobengr\"o{\ss}e \code{n} und \"uberpr\"ufe
+\part Vary the sample size \code{n} and compute in the same way the
-auf gleiche Weise die Signifikanz.
+significance of the correlation.
 \end{parts}
 \begin{solution}
  \lstinputlisting{correlationsignificance.m}
--- a/bootstrap/exercises/instructions.tex
+++ b/bootstrap/exercises/instructions.tex
@ -1,6 +1,6 @@
-\vspace*{-6.5ex}
+\vspace*{-7.8ex}
 \begin{center}
-\textbf{\Large Einf\"uhrung in die wissenschaftliche Datenverarbeitung}\\[1ex]
+\textbf{\Large Introduction to Scientific Computing}\\[2.3ex]
 {\large Jan Grewe, Jan Benda}\\[-3ex]
-Abteilung Neuroethologie \hfill --- \hfill Institut f\"ur Neurobiologie \hfill --- \hfill \includegraphics[width=0.28\textwidth]{UT_WBMW_Black_RGB} \\
+Neuroethology Lab \hfill --- \hfill Institute for Neurobiology \hfill --- \hfill \includegraphics[width=0.28\textwidth]{UT_WBMW_Black_RGB} \\
 \end{center}