% scientificComputing/bootstrap/exercises/exercises01.tex

\documentclass[12pt,a4paper,pdftex]{exam}

\usepackage[english]{babel}
\usepackage{pslatex}
\usepackage[mediumspace,mediumqspace,Gray]{SIunits}      % \ohm, \micro
\usepackage{xcolor}
\usepackage{graphicx}
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}

%%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Page margins.
\usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
\pagestyle{headandfoot}
% \stitle is the suffix appended to the exercise title in the header:
% ": Solutions" when \ifprintanswers is true, empty otherwise.
\ifprintanswers
\newcommand{\stitle}{: Solutions}
\else
\newcommand{\stitle}{}
\fi
% Header fields: exercise title (with \stitle suffix), topic, and date.
\header{{\bfseries\large Exercise 9\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large December 5th, 2017}}
% First-page footer carries the contact details; all later pages show only the page number.
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
jan.benda@uni-tuebingen.de}
\runningfooter{}{\thepage}{}

% Paragraph layout: no first-line indentation, vertical gap between paragraphs,
% slightly stretched line spacing.
% NOTE(review): \baselineskip set in the preamble is usually overridden by the
% next font-size selection -- confirm that this line has the intended effect.
\setlength{\baselineskip}{15pt}
\setlength{\parindent}{0.0cm}
\setlength{\parskip}{0.3cm}
\renewcommand{\baselinestretch}{1.15}

%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Global defaults for all code listings (solutions are included via \lstinputlisting).
\usepackage{listings}
\lstset{
  language=Matlab,                   % syntax highlighting for Matlab code
  basicstyle=\ttfamily\footnotesize, % small typewriter font for the code
  numbers=left,                      % line numbers in the left margin ...
  numberstyle=\tiny,                 % ... set in tiny type
  title=\lstname,                    % use the listing's name as its title
  showstringspaces=false,            % do not mark spaces inside strings
  commentstyle=\itshape\color{darkgray}, % comments in dark gray italics (needs xcolor)
  breaklines=true,                   % wrap long lines ...
  breakautoindent=true,              % ... and indent the continuation
  columns=flexible,
  frame=single,                      % single-line frame around each listing
  xleftmargin=1em,
  xrightmargin=1em,
  aboveskip=10pt
}

%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{bm}
\usepackage{dsfont}
% Double-struck symbols for the usual number sets, typeset with dsfont's \mathds.
% The starred form is used because none of these macros takes an argument.
\newcommand*{\naZ}{\mathds{N}}        % N, conventionally the natural numbers
\newcommand*{\gaZ}{\mathds{Z}}        % Z, conventionally the integers
\newcommand*{\raZ}{\mathds{Q}}        % Q, conventionally the rational numbers
\newcommand*{\reZ}{\mathds{R}}        % R, conventionally the real numbers
\newcommand*{\reZp}{\mathds{R^+}}     % R with superscript +
\newcommand*{\reZpN}{\mathds{R^+_0}}  % R with superscript + and subscript 0
\newcommand*{\koZ}{\mathds{C}}        % C, conventionally the complex numbers

%%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \continue: when answers are NOT printed, fill the rest of the page, put a
% right-aligned arrow at the bottom, and start a new page; otherwise do nothing.
\newcommand{\continue}{%
  \ifprintanswers
  \else
    \vfill\hspace*{\fill}$\rightarrow$\newpage
  \fi
}
% \continuepage: always starts a new page; the arrow is only added when
% answers are NOT printed.
\newcommand{\continuepage}{%
  \ifprintanswers
    \newpage
  \else
    \vfill\hspace*{\fill}$\rightarrow$\newpage
  \fi
}
% \newsolutionpage: starts a new page only when answers are printed.
\newcommand{\newsolutionpage}{%
  \ifprintanswers
    \newpage
  \fi
}

%%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \qt{title}: question title in bold, followed by a line break.
\newcommand{\qt}[1]{\textbf{#1}\\}
% \pref{label}: cross-reference wrapped in parentheses.
\newcommand{\pref}[1]{(\ref{#1})}
% \extra: marker text for optional exercises ("Zusatzaufgabe" is German for "extra exercise").
\newcommand{\extra}{--- Zusatzaufgabe ---\ \mbox{}}
% \code{text}: inline code (function or file names) in typewriter font.
\newcommand{\code}[1]{\texttt{#1}}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}

\input{instructions}

\begin{questions}

\question \qt{Bootstrap of the standard error}
We want to compute the standard error of the mean of a data set by
means of the bootstrap method and compare the result with the formula
``standard deviation divided by the square-root of $n$''.
\begin{parts}
  \part Download the file \code{thymusglandweights.dat} from Ilias.
  This is a data set of the weights of the thymus glands of 14-day-old chicken embryos
  measured in milligrams.
  \part Load the data into Matlab (\code{load} function).
  \part Compute histogram, mean, and standard error of the mean of the first 80 data points.
  \part Compute the standard error of the mean of the first 80 data
  points by bootstrapping 500 times. Write a function that
  bootstraps the standard error of the mean of a given data set. The
  function should also return a vector with the bootstrapped means.
  \part Compute the 95\,\% confidence interval for the mean from the
  bootstrap distribution (\code{quantile()} function) --- the
  interval that contains the true mean with 95\,\% probability.
  \part Use the whole data set and the bootstrap method for computing
  the dependence of the standard error of the mean on the sample
  size $n$.
  \part Compare your result with the formula for the standard error
  $\sigma/\sqrt{n}$.
\end{parts}
\begin{solution}
  \lstinputlisting{bootstrapmean.m}
  \lstinputlisting{bootstraptymus.m}
  \includegraphics[width=0.5\textwidth]{bootstraptymus-datahist}
  \includegraphics[width=0.5\textwidth]{bootstraptymus-meanhist}
  \includegraphics[width=0.5\textwidth]{bootstraptymus-samples}
\end{solution}


\question \qt{Student t-distribution}
The distribution of Student's t, $t=\bar x/(\sigma_x/\sqrt{n})$, the
estimated mean of a data set divided by the estimated standard error
of the mean, is not a normal distribution but a Student-t distribution.
We want to compute the Student-t distribution and compare it with the
normal distribution.
\begin{parts}
\part Generate 100000 normally distributed random numbers.
\part Draw from these data 1000 samples of size $n=3$, 5, 10, and 50.
\part Compute the mean $\bar x$ of the samples and plot the
probability density of these means.
\part Compare the resulting probability densities with corresponding
normal distributions.
\part Compute in addition $t=\bar x/(\sigma_x/\sqrt{n})$ (standard
deviation of the samples $\sigma_x$) and compare their distribution
with the normal distribution with standard deviation of one. Is $t$
normally distributed? Under which conditions is $t$ normally
distributed?
\end{parts}
\newsolutionpage
\begin{solution}
  \lstinputlisting{tdistribution.m}
  \includegraphics[width=1\textwidth]{tdistribution-n03}\\
  \includegraphics[width=1\textwidth]{tdistribution-n05}\\
  \includegraphics[width=1\textwidth]{tdistribution-n10}\\
  \includegraphics[width=1\textwidth]{tdistribution-n50}
\end{solution}


\continue
\question \qt{Permutation test} \label{permutationtest}
We want to compute the significance of a correlation by means of a permutation test.
\begin{parts}
  \part \label{permutationtestdata} Generate 1000 correlated pairs
  $x$, $y$ of random numbers according to:
\begin{verbatim}
n = 1000;
a = 0.2;
x = randn(n, 1);
y = randn(n, 1) + a*x;
\end{verbatim}
  \part Generate a scatter plot of the two variables.
  \part Why is $y$ correlated with $x$?
  \part Compute the correlation coefficient between $x$ and $y$.
  \part What do you need to do in order to destroy the correlations between the $x$-$y$ pairs?
  \part Do exactly this 1000 times and compute each time the correlation coefficient.
  \part Compute and plot the probability density of these correlation
  coefficients.
  \part Is the correlation of the original data set significant?
  \part What does significance of the correlation mean?
  \part Vary the sample size \code{n} and compute in the same way the
  significance of the correlation.
\end{parts}
\begin{solution}
  \lstinputlisting{correlationsignificance.m}
  \includegraphics[width=1\textwidth]{correlationsignificance}
\end{solution}

\question \qt{Bootstrap of the correlation coefficient}
The permutation test generates the distribution of the null hypothesis
of uncorrelated data and we check whether the correlation coefficient
of the data differs significantly from this
distribution. Alternatively we can bootstrap the data while keeping
the pairs and determine the confidence interval of the correlation
coefficient of the data. If this interval differs significantly from a
correlation coefficient of zero, we can conclude that the correlation
coefficient of the data indeed quantifies correlated data.

We take the same data set that we have generated in
exercise~\ref{permutationtest}~\pref{permutationtestdata}.
\begin{parts}
  \part Bootstrap 1000 times the correlation coefficient from the data.
  \part Compute and plot the probability density of these correlation
  coefficients.
  \part Is the correlation of the original data set significant?
\end{parts}
\begin{solution}
  \lstinputlisting{correlationbootstrap.m}
  \includegraphics[width=1\textwidth]{correlationbootstrap}
\end{solution}

\end{questions}

\end{document}