[bootstrap] split exercises

This commit is contained in:
Jan Benda 2020-12-14 23:57:36 +01:00
parent 520f011f67
commit 475b3148c8
3 changed files with 87 additions and 66 deletions

View File

@ -1,4 +1,3 @@
TEXFILES=resampling-1.tex
# resampling2.tex
TEXFILES=$(wildcard resampling-?.tex)
include ../../exercises.mk

View File

@ -18,69 +18,6 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Read chapter 7 of the script on ``resampling methods''!}\vspace{-3ex}
\question \qt{Bootstrap the standard error of the mean}
We want to compute the standard error of the mean of a data set by
means of the bootstrap method and compare the result with the formula
``standard deviation divided by the square-root of $n$''.
\begin{parts}
\part Download the file \code{thymusglandweights.dat} from Ilias.
This is a data set of the weights of the thymus glands of 14-day-old chicken embryos,
measured in milligrams.
\part Load the data into Matlab (\code{load} function).
\part Compute histogram, mean, and standard error of the mean of the first 80 data points.
\part Compute the standard error of the mean of the first 80 data
points by bootstrapping the data 500 times. Write a function that
bootstraps the standard error of the mean of a given data set. The
function should also return a vector with the bootstrapped means.
\part Compute the 95\,\% confidence interval for the mean from the
bootstrap distribution (\code{quantile()} function) --- the
interval that contains the true mean with 95\,\% probability.
\part Use the whole data set and the bootstrap method for computing
the dependence of the standard error of the mean on the sample
size $n$.
\part Compare your result with the formula for the standard error
$\sigma/\sqrt{n}$.
\end{parts}
\begin{solution}
\lstinputlisting{bootstrapmean.m}
\lstinputlisting{bootstraptymus.m}
\includegraphics[width=0.5\textwidth]{bootstraptymus-datahist}
\includegraphics[width=0.5\textwidth]{bootstraptymus-meanhist}
\includegraphics[width=0.5\textwidth]{bootstraptymus-samples}
\end{solution}
\question \qt{Student t-distribution}
Student's $t=\bar x/(\sigma_x/\sqrt{n})$ is the estimated mean
$\bar x$ of a data set of size $n$ divided by the estimated standard
error of the mean $\sigma_x/\sqrt{n}$, where $\sigma_x$ is the
estimated standard deviation. The distribution of $t$ is not a normal
distribution but a Student-t distribution. We want to compute the
Student-t distribution and compare it with the normal distribution.
\begin{parts}
\part Generate 100000 normally distributed random numbers.
\part Draw from these data 1000 samples of size $n=3$, 5, 10, and
50. For each sample size $n$ \ldots
\part \ldots{} compute the mean $\bar x$ of the samples and plot the
probability density of these means.
\part \ldots{} compare the resulting probability densities with corresponding
normal distributions.
\part \ldots{} compute Student's $t=\bar x/(\sigma_x/\sqrt{n})$ and compare its
distribution with the normal distribution with standard deviation of
one. Is $t$ normally distributed? Under which conditions is $t$
normally distributed?
\end{parts}
\newsolutionpage
\begin{solution}
\lstinputlisting{tdistribution.m}
\includegraphics[width=1\textwidth]{tdistribution-n03}\\
\includegraphics[width=1\textwidth]{tdistribution-n05}\\
\includegraphics[width=1\textwidth]{tdistribution-n10}\\
\includegraphics[width=1\textwidth]{tdistribution-n50}
\end{solution}
\continue
\question \qt{Permutation test of correlations} \label{correlationtest}
We want to compute the significance of a correlation by means of a permutation test.
\begin{parts}
@ -109,6 +46,7 @@ y = randn(n, 1) + a*x;
\includegraphics[width=1\textwidth]{correlationsignificance}
\end{solution}
\newsolutionpage
\question \qt{Bootstrap the correlation coefficient}
The permutation test generates the distribution of the null hypothesis
of uncorrelated data and we check whether the correlation coefficient
@ -137,7 +75,7 @@ We take the same data set that we have generated in exercise
\end{solution}
\continuepage
\continue
\question \qt{Permutation test of difference of means}
We want to test whether two data sets come from distributions that
differ in their mean by means of a permutation test.

View File

@ -0,0 +1,84 @@
\documentclass[12pt,a4paper,pdftex]{exam}
\newcommand{\exercisetopic}{Resampling}
\newcommand{\exercisenum}{X2}
\newcommand{\exercisedate}{December 14th, 2020}
\input{../../exercisesheader}
\firstpagefooter{Prof. Dr. Jan Benda}{}{jan.benda@uni-tuebingen.de}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
\input{../../exercisestitle}
\begin{questions}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Read chapter 7 of the script on ``resampling methods''!}\vspace{-3ex}
\question \qt{Bootstrap the standard error of the mean}
We want to compute the standard error of the mean of a data set by
means of the bootstrap method and compare the result with the formula
``standard deviation divided by the square-root of $n$''.
\begin{parts}
\part Download the file \code{thymusglandweights.dat} from Ilias.
This is a data set of the weights of the thymus glands of 14-day-old chicken embryos,
measured in milligrams.
\part Load the data into Matlab (\code{load} function).
\part Compute histogram, mean, and standard error of the mean of the first 80 data points.
\part Compute the standard error of the mean of the first 80 data
points by bootstrapping the data 500 times. Write a function that
bootstraps the standard error of the mean of a given data set. The
function should also return a vector with the bootstrapped means.
\part Compute the 95\,\% confidence interval for the mean from the
bootstrap distribution (\code{quantile()} function) --- the
interval that contains the true mean with 95\,\% probability.
\part Use the whole data set and the bootstrap method for computing
the dependence of the standard error of the mean on the sample
size $n$.
\part Compare your result with the formula for the standard error
$\sigma/\sqrt{n}$.
\end{parts}
\begin{solution}
\lstinputlisting{bootstrapmean.m}
\lstinputlisting{bootstraptymus.m}
\includegraphics[width=0.5\textwidth]{bootstraptymus-datahist}
\includegraphics[width=0.5\textwidth]{bootstraptymus-meanhist}
\includegraphics[width=0.5\textwidth]{bootstraptymus-samples}
\end{solution}
\question \qt{Student t-distribution}
Student's $t=\bar x/(\sigma_x/\sqrt{n})$ is the estimated mean
$\bar x$ of a data set of size $n$ divided by the estimated standard
error of the mean $\sigma_x/\sqrt{n}$, where $\sigma_x$ is the
estimated standard deviation. The distribution of $t$ is not a normal
distribution but a Student-t distribution. We want to compute the
Student-t distribution and compare it with the normal distribution.
\begin{parts}
\part Generate 100000 normally distributed random numbers.
\part Draw from these data 1000 samples of size $n=3$, 5, 10, and
50. For each sample size $n$ \ldots
\part \ldots{} compute the mean $\bar x$ of the samples and plot the
probability density of these means.
\part \ldots{} compare the resulting probability densities with corresponding
normal distributions.
\part \ldots{} compute Student's $t=\bar x/(\sigma_x/\sqrt{n})$ and compare its
distribution with the normal distribution with standard deviation of
one. Is $t$ normally distributed? Under which conditions is $t$
normally distributed?
\end{parts}
\newsolutionpage
\begin{solution}
\lstinputlisting{tdistribution.m}
\includegraphics[width=1\textwidth]{tdistribution-n03}\\
\includegraphics[width=1\textwidth]{tdistribution-n05}\\
\includegraphics[width=1\textwidth]{tdistribution-n10}\\
\includegraphics[width=1\textwidth]{tdistribution-n50}
\end{solution}
\end{questions}
\end{document}