205 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			205 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
| \documentclass[12pt,a4paper,pdftex]{exam}
 | |
| 
 | |
| \usepackage[english]{babel}
 | |
| \usepackage{pslatex}
 | |
| \usepackage[mediumspace,mediumqspace,Gray]{SIunits}      % \ohm, \micro
 | |
| \usepackage{xcolor}
 | |
| \usepackage{graphicx}
 | |
| \usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
 | |
| 
 | |
| %%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 | |
| \usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
 | |
| \pagestyle{headandfoot}
 | |
| \ifprintanswers
 | |
| \newcommand{\stitle}{: Solutions}
 | |
| \else
 | |
| \newcommand{\stitle}{}
 | |
| \fi
 | |
| \header{{\bfseries\large Exercise 9\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large December 9th, 2019}}
 | |
| \firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
 | |
| jan.benda@uni-tuebingen.de}
 | |
| \runningfooter{}{\thepage}{}
 | |
| 
 | |
| \setlength{\baselineskip}{15pt}
 | |
| \setlength{\parindent}{0.0cm}
 | |
| \setlength{\parskip}{0.3cm}
 | |
| \renewcommand{\baselinestretch}{1.15}
 | |
| 
 | |
| %%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 | |
| \usepackage{listings}
 | |
| \lstset{
 | |
|   language=Matlab,
 | |
|   basicstyle=\ttfamily\footnotesize,
 | |
|   numbers=left,
 | |
|   numberstyle=\tiny,
 | |
|   title=\lstname,
 | |
|   showstringspaces=false,
 | |
|   commentstyle=\itshape\color{darkgray},
 | |
|   breaklines=true,
 | |
|   breakautoindent=true,
 | |
|   columns=flexible,
 | |
|   frame=single,
 | |
|   xleftmargin=1em,
 | |
|   xrightmargin=1em,
 | |
|   aboveskip=10pt
 | |
| }
 | |
| 
 | |
| %%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 | |
| \usepackage{amsmath}
 | |
| \usepackage{amssymb}
 | |
| \usepackage{bm} 
 | |
| \usepackage{dsfont}
 | |
| \newcommand{\naZ}{\mathds{N}}
 | |
| \newcommand{\gaZ}{\mathds{Z}}
 | |
| \newcommand{\raZ}{\mathds{Q}}
 | |
| \newcommand{\reZ}{\mathds{R}}
 | |
| \newcommand{\reZp}{\mathds{R^+}}
 | |
| \newcommand{\reZpN}{\mathds{R^+_0}}
 | |
| \newcommand{\koZ}{\mathds{C}}
 | |
| 
 | |
| %%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 | |
| \newcommand{\continue}{\ifprintanswers%
 | |
| \else
 | |
| \vfill\hspace*{\fill}$\rightarrow$\newpage%
 | |
| \fi}
 | |
| \newcommand{\continuepage}{\ifprintanswers%
 | |
| \newpage
 | |
| \else
 | |
| \vfill\hspace*{\fill}$\rightarrow$\newpage%
 | |
| \fi}
 | |
| \newcommand{\newsolutionpage}{\ifprintanswers%
 | |
| \newpage%
 | |
| \else
 | |
| \fi}
 | |
| 
 | |
| %%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 | |
| \newcommand{\qt}[1]{\textbf{#1}\\}
 | |
| \newcommand{\pref}[1]{(\ref{#1})}
 | |
| \newcommand{\extra}{--- Zusatzaufgabe ---\ \mbox{}}
 | |
| \newcommand{\code}[1]{\texttt{#1}}
 | |
| 
 | |
| 
 | |
| %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 | |
| \begin{document}
 | |
| 
 | |
| \input{instructions}
 | |
| 
 | |
| \begin{questions}
 | |
| 
 | |
| \question \qt{Bootstrap the standard error of the mean}
 | |
| We want to compute the standard error of the mean of a data set by
 | |
| means of the bootstrap method and compare the result with the formula
 | |
| ``standard deviation divided by the square-root of $n$''.
 | |
| \begin{parts}
 | |
|   \part Download the file \code{thymusglandweights.dat} from Ilias.
 | |
|   This is a data set of the weights of the thymus glands of 14-day-old chicken embryos
 | |
|   measured in milligrams.
 | |
|   \part Load the data into Matlab (\code{load} function).
 | |
|   \part Compute the histogram, mean, and standard error of the mean of the first 80 data points.
 | |
|   \part Compute the standard error of the mean of the first 80 data
 | |
|   points by bootstrapping 500 times. Write a function that
 | |
|   bootstraps the standard error of the mean of a given data set. The
 | |
|   function should also return a vector with the bootstrapped means.
 | |
|   \part Compute the 95\,\% confidence interval for the mean from the
 | |
|   bootstrap distribution (\code{quantile()} function) --- the
 | |
|   interval that contains the true mean with 95\,\% probability.
 | |
|   \part Use the whole data set and the bootstrap method for computing
 | |
|   the dependence of the standard error of the mean on the sample
 | |
|   size $n$.
 | |
|   \part Compare your result with the formula for the standard error
 | |
|   $\sigma/\sqrt{n}$.
 | |
| \end{parts}
 | |
| \begin{solution}
 | |
|   \lstinputlisting{bootstrapmean.m}
 | |
|   \lstinputlisting{bootstraptymus.m}
 | |
|   \includegraphics[width=0.5\textwidth]{bootstraptymus-datahist}
 | |
|   \includegraphics[width=0.5\textwidth]{bootstraptymus-meanhist}
 | |
|   \includegraphics[width=0.5\textwidth]{bootstraptymus-samples}
 | |
| \end{solution}
 | |
| 
 | |
| 
 | |
| \question \qt{Student t-distribution}
 | |
| The distribution of Student's t, $t=\bar x/(\sigma_x/\sqrt{n})$, the
 | |
| estimated mean $\bar x$ of a data set of size $n$ divided by the
 | |
| estimated standard error of the mean $\sigma_x/\sqrt{n}$, where
 | |
| $\sigma_x$ is the estimated standard deviation, is not a normal
 | |
| distribution but a Student-t distribution.  We want to compute the
 | |
| Student-t distribution and compare it with the normal distribution.
 | |
| \begin{parts}
 | |
| \part Generate 100000 normally distributed random numbers.
 | |
| \part Draw from these data 1000 samples of size $n=3$, 5, 10, and
 | |
| 50. For each sample size $n$ ...
 | |
| \part ... compute the mean $\bar x$ of the samples and plot the
 | |
| probability density of these means.
 | |
| \part ... compare the resulting probability densities with corresponding
 | |
| normal distributions.
 | |
| \part ... compute Student's $t=\bar x/(\sigma_x/\sqrt{n})$ and compare its
 | |
| distribution with the normal distribution with standard deviation of
 | |
| one. Is $t$ normally distributed? Under which conditions is $t$
 | |
| normally distributed?
 | |
| \end{parts}
 | |
| \newsolutionpage
 | |
| \begin{solution}
 | |
|   \lstinputlisting{tdistribution.m}
 | |
|   \includegraphics[width=1\textwidth]{tdistribution-n03}\\
 | |
|   \includegraphics[width=1\textwidth]{tdistribution-n05}\\
 | |
|   \includegraphics[width=1\textwidth]{tdistribution-n10}\\
 | |
|   \includegraphics[width=1\textwidth]{tdistribution-n50}
 | |
| \end{solution}
 | |
| 
 | |
| 
 | |
| \continue
 | |
| \question \qt{Permutation test} \label{permutationtest}
 | |
| We want to compute the significance of a correlation by means of a permutation test.
 | |
| \begin{parts}
 | |
|   \part \label{permutationtestdata} Generate 1000 correlated pairs
 | |
|   $x$, $y$ of random numbers according to:
 | |
| \begin{verbatim}
 | |
| n = 1000;
 | |
| a = 0.2;
 | |
| x = randn(n, 1);
 | |
| y = randn(n, 1) + a*x;
 | |
| \end{verbatim}
 | |
|   \part Generate a scatter plot of the two variables.
 | |
|   \part Why is $y$ correlated with $x$?
 | |
|   \part Compute the correlation coefficient between $x$ and $y$.
 | |
|   \part What do you need to do in order to destroy the correlations between the $x$-$y$ pairs?
 | |
|   \part Do exactly this 1000 times and compute the correlation coefficient each time.
 | |
|   \part Compute and plot the probability density of these correlation
 | |
|   coefficients.
 | |
|   \part Is the correlation of the original data set significant?
 | |
|   \part What does ``significance of the correlation'' mean?
 | |
| %  \part Vary the sample size \code{n} and compute in the same way the
 | |
| %  significance of the correlation.
 | |
| \end{parts}
 | |
| \begin{solution}
 | |
|   \lstinputlisting{correlationsignificance.m}
 | |
|   \includegraphics[width=1\textwidth]{correlationsignificance}
 | |
| \end{solution}
 | |
| 
 | |
| \question \qt{Bootstrap the correlation coefficient} 
 | |
| The permutation test generates the distribution of the null hypothesis
 | |
| of uncorrelated data and we check whether the correlation coefficient
 | |
| of the data differs significantly from this
 | |
| distribution. Alternatively we can bootstrap the data while keeping
 | |
| the pairs and determine the confidence interval of the correlation
 | |
| coefficient of the data. If this differs significantly from a
 | |
| correlation coefficient of zero we can conclude that the correlation
 | |
| coefficient of the data indeed quantifies correlated data.
 | |
| 
 | |
| We take the same data set that we have generated in exercise
 | |
| \ref{permutationtest} (\ref{permutationtestdata}).
 | |
| \begin{parts}
 | |
|   \part Bootstrap the correlation coefficient of the data 1000 times.
 | |
|   \part Compute and plot the probability density of these correlation
 | |
|   coefficients.
 | |
|   \part Is the correlation of the original data set significant?
 | |
| \end{parts}
 | |
| \begin{solution}
 | |
|   \lstinputlisting{correlationbootstrap.m}
 | |
|   \includegraphics[width=1\textwidth]{correlationbootstrap}
 | |
| \end{solution}
 | |
| 
 | |
| \end{questions}
 | |
| 
 | |
| \end{document} |