% scientificComputing/bootstrap/exercises/resampling-1.tex

\documentclass[12pt,a4paper,pdftex]{exam}

\newcommand{\exercisetopic}{Resampling}
\newcommand{\exercisenum}{8}
\newcommand{\exercisedate}{December 14th, 2020}

\input{../../exercisesheader}

\firstpagefooter{Prof. Dr. Jan Benda}{}{jan.benda@uni-tuebingen.de}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}

\input{../../exercisestitle}

\begin{questions}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Read chapter 7 of the script on ``resampling methods''!}\vspace{-3ex}

\question \qt{Permutation test of correlations} \label{correlationtest}
We want to compute the significance of a correlation by means of a permutation test.
\begin{parts}
  \part \label{correlationtestdata} Generate 1000 correlated pairs
  $x$, $y$ of random numbers according to:
\begin{verbatim}
n = 1000;
a = 0.2;
x = randn(n, 1);
y = randn(n, 1) + a*x;
\end{verbatim}
  \part Generate a scatter plot of the two variables.
  \part Why is $y$ correlated with $x$?
  \part Compute the correlation coefficient between $x$ and $y$.
  \part What do you need to do in order to destroy the correlations between the $x$-$y$ pairs?
  \part Do exactly this 1000 times and compute each time the correlation coefficient.
  \part Compute and plot the probability density of these correlation
  coefficients.
  \part Is the correlation of the original data set significant?
  \part What does ``significance of the correlation'' mean?
%  \part Vary the sample size \code{n} and compute in the same way the
%  significance of the correlation.
\end{parts}
\begin{solution}
  \lstinputlisting{correlationsignificance.m}
  \includegraphics[width=1\textwidth]{correlationsignificance}
\end{solution}

\newsolutionpage
\question \qt{Bootstrap the correlation coefficient}
The permutation test generates the distribution of the null hypothesis
of uncorrelated data and we check whether the correlation coefficient
of the data differs significantly from this
distribution. Alternatively, we can bootstrap the data while keeping
the pairs and determine the confidence interval of the correlation
coefficient of the data. If this confidence interval does not include a
correlation coefficient of zero, we can conclude that the correlation
coefficient of the data indeed quantifies correlated data.

We take the same data set that we have generated in
exercise~\ref{correlationtest}\,(\ref{correlationtestdata}).
\begin{parts}
  \part Bootstrap 1000 times the correlation coefficient from the
  data, i.e.\ generate bootstrap data by randomly resampling the
  original data pairs with replacement. Use the \code{randi()}
  function for generating random indices that you can use to select a
  random sample from the original data.
  \part Compute and plot the probability density of these correlation
  coefficients.
  \part Is the correlation of the original data set significant?
\end{parts}
\begin{solution}
  \lstinputlisting{correlationbootstrap.m}
  \includegraphics[width=1\textwidth]{correlationbootstrap}
\end{solution}


\continue
\question \qt{Permutation test of difference of means}
By means of a permutation test we want to test whether two data sets
come from distributions that differ in their mean.
\begin{parts}
  \part Generate two normally distributed data sets $x$ and $y$,
  each containing $n=200$ samples. Let's assume the $x$ samples are
  measurements of the membrane potential of a mammalian photoreceptor
  in darkness with a mean of $-40$\,mV and a standard deviation of
  1\,mV. The $y$ values are the membrane potentials measured under dim
  illumination and come from a distribution with the same standard
  deviation and a mean of $-40.5$\,mV. See section 5.2 ``Scaling and
  shifting random numbers'' in the script.
  \part Plot histograms of the $x$ and $y$ data in a single
  plot. Choose appropriate bins.
  \part Compute the means of $x$ and $y$ and their difference.
  \part The null hypothesis is that the $x$ and $y$ data come from the
  same distribution. How can you generate new samples $x_r$ and $y_r$
  from the original data that come from the same distribution?
  \part Do exactly this 1000 times and compute each time the
  difference of the means of the two resampled samples.
  \part Compute and plot the probability density of the resulting
  distribution of the null hypothesis.
  \part Is the difference of the means of the original data sets significant?
  \part Repeat this procedure for $y$ samples whose mean is closer to
  or further away from the mean of the $x$ data set. For this put the
  computations of the permutation test in a function and all the plotting
  in another function.
\end{parts}
\begin{solution}
  \lstinputlisting{meandiffpermutation.m}
  \lstinputlisting{meandiffplot.m}
  \lstinputlisting{meandiffsignificance.m}
  \includegraphics[width=1\textwidth]{meandiffsignificance}
\end{solution}

\end{questions}

\end{document}