Merge branch 'master' of https://whale.am28.uni-tuebingen.de/git/teaching/scientificComputing

2019-12-04 17:51:53 +01:00 · 2019-12-04 17:51:53 +01:00 · cc332ee25d
commit cc332ee25d
parent 84002e5cfb 006fa998cc
11 changed files with 72 additions and 69 deletions
--- a/bootstrap/exercises/exercises01.tex
+++ b/bootstrap/exercises/exercises01.tex
@ -15,7 +15,7 @@
 \else
 \newcommand{\stitle}{}
 \fi
-\header{{\bfseries\large Exercise 9\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large November 20th, 2018}}
+\header{{\bfseries\large Exercise 9\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large December 9th, 2019}}
 \firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
 jan.benda@uni-tuebingen.de}
 \runningfooter{}{\thepage}{}
@ -86,7 +86,7 @@ jan.benda@uni-tuebingen.de}

 \begin{questions}

-\question \qt{Bootstrap of the standard error of the mean}
+\question \qt{Bootstrap the standard error of the mean}
 We want to compute the standard error of the mean of a data set by
 means of the bootstrap method and compare the result with the formula
 ``standard deviation divided by the square-root of $n$''.
@ -119,23 +119,24 @@ means of the bootstrap method and compare the result with the formula


 \question \qt{Student t-distribution}
-The distribution of Student's t, $t=\bar x/(\sigma_x/\sqrt{m})$, the
-estimated mean of a data set divided by the estimated standard error
-of the mean, is not a normal distribution but a Student-t distribution.
-We want to compute the Student-t distribution and compare it with the 
-normal distribution.
+The distribution of Student's t, $t=\bar x/(\sigma_x/\sqrt{n})$, the
+estimated mean $\bar x$ of a data set of size $n$ divided by the
+estimated standard error of the mean $\sigma_x/\sqrt{n}$, where
+$\sigma_x$ is the estimated standard deviation, is not a normal
+distribution but a Student-t distribution.  We want to compute the
+Student-t distribution and compare it with the normal distribution.
 \begin{parts}
 \part Generate 100000 normally distributed random numbers.
-\part Draw from these data 1000 samples of size $n=3$, 5, 10, and 50.
-\part Compute the mean $\bar x$ of the samples and plot the
+\part Draw from these data 1000 samples of size $n=3$, 5, 10, and
+50. For each sample size $n$ ...
+\part ... compute the mean $\bar x$ of the samples and plot the
 probability density of these means.
-\part Compare the resulting probability densities with corresponding
+\part ... compare the resulting probability densities with corresponding
 normal distributions.
-\part Compute in addition $t=\bar x/(\sigma_x/\sqrt{n})$ (standard
-deviation of the samples $\sigma_x$) and compare their distribution
-with the normal distribution with standard deviation of one. Is $t$
-normally distributed? Under which conditions is $t$ normally
-distributed?
+\part ... compute Student's $t=\bar x/(\sigma_x/\sqrt{n})$ and compare its
+distribution with the normal distribution with standard deviation of
+one. Is $t$ normally distributed? Under which conditions is $t$
+normally distributed?
 \end{parts}
 \newsolutionpage
 \begin{solution}
@ -167,16 +168,16 @@ y = randn(n, 1) + a*x;
  \part Compute and plot the probability density of these correlation
  coefficients.
  \part Is the correlation of the original data set significant?
-  \part What does significance of the correlation mean?
-  \part Vary the sample size \code{n} and compute in the same way the
-  significance of the correlation.
+  \part What does ``significance of the correlation'' mean?
+%  \part Vary the sample size \code{n} and compute in the same way the
+%  significance of the correlation.
 \end{parts}
 \begin{solution}
  \lstinputlisting{correlationsignificance.m}
  \includegraphics[width=1\textwidth]{correlationsignificance}
 \end{solution}

-\question \qt{Bootstrap of the correlation coefficient} 
+\question \qt{Bootstrap the correlation coefficient} 
 The permutation test generates the distribution of the null hypothesis
 of uncorrelated data and we check whether the correlation coefficient
 of the data differs significantly from this
@ -184,7 +185,7 @@ distribution. Alternatively we can bootstrap the data while keeping
 the pairs and determine the confidence interval of the correlation
 coefficient of the data. If this differs significantly from a
 correlation coefficient of zero we can conclude that the correlation
-coefficient of the data quantifies indeed a correlated data.
+coefficient of the data indeed quantifies correlated data.

 We take the same data set that we have generated in exercise
 \ref{permutationtest} (\ref{permutationtestdata}).
--- a/bootstrap/lecture/bootstrap.tex
+++ b/bootstrap/lecture/bootstrap.tex
@ -84,9 +84,11 @@ standard errors and confidence intervals).
 Bootstrapping methods create bootstrapped samples from an SRS by
 resampling. The bootstrapped samples are used to estimate the sampling
 distribution of a statistical measure. The bootstrapped samples have
-the same size as the original sample and are created by randomly drawing with
-replacement. That is, each value of the original sample can occur
-once, multiple time, or not at all in a bootstrapped sample.
+the same size as the original sample and are created by randomly
+drawing with replacement. That is, each value of the original sample
+can occur once, multiple times, or not at all in a bootstrapped
+sample. This can be implemented by generating random indices into the
+data set using the \code{randi()} function.


 \section{Bootstrap of the standard error}
@ -165,13 +167,13 @@ data points $(x_i, y_i)$. By calculating the correlation coefficient
 we can quantify how strongly $y$ depends on $x$. The correlation
 coefficient alone, however, does not tell whether the correlation is
 significantly different from a random correlation. The null hypothesis
-for such a situation would be that $y$ does not depend on $x$. In
+for such a situation is that $y$ does not depend on $x$. In
 order to perform a permutation test, we need to destroy the
 correlation by permuting the $(x_i, y_i)$ pairs, i.e. we rearrange the
 $x_i$ and $y_i$ values in a random fashion. Generating many sets of
-random pairs and computing the resulting correlation coefficients,
+random pairs and computing the resulting correlation coefficients
 yields a distribution of correlation coefficients that result
-randomnly from uncorrelated data. By comparing the actually measured
+randomly from uncorrelated data. By comparing the actually measured
 correlation coefficient with this distribution we can directly assess
 the significance of the correlation
 (figure\,\ref{permutecorrelationfig}).
@ -183,10 +185,10 @@ Estimate the statistical significance of a correlation coefficient.
  and calculate the respective $y$-values according to $y_i =0.2 \cdot x_i + u_i$
  where $u_i$ is a random number drawn from a normal distribution.
 \item Calculate the correlation coefficient.
-\item Generate the distribution according to the null hypothesis by
-  generating uncorrelated pairs. For this permute $x$- and $y$-values
-  \matlabfun{randperm()} 1000 times and calculate for each
-  permutation the correlation coefficient.
+\item Generate the distribution of the null hypothesis by generating
+  uncorrelated pairs. For this, permute $x$- and $y$-values
+  \matlabfun{randperm()} 1000 times and calculate for each permutation
+  the correlation coefficient.
 \item Read out the 95\,\% percentile from the resulting distribution
  of the null hypothesis and compare it with the correlation
  coefficient computed from the original data.
--- a/pointprocesses/lecture/pointprocessscetchA.eps
+++ b/pointprocesses/lecture/pointprocessscetchA.eps
@ -1,7 +1,7 @@
 %!PS-Adobe-2.0 EPSF-2.0
 %%Title: pointprocessscetchA.tex
 %%Creator: gnuplot 4.6 patchlevel 4
-%%CreationDate: Mon Dec  2 13:03:15 2019
+%%CreationDate: Tue Dec  3 08:08:50 2019
 %%DocumentFonts: 
 %%BoundingBox: 50 50 373 135
 %%EndComments
@ -430,10 +430,10 @@ SDict begin [
  /Title (pointprocessscetchA.tex)
  /Subject (gnuplot plot)
  /Creator (gnuplot 4.6 patchlevel 4)
-  /Author (benda)
+  /Author (jan)
 %  /Producer (gnuplot)
 %  /Keywords ()
-  /CreationDate (Mon Dec  2 13:03:15 2019)
+  /CreationDate (Tue Dec  3 08:08:50 2019)
  /DOCINFO pdfmark
 end
 } ifelse
--- a/pointprocesses/lecture/pointprocessscetchA.pdf
+++ b/pointprocesses/lecture/pointprocessscetchA.pdf
--- a/pointprocesses/lecture/pointprocessscetchB.eps
+++ b/pointprocesses/lecture/pointprocessscetchB.eps
@ -1,7 +1,7 @@
 %!PS-Adobe-2.0 EPSF-2.0
 %%Title: pointprocessscetchB.tex
 %%Creator: gnuplot 4.6 patchlevel 4
-%%CreationDate: Mon Dec  2 13:03:15 2019
+%%CreationDate: Tue Dec  3 08:08:50 2019
 %%DocumentFonts: 
 %%BoundingBox: 50 50 373 237
 %%EndComments
@ -430,10 +430,10 @@ SDict begin [
  /Title (pointprocessscetchB.tex)
  /Subject (gnuplot plot)
  /Creator (gnuplot 4.6 patchlevel 4)
-  /Author (benda)
+  /Author (jan)
 %  /Producer (gnuplot)
 %  /Keywords ()
-  /CreationDate (Mon Dec  2 13:03:15 2019)
+  /CreationDate (Tue Dec  3 08:08:50 2019)
  /DOCINFO pdfmark
 end
 } ifelse
--- a/pointprocesses/lecture/pointprocessscetchB.pdf
+++ b/pointprocesses/lecture/pointprocessscetchB.pdf