From 006fa998ccacf058e33d492d920b8a613f1471cc Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Tue, 3 Dec 2019 08:57:40 +0100
Subject: [PATCH] [bootstrap] updated text and exercises

---
 bootstrap/exercises/bootstrapmean.m           |   6 +--
 bootstrap/exercises/bootstraptymus.m          |  28 ++++++------
 bootstrap/exercises/correlationbootstrap.m    |   8 ++--
 bootstrap/exercises/correlationsignificance.m |  10 ++--
 bootstrap/exercises/exercises01.tex           |  43 +++++++++---------
 bootstrap/exercises/tdistribution.m           |  12 ++---
 bootstrap/lecture/bootstrap.tex               |  22 +++++----
 .../lecture/pointprocessscetchA.eps           |   6 +--
 .../lecture/pointprocessscetchA.pdf           | Bin 2790 -> 2786 bytes
 .../lecture/pointprocessscetchB.eps           |   6 +--
 .../lecture/pointprocessscetchB.pdf           | Bin 4698 -> 4694 bytes
 11 files changed, 72 insertions(+), 69 deletions(-)

diff --git a/bootstrap/exercises/bootstrapmean.m b/bootstrap/exercises/bootstrapmean.m
index 356c531..6f3b494 100644
--- a/bootstrap/exercises/bootstrapmean.m
+++ b/bootstrap/exercises/bootstrapmean.m
@@ -1,11 +1,11 @@
-function [bootsem, mu] = bootstrapmean( x, resample )
+function [bootsem, mu] = bootstrapmean(x, resample)
 % computes standard error by bootstrapping the data
 % x: vector with data
 % resample: number of resamplings
 % returns:
 % bootsem: the standard error of the mean
 % mu: the bootstrapped means as a vector
-    mu = zeros( resample, 1 );
+    mu = zeros(resample, 1);
     nsamples = length(x);
     for i = 1:resample
         % resample:
@@ -13,5 +13,5 @@ function [bootsem, mu] = bootstrapmean( x, resample )
         % compute statistics on sample:
         mu(i) = mean(xr);
     end
-    bootsem = std( mu );
+    bootsem = std(mu);
 end
diff --git a/bootstrap/exercises/bootstraptymus.m b/bootstrap/exercises/bootstraptymus.m
index 0b4aaa7..2a4c43d 100644
--- a/bootstrap/exercises/bootstraptymus.m
+++ b/bootstrap/exercises/bootstraptymus.m
@@ -1,36 +1,36 @@
 %% (b) load the data:
-load( 'thymusglandweights.dat' );
+load('thymusglandweights.dat');
 nsamples = 80;
 x = thymusglandweights(1:nsamples);
 
 %% (c) mean, sem and hist:
 sem = std(x)/sqrt(nsamples);
-fprintf( 'Mean of the data set = %.2fmg\n', mean(x) );
-fprintf( 'SEM of the data set = %.2fmg\n', sem );
+fprintf('Mean of the data set = %.2fmg\n', mean(x));
+fprintf('SEM of the data set = %.2fmg\n', sem);
 hist(x,20)
 xlabel('x')
 ylabel('count')
-savefigpdf( gcf, 'bootstraptymus-datahist.pdf', 6, 5 );
-pause( 2.0 )
+savefigpdf(gcf, 'bootstraptymus-datahist.pdf', 6, 5);
+pause(2.0)
 
 %% (d) bootstrap the mean:
 resample = 500;
-[bootsem, mu] = bootstrapmean( x, resample );
-hist( mu, 20 );
+[bootsem, mu] = bootstrapmean(x, resample);
+hist(mu, 20);
 xlabel('mean(x)')
 ylabel('count')
-savefigpdf( gcf, 'bootstraptymus-meanhist.pdf', 6, 5 );
-fprintf( '  bootstrap standard error: %.3f\n', bootsem );
-fprintf( 'theoretical standard error: %.3f\n', sem );
+savefigpdf(gcf, 'bootstraptymus-meanhist.pdf', 6, 5);
+fprintf('  bootstrap standard error: %.3f\n', bootsem);
+fprintf('theoretical standard error: %.3f\n', sem);
 
 %% (e) confidence interval:
 q = quantile(mu, [0.025, 0.975]);
-fprintf( '95%% confidence interval of the mean from %.2fmg to %.2fmg\n', q(1), q(2) );
-pause( 2.0 )
+fprintf('95%% confidence interval of the mean from %.2fmg to %.2fmg\n', q(1), q(2));
+pause(2.0)
 
 %% (f): dependence on sample size:
 nsamplesrange = 10:10:1000;
-bootsems = zeros( length(nsamplesrange),1);
+bootsems = zeros(length(nsamplesrange), 1);
 for n=1:length(nsamplesrange)
     nsamples = nsamplesrange(n);
     % [bootsems(n), mu] = bootstrapmean(x, resample);
@@ -43,5 +43,5 @@ hold off
 xlabel('sample size')
 ylabel('SEM')
 legend('bootsrap', 'theory')
-savefigpdf( gcf, 'bootstraptymus-samples.pdf', 6, 5 );
+savefigpdf(gcf, 'bootstraptymus-samples.pdf', 6, 5);
 
diff --git a/bootstrap/exercises/correlationbootstrap.m b/bootstrap/exercises/correlationbootstrap.m
index 5abb951..707285f 100644
--- a/bootstrap/exercises/correlationbootstrap.m
+++ b/bootstrap/exercises/correlationbootstrap.m
@@ -11,12 +11,12 @@ for i=1:nperm
 end
 
 %% (b) pdf of the correlation coefficients:
-[hb,bb] = hist(rb, 20 );
+[hb,bb] = hist(rb, 20);
 hb = hb/sum(hb)/(bb(2)-bb(1));  % normalization
 
 %% (c) significance:
 rbq = quantile(rb, 0.05);
-fprintf('correlation coefficient at 5%% significance = %.2f\n', rbq );
+fprintf('correlation coefficient at 5%% significance = %.2f\n', rbq);
 if rbq > 0.0
     fprintf('--> correlation r=%.2f is significant\n', rd);
 else
@@ -28,10 +28,10 @@ hold on;
 bar(b, h, 'facecolor', [0.5 0.5 0.5]);
 bar(bb, hb, 'facecolor', 'b');
 bar(bb(bb<=rbq), hb(bb<=rbq), 'facecolor', 'r');
-plot( [rd rd], [0 4], 'r', 'linewidth', 2 );
+plot([rd rd], [0 4], 'r', 'linewidth', 2);
 xlim([-0.25 0.75])
 xlabel('Correlation coefficient');
 ylabel('Probability density');
 hold off;
 
-savefigpdf( gcf, 'correlationbootstrap.pdf', 12, 6 );
+savefigpdf(gcf, 'correlationbootstrap.pdf', 12, 6);
diff --git a/bootstrap/exercises/correlationsignificance.m b/bootstrap/exercises/correlationsignificance.m
index 7c7e8a2..d44af84 100644
--- a/bootstrap/exercises/correlationsignificance.m
+++ b/bootstrap/exercises/correlationsignificance.m
@@ -6,7 +6,7 @@ y = randn(n, 1) + a*x;
 
 %% (b) scatter plot:
 subplot(1, 2, 1);
-plot(x, a*x, 'r', 'linewidth', 3 );
+plot(x, a*x, 'r', 'linewidth', 3);
 hold on
 %scatter(x, y );   % either scatter ...
 plot(x, y, 'o', 'markersize', 2 );  % ... or plot - same plot.
@@ -32,12 +32,12 @@ for i=1:nperm
 end
 
 %% (g) pdf of the correlation coefficients:
-[h,b] = hist(rs, 20 );
+[h,b] = hist(rs, 20);
 h = h/sum(h)/(b(2)-b(1));  % normalization
 
 %% (h) significance:
 rq = quantile(rs, 0.95);
-fprintf('correlation coefficient at 5%% significance = %.2f\n', rq );
+fprintf('correlation coefficient at 5%% significance = %.2f\n', rq);
 if rd >= rq
     fprintf('--> correlation r=%.2f is significant\n', rd);
 else
@@ -49,10 +49,10 @@ subplot(1, 2, 2)
 hold on;
 bar(b, h, 'facecolor', 'b');
 bar(b(b>=rq), h(b>=rq), 'facecolor', 'r');
-plot( [rd rd], [0 4], 'r', 'linewidth', 2 );
+plot( [rd rd], [0 4], 'r', 'linewidth', 2);
 xlim([-0.25 0.25])
 xlabel('Correlation coefficient');
 ylabel('Probability density of H0');
 hold off;
 
-savefigpdf( gcf, 'correlationsignificance.pdf', 12, 6 );
+savefigpdf(gcf, 'correlationsignificance.pdf', 12, 6);
diff --git a/bootstrap/exercises/exercises01.tex b/bootstrap/exercises/exercises01.tex
index 3029cc1..c9e1c8a 100644
--- a/bootstrap/exercises/exercises01.tex
+++ b/bootstrap/exercises/exercises01.tex
@@ -15,7 +15,7 @@
 \else
 \newcommand{\stitle}{}
 \fi
-\header{{\bfseries\large Exercise 9\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large November 20th, 2018}}
+\header{{\bfseries\large Exercise 9\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large December 9th, 2019}}
 \firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
 jan.benda@uni-tuebingen.de}
 \runningfooter{}{\thepage}{}
@@ -86,7 +86,7 @@ jan.benda@uni-tuebingen.de}
 
 \begin{questions}
 
-\question \qt{Bootstrap of the standard error of the mean}
+\question \qt{Bootstrap the standard error of the mean}
 We want to compute the standard error of the mean of a data set by
 means of the bootstrap method and compare the result with the formula
 ``standard deviation divided by the square-root of $n$''.
@@ -118,24 +118,25 @@ means of the bootstrap method and compare the result with the formula
 \end{solution}
 
 
-\question \qt{Student t-distribution} 
-The distribution of Student's t, $t=\bar x/(\sigma_x/\sqrt{m})$, the
-estimated mean of a data set divided by the estimated standard error
-of the mean, is not a normal distribution but a Student-t distribution.
-We want to compute the Student-t distribution and compare it with the 
-normal distribution.
+\question \qt{Student t-distribution}
+The distribution of Student's t, $t=\bar x/(\sigma_x/\sqrt{n})$, the
+estimated mean $\bar x$ of a data set of size $n$ divided by the
+estimated standard error of the mean $\sigma_x/\sqrt{n}$, where
+$\sigma_x$ is the estimated standard deviation, is not a normal
+distribution but a Student-t distribution.  We want to compute the
+Student-t distribution and compare it with the normal distribution.
 \begin{parts}
 \part Generate 100000 normally distributed random numbers.
-\part Draw from these data 1000 samples of size $n=3$, 5, 10, and 50.
-\part Compute the mean $\bar x$ of the samples and plot the
+\part Draw from these data 1000 samples of size $n=3$, 5, 10, and
+50. For each sample size $n$ ...
+\part ... compute the mean $\bar x$ of the samples and plot the
 probability density of these means.
-\part Compare the resulting probability densities with corresponding
+\part ... compare the resulting probability densities with corresponding
 normal distributions.
-\part Compute in addition $t=\bar x/(\sigma_x/\sqrt{n})$ (standard
-deviation of the samples $\sigma_x$) and compare their distribution
-with the normal distribution with standard deviation of one. Is $t$
-normally distributed? Under which conditions is $t$ normally
-distributed?
+\part ... compute Student's $t=\bar x/(\sigma_x/\sqrt{n})$ and compare its
+distribution with the normal distribution with standard deviation of
+one. Is $t$ normally distributed? Under which conditions is $t$
+normally distributed?
 \end{parts}
 \newsolutionpage
 \begin{solution}
@@ -167,16 +168,16 @@ y = randn(n, 1) + a*x;
   \part Compute and plot the probability density of these correlation
   coefficients.
   \part Is the correlation of the original data set significant?
-  \part What does significance of the correlation mean?
-  \part Vary the sample size \code{n} and compute in the same way the
-  significance of the correlation.
+  \part What does ``significance of the correlation'' mean?
+%  \part Vary the sample size \code{n} and compute in the same way the
+%  significance of the correlation.
 \end{parts}
 \begin{solution}
   \lstinputlisting{correlationsignificance.m}
   \includegraphics[width=1\textwidth]{correlationsignificance}
 \end{solution}
 
-\question \qt{Bootstrap of the correlation coefficient} 
+\question \qt{Bootstrap the correlation coefficient} 
 The permutation test generates the distribution of the null hypothesis
 of uncorrelated data and we check whether the correlation coefficient
 of the data differs significantly from this
@@ -184,7 +185,7 @@ distribution. Alternatively we can bootstrap the data while keeping
 the pairs and determine the confidence interval of the correlation
 coefficient of the data. If this differs significantly from a
 correlation coefficient of zero we can conclude that the correlation
-coefficient of the data quantifies indeed a correlated data.
+coefficient of the data indeed quantifies correlated data.
 
 We take the same data set that we have generated in exercise
 \ref{permutationtest} (\ref{permutationtestdata}).
diff --git a/bootstrap/exercises/tdistribution.m b/bootstrap/exercises/tdistribution.m
index 223cbe5..5fe8341 100644
--- a/bootstrap/exercises/tdistribution.m
+++ b/bootstrap/exercises/tdistribution.m
@@ -6,9 +6,9 @@ for nsamples=[3 5 10 50]
     nsamples
     %% compute mean, standard deviation and t:
     nmeans = 10000;
-    means = zeros( nmeans, 1 );
-    sdevs = zeros( nmeans, 1 );
-    students = zeros( nmeans, 1 );
+    means = zeros(nmeans, 1);
+    sdevs = zeros(nmeans, 1);
+    students = zeros(nmeans, 1 );
     for i=1:nmeans
         sample = x(randi(n, nsamples, 1));
         means(i) = mean(sample);
@@ -34,7 +34,7 @@ for nsamples=[3 5 10 50]
     bar(b, h, 'facecolor', 'b', 'edgecolor', 'b')
     hold on
     plot(xg, pm, 'r', 'linewidth', 2)
-    title( sprintf('sample size = %d', nsamples) );
+    title(sprintf('sample size = %d', nsamples));
     xlim( [-3, 3] );
     xlabel('Mean');
     ylabel('pdf');
@@ -47,12 +47,12 @@ for nsamples=[3 5 10 50]
     bar(b, h, 'facecolor', 'b', 'edgecolor', 'b')
     hold on
     plot(xg, pt, 'r', 'linewidth', 2)
-    title( sprintf('sample size = %d', nsamples) );
+    title(sprintf('sample size = %d', nsamples));
     xlim( [-8, 8] );
     xlabel('Student-t');
     ylabel('pdf');
     hold off;
     
-    savefigpdf( gcf, sprintf('tdistribution-n%02d.pdf', nsamples), 14, 5 );
+    savefigpdf(gcf, sprintf('tdistribution-n%02d.pdf', nsamples), 14, 5);
     pause( 3.0 )
 end
diff --git a/bootstrap/lecture/bootstrap.tex b/bootstrap/lecture/bootstrap.tex
index 28dbec0..f0fae62 100644
--- a/bootstrap/lecture/bootstrap.tex
+++ b/bootstrap/lecture/bootstrap.tex
@@ -84,9 +84,11 @@ standard errors and confidence intervals).
 Bootstrapping methods create bootstrapped samples from a SRS by
 resampling. The bootstrapped samples are used to estimate the sampling
 distribution of a statistical measure. The bootstrapped samples have
-the same size as the original sample and are created by randomly drawing with
-replacement. That is, each value of the original sample can occur
-once, multiple time, or not at all in a bootstrapped sample.
+the same size as the original sample and are created by randomly
+drawing with replacement. That is, each value of the original sample
+can occur once, multiple times, or not at all in a bootstrapped
+sample. This can be implemented by generating random indices into the
+data set using the \code{randi()} function.
 
 
 \section{Bootstrap of the standard error}
@@ -165,13 +167,13 @@ data points $(x_i, y_i)$. By calculating the correlation coefficient
 we can quantify how strongly $y$ depends on $x$. The correlation
 coefficient alone, however, does not tell whether the correlation is
 significantly different from a random correlation. The null hypothesis
-for such a situation would be that $y$ does not depend on $x$. In
+for such a situation is that $y$ does not depend on $x$. In
 order to perform a permutation test, we need to destroy the
 correlation by permuting the $(x_i, y_i)$ pairs, i.e. we rearrange the
 $x_i$ and $y_i$ values in a random fashion. Generating many sets of
-random pairs and computing the resulting correlation coefficients,
+random pairs and computing the resulting correlation coefficients
 yields a distribution of correlation coefficients that result
-randomnly from uncorrelated data. By comparing the actually measured
+randomly from uncorrelated data. By comparing the actually measured
 correlation coefficient with this distribution we can directly assess
 the significance of the correlation
 (figure\,\ref{permutecorrelationfig}).
@@ -183,10 +185,10 @@ Estimate the statistical significance of a correlation coefficient.
   and calculate the respective $y$-values according to $y_i =0.2 \cdot x_i + u_i$
   where $u_i$ is a random number drawn from a normal distribution.
 \item Calculate the correlation coefficient.
-\item Generate the distribution according to the null hypothesis by
-  generating uncorrelated pairs. For this permute $x$- and $y$-values
-  \matlabfun{randperm()} 1000 times and calculate for each
-  permutation the correlation coefficient.
+\item Generate the distribution of the null hypothesis by generating
+  uncorrelated pairs. For this, permute the $x$- and $y$-values
+  \matlabfun{randperm()} 1000 times and calculate for each permutation
+  the correlation coefficient.
 \item Read out the 95\,\% percentile from the resulting distribution
   of the null hypothesis and compare it with the correlation
   coefficient computed from the original data.
diff --git a/pointprocesses/lecture/pointprocessscetchA.eps b/pointprocesses/lecture/pointprocessscetchA.eps
index 15bfb45..30d6da4 100644
--- a/pointprocesses/lecture/pointprocessscetchA.eps
+++ b/pointprocesses/lecture/pointprocessscetchA.eps
@@ -1,7 +1,7 @@
 %!PS-Adobe-2.0 EPSF-2.0
 %%Title: pointprocessscetchA.tex
 %%Creator: gnuplot 4.6 patchlevel 4
-%%CreationDate: Mon Dec  2 13:03:15 2019
+%%CreationDate: Tue Dec  3 08:08:50 2019
 %%DocumentFonts: 
 %%BoundingBox: 50 50 373 135
 %%EndComments
@@ -430,10 +430,10 @@ SDict begin [
   /Title (pointprocessscetchA.tex)
   /Subject (gnuplot plot)
   /Creator (gnuplot 4.6 patchlevel 4)
-  /Author (benda)
+  /Author (jan)
 %  /Producer (gnuplot)
 %  /Keywords ()
-  /CreationDate (Mon Dec  2 13:03:15 2019)
+  /CreationDate (Tue Dec  3 08:08:50 2019)
   /DOCINFO pdfmark
 end
 } ifelse
diff --git a/pointprocesses/lecture/pointprocessscetchA.pdf b/pointprocesses/lecture/pointprocessscetchA.pdf
index afb0114e01cdaf83ecdb97f885ffa871b3163b71..d064a3a00a75327b804866f9670e94f32d37489a 100644
GIT binary patch
delta 307
zcmaDR`bcy`I}@Yf<_;!1M($Ko3xiZMLtT@Uq{$bVyV3XxliOKrxQs&#EUbXY)L`;i
z77dAz(o_XqmsH(kg%AZ}D+L29u%wm2WM0;04E?)V9ni!iC(E;`qKPR?&SyKqoRyfj
z*@ArmqcF$@m(*kh1!D!c&96Bm*^Lb>3@m`I;O5K*DV|)->B?v}`5@<sSSLdZQ)5Fn
z3v*W!7fUxsGdFW13j<?I10xexBS&*L3p*QvDq^|p?6`_c5{pVIic-_Kj7*G;xl~nM
H{oS|#?@d!3

delta 331
zcmaDP`b=~~I}@Yv<_;!1Ms8C>b2B4jLtT>;!^sz!yV3XxliOKrxQs#!jjarftqe^k
zpJmaI@XgOt&~-`GO;!j|FtSoGv;s?7nNH?qZN|{Qo7DkLOmeb3n<|=^!sL9mBdkfO
zc`1pT&Da+(8iVX`NljKzFj9cotzcweXsOAi@0*|El30?e;bH|6Ff=kSGBh?Y2D*)t
zGaF*W<Z@0|MvKXNIZwnoyE&U!7`r)|SejY7897=QS(-SSJ6jrASUMY-IT~Bq*$`9_
e%VlTBRa}x-R8motn#N^hVr<T(s_N?R#svTr99Aj-

diff --git a/pointprocesses/lecture/pointprocessscetchB.eps b/pointprocesses/lecture/pointprocessscetchB.eps
index e543fa1..1905250 100644
--- a/pointprocesses/lecture/pointprocessscetchB.eps
+++ b/pointprocesses/lecture/pointprocessscetchB.eps
@@ -1,7 +1,7 @@
 %!PS-Adobe-2.0 EPSF-2.0
 %%Title: pointprocessscetchB.tex
 %%Creator: gnuplot 4.6 patchlevel 4
-%%CreationDate: Mon Dec  2 13:03:15 2019
+%%CreationDate: Tue Dec  3 08:08:50 2019
 %%DocumentFonts: 
 %%BoundingBox: 50 50 373 237
 %%EndComments
@@ -430,10 +430,10 @@ SDict begin [
   /Title (pointprocessscetchB.tex)
   /Subject (gnuplot plot)
   /Creator (gnuplot 4.6 patchlevel 4)
-  /Author (benda)
+  /Author (jan)
 %  /Producer (gnuplot)
 %  /Keywords ()
-  /CreationDate (Mon Dec  2 13:03:15 2019)
+  /CreationDate (Tue Dec  3 08:08:50 2019)
   /DOCINFO pdfmark
 end
 } ifelse
diff --git a/pointprocesses/lecture/pointprocessscetchB.pdf b/pointprocesses/lecture/pointprocessscetchB.pdf
index 558282d18a2326d98af18896b350625258d610a4..b5f398530d2523e13e95671a01621bf5423157da 100644
GIT binary patch
delta 307
zcmcbma!qB!Z%#(T&3`!U7`anTEeul440TOXk|y)>Dojr0&PEecocxE|n#(xEz`_cM
zObsSG@@Pnel%^`^x}@qRD}*Q*TPYY=fhDaBCO7glqv=mT<4aDS%&UqfrZD+2?-Ay#
z#JtUG_!ck<gG_fxO;%7aR)E_YFCfWoY+zwv0dzsVKsHG6<o5!uj3$#!1y95p8o3&}
z7@4}68k)EoJ3E=X7`T`ixH`ERm^hoeTDn=-*$`9_%VlTBRa}x-R8motn#N^fY+}fz
Ks_N?R#svT!WKvTA

delta 311
zcmcbna!X~yZ%#(z&3`!U7`aUi&CQIA4RuXY3@7vRDojr0&PEecocxE|n#(A}(Adhr
z*vim!vLla%gl~SHg04%dZn8p%f{~Sip%qxt%5-ugPcxeS1T?<n<jK6MXkrSJAM+kz
zO-jv6N!+}OZvmq)$b6U7WCaBy1-QMj0+Q@ThQ<cQKsVG1WJ8osel6h2Xg1kU@I<VO
ztC5A7lYy~|iIcI3v#X=4p@EsDrG<-=p`nq9p^>GX4M7#LTy}O`#U+VFB^5=fX<R18
NCZ=4fs;>TSTma+tP*(r|