From 3dd0660b21b5d8c973a24e5be7effa7bee7378e7 Mon Sep 17 00:00:00 2001 From: Jan Benda Date: Sat, 12 Dec 2020 20:34:43 +0100 Subject: [PATCH] [bootstrap] added difference of mean --- bootstrap/code/correlationsignificance.m | 14 +- bootstrap/code/meandiffsignificance.m | 40 ++++++ bootstrap/code/meandiffsignificance.out | 4 + bootstrap/lecture/bootstrap-chapter.tex | 2 - bootstrap/lecture/bootstrap.tex | 160 ++++++++++++++++++----- bootstrap/lecture/bootstrapsem.py | 2 +- bootstrap/lecture/permuteaverage.py | 103 +++++++++++++++ bootstrap/lecture/permutecorrelation.py | 52 ++++++-- chapter.mk | 3 + plotstyle.py | 7 +- 10 files changed, 326 insertions(+), 61 deletions(-) create mode 100644 bootstrap/code/meandiffsignificance.m create mode 100644 bootstrap/code/meandiffsignificance.out create mode 100644 bootstrap/lecture/permuteaverage.py diff --git a/bootstrap/code/correlationsignificance.m b/bootstrap/code/correlationsignificance.m index d9b0fff..7426cc5 100644 --- a/bootstrap/code/correlationsignificance.m +++ b/bootstrap/code/correlationsignificance.m @@ -1,6 +1,6 @@ % generate correlated data: -n=200; -a=0.2; +n = 200; +a = 0.2; x = randn(n, 1); y = randn(n, 1) + a*x; @@ -11,13 +11,13 @@ fprintf('correlation coefficient of data r = %.2f\n', rd); % distribution of null hypothesis by permutation: nperm = 1000; rs = zeros(nperm,1); -for i=1:nperm - xr=x(randperm(length(x))); % shuffle x - yr=y(randperm(length(y))); % shuffle y +for i = 1:nperm + xr = x(randperm(length(x))); % shuffle x + yr = y(randperm(length(y))); % shuffle y rs(i) = corr(xr, yr); end -[h,b] = hist(rs, 20); -h = h/sum(h)/(b(2)-b(1)); % normalization +[h, b] = hist(rs, 20); +h = h/sum(h)/(b(2)-b(1)); % normalization % significance: rq = quantile(rs, 0.95); diff --git a/bootstrap/code/meandiffsignificance.m b/bootstrap/code/meandiffsignificance.m new file mode 100644 index 0000000..96a328e --- /dev/null +++ b/bootstrap/code/meandiffsignificance.m @@ -0,0 +1,40 @@ +% generate two data sets: +n = 200; +d = 0.2; +x = randn(n, 1); +y = randn(n, 1) + d; + +% difference of sample means: +md = mean(y) - mean(x); +fprintf('difference of means of data d = %.2f\n', md); + +% distribution of null hypothesis by permutation: +nperm = 1000; +xy = [x; y]; % x and y data in one column vector +ds = zeros(nperm,1); +for i = 1:nperm + xyr = xy(randperm(length(xy))); % shuffle data + xr = xyr(1:length(x)); % random x-data + yr = xyr(length(x)+1:end); % random y-data + ds(i) = mean(yr) - mean(xr); % difference of means +end +[h, b] = hist(ds, 20); +h = h/sum(h)/(b(2)-b(1)); % normalization + +% significance: +dq = quantile(ds, 0.95); +fprintf('difference of means of null hypothesis at 5%% significance = %.2f\n', dq); +if md >= dq + fprintf('--> difference of means d=%.2f is significant\n', md); +else + fprintf('--> d=%.2f is not a significant difference of means\n', md); +end + +% plot: +bar(b, h, 'facecolor', 'b'); +hold on; +bar(b(b>=dq), h(b>=dq), 'facecolor', 'r'); +plot([md md], [0 4], 'r', 'linewidth', 2); +xlabel('Difference of means'); +ylabel('Probability density of H0'); +hold off; diff --git a/bootstrap/code/meandiffsignificance.out b/bootstrap/code/meandiffsignificance.out new file mode 100644 index 0000000..e17ad8d --- /dev/null +++ b/bootstrap/code/meandiffsignificance.out @@ -0,0 +1,4 @@ +>> meandiffsignificance +difference of means of data d = 0.18 +difference of means of null hypothesis at 5% significance = 0.17 +--> difference of means d=0.18 is significant diff --git a/bootstrap/lecture/bootstrap-chapter.tex 
b/bootstrap/lecture/bootstrap-chapter.tex index b61d395..78d7d3c 100644 --- a/bootstrap/lecture/bootstrap-chapter.tex +++ b/bootstrap/lecture/bootstrap-chapter.tex @@ -26,6 +26,4 @@ This chapter easily covers two lectures: Add jacknife methods to bootstrapping -Add permutation test for significant different means of two distributions. - \end{document} diff --git a/bootstrap/lecture/bootstrap.tex b/bootstrap/lecture/bootstrap.tex index 20f01c2..8f72366 100644 --- a/bootstrap/lecture/bootstrap.tex +++ b/bootstrap/lecture/bootstrap.tex @@ -125,7 +125,6 @@ Via bootstrapping we create a distribution of the mean values (\figref{bootstrapsemfig}) and the standard deviation of this distribution is the standard error of the mean. -\pagebreak[4] \begin{exercise}{bootstrapsem.m}{bootstrapsem.out} Create the distribution of mean values from bootstrapped samples resampled from a single SRS. Use this distribution to estimate the @@ -152,67 +151,156 @@ desired \entermde{Signifikanz}{significance level}, the \entermde{Nullhypothese}{null hypothesis} may be rejected. Traditionally, such probabilities are taken from theoretical -distributions which are based on assumptions about the data. Thus the -applied statistical test has to be appropriate for the type of +distributions which are based on some assumptions about the data. For +example, the data should be normally distributed. Given some data, one +has to find an appropriate test that matches the properties of the data. An alternative approach is to calculate the probability density -of the null hypothesis directly from the data itself. To do this, we +of the null hypothesis directly from the data themselves. To do so, we need to resample the data according to the null hypothesis from the SRS. By such permutation operations we destroy the feature of interest -while we conserve all other statistical properties of the data. +while conserving all other statistical properties of the data. + + +\subsection{Significance of a difference in the mean} + +Often we would like to know whether two data sets differ in their +mean: whether the ears of foxes are larger in southern Europe than +in Scandinavia, whether a drug decreases blood pressure in humans, +whether a sensory stimulus increases the firing rate of a neuron, +etc. The \entermde{Nullhypothese}{null hypothesis} is that +they do not differ in their means, i.e. that both data sets come from +the same distribution. But even if the two data sets come from the +same distribution, their sample means may nevertheless differ by +chance. We need to figure out how these differences of the means are +distributed. Only if the measured difference between the means is +significantly larger than the differences obtained by chance can we +reject the null hypothesis and consider the two data sets to differ +significantly in their means. + +We can easily estimate the distribution of the null hypothesis by +putting the data of both data sets in one big bag +(\figref{permuteaverage}). By merging the two data sets we assume +that all the data values come from the same distribution. We then +randomly separate the data values into two new data sets. These +random data sets contain data from both original data sets and thus +come from the same distribution. From these random data sets we +compute the difference of their sample means. This procedure is +repeated many, say one thousand, times and each time we get a value +for a difference of means. The distribution of these values is the +distribution of the null hypothesis. It is the distribution of +differences of mean values that we get by chance even though the two +data sets come from the same distribution. For a one-sided test that +checks whether the measured difference of means is significantly +larger than zero at a significance level of 5\,\%, we compute the +95\,\% percentile of the null distribution. If the measured value is +larger, we can reject the null hypothesis and consider the two data +sets to differ significantly in their means.
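In code this procedure boils down to just a few lines. A minimal sketch in Python with NumPy, paralleling the \varcode{meandiffsignificance.m} exercise below (sample size, effect size, and seed are arbitrary choices for illustration):

import numpy as np

rng = np.random.RandomState(42)   # arbitrary seed
x = rng.randn(200)                # first data set
y = rng.randn(200) + 0.2          # second data set, population mean shifted by 0.2
md = np.mean(y) - np.mean(x)      # measured difference of sample means

xy = np.hstack((x, y))            # put all data values in one big bag
ds = []
for _ in range(1000):
    xyr = rng.permutation(xy)     # shuffle the merged data ...
    ds.append(np.mean(xyr[len(x):]) - np.mean(xyr[:len(x)]))  # ... and split again

dq = np.percentile(ds, 95.0)      # 95% percentile of the null distribution
print('significant' if md >= dq else 'not significant')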
+By using the original data to estimate the null hypothesis, we make no +assumption about the properties of the data. We do not need to worry +about the data being normally distributed. We do not need to memorize +which test to use in which situation. And we better understand what we +are testing, because we design the test ourselves. Nowadays, computers +are powerful enough to iterate even ten thousand times over the data +to compute the distribution of the null hypothesis --- with only a few +lines of code. This is why \entermde{Permutationstest}{permutation + tests} are getting quite popular. \begin{figure}[tp] - \includegraphics[width=1\textwidth]{permutecorrelation} - \titlecaption{\label{permutecorrelationfig}Permutation test for - correlations.}{Let the correlation coefficient of a dataset with - 200 samples be $\rho=0.21$. The distribution of the null - hypothesis (yellow), optained from the correlation coefficients of - permuted and therefore uncorrelated datasets is centered around - zero. The measured correlation coefficient is larger than the - 95\,\% percentile of the null hypothesis. The null hypothesis may - thus be rejected and the measured correlation is considered - statistically significant.} + \includegraphics[width=1\textwidth]{permuteaverage} + \titlecaption{\label{permuteaverage}Permutation test for differences + in means.}{We want to test whether two datasets + $\left\{x_i\right\}$ (red) and $\left\{y_i\right\}$ (blue) come + from different distributions by assessing the significance of the + difference in their sample means. The data sets were generated + with a difference in their population means of $d=0.7$. For + generating the distribution of the null hypothesis, i.e. the + distribution of differences in the means if the two data sets come + from the same distribution, we shuffle all the data values and + split them into two new data sets (top right). This is repeated many + times and results in the desired distribution of differences of + means (bottom). The measured difference is clearly beyond the + 95\,\% percentile of this distribution and thus indicates a + significant difference between the distributions of the two + original data sets.} \end{figure} -%\subsection{Significance of a difference in the mean} \begin{exercise}{meandiffsignificance.m}{meandiffsignificance.out} +Estimate the statistical significance of a difference in the mean of two data sets. +\vspace{-1ex} +\begin{enumerate} +\item Generate two independent data sets, $\left\{x_i\right\}$ and + $\left\{y_i\right\}$, of $n=200$ samples each, by drawing random + numbers from a normal distribution. Add 0.2 to all the $y_i$ samples + to ensure that the population means differ by 0.2. +\item Calculate the difference between the sample means of the two data sets. +\item Estimate the distribution of the null hypothesis of no + difference of the means by generating new data sets with the same + number of samples randomly selected from both data sets. For this, + lump the two data sets together into a single vector.
Then permute + the order of the elements in this vector using the function + \varcode{randperm()}, split it into two data sets and calculate + the difference of their means. Repeat this 1000 times. +\item Read out the 95\,\% percentile from the resulting distribution + of the differences in the mean, i.e. the null hypothesis, using the + \varcode{quantile()} function, and compare it with the difference of + means measured from the original data sets. +\end{enumerate} +\end{exercise} -%See \url{https://en.wikipedia.org/wiki/Resampling_(statistics)#Permutation_tests.} \subsection{Significance of correlations} -A good example for the application of a -\entermde{Permutationstest}{permutaion test} is the statistical -assessment of \entermde[correlation]{Korrelation}{correlations}. Given -are measured pairs of data points $(x_i, y_i)$. By calculating the +Another nice example for the application of a +\entermde{Permutationstest}{permutation test} is testing for +significant \entermde[correlation]{Korrelation}{correlations} +(figure\,\ref{permutecorrelationfig}). Given are measured pairs of +data points $(x_i, y_i)$. By calculating the \entermde[correlation!correlation -coefficient]{Korrelationskoeffizient}{correlation - coefficient} we can quantify how strongly $y$ depends on $x$. The -correlation coefficient alone, however, does not tell whether the -correlation is significantly different from a random correlation. The -\entermde{Nullhypothese}{null hypothesis} for such a situation is that -$y$ does not depend on $x$. In order to perform a permutation test, we -need to destroy the correlation by permuting the $(x_i, y_i)$ pairs, -i.e. we rearrange the $x_i$ and $y_i$ values in a random +coefficient]{Korrelationskoeffizient}{correlation coefficient} we can +quantify how strongly $y$ depends on $x$. The correlation coefficient +alone, however, does not tell whether the correlation is significantly +different from the non-zero correlations that we might get by chance even +though there is no true correlation in the data. The \entermde{Nullhypothese}{null + hypothesis} for such a situation is that $y$ does not depend on +$x$. In order to perform a permutation test, we need to destroy the +correlation between the data pairs by permuting the $(x_i, y_i)$ +pairs, i.e. we rearrange the $x_i$ and $y_i$ values in a random fashion. Generating many sets of random pairs and computing the -resulting correlation coefficients yields a distribution of -correlation coefficients that result randomly from uncorrelated +corresponding correlation coefficients yields a distribution of +correlation coefficients that result randomly from truly uncorrelated data. By comparing the actually measured correlation coefficient with this distribution we can directly assess the significance of the -correlation (figure\,\ref{permutecorrelationfig}). +correlation.
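Again, the test takes only a few lines of code. A minimal sketch in Python with NumPy and SciPy, paralleling the \varcode{correlationsignificance.m} exercise below (parameters and seed are again arbitrary):

import numpy as np
import scipy.stats as st

rng = np.random.RandomState(42)    # arbitrary seed
x = rng.randn(200)
y = 0.2*x + rng.randn(200)         # y weakly depends on x
rd = np.corrcoef(x, y)[0, 1]       # measured correlation coefficient

# null distribution: correlations of independently shuffled x- and y-values
rs = [np.corrcoef(rng.permutation(x), rng.permutation(y))[0, 1]
      for _ in range(1000)]
rq = np.percentile(rs, 95.0)                 # 95% percentile of the null distribution
p = 1.0 - 0.01*st.percentileofscore(rs, rd)  # corresponding one-sided p-value
print('r = %.2f, 95%% percentile = %.2f, p = %.3f' % (rd, rq, p))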
+ +\begin{figure}[tp] + \includegraphics[width=1\textwidth]{permutecorrelation} + \titlecaption{\label{permutecorrelationfig}Permutation test for + correlations.}{Let the correlation coefficient of a dataset with + 200 samples be $\rho=0.21$ (top left). By shuffling the data pairs + we destroy any true correlation (top right). The resulting + distribution of the null hypothesis (bottom, yellow), obtained from + the correlation coefficients of permuted and therefore + uncorrelated datasets, is centered around zero. The measured + correlation coefficient is larger than the 95\,\% percentile of + the null hypothesis. The null hypothesis may thus be rejected and + the measured correlation is considered statistically significant.} +\end{figure} \begin{exercise}{correlationsignificance.m}{correlationsignificance.out} Estimate the statistical significance of a correlation coefficient. \begin{enumerate} -\item Create pairs of $(x_i, y_i)$ values. Randomly choose $x$-values +\item Generate pairs of $(x_i, y_i)$ values. Randomly choose $x$-values and calculate the respective $y$-values according to $y_i =0.2 \cdot x_i + u_i$ where $u_i$ is a random number drawn from a normal distribution. \item Calculate the correlation coefficient. -\item Generate the distribution of the null hypothesis by generating +\item Estimate the distribution of the null hypothesis by generating uncorrelated pairs. For this permute $x$- and $y$-values \matlabfun{randperm()} 1000 times and calculate for each permutation the correlation coefficient. \item Read out the 95\,\% percentile from the resulting distribution - of the null hypothesis and compare it with the correlation - coefficient computed from the original data. + of the null hypothesis using the \varcode{quantile()} function and + compare it with the correlation coefficient computed from the + original data. \end{enumerate} \end{exercise} diff --git a/bootstrap/lecture/bootstrapsem.py b/bootstrap/lecture/bootstrapsem.py index 3a276bd..837b0b1 100644 --- a/bootstrap/lecture/bootstrapsem.py +++ b/bootstrap/lecture/bootstrapsem.py @@ -24,7 +24,7 @@ for i in range(nresamples) : musrs.append(np.mean(rng.randn(nsamples))) hmusrs, _ = np.histogram(musrs, bins, density=True) -fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.2*figure_height)) +fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.05*figure_height)) fig.subplots_adjust(**adjust_fs(left=4.0, bottom=2.7, right=1.5)) ax.set_xlabel('Mean') ax.set_xlim(-0.4, 0.4) diff --git a/bootstrap/lecture/permuteaverage.py b/bootstrap/lecture/permuteaverage.py new file mode 100644 index 0000000..53cfc0b --- /dev/null +++ b/bootstrap/lecture/permuteaverage.py @@ -0,0 +1,103 @@ +import numpy as np +import scipy.stats as st +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +import matplotlib.ticker as ticker +from plotstyle import * + +rng = np.random.RandomState(637281) + +# generate data that differ in their mean by d: +n = 200 +d = 0.7 +x = rng.randn(n) + d +y = rng.randn(n) +#x = rng.exponential(1.0, n) +#y = rng.exponential(1.0, n) + d + +# histogram of data: +db = 0.5 +bins = np.arange(-2.5, 2.6, db) +hx, _ = np.histogram(x, bins) +hy, _ = np.histogram(y, bins) + +# difference of means, pooled standard deviation and Cohen's d: +ad = np.mean(x)-np.mean(y) +s = np.sqrt(((len(x)-1)*np.var(x)+(len(y)-1)*np.var(y))/(len(x)+len(y)-2)) +cd = ad/s + +# permutation: +nperm = 1000 +ads = [] +xy = np.hstack((x, y)) +for i in range(nperm) : + xyp = rng.permutation(xy) # shuffle the merged data + ads.append(np.mean(xyp[:len(x)])-np.mean(xyp[len(x):])) + +# histograms of the last shuffled data sets for the example plot: +hxp, _ = np.histogram(xyp[:len(x)], bins) +hyp, _ = np.histogram(xyp[len(x):], bins) + +# pdf of the differences of means: +h, b = np.histogram(ads, 20, density=True) + +# significance: +dq = np.percentile(ads, 95.0) +print('Measured difference of means = %.2f, difference at 95%% percentile of permutation = %.2f' % (ad, dq)) +da = 1.0-0.01*st.percentileofscore(ads, ad) +print('Measured difference of means %.2f is at %.2f%% percentile of permutation' % (ad, 100.0*da)) + +at, ap = st.ttest_ind(x, y) # t statistic and p-value of two-sample t-test +print('Measured difference of means %.2f is at %.2f%%
percentile of test' % (ad, 100.0*ap)) + + +fig = plt.figure(figsize=cm_size(figure_width, 1.8*figure_height)) +gs = gridspec.GridSpec(nrows=2, ncols=2, wspace=0.35, hspace=0.8, + **adjust_fs(fig, left=5.0, right=1.5, top=1.0, bottom=2.7)) + +ax = fig.add_subplot(gs[0,0]) +ax.bar(bins[:-1]-0.25*db, hy, 0.5*db, **fsA) +ax.bar(bins[:-1]+0.25*db, hx, 0.5*db, **fsB) +ax.annotate('', xy=(0.0, 45.0), xytext=(d, 45.0), arrowprops=dict(arrowstyle='<->')) +ax.text(0.5*d, 50.0, 'd=%.1f' % d, ha='center') +ax.set_xlim(-2.5, 2.5) +ax.set_ylim(0.0, 50) +ax.yaxis.set_major_locator(ticker.MultipleLocator(20.0)) +ax.set_xlabel('Original x and y values') +ax.set_ylabel('Counts') + +ax = fig.add_subplot(gs[0,1]) +ax.bar(bins[:-1]-0.25*db, hyp, 0.5*db, **fsA) +ax.bar(bins[:-1]+0.25*db, hxp, 0.5*db, **fsB) +ax.set_xlim(-2.5, 2.5) +ax.set_ylim(0.0, 50) +ax.yaxis.set_major_locator(ticker.MultipleLocator(20.0)) +ax.set_xlabel('Shuffled x and y values') +ax.set_ylabel('Counts') + +ax = fig.add_subplot(gs[1,:]) +ax.annotate('Measured\ndifference\nis significant!', + xy=(ad, 1.2), xycoords='data', + xytext=(ad-0.1, 2.2), textcoords='data', ha='right', + arrowprops=dict(arrowstyle="->", relpos=(1.0,0.5), + connectionstyle="angle3,angleA=-20,angleB=100") ) +ax.annotate('95% percentile', + xy=(0.19, 0.9), xycoords='data', + xytext=(0.3, 5.0), textcoords='data', ha='left', + arrowprops=dict(arrowstyle="->", relpos=(0.1,0.0), + connectionstyle="angle3,angleA=40,angleB=80") ) +ax.annotate('Distribution of\nnull hypothesis', + xy=(-0.08, 3.0), xycoords='data', + xytext=(-0.22, 4.5), textcoords='data', ha='left', + arrowprops=dict(arrowstyle="->", relpos=(0.2,0.0), + connectionstyle="angle3,angleA=60,angleB=150") ) +ax.bar(b[:-1], h, width=b[1]-b[0], **fsC) +ax.bar(b[:-1][b[:-1]>=dq], h[b[:-1]>=dq], width=b[1]-b[0], **fsB) +ax.plot( [ad, ad], [0, 1], **lsA) +ax.set_xlim(-0.25, 0.85) +ax.set_ylim(0.0, 5.0) +ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0)) +ax.set_xlabel('Difference of means') +ax.set_ylabel('PDF of H0') + +plt.savefig('permuteaverage.pdf') diff --git a/bootstrap/lecture/permutecorrelation.py b/bootstrap/lecture/permutecorrelation.py index c5f0db7..3357140 100644 --- a/bootstrap/lecture/permutecorrelation.py +++ b/bootstrap/lecture/permutecorrelation.py @@ -1,9 +1,11 @@ import numpy as np import scipy.stats as st import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +import matplotlib.ticker as ticker from plotstyle import * -rng = np.random.RandomState(637281) +rng = np.random.RandomState(37281) # generate correlated data: n = 200 @@ -19,32 +21,56 @@ rd = np.corrcoef(x, y)[0, 1] nperm = 1000 rs = [] for i in range(nperm) : - xr=rng.permutation(x) - yr=rng.permutation(y) - rs.append( np.corrcoef(xr, yr)[0, 1] ) + xr = rng.permutation(x) + yr = rng.permutation(y) + rs.append(np.corrcoef(xr, yr)[0, 1]) +rr = np.corrcoef(xr, yr)[0, 1] # correlation of the last shuffle for the example plot # pdf of the correlation coefficients: h, b = np.histogram(rs, 20, density=True) # significance: rq = np.percentile(rs, 95.0) -print('Measured correlation coefficient = %.2f, correlation coefficient at 95%% percentile of bootstrap = %.2f' % (rd, rq)) +print('Measured correlation coefficient = %.2f, correlation coefficient at 95%% percentile of permutation = %.2f' % (rd, rq)) ra = 1.0-0.01*st.percentileofscore(rs, rd) -print('Measured correlation coefficient %.2f is at %.4f percentile of bootstrap' % (rd, ra)) +print('Measured correlation coefficient %.2f is at %.4f percentile of permutation' % (rd, ra)) rp, ra = st.pearsonr(x, y) print('Measured correlation
coefficient %.2f is at %.4f percentile of test' % (rp, ra)) -fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.2*figure_height)) -fig.subplots_adjust(**adjust_fs(left=4.0, bottom=2.7, right=0.5, top=1.0)) +fig = plt.figure(figsize=cm_size(figure_width, 1.8*figure_height)) +gs = gridspec.GridSpec(nrows=2, ncols=2, wspace=0.35, hspace=0.8, + **adjust_fs(fig, left=5.0, right=1.5, top=1.0, bottom=2.7)) + +ax = fig.add_subplot(gs[0,0]) +ax.text(0.0, 4.0, 'r=%.2f' % rd, ha='center') +ax.plot(x, y, **psAm) +ax.set_xlim(-4.0, 4.0) +ax.set_ylim(-4.0, 4.0) +ax.xaxis.set_major_locator(ticker.MultipleLocator(2.0)) +ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0)) +ax.set_xlabel('x') +ax.set_ylabel('y') + +ax = fig.add_subplot(gs[0,1]) +ax.text(0.0, 4.0, 'r=%.2f' % rr, ha='center') +ax.plot(xr, yr, **psAm) +ax.set_xlim(-4.0, 4.0) +ax.set_ylim(-4.0, 4.0) +ax.xaxis.set_major_locator(ticker.MultipleLocator(2.0)) +ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0)) +ax.set_xlabel('Shuffled x') +ax.set_ylabel('Shuffled y') + +ax = fig.add_subplot(gs[1,:]) ax.annotate('Measured\ncorrelation\nis significant!', xy=(rd, 1.1), xycoords='data', - xytext=(rd, 2.2), textcoords='data', ha='left', - arrowprops=dict(arrowstyle="->", relpos=(0.2,0.0), + xytext=(rd-0.01, 3.0), textcoords='data', ha='left', + arrowprops=dict(arrowstyle="->", relpos=(0.3,0.0), connectionstyle="angle3,angleA=10,angleB=80") ) ax.annotate('95% percentile', xy=(0.14, 0.9), xycoords='data', - xytext=(0.18, 4.0), textcoords='data', ha='left', + xytext=(0.16, 6.2), textcoords='data', ha='left', arrowprops=dict(arrowstyle="->", relpos=(0.1,0.0), connectionstyle="angle3,angleA=30,angleB=80") ) ax.annotate('Distribution of\nuncorrelated\nsamples', @@ -56,7 +82,9 @@ ax.bar(b[:-1], h, width=b[1]-b[0], **fsC) ax.bar(b[:-1][b[:-1]>=rq], h[b[:-1]>=rq], width=b[1]-b[0], **fsB) ax.plot( [rd, rd], [0, 1], **lsA) ax.set_xlim(-0.25, 0.35) +ax.set_ylim(0.0, 6.0) +ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0)) ax.set_xlabel('Correlation coefficient') -ax.set_ylabel('Prob. density of H0') +ax.set_ylabel('PDF of H0') plt.savefig('permutecorrelation.pdf') diff --git a/chapter.mk b/chapter.mk index 50d1708..3afd160 100644 --- a/chapter.mk +++ b/chapter.mk @@ -42,6 +42,9 @@ $(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex $(wildcard $(B watchchapter : while true; do ! make -q chapter && make chapter; sleep 0.5; done +watchplots : + while true; do ! 
make -q plots && make plots; sleep 0.5; done + cleantex: rm -f *~ rm -f $(BASENAME).aux $(BASENAME).log *.idx diff --git a/plotstyle.py b/plotstyle.py index cb453f3..ecada61 100644 --- a/plotstyle.py +++ b/plotstyle.py @@ -33,13 +33,14 @@ colors['white'] = '#FFFFFF' # general settings for plot styles: lwthick = 3.0 lwthin = 1.8 +edgewidth = 0.0 if xkcd_style else 1.0 mainline = {'linestyle': '-', 'linewidth': lwthick} minorline = {'linestyle': '-', 'linewidth': lwthin} -largemarker = {'marker': 'o', 'markersize': 9, 'markeredgecolor': colors['white'], 'markeredgewidth': 1} -smallmarker = {'marker': 'o', 'markersize': 6, 'markeredgecolor': colors['white'], 'markeredgewidth': 1} +largemarker = {'marker': 'o', 'markersize': 9, 'markeredgecolor': colors['white'], 'markeredgewidth': edgewidth} +smallmarker = {'marker': 'o', 'markersize': 6, 'markeredgecolor': colors['white'], 'markeredgewidth': edgewidth} largelinepoints = {'linestyle': '-', 'linewidth': lwthick, 'marker': 'o', 'markersize': 10, 'markeredgecolor': colors['white'], 'markeredgewidth': 1} smalllinepoints = {'linestyle': '-', 'linewidth': 1.4, 'marker': 'o', 'markersize': 7, 'markeredgecolor': colors['white'], 'markeredgewidth': 1} -filllw = 1.0 +filllw = edgewidth fillec = colors['white'] fillalpha = 0.4 filledge = {'linewidth': filllw, 'joinstyle': 'round'}