[bootstrap] added difference of mean

Jan Benda 2020-12-12 20:34:43 +01:00
parent 0e30a01a45
commit 3dd0660b21
10 changed files with 326 additions and 61 deletions

View File

@ -1,6 +1,6 @@
% generate correlated data:
n = 200;
a = 0.2;
x = randn(n, 1);
y = randn(n, 1) + a*x;
@ -11,13 +11,13 @@ fprintf('correlation coefficient of data r = %.2f\n', rd);
% distribution of null hypothesis by permutation:
nperm = 1000;
rs = zeros(nperm,1);
for i = 1:nperm
    xr = x(randperm(length(x))); % shuffle x
    yr = y(randperm(length(y))); % shuffle y
    rs(i) = corr(xr, yr);
end
[h, b] = hist(rs, 20);
h = h/sum(h)/(b(2)-b(1)); % normalization
% significance:
rq = quantile(rs, 0.95);

View File

@ -0,0 +1,40 @@
% generate two data sets:
n = 200;
d = 0.2;
x = randn(n, 1);
y = randn(n, 1) + d;
% difference of sample means:
md = mean(y) - mean(x);
fprintf('difference of means of data d = %.2f\n', md);
% distribution of null hypothesis by permutation:
nperm = 1000;
xy = [x; y]; % x and y data in one column vector
ds = zeros(nperm,1);
for i = 1:nperm
    xyr = xy(randperm(length(xy))); % shuffle data
    xr = xyr(1:length(x));          % random x-data
    yr = xyr(length(x)+1:end);      % random y-data
    ds(i) = mean(yr) - mean(xr);    % difference of means
end
[h, b] = hist(ds, 20);
h = h/sum(h)/(b(2)-b(1)); % normalization
% significance:
dq = quantile(ds, 0.95);
fprintf('difference of means of null hypothesis at 5%% significance = %.2f\n', dq);
if md >= dq
    fprintf('--> difference of means d=%.2f is significant\n', md);
else
    fprintf('--> d=%.2f is not a significant difference of means\n', md);
end
% plot:
bar(b, h, 'facecolor', 'b');
hold on;
bar(b(b>=dq), h(b>=dq), 'facecolor', 'r');
plot([md md], [0 4], 'r', 'linewidth', 2);
xlabel('Difference of means');
ylabel('Probability density of H0');
hold off;

View File

@ -0,0 +1,4 @@
>> meandiffsignificance
difference of means of data d = 0.18
difference of means of null hypothesis at 5% significance = 0.17
--> difference of means d=0.18 is significant

View File

@ -26,6 +26,4 @@ This chapter easily covers two lectures:
Add jackknife methods to bootstrapping
\end{document}

View File

@ -125,7 +125,6 @@ Via bootstrapping we create a distribution of the mean values
(\figref{bootstrapsemfig}) and the standard deviation of this
distribution is the standard error of the mean.

\begin{exercise}{bootstrapsem.m}{bootstrapsem.out}
  Create the distribution of mean values from bootstrapped samples
  resampled from a single SRS. Use this distribution to estimate the
@ -152,67 +151,156 @@ desired \entermde{Signifikanz}{significance level}, the
\entermde{Nullhypothese}{null hypothesis} may be rejected.
Traditionally, such probabilities are taken from theoretical
distributions which are based on some assumptions about the data. For
example, the data should be normally distributed. Given some data, one
has to find an appropriate test that matches the properties of the
data. An alternative approach is to calculate the probability density
of the null hypothesis directly from the data themselves. To do so, we
need to resample the data according to the null hypothesis from the
SRS. By such permutation operations we destroy the feature of interest
while conserving all other statistical properties of the data.

\subsection{Significance of a difference in the mean}

Often we would like to know whether two data sets differ in their
mean. Whether the ears of foxes are larger in southern Europe compared
to the ones from Scandinavia, whether a drug decreases blood pressure
in humans, whether a sensory stimulus increases the firing rate of a
neuron, etc. The \entermde{Nullhypothese}{null hypothesis} is that
they do not differ in their means, i.e. that both data sets come from
the same distribution. But even if the two data sets come from the
same distribution, their sample means may nevertheless differ by
chance. We need to figure out how these differences of the means are
distributed. Only if the measured difference between the means is
significantly larger than the ones obtained by chance can we reject
the null hypothesis and consider the two data sets to differ
significantly in their means.

We can easily estimate the distribution of the null hypothesis by
putting the data of both data sets in one big bag. By merging the two
data sets we assume that all the data values come from the same
distribution. We then randomly separate the data values into two new
data sets. These random data sets contain data from both original data
sets and thus come from the same distribution. From these random data
sets we compute the difference of their sample means. This procedure
is repeated many, say one thousand, times and each time we get a value
for a difference of means. The distribution of these values is the
distribution of the null hypothesis. It is the distribution of
differences of mean values that we get by chance although the two data
sets come from the same distribution. For a one-sided test that checks
whether the measured difference of means is significantly larger than
zero at a significance level of 5\,\% we compute the value of the
95\,\% percentile of the null distribution. If the measured value is
larger, we can reject the null hypothesis and consider the two data
sets to differ significantly in their means.
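
A minimal sketch of this recipe in Python/NumPy (the sample size, the
shift of 0.2, and all variable names are chosen just for illustration;
the exercise below walks through the same steps in MATLAB):

import numpy as np

rng = np.random.default_rng()
x = rng.normal(0.0, 1.0, 200)             # first data set
y = rng.normal(0.2, 1.0, 200)             # second data set, population mean shifted by 0.2
md = np.mean(y) - np.mean(x)              # measured difference of the sample means
pooled = np.concatenate((x, y))           # put all data values into one big bag
nperm = 1000
ds = np.empty(nperm)
for i in range(nperm):
    shuffled = rng.permutation(pooled)    # randomly split the bag into two new data sets
    ds[i] = np.mean(shuffled[len(x):]) - np.mean(shuffled[:len(x)])
dq = np.quantile(ds, 0.95)                # 95% percentile of the null distribution
print('measured difference: %.2f, 95%% percentile of H0: %.2f' % (md, dq))
print('significant' if md > dq else 'not significant')
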
By using the original data to estimate the null hypothesis, we make no
assumption about the properties of the data. We do not need to worry
about the data being normally distributed. We do not need to memorize
which test to use in which situation. And we better understand what we
are testing, because we design the test ourselves. Nowadays, computers
are powerful enough to iterate even ten thousand times over the data
to compute the distribution of the null hypothesis --- with only a few
lines of code. This is why \entermde{Permutationstest}{permutation
test}s are getting quite popular.

\begin{figure}[tp]
  \includegraphics[width=1\textwidth]{permuteaverage}
  \titlecaption{\label{permuteaverage}Permutation test for differences
    in means.}{We want to test whether two datasets
    $\left\{x_i\right\}$ (red) and $\left\{y_i\right\}$ (blue) come
    from different distributions by assessing the significance of the
    difference in their sample means. The data sets were generated
    with a difference in their population means of $d=0.7$. For
    generating the distribution of the null hypothesis, i.e. the
    distribution of differences in the means if the two data sets come
    from the same distribution, we randomly select the same number of
    samples from both data sets (top right). This is repeated many
    times and results in the desired distribution of differences of
    means (bottom). The measured difference is clearly beyond the
    95\,\% percentile of this distribution and thus indicates a
    significant difference between the distributions of the two
    original data sets.}
\end{figure}

\begin{exercise}{meandiffsignificance.m}{meandiffsignificance.out}
  Estimate the statistical significance of a difference in the mean of two data sets.
  \vspace{-1ex}
  \begin{enumerate}
  \item Generate two independent data sets, $\left\{x_i\right\}$ and
    $\left\{y_i\right\}$, of $n=200$ samples each, by drawing random
    numbers from a normal distribution. Add 0.2 to all the $y_i$ samples
    to ensure that the population means differ by 0.2.
  \item Calculate the difference between the sample means of the two data sets.
  \item Estimate the distribution of the null hypothesis of no
    difference of the means by generating new data sets with the same
    number of samples randomly selected from both data sets. For this,
    lump the two data sets together into a single vector. Then permute
    the order of the elements in this vector using the function
    \varcode{randperm()}, split it into two data sets and calculate
    the difference of their means. Repeat this 1000 times.
  \item Read out the 95\,\% percentile from the resulting distribution
    of the differences in the mean (the null hypothesis) using the
    \varcode{quantile()} function and compare it with the difference of
    means measured from the original data sets.
  \end{enumerate}
\end{exercise}
%See \url{https://en.wikipedia.org/wiki/Resampling_(statistics)#Permutation_tests.}

\subsection{Significance of correlations}

Another nice example of the application of a
\entermde{Permutationstest}{permutation test} is testing for
significant \entermde[correlation]{Korrelation}{correlations}
(figure\,\ref{permutecorrelationfig}). Given are measured pairs of
data points $(x_i, y_i)$. By calculating the
\entermde[correlation!correlation
coefficient]{Korrelationskoeffizient}{correlation coefficient} we can
quantify how strongly $y$ depends on $x$. The correlation coefficient
alone, however, does not tell whether the correlation is significantly
different from a non-zero correlation that we might get by chance even
though there is no true correlation in the data. The
\entermde{Nullhypothese}{null hypothesis} for such a situation is that
$y$ does not depend on $x$. In order to perform a permutation test, we
need to destroy the correlation between the data pairs by permuting
the $(x_i, y_i)$ pairs, i.e. we rearrange the $x_i$ and $y_i$ values
in a random fashion. Generating many sets of random pairs and
computing the corresponding correlation coefficients yields a
distribution of correlation coefficients that result randomly from
truly uncorrelated data. By comparing the actually measured
correlation coefficient with this distribution we can directly assess
the significance of the correlation.
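
A minimal Python/NumPy sketch of this permutation scheme (the sample
size and the slope of 0.2 mirror the exercise below and are chosen
only for illustration):

import numpy as np

rng = np.random.default_rng()
x = rng.normal(0.0, 1.0, 200)
y = 0.2*x + rng.normal(0.0, 1.0, 200)     # y weakly depends on x
rd = np.corrcoef(x, y)[0, 1]              # measured correlation coefficient
nperm = 1000
rs = np.empty(nperm)
for i in range(nperm):
    # permuting x and y independently destroys any correlation between the pairs:
    rs[i] = np.corrcoef(rng.permutation(x), rng.permutation(y))[0, 1]
rq = np.quantile(rs, 0.95)                # 95% percentile of the null distribution
print('measured correlation: %.2f, 95%% percentile of H0: %.2f' % (rd, rq))
print('significant' if rd > rq else 'not significant')
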
\begin{figure}[tp]
  \includegraphics[width=1\textwidth]{permutecorrelation}
  \titlecaption{\label{permutecorrelationfig}Permutation test for
    correlations.}{Let the correlation coefficient of a dataset with
    200 samples be $\rho=0.21$ (top left). By shuffling the data pairs
    we destroy any true correlation (top right). The resulting
    distribution of the null hypothesis (bottom, yellow), obtained from
    the correlation coefficients of permuted and therefore
    uncorrelated datasets, is centered around zero. The measured
    correlation coefficient is larger than the 95\,\% percentile of
    the null hypothesis. The null hypothesis may thus be rejected and
    the measured correlation is considered statistically significant.}
\end{figure}

\begin{exercise}{correlationsignificance.m}{correlationsignificance.out}
  Estimate the statistical significance of a correlation coefficient.
  \begin{enumerate}
  \item Generate pairs of $(x_i, y_i)$ values. Randomly choose $x$-values
    and calculate the respective $y$-values according to $y_i = 0.2 \cdot x_i + u_i$
    where $u_i$ is a random number drawn from a normal distribution.
  \item Calculate the correlation coefficient.
  \item Estimate the distribution of the null hypothesis by generating
    uncorrelated pairs. For this, permute $x$- and $y$-values
    \matlabfun{randperm()} 1000 times and calculate for each permutation
    the correlation coefficient.
  \item Read out the 95\,\% percentile from the resulting distribution
    of the null hypothesis using the \varcode{quantile()} function and
    compare it with the correlation coefficient computed from the
    original data.
  \end{enumerate}
\end{exercise}

View File

@ -24,7 +24,7 @@ for i in range(nresamples) :
    musrs.append(np.mean(rng.randn(nsamples)))
hmusrs, _ = np.histogram(musrs, bins, density=True)

fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.05*figure_height))
fig.subplots_adjust(**adjust_fs(left=4.0, bottom=2.7, right=1.5))
ax.set_xlabel('Mean')
ax.set_xlim(-0.4, 0.4)

View File

@ -0,0 +1,103 @@
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.ticker as ticker
from plotstyle import *
rng = np.random.RandomState(637281)
# generate data that differ in their mean by d:
n = 200
d = 0.7
x = rng.randn(n) + d;
y = rng.randn(n);
#x = rng.exponential(1.0, n);
#y = rng.exponential(1.0, n) + d;
# histogram of data:
db = 0.5
bins = np.arange(-2.5, 2.6, db)
hx, _ = np.histogram(x, bins)
hy, _ = np.histogram(y, bins)
# Difference of means, pooled standard deviation and Cohen's d:
ad = np.mean(x)-np.mean(y)
s = np.sqrt(((len(x)-1)*np.var(x)+(len(y)-1)*np.var(y))/(len(x)+len(y)-2))
cd = ad/s
# permutation:
nperm = 1000
ads = []
xy = np.hstack((x, y))
for i in range(nperm) :
    xyp = rng.permutation(xy)
    ads.append(np.mean(xyp[:len(x)])-np.mean(xyp[len(x):]))
# histogram of the last shuffled data sets:
hxp, _ = np.histogram(xyp[:len(x)], bins)
hyp, _ = np.histogram(xyp[len(x):], bins)
# pdf of the differences of means:
h, b = np.histogram(ads, 20, density=True)
# significance:
dq = np.percentile(ads, 95.0)
print('Measured difference of means = %.2f, difference at 95%% percentile of permutation = %.2f' % (ad, dq))
da = 1.0-0.01*st.percentileofscore(ads, ad)
print('Measured difference of means %.2f is at %.2f%% percentile of permutation' % (ad, 100.0*da))
at, ap = st.ttest_ind(x, y)   # t statistic and p-value of a two-sample t-test
print('Measured difference of means %.2f is at %.2f%% percentile of test' % (ad, ap))
fig = plt.figure(figsize=cm_size(figure_width, 1.8*figure_height))
gs = gridspec.GridSpec(nrows=2, ncols=2, wspace=0.35, hspace=0.8,
**adjust_fs(fig, left=5.0, right=1.5, top=1.0, bottom=2.7))
ax = fig.add_subplot(gs[0,0])
ax.bar(bins[:-1]-0.25*db, hy, 0.5*db, **fsA)
ax.bar(bins[:-1]+0.25*db, hx, 0.5*db, **fsB)
ax.annotate('', xy=(0.0, 45.0), xytext=(d, 45.0), arrowprops=dict(arrowstyle='<->'))
ax.text(0.5*d, 50.0, 'd=%.1f' % d, ha='center')
ax.set_xlim(-2.5, 2.5)
ax.set_ylim(0.0, 50)
ax.yaxis.set_major_locator(ticker.MultipleLocator(20.0))
ax.set_xlabel('Original x and y values')
ax.set_ylabel('Counts')
ax = fig.add_subplot(gs[0,1])
ax.bar(bins[:-1]-0.25*db, hyp, 0.5*db, **fsA)
ax.bar(bins[:-1]+0.25*db, hxp, 0.5*db, **fsB)
ax.set_xlim(-2.5, 2.5)
ax.set_ylim(0.0, 50)
ax.yaxis.set_major_locator(ticker.MultipleLocator(20.0))
ax.set_xlabel('Shuffled x and y values')
ax.set_ylabel('Counts')
ax = fig.add_subplot(gs[1,:])
ax.annotate('Measured\ndifference\nis significant!',
xy=(ad, 1.2), xycoords='data',
xytext=(ad-0.1, 2.2), textcoords='data', ha='right',
arrowprops=dict(arrowstyle="->", relpos=(1.0,0.5),
connectionstyle="angle3,angleA=-20,angleB=100") )
ax.annotate('95% percentile',
xy=(0.19, 0.9), xycoords='data',
xytext=(0.3, 5.0), textcoords='data', ha='left',
arrowprops=dict(arrowstyle="->", relpos=(0.1,0.0),
connectionstyle="angle3,angleA=40,angleB=80") )
ax.annotate('Distribution of\nnull hypothesis',
xy=(-0.08, 3.0), xycoords='data',
xytext=(-0.22, 4.5), textcoords='data', ha='left',
arrowprops=dict(arrowstyle="->", relpos=(0.2,0.0),
connectionstyle="angle3,angleA=60,angleB=150") )
ax.bar(b[:-1], h, width=b[1]-b[0], **fsC)
ax.bar(b[:-1][b[:-1]>=dq], h[b[:-1]>=dq], width=b[1]-b[0], **fsB)
ax.plot( [ad, ad], [0, 1], **lsA)
ax.set_xlim(-0.25, 0.85)
ax.set_ylim(0.0, 5.0)
ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0))
ax.set_xlabel('Difference of means')
ax.set_ylabel('PDF of H0')
plt.savefig('permuteaverage.pdf')

View File

@ -1,9 +1,11 @@
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.ticker as ticker
from plotstyle import *

rng = np.random.RandomState(37281)
# generate correlated data:
n = 200
@ -19,32 +21,56 @@ rd = np.corrcoef(x, y)[0, 1]
nperm = 1000
rs = []
for i in range(nperm) :
    xr = rng.permutation(x)
    yr = rng.permutation(y)
    rs.append(np.corrcoef(xr, yr)[0, 1])
rr = np.corrcoef(xr, yr)[0, 1]
# pdf of the correlation coefficients:
h, b = np.histogram(rs, 20, density=True)
# significance:
rq = np.percentile(rs, 95.0)
print('Measured correlation coefficient = %.2f, correlation coefficient at 95%% percentile of permutation = %.2f' % (rd, rq))
ra = 1.0-0.01*st.percentileofscore(rs, rd)
print('Measured correlation coefficient %.2f is at %.4f percentile of permutation' % (rd, ra))
rp, ra = st.pearsonr(x, y)
print('Measured correlation coefficient %.2f is at %.4f percentile of test' % (rp, ra))

fig = plt.figure(figsize=cm_size(figure_width, 1.8*figure_height))
gs = gridspec.GridSpec(nrows=2, ncols=2, wspace=0.35, hspace=0.8,
                       **adjust_fs(fig, left=5.0, right=1.5, top=1.0, bottom=2.7))
ax = fig.add_subplot(gs[0,0])
ax.text(0.0, 4.0, 'r=%.2f' % rd, ha='center')
ax.plot(x, y, **psAm)
ax.set_xlim(-4.0, 4.0)
ax.set_ylim(-4.0, 4.0)
ax.xaxis.set_major_locator(ticker.MultipleLocator(2.0))
ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0))
ax.set_xlabel('x')
ax.set_ylabel('y')
ax = fig.add_subplot(gs[0,1])
ax.text(0.0, 4.0, 'r=%.2f' % rr, ha='center')
ax.plot(xr, yr, **psAm)
ax.set_xlim(-4.0, 4.0)
ax.set_ylim(-4.0, 4.0)
ax.xaxis.set_major_locator(ticker.MultipleLocator(2.0))
ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0))
ax.set_xlabel('Shuffled x')
ax.set_ylabel('Shuffled y')
ax = fig.add_subplot(gs[1,:])
ax.annotate('Measured\ncorrelation\nis significant!',
            xy=(rd, 1.1), xycoords='data',
            xytext=(rd-0.01, 3.0), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.3,0.0),
                            connectionstyle="angle3,angleA=10,angleB=80") )
ax.annotate('95% percentile',
            xy=(0.14, 0.9), xycoords='data',
            xytext=(0.16, 6.2), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.1,0.0),
                            connectionstyle="angle3,angleA=30,angleB=80") )
ax.annotate('Distribution of\nuncorrelated\nsamples',
@ -56,7 +82,9 @@ ax.bar(b[:-1], h, width=b[1]-b[0], **fsC)
ax.bar(b[:-1][b[:-1]>=rq], h[b[:-1]>=rq], width=b[1]-b[0], **fsB)
ax.plot([rd, rd], [0, 1], **lsA)
ax.set_xlim(-0.25, 0.35)
ax.set_ylim(0.0, 6.0)
ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0))
ax.set_xlabel('Correlation coefficient')
ax.set_ylabel('PDF of H0')
plt.savefig('permutecorrelation.pdf')

View File

@ -42,6 +42,9 @@ $(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex $(wildcard $(B
watchchapter :
	while true; do ! make -q chapter && make chapter; sleep 0.5; done

watchplots :
	while true; do ! make -q plots && make plots; sleep 0.5; done

cleantex:
	rm -f *~
	rm -f $(BASENAME).aux $(BASENAME).log *.idx

View File

@ -33,13 +33,14 @@ colors['white'] = '#FFFFFF'
# general settings for plot styles:
lwthick = 3.0
lwthin = 1.8
edgewidth = 0.0 if xkcd_style else 1.0
mainline = {'linestyle': '-', 'linewidth': lwthick}
minorline = {'linestyle': '-', 'linewidth': lwthin}
largemarker = {'marker': 'o', 'markersize': 9, 'markeredgecolor': colors['white'], 'markeredgewidth': edgewidth}
smallmarker = {'marker': 'o', 'markersize': 6, 'markeredgecolor': colors['white'], 'markeredgewidth': edgewidth}
largelinepoints = {'linestyle': '-', 'linewidth': lwthick, 'marker': 'o', 'markersize': 10, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
smalllinepoints = {'linestyle': '-', 'linewidth': 1.4, 'marker': 'o', 'markersize': 7, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
filllw = edgewidth
fillec = colors['white']
fillalpha = 0.4
filledge = {'linewidth': filllw, 'joinstyle': 'round'}