[bootstrap] added difference of mean
parent 0e30a01a45
commit 3dd0660b21
@@ -1,6 +1,6 @@
 % generate correlated data:
-n=200;
-a=0.2;
+n = 200;
+a = 0.2;
 x = randn(n, 1);
 y = randn(n, 1) + a*x;
 
@@ -11,13 +11,13 @@ fprintf('correlation coefficient of data r = %.2f\n', rd);
 % distribution of null hypothesis by permutation:
 nperm = 1000;
 rs = zeros(nperm,1);
-for i=1:nperm
-  xr=x(randperm(length(x))); % shuffle x
-  yr=y(randperm(length(y))); % shuffle y
+for i = 1:nperm
+  xr = x(randperm(length(x))); % shuffle x
+  yr = y(randperm(length(y))); % shuffle y
   rs(i) = corr(xr, yr);
 end
-[h,b] = hist(rs, 20);
+[h, b] = hist(rs, 20);
 h = h/sum(h)/(b(2)-b(1)); % normalization
 
 % significance:
 rq = quantile(rs, 0.95);
bootstrap/code/meandiffsignificance.m (new file, 40 lines)
@@ -0,0 +1,40 @@
+% generate two data sets:
+n = 200;
+d = 0.2;
+x = randn(n, 1);
+y = randn(n, 1) + d;
+
+% difference of sample means:
+md = mean(y) - mean(x);
+fprintf('difference of means of data d = %.2f\n', md);
+
+% distribution of null hypothesis by permutation:
+nperm = 1000;
+xy = [x; y]; % x and y data in one column vector
+ds = zeros(nperm,1);
+for i = 1:nperm
+  xyr = xy(randperm(length(xy))); % shuffle data
+  xr = xyr(1:length(x)); % random x-data
+  yr = xyr(length(x)+1:end); % random y-data
+  ds(i) = mean(yr) - mean(xr); % difference of means
+end
+[h, b] = hist(ds, 20);
+h = h/sum(h)/(b(2)-b(1)); % normalization
+
+% significance:
+dq = quantile(ds, 0.95);
+fprintf('difference of means of null hypothesis at 5%% significance = %.2f\n', dq);
+if md >= dq
+  fprintf('--> difference of means d=%.2f is significant\n', md);
+else
+  fprintf('--> d=%.2f is not a significant difference of means\n', md);
+end
+
+% plot:
+bar(b, h, 'facecolor', 'b');
+hold on;
+bar(b(b>=dq), h(b>=dq), 'facecolor', 'r');
+plot([md md], [0 4], 'r', 'linewidth', 2);
+xlabel('Difference of means');
+ylabel('Probability density of H0');
+hold off;
bootstrap/code/meandiffsignificance.out (new file, 4 lines)
@@ -0,0 +1,4 @@
+>> meandiffsignificance
+difference of means of data d = 0.18
+difference of means of null hypothesis at 5% significance = 0.17
+--> difference of means d=0.18 is significant
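For comparison, the pooling-and-permutation recipe that the new MATLAB script implements can be sketched in a few lines of NumPy. This is an illustrative sketch only, not part of the commit; the seed and variable names are arbitrary:

    import numpy as np

    rng = np.random.default_rng(42)          # illustrative seed
    x = rng.normal(0.0, 1.0, 200)
    y = rng.normal(0.2, 1.0, 200)            # population means differ by 0.2

    md = np.mean(y) - np.mean(x)             # measured difference of means
    pooled = np.concatenate((x, y))          # null hypothesis: one common distribution
    ds = np.empty(1000)
    for i in range(len(ds)):
        p = rng.permutation(pooled)          # shuffle the pooled values
        ds[i] = np.mean(p[len(x):]) - np.mean(p[:len(x)])
    dq = np.quantile(ds, 0.95)               # one-sided 5% significance threshold
    print('d = %.2f, 95%% percentile of H0 = %.2f, significant: %s' % (md, dq, md >= dq))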
@@ -26,6 +26,4 @@ This chapter easily covers two lectures:
 
 Add jackknife methods to bootstrapping
 
-Add permutation test for significant different means of two distributions.
-
 \end{document}
@@ -125,7 +125,6 @@ Via bootstrapping we create a distribution of the mean values
 (\figref{bootstrapsemfig}) and the standard deviation of this
 distribution is the standard error of the mean.
 
-\pagebreak[4]
 \begin{exercise}{bootstrapsem.m}{bootstrapsem.out}
   Create the distribution of mean values from bootstrapped samples
   resampled from a single SRS. Use this distribution to estimate the
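The bootstrapsem exercise touched by this hunk boils down to resampling with replacement from a single sample. A minimal NumPy sketch of that idea (illustrative only, not the repository's bootstrapsem.m; sample size and seed are arbitrary):

    import numpy as np

    rng = np.random.default_rng(1)           # illustrative seed
    data = rng.normal(0.0, 1.0, 100)         # a single simple random sample (SRS)

    nboot = 1000
    means = np.empty(nboot)
    for i in range(nboot):
        sample = rng.choice(data, size=len(data), replace=True)
        means[i] = np.mean(sample)           # mean of one bootstrapped sample
    sem_boot = np.std(means)                 # spread of the means = standard error
    print('bootstrapped SEM = %.3f, analytic SEM = %.3f'
          % (sem_boot, np.std(data, ddof=1)/np.sqrt(len(data))))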
@@ -152,67 +151,156 @@ desired \entermde{Signifikanz}{significance level}, the
 \entermde{Nullhypothese}{null hypothesis} may be rejected.
 
 Traditionally, such probabilities are taken from theoretical
-distributions which are based on assumptions about the data. Thus the
-applied statistical test has to be appropriate for the type of
+distributions which are based on some assumptions about the data. For
+example, the data should be normally distributed. Given some data one
+has to find an appropriate test that matches the properties of the
 data. An alternative approach is to calculate the probability density
-of the null hypothesis directly from the data itself. To do this, we
+of the null hypothesis directly from the data themselves. To do so, we
 need to resample the data according to the null hypothesis from the
 SRS. By such permutation operations we destroy the feature of interest
-while we conserve all other statistical properties of the data.
+while conserving all other statistical properties of the data.
 
+\subsection{Significance of a difference in the mean}
+
+Often we would like to know whether two data sets differ in their
+mean. Whether the ears of foxes are larger in southern Europe compared
+to the ones from Scandinavia, whether a drug decreases blood pressure
+in humans, whether a sensory stimulus increases the firing rate of a
+neuron, etc. The \entermde{Nullhypothese}{null hypothesis} is that
+they do not differ in their means, i.e. that both data sets come from
+the same distribution. But even if the two data sets come from the
+same distribution, their sample means may nevertheless differ by
+chance. We need to figure out how these differences of the means are
+distributed. Only if the measured difference between the means is
+significantly larger than the ones obtained by chance can we reject
+the null hypothesis and consider the two data sets to differ
+significantly in their means.
+
+We can easily estimate the distribution of the null hypothesis by
+putting the data of both data sets in one big bag. By merging the two
+data sets we assume that all the data values come from the same
+distribution. We then randomly separate the data values into two new
+data sets. These random data sets contain data from both original data
+sets and thus come from the same distribution. From these random data
+sets we compute the difference of their sample means. This procedure
+is repeated many, say one thousand, times and each time we get a value
+for the difference of means. The distribution of these values is the
+distribution of the null hypothesis. It is the distribution of
+differences of mean values that we get by chance although the two data
+sets come from the same distribution. For a one-sided test that checks
+whether the measured difference of means is significantly larger than
+zero at a significance level of 5\,\% we compute the value of the
+95\,\% percentile of the null distribution. If the measured value is
+larger, we can reject the null hypothesis and consider the two data
+sets to differ significantly in their means.
+
+By using the original data to estimate the null hypothesis, we make no
+assumption about the properties of the data. We do not need to worry
+about the data being normally distributed. We do not need to memorize
+which test to use in which situation. And we better understand what we
+are testing, because we design the test ourselves. Nowadays, computers
+are powerful enough to iterate even ten thousand times over the data
+to compute the distribution of the null hypothesis --- with only a few
+lines of code. This is why \entermde{Permutationstest}{permutation
+tests} are becoming quite popular.
+
 \begin{figure}[tp]
-  \includegraphics[width=1\textwidth]{permutecorrelation}
-  \titlecaption{\label{permutecorrelationfig}Permutation test for
-    correlations.}{Let the correlation coefficient of a dataset with
-    200 samples be $\rho=0.21$. The distribution of the null
-    hypothesis (yellow), obtained from the correlation coefficients of
-    permuted and therefore uncorrelated datasets, is centered around
-    zero. The measured correlation coefficient is larger than the
-    95\,\% percentile of the null hypothesis. The null hypothesis may
-    thus be rejected and the measured correlation is considered
-    statistically significant.}
+  \includegraphics[width=1\textwidth]{permuteaverage}
+  \titlecaption{\label{permuteaverage}Permutation test for differences
+    in means.}{We want to test whether two datasets
+    $\left\{x_i\right\}$ (red) and $\left\{y_i\right\}$ (blue) come
+    from different distributions by assessing the significance of the
+    difference in their sample means. The data sets were generated
+    with a difference in their population means of $d=0.7$. For
+    generating the distribution of the null hypothesis, i.e. the
+    distribution of differences in the means if the two data sets come
+    from the same distribution, we randomly select the same number of
+    samples from both data sets (top right). This is repeated many
+    times and results in the desired distribution of differences of
+    means (bottom). The measured difference is clearly beyond the
+    95\,\% percentile of this distribution and thus indicates a
+    significant difference between the distributions of the two
+    original data sets.}
 \end{figure}
 
+%\subsection{Significance of a difference in the mean}
+\begin{exercise}{meandiffsignificance.m}{meandiffsignificance.out}
+  Estimate the statistical significance of a difference in the mean of two data sets.
+  \vspace{-1ex}
+  \begin{enumerate}
+  \item Generate two independent data sets, $\left\{x_i\right\}$ and
+    $\left\{y_i\right\}$, of $n=200$ samples each, by drawing random
+    numbers from a normal distribution. Add 0.2 to all the $y_i$ samples
+    to ensure that the population means differ by 0.2.
+  \item Calculate the difference between the sample means of the two data sets.
+  \item Estimate the distribution of the null hypothesis of no
+    difference of the means by generating new data sets with the same
+    number of samples randomly selected from both data sets. For this
+    lump the two data sets together into a single vector. Then permute
+    the order of the elements in this vector using the function
+    \varcode{randperm()}, split it into two data sets and calculate
+    the difference of their means. Repeat this 1000 times.
+  \item Read out the 95\,\% percentile from the resulting distribution
+    of the differences in the mean (the null hypothesis) using the
+    \varcode{quantile()} function, and compare it with the difference of
+    means measured from the original data sets.
+  \end{enumerate}
+\end{exercise}
 
 %See \url{https://en.wikipedia.org/wiki/Resampling_(statistics)#Permutation_tests.}
 
 \subsection{Significance of correlations}
 
-A good example for the application of a
-\entermde{Permutationstest}{permutation test} is the statistical
-assessment of \entermde[correlation]{Korrelation}{correlations}. Given
-are measured pairs of data points $(x_i, y_i)$. By calculating the
+Another nice example for the application of a
+\entermde{Permutationstest}{permutation test} is testing for
+significant \entermde[correlation]{Korrelation}{correlations}
+(figure\,\ref{permutecorrelationfig}). Given are measured pairs of
+data points $(x_i, y_i)$. By calculating the
 \entermde[correlation!correlation
-coefficient]{Korrelationskoeffizient}{correlation
-coefficient} we can quantify how strongly $y$ depends on $x$. The
-correlation coefficient alone, however, does not tell whether the
-correlation is significantly different from a random correlation. The
-\entermde{Nullhypothese}{null hypothesis} for such a situation is that
-$y$ does not depend on $x$. In order to perform a permutation test, we
-need to destroy the correlation by permuting the $(x_i, y_i)$ pairs,
-i.e. we rearrange the $x_i$ and $y_i$ values in a random
+coefficient]{Korrelationskoeffizient}{correlation coefficient} we can
+quantify how strongly $y$ depends on $x$. The correlation coefficient
+alone, however, does not tell whether the correlation is significantly
+different from a non-zero correlation that we might get although there
+is no true correlation in the data. The \entermde{Nullhypothese}{null
+hypothesis} for such a situation is that $y$ does not depend on
+$x$. In order to perform a permutation test, we need to destroy the
+correlation between the data pairs by permuting the $(x_i, y_i)$
+pairs, i.e. we rearrange the $x_i$ and $y_i$ values in a random
 fashion. Generating many sets of random pairs and computing the
-resulting correlation coefficients yields a distribution of
-correlation coefficients that result randomly from uncorrelated
+corresponding correlation coefficients yields a distribution of
+correlation coefficients that result randomly from truly uncorrelated
 data. By comparing the actually measured correlation coefficient with
 this distribution we can directly assess the significance of the
-correlation (figure\,\ref{permutecorrelationfig}).
+correlation.
 
+\begin{figure}[tp]
+  \includegraphics[width=1\textwidth]{permutecorrelation}
+  \titlecaption{\label{permutecorrelationfig}Permutation test for
+    correlations.}{Let the correlation coefficient of a dataset with
+    200 samples be $\rho=0.21$ (top left). By shuffling the data pairs
+    we destroy any true correlation (top right). The resulting
+    distribution of the null hypothesis (bottom, yellow), obtained from
+    the correlation coefficients of permuted and therefore
+    uncorrelated datasets, is centered around zero. The measured
+    correlation coefficient is larger than the 95\,\% percentile of
+    the null hypothesis. The null hypothesis may thus be rejected and
+    the measured correlation is considered statistically significant.}
+\end{figure}
+
 \begin{exercise}{correlationsignificance.m}{correlationsignificance.out}
   Estimate the statistical significance of a correlation coefficient.
   \begin{enumerate}
-  \item Create pairs of $(x_i, y_i)$ values. Randomly choose $x$-values
+  \item Generate pairs of $(x_i, y_i)$ values. Randomly choose $x$-values
     and calculate the respective $y$-values according to $y_i = 0.2 \cdot x_i + u_i$
     where $u_i$ is a random number drawn from a normal distribution.
   \item Calculate the correlation coefficient.
-  \item Generate the distribution of the null hypothesis by generating
+  \item Estimate the distribution of the null hypothesis by generating
     uncorrelated pairs. For this permute $x$- and $y$-values using
     \matlabfun{randperm()} 1000 times and calculate for each permutation
     the correlation coefficient.
   \item Read out the 95\,\% percentile from the resulting distribution
-    of the null hypothesis and compare it with the correlation
-    coefficient computed from the original data.
+    of the null hypothesis using the \varcode{quantile()} function and
+    compare it with the correlation coefficient computed from the
+    original data.
   \end{enumerate}
 \end{exercise}
 
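The correlation permutation test described in the chapter text above likewise condenses to a few NumPy lines. An illustrative sketch, not part of this commit; the seed is arbitrary and the parameters mirror the scripts in this repo:

    import numpy as np

    rng = np.random.default_rng(7)           # illustrative seed
    n, a = 200, 0.2                          # same parameters as the scripts above
    x = rng.normal(0.0, 1.0, n)
    y = rng.normal(0.0, 1.0, n) + a*x        # y weakly depends on x

    rd = np.corrcoef(x, y)[0, 1]             # measured correlation coefficient
    rs = np.empty(1000)
    for i in range(len(rs)):
        # shuffling x and y independently destroys any correlation:
        rs[i] = np.corrcoef(rng.permutation(x), rng.permutation(y))[0, 1]
    rq = np.quantile(rs, 0.95)               # 95% percentile of the null distribution
    print('r = %.2f, 95%% percentile of H0 = %.2f, significant: %s' % (rd, rq, rd > rq))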
@@ -24,7 +24,7 @@ for i in range(nresamples) :
     musrs.append(np.mean(rng.randn(nsamples)))
 hmusrs, _ = np.histogram(musrs, bins, density=True)
 
-fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.2*figure_height))
+fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.05*figure_height))
 fig.subplots_adjust(**adjust_fs(left=4.0, bottom=2.7, right=1.5))
 ax.set_xlabel('Mean')
 ax.set_xlim(-0.4, 0.4)
bootstrap/lecture/permuteaverage.py (new file, 103 lines)
@@ -0,0 +1,103 @@
+import numpy as np
+import scipy.stats as st
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+import matplotlib.ticker as ticker
+from plotstyle import *
+
+rng = np.random.RandomState(637281)
+
+# generate data that differ in their mean by d:
+n = 200
+d = 0.7
+x = rng.randn(n) + d
+y = rng.randn(n)
+#x = rng.exponential(1.0, n)
+#y = rng.exponential(1.0, n) + d
+
+# histogram of data:
+db = 0.5
+bins = np.arange(-2.5, 2.6, db)
+hx, _ = np.histogram(x, bins)
+hy, _ = np.histogram(y, bins)
+
+# difference of means, pooled standard deviation and Cohen's d:
+ad = np.mean(x)-np.mean(y)
+s = np.sqrt(((len(x)-1)*np.var(x)+(len(y)-1)*np.var(y))/(len(x)+len(y)-2))
+cd = ad/s
+
+# permutation:
+nperm = 1000
+ads = []
+xy = np.hstack((x, y))
+for i in range(nperm):
+    xyp = rng.permutation(xy)
+    ads.append(np.mean(xyp[:len(x)])-np.mean(xyp[len(x):]))
+
+# histogram of shuffled data:
+hxp, _ = np.histogram(xyp[:len(x)], bins)
+hyp, _ = np.histogram(xyp[len(x):], bins)
+
+# pdf of the differences of means:
+h, b = np.histogram(ads, 20, density=True)
+
+# significance:
+dq = np.percentile(ads, 95.0)
+print('Measured difference of means = %.2f, difference at 95%% percentile of permutation = %.2f' % (ad, dq))
+da = 1.0-0.01*st.percentileofscore(ads, ad)
+print('Measured difference of means %.2f is at %.2f%% percentile of permutation' % (ad, 100.0*da))
+
+at, ap = st.ttest_ind(x, y)  # t statistic and p-value
+print('Measured difference of means %.2f has p-value %.4f in a two-sample t-test' % (ad, ap))
+
+
+fig = plt.figure(figsize=cm_size(figure_width, 1.8*figure_height))
+gs = gridspec.GridSpec(nrows=2, ncols=2, wspace=0.35, hspace=0.8,
+                       **adjust_fs(fig, left=5.0, right=1.5, top=1.0, bottom=2.7))
+
+ax = fig.add_subplot(gs[0,0])
+ax.bar(bins[:-1]-0.25*db, hy, 0.5*db, **fsA)
+ax.bar(bins[:-1]+0.25*db, hx, 0.5*db, **fsB)
+ax.annotate('', xy=(0.0, 45.0), xytext=(d, 45.0), arrowprops=dict(arrowstyle='<->'))
+ax.text(0.5*d, 50.0, 'd=%.1f' % d, ha='center')
+ax.set_xlim(-2.5, 2.5)
+ax.set_ylim(0.0, 50)
+ax.yaxis.set_major_locator(ticker.MultipleLocator(20.0))
+ax.set_xlabel('Original x and y values')
+ax.set_ylabel('Counts')
+
+ax = fig.add_subplot(gs[0,1])
+ax.bar(bins[:-1]-0.25*db, hyp, 0.5*db, **fsA)
+ax.bar(bins[:-1]+0.25*db, hxp, 0.5*db, **fsB)
+ax.set_xlim(-2.5, 2.5)
+ax.set_ylim(0.0, 50)
+ax.yaxis.set_major_locator(ticker.MultipleLocator(20.0))
+ax.set_xlabel('Shuffled x and y values')
+ax.set_ylabel('Counts')
+
+ax = fig.add_subplot(gs[1,:])
+ax.annotate('Measured\ndifference\nis significant!',
+            xy=(ad, 1.2), xycoords='data',
+            xytext=(ad-0.1, 2.2), textcoords='data', ha='right',
+            arrowprops=dict(arrowstyle="->", relpos=(1.0,0.5),
+            connectionstyle="angle3,angleA=-20,angleB=100") )
+ax.annotate('95% percentile',
+            xy=(0.19, 0.9), xycoords='data',
+            xytext=(0.3, 5.0), textcoords='data', ha='left',
+            arrowprops=dict(arrowstyle="->", relpos=(0.1,0.0),
+            connectionstyle="angle3,angleA=40,angleB=80") )
+ax.annotate('Distribution of\nnull hypothesis',
+            xy=(-0.08, 3.0), xycoords='data',
+            xytext=(-0.22, 4.5), textcoords='data', ha='left',
+            arrowprops=dict(arrowstyle="->", relpos=(0.2,0.0),
+            connectionstyle="angle3,angleA=60,angleB=150") )
+ax.bar(b[:-1], h, width=b[1]-b[0], **fsC)
+ax.bar(b[:-1][b[:-1]>=dq], h[b[:-1]>=dq], width=b[1]-b[0], **fsB)
+ax.plot([ad, ad], [0, 1], **lsA)
+ax.set_xlim(-0.25, 0.85)
+ax.set_ylim(0.0, 5.0)
+ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0))
+ax.set_xlabel('Difference of means')
+ax.set_ylabel('PDF of H0')
+
+plt.savefig('permuteaverage.pdf')
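For reference, the pooled standard deviation and Cohen's d computed in permuteaverage.py correspond to the standard definitions
\[ s = \sqrt{\frac{(n_x - 1)\,s_x^2 + (n_y - 1)\,s_y^2}{n_x + n_y - 2}}, \qquad d' = \frac{\bar{x} - \bar{y}}{s}, \]
where $s_x^2$ and $s_y^2$ are the sample variances. Note that np.var defaults to the biased estimate (ddof=0), so the script's pooled estimate deviates slightly from the textbook one; for $n = 200$ the difference is negligible.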
@@ -1,9 +1,11 @@
 import numpy as np
+import scipy.stats as st
 import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
 import matplotlib.ticker as ticker
 from plotstyle import *
 
-rng = np.random.RandomState(637281)
+rng = np.random.RandomState(37281)
 
 # generate correlated data:
 n = 200
@@ -19,32 +21,56 @@ rd = np.corrcoef(x, y)[0, 1]
 nperm = 1000
 rs = []
 for i in range(nperm) :
-    xr=rng.permutation(x)
-    yr=rng.permutation(y)
-    rs.append( np.corrcoef(xr, yr)[0, 1] )
+    xr = rng.permutation(x)
+    yr = rng.permutation(y)
+    rs.append(np.corrcoef(xr, yr)[0, 1])
+rr = np.corrcoef(xr, yr)[0, 1]
 
 # pdf of the correlation coefficients:
 h, b = np.histogram(rs, 20, density=True)
 
 # significance:
 rq = np.percentile(rs, 95.0)
-print('Measured correlation coefficient = %.2f, correlation coefficient at 95%% percentile of bootstrap = %.2f' % (rd, rq))
+print('Measured correlation coefficient = %.2f, correlation coefficient at 95%% percentile of permutation = %.2f' % (rd, rq))
 ra = 1.0-0.01*st.percentileofscore(rs, rd)
-print('Measured correlation coefficient %.2f is at %.4f percentile of bootstrap' % (rd, ra))
+print('Measured correlation coefficient %.2f is at %.4f percentile of permutation' % (rd, ra))
+
+rp, ra = st.pearsonr(x, y)
+print('Measured correlation coefficient %.2f is at %.4f percentile of test' % (rp, ra))
 
-fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.2*figure_height))
-fig.subplots_adjust(**adjust_fs(left=4.0, bottom=2.7, right=0.5, top=1.0))
+fig = plt.figure(figsize=cm_size(figure_width, 1.8*figure_height))
+gs = gridspec.GridSpec(nrows=2, ncols=2, wspace=0.35, hspace=0.8,
+                       **adjust_fs(fig, left=5.0, right=1.5, top=1.0, bottom=2.7))
+
+ax = fig.add_subplot(gs[0,0])
+ax.text(0.0, 4.0, 'r=%.2f' % rd, ha='center')
+ax.plot(x, y, **psAm)
+ax.set_xlim(-4.0, 4.0)
+ax.set_ylim(-4.0, 4.0)
+ax.xaxis.set_major_locator(ticker.MultipleLocator(2.0))
+ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0))
+ax.set_xlabel('x')
+ax.set_ylabel('y')
+
+ax = fig.add_subplot(gs[0,1])
+ax.text(0.0, 4.0, 'r=%.2f' % rr, ha='center')
+ax.plot(xr, yr, **psAm)
+ax.set_xlim(-4.0, 4.0)
+ax.set_ylim(-4.0, 4.0)
+ax.xaxis.set_major_locator(ticker.MultipleLocator(2.0))
+ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0))
+ax.set_xlabel('Shuffled x')
+ax.set_ylabel('Shuffled y')
+
+ax = fig.add_subplot(gs[1,:])
 ax.annotate('Measured\ncorrelation\nis significant!',
             xy=(rd, 1.1), xycoords='data',
-            xytext=(rd, 2.2), textcoords='data', ha='left',
-            arrowprops=dict(arrowstyle="->", relpos=(0.2,0.0),
+            xytext=(rd-0.01, 3.0), textcoords='data', ha='left',
+            arrowprops=dict(arrowstyle="->", relpos=(0.3,0.0),
             connectionstyle="angle3,angleA=10,angleB=80") )
 ax.annotate('95% percentile',
             xy=(0.14, 0.9), xycoords='data',
-            xytext=(0.18, 4.0), textcoords='data', ha='left',
+            xytext=(0.16, 6.2), textcoords='data', ha='left',
             arrowprops=dict(arrowstyle="->", relpos=(0.1,0.0),
             connectionstyle="angle3,angleA=30,angleB=80") )
 ax.annotate('Distribution of\nuncorrelated\nsamples',
@@ -56,7 +82,9 @@ ax.bar(b[:-1], h, width=b[1]-b[0], **fsC)
 ax.bar(b[:-1][b[:-1]>=rq], h[b[:-1]>=rq], width=b[1]-b[0], **fsB)
 ax.plot([rd, rd], [0, 1], **lsA)
 ax.set_xlim(-0.25, 0.35)
+ax.set_ylim(0.0, 6.0)
 ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0))
 ax.set_xlabel('Correlation coefficient')
-ax.set_ylabel('Prob. density of H0')
+ax.set_ylabel('PDF of H0')
 
 plt.savefig('permutecorrelation.pdf')
@@ -42,6 +42,9 @@ $(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex $(wildcard $(B
 watchchapter :
 	while true; do ! make -q chapter && make chapter; sleep 0.5; done
 
+watchplots :
+	while true; do ! make -q plots && make plots; sleep 0.5; done
+
 cleantex:
 	rm -f *~
 	rm -f $(BASENAME).aux $(BASENAME).log *.idx
@@ -33,13 +33,14 @@ colors['white'] = '#FFFFFF'
 # general settings for plot styles:
 lwthick = 3.0
 lwthin = 1.8
+edgewidth = 0.0 if xkcd_style else 1.0
 mainline = {'linestyle': '-', 'linewidth': lwthick}
 minorline = {'linestyle': '-', 'linewidth': lwthin}
-largemarker = {'marker': 'o', 'markersize': 9, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
-smallmarker = {'marker': 'o', 'markersize': 6, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
+largemarker = {'marker': 'o', 'markersize': 9, 'markeredgecolor': colors['white'], 'markeredgewidth': edgewidth}
+smallmarker = {'marker': 'o', 'markersize': 6, 'markeredgecolor': colors['white'], 'markeredgewidth': edgewidth}
 largelinepoints = {'linestyle': '-', 'linewidth': lwthick, 'marker': 'o', 'markersize': 10, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
 smalllinepoints = {'linestyle': '-', 'linewidth': 1.4, 'marker': 'o', 'markersize': 7, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
-filllw = 1.0
+filllw = edgewidth
 fillec = colors['white']
 fillalpha = 0.4
 filledge = {'linewidth': filllw, 'joinstyle': 'round'}