[bootstrap] expanding exercise by difference of mean permutation test
This commit is contained in:
parent
68784b0e23
commit
c9dd1ffbe6
@ -10,7 +10,7 @@ function [bootsem, mu] = bootstrapmean(x, resample)
|
|||||||
for i = 1:resample
|
for i = 1:resample
|
||||||
% resample:
|
% resample:
|
||||||
xr = x(randi(nsamples, nsamples, 1));
|
xr = x(randi(nsamples, nsamples, 1));
|
||||||
% compute statistics on sample:
|
% compute statistics of resampled sample:
|
||||||
mu(i) = mean(xr);
|
mu(i) = mean(xr);
|
||||||
end
|
end
|
||||||
bootsem = std(mu);
|
bootsem = std(mu);
|
||||||
|
@ -1,18 +1,18 @@
|
|||||||
%% (a) bootstrap:
|
%% (a) bootstrap:
|
||||||
nperm = 1000;
|
nperm = 1000;
|
||||||
rb = zeros(nperm,1);
|
rb = zeros(nperm, 1);
|
||||||
for i=1:nperm
|
for i=1:nperm
|
||||||
% indices for resampling the data:
|
% indices for resampling the data:
|
||||||
inx = randi(length(x), length(x), 1);
|
inx = randi(length(x), length(x), 1);
|
||||||
% resampled data pairs:
|
% resampled data pairs:
|
||||||
xb=x(inx);
|
xb = x(inx);
|
||||||
yb=y(inx);
|
yb = y(inx);
|
||||||
rb(i) = corr(xb, yb);
|
rb(i) = corr(xb, yb);
|
||||||
end
|
end
|
||||||
|
|
||||||
%% (b) pdf of the correlation coefficients:
|
%% (b) pdf of the correlation coefficients:
|
||||||
[hb,bb] = hist(rb, 20);
|
[hb, bb] = hist(rb, 20);
|
||||||
hb = hb/sum(hb)/(bb(2)-bb(1)); % normalization
|
hb = hb/sum(hb)/(bb(2)-bb(1)); % normalization
|
||||||
|
|
||||||
%% (c) significance:
|
%% (c) significance:
|
||||||
rbq = quantile(rb, 0.05);
|
rbq = quantile(rb, 0.05);
|
||||||
@ -25,8 +25,8 @@ end
|
|||||||
|
|
||||||
%% plot:
|
%% plot:
|
||||||
hold on;
|
hold on;
|
||||||
bar(b, h, 'facecolor', [0.5 0.5 0.5]);
|
bar(b, h, 'facecolor', [0.5 0.5 0.5]); % permuation test
|
||||||
bar(bb, hb, 'facecolor', 'b');
|
bar(bb, hb, 'facecolor', 'b'); % bootstrap
|
||||||
bar(bb(bb<=rbq), hb(bb<=rbq), 'facecolor', 'r');
|
bar(bb(bb<=rbq), hb(bb<=rbq), 'facecolor', 'r');
|
||||||
plot([rd rd], [0 4], 'r', 'linewidth', 2);
|
plot([rd rd], [0 4], 'r', 'linewidth', 2);
|
||||||
xlim([-0.25 0.75])
|
xlim([-0.25 0.75])
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
%% (a) generate correlated data
|
%% (a) generate correlated data:
|
||||||
n = 1000;
|
n = 1000;
|
||||||
a = 0.2;
|
a = 0.2;
|
||||||
x = randn(n, 1);
|
x = randn(n, 1);
|
||||||
@ -8,7 +8,7 @@ y = randn(n, 1) + a*x;
|
|||||||
subplot(1, 2, 1);
|
subplot(1, 2, 1);
|
||||||
plot(x, a*x, 'r', 'linewidth', 3);
|
plot(x, a*x, 'r', 'linewidth', 3);
|
||||||
hold on
|
hold on
|
||||||
%scatter(x, y ); % either scatter ...
|
%scatter(x, y ); % either scatter ...
|
||||||
plot(x, y, 'o', 'markersize', 2 ); % ... or plot - same plot.
|
plot(x, y, 'o', 'markersize', 2 ); % ... or plot - same plot.
|
||||||
xlim([-4 4])
|
xlim([-4 4])
|
||||||
ylim([-4 4])
|
ylim([-4 4])
|
||||||
@ -20,20 +20,20 @@ hold off
|
|||||||
%c = corrcoef(x, y); % returns correlation matrix
|
%c = corrcoef(x, y); % returns correlation matrix
|
||||||
%rd = c(1, 2);
|
%rd = c(1, 2);
|
||||||
rd = corr(x, y);
|
rd = corr(x, y);
|
||||||
fprintf('correlation coefficient = %.2f\n', rd );
|
fprintf('correlation coefficient = %.2f\n', rd);
|
||||||
|
|
||||||
%% (e) permutation:
|
%% (e) permutation:
|
||||||
nperm = 1000;
|
nperm = 1000;
|
||||||
rs = zeros(nperm,1);
|
rs = zeros(nperm, 1);
|
||||||
for i=1:nperm
|
for i = 1:nperm
|
||||||
xr=x(randperm(length(x))); % shuffle x
|
xr = x(randperm(length(x))); % shuffle x
|
||||||
yr=y(randperm(length(y))); % shuffle y
|
yr = y(randperm(length(y))); % shuffle y
|
||||||
rs(i) = corr(xr, yr);
|
rs(i) = corr(xr, yr);
|
||||||
end
|
end
|
||||||
|
|
||||||
%% (g) pdf of the correlation coefficients:
|
%% (g) pdf of the correlation coefficients:
|
||||||
[h,b] = hist(rs, 20);
|
[h, b] = hist(rs, 20);
|
||||||
h = h/sum(h)/(b(2)-b(1)); % normalization
|
h = h/sum(h)/(b(2)-b(1)); % normalization
|
||||||
|
|
||||||
%% (h) significance:
|
%% (h) significance:
|
||||||
rq = quantile(rs, 0.95);
|
rq = quantile(rs, 0.95);
|
||||||
@ -49,7 +49,7 @@ subplot(1, 2, 2)
|
|||||||
hold on;
|
hold on;
|
||||||
bar(b, h, 'facecolor', 'b');
|
bar(b, h, 'facecolor', 'b');
|
||||||
bar(b(b>=rq), h(b>=rq), 'facecolor', 'r');
|
bar(b(b>=rq), h(b>=rq), 'facecolor', 'r');
|
||||||
plot( [rd rd], [0 4], 'r', 'linewidth', 2);
|
plot([rd rd], [0 4], 'r', 'linewidth', 2);
|
||||||
xlim([-0.25 0.25])
|
xlim([-0.25 0.25])
|
||||||
xlabel('Correlation coefficient');
|
xlabel('Correlation coefficient');
|
||||||
ylabel('Probability density of H0');
|
ylabel('Probability density of H0');
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
\else
|
\else
|
||||||
\newcommand{\stitle}{}
|
\newcommand{\stitle}{}
|
||||||
\fi
|
\fi
|
||||||
\header{{\bfseries\large Exercise 9\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large December 9th, 2019}}
|
\header{{\bfseries\large Exercise 8\stitle}}{{\bfseries\large Resampling}}{{\bfseries\large December 14th, 2020}}
|
||||||
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
|
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
|
||||||
jan.benda@uni-tuebingen.de}
|
jan.benda@uni-tuebingen.de}
|
||||||
\runningfooter{}{\thepage}{}
|
\runningfooter{}{\thepage}{}
|
||||||
@ -86,6 +86,9 @@ jan.benda@uni-tuebingen.de}
|
|||||||
|
|
||||||
\begin{questions}
|
\begin{questions}
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\question \qt{Read chapter 7 of the script on ``resampling methods''!}\vspace{-3ex}
|
||||||
|
|
||||||
\question \qt{Bootstrap the standard error of the mean}
|
\question \qt{Bootstrap the standard error of the mean}
|
||||||
We want to compute the standard error of the mean of a data set by
|
We want to compute the standard error of the mean of a data set by
|
||||||
means of the bootstrap method and compare the result with the formula
|
means of the bootstrap method and compare the result with the formula
|
||||||
@ -149,10 +152,10 @@ normally distributed?
|
|||||||
|
|
||||||
|
|
||||||
\continue
|
\continue
|
||||||
\question \qt{Permutation test} \label{permutationtest}
|
\question \qt{Permutation test of correlations} \label{correlationtest}
|
||||||
We want to compute the significance of a correlation by means of a permutation test.
|
We want to compute the significance of a correlation by means of a permutation test.
|
||||||
\begin{parts}
|
\begin{parts}
|
||||||
\part \label{permutationtestdata} Generate 1000 correlated pairs
|
\part \label{correlationtestdata} Generate 1000 correlated pairs
|
||||||
$x$, $y$ of random numbers according to:
|
$x$, $y$ of random numbers according to:
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
n = 1000
|
n = 1000
|
||||||
@ -188,9 +191,13 @@ correlation coefficient of zero we can conclude that the correlation
|
|||||||
coefficient of the data indeed quantifies correlated data.
|
coefficient of the data indeed quantifies correlated data.
|
||||||
|
|
||||||
We take the same data set that we have generated in exercise
|
We take the same data set that we have generated in exercise
|
||||||
\ref{permutationtest} (\ref{permutationtestdata}).
|
\ref{correlationtest} (\ref{correlationtestdata}).
|
||||||
\begin{parts}
|
\begin{parts}
|
||||||
\part Bootstrap 1000 times the correlation coefficient from the data.
|
\part Bootstrap 1000 times the correlation coefficient from the
|
||||||
|
data, i.e. generate bootstrap data by randomly resampling the
|
||||||
|
original data pairs with replacement. Use the \code{randi()}
|
||||||
|
function for generating random indices that you can use to select a
|
||||||
|
random sample from the original data.
|
||||||
\part Compute and plot the probability density of these correlation
|
\part Compute and plot the probability density of these correlation
|
||||||
coefficients.
|
coefficients.
|
||||||
\part Is the correlation of the original data set significant?
|
\part Is the correlation of the original data set significant?
|
||||||
@ -200,6 +207,43 @@ We take the same data set that we have generated in exercise
|
|||||||
\includegraphics[width=1\textwidth]{correlationbootstrap}
|
\includegraphics[width=1\textwidth]{correlationbootstrap}
|
||||||
\end{solution}
|
\end{solution}
|
||||||
|
|
||||||
|
|
||||||
|
\continuepage
|
||||||
|
\question \qt{Permutation test of difference of means}
|
||||||
|
We want to test whether two data sets come from distributions that
|
||||||
|
differ in their mean by means of a permutation test.
|
||||||
|
\begin{parts}
|
||||||
|
\part Generate two normally distributed data sets $x$ and $y$
|
||||||
|
containing each $n=200$ samples. Let's assume the $x$ samples are
|
||||||
|
measurements of the membrane potential of a mammalian photoreceptor
|
||||||
|
in darkness with a mean of $-40$\,mV and a standard deviation of
|
||||||
|
1\,mV. The $y$ values are the membrane potentials measured under dim
|
||||||
|
illumination and come from a distribution with the same standard
|
||||||
|
deviation and a mean of $-40.5$\,mV. See section 5.2 ``Scaling and
|
||||||
|
shifting random numbers'' in the script.
|
||||||
|
\part Plot histograms of the $x$ and $y$ data in a single
|
||||||
|
plot. Choose appropriate bins.
|
||||||
|
\part Compute the means of $x$ and $y$ and their difference.
|
||||||
|
\part The null hypothesis is that the $x$ and $y$ data come from the
|
||||||
|
same distribution. How can you generate new samples $x_r$ and $y_r$
|
||||||
|
from the original data that come from the same distribution?
|
||||||
|
\part Do exactly this 1000 times and compute each time the
|
||||||
|
difference of the means of the two resampled samples.
|
||||||
|
\part Compute and plot the probability density of the resulting
|
||||||
|
distribution of the null hypothesis.
|
||||||
|
\part Is the difference of the means of the original data sets significant?
|
||||||
|
\part Repeat this procedure for $y$ samples that are closer or
|
||||||
|
further apart from the mean of the $x$ data set. For this put the
|
||||||
|
computations of the permuation test in a function and all the plotting
|
||||||
|
in another function.
|
||||||
|
\end{parts}
|
||||||
|
\begin{solution}
|
||||||
|
\lstinputlisting{meandiffpermutation.m}
|
||||||
|
%\lstinputlisting{meandiffplots.m}
|
||||||
|
\lstinputlisting{meandiffsignificance.m}
|
||||||
|
\includegraphics[width=1\textwidth]{meandiffsignificance}
|
||||||
|
\end{solution}
|
||||||
|
|
||||||
\end{questions}
|
\end{questions}
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
29
bootstrap/exercises/meandiffpermutation.m
Normal file
29
bootstrap/exercises/meandiffpermutation.m
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
function [md, ds, dq] = meandiffpermutation(x, y, nperm, alpha)
|
||||||
|
% Permutation test for difference of means of two independent samples.
|
||||||
|
%
|
||||||
|
% [md, ds] = meandiffpermutation(x, y, nperm, alpha);
|
||||||
|
%
|
||||||
|
% Arguments:
|
||||||
|
% x: vector with the samples of the x data set.
|
||||||
|
% y: vector with the samples of the y data set.
|
||||||
|
% nperm: number of permutations run.
|
||||||
|
% alpha: significance level.
|
||||||
|
%
|
||||||
|
% Returns:
|
||||||
|
% md: difference of the means
|
||||||
|
% ds: vector containing the differences of the means of the resampled data sets
|
||||||
|
% dq: difference of the means at a significance of alpha.
|
||||||
|
|
||||||
|
md = mean(x) - mean(y); % measured difference
|
||||||
|
xy = [x; y]; % merge data sets
|
||||||
|
% permutations:
|
||||||
|
ds = zeros(nperm, 1);
|
||||||
|
for i = 1:nperm
|
||||||
|
xyr = xy(randperm(length(xy))); % shuffle xy
|
||||||
|
xr = xyr(1:length(x)); % random x sample
|
||||||
|
yr = xyr(length(x)+1:end); % random y sample
|
||||||
|
ds(i) = mean(xr) - mean(yr);
|
||||||
|
end
|
||||||
|
% significance:
|
||||||
|
dq = quantile(ds, 1.0 - alpha);
|
||||||
|
end
|
48
bootstrap/exercises/meandiffsignificance.m
Normal file
48
bootstrap/exercises/meandiffsignificance.m
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
%% (a) generate data:
|
||||||
|
n = 200;
|
||||||
|
mx = -40.0;
|
||||||
|
my = -40.5;
|
||||||
|
x = randn(n, 1) + mx;
|
||||||
|
y = randn(n, 1) + my;
|
||||||
|
|
||||||
|
%% (b) plot histograms:
|
||||||
|
subplot(1, 2, 1);
|
||||||
|
bmin = min([x; y]);
|
||||||
|
bmax = max([x; y]);
|
||||||
|
bins = bmin:(bmax-bmin)/20.0:bmax;
|
||||||
|
hist(x, bins, 'facecolor', 'b');
|
||||||
|
hold on
|
||||||
|
hist(y, bins, 'facecolor', 'r');
|
||||||
|
xlabel('x and y')
|
||||||
|
ylabel('counts')
|
||||||
|
hold off
|
||||||
|
|
||||||
|
% permutation test:
|
||||||
|
[md, ds, dq] = meandiffpermutation(x, y, nperm, alpha);
|
||||||
|
|
||||||
|
%% (c) difference of means:
|
||||||
|
fprintf('difference of means = %.2fmV\n', md);
|
||||||
|
|
||||||
|
%% (f) pdf of the differences:
|
||||||
|
[h, b] = hist(ds, 20);
|
||||||
|
h = h/sum(h)/(b(2)-b(1)); % normalization
|
||||||
|
|
||||||
|
%% (g) significance:
|
||||||
|
fprintf('difference of means at 5%% significance = %.2fmV\n', dq);
|
||||||
|
if md >= dq
|
||||||
|
fprintf('--> difference of means %.2fmV is significant\n', md);
|
||||||
|
else
|
||||||
|
fprintf('--> %.2fmV is not a significant difference of means\n', md);
|
||||||
|
end
|
||||||
|
|
||||||
|
%% plot:
|
||||||
|
subplot(1, 2, 2)
|
||||||
|
bar(b, h, 'facecolor', 'b');
|
||||||
|
hold on;
|
||||||
|
bar(b(b>=dq), h(b>=dq), 'facecolor', 'r');
|
||||||
|
plot([md md], [0 4], 'r', 'linewidth', 2);
|
||||||
|
xlabel('Difference of means');
|
||||||
|
ylabel('Probability density of H0');
|
||||||
|
hold off;
|
||||||
|
|
||||||
|
savefigpdf(gcf, 'meandiffsignificance.pdf', 12, 6);
|
@ -1,10 +1,10 @@
|
|||||||
%% (a) generate random numbers:
|
%% (a) generate random numbers:
|
||||||
n = 100000;
|
n = 100000;
|
||||||
x=randn(n, 1);
|
x = randn(n, 1);
|
||||||
|
|
||||||
for nsamples=[3 5 10 50]
|
for nsamples = [3 5 10 50]
|
||||||
nsamples
|
nsamples
|
||||||
%% compute mean, standard deviation and t:
|
% compute mean, standard deviation and t:
|
||||||
nmeans = 10000;
|
nmeans = 10000;
|
||||||
means = zeros(nmeans, 1);
|
means = zeros(nmeans, 1);
|
||||||
sdevs = zeros(nmeans, 1);
|
sdevs = zeros(nmeans, 1);
|
||||||
@ -19,14 +19,14 @@ for nsamples=[3 5 10 50]
|
|||||||
% Gaussian pdfs:
|
% Gaussian pdfs:
|
||||||
msdev = std(means);
|
msdev = std(means);
|
||||||
tsdev = 1.0;
|
tsdev = 1.0;
|
||||||
dxg=0.01;
|
dxg = 0.01;
|
||||||
xmax = 10.0;
|
xmax = 10.0;
|
||||||
xmin = -xmax;
|
xmin = -xmax;
|
||||||
xg = [xmin:dxg:xmax];
|
xg = [xmin:dxg:xmax];
|
||||||
pm = exp(-0.5*(xg/msdev).^2)/sqrt(2.0*pi)/msdev;
|
pm = exp(-0.5*(xg/msdev).^2)/sqrt(2.0*pi)/msdev;
|
||||||
pt = exp(-0.5*(xg/tsdev).^2)/sqrt(2.0*pi)/tsdev;
|
pt = exp(-0.5*(xg/tsdev).^2)/sqrt(2.0*pi)/tsdev;
|
||||||
|
|
||||||
%% plots
|
% plots:
|
||||||
subplot(1, 2, 1)
|
subplot(1, 2, 1)
|
||||||
bins = xmin:0.2:xmax;
|
bins = xmin:0.2:xmax;
|
||||||
[h,b] = hist(means, bins);
|
[h,b] = hist(means, bins);
|
||||||
@ -35,7 +35,7 @@ for nsamples=[3 5 10 50]
|
|||||||
hold on
|
hold on
|
||||||
plot(xg, pm, 'r', 'linewidth', 2)
|
plot(xg, pm, 'r', 'linewidth', 2)
|
||||||
title(sprintf('sample size = %d', nsamples));
|
title(sprintf('sample size = %d', nsamples));
|
||||||
xlim( [-3, 3] );
|
xlim([-3, 3]);
|
||||||
xlabel('Mean');
|
xlabel('Mean');
|
||||||
ylabel('pdf');
|
ylabel('pdf');
|
||||||
hold off;
|
hold off;
|
||||||
@ -48,11 +48,11 @@ for nsamples=[3 5 10 50]
|
|||||||
hold on
|
hold on
|
||||||
plot(xg, pt, 'r', 'linewidth', 2)
|
plot(xg, pt, 'r', 'linewidth', 2)
|
||||||
title(sprintf('sample size = %d', nsamples));
|
title(sprintf('sample size = %d', nsamples));
|
||||||
xlim( [-8, 8] );
|
xlim([-8, 8]);
|
||||||
xlabel('Student-t');
|
xlabel('Student-t');
|
||||||
ylabel('pdf');
|
ylabel('pdf');
|
||||||
hold off;
|
hold off;
|
||||||
|
|
||||||
savefigpdf(gcf, sprintf('tdistribution-n%02d.pdf', nsamples), 14, 5);
|
savefigpdf(gcf, sprintf('tdistribution-n%02d.pdf', nsamples), 14, 5);
|
||||||
pause( 3.0 )
|
pause(3.0)
|
||||||
end
|
end
|
||||||
|
Reference in New Issue
Block a user