[bootstrap] expanding exercise by difference of mean permutation test
This commit is contained in:
parent
68784b0e23
commit
c9dd1ffbe6
@ -10,7 +10,7 @@ function [bootsem, mu] = bootstrapmean(x, resample)
|
||||
for i = 1:resample
|
||||
% resample:
|
||||
xr = x(randi(nsamples, nsamples, 1));
|
||||
% compute statistics on sample:
|
||||
% compute statistics of resampled sample:
|
||||
mu(i) = mean(xr);
|
||||
end
|
||||
bootsem = std(mu);
|
||||
|
@ -25,8 +25,8 @@ end
|
||||
|
||||
%% plot:
|
||||
hold on;
|
||||
bar(b, h, 'facecolor', [0.5 0.5 0.5]);
|
||||
bar(bb, hb, 'facecolor', 'b');
|
||||
bar(b, h, 'facecolor', [0.5 0.5 0.5]); % permutation test
|
||||
bar(bb, hb, 'facecolor', 'b'); % bootstrap
|
||||
bar(bb(bb<=rbq), hb(bb<=rbq), 'facecolor', 'r');
|
||||
plot([rd rd], [0 4], 'r', 'linewidth', 2);
|
||||
xlim([-0.25 0.75])
|
||||
|
@ -1,4 +1,4 @@
|
||||
%% (a) generate correlated data
|
||||
%% (a) generate correlated data:
|
||||
n = 1000;
|
||||
a = 0.2;
|
||||
x = randn(n, 1);
|
||||
|
@ -15,7 +15,7 @@
|
||||
\else
|
||||
\newcommand{\stitle}{}
|
||||
\fi
|
||||
\header{{\bfseries\large Exercise 9\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large December 9th, 2019}}
|
||||
\header{{\bfseries\large Exercise 8\stitle}}{{\bfseries\large Resampling}}{{\bfseries\large December 14th, 2020}}
|
||||
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
|
||||
jan.benda@uni-tuebingen.de}
|
||||
\runningfooter{}{\thepage}{}
|
||||
@ -86,6 +86,9 @@ jan.benda@uni-tuebingen.de}
|
||||
|
||||
\begin{questions}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\question \qt{Read chapter 7 of the script on ``resampling methods''!}\vspace{-3ex}
|
||||
|
||||
\question \qt{Bootstrap the standard error of the mean}
|
||||
We want to compute the standard error of the mean of a data set by
|
||||
means of the bootstrap method and compare the result with the formula
|
||||
@ -149,10 +152,10 @@ normally distributed?
|
||||
|
||||
|
||||
\continue
|
||||
\question \qt{Permutation test} \label{permutationtest}
|
||||
\question \qt{Permutation test of correlations} \label{correlationtest}
|
||||
We want to compute the significance of a correlation by means of a permutation test.
|
||||
\begin{parts}
|
||||
\part \label{permutationtestdata} Generate 1000 correlated pairs
|
||||
\part \label{correlationtestdata} Generate 1000 correlated pairs
|
||||
$x$, $y$ of random numbers according to:
|
||||
\begin{verbatim}
|
||||
n = 1000
|
||||
@ -188,9 +191,13 @@ correlation coefficient of zero we can conclude that the correlation
|
||||
coefficient of the data indeed quantifies correlated data.
|
||||
|
||||
We take the same data set that we have generated in exercise
|
||||
\ref{permutationtest} (\ref{permutationtestdata}).
|
||||
\ref{correlationtest} (\ref{correlationtestdata}).
|
||||
\begin{parts}
|
||||
\part Bootstrap 1000 times the correlation coefficient from the data.
|
||||
\part Bootstrap 1000 times the correlation coefficient from the
|
||||
data, i.e. generate bootstrap data by randomly resampling the
|
||||
original data pairs with replacement. Use the \code{randi()}
|
||||
function for generating random indices that you can use to select a
|
||||
random sample from the original data.
|
||||
\part Compute and plot the probability density of these correlation
|
||||
coefficients.
|
||||
\part Is the correlation of the original data set significant?
|
||||
@ -200,6 +207,43 @@ We take the same data set that we have generated in exercise
|
||||
\includegraphics[width=1\textwidth]{correlationbootstrap}
|
||||
\end{solution}
|
||||
|
||||
|
||||
\continuepage
|
||||
\question \qt{Permutation test of difference of means}
|
||||
We want to test whether two data sets come from distributions that
|
||||
differ in their mean by means of a permutation test.
|
||||
\begin{parts}
|
||||
\part Generate two normally distributed data sets $x$ and $y$
|
||||
each containing $n=200$ samples. Let's assume the $x$ samples are
|
||||
measurements of the membrane potential of a mammalian photoreceptor
|
||||
in darkness with a mean of $-40$\,mV and a standard deviation of
|
||||
1\,mV. The $y$ values are the membrane potentials measured under dim
|
||||
illumination and come from a distribution with the same standard
|
||||
deviation and a mean of $-40.5$\,mV. See section 5.2 ``Scaling and
|
||||
shifting random numbers'' in the script.
|
||||
\part Plot histograms of the $x$ and $y$ data in a single
|
||||
plot. Choose appropriate bins.
|
||||
\part Compute the means of $x$ and $y$ and their difference.
|
||||
\part The null hypothesis is that the $x$ and $y$ data come from the
|
||||
same distribution. How can you generate new samples $x_r$ and $y_r$
|
||||
from the original data that come from the same distribution?
|
||||
\part Do exactly this 1000 times and compute each time the
|
||||
difference of the means of the two resampled samples.
|
||||
\part Compute and plot the probability density of the resulting
|
||||
distribution of the null hypothesis.
|
||||
\part Is the difference of the means of the original data sets significant?
|
||||
\part Repeat this procedure for $y$ samples whose mean is closer to
|
||||
or further away from the mean of the $x$ data set. For this put the
|
||||
computations of the permutation test in a function and all the plotting
|
||||
in another function.
|
||||
\end{parts}
|
||||
\begin{solution}
|
||||
\lstinputlisting{meandiffpermutation.m}
|
||||
%\lstinputlisting{meandiffplots.m}
|
||||
\lstinputlisting{meandiffsignificance.m}
|
||||
\includegraphics[width=1\textwidth]{meandiffsignificance}
|
||||
\end{solution}
|
||||
|
||||
\end{questions}
|
||||
|
||||
\end{document}
|
29
bootstrap/exercises/meandiffpermutation.m
Normal file
29
bootstrap/exercises/meandiffpermutation.m
Normal file
@ -0,0 +1,29 @@
|
||||
function [md, ds, dq] = meandiffpermutation(x, y, nperm, alpha)
% Permutation test for difference of means of two independent samples.
%
% [md, ds, dq] = meandiffpermutation(x, y, nperm, alpha);
%
% Arguments:
%   x: vector with the samples of the x data set (row or column).
%   y: vector with the samples of the y data set (row or column).
%   nperm: number of permutations run (optional, default 1000).
%   alpha: significance level (optional, default 0.05).
%
% Returns:
%   md: difference of the means, mean(x) - mean(y).
%   ds: column vector (nperm x 1) containing the differences of the
%       means of the resampled data sets.
%   dq: the (1 - alpha) quantile of ds, i.e. the difference of the means
%       at a significance of alpha for a one-sided test.

    % optional arguments:
    if nargin < 3 || isempty(nperm)
        nperm = 1000;
    end
    if nargin < 4 || isempty(alpha)
        alpha = 0.05;
    end

    % Force column vectors. Without this, [x; y] of two row vectors would
    % either fail (different lengths) or build a 2-by-n matrix whose
    % length() is n, leaving the resampled y sample empty.
    x = x(:);
    y = y(:);
    nx = numel(x);

    md = mean(x) - mean(y);          % measured difference
    xy = [x; y];                     % merge data sets
    nxy = numel(xy);

    % permutations:
    ds = zeros(nperm, 1);
    for i = 1:nperm
        xyr = xy(randperm(nxy));     % shuffle xy
        xr = xyr(1:nx);              % random x sample
        yr = xyr(nx+1:end);          % random y sample
        ds(i) = mean(xr) - mean(yr);
    end

    % significance:
    dq = quantile(ds, 1.0 - alpha);
end
|
48
bootstrap/exercises/meandiffsignificance.m
Normal file
48
bootstrap/exercises/meandiffsignificance.m
Normal file
@ -0,0 +1,48 @@
|
||||
% Permutation test for the difference of the means of two simulated
% data sets: generates the data, plots their histograms, runs the
% permutation test, and plots the distribution of the null hypothesis
% together with the measured difference of the means.

%% (a) generate data:
n = 200;       % number of samples in each data set
mx = -40.0;    % mean of the x data
my = -40.5;    % mean of the y data
x = randn(n, 1) + mx;
y = randn(n, 1) + my;

%% (b) plot histograms:
subplot(1, 2, 1);
bmin = min([x; y]);
bmax = max([x; y]);
bins = bmin:(bmax-bmin)/20.0:bmax;   % common bins for both data sets
% hist() is only used for counting here; the colors are set via bar(),
% since property arguments to hist() are not part of its documented
% MATLAB interface.
[hx, bx] = hist(x, bins);
[hy, by] = hist(y, bins);
bar(bx, hx, 'facecolor', 'b');
hold on
bar(by, hy, 'facecolor', 'r');
xlabel('x and y')
ylabel('counts')
hold off

% permutation test:
nperm = 1000;    % number of permutations
alpha = 0.05;    % significance level
[md, ds, dq] = meandiffpermutation(x, y, nperm, alpha);

%% (c) difference of means:
fprintf('difference of means = %.2fmV\n', md);

%% (f) pdf of the differences:
[h, b] = hist(ds, 20);
h = h/sum(h)/(b(2)-b(1));  % normalization

%% (g) significance:
fprintf('difference of means at %g%% significance = %.2fmV\n', 100.0*alpha, dq);
if md >= dq
    fprintf('--> difference of means %.2fmV is significant\n', md);
else
    fprintf('--> %.2fmV is not a significant difference of means\n', md);
end

%% plot:
subplot(1, 2, 2)
bar(b, h, 'facecolor', 'b');
hold on;
bar(b(b>=dq), h(b>=dq), 'facecolor', 'r');   % mark the significant tail
plot([md md], [0 4], 'r', 'linewidth', 2);   % measured difference
xlabel('Difference of means');
ylabel('Probability density of H0');
hold off;

savefigpdf(gcf, 'meandiffsignificance.pdf', 12, 6);
|
@ -4,7 +4,7 @@ x=randn(n, 1);
|
||||
|
||||
for nsamples = [3 5 10 50]
|
||||
nsamples
|
||||
%% compute mean, standard deviation and t:
|
||||
% compute mean, standard deviation and t:
|
||||
nmeans = 10000;
|
||||
means = zeros(nmeans, 1);
|
||||
sdevs = zeros(nmeans, 1);
|
||||
@ -26,7 +26,7 @@ for nsamples=[3 5 10 50]
|
||||
pm = exp(-0.5*(xg/msdev).^2)/sqrt(2.0*pi)/msdev;
|
||||
pt = exp(-0.5*(xg/tsdev).^2)/sqrt(2.0*pi)/tsdev;
|
||||
|
||||
%% plots
|
||||
% plots:
|
||||
subplot(1, 2, 1)
|
||||
bins = xmin:0.2:xmax;
|
||||
[h,b] = hist(means, bins);
|
||||
|
Reference in New Issue
Block a user