[bootstrap] expanding exercise by difference of mean permutation test
This commit is contained in:
parent
68784b0e23
commit
c9dd1ffbe6
@ -10,7 +10,7 @@ function [bootsem, mu] = bootstrapmean(x, resample)
|
|||||||
for i = 1:resample
|
for i = 1:resample
|
||||||
% resample:
|
% resample:
|
||||||
xr = x(randi(nsamples, nsamples, 1));
|
xr = x(randi(nsamples, nsamples, 1));
|
||||||
% compute statistics on sample:
|
% compute statistics of resampled sample:
|
||||||
mu(i) = mean(xr);
|
mu(i) = mean(xr);
|
||||||
end
|
end
|
||||||
bootsem = std(mu);
|
bootsem = std(mu);
|
||||||
|
@ -25,8 +25,8 @@ end
|
|||||||
|
|
||||||
%% plot:
|
%% plot:
|
||||||
hold on;
|
hold on;
|
||||||
bar(b, h, 'facecolor', [0.5 0.5 0.5]);
|
bar(b, h, 'facecolor', [0.5 0.5 0.5]); % permutation test
|
||||||
bar(bb, hb, 'facecolor', 'b');
|
bar(bb, hb, 'facecolor', 'b'); % bootstrap
|
||||||
bar(bb(bb<=rbq), hb(bb<=rbq), 'facecolor', 'r');
|
bar(bb(bb<=rbq), hb(bb<=rbq), 'facecolor', 'r');
|
||||||
plot([rd rd], [0 4], 'r', 'linewidth', 2);
|
plot([rd rd], [0 4], 'r', 'linewidth', 2);
|
||||||
xlim([-0.25 0.75])
|
xlim([-0.25 0.75])
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
%% (a) generate correlated data
|
%% (a) generate correlated data:
|
||||||
n = 1000;
|
n = 1000;
|
||||||
a = 0.2;
|
a = 0.2;
|
||||||
x = randn(n, 1);
|
x = randn(n, 1);
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
\else
|
\else
|
||||||
\newcommand{\stitle}{}
|
\newcommand{\stitle}{}
|
||||||
\fi
|
\fi
|
||||||
\header{{\bfseries\large Exercise 9\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large December 9th, 2019}}
|
\header{{\bfseries\large Exercise 8\stitle}}{{\bfseries\large Resampling}}{{\bfseries\large December 14th, 2020}}
|
||||||
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
|
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
|
||||||
jan.benda@uni-tuebingen.de}
|
jan.benda@uni-tuebingen.de}
|
||||||
\runningfooter{}{\thepage}{}
|
\runningfooter{}{\thepage}{}
|
||||||
@ -86,6 +86,9 @@ jan.benda@uni-tuebingen.de}
|
|||||||
|
|
||||||
\begin{questions}
|
\begin{questions}
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\question \qt{Read chapter 7 of the script on ``resampling methods''!}\vspace{-3ex}
|
||||||
|
|
||||||
\question \qt{Bootstrap the standard error of the mean}
|
\question \qt{Bootstrap the standard error of the mean}
|
||||||
We want to compute the standard error of the mean of a data set by
|
We want to compute the standard error of the mean of a data set by
|
||||||
means of the bootstrap method and compare the result with the formula
|
means of the bootstrap method and compare the result with the formula
|
||||||
@ -149,10 +152,10 @@ normally distributed?
|
|||||||
|
|
||||||
|
|
||||||
\continue
|
\continue
|
||||||
\question \qt{Permutation test} \label{permutationtest}
|
\question \qt{Permutation test of correlations} \label{correlationtest}
|
||||||
We want to compute the significance of a correlation by means of a permutation test.
|
We want to compute the significance of a correlation by means of a permutation test.
|
||||||
\begin{parts}
|
\begin{parts}
|
||||||
\part \label{permutationtestdata} Generate 1000 correlated pairs
|
\part \label{correlationtestdata} Generate 1000 correlated pairs
|
||||||
$x$, $y$ of random numbers according to:
|
$x$, $y$ of random numbers according to:
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
n = 1000
|
n = 1000
|
||||||
@ -188,9 +191,13 @@ correlation coefficient of zero we can conclude that the correlation
|
|||||||
coefficient of the data indeed quantifies correlated data.
|
coefficient of the data indeed quantifies correlated data.
|
||||||
|
|
||||||
We take the same data set that we have generated in exercise
|
We take the same data set that we have generated in exercise
|
||||||
\ref{permutationtest} (\ref{permutationtestdata}).
|
\ref{correlationtest} (\ref{correlationtestdata}).
|
||||||
\begin{parts}
|
\begin{parts}
|
||||||
\part Bootstrap 1000 times the correlation coefficient from the data.
|
\part Bootstrap 1000 times the correlation coefficient from the
|
||||||
|
data, i.e. generate bootstrap data by randomly resampling the
|
||||||
|
original data pairs with replacement. Use the \code{randi()}
|
||||||
|
function for generating random indices that you can use to select a
|
||||||
|
random sample from the original data.
|
||||||
\part Compute and plot the probability density of these correlation
|
\part Compute and plot the probability density of these correlation
|
||||||
coefficients.
|
coefficients.
|
||||||
\part Is the correlation of the original data set significant?
|
\part Is the correlation of the original data set significant?
|
||||||
@ -200,6 +207,43 @@ We take the same data set that we have generated in exercise
|
|||||||
\includegraphics[width=1\textwidth]{correlationbootstrap}
|
\includegraphics[width=1\textwidth]{correlationbootstrap}
|
||||||
\end{solution}
|
\end{solution}
|
||||||
|
|
||||||
|
|
||||||
|
\continuepage
|
||||||
|
\question \qt{Permutation test of difference of means}
|
||||||
|
We want to test whether two data sets come from distributions that
|
||||||
|
differ in their mean by means of a permutation test.
|
||||||
|
\begin{parts}
|
||||||
|
\part Generate two normally distributed data sets $x$ and $y$
|
||||||
|
each containing $n=200$ samples. Let's assume the $x$ samples are
|
||||||
|
measurements of the membrane potential of a mammalian photoreceptor
|
||||||
|
in darkness with a mean of $-40$\,mV and a standard deviation of
|
||||||
|
1\,mV. The $y$ values are the membrane potentials measured under dim
|
||||||
|
illumination and come from a distribution with the same standard
|
||||||
|
deviation and a mean of $-40.5$\,mV. See section 5.2 ``Scaling and
|
||||||
|
shifting random numbers'' in the script.
|
||||||
|
\part Plot histograms of the $x$ and $y$ data in a single
|
||||||
|
plot. Choose appropriate bins.
|
||||||
|
\part Compute the means of $x$ and $y$ and their difference.
|
||||||
|
\part The null hypothesis is that the $x$ and $y$ data come from the
|
||||||
|
same distribution. How can you generate new samples $x_r$ and $y_r$
|
||||||
|
from the original data that come from the same distribution?
|
||||||
|
\part Do exactly this 1000 times and compute each time the
|
||||||
|
difference of the means of the two resampled samples.
|
||||||
|
\part Compute and plot the probability density of the resulting
|
||||||
|
distribution of the null hypothesis.
|
||||||
|
\part Is the difference of the means of the original data sets significant?
|
||||||
|
\part Repeat this procedure for $y$ samples whose mean is closer to or
|
||||||
|
further away from the mean of the $x$ data set. For this put the
|
||||||
|
computations of the permutation test in a function and all the plotting
|
||||||
|
in another function.
|
||||||
|
\end{parts}
|
||||||
|
\begin{solution}
|
||||||
|
\lstinputlisting{meandiffpermutation.m}
|
||||||
|
%\lstinputlisting{meandiffplots.m}
|
||||||
|
\lstinputlisting{meandiffsignificance.m}
|
||||||
|
\includegraphics[width=1\textwidth]{meandiffsignificance}
|
||||||
|
\end{solution}
|
||||||
|
|
||||||
\end{questions}
|
\end{questions}
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
29
bootstrap/exercises/meandiffpermutation.m
Normal file
29
bootstrap/exercises/meandiffpermutation.m
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
function [md, ds, dq] = meandiffpermutation(x, y, nperm, alpha)
|
||||||
|
% Permutation test for difference of means of two independent samples.
|
||||||
|
%
|
||||||
|
% [md, ds, dq] = meandiffpermutation(x, y, nperm, alpha);
|
||||||
|
%
|
||||||
|
% Arguments:
|
||||||
|
% x: vector with the samples of the x data set.
|
||||||
|
% y: vector with the samples of the y data set.
|
||||||
|
% nperm: number of permutations run.
|
||||||
|
% alpha: significance level.
|
||||||
|
%
|
||||||
|
% Returns:
|
||||||
|
% md: difference of the means
|
||||||
|
% ds: vector containing the differences of the means of the resampled data sets
|
||||||
|
% dq: difference of the means at a significance of alpha.
|
||||||
|
|
||||||
|
md = mean(x) - mean(y); % measured difference
|
||||||
|
xy = [x; y]; % merge data sets
|
||||||
|
% permutations:
|
||||||
|
ds = zeros(nperm, 1);
|
||||||
|
for i = 1:nperm
|
||||||
|
xyr = xy(randperm(length(xy))); % shuffle xy
|
||||||
|
xr = xyr(1:length(x)); % random x sample
|
||||||
|
yr = xyr(length(x)+1:end); % random y sample
|
||||||
|
ds(i) = mean(xr) - mean(yr);
|
||||||
|
end
|
||||||
|
% significance:
|
||||||
|
dq = quantile(ds, 1.0 - alpha);
|
||||||
|
end
|
48
bootstrap/exercises/meandiffsignificance.m
Normal file
48
bootstrap/exercises/meandiffsignificance.m
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
%% (a) generate data:
|
||||||
|
n = 200;
|
||||||
|
mx = -40.0;
|
||||||
|
my = -40.5;
|
||||||
|
x = randn(n, 1) + mx;
|
||||||
|
y = randn(n, 1) + my;
|
||||||
|
|
||||||
|
%% (b) plot histograms:
|
||||||
|
subplot(1, 2, 1);
|
||||||
|
bmin = min([x; y]);
|
||||||
|
bmax = max([x; y]);
|
||||||
|
bins = bmin:(bmax-bmin)/20.0:bmax;
|
||||||
|
hist(x, bins, 'facecolor', 'b');
|
||||||
|
hold on
|
||||||
|
hist(y, bins, 'facecolor', 'r');
|
||||||
|
xlabel('x and y')
|
||||||
|
ylabel('counts')
|
||||||
|
hold off
|
||||||
|
|
||||||
|
% permutation test:
|
||||||
|
[md, ds, dq] = meandiffpermutation(x, y, nperm, alpha);
|
||||||
|
|
||||||
|
%% (c) difference of means:
|
||||||
|
fprintf('difference of means = %.2fmV\n', md);
|
||||||
|
|
||||||
|
%% (f) pdf of the differences:
|
||||||
|
[h, b] = hist(ds, 20);
|
||||||
|
h = h/sum(h)/(b(2)-b(1)); % normalization
|
||||||
|
|
||||||
|
%% (g) significance:
|
||||||
|
fprintf('difference of means at 5%% significance = %.2fmV\n', dq);
|
||||||
|
if md >= dq
|
||||||
|
fprintf('--> difference of means %.2fmV is significant\n', md);
|
||||||
|
else
|
||||||
|
fprintf('--> %.2fmV is not a significant difference of means\n', md);
|
||||||
|
end
|
||||||
|
|
||||||
|
%% plot:
|
||||||
|
subplot(1, 2, 2)
|
||||||
|
bar(b, h, 'facecolor', 'b');
|
||||||
|
hold on;
|
||||||
|
bar(b(b>=dq), h(b>=dq), 'facecolor', 'r');
|
||||||
|
plot([md md], [0 4], 'r', 'linewidth', 2);
|
||||||
|
xlabel('Difference of means');
|
||||||
|
ylabel('Probability density of H0');
|
||||||
|
hold off;
|
||||||
|
|
||||||
|
savefigpdf(gcf, 'meandiffsignificance.pdf', 12, 6);
|
@ -4,7 +4,7 @@ x=randn(n, 1);
|
|||||||
|
|
||||||
for nsamples = [3 5 10 50]
|
for nsamples = [3 5 10 50]
|
||||||
nsamples
|
nsamples
|
||||||
%% compute mean, standard deviation and t:
|
% compute mean, standard deviation and t:
|
||||||
nmeans = 10000;
|
nmeans = 10000;
|
||||||
means = zeros(nmeans, 1);
|
means = zeros(nmeans, 1);
|
||||||
sdevs = zeros(nmeans, 1);
|
sdevs = zeros(nmeans, 1);
|
||||||
@ -26,7 +26,7 @@ for nsamples=[3 5 10 50]
|
|||||||
pm = exp(-0.5*(xg/msdev).^2)/sqrt(2.0*pi)/msdev;
|
pm = exp(-0.5*(xg/msdev).^2)/sqrt(2.0*pi)/msdev;
|
||||||
pt = exp(-0.5*(xg/tsdev).^2)/sqrt(2.0*pi)/tsdev;
|
pt = exp(-0.5*(xg/tsdev).^2)/sqrt(2.0*pi)/tsdev;
|
||||||
|
|
||||||
%% plots
|
% plots:
|
||||||
subplot(1, 2, 1)
|
subplot(1, 2, 1)
|
||||||
bins = xmin:0.2:xmax;
|
bins = xmin:0.2:xmax;
|
||||||
[h,b] = hist(means, bins);
|
[h,b] = hist(means, bins);
|
||||||
|
Reference in New Issue
Block a user