[bootstrap] improved code
This commit is contained in:
parent
e1c6c32db0
commit
430bdfb7fd
@ -1,24 +1,25 @@
|
|||||||
nsamples = 100;
|
nsamples = 100;
|
||||||
nresamples = 1000;
|
nresamples = 1000;
|
||||||
|
|
||||||
% draw a SRS (simple random sample, "Stichprobe") from the population:
|
% draw a simple random sample ("Stichprobe") from the population:
|
||||||
x = randn( 1, nsamples );
|
x = randn(1, nsamples);
|
||||||
fprintf('%-30s %-5s %-5s %-5s\n', '', 'mean', 'stdev', 'sem' )
|
fprintf('%-30s %-5s %-5s %-5s\n', '', 'mean', 'stdev', 'sem')
|
||||||
fprintf('%30s %5.2f %5.2f %5.2f\n', 'single SRS', mean( x ), std( x ), std( x )/sqrt(nsamples) )
|
fprintf('%30s %5.2f %5.2f %5.2f\n', 'single SRS', mean(x), std(x), std(x)/sqrt(nsamples))
|
||||||
|
|
||||||
% bootstrap the mean:
|
% bootstrap the mean:
|
||||||
mus = zeros(nresamples,1); % vector for storing the means
|
mus = zeros(nresamples,1); % vector for storing the means
|
||||||
for i = 1:nresamples % loop for generating the bootstraps
|
for i = 1:nresamples % loop for generating the bootstraps
|
||||||
inx = randi(nsamples, 1, nsamples); % range, 1D-vector, number
|
inx = randi(nsamples, 1, nsamples); % range, 1D-vector, number
|
||||||
xr = x(inx); % resample the original SRS
|
xr = x(inx); % resample the original SRS
|
||||||
mus(i) = mean(xr); % compute statistic of the resampled SRS
|
mus(i) = mean(xr); % compute statistic of the resampled SRS
|
||||||
end
|
end
|
||||||
fprintf('%30s %5.2f %5.2f -\n', 'bootstrapped distribution', mean( mus ), std( mus ) )
|
fprintf('%30s %5.2f %5.2f -\n', 'bootstrapped distribution', mean(mus), std(mus))
|
||||||
|
|
||||||
% many SRS (we can do that with the random number generator, but not in real life!):
|
% many SRS (we can do that with the random number generator,
|
||||||
musrs = zeros(nresamples,1); % vector for the means of each SRS
|
% but not in real life!):
|
||||||
|
musrs = zeros(nresamples,1); % vector for the means of each SRS
|
||||||
for i = 1:nresamples
|
for i = 1:nresamples
|
||||||
x = randn( 1, nsamples ); % draw a new SRS
|
x = randn(1, nsamples); % draw a new SRS
|
||||||
musrs(i) = mean( x ); % compute its mean
|
musrs(i) = mean(x); % compute its mean
|
||||||
end
|
end
|
||||||
fprintf('%30s %5.2f %5.2f -\n', 'sampling distribution', mean( musrs ), std( musrs ) )
|
fprintf('%30s %5.2f %5.2f -\n', 'sampling distribution', mean(musrs), std(musrs))
|
||||||
|
@ -6,7 +6,7 @@ y = randn(n, 1) + a*x;
|
|||||||
|
|
||||||
% correlation coefficient:
|
% correlation coefficient:
|
||||||
rd = corr(x, y);
|
rd = corr(x, y);
|
||||||
fprintf('correlation coefficient of data r = %.2f\n', rd );
|
fprintf('correlation coefficient of data r = %.2f\n', rd);
|
||||||
|
|
||||||
% distribution of null hypothesis by permutation:
|
% distribution of null hypothesis by permutation:
|
||||||
nperm = 1000;
|
nperm = 1000;
|
||||||
@ -16,12 +16,12 @@ for i=1:nperm
|
|||||||
yr=y(randperm(length(y))); % shuffle y
|
yr=y(randperm(length(y))); % shuffle y
|
||||||
rs(i) = corr(xr, yr);
|
rs(i) = corr(xr, yr);
|
||||||
end
|
end
|
||||||
[h,b] = hist(rs, 20 );
|
[h,b] = hist(rs, 20);
|
||||||
h = h/sum(h)/(b(2)-b(1)); % normalization
|
h = h/sum(h)/(b(2)-b(1)); % normalization
|
||||||
|
|
||||||
% significance:
|
% significance:
|
||||||
rq = quantile(rs, 0.95);
|
rq = quantile(rs, 0.95);
|
||||||
fprintf('correlation coefficient of null hypothesis at 5%% significance = %.2f\n', rq );
|
fprintf('correlation coefficient of null hypothesis at 5%% significance = %.2f\n', rq);
|
||||||
if rd >= rq
|
if rd >= rq
|
||||||
fprintf('--> correlation r=%.2f is significant\n', rd);
|
fprintf('--> correlation r=%.2f is significant\n', rd);
|
||||||
else
|
else
|
||||||
@ -32,7 +32,7 @@ end
|
|||||||
bar(b, h, 'facecolor', 'b');
|
bar(b, h, 'facecolor', 'b');
|
||||||
hold on;
|
hold on;
|
||||||
bar(b(b>=rq), h(b>=rq), 'facecolor', 'r');
|
bar(b(b>=rq), h(b>=rq), 'facecolor', 'r');
|
||||||
plot( [rd rd], [0 4], 'r', 'linewidth', 2 );
|
plot([rd rd], [0 4], 'r', 'linewidth', 2);
|
||||||
xlabel('Correlation coefficient');
|
xlabel('Correlation coefficient');
|
||||||
ylabel('Probability density of H0');
|
ylabel('Probability density of H0');
|
||||||
hold off;
|
hold off;
|
||||||
|
@ -3,7 +3,7 @@ corrs = [1.0, 0.6, 0.0, -0.9];
|
|||||||
for k = [1:length(corrs)]
|
for k = [1:length(corrs)]
|
||||||
r = corrs(k);
|
r = corrs(k);
|
||||||
x = randn(n, 1);
|
x = randn(n, 1);
|
||||||
y = r*x; % linear dependence of y on x
|
y = r*x; % linear dependence of y on x
|
||||||
% add noise to destroy perfect correlations:
|
% add noise to destroy perfect correlations:
|
||||||
y = y + sqrt(1.0-r*r)*randn(n, 1);
|
y = y + sqrt(1.0-r*r)*randn(n, 1);
|
||||||
% compute correlation coefficient of data:
|
% compute correlation coefficient of data:
|
||||||
|
@ -1,15 +1,15 @@
|
|||||||
data = randn(100, 1); % generate some data
|
data = randn(100, 1); % generate some data
|
||||||
sigma = 0.2; % std. dev. of Gaussian kernel
|
sigma = 0.2; % std. dev. of Gaussian kernel
|
||||||
xmin = -4.0; % minimum x value for kernel density
|
xmin = -4.0; % minimum x value for kernel density
|
||||||
xmax = 4.0; % maximum x value for kernel density
|
xmax = 4.0; % maximum x value for kernel density
|
||||||
dx = 0.05*sigma; % step size for kernel density
|
dx = 0.05*sigma; % step size for kernel density
|
||||||
xg = [-4.0*sigma:dx:4.0*sigma]; % x-axis for single Gaussian kernel
|
xg = [-4.0*sigma:dx:4.0*sigma]; % x-axis for single Gaussian kernel
|
||||||
% single Gaussian kernel:
|
% single Gaussian kernel:
|
||||||
kernel = exp(-0.5*(xg/sigma).^2)/sqrt(2.0*pi)/sigma;
|
kernel = exp(-0.5*(xg/sigma).^2)/sqrt(2.0*pi)/sigma;
|
||||||
ng = floor((length(kernel)-1)/2); % half the length of the Gaussian
|
ng = floor((length(kernel)-1)/2); % half the length of the Gaussian
|
||||||
x = [xmin:dx:xmax+0.5*dx]; % x-axis for kernel density
|
x = [xmin:dx:xmax+0.5*dx]; % x-axis for kernel density
|
||||||
kd = zeros(1, length(x)); % vector for kernel density
|
kd = zeros(1, length(x)); % vector for kernel density
|
||||||
for i = 1:length(data) % for every data value ...
|
for i = 1:length(data) % for every data value ...
|
||||||
xd = data(i);
|
xd = data(i);
|
||||||
% index of data value in kernel density vector:
|
% index of data value in kernel density vector:
|
||||||
inx = round((xd-xmin)/dx)+1;
|
inx = round((xd-xmin)/dx)+1;
|
||||||
@ -17,8 +17,8 @@ for i = 1:length(data) % for every data value ...
|
|||||||
k0 = inx-ng;
|
k0 = inx-ng;
|
||||||
% end index for Gaussian in kernel density vector:
|
% end index for Gaussian in kernel density vector:
|
||||||
k1 = inx+ng;
|
k1 = inx+ng;
|
||||||
g0 = 1; % start index in Gaussian
|
g0 = 1; % start index in Gaussian
|
||||||
g1 = length(kernel); % end index in Gaussian
|
g1 = length(kernel); % end index in Gaussian
|
||||||
% check whether left side of Gaussian extends below xmin:
|
% check whether left side of Gaussian extends below xmin:
|
||||||
if inx < ng+1
|
if inx < ng+1
|
||||||
% adjust start indices accordingly:
|
% adjust start indices accordingly:
|
||||||
@ -34,7 +34,7 @@ for i = 1:length(data) % for every data value ...
|
|||||||
% add Gaussian on kernel density:
|
% add Gaussian on kernel density:
|
||||||
kd(k0:k1) = kd(k0:k1) + kernel(g0:g1);
|
kd(k0:k1) = kd(k0:k1) + kernel(g0:g1);
|
||||||
end
|
end
|
||||||
kd = kd/length(data); % normalize by number of data points
|
kd = kd/length(data); % normalize by number of data points
|
||||||
|
|
||||||
% plot the computed kernel density:
|
% plot the computed kernel density:
|
||||||
plot(x, kd, 'b', 'linewidth', 4, 'displayname', 'manual')
|
plot(x, kd, 'b', 'linewidth', 4, 'displayname', 'manual')
|
||||||
|
Reference in New Issue
Block a user