[bootstrap] improved code

2020-12-07 22:37:36 +01:00 · 2020-12-07 22:37:36 +01:00 · 430bdfb7fd
commit 430bdfb7fd
parent e1c6c32db0
4 changed files with 34 additions and 33 deletions
--- a/bootstrap/code/bootstrapsem.m
+++ b/bootstrap/code/bootstrapsem.m
@ -1,24 +1,25 @@
 nsamples = 100;
 nresamples = 1000;

-% draw a SRS (simple random sample, "Stichprobe") from the population:
-x = randn( 1, nsamples );
-fprintf('%-30s  %-5s  %-5s  %-5s\n', '', 'mean', 'stdev', 'sem' )
-fprintf('%30s  %5.2f  %5.2f  %5.2f\n', 'single SRS', mean( x ), std( x ), std( x )/sqrt(nsamples) )
+% draw a simple random sample ("Stichprobe") from the population:
+x = randn(1, nsamples);
+fprintf('%-30s  %-5s  %-5s  %-5s\n', '', 'mean', 'stdev', 'sem')
+fprintf('%30s  %5.2f  %5.2f  %5.2f\n', 'single SRS', mean(x), std(x), std(x)/sqrt(nsamples))

 % bootstrap the mean:
-mus = zeros(nresamples,1);  % vector for storing the means
-for i = 1:nresamples        % loop for generating the bootstraps
+mus = zeros(nresamples,1);    % vector for storing the means
+for i = 1:nresamples          % loop for generating the bootstraps
    inx = randi(nsamples, 1, nsamples);  % range, 1D-vector, number
-    xr = x(inx);            % resample the original SRS
-    mus(i) = mean(xr);      % compute statistic of the resampled SRS
+    xr = x(inx);              % resample the original SRS
+    mus(i) = mean(xr);        % compute statistic of the resampled SRS
 end
-fprintf('%30s  %5.2f  %5.2f  -\n', 'bootstrapped distribution', mean( mus ), std( mus ) )
+fprintf('%30s  %5.2f  %5.2f  -\n', 'bootstrapped distribution', mean(mus), std(mus))

-% many SRS (we can do that with the random number generator, but not in real life!):
-musrs = zeros(nresamples,1);      % vector for the means of each SRS
+% many SRS (we can do that with the random number generator,
+% but not in real life!):
+musrs = zeros(nresamples,1);  % vector for the means of each SRS
 for i = 1:nresamples
-    x = randn( 1, nsamples );     % draw a new SRS
-    musrs(i) = mean( x );         % compute its mean
+    x = randn(1, nsamples);   % draw a new SRS
+    musrs(i) = mean(x);       % compute its mean
 end
-fprintf('%30s  %5.2f  %5.2f  -\n', 'sampling distribution', mean( musrs ), std( musrs ) )
+fprintf('%30s  %5.2f  %5.2f  -\n', 'sampling distribution', mean(musrs), std(musrs))
--- a/bootstrap/code/correlationsignificance.m
+++ b/bootstrap/code/correlationsignificance.m
@ -6,7 +6,7 @@ y = randn(n, 1) + a*x;

 % correlation coefficient:
 rd = corr(x, y);
-fprintf('correlation coefficient of data r = %.2f\n', rd );
+fprintf('correlation coefficient of data r = %.2f\n', rd);

 % distribution of null hypothesis by permutation:
 nperm = 1000;
@ -16,12 +16,12 @@ for i=1:nperm
    yr=y(randperm(length(y)));  % shuffle y
    rs(i) = corr(xr, yr);
 end
-[h,b] = hist(rs, 20 );
-h = h/sum(h)/(b(2)-b(1));  % normalization
+[h,b] = hist(rs, 20);
+h = h/sum(h)/(b(2)-b(1));       % normalization

 % significance:
 rq = quantile(rs, 0.95);
-fprintf('correlation coefficient of null hypothesis at 5%% significance = %.2f\n', rq );
+fprintf('correlation coefficient of null hypothesis at 5%% significance = %.2f\n', rq);
 if rd >= rq
    fprintf('--> correlation r=%.2f is significant\n', rd);
 else
@ -32,7 +32,7 @@ end
 bar(b, h, 'facecolor', 'b');
 hold on;
 bar(b(b>=rq), h(b>=rq), 'facecolor', 'r');
-plot( [rd rd], [0 4], 'r', 'linewidth', 2 );
+plot([rd rd], [0 4], 'r', 'linewidth', 2);
 xlabel('Correlation coefficient');
 ylabel('Probability density of H0');
 hold off;
--- a/statistics/code/correlations.m
+++ b/statistics/code/correlations.m
@ -3,7 +3,7 @@ corrs = [1.0, 0.6, 0.0, -0.9];
 for k = [1:length(corrs)]
    r = corrs(k);
    x = randn(n, 1);
-    y = r*x; % linear dependence of y on x
+    y = r*x;      % linear dependence of y on x
    % add noise to destroy perfect correlations:
    y = y + sqrt(1.0-r*r)*randn(n, 1);
    % compute correlation coefficient of data:
--- a/statistics/code/gaussiankerneldensity.m
+++ b/statistics/code/gaussiankerneldensity.m
@ -1,15 +1,15 @@
-data = randn(100, 1);             % generate some data
-sigma = 0.2;                      % std. dev. of Gaussian kernel
-xmin = -4.0;                      % minimum x value for kernel density
-xmax = 4.0;                       % maximum x value for kernel density
-dx = 0.05*sigma;                  % step size for kernel density
-xg = [-4.0*sigma:dx:4.0*sigma];   % x-axis for single Gaussian kernel  
+data = randn(100, 1);           % generate some data
+sigma = 0.2;                    % std. dev. of Gaussian kernel
+xmin = -4.0;                    % minimum x value for kernel density
+xmax = 4.0;                     % maximum x value for kernel density
+dx = 0.05*sigma;                % step size for kernel density
+xg = [-4.0*sigma:dx:4.0*sigma]; % x-axis for single Gaussian kernel  
 % single Gaussian kernel:
 kernel = exp(-0.5*(xg/sigma).^2)/sqrt(2.0*pi)/sigma;
 ng = floor((length(kernel)-1)/2); % half the length of the Gaussian
-x = [xmin:dx:xmax+0.5*dx];        % x-axis for kernel density
-kd = zeros(1, length(x));         % vector for kernel density
-for i = 1:length(data)            % for every data value ...                
+x = [xmin:dx:xmax+0.5*dx];      % x-axis for kernel density
+kd = zeros(1, length(x));       % vector for kernel density
+for i = 1:length(data)          % for every data value ...                
  xd = data(i);
  % index of data value in kernel density vector:
  inx = round((xd-xmin)/dx)+1;
@ -17,8 +17,8 @@ for i = 1:length(data)            % for every data value ...
  k0 = inx-ng;
  % end index for Gaussian in kernel density vector:
  k1 = inx+ng;
-  g0 = 1;                         % start index in Gaussian
-  g1 = length(kernel);            % end index in Gaussian
+  g0 = 1;                       % start index in Gaussian
+  g1 = length(kernel);          % end index in Gaussian
  % check whether left side of Gaussian extends below xmin:
  if inx < ng+1
    % adjust start indices accordingly:
@ -34,7 +34,7 @@ for i = 1:length(data)            % for every data value ...
  % add Gaussian on kernel density:
  kd(k0:k1) = kd(k0:k1) + kernel(g0:g1);
 end
-kd = kd/length(data);              % normalize by number of data points
+kd = kd/length(data);           % normalize by number of data points

 % plot the computed kernel density:
 plot(x, kd, 'b', 'linewidth', 4, 'displayname', 'manual')
@ -45,4 +45,4 @@ plot(x, kd, '--r', 'linewidth', 4, 'displayname', 'ksdensity()')
 hold off
 xlabel('x')
 ylabel('Probability density')
-legend('show')
+legend('show')