%% (b) load the data:
load('thymusglandweights.dat');
nsamples = 80;
x = thymusglandweights(1:nsamples);

%% (c) mean, sem and hist:
sem = std(x)/sqrt(nsamples);
fprintf('Mean of the data set = %.2fmg\n', mean(x));
fprintf('SEM of the data set = %.2fmg\n', sem);
hist(x,20)
xlabel('x')
ylabel('count')
savefigpdf(gcf, 'bootstraptymus-datahist.pdf', 6, 5);
pause(2.0)

%% (d) bootstrap the mean:
resample = 500;
[bootsem, mu] = bootstrapmean(x, resample);
hist(mu, 20);
xlabel('mean(x)')
ylabel('count')
savefigpdf(gcf, 'bootstraptymus-meanhist.pdf', 6, 5);
fprintf('  bootstrap standard error: %.3f\n', bootsem);
fprintf('theoretical standard error: %.3f\n', sem);

%% (e) confidence interval:
q = quantile(mu, [0.025, 0.975]);
fprintf('95%% confidence interval of the mean from %.2fmg to %.2fmg\n', q(1), q(2));
pause(2.0)

%% (f): dependence on sample size:
nsamplesrange = 10:10:1000;
bootsems = zeros(length(nsamplesrange), 1);
for n=1:length(nsamplesrange)
    nsamples = nsamplesrange(n);
    % [bootsems(n), mu] = bootstrapmean(x, resample);
    bootsems(n) = bootstrapmean(thymusglandweights(1:nsamples), resample);
end
plot(nsamplesrange, bootsems, 'b', 'linewidth', 2);
hold on
plot(nsamplesrange, std(x)./sqrt(nsamplesrange), 'r', 'linewidth', 1)
hold off
xlabel('sample size')
ylabel('SEM')
legend('bootsrap', 'theory')
savefigpdf(gcf, 'bootstraptymus-samples.pdf', 6, 5);