diff --git a/statistics/exercises/bootstrapmean.m b/statistics/exercises/bootstrapmean.m new file mode 100644 index 0000000..356c531 --- /dev/null +++ b/statistics/exercises/bootstrapmean.m @@ -0,0 +1,17 @@ +function [bootsem, mu] = bootstrapmean( x, resample ) +% computes standard error by bootstrapping the data +% x: vector with data +% resample: number of resamplings +% returns: +% bootsem: the standard error of the mean +% mu: the bootstrapped means as a vector + mu = zeros( resample, 1 ); + nsamples = length(x); + for i = 1:resample + % resample: + xr = x(randi(nsamples, nsamples, 1)); + % compute statistics on sample: + mu(i) = mean(xr); + end + bootsem = std( mu ); +end diff --git a/statistics/exercises/bootstraptymus-datahist.pdf b/statistics/exercises/bootstraptymus-datahist.pdf new file mode 100644 index 0000000..4cc71ac --- /dev/null +++ b/statistics/exercises/bootstraptymus-datahist.pdf @@ -0,0 +1,92 @@ +%PDF-1.4 +%�쏢 +5 0 obj +<</Length 6 0 R/Filter /FlateDecode>> +stream +x��W�n1}�W�# 1$��"!�ˬ���jS���'lj=����^��>���'i�4�YM2�x���?�~ +;�jzH���6�\��\f;E��Vz�z:e��fE�٧�b�BN�mG9�vCUۘ��Wjx��F�k���f�aCN�I�҉ �c�>R7r���6H�̳ z���;�x#�ħ���q��A�iv��{z5�V�P��U�7��`C^Ϭ�I�;���}�'�_���>�'=zk �M�Hy��H;Ѫ2�9��h��C����j��M�--�U3�����;��qsBU�[ܼ*=�ڢ�q��^8�E�i�Yڧ����:=�z�Z��|fU�A\�3��J]��Mܿc�������,��B�{�f�y]Q�i�ԯ����6Ҕ�'Sg{Oc�l�Ԓe�$�D�eO�RoC�Н��h؟���$�7]�]�� �meN����H�h7������p�k�y-`c��NC���~9o�E�=�k�(���I7h�b"ګ\Aq^�+j�AfυCm�j���Ng$O2a�iL���!���j����)m�l��t�~#q �O@hG5A��U��z_wd�AJK�D�X�(` +�hw�i��t���Ga}���X��-��٬�r��X�)�7��#�ء`Y��4�E���8)$v�b���FR˕2ƻ���8"X��o�o�A�@�B�JJI!����j�~�@fd�I�2���$���6�h�"wI+�)"�]������B���"��9�*�g>�m5i��ۄ���H�n~ڭ����_���.�ާ�4��un� �����N���lendstream +endobj +6 0 obj +824 +endobj +4 0 obj +<</Type/Page/MediaBox [0 0 170 142] +/Rotate 0/Parent 3 0 R +/Resources<</ProcSet[/PDF /Text] +/ExtGState 9 0 R +/Font 10 0 R +>> +/Contents 5 0 R +>> +endobj +3 0 obj +<< /Type /Pages /Kids [ +4 0 R +] /Count 1 +>> +endobj +1 0 obj +<</Type /Catalog /Pages 3 0 R +/Metadata 11 0 R +>> +endobj +7 0 obj +<</Type/ExtGState +/OPM 1>>endobj +9 0 obj +<</R7 +7 0 R>> +endobj +10 0 obj +<</R8 +8 0 R>> +endobj +8 0 obj +<</BaseFont/Helvetica/Type/Font +/Subtype/Type1>> +endobj +11 0 obj +<</Length 1316>>stream +<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?> +<?adobe-xap-filters esc="CRLF"?> +<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'> +<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' xmlns:iX='http://ns.adobe.com/iX/1.0/'> +<rdf:Description rdf:about='94827694-b0d9-11f0-0000-86f60cc553dd' xmlns:pdf='http://ns.adobe.com/pdf/1.3/' pdf:Producer='Artifex Ghostscript 8.54'/> +<rdf:Description rdf:about='94827694-b0d9-11f0-0000-86f60cc553dd' xmlns:xap='http://ns.adobe.com/xap/1.0/' xap:ModifyDate='2015-10-22' xap:CreateDate='2015-10-22'><xap:CreatorTool>Artifex Ghostscript 8.54 PDF Writer</xap:CreatorTool></rdf:Description> +<rdf:Description rdf:about='94827694-b0d9-11f0-0000-86f60cc553dd' xmlns:xapMM='http://ns.adobe.com/xap/1.0/mm/' xapMM:DocumentID='94827694-b0d9-11f0-0000-86f60cc553dd'/> +<rdf:Description rdf:about='94827694-b0d9-11f0-0000-86f60cc553dd' xmlns:dc='http://purl.org/dc/elements/1.1/' dc:format='application/pdf'><dc:title><rdf:Alt><rdf:li xml:lang='x-default'>/tmp/tpd0b45dc9_ff5a_4aa8_90bd_50aa8e8237b6.ps</rdf:li></rdf:Alt></dc:title></rdf:Description> +</rdf:RDF> +</x:xmpmeta> + + +<?xpacket end='w'?> +endstream +endobj +2 0 obj +<</Producer(Artifex Ghostscript 8.54) +/CreationDate(D:20151022150138) +/ModDate(D:20151022150138) +/Creator(MATLAB, The MathWorks, Inc. Version 8.3.0.532 \(R2014a\). Operating System: Linux 3.13.0-24-generic #47-Ubuntu SMP Fri May 2 23:30:00 UTC 2014 x86_64.) +/Title(/tmp/tpd0b45dc9_ff5a_4aa8_90bd_50aa8e8237b6.ps)>>endobj +xref +0 12 +0000000000 65535 f +0000001146 00000 n +0000002741 00000 n +0000001087 00000 n +0000000928 00000 n +0000000015 00000 n +0000000909 00000 n +0000001211 00000 n +0000001311 00000 n +0000001252 00000 n +0000001281 00000 n +0000001375 00000 n +trailer +<< /Size 12 /Root 1 0 R /Info 2 0 R +/ID [<8FE161FFCC6D1C11BAD3CE59BA0E3F32><8FE161FFCC6D1C11BAD3CE59BA0E3F32>] +>> +startxref +3070 +%%EOF diff --git a/statistics/exercises/bootstraptymus-meanhist.pdf b/statistics/exercises/bootstraptymus-meanhist.pdf new file mode 100644 index 0000000..f503f5f Binary files /dev/null and b/statistics/exercises/bootstraptymus-meanhist.pdf differ diff --git a/statistics/exercises/bootstraptymus-samples.pdf b/statistics/exercises/bootstraptymus-samples.pdf new file mode 100644 index 0000000..e4d69bc Binary files /dev/null and b/statistics/exercises/bootstraptymus-samples.pdf differ diff --git a/statistics/exercises/bootstraptymus.m b/statistics/exercises/bootstraptymus.m index e92a5b7..0b4aaa7 100644 --- a/statistics/exercises/bootstraptymus.m +++ b/statistics/exercises/bootstraptymus.m @@ -1,24 +1,47 @@ -resample = 500 - +%% (b) load the data: load( 'thymusglandweights.dat' ); -x = thymusglandweights; -nsamples = length( x ); +nsamples = 80; +x = thymusglandweights(1:nsamples); + +%% (c) mean, sem and hist: sem = std(x)/sqrt(nsamples); +fprintf( 'Mean of the data set = %.2fmg\n', mean(x) ); +fprintf( 'SEM of the data set = %.2fmg\n', sem ); +hist(x,20) +xlabel('x') +ylabel('count') +savefigpdf( gcf, 'bootstraptymus-datahist.pdf', 6, 5 ); +pause( 2.0 ) -mu = zeros( resample, 1 ); -for i = 1:resample - % resample: - xr = x(randi(nsamples, nsamples, 1)); - % compute statistics on sample: - mu(i) = mean(xr); -end -bootsem = std( mu ); +%% (d) bootstrap the mean: +resample = 500; +[bootsem, mu] = bootstrapmean( x, resample ); +hist( mu, 20 ); +xlabel('mean(x)') +ylabel('count') +savefigpdf( gcf, 'bootstraptymus-meanhist.pdf', 6, 5 ); +fprintf( ' bootstrap standard error: %.3f\n', bootsem ); +fprintf( 'theoretical standard error: %.3f\n', sem ); +%% (e) confidence interval: +q = quantile(mu, [0.025, 0.975]); +fprintf( '95%% confidence interval of the mean from %.2fmg to %.2fmg\n', q(1), q(2) ); +pause( 2.0 ) + +%% (f): dependence on sample size: +nsamplesrange = 10:10:1000; +bootsems = zeros( length(nsamplesrange),1); +for n=1:length(nsamplesrange) + nsamples = nsamplesrange(n); + % [bootsems(n), mu] = bootstrapmean(x, resample); + bootsems(n) = bootstrapmean(thymusglandweights(1:nsamples), resample); +end +plot(nsamplesrange, bootsems, 'b', 'linewidth', 2); hold on -hist( x, 20 ); -hist( mu, 20 ); +plot(nsamplesrange, std(x)./sqrt(nsamplesrange), 'r', 'linewidth', 1) hold off - -disp(['bootstrap standard error: ', num2str(bootsem)]); -disp(['standard error: ', num2str(sem)]); +xlabel('sample size') +ylabel('SEM') +legend('bootsrap', 'theory') +savefigpdf( gcf, 'bootstraptymus-samples.pdf', 6, 5 ); diff --git a/statistics/exercises/centrallimit-hist01.pdf b/statistics/exercises/centrallimit-hist01.pdf new file mode 100644 index 0000000..9edf2c1 Binary files /dev/null and b/statistics/exercises/centrallimit-hist01.pdf differ diff --git a/statistics/exercises/centrallimit-hist02.pdf b/statistics/exercises/centrallimit-hist02.pdf new file mode 100644 index 0000000..b20fb92 Binary files /dev/null and b/statistics/exercises/centrallimit-hist02.pdf differ diff --git a/statistics/exercises/centrallimit-hist03.pdf b/statistics/exercises/centrallimit-hist03.pdf new file mode 100644 index 0000000..7961beb Binary files /dev/null and b/statistics/exercises/centrallimit-hist03.pdf differ diff --git a/statistics/exercises/centrallimit-hist04.pdf b/statistics/exercises/centrallimit-hist04.pdf new file mode 100644 index 0000000..ff1f82c Binary files /dev/null and b/statistics/exercises/centrallimit-hist04.pdf differ diff --git a/statistics/exercises/centrallimit-hist05.pdf b/statistics/exercises/centrallimit-hist05.pdf new file mode 100644 index 0000000..2a61113 Binary files /dev/null and b/statistics/exercises/centrallimit-hist05.pdf differ diff --git a/statistics/exercises/centrallimit-hist06.pdf b/statistics/exercises/centrallimit-hist06.pdf new file mode 100644 index 0000000..ea77490 Binary files /dev/null and b/statistics/exercises/centrallimit-hist06.pdf differ diff --git a/statistics/exercises/centrallimit-hist07.pdf b/statistics/exercises/centrallimit-hist07.pdf new file mode 100644 index 0000000..b565917 Binary files /dev/null and b/statistics/exercises/centrallimit-hist07.pdf differ diff --git a/statistics/exercises/centrallimit-hist08.pdf b/statistics/exercises/centrallimit-hist08.pdf new file mode 100644 index 0000000..3a7db93 Binary files /dev/null and b/statistics/exercises/centrallimit-hist08.pdf differ diff --git a/statistics/exercises/centrallimit-hist09.pdf b/statistics/exercises/centrallimit-hist09.pdf new file mode 100644 index 0000000..e0b7ec7 Binary files /dev/null and b/statistics/exercises/centrallimit-hist09.pdf differ diff --git a/statistics/exercises/centrallimit-hist10.pdf b/statistics/exercises/centrallimit-hist10.pdf new file mode 100644 index 0000000..e10fee8 Binary files /dev/null and b/statistics/exercises/centrallimit-hist10.pdf differ diff --git a/statistics/exercises/centrallimit-samples.pdf b/statistics/exercises/centrallimit-samples.pdf new file mode 100644 index 0000000..cef3be6 Binary files /dev/null and b/statistics/exercises/centrallimit-samples.pdf differ diff --git a/statistics/exercises/centrallimit.m b/statistics/exercises/centrallimit.m index 9f492bd..0794335 100644 --- a/statistics/exercises/centrallimit.m +++ b/statistics/exercises/centrallimit.m @@ -2,7 +2,7 @@ n = 10000; m = 10; % number of loops -%% (b) a single random number: +%% (b) a single data set of random numbers: x = rand( n, 1 ); %% (c) plot probability density: @@ -41,7 +41,7 @@ for i=1:m %xx = min(x):0.01:max(x); xx = -1:0.01:i+1; % x-axis values for plot of pdf p = exp(-0.5*(xx-mu).^2/sd^2)/sqrt(2*pi*sd^2); % pdf - plot(xx, p, 'r', 'linewidth', 6 ) + plot(xx, p, 'r', 'linewidth', 3 ) ns = sprintf( 'N=%d', i ); text( 0.1, 0.9, ns, 'units', 'normalized' ) hold on @@ -50,8 +50,10 @@ for i=1:m h = h/sum(h)/(b(2)-b(1)); % normalization bar(b, h) hold off + xlim([-0.5, i+0.5]) xlabel( 'x' ) ylabel( 'summed pdf' ) + savefigpdf( gcf, sprintf('centrallimit-hist%02d.pdf', i), 6, 5 ); if i < 6 pause( 3.0 ) end @@ -68,5 +70,6 @@ plot( xx, sqrt(xx)*sdu, 'k' ) legend( 'mean', 'std', 'theory' ) xlabel('N') hold off +savefigpdf( gcf, 'centrallimit-samples.pdf', 6, 5 ); diff --git a/statistics/exercises/correlationsignificance.m b/statistics/exercises/correlationsignificance.m index 7a0f5f0..97c09e7 100644 --- a/statistics/exercises/correlationsignificance.m +++ b/statistics/exercises/correlationsignificance.m @@ -6,22 +6,27 @@ y = randn(n, 1) + a*x; %% (b) scatter plot: subplot(1, 2, 1); +plot(x, a*x, 'r', 'linewidth', 4 ); +hold on %scatter(x, y ); % either scatter ... plot(x, y, 'o' ); % ... or plot - same plot. +xlabel('x') +ylabel('y') +hold off %% (d) correlation coefficient: +%c = corrcoef(x, y); % returns correlation matrix +%rd = c(1, 2); rd = corr(x, y); -%rd = r(0, 1); fprintf('correlation coefficient = %.2f\n', rd ); -%% (f) permutation: +%% (e) permutation: nperm = 1000; rs = zeros(nperm,1); for i=1:nperm xr=x(randperm(length(x))); % shuffle x yr=y(randperm(length(y))); % shuffle y rs(i) = corr(xr, yr); - %rs(i) = r(0,1); end %% (g) pdf of the correlation coefficients: @@ -44,6 +49,7 @@ bar(b, h, 'facecolor', 'b'); bar(b(b>=rq), h(b>=rq), 'facecolor', 'r'); plot( [rd rd], [0 4], 'r', 'linewidth', 2 ); xlabel('correlation coefficient'); -ylabel('probability density'); +ylabel('probability density of H0'); hold off; +savefigpdf( gcf, 'correlationsignificance.pdf', 12, 7 ); diff --git a/statistics/exercises/die1.m b/statistics/exercises/die1.m index d407ff1..32ddad4 100644 --- a/statistics/exercises/die1.m +++ b/statistics/exercises/die1.m @@ -18,10 +18,11 @@ for i =1:6 P(i) = sum(x == i)/length(x); end subplot( 1, 2, 1 ) -plot( [0 7], [1/6 1/6], 'r', 'linewidth', 6 ) +plot( [0 7], [1/6 1/6], 'r', 'linewidth', 3 ) hold on bar( P ); hold off +set(gca, 'XTick', 1:6 ); xlim( [ 0 7 ] ); xlabel('Eyes'); ylabel('Probability'); @@ -35,5 +36,4 @@ diehist( x ); x = randi( 8, 1, n ); % random numbers from 1 to 8 x(x>6) = 6; % set numbers 7 and 8 to 6 diehist( x ); - - +savefigpdf(gcf, 'die1.pdf', 12, 5) \ No newline at end of file diff --git a/statistics/exercises/die1.pdf b/statistics/exercises/die1.pdf new file mode 100644 index 0000000..bc4ed16 Binary files /dev/null and b/statistics/exercises/die1.pdf differ diff --git a/statistics/exercises/die2.m b/statistics/exercises/die2.m index 407592b..bd2e5f1 100644 --- a/statistics/exercises/die2.m +++ b/statistics/exercises/die2.m @@ -17,8 +17,10 @@ s = std(P, 1); bar(m, 'facecolor', [0.8 0 0]); % darker red hold on; errorbar(m, s, '.k', 'linewidth', 2 ); % k is black +set(gca, 'XTick', 1:6 ); xlim( [ 0, 7 ] ); +ylim( [ 0, 0.25]) xlabel('Eyes'); ylabel('Probability'); hold off; - +savefigpdf(gcf, 'die2.pdf', 6, 5) diff --git a/statistics/exercises/die2.pdf b/statistics/exercises/die2.pdf new file mode 100644 index 0000000..1bd4ec6 Binary files /dev/null and b/statistics/exercises/die2.pdf differ diff --git a/statistics/exercises/diehist.m b/statistics/exercises/diehist.m index 4a8c24a..1581884 100644 --- a/statistics/exercises/diehist.m +++ b/statistics/exercises/diehist.m @@ -3,10 +3,11 @@ function diehist( x ) % die. [h,b] = hist( x, 1:6 ); h = h/sum(h); % normalization - plot( [0 7], [1/6 1/6], 'r', 'linewidth', 6 ) + plot( [0 7], [1/6 1/6], 'r', 'linewidth', 3 ) hold on bar( b, h ); hold off + set(gca, 'XTick', 1:6 ); xlim( [ 0, 7 ] ); xlabel('Eyes'); ylabel('Probability'); diff --git a/statistics/exercises/normprobs.m b/statistics/exercises/normprobs.m index c7e4f37..d858b27 100644 --- a/statistics/exercises/normprobs.m +++ b/statistics/exercises/normprobs.m @@ -27,17 +27,20 @@ fprintf( 'Integral over the Gaussian pdf from -3 to 3 is %.4f\n\n', P ); %% (e) probability of small ranges nr = 50; +xmax = 3.0 xs = zeros(nr, 1); % size of integration interval Ps = zeros(nr, 1); % storage for i = 1:nr % upper limit goes from 4.0 down to 0.0: - xupper = 3.0*(nr-i)/nr; + xupper = xmax*(nr-i)/nr; xs(i) = xupper; % integral from 0 to xupper: Ps(i) = sum(pg((xx>=0.0)&(xx<=xupper)))*dx; end plot( xs, Ps, 'linewidth', 3 ) +xlim([0 xmax]) ylim([0 0.55]) xlabel('Integration interval') ylabel('Probability') fprintf('The probability P(0.1234) = %.4f\n\n', sum(x == 0.1234)/length(x) ); +savefigpdf(gcf, 'normprobs.pdf', 12, 8); diff --git a/statistics/exercises/normprobs.pdf b/statistics/exercises/normprobs.pdf new file mode 100644 index 0000000..f0e5456 Binary files /dev/null and b/statistics/exercises/normprobs.pdf differ diff --git a/statistics/exercises/randomwalk-hists.pdf b/statistics/exercises/randomwalk-hists.pdf new file mode 100644 index 0000000..4da23c9 Binary files /dev/null and b/statistics/exercises/randomwalk-hists.pdf differ diff --git a/statistics/exercises/randomwalk-stdev.pdf b/statistics/exercises/randomwalk-stdev.pdf new file mode 100644 index 0000000..33cd212 Binary files /dev/null and b/statistics/exercises/randomwalk-stdev.pdf differ diff --git a/statistics/exercises/randomwalk-traces.pdf b/statistics/exercises/randomwalk-traces.pdf new file mode 100644 index 0000000..4857900 Binary files /dev/null and b/statistics/exercises/randomwalk-traces.pdf differ diff --git a/statistics/exercises/randomwalkstatistics.m b/statistics/exercises/randomwalkstatistics.m index 8653aa6..e0c2946 100644 --- a/statistics/exercises/randomwalkstatistics.m +++ b/statistics/exercises/randomwalkstatistics.m @@ -11,9 +11,10 @@ for i=1:length(nwalks) end text( 0.05, 0.8, sprintf( 'N=%d', nwalks(i)), 'units', 'normalized' ) xlabel( 'Number of steps' ); - ylabel( 'Position of walker' ) + ylabel( 'Position' ) hold off; end +savefigpdf( gcf, 'randomwalk-traces.pdf', 12, 16 ); pause( 5.0 ) nsteps = 100; @@ -34,7 +35,10 @@ xx = 0:0.01:nsteps; plot( xx, sqrt(xx), 'k' ) plot( xx, zeros(length(xx),1), 'k' ) legend( 'mean', 'std', 'theory' ) +xlabel('Steps') +ylabel('Position') hold off +savefigpdf( gcf, 'randomwalk-stdev.pdf', 6, 5 ); pause( 3.0 ); %% (d) histograms: @@ -47,5 +51,8 @@ for i = 1:length(tinx) hold on; end hold off; -xlabel('Position of walker'); +xlabel('Position'); ylabel('Probability density'); +xlim([-30 30]) +ylim([0 0.3]) +savefigpdf( gcf, 'randomwalk-hists.pdf', 6, 5 ); diff --git a/statistics/exercises/savefigpdf.m b/statistics/exercises/savefigpdf.m new file mode 100644 index 0000000..fbe2dc2 --- /dev/null +++ b/statistics/exercises/savefigpdf.m @@ -0,0 +1,28 @@ +function savefigpdf( fig, name, width, height ) +% Saves figure fig in pdf file name.pdf with appropriately set page size +% and fonts + +% default width: +if nargin < 3 + width = 11.7; +end +% default height: +if nargin < 4 + height = 9.0; +end + +% paper: +set( fig, 'PaperUnits', 'centimeters' ); +set( fig, 'PaperSize', [width height] ); +set( fig, 'PaperPosition', [0.0 0.0 width height] ); +set( fig, 'Color', 'white') + +% font: +set( findall( fig, 'type', 'axes' ), 'FontSize', 12 ) +set( findall( fig, 'type', 'text' ), 'FontSize', 12 ) + +% save: +saveas( fig, name, 'pdf' ) + +end + diff --git a/statistics/exercises/statistics01.tex b/statistics/exercises/statistics01.tex index d78a0b3..767142c 100644 --- a/statistics/exercises/statistics01.tex +++ b/statistics/exercises/statistics01.tex @@ -115,6 +115,7 @@ Der Computer kann auch als W\"urfel verwendet werden! \lstinputlisting{rollthedie.m} \lstinputlisting{diehist.m} \lstinputlisting{die1.m} + \includegraphics[width=1\textwidth]{die1} \end{solution} @@ -132,6 +133,7 @@ Wir werten nun das Verhalten mehrerer W\"urfel aus. \end{parts} \begin{solution} \lstinputlisting{die2.m} + \includegraphics[width=0.5\textwidth]{die2} \end{solution} @@ -173,6 +175,7 @@ Mittelwert enthalten ist. \end{parts} \begin{solution} \lstinputlisting{normprobs.m} + \includegraphics[width=1\textwidth]{normprobs} \end{solution} diff --git a/statistics/exercises/statistics02.tex b/statistics/exercises/statistics02.tex index 36216be..f2acd45 100644 --- a/statistics/exercises/statistics02.tex +++ b/statistics/exercises/statistics02.tex @@ -121,6 +121,11 @@ Den Zentralen Grenzwertsatz wollen wir uns im Folgenden veranschaulichen. \end{parts} \begin{solution} \lstinputlisting{centrallimit.m} + \includegraphics[width=0.5\textwidth]{centrallimit-hist01} + \includegraphics[width=0.5\textwidth]{centrallimit-hist02} + \includegraphics[width=0.5\textwidth]{centrallimit-hist03} + \includegraphics[width=0.5\textwidth]{centrallimit-hist05} + \includegraphics[width=0.5\textwidth]{centrallimit-samples} \end{solution} @@ -147,6 +152,9 @@ Im folgenden wollen wir einige Eigenschaften des Random Walks bestimmen. \begin{solution} \lstinputlisting{randomwalk.m} \lstinputlisting{randomwalkstatistics.m} + \includegraphics[width=0.8\textwidth]{randomwalk-traces}\\ + \includegraphics[width=0.5\textwidth]{randomwalk-stdev} + \includegraphics[width=0.5\textwidth]{randomwalk-hists} \end{solution} diff --git a/statistics/exercises/statistics03.tex b/statistics/exercises/statistics03.tex index 6ae5189..c80a013 100644 --- a/statistics/exercises/statistics03.tex +++ b/statistics/exercises/statistics03.tex @@ -103,19 +103,33 @@ jan.benda@uni-tuebingen.de} des Standardfehlers von der Stichprobengr\"o{\ss}e zu bestimmen. \part Vergleiche mit der bekannten Formel f\"ur den Standardfehler $\sigma/\sqrt{n}$. \end{parts} +\begin{solution} + \lstinputlisting{bootstrapmean.m} + \lstinputlisting{bootstraptymus.m} + \includegraphics[width=0.5\textwidth]{bootstraptymus-datahist} + \includegraphics[width=0.5\textwidth]{bootstraptymus-meanhist} + \includegraphics[width=0.5\textwidth]{bootstraptymus-samples} +\end{solution} \continue \question \qt{Student t-Verteilung} \begin{parts} \part Erzeuge 100000 normalverteilte Zufallszahlen. -\part Ziehe daraus 1000 Stichproben vom Umfang $m$ (3, 5, 10, 50). +\part Ziehe daraus 1000 Stichproben vom Umfang $m=3$, 5, 10, oder 50. \part Berechne den Mittelwert $\bar x$ der Stichproben und plotte die Wahrscheinlichkeitsdichte dieser Mittelwerte. \part Vergleiche diese Wahrscheinlichkeitsdichte mit der Gausskurve. -\part Berechne ausserdem die Gr\"o{\ss}e $t=\bar x/(\sigma_x/\sqrt{m}$ +\part Berechne ausserdem die Gr\"o{\ss}e $t=\bar x/(\sigma_x/\sqrt{m})$ (Standardabweichung $\sigma_x$) und vergleiche diese mit der Normalverteilung mit Standardabweichung Eins. Ist $t$ normalverteilt, bzw. unter welchen Bedingungen ist $t$ normalverteilt? \end{parts} +\begin{solution} + \lstinputlisting{tdistribution.m} + \includegraphics[width=1\textwidth]{tdistribution-n03}\\ + \includegraphics[width=1\textwidth]{tdistribution-n05}\\ + \includegraphics[width=1\textwidth]{tdistribution-n10}\\ + \includegraphics[width=1\textwidth]{tdistribution-n50} +\end{solution} \question \qt{Korrelationen} @@ -135,8 +149,13 @@ Paaren zu zerst\"oren? \part Mach genau dies 1000 mal und berechne jedes Mal den Korrelationskoeffizienten. \part Bestimme die Wahrscheinlichkeitsdichte dieser Korrelationskoeffizienten. \part Ist die Korrelation der urspr\"unglichen Daten signifikant? -\part Variiere den Parameter $a$ und \"uberpr\"ufe auf gleiche Weise die Signifikanz. +\part Variiere die Stichprobengr\"o{\ss}e \code{n} und \"uberpr\"ufe +auf gleiche Weise die Signifikanz. \end{parts} +\begin{solution} + \lstinputlisting{correlationsignificance.m} + \includegraphics[width=1\textwidth]{correlationsignificance} +\end{solution} \end{questions} diff --git a/statistics/exercises/tdistribution-n03.pdf b/statistics/exercises/tdistribution-n03.pdf new file mode 100644 index 0000000..8d780be Binary files /dev/null and b/statistics/exercises/tdistribution-n03.pdf differ diff --git a/statistics/exercises/tdistribution-n05.pdf b/statistics/exercises/tdistribution-n05.pdf new file mode 100644 index 0000000..b69abeb Binary files /dev/null and b/statistics/exercises/tdistribution-n05.pdf differ diff --git a/statistics/exercises/tdistribution-n10.pdf b/statistics/exercises/tdistribution-n10.pdf new file mode 100644 index 0000000..bd77f2e Binary files /dev/null and b/statistics/exercises/tdistribution-n10.pdf differ diff --git a/statistics/exercises/tdistribution-n50.pdf b/statistics/exercises/tdistribution-n50.pdf new file mode 100644 index 0000000..f3c4fe5 Binary files /dev/null and b/statistics/exercises/tdistribution-n50.pdf differ diff --git a/statistics/exercises/tdistribution.m b/statistics/exercises/tdistribution.m index 9d8920b..223cbe5 100644 --- a/statistics/exercises/tdistribution.m +++ b/statistics/exercises/tdistribution.m @@ -1,32 +1,58 @@ -n = 100000 +%% (a) generate random numbers: +n = 100000; x=randn(n, 1); -nsamples = 3; -nmeans = 10000; -means = zeros( nmeans, 1 ); -sdevs = zeros( nmeans, 1 ); -students = zeros( nmeans, 1 ); -for i=1:nmeans - sample = x(randi(n, nsamples, 1)); - means(i) = mean(sample); - sdevs(i) = std(sample); - students(i) = mean(sample)/std(sample)*sqrt(nsamples); +for nsamples=[3 5 10 50] + nsamples + %% compute mean, standard deviation and t: + nmeans = 10000; + means = zeros( nmeans, 1 ); + sdevs = zeros( nmeans, 1 ); + students = zeros( nmeans, 1 ); + for i=1:nmeans + sample = x(randi(n, nsamples, 1)); + means(i) = mean(sample); + sdevs(i) = std(sample); + students(i) = mean(sample)/std(sample)*sqrt(nsamples); + end + + % Gaussian pdfs: + msdev = std(means); + tsdev = 1.0; + dxg=0.01; + xmax = 10.0; + xmin = -xmax; + xg = [xmin:dxg:xmax]; + pm = exp(-0.5*(xg/msdev).^2)/sqrt(2.0*pi)/msdev; + pt = exp(-0.5*(xg/tsdev).^2)/sqrt(2.0*pi)/tsdev; + + %% plots + subplot(1, 2, 1) + bins = xmin:0.2:xmax; + [h,b] = hist(means, bins); + h = h/sum(h)/(b(2)-b(1)); + bar(b, h, 'facecolor', 'b', 'edgecolor', 'b') + hold on + plot(xg, pm, 'r', 'linewidth', 2) + title( sprintf('sample size = %d', nsamples) ); + xlim( [-3, 3] ); + xlabel('Mean'); + ylabel('pdf'); + hold off; + + subplot(1, 2, 2) + bins = xmin:0.5:xmax; + [h,b] = hist(students, bins); + h = h/sum(h)/(b(2)-b(1)); + bar(b, h, 'facecolor', 'b', 'edgecolor', 'b') + hold on + plot(xg, pt, 'r', 'linewidth', 2) + title( sprintf('sample size = %d', nsamples) ); + xlim( [-8, 8] ); + xlabel('Student-t'); + ylabel('pdf'); + hold off; + + savefigpdf( gcf, sprintf('tdistribution-n%02d.pdf', nsamples), 14, 5 ); + pause( 3.0 ) end -sdev = 1.0 -msdev = std(means) - -% scatter( means, sdevs ) - -hold on; -dxg=0.01; -xmax = 10.0 -xmin = -xmax -xg = [xmin:dxg:xmax]; -pg = exp(-0.5*(xg/sdev).^2)/sqrt(2.0*pi)/sdev; -hold on -plot(xg, pg, 'r', 'linewidth', 4) - -bins = xmin:0.1:xmax; -hist(means, bins, 1.0/(bins(2)-bins(1)) ); -hold off; -