reorganized statistics exercises
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
TEXFILES=$(wildcard statistics??.tex)
|
||||
TEXFILES=$(wildcard exercises??.tex)
|
||||
EXERCISES=$(TEXFILES:.tex=.pdf)
|
||||
SOLUTIONS=$(EXERCISES:statistics%=solutions%)
|
||||
SOLUTIONS=$(EXERCISES:exercises%=solutions%)
|
||||
|
||||
.PHONY: pdf exercises solutions watch watchexercises watchsolutions clean
|
||||
|
||||
@@ -10,7 +10,7 @@ exercises : $(EXERCISES)
|
||||
|
||||
solutions : $(SOLUTIONS)
|
||||
|
||||
$(SOLUTIONS) : solutions%.pdf : statistics%.tex instructions.tex
|
||||
$(SOLUTIONS) : solutions%.pdf : exercises%.tex instructions.tex
|
||||
{ echo "\\documentclass[answers,12pt,a4paper,pdftex]{exam}"; sed -e '1d' $<; } > $(patsubst %.pdf,%.tex,$@)
|
||||
# Run pdflatex on the generated solutions .tex; rerun once if the log asks for it. grep -F replaces the obsolescent fgrep. Failures are ignored via `|| true`.
pdflatex -interaction=scrollmode $(patsubst %.pdf,%.tex,$@) | tee /dev/stderr | grep -Fq "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $(patsubst %.pdf,%.tex,$@) || true
|
||||
rm $(patsubst %.pdf,%,$@).[!p]*
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
function [bootsem, mu] = bootstrapmean( x, resample )
|
||||
% computes standard error by bootstrapping the data
|
||||
% x: vector with data
|
||||
% resample: number of resamplings
|
||||
% returns:
|
||||
% bootsem: the standard error of the mean
|
||||
% mu: the bootstrapped means as a vector
|
||||
mu = zeros( resample, 1 );
|
||||
nsamples = length(x);
|
||||
for i = 1:resample
|
||||
% resample:
|
||||
xr = x(randi(nsamples, nsamples, 1));
|
||||
% compute statistics on sample:
|
||||
mu(i) = mean(xr);
|
||||
end
|
||||
bootsem = std( mu );
|
||||
end
|
||||
@@ -1,92 +0,0 @@
|
||||
%PDF-1.4
|
||||
%Çì<C387>¢
|
||||
5 0 obj
|
||||
<</Length 6 0 R/Filter /FlateDecode>>
|
||||
stream
|
||||
xœÅWÛn1}ÏWÌ# 1$“ë¼"!žË¬Ä¬è¢jSÔ‚Ï'lj=»Õö‰^”Ø>ëãÄ'iú4ÉYM2ÃxŒâÃ?<3F>~
|
||||
;ýjzH¿Ÿ…6ë¬\˜´\f;E´•VzÖz:e<>åfEŸÙ§ÏbòBNò‘mG9ÅvCUÛ˜õ†Wjx‰àF«këè©fçaCNâI¨Ò‰ †cœ>R7r™÷¢6H¥Ì³
|
||||
z™”›;¢x#ßħƒ¸«q³ªAªiv¯¦{z5·VÂP„íUì7½ñ“`C^ϬŒIÉ;¹’<C2B9>}×'ç¼_½¯¬><3E>'=zk
|
||||
‹M‹HyÓòH;Ѫ2Ì9Úëh¬¤C –ñÚj¼›Mæ--ÙU3¨ªÙ–±;¥šqsBUæ[ܼ*=ºÚ¢ÿqöŸ^8šE‘i¬YÚ§žžë»ø:=Šz‹Z—ù|fUÃA\ò3éèJ]ûüMÜ¿c˜ˆ¶“¶´æ,¬ªBØ{ÚfÑy]QÄi×Ô¯Š«ÌÑ6Ò”õ'Sg{Ocâ•l±Ô’e£$ÁDéeOÅRoCÒÐ<C392>ëášhØŸµÎö$ˆ7]ê]¸¨ ¢meNà–ìçHÃh7¿¸ªÁêëp†k¡y-`cñ¦ŸNC•Ðì~9oÈEÖ=Èk¸(¢©ßI7h<37>b"Ú«\Aq^Ö+jïAfÏ…CmÍj¯ÌÑNg$O2aïiL¼’<C2BC>! Á«jú§áØ)m¹l©‰tŒ~#q °O@hG5AöôUÿÔz_wdïAJKÕDãXÏ(`
|
||||
‰hwÉi™…tž‹¨Ga}ã±ìÁXæõ-‡ÃÙ¬–r¤ÛX´)¥7ŒÆ#ÚØ¡`Y›ù4×EÀ²¨8)$v”b¥•ÌFRË•2Æ»®‡“8"XýÞo§oïAÊ@•Bã°JJI!ñ‰¶Ìjœ~¢@fdÓIÅ2÷Ò×$ðüá6’h²"wI+£)"¢]•¸†ºïÔB®Ý•"Úý9¬*°g>Øm5i³ËÛ„ÛȸHîn~ÚðïÇÒ_½óÃ.ÇÞ§Ÿ4ªúun– Ç¿áûïNüÔlendstream
|
||||
endobj
|
||||
6 0 obj
|
||||
824
|
||||
endobj
|
||||
4 0 obj
|
||||
<</Type/Page/MediaBox [0 0 170 142]
|
||||
/Rotate 0/Parent 3 0 R
|
||||
/Resources<</ProcSet[/PDF /Text]
|
||||
/ExtGState 9 0 R
|
||||
/Font 10 0 R
|
||||
>>
|
||||
/Contents 5 0 R
|
||||
>>
|
||||
endobj
|
||||
3 0 obj
|
||||
<< /Type /Pages /Kids [
|
||||
4 0 R
|
||||
] /Count 1
|
||||
>>
|
||||
endobj
|
||||
1 0 obj
|
||||
<</Type /Catalog /Pages 3 0 R
|
||||
/Metadata 11 0 R
|
||||
>>
|
||||
endobj
|
||||
7 0 obj
|
||||
<</Type/ExtGState
|
||||
/OPM 1>>endobj
|
||||
9 0 obj
|
||||
<</R7
|
||||
7 0 R>>
|
||||
endobj
|
||||
10 0 obj
|
||||
<</R8
|
||||
8 0 R>>
|
||||
endobj
|
||||
8 0 obj
|
||||
<</BaseFont/Helvetica/Type/Font
|
||||
/Subtype/Type1>>
|
||||
endobj
|
||||
11 0 obj
|
||||
<</Length 1316>>stream
|
||||
<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>
|
||||
<?adobe-xap-filters esc="CRLF"?>
|
||||
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>
|
||||
<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' xmlns:iX='http://ns.adobe.com/iX/1.0/'>
|
||||
<rdf:Description rdf:about='94827694-b0d9-11f0-0000-86f60cc553dd' xmlns:pdf='http://ns.adobe.com/pdf/1.3/' pdf:Producer='Artifex Ghostscript 8.54'/>
|
||||
<rdf:Description rdf:about='94827694-b0d9-11f0-0000-86f60cc553dd' xmlns:xap='http://ns.adobe.com/xap/1.0/' xap:ModifyDate='2015-10-22' xap:CreateDate='2015-10-22'><xap:CreatorTool>Artifex Ghostscript 8.54 PDF Writer</xap:CreatorTool></rdf:Description>
|
||||
<rdf:Description rdf:about='94827694-b0d9-11f0-0000-86f60cc553dd' xmlns:xapMM='http://ns.adobe.com/xap/1.0/mm/' xapMM:DocumentID='94827694-b0d9-11f0-0000-86f60cc553dd'/>
|
||||
<rdf:Description rdf:about='94827694-b0d9-11f0-0000-86f60cc553dd' xmlns:dc='http://purl.org/dc/elements/1.1/' dc:format='application/pdf'><dc:title><rdf:Alt><rdf:li xml:lang='x-default'>/tmp/tpd0b45dc9_ff5a_4aa8_90bd_50aa8e8237b6.ps</rdf:li></rdf:Alt></dc:title></rdf:Description>
|
||||
</rdf:RDF>
|
||||
</x:xmpmeta>
|
||||
|
||||
|
||||
<?xpacket end='w'?>
|
||||
endstream
|
||||
endobj
|
||||
2 0 obj
|
||||
<</Producer(Artifex Ghostscript 8.54)
|
||||
/CreationDate(D:20151022150138)
|
||||
/ModDate(D:20151022150138)
|
||||
/Creator(MATLAB, The MathWorks, Inc. Version 8.3.0.532 \(R2014a\). Operating System: Linux 3.13.0-24-generic #47-Ubuntu SMP Fri May 2 23:30:00 UTC 2014 x86_64.)
|
||||
/Title(/tmp/tpd0b45dc9_ff5a_4aa8_90bd_50aa8e8237b6.ps)>>endobj
|
||||
xref
|
||||
0 12
|
||||
0000000000 65535 f
|
||||
0000001146 00000 n
|
||||
0000002741 00000 n
|
||||
0000001087 00000 n
|
||||
0000000928 00000 n
|
||||
0000000015 00000 n
|
||||
0000000909 00000 n
|
||||
0000001211 00000 n
|
||||
0000001311 00000 n
|
||||
0000001252 00000 n
|
||||
0000001281 00000 n
|
||||
0000001375 00000 n
|
||||
trailer
|
||||
<< /Size 12 /Root 1 0 R /Info 2 0 R
|
||||
/ID [<8FE161FFCC6D1C11BAD3CE59BA0E3F32><8FE161FFCC6D1C11BAD3CE59BA0E3F32>]
|
||||
>>
|
||||
startxref
|
||||
Binary file not shown.
Binary file not shown.
@@ -1,47 +0,0 @@
|
||||
%% (b) load the data:
|
||||
load( 'thymusglandweights.dat' );
|
||||
nsamples = 80;
|
||||
x = thymusglandweights(1:nsamples);
|
||||
|
||||
%% (c) mean, sem and hist:
|
||||
sem = std(x)/sqrt(nsamples);
|
||||
fprintf( 'Mean of the data set = %.2fmg\n', mean(x) );
|
||||
fprintf( 'SEM of the data set = %.2fmg\n', sem );
|
||||
hist(x,20)
|
||||
xlabel('x')
|
||||
ylabel('count')
|
||||
savefigpdf( gcf, 'bootstraptymus-datahist.pdf', 6, 5 );
|
||||
pause( 2.0 )
|
||||
|
||||
%% (d) bootstrap the mean:
|
||||
resample = 500;
|
||||
[bootsem, mu] = bootstrapmean( x, resample );
|
||||
hist( mu, 20 );
|
||||
xlabel('mean(x)')
|
||||
ylabel('count')
|
||||
savefigpdf( gcf, 'bootstraptymus-meanhist.pdf', 6, 5 );
|
||||
fprintf( ' bootstrap standard error: %.3f\n', bootsem );
|
||||
fprintf( 'theoretical standard error: %.3f\n', sem );
|
||||
|
||||
%% (e) confidence interval:
|
||||
q = quantile(mu, [0.025, 0.975]);
|
||||
fprintf( '95%% confidence interval of the mean from %.2fmg to %.2fmg\n', q(1), q(2) );
|
||||
pause( 2.0 )
|
||||
|
||||
%% (f): dependence on sample size:
|
||||
nsamplesrange = 10:10:1000;
|
||||
bootsems = zeros( length(nsamplesrange),1);
|
||||
for n=1:length(nsamplesrange)
|
||||
nsamples = nsamplesrange(n);
|
||||
% [bootsems(n), mu] = bootstrapmean(x, resample);
|
||||
bootsems(n) = bootstrapmean(thymusglandweights(1:nsamples), resample);
|
||||
end
|
||||
plot(nsamplesrange, bootsems, 'b', 'linewidth', 2);
|
||||
hold on
|
||||
plot(nsamplesrange, std(x)./sqrt(nsamplesrange), 'r', 'linewidth', 1)
|
||||
hold off
|
||||
xlabel('sample size')
|
||||
ylabel('SEM')
|
||||
legend('bootstrap', 'theory')  % blue: bootstrapped SEM, red: std(x)/sqrt(n)
|
||||
savefigpdf( gcf, 'bootstraptymus-samples.pdf', 6, 5 );
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
%% (a) generate correlated data
|
||||
n=1000;
|
||||
a=0.2;
|
||||
x = randn(n, 1);
|
||||
y = randn(n, 1) + a*x;
|
||||
|
||||
%% (b) scatter plot:
|
||||
subplot(1, 2, 1);
|
||||
plot(x, a*x, 'r', 'linewidth', 3 );
|
||||
hold on
|
||||
%scatter(x, y ); % either scatter ...
|
||||
plot(x, y, 'o', 'markersize', 2 ); % ... or plot - same plot.
|
||||
xlim([-4 4])
|
||||
ylim([-4 4])
|
||||
xlabel('x')
|
||||
ylabel('y')
|
||||
hold off
|
||||
|
||||
%% (d) correlation coefficient:
|
||||
%c = corrcoef(x, y); % returns correlation matrix
|
||||
%rd = c(1, 2);
|
||||
rd = corr(x, y);
|
||||
fprintf('correlation coefficient = %.2f\n', rd );
|
||||
|
||||
%% (e) permutation:
|
||||
nperm = 1000;
|
||||
rs = zeros(nperm,1);
|
||||
for i=1:nperm
|
||||
xr=x(randperm(length(x))); % shuffle x
|
||||
yr=y(randperm(length(y))); % shuffle y
|
||||
rs(i) = corr(xr, yr);
|
||||
end
|
||||
|
||||
%% (g) pdf of the correlation coefficients:
|
||||
[h,b] = hist(rs, 20 );
|
||||
h = h/sum(h)/(b(2)-b(1)); % normalization
|
||||
|
||||
%% (h) significance:
|
||||
rq = quantile(rs, 0.95);
|
||||
fprintf('correlation coefficient at 5%% significance = %.2f\n', rq );
|
||||
if rd >= rq
|
||||
fprintf('--> correlation r=%.2f is significant\n', rd);
|
||||
else
|
||||
fprintf('--> r=%.2f is not a significant correlation\n', rd);
|
||||
end
|
||||
|
||||
%% plot:
|
||||
subplot(1, 2, 2)
|
||||
hold on;
|
||||
bar(b, h, 'facecolor', 'b');
|
||||
bar(b(b>=rq), h(b>=rq), 'facecolor', 'r');
|
||||
plot( [rd rd], [0 4], 'r', 'linewidth', 2 );
|
||||
xlim([-0.2 0.2])
|
||||
xlabel('Correlation coefficient');
|
||||
ylabel('Probability density of H0');
|
||||
hold off;
|
||||
|
||||
savefigpdf( gcf, 'correlationsignificance.pdf', 12, 6 );
|
||||
Binary file not shown.
@@ -1,30 +0,0 @@
|
||||
% plot gamma pdfs:
|
||||
xx = 0.0:0.1:10.0;
|
||||
shapes = [ 1.0, 2.0, 3.0, 5.0];
|
||||
cc = jet(length(shapes) );
|
||||
for i=1:length(shapes)
|
||||
yy = gampdf(xx, shapes(i), 1.0);
|
||||
plot(xx, yy, '-', 'linewidth', 3, 'color', cc(i,:), ...
|
||||
'DisplayName', sprintf('s=%.0f', shapes(i)) );
|
||||
hold on;
|
||||
end
|
||||
|
||||
% generate gamma distributed random numbers:
|
||||
n = 50;
|
||||
x = gamrnd(3.0, 1.0, n, 1);
|
||||
|
||||
% histogram:
|
||||
[h,b] = hist(x, 15);
|
||||
h = h/sum(h)/(b(2)-b(1));
|
||||
bar(b, h, 1.0, 'DisplayName', 'data');
|
||||
|
||||
% maximum likelihood estimate:
|
||||
p = mle(x, 'distribution', 'gamma');
|
||||
yy = gampdf(xx, p(1), p(2));
|
||||
plot(xx, yy, '-k', 'linewidth', 5, 'DisplayName', 'mle' );
|
||||
|
||||
hold off;
|
||||
xlabel('x');
|
||||
ylabel('pdf');
|
||||
legend('show');
|
||||
savefigpdf(gcf, 'mlepdffit.pdf', 12, 8)
|
||||
Binary file not shown.
@@ -1,31 +0,0 @@
|
||||
m = 2.0; % slope
|
||||
sigma = 1.0; % standard deviation
|
||||
n = 100; % number of data pairs
|
||||
|
||||
% data pairs:
|
||||
x = 5.0*rand(n, 1);
|
||||
y = m*x + sigma*randn(n, 1);
|
||||
|
||||
% fit:
|
||||
slope = mleslope(x, y);
|
||||
fprintf('slopes:\n');
|
||||
fprintf('original = %.2f\n', m);
|
||||
fprintf(' fit = %.2f\n', slope);
|
||||
|
||||
% lines:
|
||||
xx = 0.0:0.1:5.0; % x-axis values
|
||||
yorg = m*xx;
|
||||
yfit = slope*xx;
|
||||
|
||||
% plot:
|
||||
plot(xx, yorg, '-r', 'linewidth', 5);
|
||||
hold on;
|
||||
plot(xx, yfit, '-g', 'linewidth', 2);
|
||||
plot(x, y, 'ob');
|
||||
hold off;
|
||||
% legend entries must follow the order of the plot calls: original line, fitted line, data points
legend('original', 'fit', 'data', 'Location', 'NorthWest');
|
||||
legend('boxoff')
|
||||
xlabel('x');
|
||||
ylabel('y');
|
||||
|
||||
savefigpdf(gcf, 'mlepropfit.pdf', 12, 7);
|
||||
Binary file not shown.
@@ -1,6 +0,0 @@
|
||||
function slope = mleslope(x, y )
|
||||
% Compute the maximum likelihood estimate of the slope
|
||||
% of a line through the origin
|
||||
% given the data pairs in the vectors x and y.
|
||||
slope = sum(x.*y)/sum(x.*x);
|
||||
end
|
||||
@@ -1,30 +0,0 @@
|
||||
% draw random numbers:
|
||||
n = 50;
|
||||
mu = 3.0;
|
||||
sigma =2.0;
|
||||
x = randn(n,1)*sigma+mu;
|
||||
fprintf(' mean of the data is %.2f\n', mean(x))
|
||||
fprintf('standard deviation of the data is %.2f\n', std(x))
|
||||
|
||||
% standard deviation as parameter:
|
||||
psigs = 1.0:0.01:3.0;
|
||||
% matrix with the probabilities for each x and psigs:
|
||||
lms = zeros(length(x), length(psigs));
|
||||
for i=1:length(psigs)
|
||||
psig = psigs(i);
|
||||
p = exp(-0.5*((x-mu)/psig).^2.0)/sqrt(2.0*pi)/psig;
|
||||
lms(:,i) = p;
|
||||
end
|
||||
lm = prod(lms, 1); % likelihood
|
||||
loglm = sum(log(lms), 1); % log likelihood
|
||||
|
||||
% plot likelihood of standard deviation:
|
||||
subplot(1, 2, 1);
|
||||
plot(psigs, lm );
|
||||
xlabel('standard deviation')
|
||||
ylabel('likelihood')
|
||||
subplot(1, 2, 2);
|
||||
plot(psigs, loglm);
|
||||
xlabel('standard deviation')
|
||||
ylabel('log likelihood')
|
||||
savefigpdf(gcf, 'mlestd.pdf', 15, 5);
|
||||
Binary file not shown.
@@ -1,163 +0,0 @@
|
||||
\documentclass[12pt,a4paper,pdftex]{exam}
|
||||
|
||||
\usepackage[german]{babel}
|
||||
\usepackage{pslatex}
|
||||
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
|
||||
\usepackage{xcolor}
|
||||
\usepackage{graphicx}
|
||||
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
||||
|
||||
%%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
|
||||
\pagestyle{headandfoot}
|
||||
\ifprintanswers
|
||||
\newcommand{\stitle}{: L\"osungen}
|
||||
\else
|
||||
\newcommand{\stitle}{}
|
||||
\fi
|
||||
\header{{\bfseries\large \"Ubung 3\stitle}}{{\bfseries\large Statistik}}{{\bfseries\large 21. Oktober, 2015}}
|
||||
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
|
||||
jan.benda@uni-tuebingen.de}
|
||||
\runningfooter{}{\thepage}{}
|
||||
|
||||
\setlength{\baselineskip}{15pt}
|
||||
\setlength{\parindent}{0.0cm}
|
||||
\setlength{\parskip}{0.3cm}
|
||||
\renewcommand{\baselinestretch}{1.15}
|
||||
|
||||
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{listings}
|
||||
\lstset{
|
||||
language=Matlab,
|
||||
basicstyle=\ttfamily\footnotesize,
|
||||
numbers=left,
|
||||
numberstyle=\tiny,
|
||||
title=\lstname,
|
||||
showstringspaces=false,
|
||||
commentstyle=\itshape\color{darkgray},
|
||||
breaklines=true,
|
||||
breakautoindent=true,
|
||||
columns=flexible,
|
||||
frame=single,
|
||||
xleftmargin=1em,
|
||||
xrightmargin=1em,
|
||||
aboveskip=10pt
|
||||
}
|
||||
|
||||
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{amsmath}
|
||||
\usepackage{amssymb}
|
||||
\usepackage{bm}
|
||||
\usepackage{dsfont}
|
||||
\newcommand{\naZ}{\mathds{N}}
|
||||
\newcommand{\gaZ}{\mathds{Z}}
|
||||
\newcommand{\raZ}{\mathds{Q}}
|
||||
\newcommand{\reZ}{\mathds{R}}
|
||||
\newcommand{\reZp}{\mathds{R^+}}
|
||||
\newcommand{\reZpN}{\mathds{R^+_0}}
|
||||
\newcommand{\koZ}{\mathds{C}}
|
||||
|
||||
%%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\newcommand{\continue}{\ifprintanswers%
|
||||
\else
|
||||
\vfill\hspace*{\fill}$\rightarrow$\newpage%
|
||||
\fi}
|
||||
\newcommand{\continuepage}{\ifprintanswers%
|
||||
\newpage
|
||||
\else
|
||||
\vfill\hspace*{\fill}$\rightarrow$\newpage%
|
||||
\fi}
|
||||
\newcommand{\newsolutionpage}{\ifprintanswers%
|
||||
\newpage%
|
||||
\else
|
||||
\fi}
|
||||
|
||||
%%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\newcommand{\qt}[1]{\textbf{#1}\\}
|
||||
\newcommand{\pref}[1]{(\ref{#1})}
|
||||
\newcommand{\extra}{--- Zusatzaufgabe ---\ \mbox{}}
|
||||
\newcommand{\code}[1]{\texttt{#1}}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{document}
|
||||
|
||||
\input{instructions}
|
||||
|
||||
|
||||
\begin{questions}
|
||||
|
||||
\question \qt{Bootstrap des Standardfehlers}
|
||||
\begin{parts}
|
||||
\part Lade von Ilias die Datei \code{thymusglandweights.dat} herunter.
|
||||
Darin befindet sich ein Datensatz vom Gewicht der Thymus Dr\"use in 14-Tage alten
|
||||
H\"uhnerembryos in mg.
|
||||
\part Lade diese Daten in Matlab (\code{load} Funktion).
|
||||
\part Bestimme Histogramm, Mittelwert und Standardfehler aus den ersten 80 Datenpunkten.
|
||||
\part Bestimme den Standardfehler aus den ersten 80 Datenpunkten durch 500-mal Bootstrappen.
|
||||
\part Bestimme das 95\,\% Konfidenzintervall f\"ur den Mittelwert
|
||||
aus der Bootstrap Verteilung (\code{quantile()} Funktion) --- also
|
||||
das Intervall innerhalb dessen mit 95\,\% Wahrscheinlichkeit der
|
||||
wahre Mittelwert liegen wird.
|
||||
\part Benutze den ganzen Datensatz und die Bootstrapping Technik, um die Abh\"angigkeit
|
||||
des Standardfehlers von der Stichprobengr\"o{\ss}e zu bestimmen.
|
||||
\part Vergleiche mit der bekannten Formel f\"ur den Standardfehler $\sigma/\sqrt{n}$.
|
||||
\end{parts}
|
||||
\begin{solution}
|
||||
\lstinputlisting{bootstrapmean.m}
|
||||
\lstinputlisting{bootstraptymus.m}
|
||||
\includegraphics[width=0.5\textwidth]{bootstraptymus-datahist}
|
||||
\includegraphics[width=0.5\textwidth]{bootstraptymus-meanhist}
|
||||
\includegraphics[width=0.5\textwidth]{bootstraptymus-samples}
|
||||
\end{solution}
|
||||
|
||||
|
||||
\continue
|
||||
\question \qt{Student t-Verteilung}
|
||||
\begin{parts}
|
||||
\part Erzeuge 100000 normalverteilte Zufallszahlen.
|
||||
\part Ziehe daraus 1000 Stichproben vom Umfang $m=3$, 5, 10, oder 50.
|
||||
\part Berechne den Mittelwert $\bar x$ der Stichproben und plotte die Wahrscheinlichkeitsdichte
|
||||
dieser Mittelwerte.
|
||||
\part Vergleiche diese Wahrscheinlichkeitsdichte mit der Gau{\ss}kurve.
|
||||
\part Berechne au{\ss}erdem die Gr\"o{\ss}e $t=\bar x/(\sigma_x/\sqrt{m})$
|
||||
(Standardabweichung $\sigma_x$) und vergleiche diese mit der Normalverteilung mit Standardabweichung Eins. Ist $t$ normalverteilt, bzw. unter welchen Bedingungen ist $t$ normalverteilt?
|
||||
\end{parts}
|
||||
\begin{solution}
|
||||
\lstinputlisting{tdistribution.m}
|
||||
\includegraphics[width=1\textwidth]{tdistribution-n03}\\
|
||||
\includegraphics[width=1\textwidth]{tdistribution-n05}\\
|
||||
\includegraphics[width=1\textwidth]{tdistribution-n10}\\
|
||||
\includegraphics[width=1\textwidth]{tdistribution-n50}
|
||||
\end{solution}
|
||||
|
||||
|
||||
\question \qt{Korrelationen}
|
||||
\begin{parts}
|
||||
\part Erzeuge 1000 korrelierte Zufallszahlen $x$, $y$ durch
|
||||
\begin{verbatim}
|
||||
n = 1000;
|
||||
a = 0.2;
|
||||
x = randn(n, 1);
|
||||
y = randn(n, 1) + a*x;
|
||||
\end{verbatim}
|
||||
\part Erstelle einen Scatterplot der beiden Variablen.
|
||||
\part Warum ist $y$ mit $x$ korreliert?
|
||||
\part Berechne den Korrelationskoeffizienten zwischen $x$ und $y$.
|
||||
\part Was m\"usste man tun, um die Korrelationen zwischen den $x$-$y$
|
||||
Paaren zu zerst\"oren?
|
||||
\part Mach genau dies 1000 mal und berechne jedes Mal den Korrelationskoeffizienten.
|
||||
\part Bestimme die Wahrscheinlichkeitsdichte dieser Korrelationskoeffizienten.
|
||||
\part Ist die Korrelation der urspr\"unglichen Daten signifikant?
|
||||
\part Variiere die Stichprobengr\"o{\ss}e \code{n} und \"uberpr\"ufe
|
||||
auf gleiche Weise die Signifikanz.
|
||||
\end{parts}
|
||||
\begin{solution}
|
||||
\lstinputlisting{correlationsignificance.m}
|
||||
\includegraphics[width=1\textwidth]{correlationsignificance}
|
||||
\end{solution}
|
||||
|
||||
|
||||
\end{questions}
|
||||
|
||||
\end{document}
|
||||
@@ -1,192 +0,0 @@
|
||||
\documentclass[12pt,a4paper,pdftex]{exam}
|
||||
|
||||
\usepackage[german]{babel}
|
||||
\usepackage{pslatex}
|
||||
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
|
||||
\usepackage{xcolor}
|
||||
\usepackage{graphicx}
|
||||
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
||||
|
||||
%%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
|
||||
\pagestyle{headandfoot}
|
||||
\ifprintanswers
|
||||
\newcommand{\stitle}{: L\"osungen}
|
||||
\else
|
||||
\newcommand{\stitle}{}
|
||||
\fi
|
||||
\header{{\bfseries\large \"Ubung 4\stitle}}{{\bfseries\large Statistik}}{{\bfseries\large 26. Oktober, 2015}}
|
||||
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
|
||||
jan.benda@uni-tuebingen.de}
|
||||
\runningfooter{}{\thepage}{}
|
||||
|
||||
\setlength{\baselineskip}{15pt}
|
||||
\setlength{\parindent}{0.0cm}
|
||||
\setlength{\parskip}{0.3cm}
|
||||
\renewcommand{\baselinestretch}{1.15}
|
||||
|
||||
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{listings}
|
||||
\lstset{
|
||||
language=Matlab,
|
||||
basicstyle=\ttfamily\footnotesize,
|
||||
numbers=left,
|
||||
numberstyle=\tiny,
|
||||
title=\lstname,
|
||||
showstringspaces=false,
|
||||
commentstyle=\itshape\color{darkgray},
|
||||
breaklines=true,
|
||||
breakautoindent=true,
|
||||
columns=flexible,
|
||||
frame=single,
|
||||
xleftmargin=1em,
|
||||
xrightmargin=1em,
|
||||
aboveskip=10pt
|
||||
}
|
||||
|
||||
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{amsmath}
|
||||
\usepackage{amssymb}
|
||||
\usepackage{bm}
|
||||
\usepackage{dsfont}
|
||||
\newcommand{\naZ}{\mathds{N}}
|
||||
\newcommand{\gaZ}{\mathds{Z}}
|
||||
\newcommand{\raZ}{\mathds{Q}}
|
||||
\newcommand{\reZ}{\mathds{R}}
|
||||
\newcommand{\reZp}{\mathds{R^+}}
|
||||
\newcommand{\reZpN}{\mathds{R^+_0}}
|
||||
\newcommand{\koZ}{\mathds{C}}
|
||||
|
||||
%%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\newcommand{\continue}{\ifprintanswers%
|
||||
\else
|
||||
\vfill\hspace*{\fill}$\rightarrow$\newpage%
|
||||
\fi}
|
||||
\newcommand{\continuepage}{\ifprintanswers%
|
||||
\newpage
|
||||
\else
|
||||
\vfill\hspace*{\fill}$\rightarrow$\newpage%
|
||||
\fi}
|
||||
\newcommand{\newsolutionpage}{\ifprintanswers%
|
||||
\newpage%
|
||||
\else
|
||||
\fi}
|
||||
|
||||
%%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\newcommand{\qt}[1]{\textbf{#1}\\}
|
||||
\newcommand{\pref}[1]{(\ref{#1})}
|
||||
\newcommand{\extra}{--- Zusatzaufgabe ---\ \mbox{}}
|
||||
\newcommand{\code}[1]{\texttt{#1}}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{document}
|
||||
|
||||
\input{instructions}
|
||||
|
||||
|
||||
\begin{questions}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\question \qt{Maximum Likelihood der Standardabweichung}
|
||||
Wir wollen uns die Likelihood und die Log-Likelihood am Beispiel der
|
||||
Absch\"atzung der Standardabweichung verdeutlichen.
|
||||
\begin{parts}
|
||||
\part Ziehe $n=50$ normalverteilte Zufallsvariablen mit Mittelwert $\mu=3$
|
||||
und einer Standardabweichung $\sigma=2$.
|
||||
|
||||
\part
|
||||
Plotte die Likelihood (aus dem Produkt der Wahrscheinlichkeiten) und
|
||||
die Log-Likelihood (aus der Summe der logarithmierten
|
||||
Wahrscheinlichkeiten) f\"ur die Standardabweichung als Parameter. Vergleiche die
|
||||
Position der Maxima mit der aus den Daten berechneten Standardabweichung.
|
||||
|
||||
\part
|
||||
Erh\"ohe $n$ auf 1000. Was passiert mit der Likelihood, was mit der Log-Likelihood? Warum?
|
||||
\end{parts}
|
||||
\begin{solution}
|
||||
\lstinputlisting{mlestd.m}
|
||||
\includegraphics[width=1\textwidth]{mlestd}
|
||||
\end{solution}
|
||||
|
||||
\continue
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\question \qt{Maximum-Likelihood-Sch\"atzer einer Ursprungsgeraden}
|
||||
In der Vorlesung haben wir folgende Formel f\"ur die Maximum-Likelihood
|
||||
Absch\"atzung der Steigung $\theta$ einer Ursprungsgeraden durch $n$ Datenpunkte $(x_i|y_i)$ mit Standardabweichung $\sigma_i$ hergeleitet:
|
||||
\[\theta = \frac{\sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2}}{ \sum_{i=1}^n
|
||||
\frac{x_i^2}{\sigma_i^2}} \]
|
||||
\begin{parts}
|
||||
\part \label{mleslopefunc} Schreibe eine Funktion, die in einem $x$ und einem
|
||||
$y$ Vektor die Datenpaare \"uberreicht bekommt und die Steigung der
|
||||
Ursprungsgeraden, die die Likelihood maximiert, zur\"uckgibt
|
||||
($\sigma=\text{const}$).
|
||||
|
||||
\part
|
||||
Schreibe ein Skript, das Datenpaare erzeugt, die um eine
|
||||
Ursprungsgerade mit vorgegebener Steigung streuen. Berechne mit der
|
||||
Funktion aus \pref{mleslopefunc} die Steigung aus den Daten,
|
||||
vergleiche mit der wahren Steigung, und plotte die urspr\"ungliche
|
||||
sowie die gefittete Gerade zusammen mit den Daten.
|
||||
|
||||
\part
|
||||
Ver\"andere die Anzahl der Datenpunkte, die Steigung, sowie die
|
||||
Streuung der Daten um die Gerade.
|
||||
\end{parts}
|
||||
\begin{solution}
|
||||
\lstinputlisting{mleslope.m}
|
||||
\lstinputlisting{mlepropfit.m}
|
||||
\includegraphics[width=1\textwidth]{mlepropfit}
|
||||
\end{solution}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\question \qt{Maximum-Likelihood-Sch\"atzer einer Wahrscheinlichkeitsdichtefunktion}
|
||||
Verschiedene Wahrscheinlichkeitsdichtefunktionen haben Parameter, die
|
||||
nicht so einfach wie der Mittelwert und die Standardabweichung einer
|
||||
Normalverteilung direkt aus den Daten berechnet werden k\"onnen. Solche Parameter
|
||||
m\"ussen dann aus den Daten mit der Maximum-Likelihood-Methode gefittet werden.
|
||||
|
||||
Um dies zu veranschaulichen ziehen wir uns diesmal nicht normalverteilte Zufallszahlen, sondern Zufallszahlen aus der Gamma-Verteilung.
|
||||
\begin{parts}
|
||||
\part
|
||||
Finde heraus welche \code{matlab} Funktion die
|
||||
Wahrscheinlichkeitsdichtefunktion (probability density function) der
|
||||
Gamma-Verteilung berechnet.
|
||||
|
||||
\part
|
||||
Plotte mit Hilfe dieser Funktion die Wahrscheinlichkeitsdichtefunktion
|
||||
der Gamma-Verteilung f\"ur verschiedene Werte des (positiven) ``shape'' Parameters.
|
||||
Den ``scale'' Parameter setzen wir auf Eins.
|
||||
|
||||
\part
|
||||
Finde heraus mit welcher Funktion Gammaverteilte Zufallszahlen in
|
||||
\code{matlab} gezogen werden k\"onnen. Erzeuge mit dieser Funktion
|
||||
50 Zufallszahlen mit einem der oben geplotteten ``shape'' Parameter.
|
||||
|
||||
\part
|
||||
Berechne und plotte ein normiertes Histogramm dieser Zufallszahlen.
|
||||
|
||||
\part
|
||||
Finde heraus mit welcher \code{matlab}-Funktion eine beliebige
|
||||
Verteilung (``distribution'') an die Zufallszahlen nach der
|
||||
Maximum-Likelihood Methode gefittet werden kann. Wie wird diese
|
||||
Funktion benutzt, um die Gammaverteilung an die Daten zu fitten?
|
||||
|
||||
\part
|
||||
Bestimme mit dieser Funktion die Parameter der Gammaverteilung aus
|
||||
den Zufallszahlen.
|
||||
|
||||
\part
|
||||
Plotte anschlie{\ss}end die Gammaverteilung mit den gefitteten
|
||||
Parametern.
|
||||
\end{parts}
|
||||
\begin{solution}
|
||||
\lstinputlisting{mlepdffit.m}
|
||||
\includegraphics[width=1\textwidth]{mlepdffit}
|
||||
\end{solution}
|
||||
|
||||
\end{questions}
|
||||
|
||||
\end{document}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,58 +0,0 @@
|
||||
%% (a) generate random numbers:
|
||||
n = 100000;
|
||||
x=randn(n, 1);
|
||||
|
||||
for nsamples=[3 5 10 50]
|
||||
nsamples
|
||||
%% compute mean, standard deviation and t:
|
||||
nmeans = 10000;
|
||||
means = zeros( nmeans, 1 );
|
||||
sdevs = zeros( nmeans, 1 );
|
||||
students = zeros( nmeans, 1 );
|
||||
for i=1:nmeans
|
||||
sample = x(randi(n, nsamples, 1));
|
||||
means(i) = mean(sample);
|
||||
sdevs(i) = std(sample);
|
||||
students(i) = mean(sample)/std(sample)*sqrt(nsamples);
|
||||
end
|
||||
|
||||
% Gaussian pdfs:
|
||||
msdev = std(means);
|
||||
tsdev = 1.0;
|
||||
dxg=0.01;
|
||||
xmax = 10.0;
|
||||
xmin = -xmax;
|
||||
xg = [xmin:dxg:xmax];
|
||||
pm = exp(-0.5*(xg/msdev).^2)/sqrt(2.0*pi)/msdev;
|
||||
pt = exp(-0.5*(xg/tsdev).^2)/sqrt(2.0*pi)/tsdev;
|
||||
|
||||
%% plots
|
||||
subplot(1, 2, 1)
|
||||
bins = xmin:0.2:xmax;
|
||||
[h,b] = hist(means, bins);
|
||||
h = h/sum(h)/(b(2)-b(1));
|
||||
bar(b, h, 'facecolor', 'b', 'edgecolor', 'b')
|
||||
hold on
|
||||
plot(xg, pm, 'r', 'linewidth', 2)
|
||||
title( sprintf('sample size = %d', nsamples) );
|
||||
xlim( [-3, 3] );
|
||||
xlabel('Mean');
|
||||
ylabel('pdf');
|
||||
hold off;
|
||||
|
||||
subplot(1, 2, 2)
|
||||
bins = xmin:0.5:xmax;
|
||||
[h,b] = hist(students, bins);
|
||||
h = h/sum(h)/(b(2)-b(1));
|
||||
bar(b, h, 'facecolor', 'b', 'edgecolor', 'b')
|
||||
hold on
|
||||
plot(xg, pt, 'r', 'linewidth', 2)
|
||||
title( sprintf('sample size = %d', nsamples) );
|
||||
xlim( [-8, 8] );
|
||||
xlabel('Student-t');
|
||||
ylabel('pdf');
|
||||
hold off;
|
||||
|
||||
savefigpdf( gcf, sprintf('tdistribution-n%02d.pdf', nsamples), 14, 5 );
|
||||
pause( 3.0 )
|
||||
end
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user