reorganized statistics exercises

This commit is contained in:
2016-11-15 15:53:12 +01:00
parent bd62a15593
commit 778fde35fa
34 changed files with 214 additions and 3 deletions

View File

@@ -1,6 +1,6 @@
# File lists for the exercise sheets.
# NOTE(review): this commit view shows the removed (statistics...) and the added
# (exercises...) variant of each changed line directly after one another.
# Exercise sources: base name followed by exactly two characters and ".tex".
TEXFILES=$(wildcard statistics??.tex)
TEXFILES=$(wildcard exercises??.tex)
# One exercise PDF per TeX source (suffix .tex replaced by .pdf).
EXERCISES=$(TEXFILES:.tex=.pdf)
# One solutions PDF per exercise PDF (base name replaced by "solutions").
SOLUTIONS=$(EXERCISES:statistics%=solutions%)
SOLUTIONS=$(EXERCISES:exercises%=solutions%)
# These targets are declared phony, i.e. they do not name files.
.PHONY: pdf exercises solutions watch watchexercises watchsolutions clean
@@ -10,7 +10,7 @@ exercises : $(EXERCISES)
# Build every solutions PDF.
solutions : $(SOLUTIONS)
# Static pattern rule: solutionsNN.pdf is built from the exercise source with the
# same stem and is also rebuilt when instructions.tex changes.
# NOTE(review): this commit view shows the removed (statistics%) and the added
# (exercises%) variant of the rule header directly after one another.
$(SOLUTIONS) : solutions%.pdf : statistics%.tex instructions.tex
$(SOLUTIONS) : solutions%.pdf : exercises%.tex instructions.tex
# Write solutionsNN.tex: a \documentclass line that adds the exam class option
# "answers", followed by the exercise source with its first line removed.
{ echo "\\documentclass[answers,12pt,a4paper,pdftex]{exam}"; sed -e '1d' $<; } > $(patsubst %.pdf,%.tex,$@)
# Run pdflatex (output is copied to stderr via tee) and run it a second time if
# the output asks for a rerun to fix cross-references. The trailing "|| true"
# makes this line succeed even if no rerun was requested or the rerun failed.
pdflatex -interaction=scrollmode $(patsubst %.pdf,%.tex,$@) | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $(patsubst %.pdf,%.tex,$@) || true
# Remove the files sharing the target's stem whose extension does not start
# with "p" (the generated .tex and other by-products); the .pdf is kept.
rm $(patsubst %.pdf,%,$@).[!p]*

View File

@@ -1,17 +0,0 @@
function [bootsem, mu] = bootstrapmean( x, resample )
% BOOTSTRAPMEAN  Bootstrap estimate of the standard error of the mean.
%
%   [bootsem, mu] = bootstrapmean( x, resample )
%
% Arguments:
%   x:        vector with the data
%   resample: number of bootstrap resamplings
%
% Returns:
%   bootsem:  standard deviation of the bootstrapped means, i.e. the
%             bootstrap estimate of the standard error of the mean
%   mu:       column vector holding the mean of each resampled data set

    ndata = length(x);
    mu = zeros( resample, 1 );
    for k = 1:resample
        % draw as many indices as there are data points, with replacement:
        indices = randi(ndata, ndata, 1);
        % the statistic of interest is the mean of the resampled data:
        mu(k) = mean(x(indices));
    end
    % the spread of the bootstrapped means is the standard error:
    bootsem = std( mu );
end

View File

@@ -1,92 +0,0 @@
%PDF-1.4
%Çì<C387>¢
5 0 obj
<</Length 6 0 R/Filter /FlateDecode>>
stream
xœÅWÛn1 }ÏWÌ# 1$“ë¼"!žË¬Ä¬è¢jSÔ‚Ï'lj=»Õö‰^”Ø>ëãÄ'iú4ÉYM2ÃxŒâÃ?<3F>~
jzH¿Ÿ…6ë¬\˜´\f;E´•VzÖz: e<>åfEŸÙ§ÏbòBNòmG9ÅvCUÛ˜õ† Wjx‰àF«këè©fçaCNâI¨Ò‰ †cœ>R7r™÷¢6H¥Ì³
z™”;¢x#ßħƒ¸«q³ªAªiv¯¦{z5·VÂP„íUì7½ñ“`C^ϬŒIÉ;¹<C2B9>}×'ç¼_½¯¬><3E>'=zk
MHyÓòH;Ѫ2Ì9Úëh¬¤C ñÚj¼Mæ--ÙU3¨ªÙ–±;¥šqsBUæ[ܼ*=ºÚ¢ÿqöŸ^8šEYÚ§ žžë»ø:=ŠzZ—ù|fUÃA\ò3éèJ]ûüMÜ¿c˜ˆ¶“¶´æ,¬ªBØ{ÚfÑy]QÄi×Ô¯Š«ÌÑ6Ò”õ'Sg{Ocâ•l ±Ôe£$ÁDéeOÅRoCÒÐ<C392>ëášhØŸµÎö$ˆ7]ê]¸¨ ¢meNàìçHÃh7¿¸ªÁêëp†k¡y-`cñ¦ ŸNC•Ðì~9oÈEÖ=Èk¸(¢©ßI7h<37>b"Ú«\Aq^Ö+jïAfÏ…Cj¯ÌÑNg$O2aïiL¼<C2BC>! Á«jú§áØ)m¹l©‰tŒ~#q °O@hG5AöôUÿÔz_wdïAJKÕDãXÏ(`
‰hwÉi™…tž¨Ga}ã±ìÁXæõ-‡ÃÙ¬r¤ÛX´)¥7ŒÆ#ÚØ¡`Yù4×EÀ²¨8)$v”b¥•ÌFRË•2Æ»®‡“8"XýÞo§oïAÊ@•Bã°JJI!ñ‰¶Ìjœ~¢@fdÓIÅ2÷Ò×$ðüá6h²"wI+£)"¢]•¸†ºïÔB®Ý•"Úý9¬*°g>Øm5i³ËÛ„ÛȸHîn~Ú­ðïÇÒ_½óÃ.ÇÞ§Ÿ4ªúun Ç¿áûïNü­Ôlendstream
endobj
6 0 obj
824
endobj
4 0 obj
<</Type/Page/MediaBox [0 0 170 142]
/Rotate 0/Parent 3 0 R
/Resources<</ProcSet[/PDF /Text]
/ExtGState 9 0 R
/Font 10 0 R
>>
/Contents 5 0 R
>>
endobj
3 0 obj
<< /Type /Pages /Kids [
4 0 R
] /Count 1
>>
endobj
1 0 obj
<</Type /Catalog /Pages 3 0 R
/Metadata 11 0 R
>>
endobj
7 0 obj
<</Type/ExtGState
/OPM 1>>endobj
9 0 obj
<</R7
7 0 R>>
endobj
10 0 obj
<</R8
8 0 R>>
endobj
8 0 obj
<</BaseFont/Helvetica/Type/Font
/Subtype/Type1>>
endobj
11 0 obj
<</Length 1316>>stream
<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>
<?adobe-xap-filters esc="CRLF"?>
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>
<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' xmlns:iX='http://ns.adobe.com/iX/1.0/'>
<rdf:Description rdf:about='94827694-b0d9-11f0-0000-86f60cc553dd' xmlns:pdf='http://ns.adobe.com/pdf/1.3/' pdf:Producer='Artifex Ghostscript 8.54'/>
<rdf:Description rdf:about='94827694-b0d9-11f0-0000-86f60cc553dd' xmlns:xap='http://ns.adobe.com/xap/1.0/' xap:ModifyDate='2015-10-22' xap:CreateDate='2015-10-22'><xap:CreatorTool>Artifex Ghostscript 8.54 PDF Writer</xap:CreatorTool></rdf:Description>
<rdf:Description rdf:about='94827694-b0d9-11f0-0000-86f60cc553dd' xmlns:xapMM='http://ns.adobe.com/xap/1.0/mm/' xapMM:DocumentID='94827694-b0d9-11f0-0000-86f60cc553dd'/>
<rdf:Description rdf:about='94827694-b0d9-11f0-0000-86f60cc553dd' xmlns:dc='http://purl.org/dc/elements/1.1/' dc:format='application/pdf'><dc:title><rdf:Alt><rdf:li xml:lang='x-default'>/tmp/tpd0b45dc9_ff5a_4aa8_90bd_50aa8e8237b6.ps</rdf:li></rdf:Alt></dc:title></rdf:Description>
</rdf:RDF>
</x:xmpmeta>
<?xpacket end='w'?>
endstream
endobj
2 0 obj
<</Producer(Artifex Ghostscript 8.54)
/CreationDate(D:20151022150138)
/ModDate(D:20151022150138)
/Creator(MATLAB, The MathWorks, Inc. Version 8.3.0.532 \(R2014a\). Operating System: Linux 3.13.0-24-generic #47-Ubuntu SMP Fri May 2 23:30:00 UTC 2014 x86_64.)
/Title(/tmp/tpd0b45dc9_ff5a_4aa8_90bd_50aa8e8237b6.ps)>>endobj
xref
0 12
0000000000 65535 f
0000001146 00000 n
0000002741 00000 n
0000001087 00000 n
0000000928 00000 n
0000000015 00000 n
0000000909 00000 n
0000001211 00000 n
0000001311 00000 n
0000001252 00000 n
0000001281 00000 n
0000001375 00000 n
trailer
<< /Size 12 /Root 1 0 R /Info 2 0 R
/ID [<8FE161FFCC6D1C11BAD3CE59BA0E3F32><8FE161FFCC6D1C11BAD3CE59BA0E3F32>]
>>
startxref

View File

@@ -1,47 +0,0 @@
% Bootstrap the standard error of the mean of the thymus gland weights.
% Needs the data file thymusglandweights.dat as well as the functions
% bootstrapmean() and savefigpdf().

%% (b) load the data:
load( 'thymusglandweights.dat' );
nsamples = 80;
x = thymusglandweights(1:nsamples);   % only the first 80 data points

%% (c) mean, sem and hist:
sem = std(x)/sqrt(nsamples);          % standard error from sigma/sqrt(n)
fprintf( 'Mean of the data set = %.2fmg\n', mean(x) );
fprintf( 'SEM of the data set = %.2fmg\n', sem );
hist(x,20)
xlabel('x')
ylabel('count')
savefigpdf( gcf, 'bootstraptymus-datahist.pdf', 6, 5 );
pause( 2.0 )

%% (d) bootstrap the mean:
resample = 500;
[bootsem, mu] = bootstrapmean( x, resample );
% distribution of the bootstrapped means:
hist( mu, 20 );
xlabel('mean(x)')
ylabel('count')
savefigpdf( gcf, 'bootstraptymus-meanhist.pdf', 6, 5 );
fprintf( ' bootstrap standard error: %.3f\n', bootsem );
fprintf( 'theoretical standard error: %.3f\n', sem );

%% (e) confidence interval:
% the central 95% of the bootstrapped means:
q = quantile(mu, [0.025, 0.975]);
fprintf( '95%% confidence interval of the mean from %.2fmg to %.2fmg\n', q(1), q(2) );
pause( 2.0 )

%% (f): dependence on sample size:
nsamplesrange = 10:10:1000;
bootsems = zeros( length(nsamplesrange),1);
for n=1:length(nsamplesrange)
    nsamples = nsamplesrange(n);
    % bootstrap the first nsamples values of the whole data set:
    bootsems(n) = bootstrapmean(thymusglandweights(1:nsamples), resample);
end
plot(nsamplesrange, bootsems, 'b', 'linewidth', 2);
hold on
% theory: sigma/sqrt(n), with sigma estimated from the first 80 data points in x
plot(nsamplesrange, std(x)./sqrt(nsamplesrange), 'r', 'linewidth', 1)
hold off
xlabel('sample size')
ylabel('SEM')
legend('bootstrap', 'theory')   % label was misspelled 'bootsrap'
savefigpdf( gcf, 'bootstraptymus-samples.pdf', 6, 5 );

View File

@@ -1,58 +0,0 @@
% Permutation test for the significance of a correlation coefficient.
% Needs the function savefigpdf().

%% (a) generate correlated data
n = 1000;
a = 0.2;
x = randn(n, 1);
y = randn(n, 1) + a*x;    % y contains a fraction a of x plus independent noise

%% (b) scatter plot:
subplot(1, 2, 1);
% the linear dependence of y on x:
plot(x, a*x, 'r', 'linewidth', 3 );
hold on
% the data pairs; scatter(x, y) would draw the same picture:
plot(x, y, 'o', 'markersize', 2 );
xlim([-4 4])
ylim([-4 4])
xlabel('x')
ylabel('y')
hold off

%% (d) correlation coefficient:
% corrcoef(x, y) would return the whole correlation matrix instead,
% with the coefficient at position (1, 2).
rd = corr(x, y);
fprintf('correlation coefficient = %.2f\n', rd );

%% (e) permutation:
nperm = 1000;
rs = zeros(nperm,1);
for k = 1:nperm
    % shuffle x and y independently of each other:
    xshuffled = x(randperm(length(x)));
    yshuffled = y(randperm(length(y)));
    rs(k) = corr(xshuffled, yshuffled);
end

%% (g) pdf of the correlation coefficients:
[h,b] = hist(rs, 20 );
binwidth = b(2)-b(1);
h = h/sum(h)/binwidth;    % normalize to unit area

%% (h) significance:
rq = quantile(rs, 0.95);
fprintf('correlation coefficient at 5%% significance = %.2f\n', rq );
significant = (rd >= rq);
if significant
    fprintf('--> correlation r=%.2f is significant\n', rd);
else
    fprintf('--> r=%.2f is not a significant correlation\n', rd);
end

%% plot:
subplot(1, 2, 2)
hold on;
% distribution of the correlation coefficients of the shuffled data:
bar(b, h, 'facecolor', 'b');
% bins at or beyond the 95% quantile:
tail = b>=rq;
bar(b(tail), h(tail), 'facecolor', 'r');
% correlation coefficient of the original data:
plot( [rd rd], [0 4], 'r', 'linewidth', 2 );
xlim([-0.2 0.2])
xlabel('Correlation coefficient');
ylabel('Probability density of H0');
hold off;
savefigpdf( gcf, 'correlationsignificance.pdf', 12, 6 );

View File

@@ -1,30 +0,0 @@
% Maximum likelihood fit of a gamma distribution to random numbers.
% Needs the function savefigpdf().

% gamma pdfs for several shape parameters, scale parameter is one:
xx = 0.0:0.1:10.0;
shapes = [ 1.0, 2.0, 3.0, 5.0];
cc = jet(length(shapes) );    % one color per shape parameter
for k = 1:length(shapes)
    label = sprintf('s=%.0f', shapes(k));
    yy = gampdf(xx, shapes(k), 1.0);
    plot(xx, yy, '-', 'linewidth', 3, 'color', cc(k,:), 'DisplayName', label );
    hold on;
end

% draw gamma distributed random numbers with shape 3 and scale 1:
n = 50;
x = gamrnd(3.0, 1.0, n, 1);

% histogram of the random numbers, normalized to unit area:
[h,b] = hist(x, 15);
binwidth = b(2)-b(1);
h = h/sum(h)/binwidth;
bar(b, h, 1.0, 'DisplayName', 'data');

% fit the gamma distribution by maximum likelihood and plot its pdf:
p = mle(x, 'distribution', 'gamma');
yy = gampdf(xx, p(1), p(2));
plot(xx, yy, '-k', 'linewidth', 5, 'DisplayName', 'mle' );
hold off;

xlabel('x');
ylabel('pdf');
legend('show');
savefigpdf(gcf, 'mlepdffit.pdf', 12, 8)

Binary file not shown.

View File

@@ -1,31 +0,0 @@
% Maximum likelihood fit of the slope of a line through the origin.
% Needs the functions mleslope() and savefigpdf().

m = 2.0;       % slope
sigma = 1.0;   % standard deviation
n = 100;       % number of data pairs

% data pairs scattered around the line y = m*x:
x = 5.0*rand(n, 1);
y = m*x + sigma*randn(n, 1);

% fit:
slope = mleslope(x, y);
fprintf('slopes:\n');
fprintf('original = %.2f\n', m);
fprintf(' fit = %.2f\n', slope);

% lines:
xx = 0.0:0.1:5.0; % x-axis values
yorg = m*xx;
yfit = slope*xx;

% plot:
plot(xx, yorg, '-r', 'linewidth', 5);
hold on;
plot(xx, yfit, '-g', 'linewidth', 2);
plot(x, y, 'ob');
hold off;
% legend labels are assigned in the order the lines were plotted:
% original line first, then the fit, then the data points.
legend('original', 'fit', 'data', 'Location', 'NorthWest');
legend('boxoff')
xlabel('x');
ylabel('y');
savefigpdf(gcf, 'mlepropfit.pdf', 12, 7);

Binary file not shown.

View File

@@ -1,6 +0,0 @@
function slope = mleslope(x, y )
% MLESLOPE  Maximum likelihood estimate of the slope of a line
% through the origin.
%
%   slope = mleslope(x, y)
%
% Arguments:
%   x, y:  vectors with the data pairs
%
% Returns:
%   slope: sum of the products x*y divided by the sum of the squares of x

    sxy = sum(x.*y);
    sxx = sum(x.*x);
    slope = sxy/sxx;
end

View File

@@ -1,30 +0,0 @@
% Likelihood and log-likelihood of the standard deviation of
% normally distributed data. Needs the function savefigpdf().

% normally distributed random numbers:
n = 50;
mu = 3.0;
sigma =2.0;
x = randn(n,1)*sigma+mu;
fprintf(' mean of the data is %.2f\n', mean(x))
fprintf('standard deviation of the data is %.2f\n', std(x))

% probability density of a normal distribution with mean m
% and standard deviation s:
gausspdf = @(v, m, s) exp(-0.5*((v-m)/s).^2.0)/sqrt(2.0*pi)/s;

% the standard deviation is the parameter that is varied:
psigs = 1.0:0.01:3.0;
% one row for each data value, one column for each standard deviation:
lms = zeros(length(x), length(psigs));
for k = 1:length(psigs)
    lms(:,k) = gausspdf(x, mu, psigs(k));
end
lm = prod(lms, 1);          % likelihood: product of the probabilities
loglm = sum(log(lms), 1);   % log likelihood: sum of their logarithms

% likelihood (left) and log likelihood (right) of the standard deviation:
subplot(1, 2, 1);
plot(psigs, lm );
xlabel('standard deviation')
ylabel('likelihood')
subplot(1, 2, 2);
plot(psigs, loglm);
xlabel('standard deviation')
ylabel('log likelihood')
savefigpdf(gcf, 'mlestd.pdf', 15, 5);

Binary file not shown.

View File

@@ -1,163 +0,0 @@
\documentclass[12pt,a4paper,pdftex]{exam}
\usepackage[german]{babel}
\usepackage{pslatex}
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
\usepackage{xcolor}
\usepackage{graphicx}
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
%%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
\pagestyle{headandfoot}
\ifprintanswers
\newcommand{\stitle}{: L\"osungen}
\else
\newcommand{\stitle}{}
\fi
\header{{\bfseries\large \"Ubung 3\stitle}}{{\bfseries\large Statistik}}{{\bfseries\large 21. Oktober, 2015}}
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
jan.benda@uni-tuebingen.de}
\runningfooter{}{\thepage}{}
\setlength{\baselineskip}{15pt}
\setlength{\parindent}{0.0cm}
\setlength{\parskip}{0.3cm}
\renewcommand{\baselinestretch}{1.15}
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{listings}
\lstset{
language=Matlab,
basicstyle=\ttfamily\footnotesize,
numbers=left,
numberstyle=\tiny,
title=\lstname,
showstringspaces=false,
commentstyle=\itshape\color{darkgray},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{bm}
\usepackage{dsfont}
\newcommand{\naZ}{\mathds{N}}
\newcommand{\gaZ}{\mathds{Z}}
\newcommand{\raZ}{\mathds{Q}}
\newcommand{\reZ}{\mathds{R}}
\newcommand{\reZp}{\mathds{R^+}}
\newcommand{\reZpN}{\mathds{R^+_0}}
\newcommand{\koZ}{\mathds{C}}
%%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\continue}{\ifprintanswers%
\else
\vfill\hspace*{\fill}$\rightarrow$\newpage%
\fi}
\newcommand{\continuepage}{\ifprintanswers%
\newpage
\else
\vfill\hspace*{\fill}$\rightarrow$\newpage%
\fi}
\newcommand{\newsolutionpage}{\ifprintanswers%
\newpage%
\else
\fi}
%%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\qt}[1]{\textbf{#1}\\}
\newcommand{\pref}[1]{(\ref{#1})}
\newcommand{\extra}{--- Zusatzaufgabe ---\ \mbox{}}
\newcommand{\code}[1]{\texttt{#1}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
\input{instructions}
\begin{questions}
\question \qt{Bootstrap des Standardfehlers}
\begin{parts}
\part Lade von Ilias die Datei \code{thymusglandweights.dat} herunter.
Darin befindet sich ein Datensatz vom Gewicht der Thymusdr\"use in 14 Tage alten
H\"uhnerembryonen in mg.
\part Lade diese Daten in Matlab (\code{load} Funktion).
\part Bestimme Histogramm, Mittelwert und Standardfehler aus den ersten 80 Datenpunkten.
\part Bestimme den Standardfehler aus den ersten 80 Datenpunkten durch 500-mal Bootstrappen.
\part Bestimme das 95\,\% Konfidenzintervall f\"ur den Mittelwert
aus der Bootstrap Verteilung (\code{quantile()} Funktion) --- also
das Intervall, innerhalb dessen mit 95\,\% Wahrscheinlichkeit der
wahre Mittelwert liegen wird.
\part Benutze den ganzen Datensatz und die Bootstrapping Technik, um die Abh\"angigkeit
des Standardfehlers von der Stichprobengr\"o{\ss}e zu bestimmen.
\part Vergleiche mit der bekannten Formel f\"ur den Standardfehler $\sigma/\sqrt{n}$.
\end{parts}
\begin{solution}
\lstinputlisting{bootstrapmean.m}
\lstinputlisting{bootstraptymus.m}
\includegraphics[width=0.5\textwidth]{bootstraptymus-datahist}
\includegraphics[width=0.5\textwidth]{bootstraptymus-meanhist}
\includegraphics[width=0.5\textwidth]{bootstraptymus-samples}
\end{solution}
\continue
\question \qt{Student t-Verteilung}
\begin{parts}
\part Erzeuge 100000 normalverteilte Zufallszahlen.
\part Ziehe daraus 1000 Stichproben vom Umfang $m=3$, 5, 10, oder 50.
\part Berechne den Mittelwert $\bar x$ der Stichproben und plotte die Wahrscheinlichkeitsdichte
dieser Mittelwerte.
\part Vergleiche diese Wahrscheinlichkeitsdichte mit der Gau{\ss}kurve.
\part Berechne au{\ss}erdem die Gr\"o{\ss}e $t=\bar x/(\sigma_x/\sqrt{m})$
(Standardabweichung $\sigma_x$) und vergleiche diese mit der Normalverteilung mit Standardabweichung Eins. Ist $t$ normalverteilt, bzw. unter welchen Bedingungen ist $t$ normalverteilt?
\end{parts}
\begin{solution}
\lstinputlisting{tdistribution.m}
\includegraphics[width=1\textwidth]{tdistribution-n03}\\
\includegraphics[width=1\textwidth]{tdistribution-n05}\\
\includegraphics[width=1\textwidth]{tdistribution-n10}\\
\includegraphics[width=1\textwidth]{tdistribution-n50}
\end{solution}
\question \qt{Korrelationen}
\begin{parts}
\part Erzeuge 1000 korrelierte Zufallszahlen $x$, $y$ durch
\begin{verbatim}
n = 1000
a = 0.2;
x = randn(n, 1);
y = randn(n, 1) + a*x;
\end{verbatim}
\part Erstelle einen Scatterplot der beiden Variablen.
\part Warum ist $y$ mit $x$ korreliert?
\part Berechne den Korrelationskoeffizienten zwischen $x$ und $y$.
\part Was m\"usste man tun, um die Korrelationen zwischen den $x$-$y$
Paaren zu zerst\"oren?
\part Mach genau dies 1000 mal und berechne jedes Mal den Korrelationskoeffizienten.
\part Bestimme die Wahrscheinlichkeitsdichte dieser Korrelationskoeffizienten.
\part Ist die Korrelation der urspr\"unglichen Daten signifikant?
\part Variiere die Stichprobengr\"o{\ss}e \code{n} und \"uberpr\"ufe
auf gleiche Weise die Signifikanz.
\end{parts}
\begin{solution}
\lstinputlisting{correlationsignificance.m}
\includegraphics[width=1\textwidth]{correlationsignificance}
\end{solution}
\end{questions}
\end{document}

View File

@@ -1,192 +0,0 @@
\documentclass[12pt,a4paper,pdftex]{exam}
\usepackage[german]{babel}
\usepackage{pslatex}
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
\usepackage{xcolor}
\usepackage{graphicx}
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
%%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
\pagestyle{headandfoot}
\ifprintanswers
\newcommand{\stitle}{: L\"osungen}
\else
\newcommand{\stitle}{}
\fi
\header{{\bfseries\large \"Ubung 4\stitle}}{{\bfseries\large Statistik}}{{\bfseries\large 26. Oktober, 2015}}
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
jan.benda@uni-tuebingen.de}
\runningfooter{}{\thepage}{}
\setlength{\baselineskip}{15pt}
\setlength{\parindent}{0.0cm}
\setlength{\parskip}{0.3cm}
\renewcommand{\baselinestretch}{1.15}
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{listings}
\lstset{
language=Matlab,
basicstyle=\ttfamily\footnotesize,
numbers=left,
numberstyle=\tiny,
title=\lstname,
showstringspaces=false,
commentstyle=\itshape\color{darkgray},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{bm}
\usepackage{dsfont}
\newcommand{\naZ}{\mathds{N}}
\newcommand{\gaZ}{\mathds{Z}}
\newcommand{\raZ}{\mathds{Q}}
\newcommand{\reZ}{\mathds{R}}
\newcommand{\reZp}{\mathds{R^+}}
\newcommand{\reZpN}{\mathds{R^+_0}}
\newcommand{\koZ}{\mathds{C}}
%%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\continue}{\ifprintanswers%
\else
\vfill\hspace*{\fill}$\rightarrow$\newpage%
\fi}
\newcommand{\continuepage}{\ifprintanswers%
\newpage
\else
\vfill\hspace*{\fill}$\rightarrow$\newpage%
\fi}
\newcommand{\newsolutionpage}{\ifprintanswers%
\newpage%
\else
\fi}
%%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\qt}[1]{\textbf{#1}\\}
\newcommand{\pref}[1]{(\ref{#1})}
\newcommand{\extra}{--- Zusatzaufgabe ---\ \mbox{}}
\newcommand{\code}[1]{\texttt{#1}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
\input{instructions}
\begin{questions}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Maximum Likelihood der Standardabweichung}
Wir wollen uns die Likelihood und die Log-Likelihood am Beispiel der
Absch\"atzung der Standardabweichung verdeutlichen.
\begin{parts}
\part Ziehe $n=50$ normalverteilte Zufallsvariablen mit Mittelwert $\mu=3$
und einer Standardabweichung $\sigma=2$.
\part
Plotte die Likelihood (aus dem Produkt der Wahrscheinlichkeiten) und
die Log-Likelihood (aus der Summe der logarithmierten
Wahrscheinlichkeiten) f\"ur die Standardabweichung als Parameter. Vergleiche die
Position der Maxima mit der aus den Daten berechneten Standardabweichung.
\part
Erh\"ohe $n$ auf 1000. Was passiert mit der Likelihood, was mit der Log-Likelihood? Warum?
\end{parts}
\begin{solution}
\lstinputlisting{mlestd.m}
\includegraphics[width=1\textwidth]{mlestd}
\end{solution}
\continue
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Maximum-Likelihood-Sch\"atzer einer Ursprungsgeraden}
In der Vorlesung haben wir folgende Formel f\"ur die Maximum-Likelihood
Absch\"atzung der Steigung $\theta$ einer Ursprungsgeraden durch $n$ Datenpunkte $(x_i|y_i)$ mit Standardabweichung $\sigma_i$ hergeleitet:
\[\theta = \frac{\sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2}}{ \sum_{i=1}^n
\frac{x_i^2}{\sigma_i^2}} \]
\begin{parts}
\part \label{mleslopefunc} Schreibe eine Funktion, die in einem $x$ und einem
$y$ Vektor die Datenpaare \"uberreicht bekommt und die Steigung der
Ursprungsgeraden, die die Likelihood maximiert, zur\"uckgibt
($\sigma=\text{const}$).
\part
Schreibe ein Skript, das Datenpaare erzeugt, die um eine
Ursprungsgerade mit vorgegebener Steigung streuen. Berechne mit der
Funktion aus \pref{mleslopefunc} die Steigung aus den Daten,
vergleiche mit der wahren Steigung, und plotte die urspr\"ungliche
sowie die gefittete Gerade zusammen mit den Daten.
\part
Ver\"andere die Anzahl der Datenpunkte, die Steigung, sowie die
Streuung der Daten um die Gerade.
\end{parts}
\begin{solution}
\lstinputlisting{mleslope.m}
\lstinputlisting{mlepropfit.m}
\includegraphics[width=1\textwidth]{mlepropfit}
\end{solution}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Maximum-Likelihood-Sch\"atzer einer Wahrscheinlichkeitsdichtefunktion}
Verschiedene Wahrscheinlichkeitsdichtefunktionen haben Parameter, die
nicht so einfach wie der Mittelwert und die Standardabweichung einer
Normalverteilung direkt aus den Daten berechnet werden k\"onnen. Solche Parameter
m\"ussen dann aus den Daten mit der Maximum-Likelihood-Methode gefittet werden.
Um dies zu veranschaulichen, ziehen wir uns diesmal nicht normalverteilte Zufallszahlen, sondern Zufallszahlen aus der Gamma-Verteilung.
\begin{parts}
\part
Finde heraus welche \code{matlab} Funktion die
Wahrscheinlichkeitsdichtefunktion (probability density function) der
Gamma-Verteilung berechnet.
\part
Plotte mit Hilfe dieser Funktion die Wahrscheinlichkeitsdichtefunktion
der Gamma-Verteilung f\"ur verschiedene Werte des (positiven) ``shape'' Parameters.
Den ``scale'' Parameter setzen wir auf Eins.
\part
Finde heraus, mit welcher Funktion gammaverteilte Zufallszahlen in
\code{matlab} gezogen werden k\"onnen. Erzeuge mit dieser Funktion
50 Zufallszahlen mit einem der oben geplotteten ``shape'' Parameter.
\part
Berechne und plotte ein normiertes Histogramm dieser Zufallszahlen.
\part
Finde heraus mit welcher \code{matlab}-Funktion eine beliebige
Verteilung (``distribution'') an die Zufallszahlen nach der
Maximum-Likelihood Methode gefittet werden kann. Wie wird diese
Funktion benutzt, um die Gammaverteilung an die Daten zu fitten?
\part
Bestimme mit dieser Funktion die Parameter der Gammaverteilung aus
den Zufallszahlen.
\part
Plotte anschlie{\ss}end die Gammaverteilung mit den gefitteten
Parametern.
\end{parts}
\begin{solution}
\lstinputlisting{mlepdffit.m}
\includegraphics[width=1\textwidth]{mlepdffit}
\end{solution}
\end{questions}
\end{document}

View File

@@ -1,58 +0,0 @@
% Distribution of the mean and of Student's t for small samples
% drawn from normally distributed data. Needs the function savefigpdf().

%% (a) generate random numbers:
n = 100000;
x=randn(n, 1);

% Gaussian probability density with zero mean and standard deviation s:
gausspdf = @(v, s) exp(-0.5*(v/s).^2)/sqrt(2.0*pi)/s;

for nsamples=[3 5 10 50]
    nsamples
    %% compute mean, standard deviation and t:
    nmeans = 10000;
    means = zeros( nmeans, 1 );
    sdevs = zeros( nmeans, 1 );
    students = zeros( nmeans, 1 );
    for k = 1:nmeans
        % draw nsamples values from x at random positions:
        sample = x(randi(n, nsamples, 1));
        means(k) = mean(sample);
        sdevs(k) = std(sample);
        % t: the mean in units of its estimated standard error
        students(k) = means(k)/sdevs(k)*sqrt(nsamples);
    end

    % Gaussian pdfs:
    msdev = std(means);   % measured standard deviation of the means
    tsdev = 1.0;          % unit standard deviation for comparison with t
    dxg=0.01;
    xmax = 10.0;
    xmin = -xmax;
    xg = xmin:dxg:xmax;
    pm = gausspdf(xg, msdev);
    pt = gausspdf(xg, tsdev);

    %% plots
    % left: normalized histogram of the means with Gaussian
    subplot(1, 2, 1)
    bins = xmin:0.2:xmax;
    [h,b] = hist(means, bins);
    binwidth = b(2)-b(1);
    h = h/sum(h)/binwidth;
    bar(b, h, 'facecolor', 'b', 'edgecolor', 'b')
    hold on
    plot(xg, pm, 'r', 'linewidth', 2)
    title( sprintf('sample size = %d', nsamples) );
    xlim( [-3, 3] );
    xlabel('Mean');
    ylabel('pdf');
    hold off;

    % right: normalized histogram of t with unit Gaussian
    subplot(1, 2, 2)
    bins = xmin:0.5:xmax;
    [h,b] = hist(students, bins);
    binwidth = b(2)-b(1);
    h = h/sum(h)/binwidth;
    bar(b, h, 'facecolor', 'b', 'edgecolor', 'b')
    hold on
    plot(xg, pt, 'r', 'linewidth', 2)
    title( sprintf('sample size = %d', nsamples) );
    xlim( [-8, 8] );
    xlabel('Student-t');
    ylabel('pdf');
    hold off;

    savefigpdf( gcf, sprintf('tdistribution-n%02d.pdf', nsamples), 14, 5 );
    pause( 3.0 )
end

File diff suppressed because it is too large Load Diff