Reorganized the folders and started a common script for the lectures.
This commit is contained in:
@@ -1,24 +0,0 @@
|
||||
nsamples = 100;
|
||||
nresamples = 1000;
|
||||
|
||||
% draw a SRS (simple random sample, "Stichprobe") from the population:
|
||||
x = randn( 1, nsamples );
|
||||
fprintf('%-30s %-5s %-5s %-5s\n', '', 'mean', 'stdev', 'sem' )
|
||||
fprintf('%30s %5.2f %5.2f %5.2f\n', 'single SRS', mean( x ), std( x ), std( x )/sqrt(nsamples) )
|
||||
|
||||
% bootstrap the mean:
|
||||
mus = zeros(nresamples,1); % vector for storing the means
|
||||
for i = 1:nresamples % loop for generating the bootstraps
|
||||
inx = randi(nsamples, 1, nsamples); % range, 1D-vector, number
|
||||
xr = x(inx); % resample the original SRS
|
||||
mus(i) = mean(xr); % compute statistic of the resampled SRS
|
||||
end
|
||||
fprintf('%30s %5.2f %5.2f -\n', 'bootstrapped distribution', mean( mus ), std( mus ) )
|
||||
|
||||
% many SRS (we can do that with the random number generator, but not in real life!):
|
||||
musrs = zeros(nresamples,1); % vector for the means of each SRS
|
||||
for i = 1:nresamples
|
||||
x = randn( 1, nsamples ); % draw a new SRS
|
||||
musrs(i) = mean( x ); % compute its mean
|
||||
end
|
||||
fprintf('%30s %5.2f %5.2f -\n', 'sampling distribution', mean( musrs ), std( musrs ) )
|
||||
Binary file not shown.
Binary file not shown.
@@ -1,6 +0,0 @@
|
||||
function error = lsq_error(parameter, x, y)
|
||||
% parameter(1) is the slope
|
||||
% parameter(2) is the intercept
|
||||
|
||||
f_x = x .* parameter(1) + parameter(2);
|
||||
error = mean((f_x - y).^2);
|
||||
@@ -1,7 +0,0 @@
|
||||
function gradient = lsq_gradient(parameter, x, y)
|
||||
h = 1e-6;
|
||||
|
||||
partial_m = (lsq_error([parameter(1)+h, parameter(2)],x,y) - lsq_error(parameter,x,y))/ h;
|
||||
partial_n = (lsq_error([parameter(1), parameter(2)+h],x,y) - lsq_error(parameter,x,y))/ h;
|
||||
|
||||
gradient = [partial_m, partial_n];
|
||||
@@ -1,9 +0,0 @@
|
||||
function gradient = lsq_gradient_sigmoid(parameter, x, y)
|
||||
h = 1e-6;
|
||||
|
||||
gradient = zeros(size(parameter));
|
||||
for i = 1:length(parameter)
|
||||
parameter_h = parameter;
|
||||
parameter_h(i) = parameter_h(i) + h;
|
||||
gradient(i) = (lsq_sigmoid_error(parameter_h, x, y) - lsq_sigmoid_error(parameter, x, y)) / h;
|
||||
end
|
||||
@@ -1,8 +0,0 @@
|
||||
function error = lsq_sigmoid_error(parameter, x, y)
|
||||
% p(1) the amplitude
|
||||
% p(2) the slope
|
||||
% p(3) the x-shift
|
||||
% p(4) the y-shift
|
||||
|
||||
y_est = parameter(1)./(1+ exp(-parameter(2) .* (x - parameter(3)))) + parameter(4);
|
||||
error = mean((y_est - y).^2);
|
||||
Binary file not shown.
@@ -1,29 +0,0 @@
|
||||
% draw random numbers:
|
||||
n = 100;
|
||||
mu = 3.0;
|
||||
sigma =2.0;
|
||||
x = randn(n,1)*sigma+mu;
|
||||
fprintf(' mean of the data is %.2f\n', mean(x))
|
||||
fprintf('standard deviation of the data is %.2f\n', std(x))
|
||||
|
||||
% mean as parameter:
|
||||
pmus = 2.0:0.01:4.0;
|
||||
% matrix with the probabilities for each x and pmus:
|
||||
lms = zeros(length(x), length(pmus));
|
||||
for i=1:length(pmus)
|
||||
pmu = pmus(i);
|
||||
p = exp(-0.5*((x-pmu)/sigma).^2.0)/sqrt(2.0*pi)/sigma;
|
||||
lms(:,i) = p;
|
||||
end
|
||||
lm = prod(lms, 1); % likelihood
|
||||
loglm = sum(log(lms), 1); % log likelihood
|
||||
|
||||
% plot likelihood of mean:
|
||||
subplot(1, 2, 1);
|
||||
plot(pmus, lm );
|
||||
xlabel('mean')
|
||||
ylabel('likelihood')
|
||||
subplot(1, 2, 2);
|
||||
plot(pmus, loglm );
|
||||
xlabel('mean')
|
||||
ylabel('log likelihood')
|
||||
@@ -1,112 +0,0 @@
|
||||
clear
|
||||
close all
|
||||
|
||||
%% first, plot the raw data
|
||||
load('lin_regression.mat');
|
||||
|
||||
figure()
|
||||
plot(x,y, 'o')
|
||||
xlabel('Input')
|
||||
ylabel('Output')
|
||||
|
||||
%% plot the error surface
|
||||
clear
|
||||
load('lin_regression.mat')
|
||||
ms = -5:0.25:5;
|
||||
ns = -30:1:30;
|
||||
|
||||
error_surf = zeros(length(ms), length(ns));
|
||||
|
||||
for i = 1:length(ms)
|
||||
for j = 1:length(ns)
|
||||
error_surf(i,j) = lsq_error([ms(i), ns(j)], x, y);
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
% plot the error surface
|
||||
figure()
|
||||
[N,M] = meshgrid(ns, ms);
|
||||
s = surface(M,N,error_surf);
|
||||
xlabel('slope')
|
||||
ylabel('intercept')
|
||||
zlabel('error')
|
||||
view(3)
|
||||
% rotate(s, [1 1 0], 25 )
|
||||
|
||||
%% Plot the gradient at different points in the surface
|
||||
clear
|
||||
load('lin_regression.mat')
|
||||
|
||||
ms = -1:0.5:5;
|
||||
ns = -10:1:10;
|
||||
|
||||
error_surf = zeros(length(ms), length(ns));
|
||||
gradient_m = zeros(size(error_surf));
|
||||
gradient_n = zeros(size(error_surf));
|
||||
|
||||
for i = 1:length(ms)
|
||||
for j = 1:length(ns)
|
||||
error_surf(i,j) = lsq_error([ms(i), ns(j)], x, y);
|
||||
grad = lsq_gradient([ms(i), ns(j)], x, y);
|
||||
gradient_m(i,j) = grad(1);
|
||||
gradient_n(i,j) = grad(2);
|
||||
end
|
||||
end
|
||||
|
||||
figure()
|
||||
hold on
|
||||
[N, M] = meshgrid(ns, ms);
|
||||
surface(M,N, error_surf, 'FaceAlpha', 0.5);
|
||||
contour(M,N, error_surf, 50);
|
||||
quiver(M,N, gradient_m, gradient_n)
|
||||
view(3)
|
||||
xlabel('slope')
|
||||
ylabel('intercept')
|
||||
zlabel('error')
|
||||
|
||||
%% do the gradient descent
|
||||
clear
|
||||
close all
|
||||
|
||||
load('lin_regression.mat')
|
||||
|
||||
ms = -1:0.5:5;
|
||||
ns = -10:1:10;
|
||||
|
||||
position = [-2. 10.];
|
||||
gradient = [];
|
||||
error = [];
|
||||
eps = 0.01;
|
||||
|
||||
% claculate error surface
|
||||
error_surf = zeros(length(ms), length(ns));
|
||||
for i = 1:length(ms)
|
||||
for j = 1:length(ns)
|
||||
error_surf(i,j) = lsq_error([ms(i), ns(j)], x, y);
|
||||
end
|
||||
end
|
||||
figure()
|
||||
hold on
|
||||
[N, M] = meshgrid(ns, ms);
|
||||
surface(M,N, error_surf, 'FaceAlpha', 0.5);
|
||||
view(3)
|
||||
xlabel('slope')
|
||||
ylabel('intersection')
|
||||
zlabel('error')
|
||||
|
||||
% do the descent
|
||||
|
||||
while isempty(gradient) || norm(gradient) > 0.1
|
||||
gradient = lsq_gradient(position, x,y);
|
||||
error = lsq_error(position, x, y);
|
||||
plot3(position(1), position(2), error, 'o', 'color', 'red')
|
||||
position = position - eps .* gradient;
|
||||
pause(0.25)
|
||||
end
|
||||
disp('gradient descent done!')
|
||||
disp(strcat('final position: ', num2str(position)))
|
||||
disp(strcat('final error: ', num2str(error)))
|
||||
|
||||
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
|
||||
|
||||
%% fit the sigmoid
|
||||
|
||||
clear
|
||||
close all
|
||||
|
||||
load('iv_curve.mat')
|
||||
|
||||
figure()
|
||||
plot(voltage, current, 'o')
|
||||
xlabel('voltate [mV]')
|
||||
ylabel('current [pA]')
|
||||
|
||||
% amplitude, slope, x-shift, y-shift
|
||||
%parameter = [10 0.25 -50, 2.5];
|
||||
parameter = [20 0.5 -50, 2.5];
|
||||
|
||||
eps = 0.1;
|
||||
% do the descent
|
||||
gradient = [];
|
||||
steps = 0;
|
||||
error = [];
|
||||
|
||||
while isempty(gradient) || norm(gradient) > 0.01
|
||||
steps = steps + 1;
|
||||
gradient = lsq_gradient_sigmoid(parameter, voltage, current);
|
||||
error(steps) = lsq_sigmoid_error(parameter, voltage, current);
|
||||
parameter = parameter - eps .* gradient;
|
||||
end
|
||||
plot(1:steps, error)
|
||||
|
||||
disp('gradient descent done!')
|
||||
disp(strcat('final position: ', num2str(parameter)))
|
||||
disp(strcat('final error: ', num2str(error(end))))
|
||||
|
||||
%% use fminsearch
|
||||
parameter = [10 0.5 -50, 2.5];
|
||||
|
||||
objective_function = @(p)lsq_sigmoid_error(p, voltage, current);
|
||||
param = fminunc(objective_function, parameter);
|
||||
disp(param)
|
||||
param1 = fminsearch(objective_function, parameter);
|
||||
disp(param1)
|
||||
@@ -24,4 +24,7 @@ yy = gampdf(xx, p(1), p(2));
|
||||
plot(xx, yy, '-k', 'linewidth', 5, 'DisplayName', 'mle' );
|
||||
|
||||
hold off;
|
||||
xlabel('x');
|
||||
ylabel('pdf');
|
||||
legend('show');
|
||||
savefigpdf(gcf, 'mlepdffit.pdf', 12, 8)
|
||||
|
||||
BIN
statistics/exercises/mlepdffit.pdf
Normal file
BIN
statistics/exercises/mlepdffit.pdf
Normal file
Binary file not shown.
BIN
statistics/exercises/mlepropfit.pdf
Normal file
BIN
statistics/exercises/mlepropfit.pdf
Normal file
Binary file not shown.
BIN
statistics/exercises/mlestd.pdf
Normal file
BIN
statistics/exercises/mlestd.pdf
Normal file
Binary file not shown.
@@ -183,7 +183,7 @@ Normalverteilung entstammen, sonder aus der Gamma-Verteilung.
|
||||
\end{parts}
|
||||
\begin{solution}
|
||||
\lstinputlisting{mlepdffit.m}
|
||||
%\includegraphics[width=1\textwidth]{mlepdffit}
|
||||
\includegraphics[width=1\textwidth]{mlepdffit}
|
||||
\end{solution}
|
||||
|
||||
\end{questions}
|
||||
|
||||
@@ -1,21 +1,20 @@
|
||||
TEXFILES=descriptivestatistics.tex linear_regression.tex #$(wildcard *.tex)
|
||||
PDFFILES=$(TEXFILES:.tex=.pdf)
|
||||
BASENAME=descriptivestatistics
|
||||
PYFILES=$(wildcard *.py)
|
||||
PYPDFFILES=$(PYFILES:.py=.pdf)
|
||||
|
||||
pdf : $(PDFFILES) $(PYPDFFILES)
|
||||
pdf : $(BASENAME)-chapter.pdf $(PYPDFFILES)
|
||||
|
||||
$(PDFFILES) : %.pdf : %.tex
|
||||
$(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex
|
||||
pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
|
||||
|
||||
$(PYPDFFILES) : %.pdf : %.py
|
||||
python $<
|
||||
|
||||
clean :
|
||||
rm -f *~ $(TEXFILES:.tex=.aux) $(TEXFILES:.tex=.log) $(TEXFILES:.tex=.out) $(TEXFILES:.tex=.nav) $(TEXFILES:.tex=.snm) $(TEXFILES:.tex=.toc) $(TEXFILES:.tex=.vrb)
|
||||
rm -f *~ $(BASENAME)-chapter.aux $(BASENAME)-chapter.log $(BASENAME)-chapter.out $(BASENAME).aux $(BASENAME).log
|
||||
|
||||
cleanall : clean
|
||||
rm -f $(PDFFILES)
|
||||
rm -f $(BASENAME)-chapter.pdf
|
||||
|
||||
watch :
|
||||
while true; do ! make -q pdf && make pdf; sleep 0.5; done
|
||||
|
||||
361
statistics/lecture/descriptivestatistics-chapter.tex
Normal file
361
statistics/lecture/descriptivestatistics-chapter.tex
Normal file
@@ -0,0 +1,361 @@
|
||||
\documentclass[12pt]{report}
|
||||
|
||||
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
|
||||
\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
|
||||
\date{WS 15/16}
|
||||
|
||||
%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% \newcommand{\tr}[2]{#1} % en
|
||||
% \usepackage[english]{babel}
|
||||
\newcommand{\tr}[2]{#2} % de
|
||||
\usepackage[german]{babel}
|
||||
|
||||
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{pslatex} % nice font for pdf file
|
||||
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
||||
|
||||
%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
|
||||
\setcounter{tocdepth}{1}
|
||||
|
||||
%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage[sf,bf,it,big,clearempty]{titlesec}
|
||||
\setcounter{secnumdepth}{1}
|
||||
|
||||
|
||||
%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
|
||||
|
||||
|
||||
%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{graphicx}
|
||||
\usepackage{xcolor}
|
||||
\pagecolor{white}
|
||||
|
||||
\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)%
|
||||
\put(0,4){\line(1,0){170}}%
|
||||
\multiput(0,2)(10,0){18}{\line(0,1){4}}%
|
||||
\multiput(0,3)(1,0){170}{\line(0,1){2}}%
|
||||
\put(0,0){\makebox(0,0){{\tiny 0}}}%
|
||||
\put(10,0){\makebox(0,0){{\tiny 1}}}%
|
||||
\put(20,0){\makebox(0,0){{\tiny 2}}}%
|
||||
\put(30,0){\makebox(0,0){{\tiny 3}}}%
|
||||
\put(40,0){\makebox(0,0){{\tiny 4}}}%
|
||||
\put(50,0){\makebox(0,0){{\tiny 5}}}%
|
||||
\put(60,0){\makebox(0,0){{\tiny 6}}}%
|
||||
\put(70,0){\makebox(0,0){{\tiny 7}}}%
|
||||
\put(80,0){\makebox(0,0){{\tiny 8}}}%
|
||||
\put(90,0){\makebox(0,0){{\tiny 9}}}%
|
||||
\put(100,0){\makebox(0,0){{\tiny 10}}}%
|
||||
\put(110,0){\makebox(0,0){{\tiny 11}}}%
|
||||
\put(120,0){\makebox(0,0){{\tiny 12}}}%
|
||||
\put(130,0){\makebox(0,0){{\tiny 13}}}%
|
||||
\put(140,0){\makebox(0,0){{\tiny 14}}}%
|
||||
\put(150,0){\makebox(0,0){{\tiny 15}}}%
|
||||
\put(160,0){\makebox(0,0){{\tiny 16}}}%
|
||||
\put(170,0){\makebox(0,0){{\tiny 17}}}%
|
||||
\end{picture}\par}
|
||||
|
||||
% figures:
|
||||
\setlength{\fboxsep}{0pt}
|
||||
\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}}
|
||||
%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}}
|
||||
%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}}
|
||||
%\newcommand{\texpicture}[1]{}
|
||||
\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}}
|
||||
|
||||
% maximum number of floats:
|
||||
\setcounter{topnumber}{2}
|
||||
\setcounter{bottomnumber}{0}
|
||||
\setcounter{totalnumber}{2}
|
||||
|
||||
% float placement fractions:
|
||||
\renewcommand{\textfraction}{0.2}
|
||||
\renewcommand{\topfraction}{0.8}
|
||||
\renewcommand{\bottomfraction}{0.0}
|
||||
\renewcommand{\floatpagefraction}{0.5}
|
||||
|
||||
% spacing for floats:
|
||||
\setlength{\floatsep}{12pt plus 2pt minus 2pt}
|
||||
\setlength{\textfloatsep}{20pt plus 4pt minus 2pt}
|
||||
\setlength{\intextsep}{12pt plus 2pt minus 2pt}
|
||||
|
||||
% spacing for a floating page:
|
||||
\makeatletter
|
||||
\setlength{\@fptop}{0pt}
|
||||
\setlength{\@fpsep}{8pt plus 2.0fil}
|
||||
\setlength{\@fpbot}{0pt plus 1.0fil}
|
||||
\makeatother
|
||||
|
||||
% rules for floats:
|
||||
\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}}
|
||||
\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}}
|
||||
|
||||
% captions:
|
||||
\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption}
|
||||
|
||||
% put caption on separate float:
|
||||
\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]}
|
||||
|
||||
% references to panels of a figure within the caption:
|
||||
\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}}
|
||||
% references to figures:
|
||||
\newcommand{\panel}[1]{\textsf{\uppercase{#1}}}
|
||||
\newcommand{\fref}[1]{\textup{\ref{#1}}}
|
||||
\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}}
|
||||
% references to figures in normal text:
|
||||
\newcommand{\fig}{Fig.}
|
||||
\newcommand{\Fig}{Figure}
|
||||
\newcommand{\figs}{Figs.}
|
||||
\newcommand{\Figs}{Figures}
|
||||
\newcommand{\figref}[1]{\fig~\fref{#1}}
|
||||
\newcommand{\Figref}[1]{\Fig~\fref{#1}}
|
||||
\newcommand{\figsref}[1]{\figs~\fref{#1}}
|
||||
\newcommand{\Figsref}[1]{\Figs~\fref{#1}}
|
||||
\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}}
|
||||
\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}}
|
||||
\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}}
|
||||
\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}}
|
||||
% references to figures within bracketed text:
|
||||
\newcommand{\figb}{Fig.}
|
||||
\newcommand{\figsb}{Figs.}
|
||||
\newcommand{\figrefb}[1]{\figb~\fref{#1}}
|
||||
\newcommand{\figsrefb}[1]{\figsb~\fref{#1}}
|
||||
\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}}
|
||||
\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}}
|
||||
|
||||
% references to tables:
|
||||
\newcommand{\tref}[1]{\textup{\ref{#1}}}
|
||||
% references to tables in normal text:
|
||||
\newcommand{\tab}{Tab.}
|
||||
\newcommand{\Tab}{Table}
|
||||
\newcommand{\tabs}{Tabs.}
|
||||
\newcommand{\Tabs}{Tables}
|
||||
\newcommand{\tabref}[1]{\tab~\tref{#1}}
|
||||
\newcommand{\Tabref}[1]{\Tab~\tref{#1}}
|
||||
\newcommand{\tabsref}[1]{\tabs~\tref{#1}}
|
||||
\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}}
|
||||
% references to tables within bracketed text:
|
||||
\newcommand{\tabb}{Tab.}
|
||||
\newcommand{\tabsb}{Tab.}
|
||||
\newcommand{\tabrefb}[1]{\tabb~\tref{#1}}
|
||||
\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}}
|
||||
|
||||
|
||||
%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%\newcommand{\eqref}[1]{(\ref{#1})}
|
||||
\newcommand{\eqn}{\tr{Eq}{Gl}.}
|
||||
\newcommand{\Eqn}{\tr{Eq}{Gl}.}
|
||||
\newcommand{\eqns}{\tr{Eqs}{Gln}.}
|
||||
\newcommand{\Eqns}{\tr{Eqs}{Gln}.}
|
||||
\newcommand{\eqnref}[1]{\eqn~\eqref{#1}}
|
||||
\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}}
|
||||
\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}}
|
||||
\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}}
|
||||
|
||||
|
||||
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{listings}
|
||||
\lstset{
|
||||
inputpath=../code,
|
||||
basicstyle=\ttfamily\footnotesize,
|
||||
numbers=left,
|
||||
showstringspaces=false,
|
||||
language=Matlab,
|
||||
commentstyle=\itshape\color{darkgray},
|
||||
keywordstyle=\color{blue},
|
||||
stringstyle=\color{green},
|
||||
backgroundcolor=\color{blue!10},
|
||||
breaklines=true,
|
||||
breakautoindent=true,
|
||||
columns=flexible,
|
||||
frame=single,
|
||||
caption={\protect\filename@parse{\lstname}\protect\filename@base},
|
||||
captionpos=t,
|
||||
xleftmargin=1em,
|
||||
xrightmargin=1em,
|
||||
aboveskip=10pt
|
||||
}
|
||||
|
||||
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{amsmath}
|
||||
\usepackage{bm}
|
||||
\usepackage{dsfont}
|
||||
\newcommand{\naZ}{\mathds{N}}
|
||||
\newcommand{\gaZ}{\mathds{Z}}
|
||||
\newcommand{\raZ}{\mathds{Q}}
|
||||
\newcommand{\reZ}{\mathds{R}}
|
||||
\newcommand{\reZp}{\mathds{R^+}}
|
||||
\newcommand{\reZpN}{\mathds{R^+_0}}
|
||||
\newcommand{\koZ}{\mathds{C}}
|
||||
|
||||
|
||||
%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{ifthen}
|
||||
|
||||
\newcommand{\code}[1]{\texttt{#1}}
|
||||
|
||||
\newcommand{\source}[1]{
|
||||
\begin{flushright}
|
||||
\color{gray}\scriptsize \url{#1}
|
||||
\end{flushright}
|
||||
}
|
||||
|
||||
\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
|
||||
{\medskip}
|
||||
|
||||
\newcounter{maxexercise}
|
||||
\setcounter{maxexercise}{9} % show listings up to exercise maxexercise
|
||||
\newcounter{theexercise}
|
||||
\setcounter{theexercise}{1}
|
||||
\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
|
||||
\arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}%
|
||||
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}}
|
||||
|
||||
\graphicspath{{figures/}}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{document}
|
||||
|
||||
\include{descriptivestatistics}
|
||||
|
||||
\end{document}
|
||||
|
||||
|
||||
\end{document}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Statistics}
|
||||
What is "a statistic"? % dt. Sch\"atzfunktion
|
||||
\begin{definition}[statistic]
|
||||
A statistic (singular) is a single measure of some attribute of a
|
||||
sample (e.g., its arithmetic mean value). It is calculated by
|
||||
applying a function (statistical algorithm) to the values of the
|
||||
items of the sample, which are known together as a set of data.
|
||||
|
||||
\source{http://en.wikipedia.org/wiki/Statistic}
|
||||
\end{definition}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Data types}
|
||||
|
||||
\subsection{Nominal scale}
|
||||
\begin{itemize}
|
||||
\item Binary
|
||||
\begin{itemize}
|
||||
\item ``yes/no'',
|
||||
\item ``true/false'',
|
||||
\item ``success/failure'', etc.
|
||||
\end{itemize}
|
||||
\item Categorial
|
||||
\begin{itemize}
|
||||
\item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
|
||||
\item blood type (``A/B/AB/0''),
|
||||
\item parts of speech (``noun/veerb/preposition/article/...''),
|
||||
\item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
|
||||
\end{itemize}
|
||||
\item Each observation/measurement/sample is put into one category
|
||||
\item There is no reasonable order among the categories.\\
|
||||
example: [rods, cones] vs. [cones, rods]
|
||||
\item Statistics: mode, i.e. the most common item
|
||||
\end{itemize}
|
||||
|
||||
\subsection{Ordinal scale}
|
||||
\begin{itemize}
|
||||
\item Like nominal scale, but with an order
|
||||
\item Examples: ranks, ratings
|
||||
\begin{itemize}
|
||||
\item ``bad/ok/good'',
|
||||
\item ``cold/warm/hot'',
|
||||
\item ``young/old'', etc.
|
||||
\end{itemize}
|
||||
\item {\bf But:} there is no reasonable measure of {\em distance}
|
||||
between the classes
|
||||
\item Statistics: mode, median
|
||||
\end{itemize}
|
||||
|
||||
\subsection{Interval scale}
|
||||
\begin{itemize}
|
||||
\item Quantitative/metric values
|
||||
\item Reasonable measure of distance between values, but no absolute zero
|
||||
\item Examples:
|
||||
\begin{itemize}
|
||||
\item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
|
||||
\item Direction measured in degrees from magnetic or true north
|
||||
\end{itemize}
|
||||
\item Statistics:
|
||||
\begin{itemize}
|
||||
\item Central tendency: mode, median, arithmetic mean
|
||||
\item Dispersion: range, standard deviation
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\subsection{Absolute/ratio scale}
|
||||
\begin{itemize}
|
||||
\item Like interval scale, but with absolute origin/zero
|
||||
\item Examples:
|
||||
\begin{itemize}
|
||||
\item Temperature in $^\circ$K
|
||||
\item Length, mass, duration, electric charge, ...
|
||||
\item Plane angle, etc.
|
||||
\item Count (e.g. number of spikes in response to a stimulus)
|
||||
\end{itemize}
|
||||
\item Statistics:
|
||||
\begin{itemize}
|
||||
\item Central tendency: mode, median, arithmetic, geometric, harmonic mean
|
||||
\item Dispersion: range, standard deviation
|
||||
\item Coefficient of variation (ratio standard deviation/mean)
|
||||
\item All other statistical measures
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\subsection{Data types}
|
||||
\begin{itemize}
|
||||
\item Data type selects
|
||||
\begin{itemize}
|
||||
\item statistics
|
||||
\item type of plots (bar graph versus x-y plot)
|
||||
\item correct tests
|
||||
\end{itemize}
|
||||
\item Scales exhibit increasing information content from nominal
|
||||
to absolute.\\
|
||||
Conversion ,,downwards'' is always possible
|
||||
\item For example: size measured in meter (ratio scale) $\rightarrow$
|
||||
categories ``small/medium/large'' (ordinal scale)
|
||||
\end{itemize}
|
||||
|
||||
\subsection{Examples from neuroscience}
|
||||
\begin{itemize}
|
||||
\item {\bf absolute:}
|
||||
\begin{itemize}
|
||||
\item size of neuron/brain
|
||||
\item length of axon
|
||||
\item ion concentration
|
||||
\item membrane potential
|
||||
\item firing rate
|
||||
\end{itemize}
|
||||
|
||||
\item {\bf interval:}
|
||||
\begin{itemize}
|
||||
\item edge orientation
|
||||
\end{itemize}
|
||||
|
||||
\item {\bf ordinal:}
|
||||
\begin{itemize}
|
||||
\item stages of a disease
|
||||
\item ratings
|
||||
\end{itemize}
|
||||
|
||||
\item {\bf nominal:}
|
||||
\begin{itemize}
|
||||
\item cell type
|
||||
\item odor
|
||||
\item states of an ion channel
|
||||
\end{itemize}
|
||||
|
||||
\end{itemize}
|
||||
|
||||
@@ -1,229 +1,3 @@
|
||||
\documentclass[12pt]{report}
|
||||
|
||||
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
|
||||
\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
|
||||
\date{WS 15/16}
|
||||
|
||||
%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% \newcommand{\tr}[2]{#1} % en
|
||||
% \usepackage[english]{babel}
|
||||
\newcommand{\tr}[2]{#2} % de
|
||||
\usepackage[german]{babel}
|
||||
|
||||
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{pslatex} % nice font for pdf file
|
||||
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
||||
|
||||
%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
|
||||
\setcounter{tocdepth}{1}
|
||||
|
||||
%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage[sf,bf,it,big,clearempty]{titlesec}
|
||||
\setcounter{secnumdepth}{1}
|
||||
|
||||
|
||||
%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
|
||||
|
||||
|
||||
%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{graphicx}
|
||||
\usepackage{xcolor}
|
||||
\pagecolor{white}
|
||||
|
||||
\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)%
|
||||
\put(0,4){\line(1,0){170}}%
|
||||
\multiput(0,2)(10,0){18}{\line(0,1){4}}%
|
||||
\multiput(0,3)(1,0){170}{\line(0,1){2}}%
|
||||
\put(0,0){\makebox(0,0){{\tiny 0}}}%
|
||||
\put(10,0){\makebox(0,0){{\tiny 1}}}%
|
||||
\put(20,0){\makebox(0,0){{\tiny 2}}}%
|
||||
\put(30,0){\makebox(0,0){{\tiny 3}}}%
|
||||
\put(40,0){\makebox(0,0){{\tiny 4}}}%
|
||||
\put(50,0){\makebox(0,0){{\tiny 5}}}%
|
||||
\put(60,0){\makebox(0,0){{\tiny 6}}}%
|
||||
\put(70,0){\makebox(0,0){{\tiny 7}}}%
|
||||
\put(80,0){\makebox(0,0){{\tiny 8}}}%
|
||||
\put(90,0){\makebox(0,0){{\tiny 9}}}%
|
||||
\put(100,0){\makebox(0,0){{\tiny 10}}}%
|
||||
\put(110,0){\makebox(0,0){{\tiny 11}}}%
|
||||
\put(120,0){\makebox(0,0){{\tiny 12}}}%
|
||||
\put(130,0){\makebox(0,0){{\tiny 13}}}%
|
||||
\put(140,0){\makebox(0,0){{\tiny 14}}}%
|
||||
\put(150,0){\makebox(0,0){{\tiny 15}}}%
|
||||
\put(160,0){\makebox(0,0){{\tiny 16}}}%
|
||||
\put(170,0){\makebox(0,0){{\tiny 17}}}%
|
||||
\end{picture}\par}
|
||||
|
||||
% figures:
|
||||
\setlength{\fboxsep}{0pt}
|
||||
\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}}
|
||||
%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}}
|
||||
%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}}
|
||||
%\newcommand{\texpicture}[1]{}
|
||||
\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}}
|
||||
|
||||
% maximum number of floats:
|
||||
\setcounter{topnumber}{2}
|
||||
\setcounter{bottomnumber}{0}
|
||||
\setcounter{totalnumber}{2}
|
||||
|
||||
% float placement fractions:
|
||||
\renewcommand{\textfraction}{0.2}
|
||||
\renewcommand{\topfraction}{0.8}
|
||||
\renewcommand{\bottomfraction}{0.0}
|
||||
\renewcommand{\floatpagefraction}{0.5}
|
||||
|
||||
% spacing for floats:
|
||||
\setlength{\floatsep}{12pt plus 2pt minus 2pt}
|
||||
\setlength{\textfloatsep}{20pt plus 4pt minus 2pt}
|
||||
\setlength{\intextsep}{12pt plus 2pt minus 2pt}
|
||||
|
||||
% spacing for a floating page:
|
||||
\makeatletter
|
||||
\setlength{\@fptop}{0pt}
|
||||
\setlength{\@fpsep}{8pt plus 2.0fil}
|
||||
\setlength{\@fpbot}{0pt plus 1.0fil}
|
||||
\makeatother
|
||||
|
||||
% rules for floats:
|
||||
\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}}
|
||||
\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}}
|
||||
|
||||
% captions:
|
||||
\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption}
|
||||
|
||||
% put caption on separate float:
|
||||
\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]}
|
||||
|
||||
% references to panels of a figure within the caption:
|
||||
\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}}
|
||||
% references to figures:
|
||||
\newcommand{\panel}[1]{\textsf{\uppercase{#1}}}
|
||||
\newcommand{\fref}[1]{\textup{\ref{#1}}}
|
||||
\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}}
|
||||
% references to figures in normal text:
|
||||
\newcommand{\fig}{Fig.}
|
||||
\newcommand{\Fig}{Figure}
|
||||
\newcommand{\figs}{Figs.}
|
||||
\newcommand{\Figs}{Figures}
|
||||
\newcommand{\figref}[1]{\fig~\fref{#1}}
|
||||
\newcommand{\Figref}[1]{\Fig~\fref{#1}}
|
||||
\newcommand{\figsref}[1]{\figs~\fref{#1}}
|
||||
\newcommand{\Figsref}[1]{\Figs~\fref{#1}}
|
||||
\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}}
|
||||
\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}}
|
||||
\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}}
|
||||
\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}}
|
||||
% references to figures within bracketed text:
|
||||
\newcommand{\figb}{Fig.}
|
||||
\newcommand{\figsb}{Figs.}
|
||||
\newcommand{\figrefb}[1]{\figb~\fref{#1}}
|
||||
\newcommand{\figsrefb}[1]{\figsb~\fref{#1}}
|
||||
\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}}
|
||||
\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}}
|
||||
|
||||
% references to tables:
|
||||
\newcommand{\tref}[1]{\textup{\ref{#1}}}
|
||||
% references to tables in normal text:
|
||||
\newcommand{\tab}{Tab.}
|
||||
\newcommand{\Tab}{Table}
|
||||
\newcommand{\tabs}{Tabs.}
|
||||
\newcommand{\Tabs}{Tables}
|
||||
\newcommand{\tabref}[1]{\tab~\tref{#1}}
|
||||
\newcommand{\Tabref}[1]{\Tab~\tref{#1}}
|
||||
\newcommand{\tabsref}[1]{\tabs~\tref{#1}}
|
||||
\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}}
|
||||
% references to tables within bracketed text:
|
||||
\newcommand{\tabb}{Tab.}
|
||||
\newcommand{\tabsb}{Tab.}
|
||||
\newcommand{\tabrefb}[1]{\tabb~\tref{#1}}
|
||||
\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}}
|
||||
|
||||
|
||||
%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%\newcommand{\eqref}[1]{(\ref{#1})}
|
||||
\newcommand{\eqn}{\tr{Eq}{Gl}.}
|
||||
\newcommand{\Eqn}{\tr{Eq}{Gl}.}
|
||||
\newcommand{\eqns}{\tr{Eqs}{Gln}.}
|
||||
\newcommand{\Eqns}{\tr{Eqs}{Gln}.}
|
||||
\newcommand{\eqnref}[1]{\eqn~\eqref{#1}}
|
||||
\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}}
|
||||
\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}}
|
||||
\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}}
|
||||
|
||||
|
||||
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{listings}
|
||||
\lstset{
|
||||
inputpath=../code,
|
||||
basicstyle=\ttfamily\footnotesize,
|
||||
numbers=left,
|
||||
showstringspaces=false,
|
||||
language=Matlab,
|
||||
commentstyle=\itshape\color{darkgray},
|
||||
keywordstyle=\color{blue},
|
||||
stringstyle=\color{green},
|
||||
backgroundcolor=\color{blue!10},
|
||||
breaklines=true,
|
||||
breakautoindent=true,
|
||||
columns=flexible,
|
||||
frame=single,
|
||||
caption={\protect\filename@parse{\lstname}\protect\filename@base},
|
||||
captionpos=t,
|
||||
xleftmargin=1em,
|
||||
xrightmargin=1em,
|
||||
aboveskip=10pt
|
||||
}
|
||||
|
||||
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{amsmath}
|
||||
\usepackage{bm}
|
||||
\usepackage{dsfont}
|
||||
\newcommand{\naZ}{\mathds{N}}
|
||||
\newcommand{\gaZ}{\mathds{Z}}
|
||||
\newcommand{\raZ}{\mathds{Q}}
|
||||
\newcommand{\reZ}{\mathds{R}}
|
||||
\newcommand{\reZp}{\mathds{R^+}}
|
||||
\newcommand{\reZpN}{\mathds{R^+_0}}
|
||||
\newcommand{\koZ}{\mathds{C}}
|
||||
|
||||
|
||||
%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{ifthen}
|
||||
|
||||
\newcommand{\code}[1]{\texttt{#1}}
|
||||
|
||||
\newcommand{\source}[1]{
|
||||
\begin{flushright}
|
||||
\color{gray}\scriptsize \url{#1}
|
||||
\end{flushright}
|
||||
}
|
||||
|
||||
\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
|
||||
{\medskip}
|
||||
|
||||
\newcounter{maxexercise}
|
||||
\setcounter{maxexercise}{9} % show listings up to exercise maxexercise
|
||||
\newcounter{theexercise}
|
||||
\setcounter{theexercise}{1}
|
||||
\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
|
||||
\arabic{theexercise}:}\newline \newcommand{\exercisesource}{#1}}%
|
||||
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\value{theexercise}>\value{maxexercise}}{}{\medskip\lstinputlisting{\exercisesource}}}\medskip\stepcounter{theexercise}}
|
||||
|
||||
\graphicspath{{figures/}}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{document}
|
||||
|
||||
\maketitle
|
||||
|
||||
%\tableofcontents
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\chapter{\tr{Descriptive statistics}{Deskriptive Statistik}}
|
||||
@@ -453,418 +227,3 @@ Korrelationskoeffizienten nahe 0 (\figrefb{correlationfig}).
|
||||
$x$ abh\"angen, ergeben Korrelationskoeffizienten nahe Null.
|
||||
$\xi$ sind normalverteilte Zufallszahlen.}
|
||||
\end{figure}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\chapter{\tr{Bootstrap Methods}{Bootstrap Methoden}}
|
||||
|
||||
Beim Bootstrap erzeugt man sich die Verteilung von Statistiken durch Resampling
|
||||
aus der Stichprobe. Das hat mehrere Vorteile:
|
||||
\begin{itemize}
|
||||
\item Weniger Annahmen (z.B. muss eine Stichprobe nicht Normalverteilt sein).
|
||||
\item H\"ohere Genauigkeit als klassische Methoden.
|
||||
\item Allgemeing\"ultigkeit: Bootstrap Methoden sind sich sehr
|
||||
\"ahnlich f\"ur viele verschiedene Statistiken und ben\"otigen nicht
|
||||
f\"ur jede Statistik eine andere Formel.
|
||||
\end{itemize}
|
||||
|
||||
\begin{figure}[t]
|
||||
\includegraphics[width=0.8\textwidth]{2012-10-29_16-26-05_771}\\[2ex]
|
||||
\includegraphics[width=0.8\textwidth]{2012-10-29_16-41-39_523}\\[2ex]
|
||||
\includegraphics[width=0.8\textwidth]{2012-10-29_16-29-35_312}
|
||||
\caption{\tr{Why can we only measure a sample of the
|
||||
population?}{Warum k\"onnen wir nur eine Stichprobe der
|
||||
Grundgesamtheit messen?}}
|
||||
\end{figure}
|
||||
|
||||
\begin{figure}[t]
|
||||
\includegraphics[height=0.2\textheight]{srs1}\\[2ex]
|
||||
\includegraphics[height=0.2\textheight]{srs2}\\[2ex]
|
||||
\includegraphics[height=0.2\textheight]{srs3}
|
||||
  \caption{Bootstrap der Stichprobenverteilung (a) Von der
|
||||
Grundgesamtheit (population) mit unbekanntem Parameter
|
||||
(z.B. Mittelwert $\mu$) zieht man Stichproben (SRS: simple random
|
||||
samples). Die Statistik (hier Bestimmung von $\bar x$) kann f\"ur
|
||||
jede Stichprobe berechnet werden. Die erhaltenen Werte entstammen
|
||||
    der Stichprobenverteilung. Meistens wird aber nur eine Stichprobe
|
||||
gezogen! (b) Mit bestimmten Annahmen und Theorien kann man auf
|
||||
die Stichprobenverteilung schlie{\ss}en ohne sie gemessen zu
|
||||
haben. (c) Alternativ k\"onnen aus der einen Stichprobe viele
|
||||
Bootstrap-Stichproben generiert werden (resampling) und so
|
||||
Eigenschaften der Stichprobenverteilung empirisch bestimmt
|
||||
werden. Aus Hesterberg et al. 2003, Bootstrap Methods and
|
||||
    Permutation Tests}
|
||||
\end{figure}
|
||||
|
||||
\section{Bootstrap des Standardfehlers}
|
||||
|
||||
Beim Bootstrap erzeugen wir durch Resampling neue Stichproben und
|
||||
benutzen diese um die Stichprobenverteilung einer Statistik zu
|
||||
berechnen. Die Bootstrap Stichproben haben jeweils den gleichen Umfang
|
||||
wie die urspr\"unglich gemessene Stichprobe und werden durch Ziehen
|
||||
mit Zur\"ucklegen gewonnen. Jeder Wert der urspr\"unglichen Stichprobe
|
||||
kann also einmal, mehrmals oder gar nicht in einer Bootstrap
|
||||
Stichprobe vorkommen.
|
||||
|
||||
\begin{exercise}[bootstrapsem.m]
|
||||
Ziehe 1000 normalverteilte Zufallszahlen und berechne deren Mittelwert,
|
||||
Standardabweichung und Standardfehler ($\sigma/\sqrt{n}$).
|
||||
|
||||
Resample die Daten 1000 mal (Ziehen mit Zur\"ucklegen) und berechne jeweils
|
||||
den Mittelwert.
|
||||
|
||||
Plotte ein Histogramm dieser Mittelwerte, sowie deren Mittelwert und
|
||||
die Standardabweichung.
|
||||
|
||||
Was hat das mit dem Standardfehler zu tun?
|
||||
\end{exercise}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\chapter{\tr{Maximum likelihood estimation}{Maximum-Likelihood Methode}}
|
||||
|
||||
In vielen Situationen wollen wir einen oder mehrere Parameter $\theta$
|
||||
einer Wahrscheinlichkeitsverteilung sch\"atzen, so dass die Verteilung
|
||||
die Daten $x_1, x_2, \ldots x_n$ am besten beschreibt. Bei der
|
||||
Maximum-Likelihood-Methode w\"ahlen wir die Parameter so, dass die
|
||||
Wahrscheinlichkeit, dass die Daten aus der Verteilung stammen, am
|
||||
gr\"o{\ss}ten ist.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Maximum Likelihood}
|
||||
Sei $p(x|\theta)$ (lies ``Wahrscheinlichkeit(sdichte) von $x$ gegeben
|
||||
$\theta$'') die Wahrscheinlichkeits(dichte)verteilung von $x$ mit dem
|
||||
Parameter(n) $\theta$. Das k\"onnte die Normalverteilung
|
||||
\begin{equation}
|
||||
\label{normpdfmean}
|
||||
p(x|\theta) = \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x-\theta)^2}{2\sigma^2}}
|
||||
\end{equation}
|
||||
sein mit
|
||||
fester Standardabweichung $\sigma$ und dem Mittelwert $\mu$ als
|
||||
Parameter $\theta$.
|
||||
|
||||
Wenn nun den $n$ unabh\"angigen Beobachtungen $x_1, x_2, \ldots x_n$
|
||||
die Wahrscheinlichkeitsverteilung $p(x|\theta)$ zugrundeliegt, dann
|
||||
ist die Verbundwahrscheinlichkeit $p(x_1,x_2, \ldots x_n|\theta)$ des
|
||||
Auftretens der Werte $x_1, x_2, \ldots x_n$ gegeben ein bestimmtes $\theta$
|
||||
\begin{equation}
|
||||
p(x_1,x_2, \ldots x_n|\theta) = p(x_1|\theta) \cdot p(x_2|\theta)
|
||||
\ldots p(x_n|\theta) = \prod_{i=1}^n p(x_i|\theta) \; .
|
||||
\end{equation}
|
||||
Andersherum gesehen ist das die Likelihood (deutsch immer noch ``Wahrscheinlichkeit'')
|
||||
den Parameter $\theta$ zu haben, gegeben die Me{\ss}werte $x_1, x_2, \ldots x_n$,
|
||||
\begin{equation}
|
||||
{\cal L}(\theta|x_1,x_2, \ldots x_n) = p(x_1,x_2, \ldots x_n|\theta)
|
||||
\end{equation}
|
||||
|
||||
Wir sind nun an dem Wert des Parameters $\theta_{mle}$ interessiert, der die
|
||||
Likelihood maximiert (``mle'': Maximum-Likelihood Estimate):
|
||||
\begin{equation}
|
||||
\theta_{mle} = \text{argmax}_{\theta} {\cal L}(\theta|x_1,x_2, \ldots x_n)
|
||||
\end{equation}
|
||||
$\text{argmax}_xf(x)$ bezeichnet den Wert des Arguments $x$ der Funktion $f(x)$, bei
|
||||
dem $f(x)$ ihr globales Maximum annimmt. Wir suchen also den Wert von $\theta$
|
||||
bei dem die Likelihood ${\cal L}(\theta)$ ihr Maximum hat.
|
||||
|
||||
An der Stelle eines Maximums einer Funktion \"andert sich nichts, wenn
|
||||
man die Funktionswerte mit einer streng monoton steigenden Funktion
|
||||
transformiert. Aus gleich ersichtlichen mathematischen Gr\"unden wird meistens
|
||||
das Maximum der logarithmierten Likelihood (``Log-Likelihood'') gesucht:
|
||||
\begin{eqnarray}
|
||||
\theta_{mle} & = & \text{argmax}_{\theta}\; {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\
|
||||
& = & \text{argmax}_{\theta}\; \log {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\
|
||||
& = & \text{argmax}_{\theta}\; \log \prod_{i=1}^n p(x_i|\theta) \nonumber \\
|
||||
& = & \text{argmax}_{\theta}\; \sum_{i=1}^n \log p(x_i|\theta) \label{loglikelihood}
|
||||
\end{eqnarray}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Beispiel: Das arithmetische Mittel}
|
||||
|
||||
Wenn die Me{\ss}daten $x_1, x_2, \ldots x_n$ der Normalverteilung \eqnref{normpdfmean}
|
||||
entstammen, und wir den Mittelwert $\mu$ als einzigen Parameter der Verteilung betrachten,
|
||||
welcher Wert von $\theta$ maximiert dessen Likelihood?
|
||||
|
||||
\begin{figure}[t]
|
||||
\includegraphics[width=1\textwidth]{mlemean}
|
||||
\caption{\label{mlemeanfig} Maximum Likelihood Estimation des
|
||||
Mittelwerts. Oben: Die Daten zusammen mit drei m\"oglichen
|
||||
Normalverteilungen mit unterschiedlichen Mittelwerten (Pfeile) aus
|
||||
    denen die Daten stammen k\"onnten. Unten links: Die Likelihood
|
||||
in Abh\"angigkeit des Mittelwerts als Parameter der
|
||||
Normalverteilungen. Unten rechts: die entsprechende
|
||||
Log-Likelihood. An der Position des Maximums bei $\theta=2$
|
||||
\"andert sich nichts (Pfeil).}
|
||||
\end{figure}
|
||||
|
||||
Die Log-Likelihood \eqnref{loglikelihood} ist
|
||||
\begin{eqnarray*}
|
||||
\log {\cal L}(\theta|x_1,x_2, \ldots x_n)
|
||||
& = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x_i-\theta)^2}{2\sigma^2}} \\
|
||||
& = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma^2} -\frac{(x_i-\theta)^2}{2\sigma^2}
|
||||
\end{eqnarray*}
|
||||
Zur Bestimmung des Maximums der Log-Likelihood berechnen wir deren Ableitung
|
||||
nach dem Parameter $\theta$ und setzen diese gleich Null:
|
||||
\begin{eqnarray*}
|
||||
\frac{\text{d}}{\text{d}\theta} \log {\cal L}(\theta|x_1,x_2, \ldots x_n) & = & \sum_{i=1}^n \frac{2(x_i-\theta)}{2\sigma^2} \;\; = \;\; 0 \\
|
||||
\Leftrightarrow \quad \sum_{i=1}^n x_i - \sum_{i=1}^n \theta & = & 0 \\
|
||||
\Leftrightarrow \quad n \theta & = & \sum_{i=1}^n x_i \\
|
||||
\Leftrightarrow \quad \theta & = & \frac{1}{n} \sum_{i=1}^n x_i
|
||||
\end{eqnarray*}
|
||||
Der Maximum-Likelihood-Estimator ist das arithmetische Mittel der Daten. D.h.
|
||||
das arithmetische Mittel maximiert die Wahrscheinlichkeit, dass die Daten aus einer
|
||||
Normalverteilung mit diesem Mittelwert gezogen worden sind.
|
||||
|
||||
\begin{exercise}[mlemean.m]
|
||||
Ziehe $n=50$ normalverteilte Zufallsvariablen mit einem Mittelwert $\ne 0$
|
||||
und einer Standardabweichung $\ne 1$.
|
||||
|
||||
Plotte die Likelihood (aus dem Produkt der Wahrscheinlichkeiten) und
|
||||
die Log-Likelihood (aus der Summe der logarithmierten
|
||||
Wahrscheinlichkeiten) f\"ur den Mittelwert als Parameter. Vergleiche
|
||||
die Position der Maxima mit den aus den Daten berechneten
|
||||
Mittelwerte.
|
||||
\end{exercise}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Kurvenfit als Maximum Likelihood Estimation}
|
||||
Beim Kurvenfit soll eine Funktion $f(x;\theta)$ mit den Parametern
|
||||
$\theta$ an die Datenpaare $(x_i|y_i)$ durch Anpassung der Parameter
|
||||
$\theta$ gefittet werden. Wenn wir annehmen, dass die $y_i$ um die
|
||||
entsprechenden Funktionswerte $f(x_i;\theta)$ mit einer
|
||||
Standardabweichung $\sigma_i$ normalverteilt streuen, dann lautet die
|
||||
Log-Likelihood
|
||||
\begin{eqnarray*}
|
||||
\log {\cal L}(\theta|x_1,x_2, \ldots x_n)
|
||||
& = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma_i^2}}e^{-\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2}} \\
|
||||
  & = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma_i^2} -\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2} \\
|
||||
\end{eqnarray*}
|
||||
Der einzige Unterschied zum vorherigen Beispiel ist, dass die
|
||||
Mittelwerte der Normalverteilungen nun durch die Funktionswerte
|
||||
gegeben sind.
|
||||
|
||||
Der Parameter $\theta$ soll so gew\"ahlt werden, dass die
|
||||
Log-Likelihood maximal wird. Der erste Term der Summe ist
|
||||
unabh\"angig von $\theta$ und kann deshalb bei der Suche nach dem
|
||||
Maximum weggelassen werden.
|
||||
\begin{eqnarray*}
|
||||
& = & - \frac{1}{2} \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2
|
||||
\end{eqnarray*}
|
||||
Anstatt nach dem Maximum zu suchen, k\"onnen wir auch das Vorzeichen der Log-Likelihood
|
||||
umdrehen und nach dem Minimum suchen. Dabei k\"onnen wir auch den Faktor $1/2$ vor der Summe vernachl\"assigen --- auch das \"andert nichts an der Position des Minimums.
|
||||
\begin{equation}
|
||||
\theta_{mle} = \text{argmin}_{\theta} \; \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 \;\; = \;\; \text{argmin}_{\theta} \; \chi^2
|
||||
\end{equation}
|
||||
Die Summe der quadratischen Abst\"ande normiert auf die jeweiligen
|
||||
Standardabweichungen wird auch mit $\chi^2$ bezeichnet. Der Wert des
|
||||
Parameters $\theta$ welcher den quadratischen Abstand minimiert ist
|
||||
also identisch mit der Maximierung der Wahrscheinlichkeit, dass die
|
||||
Daten tats\"achlich aus der Funktion stammen k\"onnen. Minimierung des
|
||||
$\chi^2$ ist also ein Maximum-Likelihood Estimate.
|
||||
|
||||
\begin{figure}[t]
|
||||
\includegraphics[width=1\textwidth]{mlepropline}
|
||||
\caption{\label{mleproplinefig} Maximum Likelihood Estimation der
|
||||
Steigung einer Ursprungsgeraden.}
|
||||
\end{figure}
|
||||
|
||||
|
||||
\subsection{Beispiel: einfache Proportionalit\"at}
|
||||
Als Funktion nehmen wir die Ursprungsgerade
|
||||
\[ f(x) = \theta x \]
|
||||
mit Steigung $\theta$. Die $\chi^2$-Summe lautet damit
|
||||
\[ \chi^2 = \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \; . \]
|
||||
Zur Bestimmung des Minimums berechnen wir wieder die erste Ableitung nach $\theta$
|
||||
und setzen diese gleich Null:
|
||||
\begin{eqnarray}
|
||||
\frac{\text{d}}{\text{d}\theta}\chi^2 & = & \frac{\text{d}}{\text{d}\theta} \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\
|
||||
& = & \sum_{i=1}^n \frac{\text{d}}{\text{d}\theta} \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\
|
||||
& = & -2 \sum_{i=1}^n \frac{x_i}{\sigma_i} \left( \frac{y_i-\theta x_i}{\sigma_i} \right) \nonumber \\
|
||||
& = & -2 \sum_{i=1}^n \left( \frac{x_iy_i}{\sigma_i^2} - \theta \frac{x_i^2}{\sigma_i^2} \right) \;\; = \;\; 0 \nonumber \\
|
||||
\Leftrightarrow \quad \theta \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2} & = & \sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2} \nonumber \\
|
||||
\Leftrightarrow \quad \theta & = & \frac{\sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2}}{ \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2}} \label{mleslope}
|
||||
\end{eqnarray}
|
||||
Damit haben wir nun einen analytischen Ausdruck f\"ur die Bestimmung
|
||||
der Steigung $\theta$ der Regressionsgeraden gewonnen. Ein
|
||||
Gradientenabstieg ist f\"ur das Fitten der Geradensteigung also gar nicht
|
||||
n\"otig. Das gilt allgemein f\"ur das Fitten von Koeffizienten von
|
||||
linear kombinierten Basisfunktionen. Parameter die nichtlinear in
|
||||
einer Funktion enthalten sind k\"onnen aber nicht analytisch aus den
|
||||
Daten berechnet werden. Da bleibt dann nur auf numerische Verfahren
|
||||
zur Optimierung der Kostenfunktion, wie z.B. der Gradientenabstieg,
|
||||
zur\"uckzugreifen.
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Fits von Wahrscheinlichkeitsverteilungen}
|
||||
Zum Abschluss betrachten wir noch den Fall, bei dem wir die Parameter
|
||||
einer Wahrscheinlichkeitsdichtefunktion (z.B. Mittelwert und
|
||||
Standardabweichung der Normalverteilung) an ein Datenset fitten wolle.
|
||||
|
||||
Ein erster Gedanke k\"onnte sein, die
|
||||
Wahrscheinlichkeitsdichtefunktion durch Minimierung des quadratischen
|
||||
Abstands an ein Histogram der Daten zu fitten. Das ist aber aus
|
||||
folgenden Gr\"unden nicht die Methode der Wahl: (i)
|
||||
Wahrscheinlichkeitsdichten k\"onnen nur positiv sein. Darum k\"onnen
|
||||
insbesondere bei kleinen Werten die Daten nicht symmetrisch streuen,
|
||||
wie es normalverteilte Daten machen sollten. (ii) Die Datenwerte sind
|
||||
nicht unabh\"angig, da das normierte Histogram sich zu Eins
|
||||
aufintegriert. Die beiden Annahmen normalverteilte und unabh\"angige Daten
|
||||
die die Minimierung des quadratischen Abstands zu einem Maximum
|
||||
Likelihood Estimator machen sind also verletzt. (iii) Das Histogramm
|
||||
h\"angt von der Wahl der Klassenbreite ab.
|
||||
|
||||
Den direkten Weg, eine Wahrscheinlichkeitsdichtefunktion an ein
|
||||
Datenset zu fitten, haben wir oben schon bei dem Beispiel zur
|
||||
Absch\"atzung des Mittelwertes einer Normalverteilung gesehen ---
|
||||
Maximum Likelihood! Wir suchen einfach die Parameter $\theta$ der
|
||||
gesuchten Wahrscheinlichkeitsdichtefunktion bei der die Log-Likelihood
|
||||
\eqnref{loglikelihood} maximal wird. Das ist im allgemeinen ein
|
||||
nichtlinieares Optimierungsproblem, das mit numerischen Verfahren, wie
|
||||
z.B. dem Gradientenabstieg, gel\"ost wird.
|
||||
|
||||
\begin{figure}[t]
|
||||
\includegraphics[width=1\textwidth]{mlepdf}
|
||||
\caption{\label{mlepdffig} Maximum Likelihood Estimation einer
|
||||
Wahrscheinlichkeitsdichtefunktion. Links: die 100 Datenpunkte, die aus der Gammaverteilung
|
||||
2. Ordnung (rot) gezogen worden sind. Der Maximum-Likelihood-Fit ist orange dargestellt.
|
||||
Rechts: das normierte Histogramm der Daten zusammen mit der \"uber Minimierung
|
||||
des quadratischen Abstands zum Histogramm berechneten Fits ist potentiell schlechter.}
|
||||
\end{figure}
|
||||
|
||||
\end{document}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Statistics}
|
||||
What is "a statistic"? % dt. Sch\"atzfunktion
|
||||
\begin{definition}[statistic]
|
||||
A statistic (singular) is a single measure of some attribute of a
|
||||
sample (e.g., its arithmetic mean value). It is calculated by
|
||||
applying a function (statistical algorithm) to the values of the
|
||||
items of the sample, which are known together as a set of data.
|
||||
|
||||
\source{http://en.wikipedia.org/wiki/Statistic}
|
||||
\end{definition}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Data types}
|
||||
|
||||
\subsection{Nominal scale}
|
||||
\begin{itemize}
|
||||
\item Binary
|
||||
\begin{itemize}
|
||||
\item ``yes/no'',
|
||||
\item ``true/false'',
|
||||
\item ``success/failure'', etc.
|
||||
\end{itemize}
|
||||
\item Categorial
|
||||
\begin{itemize}
|
||||
\item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
|
||||
\item blood type (``A/B/AB/0''),
|
||||
  \item parts of speech (``noun/verb/preposition/article/...''),
|
||||
\item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
|
||||
\end{itemize}
|
||||
\item Each observation/measurement/sample is put into one category
|
||||
\item There is no reasonable order among the categories.\\
|
||||
example: [rods, cones] vs. [cones, rods]
|
||||
\item Statistics: mode, i.e. the most common item
|
||||
\end{itemize}
|
||||
|
||||
\subsection{Ordinal scale}
|
||||
\begin{itemize}
|
||||
\item Like nominal scale, but with an order
|
||||
\item Examples: ranks, ratings
|
||||
\begin{itemize}
|
||||
\item ``bad/ok/good'',
|
||||
\item ``cold/warm/hot'',
|
||||
\item ``young/old'', etc.
|
||||
\end{itemize}
|
||||
\item {\bf But:} there is no reasonable measure of {\em distance}
|
||||
between the classes
|
||||
\item Statistics: mode, median
|
||||
\end{itemize}
|
||||
|
||||
\subsection{Interval scale}
|
||||
\begin{itemize}
|
||||
\item Quantitative/metric values
|
||||
\item Reasonable measure of distance between values, but no absolute zero
|
||||
\item Examples:
|
||||
\begin{itemize}
|
||||
\item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
|
||||
\item Direction measured in degrees from magnetic or true north
|
||||
\end{itemize}
|
||||
\item Statistics:
|
||||
\begin{itemize}
|
||||
\item Central tendency: mode, median, arithmetic mean
|
||||
\item Dispersion: range, standard deviation
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\subsection{Absolute/ratio scale}
|
||||
\begin{itemize}
|
||||
\item Like interval scale, but with absolute origin/zero
|
||||
\item Examples:
|
||||
\begin{itemize}
|
||||
\item Temperature in $^\circ$K
|
||||
\item Length, mass, duration, electric charge, ...
|
||||
\item Plane angle, etc.
|
||||
\item Count (e.g. number of spikes in response to a stimulus)
|
||||
\end{itemize}
|
||||
\item Statistics:
|
||||
\begin{itemize}
|
||||
\item Central tendency: mode, median, arithmetic, geometric, harmonic mean
|
||||
\item Dispersion: range, standard deviation
|
||||
\item Coefficient of variation (ratio standard deviation/mean)
|
||||
\item All other statistical measures
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\subsection{Data types}
|
||||
\begin{itemize}
|
||||
\item Data type selects
|
||||
\begin{itemize}
|
||||
\item statistics
|
||||
\item type of plots (bar graph versus x-y plot)
|
||||
\item correct tests
|
||||
\end{itemize}
|
||||
\item Scales exhibit increasing information content from nominal
|
||||
to absolute.\\
|
||||
Conversion ,,downwards'' is always possible
|
||||
\item For example: size measured in meter (ratio scale) $\rightarrow$
|
||||
categories ``small/medium/large'' (ordinal scale)
|
||||
\end{itemize}
|
||||
|
||||
\subsection{Examples from neuroscience}
|
||||
\begin{itemize}
|
||||
\item {\bf absolute:}
|
||||
\begin{itemize}
|
||||
\item size of neuron/brain
|
||||
\item length of axon
|
||||
\item ion concentration
|
||||
\item membrane potential
|
||||
\item firing rate
|
||||
\end{itemize}
|
||||
|
||||
\item {\bf interval:}
|
||||
\begin{itemize}
|
||||
\item edge orientation
|
||||
\end{itemize}
|
||||
|
||||
\item {\bf ordinal:}
|
||||
\begin{itemize}
|
||||
\item stages of a disease
|
||||
\item ratings
|
||||
\end{itemize}
|
||||
|
||||
\item {\bf nominal:}
|
||||
\begin{itemize}
|
||||
\item cell type
|
||||
\item odor
|
||||
\item states of an ion channel
|
||||
\end{itemize}
|
||||
|
||||
\end{itemize}
|
||||
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 724 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 386 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 461 KiB |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 59 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 55 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 73 KiB |
Binary file not shown.
@@ -1,454 +0,0 @@
|
||||
\documentclass{beamer}
|
||||
\usepackage{xcolor}
|
||||
\usepackage{listings}
|
||||
\usepackage{pgf}
|
||||
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
|
||||
%\usepackage{multimedia}
|
||||
|
||||
\usepackage[english]{babel}
|
||||
\usepackage{movie15}
|
||||
\usepackage[latin1]{inputenc}
|
||||
\usepackage{times}
|
||||
\usepackage{amsmath}
|
||||
\usepackage{bm}
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage[scaled=.90]{helvet}
|
||||
\usepackage{scalefnt}
|
||||
\usepackage{tikz}
|
||||
\usepackage{ textcomp }
|
||||
\usepackage{soul}
|
||||
\usepackage{hyperref}
|
||||
\definecolor{lightblue}{rgb}{.7,.7,1.}
|
||||
\definecolor{mygreen}{rgb}{0,1.,0}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\mode<presentation>
|
||||
{
|
||||
\usetheme{Singapore}
|
||||
\setbeamercovered{opaque}
|
||||
\usecolortheme{tuebingen}
|
||||
\setbeamertemplate{navigation symbols}{}
|
||||
\usefonttheme{default}
|
||||
\useoutertheme{infolines}
|
||||
% \useoutertheme{miniframes}
|
||||
}
|
||||
|
||||
\AtBeginSection[]
|
||||
{
|
||||
\begin{frame}<beamer>
|
||||
\begin{center}
|
||||
\Huge \insertsectionhead
|
||||
\end{center}
|
||||
% \frametitle{\insertsectionhead}
|
||||
% \tableofcontents[currentsection,hideothersubsections]
|
||||
\end{frame}
|
||||
}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
|
||||
|
||||
\setbeamertemplate{blocks}[rounded][shadow=true]
|
||||
|
||||
\title[]{Scientific Computing -- Statistik}
|
||||
\author[]{Jan Grewe, Fabian Sinz\\Abteilung f\"ur Neuroethologie\\
|
||||
Universit\"at T\"ubingen}
|
||||
|
||||
\institute[Wissenschaftliche Datenverarbeitung]{}
|
||||
\date{12.10.2015 - 06.11.2015}
|
||||
%\logo{\pgfuseimage{../../resources/UT_BM_Rot_RGB.pdf}}
|
||||
|
||||
\subject{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}
|
||||
\vspace{1em}
|
||||
\titlegraphic{
|
||||
\includegraphics[width=0.5\linewidth]{../../resources/UT_WBMW_Rot_RGB}
|
||||
}
|
||||
%%%%%%%%%% configuration for code
|
||||
\lstset{
|
||||
basicstyle=\ttfamily,
|
||||
numbers=left,
|
||||
showstringspaces=false,
|
||||
language=Matlab,
|
||||
commentstyle=\itshape\color{darkgray},
|
||||
keywordstyle=\color{blue},
|
||||
stringstyle=\color{green},
|
||||
backgroundcolor=\color{blue!10},
|
||||
breaklines=true,
|
||||
breakautoindent=true,
|
||||
columns=flexible,
|
||||
frame=single,
|
||||
captionpos=b,
|
||||
xleftmargin=1em,
|
||||
xrightmargin=1em,
|
||||
aboveskip=10pt
|
||||
}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\newcommand{\mycite}[1]{
|
||||
\begin{flushright}
|
||||
\tiny \color{black!80} #1
|
||||
\end{flushright}
|
||||
}
|
||||
|
||||
\newcommand{\code}[1]{\texttt{#1}}
|
||||
|
||||
\input{../../latex/environments.tex}
|
||||
\makeatother
|
||||
|
||||
\begin{document}
|
||||
|
||||
\begin{frame}[plain]
|
||||
\frametitle{}
|
||||
\vspace{-1cm}
|
||||
\titlepage % erzeugt Titelseite
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[plain]
|
||||
\huge{Curve Fitting/Optimierung mit dem Gradientenabstiegsverfahren}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Fitting und Optimierung}
|
||||
\framesubtitle{\"Ubersicht}
|
||||
\begin{enumerate}
|
||||
\item Das Problem: Wir haben beobachtete Daten und ein Modell, das die Daten erkl\"aren soll.
|
||||
\item Wie finden wir die Parameter (des Modells), die die Daten am Besten erkl\"aren?
|
||||
\item L\"osung: Anpassen der Parameter an die Daten (Fitting).
|
||||
\item Wie macht man das?
|
||||
\end{enumerate}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Fitting und Optimierung}
|
||||
\framesubtitle{Ein 1-D Beispiel}
|
||||
\begin{columns}
|
||||
\begin{column}{6.25cm}
|
||||
\begin{figure}
|
||||
\includegraphics[width=1.\columnwidth]{figures/one_d_problem_a.pdf}
|
||||
\end{figure}
|
||||
\end{column}
|
||||
\begin{column}{6.5cm}
|
||||
\begin{itemize}
|
||||
\item z.B. eine Reihe Me{\ss}werte bei einer Bedingung.
|
||||
\item Ich suche den y-Wert, der die Daten am besten
|
||||
repr\"asentiert.
|
||||
\item F\"ur jeden m\"oglichen y-Wert wird die mittlere
|
||||
quadratische Abweichung zu allen Daten berechnet:\\
|
||||
\[ error = \frac{1}{N}\sum_{i=1}^{N}(y_i - y_{test})^2 \]
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\end{columns}\pause
|
||||
Wie finde ich den besten Wert heraus?
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Fitting und Optimierung}
|
||||
\framesubtitle{Ein 1-D Beispiel}
|
||||
\only<1> {
|
||||
\begin{columns}
|
||||
\begin{column}{4.5cm}
|
||||
\begin{figure}
|
||||
\includegraphics[width=1.\columnwidth]{figures/one_d_problem_b.pdf}
|
||||
\end{figure}
|
||||
\end{column}
|
||||
\begin{column}{8cm}
|
||||
\begin{itemize}
|
||||
\item Man folgt dem Gradienten!
|
||||
\item Der Gradient kann numerisch berechnet werden indem man ein
|
||||
(sehr kleines) ``Steigungsdreieck'' an den Positionen anlegt.\\ \vspace{0.25cm}
|
||||
$\frac{\Delta error}{\Delta y} = \frac{error(y+h) - error(y)}{h}$
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
}
|
||||
\only<2>{
|
||||
\begin{columns}
|
||||
\begin{column}{4.5cm}
|
||||
\begin{figure}
|
||||
\includegraphics[width=1.\columnwidth]{figures/one_d_problem_c.pdf}
|
||||
\end{figure}
|
||||
\end{column}
|
||||
\begin{column}{8cm}
|
||||
\begin{itemize}
|
||||
\item Man folgt dem Gradienten!
|
||||
\item Der Gradient kann numerisch berechnet werden indem man ein
|
||||
(sehr kleines) ``Steigungsdreieck'' an den Positionen anlegt.\\ \vspace{0.25cm}
|
||||
$\frac{\Delta error}{\Delta y} = \frac{error(y+h) - error(y)}{h}$
|
||||
\item Da, wo der Gradient seine Nullstelle hat, liegt der beste y-Wert.
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Fitting und Optimierung}
|
||||
\framesubtitle{Lineare Regression}
|
||||
\only<1-2> {
|
||||
\begin{figure}
|
||||
\includegraphics[width=0.45\columnwidth]{figures/lin_regress.pdf}
|
||||
\end{figure}
|
||||
}
|
||||
\only<2>{
|
||||
Nehmen wir mal einen linearen Zusammenhang zwischen \textit{Input}
|
||||
und \textit{Output} an. ($y = m\cdot x + n$)
|
||||
}
|
||||
\only<3> {
|
||||
Ver\"anderung der Steigung:
|
||||
\begin{figure}
|
||||
\includegraphics[width=0.45\columnwidth]{figures/lin_regress_slope.pdf}
|
||||
\end{figure}
|
||||
}
|
||||
\only<4> {
|
||||
Ver\"anderung des y-Achsenabschnitts:
|
||||
\begin{figure}
|
||||
\includegraphics[width=0.45\columnwidth]{figures/lin_regress_abscissa.pdf}
|
||||
\end{figure}
|
||||
}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Fitting und Optimierung}
|
||||
  \framesubtitle{Lineare Regression}
|
||||
|
||||
\huge{Welche Kombination ist die richtige?}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Fitting und Optimierung}
|
||||
\framesubtitle{Lineare Regression - Methode der kleinsten quadratischen Abweichung}
|
||||
\begin{columns}
|
||||
\begin{column}{4.5cm}
|
||||
\begin{figure}
|
||||
\includegraphics[width=\columnwidth]{figures/linear_least_squares.pdf}
|
||||
\end{figure}
|
||||
\footnotesize{\url{http://en.wikipedia.org/wiki/Linear_least_squares_(mathematics)}}
|
||||
\end{column}
|
||||
\begin{column}{7cm}
|
||||
\begin{enumerate}
|
||||
      \item Die am h\"aufigsten angewandte Methode ist die der
|
||||
kleinsten quadratischen Abweichungen.
|
||||
\item Es wird versucht die Summe der quadratischen Abweichung zu
|
||||
minimieren.
|
||||
\end{enumerate}
|
||||
      \[g(m,n) = \frac{1}{N}\sum^{N}_{i=1} \left( y_i - f_{m, n}(x_i)\right )^2\]
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{Fitting und Optimierung}
|
||||
  \framesubtitle{Lineare Regression - Methode der kleinsten quadratischen Abweichung}
|
||||
\begin{itemize}
|
||||
\item Was heisst das: Minimieren der Summe der kleinsten
|
||||
quadratischen Abweichungen?
|
||||
  \item Kann man einen Algorithmus zur L\"osung des Problems
|
||||
erstellen?
|
||||
\item Kann man das visualisieren?
|
||||
\end{itemize}\pause
|
||||
\begin{columns}
|
||||
\begin{column}{5.5cm}
|
||||
\tiny
|
||||
\begin{lstlisting}
|
||||
x_range = linspace(-1, 1, 20);
|
||||
y_range = linspace(-5, 5, 20);
|
||||
|
||||
[X, Y] = meshgrid(x_range, y_range);
|
||||
Z = X.^2 + Y.^2;
|
||||
surf(X, Y, Z);
|
||||
colormap('autumn')
|
||||
xlabel('x')
|
||||
ylabel('y')
|
||||
zlabel('z')
|
||||
\end{lstlisting}
|
||||
\end{column}
|
||||
\begin{column}{5.5cm}
|
||||
\begin{figure}
|
||||
\includegraphics[width=0.9\columnwidth]{figures/surface.pdf}
|
||||
\end{figure}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{Fitting und Optimierung}
|
||||
\framesubtitle{Lineare Regression - Methode der kleinsten quadratischen Abweichung}
|
||||
\textbf{Aufgabe}
|
||||
\begin{enumerate}
|
||||
\item Ladet den Datensatz \textit{lin\_regression.mat} in den
|
||||
Workspace. Wie sehen die Daten aus?
|
||||
\item Schreibt eine Funktion \code{lsq\_error}, die den Fehler
|
||||
    berechnet:
|
||||
\begin{itemize}
|
||||
\item \"Ubernimmt einen 2-elementigen Vektor, der die Parameter
|
||||
\code{m} und \code{n} enth\"alt, die x-Werte und y-Werte.
|
||||
\item Die Funktion gibt den Fehler zur\"uck.
|
||||
\end{itemize}
|
||||
\item Schreibt ein Skript dass den Fehler in Abh\"angigkeit von
|
||||
\code{m} und \code{n} als surface plot darstellt (\code{surf}
|
||||
Funktion).
|
||||
\item Wie k\"onnen wir diesen Plot benutzen um die beste Kombination
|
||||
zu finden?
|
||||
  \item Wo liegt die beste Kombination?
|
||||
\end{enumerate}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{Fitting und Optimierung}
|
||||
\framesubtitle{Lineare Regression - Methode der kleinsten quadratischen Abweichung}
|
||||
\begin{itemize}
|
||||
\item Wie findet man die Extrempunkte in einer Kurve?\pause
|
||||
\item Ableitung der Funktion auf Null setzen und nach x aufl\"osen.
|
||||
\item Definition der Ableitung:\\ \vspace{0.25cm}
|
||||
\begin{center}
|
||||
$ f'(x) = \lim\limits_{h \rightarrow 0} \frac{f(x + h) - f(x)}{h} $
|
||||
\vspace{0.25cm}\pause
|
||||
\end{center}
|
||||
\item Bei zwei Parametern $g(m,n)$ k\"onnen wie die partielle
|
||||
Ableitung bez\"uglich eines Parameters benutzen um die
|
||||
Ver\"anderung des Fehlers bei Ver\"anderung eines Parameters
|
||||
auszuwerten.
|
||||
\item Partielle Ableitung nach \code{m}?\\\pause
|
||||
\vspace{0.25cm}
|
||||
\begin{center}
|
||||
$\frac{\partial g(m,n)}{\partial m} = \lim\limits_{h \rightarrow 0} \frac{g(m + h, n) - g(m,n)}{h}$
|
||||
\vspace{0.25cm}
|
||||
\end{center}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Fitting und Optimierung}
|
||||
\framesubtitle{Lineare Regression - Gradientenabstieg}
|
||||
\large{Der Gradient:}
|
||||
\begin{center}
|
||||
$\bigtriangledown g(m,n) = \left( \frac{\partial g(m,n)}{\partial m}, \frac{\partial g(m,n)}{\partial n}\right)$
|
||||
\end{center}
|
||||
Ist der Vektor mit den partiellen Ableitungen nach \code{m} und
|
||||
\code{n}.
|
||||
|
||||
\pause Numerisch kann die Ableitung durch einen sehr kleinen Schritt
|
||||
angen\"ahert werden.
|
||||
\begin{center}
|
||||
$\frac{\partial g(m,n)}{\partial m} = \lim\limits_{h \rightarrow
|
||||
0} \frac{g(m + h, n) - g(m,n)}{h} \approx \frac{g(m + h, n) -
|
||||
g(m,n)}{h}$
|
||||
\end{center}
|
||||
f\"ur sehr kleine Schritte \code{h}.
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Fitting und Optimierung}
|
||||
\framesubtitle{Lineare Regression - Gradientenabstieg}
|
||||
Plotten des Gradientenfeldes:
|
||||
\begin{itemize}
|
||||
\item Ladet die Daten in \code{lin\_regression.mat}.
|
||||
\item Schreibt eine Funktion \code{lsq\_gradient.m} in dem gleichen
|
||||
Muster wie \code{lsq\_error.m}. Die Funktion berechnet
|
||||
den Gradienten an einer Position (Kombination von Parametern),
|
||||
wenn ein kleiner Schritt gemacht wird (\code{h=1e-6;}).
|
||||
\item Variiert \code{m} im Bereich von -2 bis +5 und \code{n} im
|
||||
Bereich -10 bis 10.
|
||||
\item Plottet die Fehlerfl\"ache als \code{surface} und
|
||||
\code{contour} plot in die gleiche Abbildung.
|
||||
\item F\"ugt die Gradienten als \code{quiver} plot hinzu.
|
||||
\item Was sagen die Pfeile? Wie passen Pfeile und Fl\"ache zusammen?
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Fitting und Optimierung}
|
||||
\framesubtitle{Lineare Regression - Gradientenabstieg}
|
||||
\begin{itemize}
|
||||
\item Der Gradient zeigt in die Richtung des gr\"o{\ss}ten \textbf{Anstiegs}. \pause
|
||||
\item Wie kann der Gradient nun dazu genutzt werden zum Minimum zu kommen?\pause
|
||||
\item \textbf{Man nehme: $-\bigtriangledown g(m,n)$!}\pause
|
||||
\vspace{0.25cm}
|
||||
\item Wir haben jetzt alle Zutaten um den Gradientenabstieg zu formulieren.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Fitting und Optimierung}
|
||||
\framesubtitle{Gradientenabstieg - Algorithmus}
|
||||
\begin{enumerate}
|
||||
\item Starte mit einer beliebigen Parameterkombination $p_0 = (m_0,
|
||||
n_0)$.
|
||||
\item Wiederhole solange der Gradient \"uber einer
|
||||
bestimmten Schwelle ist:
|
||||
\begin{itemize}
|
||||
\item Berechne den Gradienten an der aktuellen Position $p_t$.
|
||||
\item Gehe einen kleinen Schritt in die entgegensetzte Richtung des
|
||||
Gradienten:\\
|
||||
\begin{center}
|
||||
$p_{t+1} = p_t - \epsilon \cdot \bigtriangledown g(m_t, n_t)$
|
||||
\end{center}
|
||||
wobei $\epsilon$ eine kleine Zahl (0.01) ist.
|
||||
\end{itemize}
|
||||
\end{enumerate}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Fitting und Optimierung}
|
||||
\framesubtitle{Gradientenabstieg - \"Ubung}
|
||||
\begin{enumerate}
|
||||
\item Implementiert den Gradientenabstieg f\"ur das Fitten der
|
||||
linearen Geradengleichung an die Daten.
|
||||
\item Plottet f\"ur jeden Schritt den surface plot und die aktuelle
|
||||
Position als roten Punkt (nutzt \code{plot3}).
|
||||
\item Plottet f\"ur jeden Schritt den Fit in einen separaten plot.
|
||||
\item Nutzt \code{pause(0.1)} nach jedem Schritt um die Entwicklung
|
||||
des Fits zu beobachten.
|
||||
\end{enumerate}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Fitting und Optimierung}
|
||||
\framesubtitle{Gradientenabstieg - \"Ubung II}
|
||||
\begin{columns}
|
||||
\begin{column}{6cm}
|
||||
\begin{figure}
|
||||
\includegraphics[width=1\columnwidth]{figures/charging_curve.pdf}
|
||||
\end{figure}
|
||||
\end{column}
|
||||
\begin{column}{7cm}
|
||||
\begin{itemize}
|
||||
\item Ladet die Daten aus der \code{membraneVoltage.mat}.
|
||||
\item Plottet die Rohdaten.
|
||||
\item Fittet folgende Funktion an die Daten:\\
|
||||
\begin{center}
|
||||
$f_{A,\tau}(t) = A \cdot \left(1 - e^{-\frac{t}{\tau}}\right )$
|
||||
\end{center}
|
||||
\item An welcher Stelle muss der Code von oben ver\"andert
|
||||
werden?
|
||||
\item Plottet die Daten zusammen mit dem Fit.
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{Fitting und Optimierung}
|
||||
\framesubtitle{Fitting mit Matlab}
|
||||
\begin{itemize}
|
||||
\item Es gibt mehrere Funktionen in Matlab, die eine Optimierung
|
||||
automatisch durchf\"uhren.
|
||||
\item z.B. \code{fminunc, lsqcurvefit, fminsearch, lsqnonlin, ...}
|
||||
\item Einige der Funktionen stecken allerdings in der
|
||||
\textit{Optimization Toolbox}, die nicht zum Standard Matlab
|
||||
geh\"ort.
|
||||
\end{itemize}
|
||||
\begin{lstlisting}
|
||||
function param = estimated_regression(x, y, start_parameter)
|
||||
objective_function = @(p)(lsq_error(p, x, y));
|
||||
param = fminunc(objective_function, start_parameter)
|
||||
\end{lstlisting}
|
||||
\end{frame}
|
||||
|
||||
\end{document}
|
||||
@@ -1,99 +0,0 @@
|
||||
import numpy as np
import matplotlib.pyplot as plt

# Lecture figure: maximum-likelihood estimation of the mean of a normal
# distribution.  Top panel: a data sample together with three candidate
# pdfs with different means.  Bottom left: the likelihood of the data as
# a function of the mean parameter theta.  Bottom right: the
# corresponding log-likelihood.

plt.xkcd()
fig = plt.figure(figsize=(6, 5))

# the data: n samples drawn from N(rmu, sigma):
n = 40
rng = np.random.RandomState(54637281)
sigma = 0.5
rmu = 2.0
xd = rng.randn(n)*sigma + rmu
# and possible pdfs with different candidate means:
x = np.arange(0.0, 4.0, 0.01)
mus = [1.5, 2.0, 2.5]
g = np.zeros((len(x), len(mus)))
for k, mu in enumerate(mus):
    g[:, k] = np.exp(-0.5*((x-mu)/sigma)**2.0)/np.sqrt(2.0*np.pi)/sigma

# plot data and candidate pdfs:
ax = fig.add_subplot(2, 1, 1)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlim(0.5, 3.5)
ax.set_ylim(-0.02, 0.85)
ax.set_xticks(np.arange(0, 5))
ax.set_yticks(np.arange(0, 0.9, 0.2))
ax.set_xlabel('x')
ax.set_ylabel('Probability density')
# mark each candidate mean with a curved arrow and a question mark;
# s flips sign so consecutive arrows bend in opposite directions:
s = 1
for mu in mus:
    r = 5.0*rng.rand()+2.0
    cs = 'angle3,angleA={:.0f},angleB={:.0f}'.format(90+s*r, 90-s*r)
    s *= -1
    ax.annotate('', xy=(mu, 0.02), xycoords='data',
                xytext=(mu, 0.75), textcoords='data',
                arrowprops=dict(arrowstyle="->", relpos=(0.5,0.5),
                                connectionstyle=cs), zorder=1)
    if mu > rmu:
        ax.text(mu-0.1, 0.04, '?', zorder=1, ha='right')
    else:
        ax.text(mu+0.1, 0.04, '?', zorder=1)
for k in range(len(mus)):  # was xrange(): Python-2 only, fixed for Python 3
    ax.plot(x, g[:, k], zorder=5)
ax.scatter(xd, 0.05*rng.rand(len(xd))+0.2, s=30, zorder=10)

# likelihood of the data as a function of the mean parameter theta:
thetas = np.arange(1.5, 2.6, 0.01)
ps = np.zeros((len(xd), len(thetas)))
for i, theta in enumerate(thetas):
    ps[:, i] = np.exp(-0.5*((xd-theta)/sigma)**2.0)/np.sqrt(2.0*np.pi)/sigma
p = np.prod(ps, axis=0)  # product of the single-point likelihoods
# plot the likelihood:
ax = fig.add_subplot(2, 2, 3)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel(r'Parameter $\theta$')
ax.set_ylabel('Likelihood')
ax.set_xticks(np.arange(1.6, 2.5, 0.4))
ax.annotate('Maximum',
            xy=(2.0, 5.5e-11), xycoords='data',
            xytext=(1.0, 1.1), textcoords='axes fraction', ha='right',
            arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
                            connectionstyle="angle3,angleA=10,angleB=70"))
ax.annotate('',
            xy=(2.0, 0), xycoords='data',
            xytext=(2.0, 5e-11), textcoords='data',
            arrowprops=dict(arrowstyle="->", relpos=(0.5,0.5),
                            connectionstyle="angle3,angleA=90,angleB=80"))
ax.plot(thetas, p)

# the log-likelihood of the same data:
ax = fig.add_subplot(2, 2, 4)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel(r'Parameter $\theta$')
ax.set_ylabel('Log-Likelihood')
ax.set_ylim(-50, -20)
ax.set_xticks(np.arange(1.6, 2.5, 0.4))
ax.set_yticks(np.arange(-50, -19, 10.0))
ax.annotate('Maximum',
            xy=(2.0, -23), xycoords='data',
            xytext=(1.0, 1.1), textcoords='axes fraction', ha='right',
            arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
                            connectionstyle="angle3,angleA=10,angleB=70"))
ax.annotate('',
            xy=(2.0, -50), xycoords='data',
            xytext=(2.0, -26), textcoords='data',
            arrowprops=dict(arrowstyle="->", relpos=(0.5,0.5),
                            connectionstyle="angle3,angleA=80,angleB=100"))
ax.plot(thetas, np.log(p))

plt.tight_layout()
plt.savefig('mlemean.pdf')
#plt.show()
|
||||
@@ -1,70 +0,0 @@
|
||||
import numpy as np
import scipy.stats as st
import scipy.optimize as opt
import matplotlib.pyplot as plt

# Lecture figure: fitting a gamma distribution to data.
# Left panel: maximum-likelihood fit computed directly from the data
# points.  Right panel: least-squares fit of the pdf to a normalized
# histogram of the same data.  Both are compared to the true pdf.

plt.xkcd()
fig = plt.figure(figsize=(6, 3))

# the data: n samples from a gamma distribution:
n = 100
shape = 2.0
scale = 1.0
rng = np.random.RandomState(4637281)
xd = rng.gamma(shape, scale, n)

# true pdf (pass scale explicitly so the curve stays correct if scale
# is ever changed from 1.0):
xx = np.arange(0.0, 10.1, 0.01)
rv = st.gamma(shape, scale=scale)
yy = rv.pdf(xx)

# mle fit, starting the shape parameter at 5.0:
a = st.gamma.fit(xd, 5.0)
yf = st.gamma.pdf(xx, *a)

# plot data, true pdf and mle fit:
ax = fig.add_subplot(1, 2, 1)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlim(0, 10.0)
ax.set_ylim(0.0, 0.42)
ax.set_xticks(np.arange(0, 11, 2))
ax.set_yticks(np.arange(0, 0.42, 0.1))
ax.set_xlabel('x')
ax.set_ylabel('Probability density')
ax.plot(xx, yy, '-', lw=5, color='#ff0000', label='pdf')
ax.plot(xx, yf, '-', lw=2, color='#ffcc00', label='mle')
ax.scatter(xd, 0.025*rng.rand(len(xd))+0.05, s=30, zorder=10)
ax.legend(loc='upper right', frameon=False)

# normalized histogram of the data:
h, b = np.histogram(xd, np.arange(0, 8.5, 1), density=True)

# least-squares fit of the gamma pdf to the histogram, evaluated at
# the bin centers:
def gammapdf(x, n, l, s):
    # wrapper exposing (shape, loc, scale) as plain curve_fit parameters
    return st.gamma.pdf(x, n, l, s)
popt, pcov = opt.curve_fit(gammapdf, b[:-1]+0.5*(b[1]-b[0]), h)
yc = st.gamma.pdf(xx, *popt)

# plot histogram, true pdf and histogram fit:
ax = fig.add_subplot(1, 2, 2)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlim(0, 10.0)
ax.set_xticks(np.arange(0, 11, 2))
ax.set_xlabel('x')
ax.set_ylim(0.0, 0.42)
ax.set_yticks(np.arange(0, 0.42, 0.1))
ax.set_ylabel('Probability density')
ax.plot(xx, yy, '-', lw=5, color='#ff0000', label='pdf')
ax.plot(xx, yc, '-', lw=2, color='#ffcc00', label='fit')
ax.bar(b[:-1], h, np.diff(b))
ax.legend(loc='upper right', frameon=False)

plt.tight_layout()
plt.savefig('mlepdf.pdf')
#plt.show()
|
||||
@@ -1,49 +0,0 @@
|
||||
import numpy as np
import matplotlib.pyplot as plt

# Lecture figure: least-squares fit of a line through the origin
# (y = m*x) to noisy data, compared against the original line.

plt.xkcd()
fig = plt.figure(figsize=(6, 4))

# the original line:
slope = 2.0
xx = np.arange(0.0, 4.1, 0.1)
yy = slope*xx
# the data: n points on the line plus gaussian noise:
n = 80
rng = np.random.RandomState(218)
sigma = 1.5
x = 4.0*rng.rand(n)
y = slope*x + rng.randn(n)*sigma
# least-squares slope for a line through the origin:
# m = sum(x*y)/sum(x*x)
slopef = np.sum(x*y)/np.sum(x*x)
yf = slopef*xx

# plot data, original line and fit:
ax = fig.add_subplot(1, 1, 1)
ax.spines['left'].set_position('zero')
ax.spines['bottom'].set_position('zero')
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.get_xaxis().set_tick_params(direction='inout', length=10, width=2)
ax.get_yaxis().set_tick_params(direction='inout', length=10, width=2)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xticks(np.arange(0.0, 4.1))
ax.set_xlim(0.0, 4.2)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.scatter(x, y, label='data', s=50, zorder=10)
# dropped the redundant 'r' format string that conflicted with the
# explicit color keyword:
ax.plot(xx, yy, lw=6.0, color='#ff0000', label='original', zorder=5)
ax.plot(xx, yf, '--', lw=2.0, color='#ffcc00', label='fit', zorder=7)
ax.legend(loc='upper left', frameon=False)

plt.tight_layout()
plt.savefig('mlepropline.pdf')
#plt.show()
|
||||
Reference in New Issue
Block a user