Merge branch 'master' of https://whale.am28.uni-tuebingen.de/git/teaching/scientificComputing
This commit is contained in:
commit
ee665dc163
2
.gitignore
vendored
2
.gitignore
vendored
@ -26,4 +26,4 @@
|
|||||||
*.vrb
|
*.vrb
|
||||||
__*
|
__*
|
||||||
pointprocesses/lecture/pointprocessscetch*.tex
|
pointprocesses/lecture/pointprocessscetch*.tex
|
||||||
|
*.DS_Store
|
||||||
|
8
Makefile
8
Makefile
@ -3,7 +3,7 @@ BASENAME=scientificcomputing-script
|
|||||||
SUBDIRS=programming debugging plotting codestyle statistics bootstrap regression likelihood pointprocesses designpattern
|
SUBDIRS=programming debugging plotting codestyle statistics bootstrap regression likelihood pointprocesses designpattern
|
||||||
SUBTEXS=$(foreach subd, $(SUBDIRS), $(subd)/lecture/$(subd).tex)
|
SUBTEXS=$(foreach subd, $(SUBDIRS), $(subd)/lecture/$(subd).tex)
|
||||||
|
|
||||||
all : script chapters
|
all : plots chapters index script exercises
|
||||||
|
|
||||||
chapters :
|
chapters :
|
||||||
for sd in $(SUBDIRS); do $(MAKE) -C $$sd/lecture chapter; done
|
for sd in $(SUBDIRS); do $(MAKE) -C $$sd/lecture chapter; done
|
||||||
@ -37,6 +37,11 @@ watchpdf :
|
|||||||
watchscript :
|
watchscript :
|
||||||
while true; do ! make -s -q script && make script; sleep 0.5; done
|
while true; do ! make -s -q script && make script; sleep 0.5; done
|
||||||
|
|
||||||
|
|
||||||
|
exercises:
|
||||||
|
for sd in $(SUBDIRS); do if test -d $$sd/exercises; then $(MAKE) -C $$sd/exercises pdf; fi; done
|
||||||
|
|
||||||
|
|
||||||
cleanplots:
|
cleanplots:
|
||||||
for sd in $(SUBDIRS); do $(MAKE) -C $$sd/lecture cleanplots; done
|
for sd in $(SUBDIRS); do $(MAKE) -C $$sd/lecture cleanplots; done
|
||||||
|
|
||||||
@ -46,6 +51,7 @@ cleantex:
|
|||||||
|
|
||||||
clean : cleantex
|
clean : cleantex
|
||||||
for sd in $(SUBDIRS); do $(MAKE) -C $$sd/lecture clean; done
|
for sd in $(SUBDIRS); do $(MAKE) -C $$sd/lecture clean; done
|
||||||
|
for sd in $(SUBDIRS); do if test -d $$sd/exercises; then $(MAKE) -C $$sd/exercises clean; fi; done
|
||||||
|
|
||||||
cleanall : clean
|
cleanall : clean
|
||||||
rm -f $(BASENAME).pdf
|
rm -f $(BASENAME).pdf
|
||||||
|
14
README.fonts
14
README.fonts
@ -1,6 +1,20 @@
|
|||||||
Fonts for matplotlib
|
Fonts for matplotlib
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
|
Install Humor Sans font
|
||||||
|
```
|
||||||
|
sudo apt install fonts-humor-sans
|
||||||
|
```
|
||||||
|
|
||||||
|
Clear matplotlib font cache:
|
||||||
|
```
|
||||||
|
cd ~/.cache/matplotlib/
|
||||||
|
rm -r *
|
||||||
|
```
|
||||||
|
|
||||||
|
Older problems
|
||||||
|
--------------
|
||||||
|
|
||||||
Make sure the right fonts are installed:
|
Make sure the right fonts are installed:
|
||||||
```
|
```
|
||||||
sudo apt-get install ttf-lyx2.0
|
sudo apt-get install ttf-lyx2.0
|
||||||
|
@ -1,24 +1,25 @@
|
|||||||
nsamples = 100;
|
nsamples = 100;
|
||||||
nresamples = 1000;
|
nresamples = 1000;
|
||||||
|
|
||||||
% draw a SRS (simple random sample, "Stichprobe") from the population:
|
% draw a simple random sample ("Stichprobe") from the population:
|
||||||
x = randn( 1, nsamples );
|
x = randn(1, nsamples);
|
||||||
fprintf('%-30s %-5s %-5s %-5s\n', '', 'mean', 'stdev', 'sem' )
|
fprintf('%-30s %-5s %-5s %-5s\n', '', 'mean', 'stdev', 'sem')
|
||||||
fprintf('%30s %5.2f %5.2f %5.2f\n', 'single SRS', mean( x ), std( x ), std( x )/sqrt(nsamples) )
|
fprintf('%30s %5.2f %5.2f %5.2f\n', 'single SRS', mean(x), std(x), std(x)/sqrt(nsamples))
|
||||||
|
|
||||||
% bootstrap the mean:
|
% bootstrap the mean:
|
||||||
mus = zeros(nresamples,1); % vector for storing the means
|
mus = zeros(nresamples,1); % vector for storing the means
|
||||||
for i = 1:nresamples % loop for generating the bootstraps
|
for i = 1:nresamples % loop for generating the bootstraps
|
||||||
inx = randi(nsamples, 1, nsamples); % range, 1D-vector, number
|
inx = randi(nsamples, 1, nsamples); % range, 1D-vector, number
|
||||||
xr = x(inx); % resample the original SRS
|
xr = x(inx); % resample the original SRS
|
||||||
mus(i) = mean(xr); % compute statistic of the resampled SRS
|
mus(i) = mean(xr); % compute statistic of the resampled SRS
|
||||||
end
|
end
|
||||||
fprintf('%30s %5.2f %5.2f -\n', 'bootstrapped distribution', mean( mus ), std( mus ) )
|
fprintf('%30s %5.2f %5.2f -\n', 'bootstrapped distribution', mean(mus), std(mus))
|
||||||
|
|
||||||
% many SRS (we can do that with the random number generator, but not in real life!):
|
% many SRS (we can do that with the random number generator,
|
||||||
musrs = zeros(nresamples,1); % vector for the means of each SRS
|
% but not in real life!):
|
||||||
|
musrs = zeros(nresamples,1); % vector for the means of each SRS
|
||||||
for i = 1:nresamples
|
for i = 1:nresamples
|
||||||
x = randn( 1, nsamples ); % draw a new SRS
|
x = randn(1, nsamples); % draw a new SRS
|
||||||
musrs(i) = mean( x ); % compute its mean
|
musrs(i) = mean(x); % compute its mean
|
||||||
end
|
end
|
||||||
fprintf('%30s %5.2f %5.2f -\n', 'sampling distribution', mean( musrs ), std( musrs ) )
|
fprintf('%30s %5.2f %5.2f -\n', 'sampling distribution', mean(musrs), std(musrs))
|
||||||
|
@ -1,27 +1,27 @@
|
|||||||
% generate correlated data:
|
% generate correlated data:
|
||||||
n=200;
|
n = 200;
|
||||||
a=0.2;
|
a = 0.2;
|
||||||
x = randn(n, 1);
|
x = randn(n, 1);
|
||||||
y = randn(n, 1) + a*x;
|
y = randn(n, 1) + a*x;
|
||||||
|
|
||||||
% correlation coefficient:
|
% correlation coefficient:
|
||||||
rd = corr(x, y);
|
rd = corr(x, y);
|
||||||
fprintf('correlation coefficient of data r = %.2f\n', rd );
|
fprintf('correlation coefficient of data r = %.2f\n', rd);
|
||||||
|
|
||||||
% distribution of null hypothesis by permutation:
|
% distribution of null hypothesis by permutation:
|
||||||
nperm = 1000;
|
nperm = 1000;
|
||||||
rs = zeros(nperm,1);
|
rs = zeros(nperm,1);
|
||||||
for i=1:nperm
|
for i = 1:nperm
|
||||||
xr=x(randperm(length(x))); % shuffle x
|
xr = x(randperm(length(x))); % shuffle x
|
||||||
yr=y(randperm(length(y))); % shuffle y
|
yr = y(randperm(length(y))); % shuffle y
|
||||||
rs(i) = corr(xr, yr);
|
rs(i) = corr(xr, yr);
|
||||||
end
|
end
|
||||||
[h,b] = hist(rs, 20 );
|
[h, b] = hist(rs, 20);
|
||||||
h = h/sum(h)/(b(2)-b(1)); % normalization
|
h = h/sum(h)/(b(2)-b(1)); % normalization
|
||||||
|
|
||||||
% significance:
|
% significance:
|
||||||
rq = quantile(rs, 0.95);
|
rq = quantile(rs, 0.95);
|
||||||
fprintf('correlation coefficient of null hypothesis at 5%% significance = %.2f\n', rq );
|
fprintf('correlation coefficient of null hypothesis at 5%% significance = %.2f\n', rq);
|
||||||
if rd >= rq
|
if rd >= rq
|
||||||
fprintf('--> correlation r=%.2f is significant\n', rd);
|
fprintf('--> correlation r=%.2f is significant\n', rd);
|
||||||
else
|
else
|
||||||
@ -32,7 +32,7 @@ end
|
|||||||
bar(b, h, 'facecolor', 'b');
|
bar(b, h, 'facecolor', 'b');
|
||||||
hold on;
|
hold on;
|
||||||
bar(b(b>=rq), h(b>=rq), 'facecolor', 'r');
|
bar(b(b>=rq), h(b>=rq), 'facecolor', 'r');
|
||||||
plot( [rd rd], [0 4], 'r', 'linewidth', 2 );
|
plot([rd rd], [0 4], 'r', 'linewidth', 2);
|
||||||
xlabel('Correlation coefficient');
|
xlabel('Correlation coefficient');
|
||||||
ylabel('Probability density of H0');
|
ylabel('Probability density of H0');
|
||||||
hold off;
|
hold off;
|
||||||
|
40
bootstrap/code/meandiffsignificance.m
Normal file
40
bootstrap/code/meandiffsignificance.m
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
% generate two data sets:
|
||||||
|
n = 200;
|
||||||
|
d = 0.2;
|
||||||
|
x = randn(n, 1);
|
||||||
|
y = randn(n, 1) + d;
|
||||||
|
|
||||||
|
% difference of sample means:
|
||||||
|
md = mean(y) - mean(x);
|
||||||
|
fprintf('difference of means of data d = %.2f\n', md);
|
||||||
|
|
||||||
|
% distribution of null hypothesis by permutation:
|
||||||
|
nperm = 1000;
|
||||||
|
xy = [x; y]; % x and y data in one column vector
|
||||||
|
ds = zeros(nperm,1);
|
||||||
|
for i = 1:nperm
|
||||||
|
xyr = xy(randperm(length(xy))); % shuffle data
|
||||||
|
xr = xyr(1:length(x)); % random x-data
|
||||||
|
yr = xyr(length(x)+1:end); % random y-data
|
||||||
|
ds(i) = mean(yr) - mean(xr); % difference of means
|
||||||
|
end
|
||||||
|
[h, b] = hist(ds, 20);
|
||||||
|
h = h/sum(h)/(b(2)-b(1)); % normalization
|
||||||
|
|
||||||
|
% significance:
|
||||||
|
dq = quantile(ds, 0.95);
|
||||||
|
fprintf('difference of means of null hypothesis at 5%% significance = %.2f\n', dq);
|
||||||
|
if md >= dq
|
||||||
|
fprintf('--> difference of means d=%.2f is significant\n', md);
|
||||||
|
else
|
||||||
|
fprintf('--> d=%.2f is not a significant difference of means\n', md);
|
||||||
|
end
|
||||||
|
|
||||||
|
% plot:
|
||||||
|
bar(b, h, 'facecolor', 'b');
|
||||||
|
hold on;
|
||||||
|
bar(b(b>=dq), h(b>=dq), 'facecolor', 'r');
|
||||||
|
plot([md md], [0 4], 'r', 'linewidth', 2);
|
||||||
|
xlabel('Difference of means');
|
||||||
|
ylabel('Probability density of H0');
|
||||||
|
hold off;
|
4
bootstrap/code/meandiffsignificance.out
Normal file
4
bootstrap/code/meandiffsignificance.out
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
>> meandiffsignificance
|
||||||
|
difference of means of data d = 0.18
|
||||||
|
difference of means of null hypothesis at 5% significance = 0.17
|
||||||
|
--> difference of means d=0.18 is significant
|
@ -1,34 +1,3 @@
|
|||||||
TEXFILES=$(wildcard exercises??.tex)
|
TEXFILES=$(wildcard resampling-?.tex)
|
||||||
EXERCISES=$(TEXFILES:.tex=.pdf)
|
|
||||||
SOLUTIONS=$(EXERCISES:exercises%=solutions%)
|
|
||||||
|
|
||||||
.PHONY: pdf exercises solutions watch watchexercises watchsolutions clean
|
include ../../exercises.mk
|
||||||
|
|
||||||
pdf : $(SOLUTIONS) $(EXERCISES)
|
|
||||||
|
|
||||||
exercises : $(EXERCISES)
|
|
||||||
|
|
||||||
solutions : $(SOLUTIONS)
|
|
||||||
|
|
||||||
$(SOLUTIONS) : solutions%.pdf : exercises%.tex instructions.tex
|
|
||||||
{ echo "\\documentclass[answers,12pt,a4paper,pdftex]{exam}"; sed -e '1d' $<; } > $(patsubst %.pdf,%.tex,$@)
|
|
||||||
pdflatex -interaction=scrollmode $(patsubst %.pdf,%.tex,$@) | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $(patsubst %.pdf,%.tex,$@) || true
|
|
||||||
rm $(patsubst %.pdf,%,$@).[!p]*
|
|
||||||
|
|
||||||
$(EXERCISES) : %.pdf : %.tex instructions.tex
|
|
||||||
pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
|
|
||||||
|
|
||||||
watch :
|
|
||||||
while true; do ! make -q pdf && make pdf; sleep 0.5; done
|
|
||||||
|
|
||||||
watchexercises :
|
|
||||||
while true; do ! make -q exercises && make exercises; sleep 0.5; done
|
|
||||||
|
|
||||||
watchsolutions :
|
|
||||||
while true; do ! make -q solutions && make solutions; sleep 0.5; done
|
|
||||||
|
|
||||||
clean :
|
|
||||||
rm -f *~ *.aux *.log *.out
|
|
||||||
|
|
||||||
cleanup : clean
|
|
||||||
rm -f $(SOLUTIONS) $(EXERCISES)
|
|
||||||
|
@ -10,7 +10,7 @@ function [bootsem, mu] = bootstrapmean(x, resample)
|
|||||||
for i = 1:resample
|
for i = 1:resample
|
||||||
% resample:
|
% resample:
|
||||||
xr = x(randi(nsamples, nsamples, 1));
|
xr = x(randi(nsamples, nsamples, 1));
|
||||||
% compute statistics on sample:
|
% compute statistics of resampled sample:
|
||||||
mu(i) = mean(xr);
|
mu(i) = mean(xr);
|
||||||
end
|
end
|
||||||
bootsem = std(mu);
|
bootsem = std(mu);
|
||||||
|
@ -1,18 +1,18 @@
|
|||||||
%% (a) bootstrap:
|
%% (a) bootstrap:
|
||||||
nperm = 1000;
|
nperm = 1000;
|
||||||
rb = zeros(nperm,1);
|
rb = zeros(nperm, 1);
|
||||||
for i=1:nperm
|
for i=1:nperm
|
||||||
% indices for resampling the data:
|
% indices for resampling the data:
|
||||||
inx = randi(length(x), length(x), 1);
|
inx = randi(length(x), length(x), 1);
|
||||||
% resampled data pairs:
|
% resampled data pairs:
|
||||||
xb=x(inx);
|
xb = x(inx);
|
||||||
yb=y(inx);
|
yb = y(inx);
|
||||||
rb(i) = corr(xb, yb);
|
rb(i) = corr(xb, yb);
|
||||||
end
|
end
|
||||||
|
|
||||||
%% (b) pdf of the correlation coefficients:
|
%% (b) pdf of the correlation coefficients:
|
||||||
[hb,bb] = hist(rb, 20);
|
[hb, bb] = hist(rb, 20);
|
||||||
hb = hb/sum(hb)/(bb(2)-bb(1)); % normalization
|
hb = hb/sum(hb)/(bb(2)-bb(1)); % normalization
|
||||||
|
|
||||||
%% (c) significance:
|
%% (c) significance:
|
||||||
rbq = quantile(rb, 0.05);
|
rbq = quantile(rb, 0.05);
|
||||||
@ -25,8 +25,8 @@ end
|
|||||||
|
|
||||||
%% plot:
|
%% plot:
|
||||||
hold on;
|
hold on;
|
||||||
bar(b, h, 'facecolor', [0.5 0.5 0.5]);
|
bar(b, h, 'facecolor', [0.5 0.5 0.5]); % permuation test
|
||||||
bar(bb, hb, 'facecolor', 'b');
|
bar(bb, hb, 'facecolor', 'b'); % bootstrap
|
||||||
bar(bb(bb<=rbq), hb(bb<=rbq), 'facecolor', 'r');
|
bar(bb(bb<=rbq), hb(bb<=rbq), 'facecolor', 'r');
|
||||||
plot([rd rd], [0 4], 'r', 'linewidth', 2);
|
plot([rd rd], [0 4], 'r', 'linewidth', 2);
|
||||||
xlim([-0.25 0.75])
|
xlim([-0.25 0.75])
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
%% (a) generate correlated data
|
%% (a) generate correlated data:
|
||||||
n = 1000;
|
n = 1000;
|
||||||
a = 0.2;
|
a = 0.2;
|
||||||
x = randn(n, 1);
|
x = randn(n, 1);
|
||||||
@ -8,7 +8,7 @@ y = randn(n, 1) + a*x;
|
|||||||
subplot(1, 2, 1);
|
subplot(1, 2, 1);
|
||||||
plot(x, a*x, 'r', 'linewidth', 3);
|
plot(x, a*x, 'r', 'linewidth', 3);
|
||||||
hold on
|
hold on
|
||||||
%scatter(x, y ); % either scatter ...
|
%scatter(x, y ); % either scatter ...
|
||||||
plot(x, y, 'o', 'markersize', 2 ); % ... or plot - same plot.
|
plot(x, y, 'o', 'markersize', 2 ); % ... or plot - same plot.
|
||||||
xlim([-4 4])
|
xlim([-4 4])
|
||||||
ylim([-4 4])
|
ylim([-4 4])
|
||||||
@ -20,20 +20,20 @@ hold off
|
|||||||
%c = corrcoef(x, y); % returns correlation matrix
|
%c = corrcoef(x, y); % returns correlation matrix
|
||||||
%rd = c(1, 2);
|
%rd = c(1, 2);
|
||||||
rd = corr(x, y);
|
rd = corr(x, y);
|
||||||
fprintf('correlation coefficient = %.2f\n', rd );
|
fprintf('correlation coefficient = %.2f\n', rd);
|
||||||
|
|
||||||
%% (e) permutation:
|
%% (e) permutation:
|
||||||
nperm = 1000;
|
nperm = 1000;
|
||||||
rs = zeros(nperm,1);
|
rs = zeros(nperm, 1);
|
||||||
for i=1:nperm
|
for i = 1:nperm
|
||||||
xr=x(randperm(length(x))); % shuffle x
|
xr = x(randperm(length(x))); % shuffle x
|
||||||
yr=y(randperm(length(y))); % shuffle y
|
yr = y(randperm(length(y))); % shuffle y
|
||||||
rs(i) = corr(xr, yr);
|
rs(i) = corr(xr, yr);
|
||||||
end
|
end
|
||||||
|
|
||||||
%% (g) pdf of the correlation coefficients:
|
%% (g) pdf of the correlation coefficients:
|
||||||
[h,b] = hist(rs, 20);
|
[h, b] = hist(rs, 20);
|
||||||
h = h/sum(h)/(b(2)-b(1)); % normalization
|
h = h/sum(h)/(b(2)-b(1)); % normalization
|
||||||
|
|
||||||
%% (h) significance:
|
%% (h) significance:
|
||||||
rq = quantile(rs, 0.95);
|
rq = quantile(rs, 0.95);
|
||||||
@ -49,7 +49,7 @@ subplot(1, 2, 2)
|
|||||||
hold on;
|
hold on;
|
||||||
bar(b, h, 'facecolor', 'b');
|
bar(b, h, 'facecolor', 'b');
|
||||||
bar(b(b>=rq), h(b>=rq), 'facecolor', 'r');
|
bar(b(b>=rq), h(b>=rq), 'facecolor', 'r');
|
||||||
plot( [rd rd], [0 4], 'r', 'linewidth', 2);
|
plot([rd rd], [0 4], 'r', 'linewidth', 2);
|
||||||
xlim([-0.25 0.25])
|
xlim([-0.25 0.25])
|
||||||
xlabel('Correlation coefficient');
|
xlabel('Correlation coefficient');
|
||||||
ylabel('Probability density of H0');
|
ylabel('Probability density of H0');
|
||||||
|
@ -1,205 +0,0 @@
|
|||||||
\documentclass[12pt,a4paper,pdftex]{exam}
|
|
||||||
|
|
||||||
\usepackage[english]{babel}
|
|
||||||
\usepackage{pslatex}
|
|
||||||
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
|
|
||||||
\usepackage{xcolor}
|
|
||||||
\usepackage{graphicx}
|
|
||||||
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
|
||||||
|
|
||||||
%%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
|
|
||||||
\pagestyle{headandfoot}
|
|
||||||
\ifprintanswers
|
|
||||||
\newcommand{\stitle}{: Solutions}
|
|
||||||
\else
|
|
||||||
\newcommand{\stitle}{}
|
|
||||||
\fi
|
|
||||||
\header{{\bfseries\large Exercise 9\stitle}}{{\bfseries\large Bootstrap}}{{\bfseries\large December 9th, 2019}}
|
|
||||||
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
|
|
||||||
jan.benda@uni-tuebingen.de}
|
|
||||||
\runningfooter{}{\thepage}{}
|
|
||||||
|
|
||||||
\setlength{\baselineskip}{15pt}
|
|
||||||
\setlength{\parindent}{0.0cm}
|
|
||||||
\setlength{\parskip}{0.3cm}
|
|
||||||
\renewcommand{\baselinestretch}{1.15}
|
|
||||||
|
|
||||||
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage{listings}
|
|
||||||
\lstset{
|
|
||||||
language=Matlab,
|
|
||||||
basicstyle=\ttfamily\footnotesize,
|
|
||||||
numbers=left,
|
|
||||||
numberstyle=\tiny,
|
|
||||||
title=\lstname,
|
|
||||||
showstringspaces=false,
|
|
||||||
commentstyle=\itshape\color{darkgray},
|
|
||||||
breaklines=true,
|
|
||||||
breakautoindent=true,
|
|
||||||
columns=flexible,
|
|
||||||
frame=single,
|
|
||||||
xleftmargin=1em,
|
|
||||||
xrightmargin=1em,
|
|
||||||
aboveskip=10pt
|
|
||||||
}
|
|
||||||
|
|
||||||
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage{amsmath}
|
|
||||||
\usepackage{amssymb}
|
|
||||||
\usepackage{bm}
|
|
||||||
\usepackage{dsfont}
|
|
||||||
\newcommand{\naZ}{\mathds{N}}
|
|
||||||
\newcommand{\gaZ}{\mathds{Z}}
|
|
||||||
\newcommand{\raZ}{\mathds{Q}}
|
|
||||||
\newcommand{\reZ}{\mathds{R}}
|
|
||||||
\newcommand{\reZp}{\mathds{R^+}}
|
|
||||||
\newcommand{\reZpN}{\mathds{R^+_0}}
|
|
||||||
\newcommand{\koZ}{\mathds{C}}
|
|
||||||
|
|
||||||
%%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\newcommand{\continue}{\ifprintanswers%
|
|
||||||
\else
|
|
||||||
\vfill\hspace*{\fill}$\rightarrow$\newpage%
|
|
||||||
\fi}
|
|
||||||
\newcommand{\continuepage}{\ifprintanswers%
|
|
||||||
\newpage
|
|
||||||
\else
|
|
||||||
\vfill\hspace*{\fill}$\rightarrow$\newpage%
|
|
||||||
\fi}
|
|
||||||
\newcommand{\newsolutionpage}{\ifprintanswers%
|
|
||||||
\newpage%
|
|
||||||
\else
|
|
||||||
\fi}
|
|
||||||
|
|
||||||
%%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\newcommand{\qt}[1]{\textbf{#1}\\}
|
|
||||||
\newcommand{\pref}[1]{(\ref{#1})}
|
|
||||||
\newcommand{\extra}{--- Zusatzaufgabe ---\ \mbox{}}
|
|
||||||
\newcommand{\code}[1]{\texttt{#1}}
|
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\begin{document}
|
|
||||||
|
|
||||||
\input{instructions}
|
|
||||||
|
|
||||||
\begin{questions}
|
|
||||||
|
|
||||||
\question \qt{Bootstrap the standard error of the mean}
|
|
||||||
We want to compute the standard error of the mean of a data set by
|
|
||||||
means of the bootstrap method and compare the result with the formula
|
|
||||||
``standard deviation divided by the square-root of $n$''.
|
|
||||||
\begin{parts}
|
|
||||||
\part Download the file \code{thymusglandweights.dat} from Ilias.
|
|
||||||
This is a data set of the weights of the thymus glands of 14-day old chicken embryos
|
|
||||||
measured in milligram.
|
|
||||||
\part Load the data into Matlab (\code{load} function).
|
|
||||||
\part Compute histogram, mean, and standard error of the mean of the first 80 data points.
|
|
||||||
\part Compute the standard error of the mean of the first 80 data
|
|
||||||
points by bootstrapping the data 500 times. Write a function that
|
|
||||||
bootstraps the standard error of the mean of a given data set. The
|
|
||||||
function should also return a vector with the bootstrapped means.
|
|
||||||
\part Compute the 95\,\% confidence interval for the mean from the
|
|
||||||
bootstrap distribution (\code{quantile()} function) --- the
|
|
||||||
interval that contains the true mean with 95\,\% probability.
|
|
||||||
\part Use the whole data set and the bootstrap method for computing
|
|
||||||
the dependence of the standard error of the mean from the sample
|
|
||||||
size $n$.
|
|
||||||
\part Compare your result with the formula for the standard error
|
|
||||||
$\sigma/\sqrt{n}$.
|
|
||||||
\end{parts}
|
|
||||||
\begin{solution}
|
|
||||||
\lstinputlisting{bootstrapmean.m}
|
|
||||||
\lstinputlisting{bootstraptymus.m}
|
|
||||||
\includegraphics[width=0.5\textwidth]{bootstraptymus-datahist}
|
|
||||||
\includegraphics[width=0.5\textwidth]{bootstraptymus-meanhist}
|
|
||||||
\includegraphics[width=0.5\textwidth]{bootstraptymus-samples}
|
|
||||||
\end{solution}
|
|
||||||
|
|
||||||
|
|
||||||
\question \qt{Student t-distribution}
|
|
||||||
The distribution of Student's t, $t=\bar x/(\sigma_x/\sqrt{n})$, the
|
|
||||||
estimated mean $\bar x$ of a data set of size $n$ divided by the
|
|
||||||
estimated standard error of the mean $\sigma_x/\sqrt{n}$, where
|
|
||||||
$\sigma_x$ is the estimated standard deviation, is not a normal
|
|
||||||
distribution but a Student-t distribution. We want to compute the
|
|
||||||
Student-t distribution and compare it with the normal distribution.
|
|
||||||
\begin{parts}
|
|
||||||
\part Generate 100000 normally distributed random numbers.
|
|
||||||
\part Draw from these data 1000 samples of size $n=3$, 5, 10, and
|
|
||||||
50. For each sample size $n$ ...
|
|
||||||
\part ... compute the mean $\bar x$ of the samples and plot the
|
|
||||||
probability density of these means.
|
|
||||||
\part ... compare the resulting probability densities with corresponding
|
|
||||||
normal distributions.
|
|
||||||
\part ... compute Student's $t=\bar x/(\sigma_x/\sqrt{n})$ and compare its
|
|
||||||
distribution with the normal distribution with standard deviation of
|
|
||||||
one. Is $t$ normally distributed? Under which conditions is $t$
|
|
||||||
normally distributed?
|
|
||||||
\end{parts}
|
|
||||||
\newsolutionpage
|
|
||||||
\begin{solution}
|
|
||||||
\lstinputlisting{tdistribution.m}
|
|
||||||
\includegraphics[width=1\textwidth]{tdistribution-n03}\\
|
|
||||||
\includegraphics[width=1\textwidth]{tdistribution-n05}\\
|
|
||||||
\includegraphics[width=1\textwidth]{tdistribution-n10}\\
|
|
||||||
\includegraphics[width=1\textwidth]{tdistribution-n50}
|
|
||||||
\end{solution}
|
|
||||||
|
|
||||||
|
|
||||||
\continue
|
|
||||||
\question \qt{Permutation test} \label{permutationtest}
|
|
||||||
We want to compute the significance of a correlation by means of a permutation test.
|
|
||||||
\begin{parts}
|
|
||||||
\part \label{permutationtestdata} Generate 1000 correlated pairs
|
|
||||||
$x$, $y$ of random numbers according to:
|
|
||||||
\begin{verbatim}
|
|
||||||
n = 1000
|
|
||||||
a = 0.2;
|
|
||||||
x = randn(n, 1);
|
|
||||||
y = randn(n, 1) + a*x;
|
|
||||||
\end{verbatim}
|
|
||||||
\part Generate a scatter plot of the two variables.
|
|
||||||
\part Why is $y$ correlated with $x$?
|
|
||||||
\part Compute the correlation coefficient between $x$ and $y$.
|
|
||||||
\part What do you need to do in order to destroy the correlations between the $x$-$y$ pairs?
|
|
||||||
\part Do exactly this 1000 times and compute each time the correlation coefficient.
|
|
||||||
\part Compute and plot the probability density of these correlation
|
|
||||||
coefficients.
|
|
||||||
\part Is the correlation of the original data set significant?
|
|
||||||
\part What does ``significance of the correlation'' mean?
|
|
||||||
% \part Vary the sample size \code{n} and compute in the same way the
|
|
||||||
% significance of the correlation.
|
|
||||||
\end{parts}
|
|
||||||
\begin{solution}
|
|
||||||
\lstinputlisting{correlationsignificance.m}
|
|
||||||
\includegraphics[width=1\textwidth]{correlationsignificance}
|
|
||||||
\end{solution}
|
|
||||||
|
|
||||||
\question \qt{Bootstrap the correlation coefficient}
|
|
||||||
The permutation test generates the distribution of the null hypothesis
|
|
||||||
of uncorrelated data and we check whether the correlation coefficient
|
|
||||||
of the data differs significantly from this
|
|
||||||
distribution. Alternatively we can bootstrap the data while keeping
|
|
||||||
the pairs and determine the confidence interval of the correlation
|
|
||||||
coefficient of the data. If this differs significantly from a
|
|
||||||
correlation coefficient of zero we can conclude that the correlation
|
|
||||||
coefficient of the data indeed quantifies correlated data.
|
|
||||||
|
|
||||||
We take the same data set that we have generated in exercise
|
|
||||||
\ref{permutationtest} (\ref{permutationtestdata}).
|
|
||||||
\begin{parts}
|
|
||||||
\part Bootstrap 1000 times the correlation coefficient from the data.
|
|
||||||
\part Compute and plot the probability density of these correlation
|
|
||||||
coefficients.
|
|
||||||
\part Is the correlation of the original data set significant?
|
|
||||||
\end{parts}
|
|
||||||
\begin{solution}
|
|
||||||
\lstinputlisting{correlationbootstrap.m}
|
|
||||||
\includegraphics[width=1\textwidth]{correlationbootstrap}
|
|
||||||
\end{solution}
|
|
||||||
|
|
||||||
\end{questions}
|
|
||||||
|
|
||||||
\end{document}
|
|
@ -1,6 +0,0 @@
|
|||||||
\vspace*{-7.8ex}
|
|
||||||
\begin{center}
|
|
||||||
\textbf{\Large Introduction to Scientific Computing}\\[2.3ex]
|
|
||||||
{\large Jan Grewe, Jan Benda}\\[-3ex]
|
|
||||||
Neuroethology Lab \hfill --- \hfill Institute for Neurobiology \hfill --- \hfill \includegraphics[width=0.28\textwidth]{UT_WBMW_Black_RGB} \\
|
|
||||||
\end{center}
|
|
29
bootstrap/exercises/meandiffpermutation.m
Normal file
29
bootstrap/exercises/meandiffpermutation.m
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
function [md, ds, dq] = meandiffpermutation(x, y, nperm, alpha)
|
||||||
|
% Permutation test for difference of means of two independent samples.
|
||||||
|
%
|
||||||
|
% [md, ds, dq] = meandiffpermutation(x, y, nperm, alpha);
|
||||||
|
%
|
||||||
|
% Arguments:
|
||||||
|
% x: vector with the samples of the x data set.
|
||||||
|
% y: vector with the samples of the y data set.
|
||||||
|
% nperm: number of permutations run.
|
||||||
|
% alpha: significance level.
|
||||||
|
%
|
||||||
|
% Returns:
|
||||||
|
% md: difference of the means
|
||||||
|
% ds: vector containing the differences of the means of the resampled data sets
|
||||||
|
% dq: difference of the means at a significance of alpha.
|
||||||
|
|
||||||
|
md = mean(x) - mean(y); % measured difference
|
||||||
|
xy = [x; y]; % merge data sets
|
||||||
|
% permutations:
|
||||||
|
ds = zeros(nperm, 1);
|
||||||
|
for i = 1:nperm
|
||||||
|
xyr = xy(randperm(length(xy))); % shuffle xy
|
||||||
|
xr = xyr(1:length(x)); % random x sample
|
||||||
|
yr = xyr(length(x)+1:end); % random y sample
|
||||||
|
ds(i) = mean(xr) - mean(yr);
|
||||||
|
end
|
||||||
|
% significance:
|
||||||
|
dq = quantile(ds, 1.0 - alpha);
|
||||||
|
end
|
42
bootstrap/exercises/meandiffplot.m
Normal file
42
bootstrap/exercises/meandiffplot.m
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
function meandiffplot(x, y, md, ds, dq, k, nrows)
|
||||||
|
% Plot histogram of data sets and of null hypothesis for differences in mean.
|
||||||
|
%
|
||||||
|
% meandiffplot(x, y, md, ds, dq, k, rows);
|
||||||
|
%
|
||||||
|
% Arguments:
|
||||||
|
% x: vector with the samples of the x data set.
|
||||||
|
% y: vector with the samples of the y data set.
|
||||||
|
% md: difference of means of the two data sets.
|
||||||
|
% ds: vector containing the differences of the means of the resampled data sets
|
||||||
|
% dq: minimum difference of the means considered significant.
|
||||||
|
% k: current row for the plot panels.
|
||||||
|
% nrows: number of rows of panels in the figure.
|
||||||
|
|
||||||
|
%% (b) plot histograms:
|
||||||
|
subplot(nrows, 2, k*2-1);
|
||||||
|
bmin = min([x; y]);
|
||||||
|
bmax = max([x; y]);
|
||||||
|
bins = bmin:(bmax-bmin)/20.0:bmax;
|
||||||
|
[xh, b] = hist(x, bins);
|
||||||
|
[yh, b] = hist(y, bins);
|
||||||
|
bar(bins, xh, 'facecolor', 'b')
|
||||||
|
hold on
|
||||||
|
bar(bins, yh, 'facecolor', 'r');
|
||||||
|
xlabel('x and y [mV]')
|
||||||
|
ylabel('counts')
|
||||||
|
hold off
|
||||||
|
|
||||||
|
%% (f) pdf of the differences:
|
||||||
|
[h, b] = hist(ds, 20);
|
||||||
|
h = h/sum(h)/(b(2)-b(1)); % normalization
|
||||||
|
|
||||||
|
%% plot:
|
||||||
|
subplot(nrows, 2, k*2)
|
||||||
|
bar(b, h, 'facecolor', 'b');
|
||||||
|
hold on;
|
||||||
|
bar(b(b>=dq), h(b>=dq), 'facecolor', 'r');
|
||||||
|
plot([md md], [0 4], 'r', 'linewidth', 2);
|
||||||
|
xlabel('Difference of means [mV]');
|
||||||
|
ylabel('pdf of H0');
|
||||||
|
hold off;
|
||||||
|
end
|
37
bootstrap/exercises/meandiffsignificance.m
Normal file
37
bootstrap/exercises/meandiffsignificance.m
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
n = 200;
|
||||||
|
mx = -40.0;
|
||||||
|
nperm = 1000;
|
||||||
|
alpha = 0.05;
|
||||||
|
|
||||||
|
%% (h) repeat for various means of the y-data set:
|
||||||
|
mys = [-40.1, -40.2, -40.5];
|
||||||
|
for k=1:length(mys)
|
||||||
|
|
||||||
|
%% (a) generate data:
|
||||||
|
my = mys(k);
|
||||||
|
x = randn(n, 1) + mx;
|
||||||
|
y = randn(n, 1) + my;
|
||||||
|
|
||||||
|
%% (d), (e) permutation test:
|
||||||
|
[md, ds, dq] = meandiffpermutation(x, y, nperm, alpha);
|
||||||
|
|
||||||
|
%% (c) difference of means:
|
||||||
|
fprintf('\nmean x = %.1fmV, mean y = %.1fmV\n', mx, my);
|
||||||
|
fprintf(' difference of means = %.2fmV\n', md);
|
||||||
|
|
||||||
|
%% (g) significance:
|
||||||
|
fprintf(' difference of means at 5%% significance = %.2fmV\n', dq);
|
||||||
|
if md >= dq
|
||||||
|
fprintf(' --> measured difference of means is significant\n');
|
||||||
|
else
|
||||||
|
fprintf(' --> measured difference of means is not significant\n');
|
||||||
|
end
|
||||||
|
|
||||||
|
%% (b), (f) plot histograms of data and pdf of differences:
|
||||||
|
meandiffplot(x, y, md, ds, dq, k, length(mys));
|
||||||
|
|
||||||
|
subplot(length(mys), 2, k*2-1);
|
||||||
|
title(sprintf('mx=%.1fmV, my=%.1fmV', mx, my))
|
||||||
|
end
|
||||||
|
|
||||||
|
savefigpdf(gcf, 'meandiffsignificance.pdf', 12, 10);
|
BIN
bootstrap/exercises/meandiffsignificance.pdf
Normal file
BIN
bootstrap/exercises/meandiffsignificance.pdf
Normal file
Binary file not shown.
116
bootstrap/exercises/resampling-1.tex
Normal file
116
bootstrap/exercises/resampling-1.tex
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
\documentclass[12pt,a4paper,pdftex]{exam}
|
||||||
|
|
||||||
|
\newcommand{\exercisetopic}{Resampling}
|
||||||
|
\newcommand{\exercisenum}{8}
|
||||||
|
\newcommand{\exercisedate}{December 14th, 2020}
|
||||||
|
|
||||||
|
\input{../../exercisesheader}
|
||||||
|
|
||||||
|
\firstpagefooter{Prof. Dr. Jan Benda}{}{jan.benda@uni-tuebingen.de}
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\input{../../exercisestitle}
|
||||||
|
|
||||||
|
\begin{questions}
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\question \qt{Read chapter 7 of the script on ``resampling methods''!}\vspace{-3ex}
|
||||||
|
|
||||||
|
\question \qt{Permutation test of correlations} \label{correlationtest}
|
||||||
|
We want to compute the significance of a correlation by means of a permutation test.
|
||||||
|
\begin{parts}
|
||||||
|
\part \label{correlationtestdata} Generate 1000 correlated pairs
|
||||||
|
$x$, $y$ of random numbers according to:
|
||||||
|
\begin{verbatim}
|
||||||
|
n = 1000
|
||||||
|
a = 0.2;
|
||||||
|
x = randn(n, 1);
|
||||||
|
y = randn(n, 1) + a*x;
|
||||||
|
\end{verbatim}
|
||||||
|
\part Generate a scatter plot of the two variables.
|
||||||
|
\part Why is $y$ correlated with $x$?
|
||||||
|
\part Compute the correlation coefficient between $x$ and $y$.
|
||||||
|
\part What do you need to do in order to destroy the correlations between the $x$-$y$ pairs?
|
||||||
|
\part Do exactly this 1000 times and compute each time the correlation coefficient.
|
||||||
|
\part Compute and plot the probability density of these correlation
|
||||||
|
coefficients.
|
||||||
|
\part Is the correlation of the original data set significant?
|
||||||
|
\part What does ``significance of the correlation'' mean?
|
||||||
|
% \part Vary the sample size \code{n} and compute in the same way the
|
||||||
|
% significance of the correlation.
|
||||||
|
\end{parts}
|
||||||
|
\begin{solution}
|
||||||
|
\lstinputlisting{correlationsignificance.m}
|
||||||
|
\includegraphics[width=1\textwidth]{correlationsignificance}
|
||||||
|
\end{solution}
|
||||||
|
|
||||||
|
\newsolutionpage
|
||||||
|
\question \qt{Bootstrap the correlation coefficient}
|
||||||
|
The permutation test generates the distribution of the null hypothesis
|
||||||
|
of uncorrelated data and we check whether the correlation coefficient
|
||||||
|
of the data differs significantly from this
|
||||||
|
distribution. Alternatively we can bootstrap the data while keeping
|
||||||
|
the pairs and determine the confidence interval of the correlation
|
||||||
|
coefficient of the data. If this differs significantly from a
|
||||||
|
correlation coefficient of zero we can conclude that the correlation
|
||||||
|
coefficient of the data indeed quantifies correlated data.
|
||||||
|
|
||||||
|
We take the same data set that we have generated in exercise
|
||||||
|
\ref{correlationtest} (\ref{correlationtestdata}).
|
||||||
|
\begin{parts}
|
||||||
|
\part Bootstrap 1000 times the correlation coefficient from the
|
||||||
|
data, i.e. generate bootstrap data by randomly resampling the
|
||||||
|
original data pairs with replacement. Use the \code{randi()}
|
||||||
|
function for generating random indices that you can use to select a
|
||||||
|
random sample from the original data.
|
||||||
|
\part Compute and plot the probability density of these correlation
|
||||||
|
coefficients.
|
||||||
|
\part Is the correlation of the original data set significant?
|
||||||
|
\end{parts}
|
||||||
|
\begin{solution}
|
||||||
|
\lstinputlisting{correlationbootstrap.m}
|
||||||
|
\includegraphics[width=1\textwidth]{correlationbootstrap}
|
||||||
|
\end{solution}
|
||||||
|
|
||||||
|
|
||||||
|
\continue
|
||||||
|
\question \qt{Permutation test of difference of means}
|
||||||
|
We want to test whether two data sets come from distributions that
|
||||||
|
differ in their mean by means of a permutation test.
|
||||||
|
\begin{parts}
|
||||||
|
\part Generate two normally distributed data sets $x$ and $y$
|
||||||
|
containing each $n=200$ samples. Let's assume the $x$ samples are
|
||||||
|
measurements of the membrane potential of a mammalian photoreceptor
|
||||||
|
in darkness with a mean of $-40$\,mV and a standard deviation of
|
||||||
|
1\,mV. The $y$ values are the membrane potentials measured under dim
|
||||||
|
illumination and come from a distribution with the same standard
|
||||||
|
deviation and a mean of $-40.5$\,mV. See section 5.2 ``Scaling and
|
||||||
|
shifting random numbers'' in the script.
|
||||||
|
\part Plot histograms of the $x$ and $y$ data in a single
|
||||||
|
plot. Choose appropriate bins.
|
||||||
|
\part Compute the means of $x$ and $y$ and their difference.
|
||||||
|
\part The null hypothesis is that the $x$ and $y$ data come from the
|
||||||
|
same distribution. How can you generate new samples $x_r$ and $y_r$
|
||||||
|
from the original data that come from the same distribution?
|
||||||
|
\part Do exactly this 1000 times and compute each time the
|
||||||
|
difference of the means of the two resampled samples.
|
||||||
|
\part Compute and plot the probability density of the resulting
|
||||||
|
distribution of the null hypothesis.
|
||||||
|
\part Is the difference of the means of the original data sets significant?
|
||||||
|
\part Repeat this procedure for $y$ samples that are closer or
|
||||||
|
further apart from the mean of the $x$ data set. For this put the
|
||||||
|
computations of the permuation test in a function and all the plotting
|
||||||
|
in another function.
|
||||||
|
\end{parts}
|
||||||
|
\begin{solution}
|
||||||
|
\lstinputlisting{meandiffpermutation.m}
|
||||||
|
\lstinputlisting{meandiffplot.m}
|
||||||
|
\lstinputlisting{meandiffsignificance.m}
|
||||||
|
\includegraphics[width=1\textwidth]{meandiffsignificance}
|
||||||
|
\end{solution}
|
||||||
|
|
||||||
|
\end{questions}
|
||||||
|
|
||||||
|
\end{document}
|
84
bootstrap/exercises/resampling-2.tex
Normal file
84
bootstrap/exercises/resampling-2.tex
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
\documentclass[12pt,a4paper,pdftex]{exam}
|
||||||
|
|
||||||
|
\newcommand{\exercisetopic}{Resampling}
|
||||||
|
\newcommand{\exercisenum}{X2}
|
||||||
|
\newcommand{\exercisedate}{December 14th, 2020}
|
||||||
|
|
||||||
|
\input{../../exercisesheader}
|
||||||
|
|
||||||
|
\firstpagefooter{Prof. Dr. Jan Benda}{}{jan.benda@uni-tuebingen.de}
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\input{../../exercisestitle}
|
||||||
|
|
||||||
|
\begin{questions}
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\question \qt{Read chapter 7 of the script on ``resampling methods''!}\vspace{-3ex}
|
||||||
|
|
||||||
|
\question \qt{Bootstrap the standard error of the mean}
|
||||||
|
We want to compute the standard error of the mean of a data set by
|
||||||
|
means of the bootstrap method and compare the result with the formula
|
||||||
|
``standard deviation divided by the square-root of $n$''.
|
||||||
|
\begin{parts}
|
||||||
|
\part Download the file \code{thymusglandweights.dat} from Ilias.
|
||||||
|
This is a data set of the weights of the thymus glands of 14-day old chicken embryos
|
||||||
|
measured in milligram.
|
||||||
|
\part Load the data into Matlab (\code{load} function).
|
||||||
|
\part Compute histogram, mean, and standard error of the mean of the first 80 data points.
|
||||||
|
\part Compute the standard error of the mean of the first 80 data
|
||||||
|
points by bootstrapping the data 500 times. Write a function that
|
||||||
|
bootstraps the standard error of the mean of a given data set. The
|
||||||
|
function should also return a vector with the bootstrapped means.
|
||||||
|
\part Compute the 95\,\% confidence interval for the mean from the
|
||||||
|
bootstrap distribution (\code{quantile()} function) --- the
|
||||||
|
interval that contains the true mean with 95\,\% probability.
|
||||||
|
\part Use the whole data set and the bootstrap method for computing
|
||||||
|
the dependence of the standard error of the mean from the sample
|
||||||
|
size $n$.
|
||||||
|
\part Compare your result with the formula for the standard error
|
||||||
|
$\sigma/\sqrt{n}$.
|
||||||
|
\end{parts}
|
||||||
|
\begin{solution}
|
||||||
|
\lstinputlisting{bootstrapmean.m}
|
||||||
|
\lstinputlisting{bootstraptymus.m}
|
||||||
|
\includegraphics[width=0.5\textwidth]{bootstraptymus-datahist}
|
||||||
|
\includegraphics[width=0.5\textwidth]{bootstraptymus-meanhist}
|
||||||
|
\includegraphics[width=0.5\textwidth]{bootstraptymus-samples}
|
||||||
|
\end{solution}
|
||||||
|
|
||||||
|
|
||||||
|
\question \qt{Student t-distribution}
|
||||||
|
The distribution of Student's t, $t=\bar x/(\sigma_x/\sqrt{n})$, the
|
||||||
|
estimated mean $\bar x$ of a data set of size $n$ divided by the
|
||||||
|
estimated standard error of the mean $\sigma_x/\sqrt{n}$, where
|
||||||
|
$\sigma_x$ is the estimated standard deviation, is not a normal
|
||||||
|
distribution but a Student-t distribution. We want to compute the
|
||||||
|
Student-t distribution and compare it with the normal distribution.
|
||||||
|
\begin{parts}
|
||||||
|
\part Generate 100000 normally distributed random numbers.
|
||||||
|
\part Draw from these data 1000 samples of size $n=3$, 5, 10, and
|
||||||
|
50. For each sample size $n$ ...
|
||||||
|
\part ... compute the mean $\bar x$ of the samples and plot the
|
||||||
|
probability density of these means.
|
||||||
|
\part ... compare the resulting probability densities with corresponding
|
||||||
|
normal distributions.
|
||||||
|
\part ... compute Student's $t=\bar x/(\sigma_x/\sqrt{n})$ and compare its
|
||||||
|
distribution with the normal distribution with standard deviation of
|
||||||
|
one. Is $t$ normally distributed? Under which conditions is $t$
|
||||||
|
normally distributed?
|
||||||
|
\end{parts}
|
||||||
|
\newsolutionpage
|
||||||
|
\begin{solution}
|
||||||
|
\lstinputlisting{tdistribution.m}
|
||||||
|
\includegraphics[width=1\textwidth]{tdistribution-n03}\\
|
||||||
|
\includegraphics[width=1\textwidth]{tdistribution-n05}\\
|
||||||
|
\includegraphics[width=1\textwidth]{tdistribution-n10}\\
|
||||||
|
\includegraphics[width=1\textwidth]{tdistribution-n50}
|
||||||
|
\end{solution}
|
||||||
|
|
||||||
|
\end{questions}
|
||||||
|
|
||||||
|
\end{document}
|
@ -1,28 +1,28 @@
|
|||||||
function savefigpdf( fig, name, width, height )
|
function savefigpdf(fig, name, width, height)
|
||||||
% Saves figure fig in pdf file name.pdf with appropriately set page size
|
% Saves figure fig in pdf file name.pdf with appropriately set page size
|
||||||
% and fonts
|
% and fonts
|
||||||
|
|
||||||
% default width:
|
% default width:
|
||||||
if nargin < 3
|
if nargin < 3
|
||||||
width = 11.7;
|
width = 11.7;
|
||||||
end
|
end
|
||||||
% default height:
|
% default height:
|
||||||
if nargin < 4
|
if nargin < 4
|
||||||
height = 9.0;
|
height = 9.0;
|
||||||
end
|
end
|
||||||
|
|
||||||
% paper:
|
% paper:
|
||||||
set( fig, 'PaperUnits', 'centimeters' );
|
set(fig, 'PaperUnits', 'centimeters');
|
||||||
set( fig, 'PaperSize', [width height] );
|
set(fig, 'PaperSize', [width height]);
|
||||||
set( fig, 'PaperPosition', [0.0 0.0 width height] );
|
set(fig, 'PaperPosition', [0.0 0.0 width height]);
|
||||||
set( fig, 'Color', 'white')
|
set(fig, 'Color', 'white')
|
||||||
|
|
||||||
% font:
|
% font:
|
||||||
set( findall( fig, 'type', 'axes' ), 'FontSize', 12 )
|
set(findall(fig, 'type', 'axes'), 'FontSize', 12)
|
||||||
set( findall( fig, 'type', 'text' ), 'FontSize', 12 )
|
set(findall(fig, 'type', 'text'), 'FontSize', 12)
|
||||||
|
|
||||||
% save:
|
% save:
|
||||||
saveas( fig, name, 'pdf' )
|
saveas(fig, name, 'pdf')
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
%% (a) generate random numbers:
|
%% (a) generate random numbers:
|
||||||
n = 100000;
|
n = 100000;
|
||||||
x=randn(n, 1);
|
x = randn(n, 1);
|
||||||
|
|
||||||
for nsamples=[3 5 10 50]
|
for nsamples = [3 5 10 50]
|
||||||
nsamples
|
nsamples
|
||||||
%% compute mean, standard deviation and t:
|
% compute mean, standard deviation and t:
|
||||||
nmeans = 10000;
|
nmeans = 10000;
|
||||||
means = zeros(nmeans, 1);
|
means = zeros(nmeans, 1);
|
||||||
sdevs = zeros(nmeans, 1);
|
sdevs = zeros(nmeans, 1);
|
||||||
@ -19,14 +19,14 @@ for nsamples=[3 5 10 50]
|
|||||||
% Gaussian pdfs:
|
% Gaussian pdfs:
|
||||||
msdev = std(means);
|
msdev = std(means);
|
||||||
tsdev = 1.0;
|
tsdev = 1.0;
|
||||||
dxg=0.01;
|
dxg = 0.01;
|
||||||
xmax = 10.0;
|
xmax = 10.0;
|
||||||
xmin = -xmax;
|
xmin = -xmax;
|
||||||
xg = [xmin:dxg:xmax];
|
xg = [xmin:dxg:xmax];
|
||||||
pm = exp(-0.5*(xg/msdev).^2)/sqrt(2.0*pi)/msdev;
|
pm = exp(-0.5*(xg/msdev).^2)/sqrt(2.0*pi)/msdev;
|
||||||
pt = exp(-0.5*(xg/tsdev).^2)/sqrt(2.0*pi)/tsdev;
|
pt = exp(-0.5*(xg/tsdev).^2)/sqrt(2.0*pi)/tsdev;
|
||||||
|
|
||||||
%% plots
|
% plots:
|
||||||
subplot(1, 2, 1)
|
subplot(1, 2, 1)
|
||||||
bins = xmin:0.2:xmax;
|
bins = xmin:0.2:xmax;
|
||||||
[h,b] = hist(means, bins);
|
[h,b] = hist(means, bins);
|
||||||
@ -35,7 +35,7 @@ for nsamples=[3 5 10 50]
|
|||||||
hold on
|
hold on
|
||||||
plot(xg, pm, 'r', 'linewidth', 2)
|
plot(xg, pm, 'r', 'linewidth', 2)
|
||||||
title(sprintf('sample size = %d', nsamples));
|
title(sprintf('sample size = %d', nsamples));
|
||||||
xlim( [-3, 3] );
|
xlim([-3, 3]);
|
||||||
xlabel('Mean');
|
xlabel('Mean');
|
||||||
ylabel('pdf');
|
ylabel('pdf');
|
||||||
hold off;
|
hold off;
|
||||||
@ -48,11 +48,11 @@ for nsamples=[3 5 10 50]
|
|||||||
hold on
|
hold on
|
||||||
plot(xg, pt, 'r', 'linewidth', 2)
|
plot(xg, pt, 'r', 'linewidth', 2)
|
||||||
title(sprintf('sample size = %d', nsamples));
|
title(sprintf('sample size = %d', nsamples));
|
||||||
xlim( [-8, 8] );
|
xlim([-8, 8]);
|
||||||
xlabel('Student-t');
|
xlabel('Student-t');
|
||||||
ylabel('pdf');
|
ylabel('pdf');
|
||||||
hold off;
|
hold off;
|
||||||
|
|
||||||
savefigpdf(gcf, sprintf('tdistribution-n%02d.pdf', nsamples), 14, 5);
|
savefigpdf(gcf, sprintf('tdistribution-n%02d.pdf', nsamples), 14, 5);
|
||||||
pause( 3.0 )
|
pause(3.0)
|
||||||
end
|
end
|
||||||
|
@ -10,6 +10,8 @@
|
|||||||
\setcounter{page}{\pagenumber}
|
\setcounter{page}{\pagenumber}
|
||||||
\setcounter{chapter}{\chapternumber}
|
\setcounter{chapter}{\chapternumber}
|
||||||
|
|
||||||
|
\renewcommand{\exercisesolutions}{here} % 0: here, 1: chapter, 2: end
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\begin{document}
|
\begin{document}
|
||||||
@ -17,10 +19,20 @@
|
|||||||
\include{bootstrap}
|
\include{bootstrap}
|
||||||
|
|
||||||
\section{TODO}
|
\section{TODO}
|
||||||
|
|
||||||
This chapter easily covers two lectures:
|
This chapter easily covers two lectures:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item 1. Bootstrapping with a proper introduction of of confidence intervals
|
\item 1. Bootstrapping with a proper introduction of of confidence intervals
|
||||||
\item 2. Permutation test with a proper introduction of statistical tests (dsitribution of nullhypothesis, significance, power, etc.)
|
\item 2. Permutation test with a proper introduction of statistical tests (distribution of nullhypothesis, significance, power, etc.)
|
||||||
|
\end{itemize}
|
||||||
|
ToDo:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Add jacknife methods to bootstrapping
|
||||||
|
\item Add discussion of confidence intervals to descriptive statistics chapter
|
||||||
|
\item Have a separate chapter on statistical tests before. What is the
|
||||||
|
essence of a statistical test (null hypothesis distribution), power
|
||||||
|
analysis, and a few examples of existing functions for statistical
|
||||||
|
tests.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
@ -49,7 +49,6 @@
|
|||||||
|
|
||||||
%%%% graphics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%% graphics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\usepackage{graphicx}
|
\usepackage{graphicx}
|
||||||
\newcommand{\texpicture}[1]{{\sffamily\small\input{#1.tex}}}
|
|
||||||
|
|
||||||
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\usepackage{listings}
|
\usepackage{listings}
|
||||||
|
@ -1,20 +1,28 @@
|
|||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\chapter{Bootstrap methods}
|
\chapter{Resampling methods}
|
||||||
\label{bootstrapchapter}
|
\label{bootstrapchapter}
|
||||||
\exercisechapter{Bootstrap methods}
|
\exercisechapter{Resampling methods}
|
||||||
|
|
||||||
Bootstrapping methods are applied to create distributions of
|
|
||||||
statistical measures via resampling of a sample. Bootstrapping offers several
|
\entermde{Resampling-Methoden}{Resampling methods} are applied to
|
||||||
advantages:
|
generate distributions of statistical measures via resampling of
|
||||||
|
existing samples. Resampling offers several advantages:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Fewer assumptions (e.g. a measured sample does not need to be
|
\item Fewer assumptions (e.g. a measured sample does not need to be
|
||||||
normally distributed).
|
normally distributed).
|
||||||
\item Increased precision as compared to classical methods. %such as?
|
\item Increased precision as compared to classical methods. %such as?
|
||||||
\item General applicability: the bootstrapping methods are very
|
\item General applicability: the resampling methods are very
|
||||||
similar for different statistics and there is no need to specialize
|
similar for different statistics and there is no need to specialize
|
||||||
the method to specific statistic measures.
|
the method to specific statistic measures.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
Resampling methods can be used for both estimating the precision of
|
||||||
|
estimated statistics (e.g. standard error of the mean, confidence
|
||||||
|
intervals) and testing for significane.
|
||||||
|
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\section{Bootstrapping}
|
||||||
|
|
||||||
\begin{figure}[tp]
|
\begin{figure}[tp]
|
||||||
\includegraphics[width=0.8\textwidth]{2012-10-29_16-26-05_771}\\[2ex]
|
\includegraphics[width=0.8\textwidth]{2012-10-29_16-26-05_771}\\[2ex]
|
||||||
@ -72,10 +80,10 @@ distribution of average values around the true mean of the population
|
|||||||
|
|
||||||
Alternatively, we can use \enterm{bootstrapping}
|
Alternatively, we can use \enterm{bootstrapping}
|
||||||
(\determ[Bootstrap!Verfahren]{Bootstrapverfahren}) to generate new
|
(\determ[Bootstrap!Verfahren]{Bootstrapverfahren}) to generate new
|
||||||
samples from one set of measurements
|
samples from one set of measurements by means of resampling. From
|
||||||
(\entermde{Resampling}{resampling}). From these bootstrapped samples
|
these bootstrapped samples we compute the desired statistical measure
|
||||||
we compute the desired statistical measure and estimate their
|
and estimate their distribution
|
||||||
distribution (\entermde{Bootstrap!Verteilung}{bootstrap distribution},
|
(\entermde{Bootstrap!Verteilung}{bootstrap distribution},
|
||||||
\subfigref{bootstrapsamplingdistributionfig}{c}). Interestingly, this
|
\subfigref{bootstrapsamplingdistributionfig}{c}). Interestingly, this
|
||||||
distribution is very similar to the sampling distribution regarding
|
distribution is very similar to the sampling distribution regarding
|
||||||
its width. The only difference is that the bootstrapped values are
|
its width. The only difference is that the bootstrapped values are
|
||||||
@ -84,19 +92,20 @@ of the statistical population. We can use the bootstrap distribution
|
|||||||
to draw conclusion regarding the precision of our estimation (e.g.
|
to draw conclusion regarding the precision of our estimation (e.g.
|
||||||
standard errors and confidence intervals).
|
standard errors and confidence intervals).
|
||||||
|
|
||||||
Bootstrapping methods create bootstrapped samples from a SRS by
|
Bootstrapping methods generate bootstrapped samples from a SRS by
|
||||||
resampling. The bootstrapped samples are used to estimate the sampling
|
resampling. The bootstrapped samples are used to estimate the sampling
|
||||||
distribution of a statistical measure. The bootstrapped samples have
|
distribution of a statistical measure. The bootstrapped samples have
|
||||||
the same size as the original sample and are created by randomly
|
the same size as the original sample and are generated by randomly
|
||||||
drawing with replacement. That is, each value of the original sample
|
drawing with replacement. That is, each value of the original sample
|
||||||
can occur once, multiple time, or not at all in a bootstrapped
|
can occur once, multiple times, or not at all in a bootstrapped
|
||||||
sample. This can be implemented by generating random indices into the
|
sample. This can be implemented by generating random indices into the
|
||||||
data set using the \code{randi()} function.
|
data set using the \code{randi()} function.
|
||||||
|
|
||||||
|
|
||||||
\section{Bootstrap of the standard error}
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\subsection{Bootstrap the standard error}
|
||||||
|
|
||||||
Bootstrapping can be nicely illustrated at the example of the
|
Bootstrapping can be nicely illustrated on the example of the
|
||||||
\enterm{standard error} of the mean (\determ{Standardfehler}). The
|
\enterm{standard error} of the mean (\determ{Standardfehler}). The
|
||||||
arithmetic mean is calculated for a simple random sample. The standard
|
arithmetic mean is calculated for a simple random sample. The standard
|
||||||
error of the mean is the standard deviation of the expected
|
error of the mean is the standard deviation of the expected
|
||||||
@ -116,11 +125,10 @@ population.
|
|||||||
the standard error of the mean.}
|
the standard error of the mean.}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
Via bootstrapping we create a distribution of the mean values
|
Via bootstrapping we generate a distribution of mean values
|
||||||
(\figref{bootstrapsemfig}) and the standard deviation of this
|
(\figref{bootstrapsemfig}) and the standard deviation of this
|
||||||
distribution is the standard error of the mean.
|
distribution is the standard error of the sample mean.
|
||||||
|
|
||||||
\pagebreak[4]
|
|
||||||
\begin{exercise}{bootstrapsem.m}{bootstrapsem.out}
|
\begin{exercise}{bootstrapsem.m}{bootstrapsem.out}
|
||||||
Create the distribution of mean values from bootstrapped samples
|
Create the distribution of mean values from bootstrapped samples
|
||||||
resampled from a single SRS. Use this distribution to estimate the
|
resampled from a single SRS. Use this distribution to estimate the
|
||||||
@ -138,68 +146,166 @@ distribution is the standard error of the mean.
|
|||||||
\end{exercise}
|
\end{exercise}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\section{Permutation tests}
|
\section{Permutation tests}
|
||||||
|
|
||||||
Statistical tests ask for the probability of a measured value to
|
Statistical tests ask for the probability of a measured value to
|
||||||
originate from a null hypothesis. Is this probability smaller than the
|
originate from a null hypothesis. Is this probability smaller than the
|
||||||
desired \entermde{Signifikanz}{significance level}, the
|
desired \entermde{Signifikanz}{significance level}, the
|
||||||
\entermde{Nullhypothese}{null hypothesis} may be rejected.
|
\entermde{Nullhypothese}{null hypothesis} can be rejected.
|
||||||
|
|
||||||
Traditionally, such probabilities are taken from theoretical
|
Traditionally, such probabilities are taken from theoretical
|
||||||
distributions which are based on assumptions about the data. Thus the
|
distributions which have been derived based on some assumptions about
|
||||||
applied statistical test has to be appropriate for the type of
|
the data. For example, the data should be normally distributed. Given
|
||||||
data. An alternative approach is to calculate the probability density
|
some data one has to find an appropriate test that matches the
|
||||||
of the null hypothesis directly from the data itself. To do this, we
|
properties of the data. An alternative approach is to calculate the
|
||||||
need to resample the data according to the null hypothesis from the
|
probability density of the null hypothesis directly from the data
|
||||||
SRS. By such permutation operations we destroy the feature of interest
|
themselves. To do so, we need to resample the data according to the
|
||||||
while we conserve all other statistical properties of the data.
|
null hypothesis from the SRS. By such permutation operations we
|
||||||
|
destroy the feature of interest while conserving all other statistical
|
||||||
|
properties of the data.
|
||||||
|
|
||||||
|
|
||||||
|
\subsection{Significance of a difference in the mean}
|
||||||
|
|
||||||
|
Often we would like to know whether two data sets differ in their
|
||||||
|
mean. Whether the ears of foxes are larger in southern Europe compared
|
||||||
|
to the ones from Scandinavia, whether a drug decreases blood pressure
|
||||||
|
in humans, whether a sensory stimulus increases the firing rate of a
|
||||||
|
neuron, etc. The \entermde{Nullhypothese}{null hypothesis} is that
|
||||||
|
they do not differ in their means, i.e. that both data sets come from
|
||||||
|
the same distribution. But even if the two data sets come from the
|
||||||
|
same distribution, their sample means may nevertheless differ by
|
||||||
|
chance. We need to figure out how these differences of the means are
|
||||||
|
distributed. Only if the measured difference between the means is
|
||||||
|
significantly larger than the ones obtained by chance we can reject
|
||||||
|
the null hypothesis and consider the two data sets to differ
|
||||||
|
significantly in their means.
|
||||||
|
|
||||||
|
We can easily estimate the distribution of the null hypothesis by
|
||||||
|
putting the data of both data sets in one big bag. By merging the two
|
||||||
|
data sets we assume that all the data values come from the same
|
||||||
|
distribution. We then randomly separate the data values into two new
|
||||||
|
data sets. These random data sets contain data from both original data
|
||||||
|
sets and thus come from the same distribution. From these random data
|
||||||
|
sets we compute the difference of their sample means. This procedure
|
||||||
|
is repeated many, say one thousand, times and each time we get a value
|
||||||
|
for a difference of means. The distribution of these values is the
|
||||||
|
distribution of the null hypothesis. It is the distribution of
|
||||||
|
differences of mean values that we get by chance although the two data
|
||||||
|
sets come from the same distribution. For a one-sided test that checks
|
||||||
|
whether the measured difference of means is significantly larger than
|
||||||
|
zero at a significance level of 5\,\% we compute the value of the
|
||||||
|
95\,\% percentile of the null distribution. If the measured value is
|
||||||
|
larger, we can reject the null hypothesis and consider the two data
|
||||||
|
sets to differ significantly in their means.
|
||||||
|
|
||||||
|
By using the original data to estimate the null hypothesis, we make no
|
||||||
|
assumption about the properties of the data. We do not need to worry
|
||||||
|
about the data being normally distributed. We do not need to memorize
|
||||||
|
which test to use in which situation. And we better understand what we
|
||||||
|
are testing, because we design the test ourselves. Nowadays, computer
|
||||||
|
are powerful enough to iterate even ten thousand times over the data
|
||||||
|
to compute the distribution of the null hypothesis --- with only a few
|
||||||
|
lines of code. This is why \entermde{Permutationstest}{permutaion
|
||||||
|
test} are getting quite popular.
|
||||||
|
|
||||||
\begin{figure}[tp]
|
\begin{figure}[tp]
|
||||||
\includegraphics[width=1\textwidth]{permutecorrelation}
|
\includegraphics[width=1\textwidth]{permuteaverage}
|
||||||
\titlecaption{\label{permutecorrelationfig}Permutation test for
|
\titlecaption{\label{permuteaverage}Permutation test for differences
|
||||||
correlations.}{Let the correlation coefficient of a dataset with
|
in means.}{We want to test whether two datasets
|
||||||
200 samples be $\rho=0.21$. The distribution of the null
|
$\left\{x_i\right\}$ (red) and $\left\{y_i\right\}$ (blue) come
|
||||||
hypothesis (yellow), optained from the correlation coefficients of
|
from different distributions by assessing the significance of the
|
||||||
permuted and therefore uncorrelated datasets is centered around
|
difference in their sample means. The data sets were generated
|
||||||
zero. The measured correlation coefficient is larger than the
|
with a difference in their population means of $d=0.7$. For
|
||||||
95\,\% percentile of the null hypothesis. The null hypothesis may
|
generating the distribution of the null hypothesis, i.e. the
|
||||||
thus be rejected and the measured correlation is considered
|
distribution of differences in the means if the two data sets come
|
||||||
statistically significant.}
|
from the same distribution, we randomly select the same number of
|
||||||
|
samples from both data sets (top right). This is repeated many
|
||||||
|
times and results in the desired distribution of differences of
|
||||||
|
means (bottom). The measured difference is clearly beyond the
|
||||||
|
95\,\% percentile of this distribution and thus indicates a
|
||||||
|
significant difference between the distributions of the two
|
||||||
|
original data sets.}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
A good example for the application of a
|
\begin{exercise}{meandiffsignificance.m}{meandiffsignificance.out}
|
||||||
\entermde{Permutationstest}{permutaion test} is the statistical
|
Estimate the statistical significance of a difference in the mean of two data sets.
|
||||||
assessment of \entermde[correlation]{Korrelation}{correlations}. Given
|
\vspace{-1ex}
|
||||||
are measured pairs of data points $(x_i, y_i)$. By calculating the
|
\begin{enumerate}
|
||||||
|
\item Generate two independent data sets, $\left\{x_i\right\}$ and
|
||||||
|
$\left\{y_i\right\}$, of $n=200$ samples each, by drawing random
|
||||||
|
numbers from a normal distribution. Add 0.2 to all the $y_i$ samples
|
||||||
|
to ensure the population means to differ by 0.2.
|
||||||
|
\item Calculate the difference between the sample means of the two data sets.
|
||||||
|
\item Estimate the distribution of the null hypothesis of no
|
||||||
|
difference of the means by generating new data sets with the same
|
||||||
|
number of samples randomly selected from both data sets. For this
|
||||||
|
lump the two data sets together into a single vector. Then permute
|
||||||
|
the order of the elements in this vector using the function
|
||||||
|
\varcode{randperm()}, split it into two data sets and calculate
|
||||||
|
the difference of their means. Repeat this 1000 times.
|
||||||
|
\item Read out the 95\,\% percentile from the resulting distribution
|
||||||
|
of the differences in the mean, the null hypothesis using the
|
||||||
|
\varcode{quantile()} function, and compare it with the difference of
|
||||||
|
means measured from the original data sets.
|
||||||
|
\end{enumerate}
|
||||||
|
\end{exercise}
|
||||||
|
|
||||||
|
|
||||||
|
\subsection{Significance of correlations}
|
||||||
|
|
||||||
|
Another nice example for the application of a
|
||||||
|
\entermde{Permutationstest}{permutaion test} is testing for
|
||||||
|
significant \entermde[correlation]{Korrelation}{correlations}
|
||||||
|
(figure\,\ref{permutecorrelationfig}). Given are measured pairs of
|
||||||
|
data points $(x_i, y_i)$. By calculating the
|
||||||
\entermde[correlation!correlation
|
\entermde[correlation!correlation
|
||||||
coefficient]{Korrelationskoeffizient}{correlation
|
coefficient]{Korrelationskoeffizient}{correlation coefficient} we can
|
||||||
coefficient} we can quantify how strongly $y$ depends on $x$. The
|
quantify how strongly $y$ depends on $x$. The correlation coefficient
|
||||||
correlation coefficient alone, however, does not tell whether the
|
alone, however, does not tell whether the correlation is significantly
|
||||||
correlation is significantly different from a random correlation. The
|
different from a non-zero correlation that we might get although there
|
||||||
\entermde{Nullhypothese}{null hypothesis} for such a situation is that
|
is no true correlation in the data. The \entermde{Nullhypothese}{null
|
||||||
$y$ does not depend on $x$. In order to perform a permutation test, we
|
hypothesis} for such a situation is that $y$ does not depend on
|
||||||
need to destroy the correlation by permuting the $(x_i, y_i)$ pairs,
|
$x$. In order to perform a permutation test, we need to destroy the
|
||||||
i.e. we rearrange the $x_i$ and $y_i$ values in a random
|
correlation between the data pairs by permuting the $(x_i, y_i)$
|
||||||
|
pairs, i.e. we rearrange the $x_i$ and $y_i$ values in a random
|
||||||
fashion. Generating many sets of random pairs and computing the
|
fashion. Generating many sets of random pairs and computing the
|
||||||
resulting correlation coefficients yields a distribution of
|
corresponding correlation coefficients yields a distribution of
|
||||||
correlation coefficients that result randomly from uncorrelated
|
correlation coefficients that result randomly from truly uncorrelated
|
||||||
data. By comparing the actually measured correlation coefficient with
|
data. By comparing the actually measured correlation coefficient with
|
||||||
this distribution we can directly assess the significance of the
|
this distribution we can directly assess the significance of the
|
||||||
correlation (figure\,\ref{permutecorrelationfig}).
|
correlation.
|
||||||
|
|
||||||
|
\begin{figure}[tp]
|
||||||
|
\includegraphics[width=1\textwidth]{permutecorrelation}
|
||||||
|
\titlecaption{\label{permutecorrelationfig}Permutation test for
|
||||||
|
correlations.}{Let the correlation coefficient of a dataset with
|
||||||
|
200 samples be $\rho=0.21$ (top left). By shuffling the data pairs
|
||||||
|
we destroy any true correlation (top right). The resulting
|
||||||
|
distribution of the null hypothesis (bottm, yellow), optained from
|
||||||
|
the correlation coefficients of permuted and therefore
|
||||||
|
uncorrelated datasets is centered around zero. The measured
|
||||||
|
correlation coefficient is larger than the 95\,\% percentile of
|
||||||
|
the null hypothesis. The null hypothesis may thus be rejected and
|
||||||
|
the measured correlation is considered statistically significant.}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
\begin{exercise}{correlationsignificance.m}{correlationsignificance.out}
|
\begin{exercise}{correlationsignificance.m}{correlationsignificance.out}
|
||||||
Estimate the statistical significance of a correlation coefficient.
|
Estimate the statistical significance of a correlation coefficient.
|
||||||
\begin{enumerate}
|
\begin{enumerate}
|
||||||
\item Create pairs of $(x_i, y_i)$ values. Randomly choose $x$-values
|
\item Generate pairs of $(x_i, y_i)$ values. Randomly choose $x$-values
|
||||||
and calculate the respective $y$-values according to $y_i =0.2 \cdot x_i + u_i$
|
and calculate the respective $y$-values according to $y_i =0.2 \cdot x_i + u_i$
|
||||||
where $u_i$ is a random number drawn from a normal distribution.
|
where $u_i$ is a random number drawn from a normal distribution.
|
||||||
\item Calculate the correlation coefficient.
|
\item Calculate the correlation coefficient.
|
||||||
\item Generate the distribution of the null hypothesis by generating
|
\item Estimate the distribution of the null hypothesis by generating
|
||||||
uncorrelated pairs. For this permute $x$- and $y$-values
|
uncorrelated pairs. For this permute $x$- and $y$-values
|
||||||
\matlabfun{randperm()} 1000 times and calculate for each permutation
|
\matlabfun{randperm()} 1000 times and calculate for each permutation
|
||||||
the correlation coefficient.
|
the correlation coefficient.
|
||||||
\item Read out the 95\,\% percentile from the resulting distribution
|
\item Read out the 95\,\% percentile from the resulting distribution
|
||||||
of the null hypothesis and compare it with the correlation
|
of the null hypothesis using the \varcode{quantile()} function and
|
||||||
coefficient computed from the original data.
|
compare it with the correlation coefficient computed from the
|
||||||
|
original data.
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
\end{exercise}
|
\end{exercise}
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ for i in range(nresamples) :
|
|||||||
musrs.append(np.mean(rng.randn(nsamples)))
|
musrs.append(np.mean(rng.randn(nsamples)))
|
||||||
hmusrs, _ = np.histogram(musrs, bins, density=True)
|
hmusrs, _ = np.histogram(musrs, bins, density=True)
|
||||||
|
|
||||||
fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.2*figure_height))
|
fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.1*figure_height))
|
||||||
fig.subplots_adjust(**adjust_fs(left=4.0, bottom=2.7, right=1.5))
|
fig.subplots_adjust(**adjust_fs(left=4.0, bottom=2.7, right=1.5))
|
||||||
ax.set_xlabel('Mean')
|
ax.set_xlabel('Mean')
|
||||||
ax.set_xlim(-0.4, 0.4)
|
ax.set_xlim(-0.4, 0.4)
|
||||||
|
103
bootstrap/lecture/permuteaverage.py
Normal file
103
bootstrap/lecture/permuteaverage.py
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
import numpy as np
|
||||||
|
import scipy.stats as st
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import matplotlib.gridspec as gridspec
|
||||||
|
import matplotlib.ticker as ticker
|
||||||
|
from plotstyle import *
|
||||||
|
|
||||||
|
rng = np.random.RandomState(637281)
|
||||||
|
|
||||||
|
# generate data that differ in their mein by d:
|
||||||
|
n = 200
|
||||||
|
d = 0.7
|
||||||
|
x = rng.randn(n) + d;
|
||||||
|
y = rng.randn(n);
|
||||||
|
#x = rng.exponential(1.0, n);
|
||||||
|
#y = rng.exponential(1.0, n) + d;
|
||||||
|
|
||||||
|
# histogram of data:
|
||||||
|
db = 0.5
|
||||||
|
bins = np.arange(-2.5, 2.6, db)
|
||||||
|
hx, _ = np.histogram(x, bins)
|
||||||
|
hy, _ = np.histogram(y, bins)
|
||||||
|
|
||||||
|
# Diference of means, pooled standard deviation and Cohen's d:
|
||||||
|
ad = np.mean(x)-np.mean(y)
|
||||||
|
s = np.sqrt(((len(x)-1)*np.var(x)+(len(y)-1)*np.var(y))/(len(x)+len(y)-2))
|
||||||
|
cd = ad/s
|
||||||
|
|
||||||
|
# permutation:
|
||||||
|
nperm = 1000
|
||||||
|
ads = []
|
||||||
|
xy = np.hstack((x, y))
|
||||||
|
for i in range(nperm) :
|
||||||
|
xyp = rng.permutation(xy)
|
||||||
|
ads.append(np.mean(xyp[:len(x)])-np.mean(xyp[len(x):]))
|
||||||
|
|
||||||
|
# histogram of shuffled data:
|
||||||
|
hxp, _ = np.histogram(xyp[:len(x)], bins)
|
||||||
|
hyp, _ = np.histogram(xyp[len(x):], bins)
|
||||||
|
|
||||||
|
# pdf of the differences of means:
|
||||||
|
h, b = np.histogram(ads, 20, density=True)
|
||||||
|
|
||||||
|
# significance:
|
||||||
|
dq = np.percentile(ads, 95.0)
|
||||||
|
print('Measured difference of means = %.2f, difference at 95%% percentile of permutation = %.2f' % (ad, dq))
|
||||||
|
da = 1.0-0.01*st.percentileofscore(ads, ad)
|
||||||
|
print('Measured difference of means %.2f is at %.2f%% percentile of permutation' % (ad, 100.0*da))
|
||||||
|
|
||||||
|
ap, at = st.ttest_ind(x, y)
|
||||||
|
print('Measured difference of means %.2f is at %.2f%% percentile of test' % (ad, ap))
|
||||||
|
|
||||||
|
|
||||||
|
fig = plt.figure(figsize=cm_size(figure_width, 1.8*figure_height))
|
||||||
|
gs = gridspec.GridSpec(nrows=2, ncols=2, wspace=0.35, hspace=0.8,
|
||||||
|
**adjust_fs(fig, left=5.0, right=1.5, top=1.0, bottom=2.7))
|
||||||
|
|
||||||
|
ax = fig.add_subplot(gs[0,0])
|
||||||
|
ax.bar(bins[:-1]-0.25*db, hy, 0.5*db, **fsA)
|
||||||
|
ax.bar(bins[:-1]+0.25*db, hx, 0.5*db, **fsB)
|
||||||
|
ax.annotate('', xy=(0.0, 45.0), xytext=(d, 45.0), arrowprops=dict(arrowstyle='<->'))
|
||||||
|
ax.text(0.5*d, 50.0, 'd=%.1f' % d, ha='center')
|
||||||
|
ax.set_xlim(-2.5, 2.5)
|
||||||
|
ax.set_ylim(0.0, 50)
|
||||||
|
ax.yaxis.set_major_locator(ticker.MultipleLocator(20.0))
|
||||||
|
ax.set_xlabel('Original x and y values')
|
||||||
|
ax.set_ylabel('Counts')
|
||||||
|
|
||||||
|
ax = fig.add_subplot(gs[0,1])
|
||||||
|
ax.bar(bins[:-1]-0.25*db, hyp, 0.5*db, **fsA)
|
||||||
|
ax.bar(bins[:-1]+0.25*db, hxp, 0.5*db, **fsB)
|
||||||
|
ax.set_xlim(-2.5, 2.5)
|
||||||
|
ax.set_ylim(0.0, 50)
|
||||||
|
ax.yaxis.set_major_locator(ticker.MultipleLocator(20.0))
|
||||||
|
ax.set_xlabel('Shuffled x and y values')
|
||||||
|
ax.set_ylabel('Counts')
|
||||||
|
|
||||||
|
ax = fig.add_subplot(gs[1,:])
|
||||||
|
ax.annotate('Measured\ndifference\nis significant!',
|
||||||
|
xy=(ad, 1.2), xycoords='data',
|
||||||
|
xytext=(ad-0.1, 2.2), textcoords='data', ha='right',
|
||||||
|
arrowprops=dict(arrowstyle="->", relpos=(1.0,0.5),
|
||||||
|
connectionstyle="angle3,angleA=-20,angleB=100") )
|
||||||
|
ax.annotate('95% percentile',
|
||||||
|
xy=(0.19, 0.9), xycoords='data',
|
||||||
|
xytext=(0.3, 5.0), textcoords='data', ha='left',
|
||||||
|
arrowprops=dict(arrowstyle="->", relpos=(0.1,0.0),
|
||||||
|
connectionstyle="angle3,angleA=40,angleB=80") )
|
||||||
|
ax.annotate('Distribution of\nnullhypothesis',
|
||||||
|
xy=(-0.08, 3.0), xycoords='data',
|
||||||
|
xytext=(-0.22, 4.5), textcoords='data', ha='left',
|
||||||
|
arrowprops=dict(arrowstyle="->", relpos=(0.2,0.0),
|
||||||
|
connectionstyle="angle3,angleA=60,angleB=150") )
|
||||||
|
ax.bar(b[:-1], h, width=b[1]-b[0], **fsC)
|
||||||
|
ax.bar(b[:-1][b[:-1]>=dq], h[b[:-1]>=dq], width=b[1]-b[0], **fsB)
|
||||||
|
ax.plot( [ad, ad], [0, 1], **lsA)
|
||||||
|
ax.set_xlim(-0.25, 0.85)
|
||||||
|
ax.set_ylim(0.0, 5.0)
|
||||||
|
ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0))
|
||||||
|
ax.set_xlabel('Difference of means')
|
||||||
|
ax.set_ylabel('PDF of H0')
|
||||||
|
|
||||||
|
plt.savefig('permuteaverage.pdf')
|
@ -1,9 +1,11 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import scipy.stats as st
|
import scipy.stats as st
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
import matplotlib.gridspec as gridspec
|
||||||
|
import matplotlib.ticker as ticker
|
||||||
from plotstyle import *
|
from plotstyle import *
|
||||||
|
|
||||||
rng = np.random.RandomState(637281)
|
rng = np.random.RandomState(37281)
|
||||||
|
|
||||||
# generate correlated data:
|
# generate correlated data:
|
||||||
n = 200
|
n = 200
|
||||||
@ -19,32 +21,56 @@ rd = np.corrcoef(x, y)[0, 1]
|
|||||||
nperm = 1000
|
nperm = 1000
|
||||||
rs = []
|
rs = []
|
||||||
for i in range(nperm) :
|
for i in range(nperm) :
|
||||||
xr=rng.permutation(x)
|
xr = rng.permutation(x)
|
||||||
yr=rng.permutation(y)
|
yr = rng.permutation(y)
|
||||||
rs.append( np.corrcoef(xr, yr)[0, 1] )
|
rs.append(np.corrcoef(xr, yr)[0, 1])
|
||||||
|
rr = np.corrcoef(xr, yr)[0, 1]
|
||||||
|
|
||||||
# pdf of the correlation coefficients:
|
# pdf of the correlation coefficients:
|
||||||
h, b = np.histogram(rs, 20, density=True)
|
h, b = np.histogram(rs, 20, density=True)
|
||||||
|
|
||||||
# significance:
|
# significance:
|
||||||
rq = np.percentile(rs, 95.0)
|
rq = np.percentile(rs, 95.0)
|
||||||
print('Measured correlation coefficient = %.2f, correlation coefficient at 95%% percentile of bootstrap = %.2f' % (rd, rq))
|
print('Measured correlation coefficient = %.2f, correlation coefficient at 95%% percentile of permutation = %.2f' % (rd, rq))
|
||||||
ra = 1.0-0.01*st.percentileofscore(rs, rd)
|
ra = 1.0-0.01*st.percentileofscore(rs, rd)
|
||||||
print('Measured correlation coefficient %.2f is at %.4f percentile of bootstrap' % (rd, ra))
|
print('Measured correlation coefficient %.2f is at %.4f percentile of permutation' % (rd, ra))
|
||||||
|
|
||||||
rp, ra = st.pearsonr(x, y)
|
rp, ra = st.pearsonr(x, y)
|
||||||
print('Measured correlation coefficient %.2f is at %.4f percentile of test' % (rp, ra))
|
print('Measured correlation coefficient %.2f is at %.4f percentile of test' % (rp, ra))
|
||||||
|
|
||||||
fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.2*figure_height))
|
fig = plt.figure(figsize=cm_size(figure_width, 1.8*figure_height))
|
||||||
fig.subplots_adjust(**adjust_fs(left=4.0, bottom=2.7, right=0.5, top=1.0))
|
gs = gridspec.GridSpec(nrows=2, ncols=2, wspace=0.35, hspace=0.8,
|
||||||
|
**adjust_fs(fig, left=5.0, right=1.5, top=1.0, bottom=2.7))
|
||||||
|
|
||||||
|
ax = fig.add_subplot(gs[0,0])
|
||||||
|
ax.text(0.0, 4.0, 'r=%.2f' % rd, ha='center')
|
||||||
|
ax.plot(x, y, **psAm)
|
||||||
|
ax.set_xlim(-4.0, 4.0)
|
||||||
|
ax.set_ylim(-4.0, 4.0)
|
||||||
|
ax.xaxis.set_major_locator(ticker.MultipleLocator(2.0))
|
||||||
|
ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0))
|
||||||
|
ax.set_xlabel('x')
|
||||||
|
ax.set_ylabel('y')
|
||||||
|
|
||||||
|
ax = fig.add_subplot(gs[0,1])
|
||||||
|
ax.text(0.0, 4.0, 'r=%.2f' % rr, ha='center')
|
||||||
|
ax.plot(xr, yr, **psAm)
|
||||||
|
ax.set_xlim(-4.0, 4.0)
|
||||||
|
ax.set_ylim(-4.0, 4.0)
|
||||||
|
ax.xaxis.set_major_locator(ticker.MultipleLocator(2.0))
|
||||||
|
ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0))
|
||||||
|
ax.set_xlabel('Shuffled x')
|
||||||
|
ax.set_ylabel('Shuffled y')
|
||||||
|
|
||||||
|
ax = fig.add_subplot(gs[1,:])
|
||||||
ax.annotate('Measured\ncorrelation\nis significant!',
|
ax.annotate('Measured\ncorrelation\nis significant!',
|
||||||
xy=(rd, 1.1), xycoords='data',
|
xy=(rd, 1.1), xycoords='data',
|
||||||
xytext=(rd, 2.2), textcoords='data', ha='left',
|
xytext=(rd-0.01, 3.0), textcoords='data', ha='left',
|
||||||
arrowprops=dict(arrowstyle="->", relpos=(0.2,0.0),
|
arrowprops=dict(arrowstyle="->", relpos=(0.3,0.0),
|
||||||
connectionstyle="angle3,angleA=10,angleB=80") )
|
connectionstyle="angle3,angleA=10,angleB=80") )
|
||||||
ax.annotate('95% percentile',
|
ax.annotate('95% percentile',
|
||||||
xy=(0.14, 0.9), xycoords='data',
|
xy=(0.14, 0.9), xycoords='data',
|
||||||
xytext=(0.18, 4.0), textcoords='data', ha='left',
|
xytext=(0.16, 6.2), textcoords='data', ha='left',
|
||||||
arrowprops=dict(arrowstyle="->", relpos=(0.1,0.0),
|
arrowprops=dict(arrowstyle="->", relpos=(0.1,0.0),
|
||||||
connectionstyle="angle3,angleA=30,angleB=80") )
|
connectionstyle="angle3,angleA=30,angleB=80") )
|
||||||
ax.annotate('Distribution of\nuncorrelated\nsamples',
|
ax.annotate('Distribution of\nuncorrelated\nsamples',
|
||||||
@ -56,7 +82,9 @@ ax.bar(b[:-1], h, width=b[1]-b[0], **fsC)
|
|||||||
ax.bar(b[:-1][b[:-1]>=rq], h[b[:-1]>=rq], width=b[1]-b[0], **fsB)
|
ax.bar(b[:-1][b[:-1]>=rq], h[b[:-1]>=rq], width=b[1]-b[0], **fsB)
|
||||||
ax.plot( [rd, rd], [0, 1], **lsA)
|
ax.plot( [rd, rd], [0, 1], **lsA)
|
||||||
ax.set_xlim(-0.25, 0.35)
|
ax.set_xlim(-0.25, 0.35)
|
||||||
|
ax.set_ylim(0.0, 6.0)
|
||||||
|
ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0))
|
||||||
ax.set_xlabel('Correlation coefficient')
|
ax.set_xlabel('Correlation coefficient')
|
||||||
ax.set_ylabel('Prob. density of H0')
|
ax.set_ylabel('PDF of H0')
|
||||||
|
|
||||||
plt.savefig('permutecorrelation.pdf')
|
plt.savefig('permutecorrelation.pdf')
|
||||||
|
24
chapter.mk
24
chapter.mk
@ -1,5 +1,5 @@
|
|||||||
# plots:
|
# plots:
|
||||||
plots : pythonplots gnuplots
|
plots : pythonplots
|
||||||
|
|
||||||
# python plots:
|
# python plots:
|
||||||
PYFILES=$(wildcard *.py)
|
PYFILES=$(wildcard *.py)
|
||||||
@ -10,25 +10,12 @@ pythonplots : $(PYPDFFILES)
|
|||||||
|
|
||||||
$(PYPDFFILES) : %.pdf: %.py ../../plotstyle.py
|
$(PYPDFFILES) : %.pdf: %.py ../../plotstyle.py
|
||||||
PYTHONPATH="../../" python3 $<
|
PYTHONPATH="../../" python3 $<
|
||||||
|
#ps2pdf $@ $(@:.pdf=-temp.pdf) # strip fonts, saves only about 1.5MB
|
||||||
|
#mv $(@:.pdf=-temp.pdf) $@
|
||||||
|
|
||||||
cleanpythonplots :
|
cleanpythonplots :
|
||||||
rm -f $(PYPDFFILES)
|
rm -f $(PYPDFFILES)
|
||||||
|
|
||||||
|
|
||||||
# gnuplot plots:
|
|
||||||
GPTFILES=$(wildcard *.gpt)
|
|
||||||
GPTTEXFILES=$(GPTFILES:.gpt=.tex)
|
|
||||||
|
|
||||||
gnuplots : $(GPTTEXFILES)
|
|
||||||
|
|
||||||
$(GPTTEXFILES) : %.tex: %.gpt whitestyles.gp
|
|
||||||
gnuplot whitestyles.gp $<
|
|
||||||
epstopdf $*.eps
|
|
||||||
|
|
||||||
cleangnuplots :
|
|
||||||
rm -f $(GPTTEXFILES)
|
|
||||||
|
|
||||||
|
|
||||||
# script:
|
# script:
|
||||||
chapter : $(BASENAME)-chapter.pdf
|
chapter : $(BASENAME)-chapter.pdf
|
||||||
|
|
||||||
@ -42,12 +29,15 @@ $(BASENAME)-chapter.pdf : $(BASENAME)-chapter.tex $(BASENAME).tex $(wildcard $(B
|
|||||||
watchchapter :
|
watchchapter :
|
||||||
while true; do ! make -q chapter && make chapter; sleep 0.5; done
|
while true; do ! make -q chapter && make chapter; sleep 0.5; done
|
||||||
|
|
||||||
|
watchplots :
|
||||||
|
while true; do ! make -q plots && make plots; sleep 0.5; done
|
||||||
|
|
||||||
cleantex:
|
cleantex:
|
||||||
rm -f *~
|
rm -f *~
|
||||||
rm -f $(BASENAME).aux $(BASENAME).log *.idx
|
rm -f $(BASENAME).aux $(BASENAME).log *.idx
|
||||||
rm -f $(BASENAME)-chapter.aux $(BASENAME)-chapter.log $(BASENAME)-chapter.out $(BASENAME)-chapter.idx $(BASENAME)-chapter-solutions.tex $(BASENAME)-solutions.tex
|
rm -f $(BASENAME)-chapter.aux $(BASENAME)-chapter.log $(BASENAME)-chapter.out $(BASENAME)-chapter.idx $(BASENAME)-chapter-solutions.tex $(BASENAME)-solutions.tex
|
||||||
|
|
||||||
cleanplots: cleanpythonplots cleangnuplots
|
cleanplots: cleanpythonplots
|
||||||
|
|
||||||
cleanchapter : cleanplots cleantex
|
cleanchapter : cleanplots cleantex
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
more months might as well have been written by someone
|
more months might as well have been written by someone
|
||||||
else.}{Eagleson's law}
|
else.}{Eagleson's law}
|
||||||
|
|
||||||
|
\noindent
|
||||||
Cultivating a good code style is not just a matter of good taste but
|
Cultivating a good code style is not just a matter of good taste but
|
||||||
rather is a key ingredient for readability and maintainability of code
|
rather is a key ingredient for readability and maintainability of code
|
||||||
and, in the end, facilitates reproducibility of scientific
|
and, in the end, facilitates reproducibility of scientific
|
||||||
@ -204,7 +205,7 @@ random walk\footnote{A random walk is a simple simulation of Brownian
|
|||||||
(listing \ref{chaoticcode}) then in cleaner way (listing
|
(listing \ref{chaoticcode}) then in cleaner way (listing
|
||||||
\ref{cleancode})
|
\ref{cleancode})
|
||||||
|
|
||||||
\begin{lstlisting}[label=chaoticcode, caption={Chaotic implementation of the random-walk.}]
|
\begin{pagelisting}[label=chaoticcode, caption={Chaotic implementation of the random-walk.}]
|
||||||
num_runs = 10; max_steps = 1000;
|
num_runs = 10; max_steps = 1000;
|
||||||
|
|
||||||
positions = zeros(max_steps, num_runs);
|
positions = zeros(max_steps, num_runs);
|
||||||
@ -218,17 +219,14 @@ x = randn(1);
|
|||||||
if x<0
|
if x<0
|
||||||
positions(step, run)= positions(step-1, run)+1;
|
positions(step, run)= positions(step-1, run)+1;
|
||||||
|
|
||||||
|
|
||||||
elseif x>0
|
elseif x>0
|
||||||
positions(step,run)=positions(step-1,run)-1;
|
positions(step,run)=positions(step-1,run)-1;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
\pagebreak[4]
|
|
||||||
|
|
||||||
\begin{lstlisting}[label=cleancode, caption={Clean implementation of the random-walk.}]
|
\begin{pagelisting}[label=cleancode, caption={Clean implementation of the random-walk.}]
|
||||||
num_runs = 10;
|
num_runs = 10;
|
||||||
max_steps = 1000;
|
max_steps = 1000;
|
||||||
positions = zeros(max_steps, num_runs);
|
positions = zeros(max_steps, num_runs);
|
||||||
@ -243,7 +241,7 @@ for run = 1:num_runs
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
\section{Using comments}
|
\section{Using comments}
|
||||||
|
|
||||||
@ -275,7 +273,6 @@ avoided:\\ \varcode{ x = x + 2; \% add two to x}\\
|
|||||||
make it even clearer.}{Steve McConnell}
|
make it even clearer.}{Steve McConnell}
|
||||||
\end{important}
|
\end{important}
|
||||||
|
|
||||||
\pagebreak[4]
|
|
||||||
\section{Documenting functions}
|
\section{Documenting functions}
|
||||||
All pre-defined \matlab{} functions begin with a comment block that
|
All pre-defined \matlab{} functions begin with a comment block that
|
||||||
describes the purpose of the function, the required and optional
|
describes the purpose of the function, the required and optional
|
||||||
@ -335,8 +332,7 @@ used within the context of another function \matlab{} allows to define
|
|||||||
within the same file. Listing \ref{localfunctions} shows an example of
|
within the same file. Listing \ref{localfunctions} shows an example of
|
||||||
a local function definition.
|
a local function definition.
|
||||||
|
|
||||||
\pagebreak[3]
|
\pageinputlisting[label=localfunctions, caption={Example for local
|
||||||
\lstinputlisting[label=localfunctions, caption={Example for local
|
|
||||||
functions.}]{calculateSines.m}
|
functions.}]{calculateSines.m}
|
||||||
|
|
||||||
\emph{Local function} live in the same \entermde{m-File}{m-file} as
|
\emph{Local function} live in the same \entermde{m-File}{m-file} as
|
||||||
|
@ -59,14 +59,14 @@ every opening parenthesis must be matched by a closing one or every
|
|||||||
respective error messages are clear and the editor will point out and
|
respective error messages are clear and the editor will point out and
|
||||||
highlight most syntax errors.
|
highlight most syntax errors.
|
||||||
|
|
||||||
\begin{lstlisting}[label=syntaxerror, caption={Unbalanced parenthesis error.}]
|
\begin{pagelisting}[label=syntaxerror, caption={Unbalanced parenthesis error.}]
|
||||||
>> mean(random_numbers
|
>> mean(random_numbers
|
||||||
|
|
|
|
||||||
Error: Expression or statement is incorrect--possibly unbalanced (, {, or [.
|
Error: Expression or statement is incorrect--possibly unbalanced (, {, or [.
|
||||||
|
|
||||||
Did you mean:
|
Did you mean:
|
||||||
>> mean(random_numbers)
|
>> mean(random_numbers)
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
\subsection{Indexing error}\label{index_error}
|
\subsection{Indexing error}\label{index_error}
|
||||||
Second on the list of common errors are the
|
Second on the list of common errors are the
|
||||||
@ -125,7 +125,7 @@ dimensions. The command in line 7 works due to the fact, that matlab
|
|||||||
automatically extends the matrix, if you assign values to a range
|
automatically extends the matrix, if you assign values to a range
|
||||||
outside its bounds.
|
outside its bounds.
|
||||||
|
|
||||||
\begin{lstlisting}[label=assignmenterror, caption={Assignment errors.}]
|
\begin{pagelisting}[label=assignmenterror, caption={Assignment errors.}]
|
||||||
>> a = zeros(1, 100);
|
>> a = zeros(1, 100);
|
||||||
>> b = 0:10;
|
>> b = 0:10;
|
||||||
|
|
||||||
@ -136,7 +136,7 @@ outside its bounds.
|
|||||||
>> size(a)
|
>> size(a)
|
||||||
ans =
|
ans =
|
||||||
110 1
|
110 1
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
\subsection{Dimension mismatch error}
|
\subsection{Dimension mismatch error}
|
||||||
Similarly, some arithmetic operations are only valid if the variables
|
Similarly, some arithmetic operations are only valid if the variables
|
||||||
@ -154,7 +154,7 @@ in listing\,\ref{dimensionmismatch} does not throw an error but the
|
|||||||
result is something else than the expected elementwise multiplication.
|
result is something else than the expected elementwise multiplication.
|
||||||
|
|
||||||
% XXX Some arithmetic operations make size constraints, violating them leads to dimension mismatch errors.
|
% XXX Some arithmetic operations make size constraints, violating them leads to dimension mismatch errors.
|
||||||
\begin{lstlisting}[label=dimensionmismatch, caption={Dimension mismatch errors.}]
|
\begin{pagelisting}[label=dimensionmismatch, caption={Dimension mismatch errors.}]
|
||||||
>> a = randn(100, 1);
|
>> a = randn(100, 1);
|
||||||
>> b = randn(10, 1);
|
>> b = randn(10, 1);
|
||||||
>> a + b
|
>> a + b
|
||||||
@ -171,7 +171,7 @@ result is something else than the expected elementwise multiplication.
|
|||||||
>> size(c)
|
>> size(c)
|
||||||
ans =
|
ans =
|
||||||
100 10
|
100 10
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -239,7 +239,7 @@ but it requires a deep understanding of the applied functions and also
|
|||||||
the task at hand.
|
the task at hand.
|
||||||
|
|
||||||
% XXX Converting a series of spike times into the firing rate as a function of time. Many tasks can be solved with a single line of code. But is this readable?
|
% XXX Converting a series of spike times into the firing rate as a function of time. Many tasks can be solved with a single line of code. But is this readable?
|
||||||
\begin{lstlisting}[label=easyvscomplicated, caption={One-liner versus readable code.}]
|
\begin{pagelisting}[label=easyvscomplicated, caption={One-liner versus readable code.}]
|
||||||
% the one-liner
|
% the one-liner
|
||||||
rate = conv(full(sparse(1, round(spike_times/dt), 1, 1, length(time))), kernel, 'same');
|
rate = conv(full(sparse(1, round(spike_times/dt), 1, 1, length(time))), kernel, 'same');
|
||||||
|
|
||||||
@ -248,7 +248,7 @@ rate = zeros(size(time));
|
|||||||
spike_indices = round(spike_times/dt);
|
spike_indices = round(spike_times/dt);
|
||||||
rate(spike_indices) = 1;
|
rate(spike_indices) = 1;
|
||||||
rate = conv(rate, kernel, 'same');
|
rate = conv(rate, kernel, 'same');
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
The preferred way depends on several considerations. (i) How deep is
|
The preferred way depends on several considerations. (i) How deep is
|
||||||
your personal understanding of the programming language? (ii) What
|
your personal understanding of the programming language? (ii) What
|
||||||
@ -291,7 +291,7 @@ example given in the \matlab{} help and assume that there is a
|
|||||||
function \varcode{rightTriangle()} (listing\,\ref{trianglelisting}).
|
function \varcode{rightTriangle()} (listing\,\ref{trianglelisting}).
|
||||||
|
|
||||||
% XXX Slightly more readable version of the example given in the \matlab{} help system. Note: The variable name for the angles have been capitalized in order to not override the matlab defined functions \code{alpha, beta,} and \code{gamma}.
|
% XXX Slightly more readable version of the example given in the \matlab{} help system. Note: The variable name for the angles have been capitalized in order to not override the matlab defined functions \code{alpha, beta,} and \code{gamma}.
|
||||||
\begin{lstlisting}[label=trianglelisting, caption={Example function for unit testing.}]
|
\begin{pagelisting}[label=trianglelisting, caption={Example function for unit testing.}]
|
||||||
function angles = rightTriangle(length_a, length_b)
|
function angles = rightTriangle(length_a, length_b)
|
||||||
ALPHA = atand(length_a / length_b);
|
ALPHA = atand(length_a / length_b);
|
||||||
BETA = atand(length_a / length_b);
|
BETA = atand(length_a / length_b);
|
||||||
@ -300,7 +300,7 @@ function angles = rightTriangle(length_a, length_b)
|
|||||||
|
|
||||||
angles = [ALPHA BETA GAMMA];
|
angles = [ALPHA BETA GAMMA];
|
||||||
end
|
end
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
This function expects two input arguments that are the length of the
|
This function expects two input arguments that are the length of the
|
||||||
sides $a$ and $b$ and assumes a right angle between them. From this
|
sides $a$ and $b$ and assumes a right angle between them. From this
|
||||||
@ -337,7 +337,7 @@ The test script for the \varcode{rightTriangle()} function
|
|||||||
(listing\,\ref{trianglelisting}) may look like in
|
(listing\,\ref{trianglelisting}) may look like in
|
||||||
listing\,\ref{testscript}.
|
listing\,\ref{testscript}.
|
||||||
|
|
||||||
\begin{lstlisting}[label=testscript, caption={Unit test for the \varcode{rightTriangle()} function stored in an m-file testRightTriangle.m}]
|
\begin{pagelisting}[label=testscript, caption={Unit test for the \varcode{rightTriangle()} function stored in an m-file testRightTriangle.m}]
|
||||||
tolerance = 1e-10;
|
tolerance = 1e-10;
|
||||||
|
|
||||||
% preconditions
|
% preconditions
|
||||||
@ -373,7 +373,7 @@ angles = rightTriangle(1, 1500);
|
|||||||
smallAngle = (pi / 180) * angles(1); % radians
|
smallAngle = (pi / 180) * angles(1); % radians
|
||||||
approx = sin(smallAngle);
|
approx = sin(smallAngle);
|
||||||
assert(abs(approx - smallAngle) <= tolerance, 'Problem with small angle approximation')
|
assert(abs(approx - smallAngle) <= tolerance, 'Problem with small angle approximation')
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
In a test script we can execute any code. The actual test whether or
|
In a test script we can execute any code. The actual test whether or
|
||||||
not the results match our predictions is done using the
|
not the results match our predictions is done using the
|
||||||
@ -390,16 +390,16 @@ executed. We further define a \varcode{tolerance} variable that is
|
|||||||
used when comparing double values (Why might the test on equality of
|
used when comparing double values (Why might the test on equality of
|
||||||
double values be tricky?).
|
double values be tricky?).
|
||||||
|
|
||||||
\begin{lstlisting}[label=runtestlisting, caption={Run the test!}]
|
\begin{pagelisting}[label=runtestlisting, caption={Run the test!}]
|
||||||
result = runtests('testRightTriangle')
|
result = runtests('testRightTriangle')
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
During the run, \matlab{} will put out error messages onto the command
|
During the run, \matlab{} will put out error messages onto the command
|
||||||
line and a summary of the test results is then stored within the
|
line and a summary of the test results is then stored within the
|
||||||
\varcode{result} variable. These can be displayed using the function
|
\varcode{result} variable. These can be displayed using the function
|
||||||
\code[table()]{table(result)}.
|
\code[table()]{table(result)}.
|
||||||
|
|
||||||
\begin{lstlisting}[label=testresults, caption={The test results.}, basicstyle=\ttfamily\scriptsize]
|
\begin{pagelisting}[label=testresults, caption={The test results.}, basicstyle=\ttfamily\scriptsize]
|
||||||
table(result)
|
table(result)
|
||||||
ans =
|
ans =
|
||||||
4x6 table
|
4x6 table
|
||||||
@ -411,7 +411,7 @@ _________________________________ ______ ______ ___________ ________ _____
|
|||||||
'testR.../Test_IsoscelesTriangles' true false false 0.004893 [1x1 struct]
|
'testR.../Test_IsoscelesTriangles' true false false 0.004893 [1x1 struct]
|
||||||
'testR.../Test_30_60_90Triangle' true false false 0.005057 [1x1 struct]
|
'testR.../Test_30_60_90Triangle' true false false 0.005057 [1x1 struct]
|
||||||
'testR.../Test_SmallAngleApprox' true false false 0.0049 [1x1 struct]
|
'testR.../Test_SmallAngleApprox' true false false 0.0049 [1x1 struct]
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
So far so good, all tests pass and our function appears to do what it
|
So far so good, all tests pass and our function appears to do what it
|
||||||
is supposed to do. But tests are only as good as the programmer who
|
is supposed to do. But tests are only as good as the programmer who
|
||||||
@ -479,11 +479,11 @@ stopped in debug mode (listing\,\ref{debuggerlisting}).
|
|||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
|
|
||||||
\begin{lstlisting}[label=debuggerlisting, caption={Command line when the program execution was stopped in the debugger.}]
|
\begin{pagelisting}[label=debuggerlisting, caption={Command line when the program execution was stopped in the debugger.}]
|
||||||
>> simplerandomwalk
|
>> simplerandomwalk
|
||||||
6 for run = 1:num_runs
|
6 for run = 1:num_runs
|
||||||
K>>
|
K>>
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
When stopped in the debugger we can view and change the state of the
|
When stopped in the debugger we can view and change the state of the
|
||||||
program at this point, we can also issue commands to try the next
|
program at this point, we can also issue commands to try the next
|
||||||
|
@ -10,7 +10,8 @@ pattern]{design pattern}.
|
|||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\section{Looping over vector elements}
|
\section{Looping over vector elements}
|
||||||
Iterating over vector elements by means of a \mcode{for}-loop is a very commen pattern:
|
Iterating over vector elements by means of a \mcode{for}-loop is a very commen pattern:
|
||||||
\begin{lstlisting}[caption={\varcode{for}-loop for accessing vector elements by indices}]
|
|
||||||
|
\begin{pagelisting}[caption={\varcode{for}-loop for accessing vector elements by indices}]
|
||||||
x = [2:3:20]; % Some vector.
|
x = [2:3:20]; % Some vector.
|
||||||
for i=1:length(x) % For loop over the indices of the vector.
|
for i=1:length(x) % For loop over the indices of the vector.
|
||||||
i % This is the index (an integer number)
|
i % This is the index (an integer number)
|
||||||
@ -22,12 +23,14 @@ for i=1:length(x) % For loop over the indices of the vector.
|
|||||||
% as an argument to a function:
|
% as an argument to a function:
|
||||||
do_something(x(i));
|
do_something(x(i));
|
||||||
end
|
end
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
|
\noindent
|
||||||
If the result of the computation within the loop are single numbers
|
If the result of the computation within the loop are single numbers
|
||||||
that are to be stored in a vector, you should create this vector with
|
that are to be stored in a vector, you should create this vector with
|
||||||
the right size before the loop:
|
the right size before the loop:
|
||||||
\begin{lstlisting}[caption={\varcode{for}-loop for writing a vector}]
|
|
||||||
|
\begin{pagelisting}[caption={\varcode{for}-loop for writing a vector}]
|
||||||
x = [1.2 2.3 2.6 3.1]; % Some vector.
|
x = [1.2 2.3 2.6 3.1]; % Some vector.
|
||||||
% Create a vector for the results, as long as the number of loops:
|
% Create a vector for the results, as long as the number of loops:
|
||||||
y = zeros(length(x),1);
|
y = zeros(length(x),1);
|
||||||
@ -38,13 +41,15 @@ for i=1:length(x)
|
|||||||
end
|
end
|
||||||
% Now the result vector can be further processed:
|
% Now the result vector can be further processed:
|
||||||
mean(y);
|
mean(y);
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
|
\noindent
|
||||||
The computation within the loop could also result in a vector of some
|
The computation within the loop could also result in a vector of some
|
||||||
length and not just a single number. If the length of this vector
|
length and not just a single number. If the length of this vector
|
||||||
(here 10) is known beforehand, then you should create a matrix of
|
(here 10) is known beforehand, then you should create a matrix of
|
||||||
appropriate size for storing the results:
|
appropriate size for storing the results:
|
||||||
\begin{lstlisting}[caption={\varcode{for}-loop for writing rows of a matrix}]
|
|
||||||
|
\begin{pagelisting}[caption={\varcode{for}-loop for writing rows of a matrix}]
|
||||||
x = [2:3:20]; % Some vector.
|
x = [2:3:20]; % Some vector.
|
||||||
% Create space for results -
|
% Create space for results -
|
||||||
% as many rows as loops, as many columns as needed:
|
% as many rows as loops, as many columns as needed:
|
||||||
@ -56,30 +61,33 @@ for i=1:length(x)
|
|||||||
end
|
end
|
||||||
% Process the results stored in matrix y:
|
% Process the results stored in matrix y:
|
||||||
mean(y, 1)
|
mean(y, 1)
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
|
\noindent
|
||||||
Another possibility is that the result vectors (here of unknown size)
|
Another possibility is that the result vectors (here of unknown size)
|
||||||
need to be combined into a single large vector:
|
need to be combined into a single large vector:
|
||||||
\begin{lstlisting}[caption={\varcode{for}-loop for appending vectors}]
|
|
||||||
|
\begin{pagelisting}[caption={\varcode{for}-loop for appending vectors}]
|
||||||
x = [2:3:20]; % Some vector.
|
x = [2:3:20]; % Some vector.
|
||||||
y = []; % Empty vector for storing the results.
|
y = []; % Empty vector for storing the results.
|
||||||
for i=1:length(x)
|
for i=1:length(x)
|
||||||
% The function get_something() returns a vector of unspecified size:
|
% The function get_something() returns a vector of unspecified size:
|
||||||
z = get_somehow_more(x(i));
|
z = get_something(x(i));
|
||||||
% The content of z is appended to the result vector y:
|
% The content of z is appended to the result vector y:
|
||||||
y = [y; z(:)];
|
y = [y, z(:)];
|
||||||
% The z(:) syntax ensures that we append column-vectors.
|
% The z(:) syntax ensures that we append column-vectors.
|
||||||
end
|
end
|
||||||
% Process the results stored in the vector z:
|
% Process the results stored in the vector y:
|
||||||
mean(y)
|
mean(y, 1)
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\section{Scaling and shifting random numbers, zeros, and ones}
|
\section{Scaling and shifting random numbers, zeros, and ones}
|
||||||
Random number generators usually return random numbers of a given mean
|
Random number generators usually return random numbers of a given mean
|
||||||
and standard deviation. Multiply those numbers by a factor to change their standard deviation and add a number to shift the mean.
|
and standard deviation. Multiply those numbers by a factor to change their standard deviation and add a number to shift the mean.
|
||||||
\begin{lstlisting}[caption={Scaling and shifting of random numbers}]
|
|
||||||
|
\begin{pagelisting}[caption={Scaling and shifting of random numbers}]
|
||||||
% 100 random numbers drawn from a normal distribution
|
% 100 random numbers drawn from a normal distribution
|
||||||
% with mean 0 and standard deviation 1:
|
% with mean 0 and standard deviation 1:
|
||||||
x = randn(100, 1);
|
x = randn(100, 1);
|
||||||
@ -89,18 +97,20 @@ x = randn(100, 1);
|
|||||||
mu = 4.8;
|
mu = 4.8;
|
||||||
sigma = 2.3;
|
sigma = 2.3;
|
||||||
y = randn(100, 1)*sigma + mu;
|
y = randn(100, 1)*sigma + mu;
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
|
\noindent
|
||||||
The same principle can be useful for in the context of the functions
|
The same principle can be useful for in the context of the functions
|
||||||
\mcode{zeros()} or \mcode{ones()}:
|
\mcode{zeros()} or \mcode{ones()}:
|
||||||
\begin{lstlisting}[caption={Scaling and shifting of \varcode{zeros()} and \varcode{ones()}}]
|
|
||||||
|
\begin{pagelisting}[caption={Scaling and shifting of \varcode{zeros()} and \varcode{ones()}}]
|
||||||
x = -1:0.01:2; % Vector of x-values for plotting
|
x = -1:0.01:2; % Vector of x-values for plotting
|
||||||
plot(x, exp(-x.*x));
|
plot(x, exp(-x.*x));
|
||||||
% Plot for the same x-values a horizontal line with y=0.8:
|
% Plot for the same x-values a horizontal line with y=0.8:
|
||||||
plot(x, zeros(size(x))+0.8);
|
plot(x, zeros(size(x))+0.8);
|
||||||
% ... or a line with y=0.5:
|
% ... or a line with y=0.5:
|
||||||
plot(x, ones(size(x))*0.5);
|
plot(x, ones(size(x))*0.5);
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
@ -119,25 +129,26 @@ can plot the values of the $y$ vector against the ones of the $x$
|
|||||||
vector.
|
vector.
|
||||||
|
|
||||||
The following scripts compute and plot the function $f(x)=e^{-x^2}$:
|
The following scripts compute and plot the function $f(x)=e^{-x^2}$:
|
||||||
\begin{lstlisting}[caption={Plotting a mathematical function --- very detailed}]
|
|
||||||
|
\begin{pagelisting}[caption={Plotting a mathematical function --- very detailed}]
|
||||||
xmin = -1.0;
|
xmin = -1.0;
|
||||||
xmax = 2.0;
|
xmax = 2.0;
|
||||||
dx = 0.01; % Step size
|
dx = 0.01; % Step size
|
||||||
x = xmin:dx:xmax; % Vector with x-values.
|
x = xmin:dx:xmax; % Vector with x-values.
|
||||||
y = exp(-x.*x); % No for loop! '.*' for multiplying the vector elements.
|
y = exp(-x.*x); % No for loop! '.*' for multiplying the vector elements.
|
||||||
plot(x, y);
|
plot(x, y);
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
\begin{lstlisting}[caption={Plotting a mathematical function --- shorter}]
|
\begin{pagelisting}[caption={Plotting a mathematical function --- shorter}]
|
||||||
x = -1:0.01:2;
|
x = -1:0.01:2;
|
||||||
y = exp(-x.*x);
|
y = exp(-x.*x);
|
||||||
plot(x, y);
|
plot(x, y);
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
\begin{lstlisting}[caption={Plotting a mathematical function --- compact}]
|
\begin{pagelisting}[caption={Plotting a mathematical function --- compact}]
|
||||||
x = -1:0.01:2;
|
x = -1:0.01:2;
|
||||||
plot(x, exp(-x.*x));
|
plot(x, exp(-x.*x));
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
@ -146,28 +157,34 @@ For estimating probabilities or probability densities from histograms
|
|||||||
we need to normalize them appropriately.
|
we need to normalize them appropriately.
|
||||||
|
|
||||||
The \mcode{histogram()} function does this automatically with the appropriate arguments:
|
The \mcode{histogram()} function does this automatically with the appropriate arguments:
|
||||||
\begin{lstlisting}[caption={Probability density with the \varcode{histogram()}-function}]
|
|
||||||
|
\begin{pagelisting}[caption={Probability density with the \varcode{histogram()}-function}]
|
||||||
x = randn(100, 1); % Some real-valued data.
|
x = randn(100, 1); % Some real-valued data.
|
||||||
histogram(x, 'Normalization', 'pdf');
|
histogram(x, 'Normalization', 'pdf');
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
\begin{lstlisting}[caption={Probability with the \varcode{histogram()}-function}]
|
|
||||||
|
\begin{pagelisting}[caption={Probability with the \varcode{histogram()}-function}]
|
||||||
x = randi(6, 100, 1); % Some integer-valued data.
|
x = randi(6, 100, 1); % Some integer-valued data.
|
||||||
histogram(x, 'Normalization', 'probability');
|
histogram(x, 'Normalization', 'probability');
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
|
\noindent
|
||||||
Alternatively one can normalize the histogram data as returned by the
|
Alternatively one can normalize the histogram data as returned by the
|
||||||
\code{hist()}-function manually:
|
\code{hist()}-function manually:
|
||||||
\begin{lstlisting}[caption={Probability density with the \varcode{hist()}- and \varcode{bar()}-function}]
|
|
||||||
|
\begin{pagelisting}[caption={Probability density with the \varcode{hist()}- and \varcode{bar()}-function}]
|
||||||
x = randn(100, 1); % Some real-valued data.
|
x = randn(100, 1); % Some real-valued data.
|
||||||
[h, b] = hist(x); % Compute histogram.
|
[h, b] = hist(x); % Compute histogram.
|
||||||
h = h/sum(h)/(b(2)-b(1)); % Normalization to a probability density.
|
h = h/sum(h)/(b(2)-b(1)); % Normalization to a probability density.
|
||||||
bar(b, h); % Plot the probability density.
|
bar(b, h); % Plot the probability density.
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
\begin{lstlisting}[caption={Probability with the \varcode{hist()}- and \varcode{bar()}-function}]
|
|
||||||
|
\begin{pagelisting}[caption={Probability with the \varcode{hist()}- and \varcode{bar()}-function}]
|
||||||
x = randi(6, 100, 1); % Some integer-valued data.
|
x = randi(6, 100, 1); % Some integer-valued data.
|
||||||
[h, b] = hist(x); % Compute histogram.
|
[h, b] = hist(x); % Compute histogram.
|
||||||
h = h/sum(h); % Normalize to probability.
|
h = h/sum(h); % Normalize to probability.
|
||||||
bar(b, h); % Plot the probabilities.
|
bar(b, h); % Plot the probabilities.
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
33
exercises.mk
Normal file
33
exercises.mk
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
EXERCISES=$(TEXFILES:.tex=.pdf)
|
||||||
|
SOLUTIONS=$(EXERCISES:%.pdf=%-solutions.pdf)
|
||||||
|
|
||||||
|
.PHONY: pdf exercises solutions watch watchexercises watchsolutions clean
|
||||||
|
|
||||||
|
pdf : $(SOLUTIONS) $(EXERCISES)
|
||||||
|
|
||||||
|
exercises : $(EXERCISES)
|
||||||
|
|
||||||
|
solutions : $(SOLUTIONS)
|
||||||
|
|
||||||
|
$(SOLUTIONS) : %-solutions.pdf : %.tex ../../exercisesheader.tex ../../exercisestitle.tex
|
||||||
|
{ echo "\\documentclass[answers,12pt,a4paper,pdftex]{exam}"; sed -e '1d' $<; } > $(patsubst %.pdf,%.tex,$@)
|
||||||
|
pdflatex -interaction=scrollmode $(patsubst %.pdf,%.tex,$@) | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $(patsubst %.pdf,%.tex,$@) || true
|
||||||
|
rm $(patsubst %.pdf,%,$@).[!p]*
|
||||||
|
|
||||||
|
$(EXERCISES) : %.pdf : %.tex ../../exercisesheader.tex ../../exercisestitle.tex
|
||||||
|
pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
|
||||||
|
|
||||||
|
watch :
|
||||||
|
while true; do ! make -q pdf && make pdf; sleep 0.5; done
|
||||||
|
|
||||||
|
watchexercises :
|
||||||
|
while true; do ! make -q exercises && make exercises; sleep 0.5; done
|
||||||
|
|
||||||
|
watchsolutions :
|
||||||
|
while true; do ! make -q solutions && make solutions; sleep 0.5; done
|
||||||
|
|
||||||
|
clean :
|
||||||
|
rm -f *~ *.aux *.log $(EXERCISES:.pdf=.out) $(SOLUTIONS:.pdf=.out)
|
||||||
|
|
||||||
|
cleanup : clean
|
||||||
|
rm -f $(SOLUTIONS) $(EXERCISES)
|
110
exercisesheader.tex
Normal file
110
exercisesheader.tex
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
\usepackage[english]{babel}
|
||||||
|
\usepackage{pslatex}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage{tikz}
|
||||||
|
\usepackage{xcolor}
|
||||||
|
\usepackage{amsmath}
|
||||||
|
\usepackage{amssymb}
|
||||||
|
|
||||||
|
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{listings}
|
||||||
|
\lstset{
|
||||||
|
language=Matlab,
|
||||||
|
basicstyle=\ttfamily\footnotesize,
|
||||||
|
numbers=left,
|
||||||
|
numberstyle=\tiny,
|
||||||
|
title=\lstname,
|
||||||
|
showstringspaces=false,
|
||||||
|
commentstyle=\itshape\color{darkgray},
|
||||||
|
breaklines=true,
|
||||||
|
breakautoindent=true,
|
||||||
|
% columns=flexible,
|
||||||
|
frame=single,
|
||||||
|
xleftmargin=1.5em,
|
||||||
|
xrightmargin=1em,
|
||||||
|
aboveskip=10pt
|
||||||
|
}
|
||||||
|
|
||||||
|
%%%%% page style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[left=20mm,right=20mm,top=25mm,bottom=19mm,headsep=2ex,headheight=3ex,footskip=3.5ex]{geometry}
|
||||||
|
\pagestyle{headandfoot}
|
||||||
|
\ifprintanswers
|
||||||
|
\newcommand{\stitle}{Solutions}
|
||||||
|
\else
|
||||||
|
\newcommand{\stitle}{}
|
||||||
|
\fi
|
||||||
|
\header{{\bfseries\large \exercisenum. \exercisetopic}}{{\bfseries\large\stitle}}{{\bfseries\large\exercisedate}}
|
||||||
|
\firstpagefooter{Prof. Dr. Jan Benda}{}{jan.benda@uni-tuebingen.de}
|
||||||
|
\runningfooter{}{\thepage}{}
|
||||||
|
|
||||||
|
\shadedsolutions
|
||||||
|
\definecolor{SolutionColor}{gray}{0.9}
|
||||||
|
|
||||||
|
\setlength{\baselineskip}{15pt}
|
||||||
|
\setlength{\parindent}{0.0cm}
|
||||||
|
\setlength{\parskip}{0.3cm}
|
||||||
|
|
||||||
|
\usepackage{multicol}
|
||||||
|
|
||||||
|
%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[mediumspace,mediumqspace,Gray,squaren]{SIunits} % \ohm, \micro
|
||||||
|
|
||||||
|
%%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\newcommand{\cin}[1]{[{\rm #1}]_{\text{in}}}
|
||||||
|
\newcommand{\cout}[1]{[{\rm #1}]_{\text{out}}}
|
||||||
|
\newcommand{\units}[1]{\text{#1}}
|
||||||
|
\newcommand{\K}{\text{K$^+$}}
|
||||||
|
\newcommand{\Na}{\text{Na$^+$}}
|
||||||
|
\newcommand{\Cl}{\text{Cl$^-$}}
|
||||||
|
\newcommand{\Ca}{\text{Ca$^{2+}$}}
|
||||||
|
\newcommand{\water}{$\text{H}_2\text{O}$}
|
||||||
|
|
||||||
|
\usepackage{dsfont}
|
||||||
|
\newcommand{\naZ}{\mathds{N}}
|
||||||
|
\newcommand{\gaZ}{\mathds{Z}}
|
||||||
|
\newcommand{\raZ}{\mathds{Q}}
|
||||||
|
\newcommand{\reZ}{\mathds{R}}
|
||||||
|
\newcommand{\reZp}{\mathds{R^+}}
|
||||||
|
\newcommand{\reZpN}{\mathds{R^+_0}}
|
||||||
|
\newcommand{\koZ}{\mathds{C}}
|
||||||
|
\newcommand{\RT}{{\rm Re\!\ }}
|
||||||
|
\newcommand{\IT}{{\rm Im\!\ }}
|
||||||
|
\newcommand{\arccot}{{\rm arccot}}
|
||||||
|
\newcommand{\sgn}{{\rm sgn}}
|
||||||
|
\newcommand{\im}{{\rm i}}
|
||||||
|
\newcommand{\drv}{\mathrm{d}}
|
||||||
|
\newcommand{\divis}[2]{$^{#1}\!\!/\!_{#2}$}
|
||||||
|
\newcommand{\vect}[2]{\left( \!\! \begin{array}{c} #1 \\ #2 \end{array} \!\! \right)}
|
||||||
|
\newcommand{\matt}[2]{\left( \!\! \begin{array}{cc} #1 \\ #2 \end{array} \!\! \right)}
|
||||||
|
\renewcommand{\Re}{\mathrm{Re}}
|
||||||
|
\renewcommand{\Im}{\mathrm{Im}}
|
||||||
|
\newcommand{\av}[1]{\left\langle #1 \right\rangle}
|
||||||
|
|
||||||
|
\newcommand{\pref}[1]{(\ref{#1})}
|
||||||
|
\newcommand{\eqnref}[1]{(\ref{#1})}
|
||||||
|
|
||||||
|
\newcommand{\hr}{\par\vspace{-5ex}\noindent\rule{\textwidth}{1pt}\par\vspace{-1ex}}
|
||||||
|
|
||||||
|
\newcommand{\qt}[1]{\textbf{#1}\\}
|
||||||
|
|
||||||
|
\newcommand{\extra}{--- Optional ---\ \mbox{}}
|
||||||
|
|
||||||
|
\newcommand{\code}[1]{\texttt{#1}}
|
||||||
|
|
||||||
|
%%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\newcommand{\continue}{\ifprintanswers%
|
||||||
|
\else
|
||||||
|
\vfill\hspace*{\fill}$\rightarrow$\newpage%
|
||||||
|
\fi}
|
||||||
|
\newcommand{\continuepage}{\ifprintanswers%
|
||||||
|
\newpage
|
||||||
|
\else
|
||||||
|
\vfill\hspace*{\fill}$\rightarrow$\newpage%
|
||||||
|
\fi}
|
||||||
|
\newcommand{\newsolutionpage}{\ifprintanswers%
|
||||||
|
\newpage%
|
||||||
|
\else
|
||||||
|
\fi}
|
||||||
|
|
||||||
|
%%%%% hyperref %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue!50!black,linkcolor=blue!50!black,urlcolor=blue!50!black]{hyperref}
|
@ -4,3 +4,4 @@
|
|||||||
{\large Jan Grewe, Jan Benda}\\[-3ex]
|
{\large Jan Grewe, Jan Benda}\\[-3ex]
|
||||||
Neuroethology Lab \hfill --- \hfill Institute for Neurobiology \hfill --- \hfill \includegraphics[width=0.28\textwidth]{UT_WBMW_Black_RGB} \\
|
Neuroethology Lab \hfill --- \hfill Institute for Neurobiology \hfill --- \hfill \includegraphics[width=0.28\textwidth]{UT_WBMW_Black_RGB} \\
|
||||||
\end{center}
|
\end{center}
|
||||||
|
\hr
|
71
header.tex
71
header.tex
@ -1,8 +1,10 @@
|
|||||||
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\title{\textbf{\huge\sffamily\tr{Introduction to\\[1ex] Scientific Computing}%
|
\title{\textbf{\huge\sffamily\tr{Introduction to\\[1ex] Scientific Computing}%
|
||||||
{Einf\"uhrung in die\\[1ex] wissenschaftliche Datenverarbeitung}}}
|
{Einf\"uhrung in die\\[1ex] wissenschaftliche Datenverarbeitung}}}
|
||||||
|
%\title{\textbf{\huge\sffamily\tr{Scientific Computing for Neurobiologists}%
|
||||||
|
% {Wissenschaftliche Datenverarbeitung f\"ur Neurobiologen}}}
|
||||||
|
|
||||||
\author{{\LARGE Jan Grewe \& Jan Benda}\\[5ex]Abteilung Neuroethologie\\[2ex]%
|
\author{{\LARGE Jan Grewe \& Jan Benda}\\[5ex]Neuroethology Lab\\[2ex]%
|
||||||
\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}\vspace{3ex}}
|
\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}\vspace{3ex}}
|
||||||
|
|
||||||
\date{WS 2020/2021\\\vfill%
|
\date{WS 2020/2021\\\vfill%
|
||||||
@ -66,20 +68,16 @@
|
|||||||
\pagecolor{white}
|
\pagecolor{white}
|
||||||
|
|
||||||
%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
% gnuplot figures:
|
|
||||||
\newcommand{\texinputpath}{}
|
|
||||||
\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{\texinputpath#1.tex}}}
|
|
||||||
\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}}
|
|
||||||
|
|
||||||
% maximum number of floats:
|
% maximum number of floats:
|
||||||
\setcounter{topnumber}{2}
|
\setcounter{topnumber}{1}
|
||||||
\setcounter{bottomnumber}{0}
|
\setcounter{bottomnumber}{1}
|
||||||
\setcounter{totalnumber}{2}
|
\setcounter{totalnumber}{2}
|
||||||
|
|
||||||
% float placement fractions:
|
% float placement fractions:
|
||||||
\renewcommand{\textfraction}{0.2}
|
\renewcommand{\textfraction}{0.1}
|
||||||
\renewcommand{\topfraction}{0.9}
|
\renewcommand{\topfraction}{0.9}
|
||||||
\renewcommand{\bottomfraction}{0.0}
|
\renewcommand{\bottomfraction}{0.9}
|
||||||
\renewcommand{\floatpagefraction}{0.7}
|
\renewcommand{\floatpagefraction}{0.7}
|
||||||
|
|
||||||
% spacing for floats:
|
% spacing for floats:
|
||||||
@ -209,11 +207,41 @@
|
|||||||
\let\l@lstlisting\l@figure
|
\let\l@lstlisting\l@figure
|
||||||
\makeatother
|
\makeatother
|
||||||
|
|
||||||
%%%%% english, german, code and file terms: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
% \lstinputlisting wrapped in a minipage to avoid page breaks:
|
||||||
|
\newcommand{\pageinputlisting}[2][]{\vspace{-2ex}\noindent\begin{minipage}[t]{1\linewidth}\lstinputlisting[#1]{#2}\end{minipage}}
|
||||||
|
|
||||||
|
%%%%% listing environment: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% usage:
|
||||||
|
%
|
||||||
|
% \begin{pagelisting}[label=listing1, caption={A script.}]
|
||||||
|
% >> x = 6
|
||||||
|
% \end{pagelisting}
|
||||||
|
%
|
||||||
|
% This is the lstlisting environment but wrapped in a minipage to avoid page breaks.
|
||||||
|
\lstnewenvironment{pagelisting}[1][]%
|
||||||
|
{\vspace{-2ex}\lstset{#1}\noindent\minipage[t]{1\linewidth}}%
|
||||||
|
{\endminipage}
|
||||||
|
|
||||||
|
%%%%% english and german terms: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\usepackage{ifthen}
|
\usepackage{ifthen}
|
||||||
|
|
||||||
|
% \enterm[en-index]{term}: Typeset term and add it to the index of english terms
|
||||||
|
%
|
||||||
|
% \determ[de-index]{term}: Typeset term and add it to the index of german terms
|
||||||
|
%
|
||||||
|
% \entermde[en-index]{de-index}{term}: Typeset term and add it to the index of english terms
|
||||||
|
% and de-index to the index of german terms
|
||||||
|
%
|
||||||
|
% how to specificy an index:
|
||||||
|
% \enterm{term} - just put term into the index
|
||||||
|
% \enterm[en-index]{term} - typeset term and put en-index into the index
|
||||||
|
% \enterm[statistics!mean]{term} - mean is a subentry of statistics
|
||||||
|
% \enterm[statistics!average|see{statistics!mean}]{term} - cross reference to statistics mean
|
||||||
|
% \enterm[statistics@\textbf{statistics}]{term} - put index at statistics but use
|
||||||
|
% \textbf{statistics} for typesetting in the index
|
||||||
|
|
||||||
% \enterm[english index entry]{<english term>}
|
% \enterm[english index entry]{<english term>}
|
||||||
% typeset the term in italics and add it (or the optional argument) to
|
% typeset the term in italics and add it (or rather the optional argument) to
|
||||||
% the english index.
|
% the english index.
|
||||||
\newcommand{\enterm}[2][]{\textit{#2}\ifthenelse{\equal{#1}{}}{\protect\sindex[enterm]{#2}}{\protect\sindex[enterm]{#1}}}
|
\newcommand{\enterm}[2][]{\textit{#2}\ifthenelse{\equal{#1}{}}{\protect\sindex[enterm]{#2}}{\protect\sindex[enterm]{#1}}}
|
||||||
|
|
||||||
@ -349,11 +377,12 @@
|
|||||||
}{%
|
}{%
|
||||||
\immediate\write\solutions{\unexpanded{\subsection}{Exercise \thechapter.\arabic{exercisef}}}%
|
\immediate\write\solutions{\unexpanded{\subsection}{Exercise \thechapter.\arabic{exercisef}}}%
|
||||||
\immediate\write\solutions{\unexpanded{\label}{solution\arabic{chapter}-\arabic{exercisef}}}%
|
\immediate\write\solutions{\unexpanded{\label}{solution\arabic{chapter}-\arabic{exercisef}}}%
|
||||||
\immediate\write\solutions{\unexpanded{\lstinputlisting[belowskip=0ex,aboveskip=0ex,%
|
\immediate\write\solutions{\unexpanded{\begin{minipage}{1\linewidth}\lstinputlisting[belowskip=0ex,aboveskip=0ex,%
|
||||||
nolol=true, title={\textbf{Source code:} \protect\StrSubstitute{#1}{_}{\_}}]}{\codepath#1}}%
|
nolol=true, title={\textbf{Source code:} \protect\StrSubstitute{#1}{_}{\_}}]}{\codepath#1}\unexpanded{\end{minipage}}}%
|
||||||
\ifthenelse{\equal{#2}{}}{}%
|
\ifthenelse{\equal{#2}{}}{}%
|
||||||
{\immediate\write\solutions{\unexpanded{\lstinputlisting[language={},%
|
{\immediate\write\solutions{\unexpanded{\begin{minipage}{1\linewidth}\lstinputlisting[language={},%
|
||||||
nolol=true, title={\textbf{Output:}}, belowskip=0ex, aboveskip=1ex]}{\codepath#2}}}%
|
nolol=true, title={\textbf{Output:}}, belowskip=0ex, aboveskip=1ex]}{\codepath#2}\unexpanded{\end{minipage}}}}%
|
||||||
|
\immediate\write\solutions{\unexpanded{\vspace*{\fill}}}%
|
||||||
\immediate\write\solutions{}%
|
\immediate\write\solutions{}%
|
||||||
}%
|
}%
|
||||||
}%
|
}%
|
||||||
@ -362,14 +391,18 @@
|
|||||||
{ \hypersetup{hypertexnames=false}%
|
{ \hypersetup{hypertexnames=false}%
|
||||||
\ifthenelse{\equal{\exercisesource}{}}{}%
|
\ifthenelse{\equal{\exercisesource}{}}{}%
|
||||||
{ \addtocounter{lstlisting}{-1}%
|
{ \addtocounter{lstlisting}{-1}%
|
||||||
\lstinputlisting[belowskip=0pt,aboveskip=1ex,nolol=true,%
|
\par\noindent\begin{minipage}[t]{1\linewidth}%
|
||||||
|
\lstinputlisting[belowskip=1ex,aboveskip=-1ex,nolol=true,%
|
||||||
title={\textbf{Solution:} \exercisefile}]%
|
title={\textbf{Solution:} \exercisefile}]%
|
||||||
{\exercisesource}%
|
{\exercisesource}%
|
||||||
|
\end{minipage}%
|
||||||
\ifthenelse{\equal{\exerciseoutput}{}}{}%
|
\ifthenelse{\equal{\exerciseoutput}{}}{}%
|
||||||
{ \addtocounter{lstlisting}{-1}%
|
{ \addtocounter{lstlisting}{-1}%
|
||||||
|
\par\noindent\begin{minipage}[t]{1\linewidth}%
|
||||||
\lstinputlisting[language={},title={\textbf{Output:}},%
|
\lstinputlisting[language={},title={\textbf{Output:}},%
|
||||||
nolol=true,belowskip=0pt]%
|
nolol=true,belowskip=0pt,aboveskip=-0.5ex]%
|
||||||
{\exerciseoutput}%
|
{\exerciseoutput}%
|
||||||
|
\end{minipage}%
|
||||||
}%
|
}%
|
||||||
}%
|
}%
|
||||||
\hypersetup{hypertexnames=true}%
|
\hypersetup{hypertexnames=true}%
|
||||||
@ -452,12 +485,12 @@
|
|||||||
{ \captionsetup{singlelinecheck=off,hypcap=false,labelformat={empty},%
|
{ \captionsetup{singlelinecheck=off,hypcap=false,labelformat={empty},%
|
||||||
labelfont={large,sf,it,bf},font={large,sf,it,bf}}
|
labelfont={large,sf,it,bf},font={large,sf,it,bf}}
|
||||||
\ifthenelse{\equal{#1}{}}%
|
\ifthenelse{\equal{#1}{}}%
|
||||||
{ \begin{mdframed}[linecolor=importantline,linewidth=1ex,%
|
{ \begin{mdframed}[nobreak=true,linecolor=importantline,linewidth=1ex,%
|
||||||
backgroundcolor=importantback,font={\sffamily}]%
|
backgroundcolor=importantback,font={\sffamily}]%
|
||||||
\setlength{\parindent}{0pt}%
|
\setlength{\parindent}{0pt}%
|
||||||
\setlength{\parskip}{1ex}%
|
\setlength{\parskip}{1ex}%
|
||||||
}%
|
}%
|
||||||
{ \begin{mdframed}[linecolor=importantline,linewidth=1ex,%
|
{ \begin{mdframed}[nobreak=true,linecolor=importantline,linewidth=1ex,%
|
||||||
backgroundcolor=importantback,font={\sffamily},%
|
backgroundcolor=importantback,font={\sffamily},%
|
||||||
frametitle={\captionof{iboxf}{#1}},frametitleaboveskip=-1ex,%
|
frametitle={\captionof{iboxf}{#1}},frametitleaboveskip=-1ex,%
|
||||||
frametitlebackgroundcolor=importantline]%
|
frametitlebackgroundcolor=importantline]%
|
||||||
|
@ -1,34 +1,3 @@
|
|||||||
TEXFILES=$(wildcard exercises??.tex)
|
TEXFILES=$(wildcard likelihood-?.tex)
|
||||||
EXERCISES=$(TEXFILES:.tex=.pdf)
|
|
||||||
SOLUTIONS=$(EXERCISES:exercises%=solutions%)
|
|
||||||
|
|
||||||
.PHONY: pdf exercises solutions watch watchexercises watchsolutions clean
|
include ../../exercises.mk
|
||||||
|
|
||||||
pdf : $(SOLUTIONS) $(EXERCISES)
|
|
||||||
|
|
||||||
exercises : $(EXERCISES)
|
|
||||||
|
|
||||||
solutions : $(SOLUTIONS)
|
|
||||||
|
|
||||||
$(SOLUTIONS) : solutions%.pdf : exercises%.tex instructions.tex
|
|
||||||
{ echo "\\documentclass[answers,12pt,a4paper,pdftex]{exam}"; sed -e '1d' $<; } > $(patsubst %.pdf,%.tex,$@)
|
|
||||||
pdflatex -interaction=scrollmode $(patsubst %.pdf,%.tex,$@) | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $(patsubst %.pdf,%.tex,$@) || true
|
|
||||||
rm $(patsubst %.pdf,%,$@).[!p]*
|
|
||||||
|
|
||||||
$(EXERCISES) : %.pdf : %.tex instructions.tex
|
|
||||||
pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
|
|
||||||
|
|
||||||
watch :
|
|
||||||
while true; do ! make -q pdf && make pdf; sleep 0.5; done
|
|
||||||
|
|
||||||
watchexercises :
|
|
||||||
while true; do ! make -q exercises && make exercises; sleep 0.5; done
|
|
||||||
|
|
||||||
watchsolutions :
|
|
||||||
while true; do ! make -q solutions && make solutions; sleep 0.5; done
|
|
||||||
|
|
||||||
clean :
|
|
||||||
rm -f *~ *.aux *.log *.out
|
|
||||||
|
|
||||||
cleanup : clean
|
|
||||||
rm -f $(SOLUTIONS) $(EXERCISES)
|
|
||||||
|
@ -1,41 +0,0 @@
|
|||||||
\vspace*{-8ex}
|
|
||||||
\begin{center}
|
|
||||||
\textbf{\Large Introduction to scientific computing}\\[1ex]
|
|
||||||
{\large Jan Grewe, Jan Benda}\\[-3ex]
|
|
||||||
Neuroethology lab \hfill --- \hfill Institute for Neurobiology \hfill --- \hfill \includegraphics[width=0.28\textwidth]{UT_WBMW_Black_RGB} \\
|
|
||||||
\end{center}
|
|
||||||
|
|
||||||
% \ifprintanswers%
|
|
||||||
% \else
|
|
||||||
|
|
||||||
% % Die folgenden Aufgaben dienen der Wiederholung, \"Ubung und
|
|
||||||
% % Selbstkontrolle und sollten eigenst\"andig bearbeitet und gel\"ost
|
|
||||||
% % werden. Die L\"osung soll in Form eines einzelnen Skriptes (m-files)
|
|
||||||
% % im ILIAS hochgeladen werden. Jede Aufgabe sollte in einer eigenen
|
|
||||||
% % ``Zelle'' gel\"ost sein. Die Zellen \textbf{m\"ussen} unabh\"angig
|
|
||||||
% % voneinander ausf\"uhrbar sein. Das Skript sollte nach dem Muster:
|
|
||||||
% % ``variablen\_datentypen\_\{nachname\}.m'' benannt werden
|
|
||||||
% % (z.B. variablen\_datentypen\_mueller.m).
|
|
||||||
|
|
||||||
% \begin{itemize}
|
|
||||||
% \item \"Uberzeuge dich von jeder einzelnen Zeile deines Codes, dass
|
|
||||||
% sie auch wirklich das macht, was sie machen soll! Teste dies mit
|
|
||||||
% kleinen Beispielen direkt in der Kommandozeile.
|
|
||||||
% \item Versuche die L\"osungen der Aufgaben m\"oglichst in
|
|
||||||
% sinnvolle kleine Funktionen herunterzubrechen.
|
|
||||||
% Sobald etwas \"ahnliches mehr als einmal berechnet werden soll,
|
|
||||||
% lohnt es sich eine Funktion daraus zu schreiben!
|
|
||||||
% \item Teste rechenintensive \code{for} Schleifen, Vektoren, Matrizen
|
|
||||||
% zuerst mit einer kleinen Anzahl von Wiederholungen oder kleiner
|
|
||||||
% Gr\"o{\ss}e, und benutze erst am Ende, wenn alles \"uberpr\"uft
|
|
||||||
% ist, eine gro{\ss}e Anzahl von Wiederholungen oder Elementen, um eine gute
|
|
||||||
% Statistik zu bekommen.
|
|
||||||
% \item Benutze die Hilfsfunktion von \code{matlab} (\code{help
|
|
||||||
% commando} oder \code{doc commando}) und das Internet, um
|
|
||||||
% herauszufinden, wie bestimmte \code{matlab} Funktionen zu verwenden
|
|
||||||
% sind und was f\"ur M\"oglichkeiten sie bieten.
|
|
||||||
% Auch zu inhaltlichen Konzepten bietet das Internet oft viele
|
|
||||||
% Antworten!
|
|
||||||
% \end{itemize}
|
|
||||||
|
|
||||||
% \fi
|
|
@ -1,93 +1,23 @@
|
|||||||
\documentclass[12pt,a4paper,pdftex]{exam}
|
\documentclass[12pt,a4paper,pdftex]{exam}
|
||||||
|
|
||||||
\usepackage[german]{babel}
|
\newcommand{\exercisetopic}{Maximum Likelihood}
|
||||||
\usepackage{pslatex}
|
\newcommand{\exercisenum}{10}
|
||||||
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
|
\newcommand{\exercisedate}{January 12th, 2021}
|
||||||
\usepackage{xcolor}
|
|
||||||
\usepackage{graphicx}
|
|
||||||
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
|
||||||
|
|
||||||
%%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
|
|
||||||
\pagestyle{headandfoot}
|
|
||||||
\ifprintanswers
|
|
||||||
\newcommand{\stitle}{: Solutions}
|
|
||||||
\else
|
|
||||||
\newcommand{\stitle}{}
|
|
||||||
\fi
|
|
||||||
\header{{\bfseries\large Exercise 11\stitle}}{{\bfseries\large Maximum likelihood}}{{\bfseries\large January 7th, 2020}}
|
|
||||||
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
|
|
||||||
jan.benda@uni-tuebingen.de}
|
|
||||||
\runningfooter{}{\thepage}{}
|
|
||||||
|
|
||||||
\setlength{\baselineskip}{15pt}
|
|
||||||
\setlength{\parindent}{0.0cm}
|
|
||||||
\setlength{\parskip}{0.3cm}
|
|
||||||
\renewcommand{\baselinestretch}{1.15}
|
|
||||||
|
|
||||||
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage{listings}
|
|
||||||
\lstset{
|
|
||||||
language=Matlab,
|
|
||||||
basicstyle=\ttfamily\footnotesize,
|
|
||||||
numbers=left,
|
|
||||||
numberstyle=\tiny,
|
|
||||||
title=\lstname,
|
|
||||||
showstringspaces=false,
|
|
||||||
commentstyle=\itshape\color{darkgray},
|
|
||||||
breaklines=true,
|
|
||||||
breakautoindent=true,
|
|
||||||
columns=flexible,
|
|
||||||
frame=single,
|
|
||||||
xleftmargin=1em,
|
|
||||||
xrightmargin=1em,
|
|
||||||
aboveskip=10pt
|
|
||||||
}
|
|
||||||
|
|
||||||
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage{amsmath}
|
|
||||||
\usepackage{amssymb}
|
|
||||||
\usepackage{bm}
|
|
||||||
\usepackage{dsfont}
|
|
||||||
\newcommand{\naZ}{\mathds{N}}
|
|
||||||
\newcommand{\gaZ}{\mathds{Z}}
|
|
||||||
\newcommand{\raZ}{\mathds{Q}}
|
|
||||||
\newcommand{\reZ}{\mathds{R}}
|
|
||||||
\newcommand{\reZp}{\mathds{R^+}}
|
|
||||||
\newcommand{\reZpN}{\mathds{R^+_0}}
|
|
||||||
\newcommand{\koZ}{\mathds{C}}
|
|
||||||
|
|
||||||
%%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\newcommand{\continue}{\ifprintanswers%
|
|
||||||
\else
|
|
||||||
\vfill\hspace*{\fill}$\rightarrow$\newpage%
|
|
||||||
\fi}
|
|
||||||
\newcommand{\continuepage}{\ifprintanswers%
|
|
||||||
\newpage
|
|
||||||
\else
|
|
||||||
\vfill\hspace*{\fill}$\rightarrow$\newpage%
|
|
||||||
\fi}
|
|
||||||
\newcommand{\newsolutionpage}{\ifprintanswers%
|
|
||||||
\newpage%
|
|
||||||
\else
|
|
||||||
\fi}
|
|
||||||
|
|
||||||
%%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\newcommand{\qt}[1]{\textbf{#1}\\}
|
|
||||||
\newcommand{\pref}[1]{(\ref{#1})}
|
|
||||||
\newcommand{\extra}{--- Zusatzaufgabe ---\ \mbox{}}
|
|
||||||
\newcommand{\code}[1]{\texttt{#1}}
|
|
||||||
|
|
||||||
|
\input{../../exercisesheader}
|
||||||
|
|
||||||
|
\firstpagefooter{Prof. Dr. Jan Benda}{}{jan.benda@uni-tuebingen.de}
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\begin{document}
|
\begin{document}
|
||||||
|
|
||||||
\input{instructions}
|
\input{../../exercisestitle}
|
||||||
|
|
||||||
|
|
||||||
\begin{questions}
|
\begin{questions}
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\question \qt{Read chapter 9 on ``Maximum likelihood estimation''!}\vspace{-3ex}
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\question \qt{Maximum likelihood of the standard deviation}
|
\question \qt{Maximum likelihood of the standard deviation}
|
||||||
Let's compute the likelihood and the log-likelihood for the estimation
|
Let's compute the likelihood and the log-likelihood for the estimation
|
||||||
@ -107,10 +37,11 @@ of the standard deviation.
|
|||||||
\end{parts}
|
\end{parts}
|
||||||
\begin{solution}
|
\begin{solution}
|
||||||
\lstinputlisting{mlestd.m}
|
\lstinputlisting{mlestd.m}
|
||||||
|
\lstinputlisting[language={}]{mlestd.out}
|
||||||
\includegraphics[width=1\textwidth]{mlestd}\\
|
\includegraphics[width=1\textwidth]{mlestd}\\
|
||||||
|
|
||||||
The more data the smaller the product of the probabilities ($\approx
|
The more data the smaller the product of the probabilities ($\approx
|
||||||
p^n$ with $0 \le p < 1$) and the smaller the sum of the logarithms
|
p^n$ if $0 \le p < 1$) and the smaller the sum of the logarithms
|
||||||
of the probabilities ($\approx n\log p$, note that $\log p < 0$).
|
of the probabilities ($\approx n\log p$, note that $\log p < 0$).
|
||||||
|
|
||||||
The product eventually gets smaller than the precision of the
|
The product eventually gets smaller than the precision of the
|
||||||
@ -121,10 +52,10 @@ of the standard deviation.
|
|||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\question \qt{Maximum-likelihood estimator of a line through the origin}
|
\question \qt{Maximum-likelihood estimator of a line through the origin}
|
||||||
In the lecture we derived the following equation for an
|
In the lecture we derived the following equation for an
|
||||||
maximum-likelihood estimate of the slope $\theta$ of a straight line
|
maximum-likelihood estimate of the slope $m$ of a straight line
|
||||||
through the origin fitted to $n$ pairs of data values $(x_i|y_i)$ with
|
through the origin fitted to $n$ pairs of data values $(x_i|y_i)$ with
|
||||||
standard deviation $\sigma_i$:
|
standard deviation $\sigma_i$:
|
||||||
\[\theta = \frac{\sum_{i=1}^n \frac{x_i y_i}{\sigma_i^2}}{ \sum_{i=1}^n
|
\[ m = \frac{\sum_{i=1}^n \frac{x_i y_i}{\sigma_i^2}}{ \sum_{i=1}^n
|
||||||
\frac{x_i^2}{\sigma_i^2}} \]
|
\frac{x_i^2}{\sigma_i^2}} \]
|
||||||
\begin{parts}
|
\begin{parts}
|
||||||
\part \label{mleslopefunc} Write a function that takes two vectors
|
\part \label{mleslopefunc} Write a function that takes two vectors
|
||||||
@ -175,7 +106,7 @@ normally-distributed data. Such parameter need to be estimated
|
|||||||
numerically by means of maximum-likelihood from the data.
|
numerically by means of maximum-likelihood from the data.
|
||||||
|
|
||||||
Let us demonstrate this approach by means of data that are drawn from a
|
Let us demonstrate this approach by means of data that are drawn from a
|
||||||
gamma distribution,
|
gamma distribution.
|
||||||
\begin{parts}
|
\begin{parts}
|
||||||
\part Find out which \code{matlab} function computes the
|
\part Find out which \code{matlab} function computes the
|
||||||
probability-density function of the gamma distribution.
|
probability-density function of the gamma distribution.
|
||||||
@ -193,7 +124,7 @@ gamma distribution,
|
|||||||
\part Compute and plot a properly normalized histogram of these
|
\part Compute and plot a properly normalized histogram of these
|
||||||
random numbers.
|
random numbers.
|
||||||
|
|
||||||
\part Find out which \code{matlab} function fit a distribution to a
|
\part Find out which \code{matlab} function fits a distribution to a
|
||||||
vector of random numbers according to the maximum-likelihood method.
|
vector of random numbers according to the maximum-likelihood method.
|
||||||
How do you need to use this function in order to fit a gamma
|
How do you need to use this function in order to fit a gamma
|
||||||
distribution to the data?
|
distribution to the data?
|
@ -6,16 +6,19 @@ for k = 1:length(ns)
|
|||||||
fprintf('\nn=%d\n', n);
|
fprintf('\nn=%d\n', n);
|
||||||
% draw random numbers:
|
% draw random numbers:
|
||||||
x = randn(n,1)*sigma+mu;
|
x = randn(n,1)*sigma+mu;
|
||||||
fprintf(' mean of the data is %.2f\n', mean(x))
|
datamean = mean(x);
|
||||||
fprintf(' standard deviation of the data is %.2f\n', std(x))
|
fprintf(' mean %.3f\n', datamean)
|
||||||
|
% we assume the mean to be given, therefore dof=n:
|
||||||
|
fprintf(' standard deviation %.3f\n', std(x, 1))
|
||||||
|
|
||||||
% standard deviation as parameter:
|
% standard deviation as parameter:
|
||||||
psigs = 1.0:0.01:3.0;
|
psigs = 1.0:0.001:3.0;
|
||||||
% matrix with the probabilities for each x and psigs:
|
% matrix with the probabilities for each x and psigs:
|
||||||
lms = zeros(length(x), length(psigs));
|
lms = zeros(length(x), length(psigs));
|
||||||
for i=1:length(psigs)
|
for i=1:length(psigs)
|
||||||
psig = psigs(i);
|
psig = psigs(i);
|
||||||
p = exp(-0.5*((x-mu)/psig).^2.0)/sqrt(2.0*pi)/psig;
|
% same mean as for the standard deviation of the data:
|
||||||
|
p = exp(-0.5*((x-datamean)/psig).^2.0)/sqrt(2.0*pi)/psig;
|
||||||
lms(:,i) = p;
|
lms(:,i) = p;
|
||||||
end
|
end
|
||||||
lm = prod(lms, 1); % likelihood
|
lm = prod(lms, 1); % likelihood
|
||||||
@ -24,8 +27,9 @@ for k = 1:length(ns)
|
|||||||
if length(maxlm) > 1
|
if length(maxlm) > 1
|
||||||
maxlm = maxlm(1);
|
maxlm = maxlm(1);
|
||||||
end
|
end
|
||||||
fprintf(' likelihood: standard deviation of the data is %.2f\n', maxlm)
|
maxloglm = psigs(loglm==max(loglm));
|
||||||
fprintf(' log-likelihood: standard deviation of the data is %.2f\n', psigs(loglm==max(loglm)))
|
fprintf(' likelihood: standard deviation %.3f\n', maxlm)
|
||||||
|
fprintf(' log-likelihood: standard deviation %.3f\n', maxloglm)
|
||||||
|
|
||||||
% plot likelihood of standard deviation:
|
% plot likelihood of standard deviation:
|
||||||
subplot(2, 2, 2*k-1);
|
subplot(2, 2, 2*k-1);
|
||||||
|
11
likelihood/exercises/mlestd.out
Normal file
11
likelihood/exercises/mlestd.out
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
n=50
|
||||||
|
mean 3.468
|
||||||
|
standard deviation 1.833
|
||||||
|
likelihood: standard deviation 1.833
|
||||||
|
log-likelihood: standard deviation 1.833
|
||||||
|
|
||||||
|
n=1000
|
||||||
|
mean 2.993
|
||||||
|
standard deviation 2.077
|
||||||
|
likelihood: standard deviation 1.000
|
||||||
|
log-likelihood: standard deviation 2.077
|
@ -20,12 +20,11 @@
|
|||||||
|
|
||||||
\section{TODO}
|
\section{TODO}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Fitting psychometric functions:
|
\item Fitting psychometric functions, logistic regression:
|
||||||
Variable $x_i$, responses $r_i$ either 0 or 1.
|
Variable $x_i$, responses $r_i$ either 0 or 1.
|
||||||
$p(x_i, \theta)$ is Weibull or Boltzmann function.
|
$p(x_i, \theta)$ is Weibull or Boltzmann or logistic function.
|
||||||
Likelihood is $L = \prod p(x_i, \theta)^{r_i} (1-p(x_i, \theta))^{1-r_i}$.
|
Likelihood is $L = \prod p(x_i, \theta)^{r_i} (1-p(x_i, \theta))^{1-r_i}$.
|
||||||
Use fminsearch for fitting.
|
Use fminsearch for fitting?
|
||||||
\item GLM model fitting?
|
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
@ -4,111 +4,132 @@
|
|||||||
\label{maximumlikelihoodchapter}
|
\label{maximumlikelihoodchapter}
|
||||||
\exercisechapter{Maximum likelihood estimation}
|
\exercisechapter{Maximum likelihood estimation}
|
||||||
|
|
||||||
A common problem in statistics is to estimate from a probability
|
The core task of statistics is to infer from measured data some
|
||||||
distribution one or more parameters $\theta$ that best describe the
|
parameters describing the data. These parameters can be simply a mean,
|
||||||
data $x_1, x_2, \ldots x_n$. \enterm[maximum likelihood
|
a standard deviation, or any other parameter needed to describe the
|
||||||
estimator]{Maximum likelihood estimators} (\enterm[mle|see{maximum
|
distribution the data are originating from, a correlation coefficient,
|
||||||
|
or some parameters of a function describing a particular dependence
|
||||||
|
between the data. The brain faces exactly the same problem. Given the
|
||||||
|
activity pattern of some neurons (the data) it needs to infer some
|
||||||
|
aspects (parameters) of the environment and the internal state of the
|
||||||
|
body in order to generate some useful behavior. One possible approach
|
||||||
|
to estimate parameters from data are \enterm[maximum likelihood
|
||||||
|
estimator]{maximum likelihood estimators} (\enterm[mle|see{maximum
|
||||||
likelihood estimator}]{mle},
|
likelihood estimator}]{mle},
|
||||||
\determ{Maximum-Likelihood-Sch\"atzer}) choose the parameters such
|
\determ{Maximum-Likelihood-Sch\"atzer}). They choose the parameters
|
||||||
that they maximize the likelihood of the data $x_1, x_2, \ldots x_n$
|
such that they maximize the likelihood of the specific data values to
|
||||||
to originate from the distribution.
|
originate from a specific distribution.
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\section{Maximum Likelihood}
|
\section{Maximum likelihood}
|
||||||
|
|
||||||
Let $p(x|\theta)$ (to be read as ``probability(density) of $x$ given
|
Let $p(x|\theta)$ (to be read as ``probability(density) of $x$ given
|
||||||
$\theta$.'') the probability (density) distribution of $x$ given the
|
$\theta$.'') the probability (density) distribution of data value $x$
|
||||||
parameters $\theta$. This could be the normal distribution
|
given parameter values $\theta$. This could be the normal distribution
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\label{normpdfmean}
|
\label{normpdfmean}
|
||||||
p(x|\mu, \sigma) = \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x-\mu)^2}{2\sigma^2}}
|
p(x|\mu, \sigma) = \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x-\mu)^2}{2\sigma^2}}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
defined by the mean $\mu$ and the standard deviation $\sigma$ as
|
defined by the mean $\mu$ and the standard deviation $\sigma$ as
|
||||||
parameters $\theta$. If the $n$ independent observations of $x_1,
|
parameters $\theta$. If the $n$ observations $x_1, x_2, \ldots, x_n$
|
||||||
x_2, \ldots x_n$ originate from the same probability density
|
are independent of each other and originate from the same probability
|
||||||
distribution (they are \enterm[i.i.d.|see{independent and identically
|
density distribution (they are \enterm[i.i.d.|see{independent and
|
||||||
distributed}]{i.i.d.}, \enterm{independent and identically
|
identically distributed}]{i.i.d.}, \enterm{independent and
|
||||||
distributed}) then the conditional probability $p(x_1,x_2, \ldots
|
identically distributed}), then the conditional probability
|
||||||
x_n|\theta)$ of observing $x_1, x_2, \ldots x_n$ given some specific
|
$p(x_1,x_2, \ldots, x_n|\theta)$ of observing the particular data
|
||||||
parameter values $\theta$ is given by
|
values $x_1, x_2, \ldots, x_n$ given some specific parameter values
|
||||||
|
$\theta$ of the probability density is given by the product of the
|
||||||
|
probability densities of each data value:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
p(x_1,x_2, \ldots x_n|\theta) = p(x_1|\theta) \cdot p(x_2|\theta)
|
\label{probdata}
|
||||||
\ldots p(x_n|\theta) = \prod_{i=1}^n p(x_i|\theta) \; .
|
p(x_1,x_2, \ldots, x_n|\theta) = p(x_1|\theta) \cdot p(x_2|\theta)
|
||||||
|
\ldots, p(x_n|\theta) = \prod_{i=1}^n p(x_i|\theta) \; .
|
||||||
\end{equation}
|
\end{equation}
|
||||||
Vice versa, the \entermde{Likelihood}{likelihood} of the parameters $\theta$
|
Vice versa, the \entermde{Likelihood}{likelihood} of the parameters $\theta$
|
||||||
given the observed data $x_1, x_2, \ldots x_n$ is
|
given the observed data $x_1, x_2, \ldots, x_n$ is
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
{\cal L}(\theta|x_1,x_2, \ldots x_n) = p(x_1,x_2, \ldots x_n|\theta) \; .
|
\label{likelihood}
|
||||||
|
{\cal L}(\theta|x_1,x_2, \ldots, x_n) = p(x_1,x_2, \ldots, x_n|\theta) \; .
|
||||||
\end{equation}
|
\end{equation}
|
||||||
Note: the likelihood ${\cal L}$ is not a probability in the
|
Note, that the likelihood ${\cal L}$ is not a probability in the
|
||||||
classic sense since it does not integrate to unity ($\int {\cal
|
classic sense since it does not integrate to unity ($\int {\cal
|
||||||
L}(\theta|x_1,x_2, \ldots x_n) \, d\theta \ne 1$).
|
L}(\theta|x_1,x_2, \ldots, x_n) \, d\theta \ne 1$). For given
|
||||||
|
observations $x_1, x_2, \ldots, x_n$, the likelihood
|
||||||
When applying maximum likelihood estimations we are interested in the
|
\eqref{likelihood} is a function of the parameters $\theta$. This
|
||||||
parameter values
|
function has a global maximum for some specific parameter values. At
|
||||||
|
this maximum the probability \eqref{probdata} to observe the measured
|
||||||
|
data values is the largest.
|
||||||
|
|
||||||
|
Maximum likelihood estimators just find the parameter values
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\theta_{mle} = \text{argmax}_{\theta} {\cal L}(\theta|x_1,x_2, \ldots x_n)
|
\theta_{mle} = \text{argmax}_{\theta} {\cal L}(\theta|x_1,x_2, \ldots, x_n)
|
||||||
\end{equation}
|
\end{equation}
|
||||||
that maximize the likelihood. $\text{argmax}_xf(x)$ is the value of
|
that maximize the likelihood \eqref{likelihood}.
|
||||||
the argument $x$ for which the function $f(x)$ assumes its global
|
$\text{argmax}_xf(x)$ is the value of the argument $x$ for which the
|
||||||
maximum. Thus, we search for the parameter values $\theta$ at which
|
function $f(x)$ assumes its global maximum. Thus, we search for the
|
||||||
the likelihood ${\cal L}(\theta)$ reaches its maximum. For these
|
parameter values $\theta$ at which the likelihood ${\cal L}(\theta)$
|
||||||
paraemter values the measured data most likely originated from the
|
reaches its maximum. For these parameter values the measured data most
|
||||||
corresponding distribution.
|
likely originated from the corresponding distribution.
|
||||||
|
|
||||||
The position of a function's maximum does not change when the values
|
The position of a function's maximum does not change when the values
|
||||||
of the function are transformed by a strictly monotonously rising
|
of the function are transformed by a strictly monotonously rising
|
||||||
function such as the logarithm. For numerical reasons and reasons that
|
function such as the logarithm. For numerical reasons and reasons that
|
||||||
we will discuss below, we search for the maximum of the logarithm of
|
we discuss below, we instead search for the maximum of the logarithm
|
||||||
the likelihood
|
of the likelihood
|
||||||
(\entermde[likelihood!log-]{Likelihood!Log-}{log-likelihood}):
|
(\entermde[likelihood!log-]{Likelihood!Log-}{log-likelihood})
|
||||||
|
|
||||||
\begin{eqnarray}
|
\begin{eqnarray}
|
||||||
\theta_{mle} & = & \text{argmax}_{\theta}\; {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\
|
\theta_{mle} & = & \text{argmax}_{\theta}\; {\cal L}(\theta|x_1,x_2, \ldots, x_n) \nonumber \\
|
||||||
& = & \text{argmax}_{\theta}\; \log {\cal L}(\theta|x_1,x_2, \ldots x_n) \nonumber \\
|
& = & \text{argmax}_{\theta}\; \log {\cal L}(\theta|x_1,x_2, \ldots, x_n) \nonumber \\
|
||||||
& = & \text{argmax}_{\theta}\; \log \prod_{i=1}^n p(x_i|\theta) \nonumber \\
|
& = & \text{argmax}_{\theta}\; \log \prod_{i=1}^n p(x_i|\theta) \nonumber \\
|
||||||
& = & \text{argmax}_{\theta}\; \sum_{i=1}^n \log p(x_i|\theta) \label{loglikelihood}
|
& = & \text{argmax}_{\theta}\; \sum_{i=1}^n \log p(x_i|\theta) \label{loglikelihood}
|
||||||
\end{eqnarray}
|
\end{eqnarray}
|
||||||
|
which is the sum of the logarithms of the probabilites of each
|
||||||
|
observation. Let's illustrate the concept of maximum likelihood
|
||||||
|
estimation on the arithmetic mean.
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\subsection{Example: the arithmetic mean}
|
\subsection{Arithmetic mean}
|
||||||
Suppose that the measurements $x_1, x_2, \ldots x_n$ originate from a
|
Suppose that the measurements $x_1, x_2, \ldots, x_n$ originate from a
|
||||||
normal distribution \eqnref{normpdfmean} and we consider the mean
|
normal distribution \eqref{normpdfmean} and we do not know the
|
||||||
$\mu$ as the only parameter $\theta$. Which value of $\theta$
|
population mean $\mu$ of the normal distribution
|
||||||
maximizes the likelihood of the data?
|
(\figrefb{mlemeanfig}). In this setting $\mu$ is the only parameter
|
||||||
|
$\theta$. Which value of $\mu$ maximizes the likelihood of the data?
|
||||||
|
|
||||||
\begin{figure}[t]
|
\begin{figure}[t]
|
||||||
\includegraphics[width=1\textwidth]{mlemean}
|
\includegraphics[width=1\textwidth]{mlemean}
|
||||||
\titlecaption{\label{mlemeanfig} Maximum likelihood estimation of
|
\titlecaption{\label{mlemeanfig} Maximum likelihood estimation of
|
||||||
the mean.}{Top: The measured data (blue dots) together with three
|
the mean.}{Top: The measured data (blue dots) together with three
|
||||||
different possible normal distributions with different means
|
normal distributions differing in their means (arrows) from which
|
||||||
(arrows) the data could have originated from. Bottom left: the
|
the data could have originated from. Bottom left: the likelihood
|
||||||
likelihood as a function of $\theta$ i.e. the mean. It is maximal
|
as a function of the parameter $\mu$. For the data it is maximal
|
||||||
at a value of $\theta = 2$. Bottom right: the
|
at a value of $\mu = 2$. Bottom right: the log-likelihood. Taking
|
||||||
log-likelihood. Taking the logarithm does not change the position
|
the logarithm does not change the position of the maximum.}
|
||||||
of the maximum.}
|
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
The log-likelihood \eqnref{loglikelihood}
|
With the normal distribution \eqref{normpdfmean} and applying
|
||||||
|
logarithmic identities, the log-likelihood \eqref{loglikelihood} reads
|
||||||
\begin{eqnarray}
|
\begin{eqnarray}
|
||||||
\log {\cal L}(\theta|x_1,x_2, \ldots x_n)
|
\log {\cal L}(\mu|x_1,x_2, \ldots, x_n)
|
||||||
& = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x_i-\theta)^2}{2\sigma^2}} \nonumber \\
|
& = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x_i-\mu)^2}{2\sigma^2}} \nonumber \\
|
||||||
& = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma^2} -\frac{(x_i-\theta)^2}{2\sigma^2} \; .
|
& = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma^2} -\frac{(x_i-\mu)^2}{2\sigma^2} \; .
|
||||||
\end{eqnarray}
|
\end{eqnarray}
|
||||||
Since the logarithm is the inverse function of the exponential
|
Since the logarithm is the inverse function of the exponential
|
||||||
($\log(e^x)=x$), taking the logarithm removes the exponential from the
|
($\log(e^x)=x$), taking the logarithm removes the exponential from the
|
||||||
normal distribution. To calculate the maximum of the log-likelihood,
|
normal distribution. This is the second reason why it is useful to
|
||||||
we need to take the derivative with respect to $\theta$ and set it to
|
maximize the log-likelihood. To calculate the maximum of the
|
||||||
zero:
|
log-likelihood, we need to take the derivative with respect to $\mu$
|
||||||
|
and set it to zero:
|
||||||
\begin{eqnarray}
|
\begin{eqnarray}
|
||||||
\frac{\text{d}}{\text{d}\theta} \log {\cal L}(\theta|x_1,x_2, \ldots x_n) & = & \sum_{i=1}^n - \frac{2(x_i-\theta)}{2\sigma^2} \;\; = \;\; 0 \nonumber \\
|
\frac{\text{d}}{\text{d}\mu} \log {\cal L}(\mu|x_1,x_2, \ldots, x_n) & = & \sum_{i=1}^n - \frac{\text{d}}{\text{d}\mu} \log \sqrt{2\pi \sigma^2} - \frac{\text{d}}{\text{d}\mu} \frac{(x_i-\mu)^2}{2\sigma^2} \;\; = \;\; 0 \nonumber \\
|
||||||
\Leftrightarrow \quad \sum_{i=1}^n x_i - \sum_{i=1}^n \theta & = & 0 \nonumber \\
|
\Leftrightarrow \quad \sum_{i=1}^n - \frac{2(x_i-\mu)}{2\sigma^2} & = & 0 \nonumber \\
|
||||||
\Leftrightarrow \quad n \theta & = & \sum_{i=1}^n x_i \nonumber \\
|
\Leftrightarrow \quad \sum_{i=1}^n x_i - \sum_{i=1}^n \mu & = & 0 \nonumber \\
|
||||||
\Leftrightarrow \quad \theta & = & \frac{1}{n} \sum_{i=1}^n x_i \;\; = \;\; \bar x
|
\Leftrightarrow \quad n \mu & = & \sum_{i=1}^n x_i \nonumber \\
|
||||||
|
\Leftrightarrow \quad \mu & = & \frac{1}{n} \sum_{i=1}^n x_i \;\; = \;\; \bar x \label{arithmeticmean}
|
||||||
\end{eqnarray}
|
\end{eqnarray}
|
||||||
Thus, the maximum likelihood estimator is the arithmetic mean. That
|
Thus, the maximum likelihood estimator of the population mean of
|
||||||
is, the arithmetic mean maximizes the likelihood that the data
|
normally distributed data is the arithmetic mean. That is, the
|
||||||
originate from a normal distribution centered at the arithmetic mean
|
arithmetic mean maximizes the likelihood that the data originate from
|
||||||
|
a normal distribution centered at the arithmetic mean
|
||||||
(\figref{mlemeanfig}). Equivalently, the standard deviation computed
|
(\figref{mlemeanfig}). Equivalently, the standard deviation computed
|
||||||
from the data, maximizes the likelihood that the data were generated
|
from the data, maximizes the likelihood that the data were generated
|
||||||
from a normal distribution with this standard deviation.
|
from a normal distribution with this standard deviation.
|
||||||
@ -123,6 +144,17 @@ from a normal distribution with this standard deviation.
|
|||||||
the maxima with the mean calculated from the data.
|
the maxima with the mean calculated from the data.
|
||||||
\end{exercise}
|
\end{exercise}
|
||||||
|
|
||||||
|
Comparing the values of the likelihood with the ones of the
|
||||||
|
log-likelihood shown in \figref{mlemeanfig}, shows the numerical
|
||||||
|
reason for taking the logarithm of the likelihood. The likelihood
|
||||||
|
values can get very small, because we multiply many, potentially small
|
||||||
|
probability densities with each other. The likelihood quickly gets
|
||||||
|
smaller than the samlles number a floating point number of a computer
|
||||||
|
can represent. Try it by increasing the number of data values in the
|
||||||
|
exercise. Taking the logarithm avoids this problem. The log-likelihood
|
||||||
|
assumes well behaving numbers that can be handled well by the
|
||||||
|
computer.
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\section{Fitting probability distributions}
|
\section{Fitting probability distributions}
|
||||||
@ -130,32 +162,39 @@ Consider normally distributed data with unknown mean and standard
|
|||||||
deviation. From the considerations above we just have seen that a
|
deviation. From the considerations above we just have seen that a
|
||||||
Gaussian distribution with mean at the arithmetic mean and standard
|
Gaussian distribution with mean at the arithmetic mean and standard
|
||||||
deviation equal to the standard deviation computed from the data is
|
deviation equal to the standard deviation computed from the data is
|
||||||
the best Gaussian distribution that fits the data best in a maximum
|
the Gaussian that fits the data best in a maximum likelihood sense,
|
||||||
likelihood sense, i.e. the likelihood that the data were generated
|
i.e. the likelihood that the data were generated from this
|
||||||
from this distribution is the largest. Fitting a Gaussian distribution
|
distribution is the largest. Fitting a Gaussian distribution to data
|
||||||
to data is very simple: just compute the two parameter of the Gaussian
|
is very simple: just compute the two parameter of the Gaussian
|
||||||
distribution as the arithmetic mean and a standard deviation directly
|
distribution $\mu$ and $\sigma$ as the arithmetic mean and a standard
|
||||||
from the data.
|
deviation, respectively, directly from the data.
|
||||||
|
|
||||||
For non-Gaussian distributions (e.g. a Gamma-distribution), however,
|
For non-Gaussian distributions, for example a
|
||||||
such simple analytical expressions for the parameters of the
|
\entermde[distribution!Gamma-]{Verteilung!Gamma-}{Gamma-distribution}
|
||||||
distribution do not exist, e.g. the shape parameter of a
|
\begin{equation}
|
||||||
\entermde[distribution!Gamma-]{Verteilung!Gamma-}{Gamma-distribution}. How
|
\label{gammapdf}
|
||||||
do we fit such a distribution to some data? That is, how should we
|
p(x|\alpha,\beta) \sim x^{\alpha-1}e^{-\beta x}
|
||||||
compute the values of the parameters of the distribution, given the
|
\end{equation}
|
||||||
data?
|
with a shape parameter $\alpha$ and a rate parameter $\beta$
|
||||||
|
(\figrefb{mlepdffig}), in general no such simple analytical
|
||||||
A first guess could be to fit the probability density function by
|
expressions for estimating the parameters directly from the data do
|
||||||
minimization of the squared difference to a histogram of the measured
|
not exist. How do we fit such a distribution to the data? That is,
|
||||||
data. For several reasons this is, however, not the method of choice:
|
how should we compute the values of the parameters of the
|
||||||
(i) Probability densities can only be positive which leads, for small
|
distribution, given the data?
|
||||||
values in particular, to asymmetric distributions. (ii) The values of
|
|
||||||
a histogram estimating the density are not independent because the
|
A first guess could be to fit the probability density function by a
|
||||||
integral over a density is unity. The two basic assumptions of
|
\enterm{least squares} fit to a normalized histogram of the measured data in
|
||||||
normally distributed and independent samples, which are a prerequisite
|
the same way as we fit a function to some data. For several reasons
|
||||||
make the minimization of the squared difference \eqnref{chisqmin} to a
|
this is, however, not the method of choice: (i) Probability densities
|
||||||
maximum likelihood estimation, are violated. (iii) The histogram
|
can only be positive which leads, in particular for small values, to
|
||||||
strongly depends on the chosen bin size \figref{mlepdffig}).
|
asymmetric distributions of the estimated histogram around the true
|
||||||
|
density. (ii) The values of a histogram estimating the density are not
|
||||||
|
independent because the integral over a density is unity. The two
|
||||||
|
basic assumptions of normally distributed and independent samples,
|
||||||
|
which are a prerequisite for making the minimization of the squared
|
||||||
|
difference to a maximum likelihood estimation (see next section), are
|
||||||
|
violated. (iii) The estimation of the probability density by means of
|
||||||
|
a histogram strongly depends on the chosen bin size.
|
||||||
|
|
||||||
\begin{figure}[t]
|
\begin{figure}[t]
|
||||||
\includegraphics[width=1\textwidth]{mlepdf}
|
\includegraphics[width=1\textwidth]{mlepdf}
|
||||||
@ -164,20 +203,19 @@ strongly depends on the chosen bin size \figref{mlepdffig}).
|
|||||||
order Gamma-distribution. The maximum likelihood estimation of the
|
order Gamma-distribution. The maximum likelihood estimation of the
|
||||||
probability density function is shown in orange, the true pdf is
|
probability density function is shown in orange, the true pdf is
|
||||||
shown in red. Right: normalized histogram of the data together
|
shown in red. Right: normalized histogram of the data together
|
||||||
with the real (red) and the fitted probability density
|
with the true probability density (red) and the probability
|
||||||
functions. The fit was done by minimizing the squared difference
|
density function obtained by a least squares fit to the
|
||||||
to the histogram.}
|
histogram.}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
Instead we should stay with maximum-likelihood estimation. Exactly in
|
Instead we should stay with maximum-likelihood estimation. Exactly in
|
||||||
the same way as we estimated the mean value of a Gaussian distribution
|
the same way as we estimated the mean value of a Gaussian distribution
|
||||||
above, we can numerically fit the parameter of any type of
|
above, we can numerically fit the parameter of any type of
|
||||||
distribution directly from the data by means of maximizing the
|
distribution directly from the data by means of maximizing the
|
||||||
likelihood. We simply search for the parameter $\theta$ of the
|
likelihood. We simply search for the parameter values of the desired
|
||||||
desired probability density function that maximizes the
|
probability density function that maximize the log-likelihood. In
|
||||||
log-likelihood. In general this is a non-linear optimization problem
|
general this is a non-linear optimization problem that is solved with
|
||||||
that is solved with numerical methods such as the gradient descent
|
numerical methods such as the gradient descent \matlabfun{mle()}.
|
||||||
\matlabfun{mle()}.
|
|
||||||
|
|
||||||
\begin{exercise}{mlegammafit.m}{mlegammafit.out}
|
\begin{exercise}{mlegammafit.m}{mlegammafit.out}
|
||||||
Generate a sample of gamma-distributed random numbers and apply the
|
Generate a sample of gamma-distributed random numbers and apply the
|
||||||
@ -190,46 +228,63 @@ that is solved with numerical methods such as the gradient descent
|
|||||||
\section{Curve fitting}
|
\section{Curve fitting}
|
||||||
|
|
||||||
When fitting a function of the form $f(x;\theta)$ to data pairs
|
When fitting a function of the form $f(x;\theta)$ to data pairs
|
||||||
$(x_i|y_i)$ one tries to adapt the parameter $\theta$ such that the
|
$(x_i|y_i)$ one tries to adapt the parameters $\theta$ such that the
|
||||||
function best describes the data. With maximum likelihood we search
|
function best describes the data. In
|
||||||
for the parameter value $\theta$ for which the likelihood that the data
|
chapter~\ref{gradientdescentchapter} we simply assumed that ``best''
|
||||||
were drawn from the corresponding function is maximal. If we assume
|
means minimizing the squared distance between the data and the
|
||||||
that the $y_i$ values are normally distributed around the function
|
function. With maximum likelihood we search for those parameter
|
||||||
values $f(x_i;\theta)$ with a standard deviation $\sigma_i$, the
|
values $\theta$ that maximize the likelihood of the data to be drawn
|
||||||
log-likelihood is
|
from the corresponding function.
|
||||||
|
|
||||||
|
If we assume that the $y_i$ values are normally distributed around the
|
||||||
|
function values $f(x_i;\theta)$ with a standard deviation $\sigma_i$,
|
||||||
|
the log-likelihood is
|
||||||
\begin{eqnarray}
|
\begin{eqnarray}
|
||||||
\log {\cal L}(\theta|(x_1,y_1,\sigma_1), \ldots, (x_n,y_n,\sigma_n))
|
\log {\cal L}(\theta|(x_1,y_1,\sigma_1), \ldots, (x_n,y_n,\sigma_n))
|
||||||
& = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma_i^2}}e^{-\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2}} \nonumber \\
|
& = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma_i^2}}e^{-\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2}} \nonumber \\
|
||||||
& = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma_i^2} -\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2} \\
|
& = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma_i^2} -\frac{(y_i-f(x_i;\theta))^2}{2\sigma_i^2}
|
||||||
\end{eqnarray}
|
\end{eqnarray}
|
||||||
The only difference to the previous example is that the averages in
|
The only difference to the previous example of the arithmetic mean is
|
||||||
the equations above are now given as the function values
|
that the means $\mu$ in the equations above are given by the function
|
||||||
$f(x_i;\theta)$. The parameter $\theta$ should be the one that
|
values $f(x_i;\theta)$. The parameters $\theta$ should maximize the
|
||||||
maximizes the log-likelihood. The first part of the sum is independent
|
log-likelihood. The first term in the sum is independent of $\theta$
|
||||||
of $\theta$ and can thus be ignored when computing the the maximum:
|
and can be ignored when computing the the maximum. From the second
|
||||||
|
term we pull out the constant factor $-\frac{1}{2}$:
|
||||||
\begin{eqnarray}
|
\begin{eqnarray}
|
||||||
& = & - \frac{1}{2} \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2
|
& = & - \frac{1}{2} \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2
|
||||||
\end{eqnarray}
|
\end{eqnarray}
|
||||||
We can further simplify by inverting the sign and then search for the
|
We can further simplify by inverting the sign and then search for the
|
||||||
minimum. Also the factor $1/2$ can be ignored since it does not affect
|
minimum. Also the factor $\frac{1}{2}$ can be ignored since it does
|
||||||
the position of the minimum:
|
not affect the position of the minimum:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\label{chisqmin}
|
\label{chisqmin}
|
||||||
\theta_{mle} = \text{argmin}_{\theta} \; \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 \;\; = \;\; \text{argmin}_{\theta} \; \chi^2
|
\theta_{mle} = \text{argmin}_{\theta} \; \sum_{i=1}^n \left( \frac{y_i-f(x_i;\theta)}{\sigma_i} \right)^2 \;\; = \;\; \text{argmin}_{\theta} \; \chi^2
|
||||||
\end{equation}
|
\end{equation}
|
||||||
The sum of the squared differences normalized by the standard
|
The sum of the squared differences between the $y$-data values and the
|
||||||
deviation is also called $\chi^2$. The parameter $\theta$ which
|
function values, normalized by the standard deviation of the data
|
||||||
minimizes the squared differences is thus the one that maximizes the
|
around the function, is called $\chi^2$ (chi squared). The parameter
|
||||||
likelihood that the data actually originate from the given
|
$\theta$ which minimizes the squared differences is thus the one that
|
||||||
function. Minimizing $\chi^2$ therefore is a maximum likelihood
|
maximizes the likelihood of the data to actually originate from the
|
||||||
estimation.
|
given function.
|
||||||
|
|
||||||
|
Whether minimizing $\chi^2$ or the \enterm{mean squared error}
|
||||||
|
\eqref{meansquarederror} introduced in
|
||||||
|
chapter~\ref{gradientdescentchapter} does not matter. The latter is
|
||||||
|
the mean and $\chi^2$ is the sum of the squared differences. They
|
||||||
|
simply differ by the constant factor $n$, the number of data pairs,
|
||||||
|
which does not affect the position of the minimum. $\chi^2$ is more
|
||||||
|
general in that it allows for different standard deviations for each
|
||||||
|
data pair. If they are all the same ($\sigma_i = \sigma$), the common
|
||||||
|
standard deviation can be pulled out of the sum and also does not
|
||||||
|
influence the position of the minimum. Both \enterm{least squares} and
|
||||||
|
minimizing $\chi^2$ are maximum likelihood estimators of the
|
||||||
|
parameters $\theta$ of a function.
|
||||||
|
|
||||||
From the mathematical considerations above we can see that the
|
From the mathematical considerations above we can see that the
|
||||||
minimization of the squared difference is a maximum-likelihood
|
minimization of the squared difference is a maximum-likelihood
|
||||||
estimation only if the data are normally distributed around the
|
estimation only if the data are normally distributed around the
|
||||||
function. In case of other distributions, the log-likelihood
|
function. In case of other distributions, the log-likelihood
|
||||||
\eqnref{loglikelihood} needs to be adapted accordingly and be
|
\eqref{loglikelihood} needs to be adapted accordingly.
|
||||||
maximized respectively.
|
|
||||||
|
|
||||||
\begin{figure}[t]
|
\begin{figure}[t]
|
||||||
\includegraphics[width=1\textwidth]{mlepropline}
|
\includegraphics[width=1\textwidth]{mlepropline}
|
||||||
@ -242,26 +297,32 @@ maximized respectively.
|
|||||||
the normal distribution of the data around the line (right histogram).}
|
the normal distribution of the data around the line (right histogram).}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
|
Let's go on and calculate the minimum \eqref{chisqmin} of $\chi^2$
|
||||||
|
analytically for a simple function.
|
||||||
|
|
||||||
\subsection{Example: simple proportionality}
|
|
||||||
The function of a line with slope $\theta$ through the origin is
|
\subsection{Straight line trough the origin}
|
||||||
\[ f(x) = \theta x \; . \]
|
The function of a straight line with slope $m$ through the origin
|
||||||
The $\chi^2$-sum is thus
|
is
|
||||||
\[ \chi^2 = \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \; . \]
|
\[ f(x) = m x \; . \]
|
||||||
To estimate the minimum we again take the first derivative with
|
With this specific function, $\chi^2$ reads
|
||||||
respect to $\theta$ and equate it to zero:
|
\[ \chi^2 = \sum_{i=1}^n \left( \frac{y_i-m x_i}{\sigma_i} \right)^2
|
||||||
\begin{eqnarray}
|
\; . \] To calculate the minimum we take the first derivative with
|
||||||
\frac{\text{d}}{\text{d}\theta}\chi^2 & = & \frac{\text{d}}{\text{d}\theta} \sum_{i=1}^n \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\
|
respect to $m$ and equate it to zero:
|
||||||
& = & \sum_{i=1}^n \frac{\text{d}}{\text{d}\theta} \left( \frac{y_i-\theta x_i}{\sigma_i} \right)^2 \nonumber \\
|
|
||||||
& = & -2 \sum_{i=1}^n \frac{x_i}{\sigma_i} \left( \frac{y_i-\theta x_i}{\sigma_i} \right) \nonumber \\
|
|
||||||
& = & -2 \sum_{i=1}^n \left( \frac{x_i y_i}{\sigma_i^2} - \theta \frac{x_i^2}{\sigma_i^2} \right) \;\; = \;\; 0 \nonumber \\
|
|
||||||
\Leftrightarrow \quad \theta \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2} & = & \sum_{i=1}^n \frac{x_i y_i}{\sigma_i^2} \nonumber
|
|
||||||
\end{eqnarray}
|
|
||||||
\begin{eqnarray}
|
\begin{eqnarray}
|
||||||
\Leftrightarrow \quad \theta & = & \frac{\sum_{i=1}^n \frac{x_i y_i}{\sigma_i^2}}{ \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2}} \label{mleslope}
|
\frac{\text{d}}{\text{d}m}\chi^2 & = & \sum_{i=1}^n \frac{\text{d}}{\text{d}m} \left( \frac{y_i-m x_i}{\sigma_i} \right)^2 \nonumber \\
|
||||||
|
& = & -2 \sum_{i=1}^n \frac{x_i}{\sigma_i} \left( \frac{y_i-m x_i}{\sigma_i} \right) \nonumber \\
|
||||||
|
& = & -2 \sum_{i=1}^n \left( \frac{x_i y_i}{\sigma_i^2} - m \frac{x_i^2}{\sigma_i^2} \right) \;\; = \;\; 0 \nonumber \\
|
||||||
|
\Leftrightarrow \quad m \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2} & = & \sum_{i=1}^n \frac{x_i y_i}{\sigma_i^2} \nonumber \\
|
||||||
|
\Leftrightarrow \quad m & = & \frac{\sum_{i=1}^n \frac{x_i y_i}{\sigma_i^2}}{ \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2}} \label{mleslope}
|
||||||
\end{eqnarray}
|
\end{eqnarray}
|
||||||
This is an analytical expression for the estimation of the slope
|
This is an analytical expression for the estimation of the slope $m$
|
||||||
$\theta$ of the regression line (\figref{mleproplinefig}).
|
of the regression line (\figref{mleproplinefig}). We do not need to
|
||||||
|
apply numerical methods like the gradient descent to find the slope
|
||||||
|
that minimizes the squared differences. Instead, we can compute the
|
||||||
|
slope directly from the data by means of \eqnref{mleslope}, very much
|
||||||
|
like we calculate the mean of some data by means of the arithmetic
|
||||||
|
mean \eqref{arithmeticmean}.
|
||||||
|
|
||||||
\subsection{Linear and non-linear fits}
|
\subsection{Linear and non-linear fits}
|
||||||
A gradient descent, as we have done in the previous chapter, is not
|
A gradient descent, as we have done in the previous chapter, is not
|
||||||
@ -276,47 +337,68 @@ or the coefficients $a_k$ of a polynomial
|
|||||||
\matlabfun{polyfit()}.
|
\matlabfun{polyfit()}.
|
||||||
|
|
||||||
Parameters that are non-linearly combined can not be calculated
|
Parameters that are non-linearly combined can not be calculated
|
||||||
analytically. Consider for example the rate $\lambda$ of the
|
analytically. Consider, for example, the factor $c$ and the rate
|
||||||
exponential decay
|
$\lambda$ of the exponential decay
|
||||||
\[ y = c \cdot e^{\lambda x} \quad , \quad c, \lambda \in \reZ \; . \]
|
\[ y = c \cdot e^{\lambda x} \quad , \quad c, \lambda \in \reZ \; . \]
|
||||||
Such cases require numerical solutions for the optimization of the
|
Such cases require numerical solutions for the optimization of the
|
||||||
cost function, e.g. the gradient descent \matlabfun{lsqcurvefit()}.
|
cost function, e.g. the gradient descent \matlabfun{lsqcurvefit()}.
|
||||||
|
|
||||||
\subsection{Relation between slope and correlation coefficient}
|
\subsection{Relation between slope and correlation coefficient}
|
||||||
Let us have a closer look on \eqnref{mleslope}. If the standard
|
Let us have a closer look on \eqnref{mleslope} for the slope of a line
|
||||||
deviation of the data $\sigma_i$ is the same for each data point,
|
through the origin. If the standard deviation of the data $\sigma_i$
|
||||||
i.e. $\sigma_i = \sigma_j \; \forall \; i, j$, the standard deviation drops
|
is the same for each data point, i.e. $\sigma_i = \sigma_j \; \forall
|
||||||
out of \eqnref{mleslope} and we get
|
\; i, j$, the standard deviation drops out and \eqnref{mleslope}
|
||||||
|
simplifies to
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\label{whitemleslope}
|
\label{whitemleslope}
|
||||||
\theta = \frac{\sum_{i=1}^n x_i y_i}{\sum_{i=1}^n x_i^2}
|
m = \frac{\sum_{i=1}^n x_i y_i}{\sum_{i=1}^n x_i^2}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
To see what this expression is, we need to standardize the data. We
|
To see what the nominator and the denominator of this expression
|
||||||
make the data mean free and normalize them to their standard
|
describe, we need to subtract from the data their mean value. We make
|
||||||
deviation, i.e. $x \mapsto (x - \bar x)/\sigma_x$. The resulting
|
the data mean free, i.e. $x \mapsto x - \bar x$ and $y \mapsto y -
|
||||||
numbers are also called \entermde[z-values]{z-Wert}{$z$-values} or
|
\bar y$ . For mean-free data the variance
|
||||||
$z$-scores and they have the property $\bar x = 0$ and $\sigma_x =
|
|
||||||
1$. $z$-scores are often used in Biology to make quantities that
|
|
||||||
differ in their units comparable. For standardized data the variance
|
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\sigma_x^2 = \frac{1}{n} \sum_{i=1}^n (x_i - \bar x)^2 = \frac{1}{n} \sum_{i=1}^n x_i^2 = 1
|
\sigma_x^2 = \frac{1}{n} \sum_{i=1}^n (x_i - \bar x)^2 = \frac{1}{n} \sum_{i=1}^n x_i^2
|
||||||
\end{equation}
|
\end{equation}
|
||||||
is given by the mean squared data and equals one.
|
is given by the mean squared data. In the same way, the covariance
|
||||||
The covariance between $x$ and $y$ also simplifies to
|
between $x$ and $y$ simplifies to
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\text{cov}(x, y) = \frac{1}{n} \sum_{i=1}^n (x_i - \bar x)(y_i -
|
\text{cov}(x, y) = \frac{1}{n} \sum_{i=1}^n (x_i - \bar x)(y_i -
|
||||||
\bar y) =\frac{1}{n} \sum_{i=1}^n x_i y_i
|
\bar y) =\frac{1}{n} \sum_{i=1}^n x_i y_i \; ,
|
||||||
|
\end{equation}
|
||||||
|
the averaged product between pairs of $x$ and $y$ values. Expanding
|
||||||
|
the fraction in \eqnref{whitemleslope} by $\frac{1}{n}$ we get
|
||||||
|
\begin{equation}
|
||||||
|
\label{meanfreeslope}
|
||||||
|
m = \frac{\frac{1}{n}\sum_{i=1}^n x_i y_i}{\frac{1}{n}\sum_{i=1}^n x_i^2}
|
||||||
|
= \frac{\text{cov}(x, y)}{\sigma_x^2}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
the averaged product between pairs of $x$ and $y$ values. Recall that
|
|
||||||
the correlation coefficient $r_{x,y}$,
|
Recall that the correlation coefficient $r_{x,y}$ is the covariance
|
||||||
\eqnref{correlationcoefficient}, is the covariance normalized by the
|
normalized by the product of the standard deviations of $x$ and $y$:
|
||||||
product of the standard deviations of $x$ and $y$,
|
|
||||||
respectively. Therefore, in case the standard deviations equal one,
|
|
||||||
the correlation coefficient is identical to the covariance.
|
|
||||||
Consequently, for standardized data the slope of the regression line
|
|
||||||
\eqnref{whitemleslope} simplifies to
|
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\theta = \frac{1}{n} \sum_{i=1}^n x_i y_i = \text{cov}(x,y) = r_{x,y}
|
\label{meanfreecorrcoef}
|
||||||
|
r_{x,y} = \frac{\text{cov}(x, y)}{\sigma_x\sigma_y}
|
||||||
|
\end{equation}
|
||||||
|
If furthermore the standard deviations of $x$ and $y$ are the same,
|
||||||
|
i.e. $\sigma_x = \sigma_y$, the slope of a line trough the origin is
|
||||||
|
identical to the correlation coefficient.
|
||||||
|
|
||||||
|
This relation between slope and correlation coefficient in particular
|
||||||
|
holds for standardized data that have been made mean free and have
|
||||||
|
been normalized by their standard deviation, i.e. $x \mapsto (x - \bar
|
||||||
|
x)/\sigma_x$ and $y \mapsto (y - \bar x)/\sigma_y$. The resulting
|
||||||
|
numbers are called \entermde[z-value]{z-Wert}{$z$-values} or
|
||||||
|
\enterm[z-score]{$z$-scores}. Their mean equals zero and their
|
||||||
|
standard deviation one. $z$-scores are often used to make quantities
|
||||||
|
that differ in their units comparable. For standardized data the
|
||||||
|
denominators for both the slope \eqref{meanfreeslope} and the
|
||||||
|
correlation coefficient \eqref{meanfreecorrcoef} equal one. For
|
||||||
|
standardized data, both the slope of the regression line and the
|
||||||
|
corelation coefficient reduce to the covariance between the $x$ and
|
||||||
|
$y$ data:
|
||||||
|
\begin{equation}
|
||||||
|
m = \frac{1}{n} \sum_{i=1}^n x_i y_i = \text{cov}(x,y) = r_{x,y}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
For standardized data the slope of the regression line is the same as the
|
For standardized data the slope of the regression line is the same as the
|
||||||
correlation coefficient!
|
correlation coefficient!
|
||||||
@ -324,63 +406,100 @@ correlation coefficient!
|
|||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\section{Neural coding}
|
\section{Neural coding}
|
||||||
In sensory systems certain aspects of the environment are encoded in
|
Maximum likelihood estimators are not only a central concept for data
|
||||||
the neuronal activity of populations of neurons. One example of such a
|
analysis. Neural systems face the very same problem. They also need to
|
||||||
population code is the tuning of neurons in the primary visual cortex
|
estimate parameters of the internal and external environment based on
|
||||||
(V1) to the orientation of an edge or bar in the visual
|
the activity of neurons.
|
||||||
stimulus. Different neurons respond best to different edge
|
|
||||||
orientations. Traditionally, such a tuning is measured by analyzing
|
|
||||||
the neuronal response strength (e.g. the firing rate) as a function of
|
|
||||||
the orientation of a black bar and is illustrated and summarized
|
|
||||||
with the so called \enterm{tuning-curve} (\determ{Abstimmkurve},
|
|
||||||
figure~\ref{mlecodingfig}, top).
|
|
||||||
|
|
||||||
\begin{figure}[tp]
|
\begin{figure}[tp]
|
||||||
\includegraphics[width=1\textwidth]{mlecoding}
|
\includegraphics[width=1\textwidth]{mlecoding}
|
||||||
\titlecaption{\label{mlecodingfig} Maximum likelihood estimation of
|
\titlecaption{\label{mlecodingfig} Maximum likelihood estimation of
|
||||||
a stimulus parameter from neuronal activity.}{Top: Tuning curve of
|
a stimulus parameter from neuronal activity.}{Top: Tuning curve
|
||||||
an individual neuron as a function of the stimulus orientation (a
|
$r_i(\phi;c,\phi_i)$ of a specific neuron $i$ as a function of the
|
||||||
dark bar in front of a white background). The stimulus that evokes
|
orientation $\phi$ of a stimulus, a dark bar in front of a white
|
||||||
the strongest activity in that neuron is the bar with the vertical
|
background. The preferred stimulus $\phi_i$ of that neuron, the
|
||||||
orientation (arrow, $\phi_i=90$\,\degree). The red area indicates
|
one that evokes the strongest firing rate response, is a bar with
|
||||||
the variability of the neuronal activity $p(r|\phi)$ around the
|
vertical orientation (arrow, $\phi_i=90$\,\degree). The width of
|
||||||
tuning curve. Center: In a population of neurons, each neuron may
|
the red area indicates the variability of the neuronal activity
|
||||||
have a different tuning curve (colors). A specific stimulus (the
|
$\sigma_i$ around the tuning curve. Center: In a population of
|
||||||
vertical bar) activates the individual neurons of the population
|
neurons, each neuron may have a different tuning curve (colors). A
|
||||||
in a specific way (dots). Bottom: The log-likelihood of the
|
specific stimulus activates the individual neurons of the
|
||||||
activity pattern will be maximized close to the real stimulus
|
population in a specific way (dots). Bottom: The log-likelihood of
|
||||||
|
the activity pattern has a maximum close to the real stimulus
|
||||||
orientation.}
|
orientation.}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
|
In sensory systems certain aspects of the environment are encoded in
|
||||||
|
the neuronal activity of populations of neurons. One example of such a
|
||||||
|
population code is the tuning of neurons in the primary visual cortex
|
||||||
|
(V1) to the orientation of a bar in the visual stimulus. Different
|
||||||
|
neurons respond best to different bar orientations. Traditionally,
|
||||||
|
such a tuning is measured by analyzing the neuronal response strength
|
||||||
|
(e.g. the firing rate) as a function of the orientation of a black bar
|
||||||
|
and is illustrated and summarized with the so called
|
||||||
|
\enterm{tuning-curve} (\determ{Abstimmkurve},
|
||||||
|
figure~\ref{mlecodingfig}, top).
|
||||||
|
|
||||||
The brain, however, is confronted with the inverse problem: given a
|
The brain, however, is confronted with the inverse problem: given a
|
||||||
certain activity pattern in the neuronal population, what is the
|
certain activity pattern in the neuronal population, what is the
|
||||||
stimulus (here the orientation of an edge)? In the sense of maximum
|
stimulus? In our example, what is the orientation of the bar? In the
|
||||||
likelihood, a possible answer to this question would be: the stimulus
|
sense of maximum likelihood, a possible answer to this question would
|
||||||
for which the particular activity pattern is most likely given the
|
be: the stimulus for which the particular activity pattern is most
|
||||||
tuning of the neurons.
|
likely given the tuning of the neurons and the noise (standard
|
||||||
|
deviation) of the responses.
|
||||||
|
|
||||||
Let's stay with the example of the orientation tuning in V1. The
|
Let's stay with the example of the orientation tuning in V1. The
|
||||||
tuning $\Omega_i(\phi)$ of the neurons $i$ to the preferred edge
|
tuning of the firing rate $r_i(\phi)$ of neuron $i$ to the preferred
|
||||||
orientation $\phi_i$ can be well described using a van-Mises function
|
bar orientation $\phi_i$ can be well described using a van-Mises
|
||||||
(the Gaussian function on a cyclic x-axis) (\figref{mlecodingfig}):
|
function (the Gaussian function on a cyclic x-axis)
|
||||||
|
(\figref{mlecodingfig}):
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\Omega_i(\phi) = c \cdot e^{\cos(2(\phi-\phi_i))} \quad , \quad c \in \reZ
|
\label{bartuningcurve}
|
||||||
|
r_i(\phi; c, \phi_i) = c \cdot e^{\cos(2(\phi-\phi_i))} \quad , \quad c > 0
|
||||||
\end{equation}
|
\end{equation}
|
||||||
|
Note the factor two in the cosine, because the response of the neuron
|
||||||
|
is the same for a bar turned by 180\,\degree.
|
||||||
|
|
||||||
If we approximate the neuronal activity by a normal distribution
|
If we approximate the neuronal activity by a normal distribution
|
||||||
around the tuning curve with a standard deviation $\sigma=\Omega/4$,
|
around the tuning curve with a standard deviation $\sigma_i$, then the
|
||||||
which is proportional to $\Omega$, then the probability $p_i(r|\phi)$
|
probability $p_i(r|\phi)$ of the $i$-th neuron having the observed
|
||||||
of the $i$-th neuron showing the activity $r$ given a certain
|
activity $r$, given a certain orientation $\phi$ of a bar is
|
||||||
orientation $\phi$ of an edge is given by
|
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
p_i(r|\phi) = \frac{1}{\sqrt{2\pi}\Omega_i(\phi)/4} e^{-\frac{1}{2}\left(\frac{r-\Omega_i(\phi)}{\Omega_i(\phi)/4}\right)^2} \; .
|
p_i(r|\phi) = \frac{1}{\sqrt{2\pi\sigma_i^2}} e^{-\frac{1}{2}\left(\frac{r-r_i(\phi; c, \phi_i)}{\sigma_i}\right)^2} \; .
|
||||||
\end{equation}
|
\end{equation}
|
||||||
The log-likelihood of the edge orientation $\phi$ given the
|
The log-likelihood of the bar orientation $\phi$ given the
|
||||||
activity pattern in the population $r_1$, $r_2$, ... $r_n$ is thus
|
activity pattern in the population $r_1$, $r_2$, ... $r_n$ is thus
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
{\cal L}(\phi|r_1, r_2, \ldots r_n) = \sum_{i=1}^n \log p_i(r_i|\phi)
|
{\cal L}(\phi|r_1, r_2, \ldots, r_n) = \sum_{i=1}^n \log p_i(r_i|\phi)
|
||||||
\end{equation}
|
\end{equation}
|
||||||
The angle $\phi$ that maximizes this likelihood is then an estimate of
|
The angle $\phi_{mle}$ that maximizes this likelihood is an estimate
|
||||||
the orientation of the edge.
|
of the true orientation of the bar (\figref{mlecodingfig}).
|
||||||
|
|
||||||
|
The noisiness of the neuron's responses as quantified by $\sigma_i$
|
||||||
|
usually is a function of the neuron's mean firing rate $r_i$:
|
||||||
|
$\sigma_i = \sigma_i(r_i)$. This dependence has a major impact on the
|
||||||
|
maximum likelihood estimation. Usually, the stronger the response of a
|
||||||
|
neuron, the higher its firing rate, the lower the noise. In this case,
|
||||||
|
strong responses have a stronger influence on the position of the
|
||||||
|
maximum of the log-likelihood.
|
||||||
|
|
||||||
|
Whether neural systems really implement maximum likelihood estimators
|
||||||
|
is another question. There are many ways how a stimulus property can
|
||||||
|
be read out from the activity of a population of neurons. The simplest
|
||||||
|
one being a ``winner takes all'' rule. The preferred stimulus
|
||||||
|
parameter of the neuron with the strongest response is the
|
||||||
|
estimate. Another possibility is to compute a population vector. The
|
||||||
|
estimated stimulus parameter is the sum of the preferred stimulus
|
||||||
|
parameters of all neurons in the population weighted by the activity
|
||||||
|
of the neurons. In case of angular stimulus parameters, like the
|
||||||
|
orientation of the bar in our example, a vector pointing in the
|
||||||
|
direction of the angle is used instead of the angle to incorporate the
|
||||||
|
cyclic nature of the parameter.
|
||||||
|
|
||||||
|
Using maximum likelihood estimators for analyzing neural population
|
||||||
|
activity gives us an upper bound of how well a stimulus parameter is
|
||||||
|
encoded in the activity of the neurons. The brain would not be able to
|
||||||
|
do better.
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
@ -5,11 +5,9 @@ import matplotlib.gridspec as gridspec
|
|||||||
from plotstyle import *
|
from plotstyle import *
|
||||||
|
|
||||||
rng = np.random.RandomState(4637281)
|
rng = np.random.RandomState(4637281)
|
||||||
lmarg=0.1
|
|
||||||
rmarg=0.1
|
|
||||||
|
|
||||||
fig = plt.figure(figsize=cm_size(figure_width, 2.8*figure_height))
|
fig = plt.figure(figsize=cm_size(figure_width, 2.8*figure_height))
|
||||||
spec = gridspec.GridSpec(nrows=4, ncols=1, height_ratios=[4, 4, 1, 3], hspace=0.2,
|
spec = gridspec.GridSpec(nrows=4, ncols=1, height_ratios=[4, 5, 1, 3], hspace=0.2,
|
||||||
**adjust_fs(fig, left=4.0))
|
**adjust_fs(fig, left=4.0))
|
||||||
ax = fig.add_subplot(spec[0, 0])
|
ax = fig.add_subplot(spec[0, 0])
|
||||||
ax.set_xlim(0.0, np.pi)
|
ax.set_xlim(0.0, np.pi)
|
||||||
@ -17,7 +15,7 @@ ax.set_xticks(np.arange(0.125*np.pi, 1.*np.pi, 0.125*np.pi))
|
|||||||
ax.set_xticklabels([])
|
ax.set_xticklabels([])
|
||||||
ax.set_ylim(0.0, 3.5)
|
ax.set_ylim(0.0, 3.5)
|
||||||
ax.yaxis.set_major_locator(plt.NullLocator())
|
ax.yaxis.set_major_locator(plt.NullLocator())
|
||||||
ax.text(-0.2, 0.5*3.5, 'Activity', rotation='vertical', va='center')
|
ax.text(-0.2, 0.5*3.5, 'Firing rate', rotation='vertical', va='center')
|
||||||
ax.annotate('Tuning curve',
|
ax.annotate('Tuning curve',
|
||||||
xy=(0.42*np.pi, 2.5), xycoords='data',
|
xy=(0.42*np.pi, 2.5), xycoords='data',
|
||||||
xytext=(0.3*np.pi, 3.2), textcoords='data', ha='right',
|
xytext=(0.3*np.pi, 3.2), textcoords='data', ha='right',
|
||||||
@ -32,27 +30,34 @@ ax.text(0.52*np.pi, 0.7, 'preferred\norientation')
|
|||||||
xx = np.arange(0.0, 2.0*np.pi, 0.01)
|
xx = np.arange(0.0, 2.0*np.pi, 0.01)
|
||||||
pp = 0.5*np.pi
|
pp = 0.5*np.pi
|
||||||
yy = np.exp(np.cos(2.0*(xx+pp)))
|
yy = np.exp(np.cos(2.0*(xx+pp)))
|
||||||
ax.fill_between(xx, yy+0.25*yy, yy-0.25*yy, **fsBa)
|
ss = 1.0/(0.75+2.0*yy)
|
||||||
|
ax.fill_between(xx, yy+ss, yy-ss, **fsBa)
|
||||||
ax.plot(xx, yy, **lsB)
|
ax.plot(xx, yy, **lsB)
|
||||||
|
|
||||||
ax = fig.add_subplot(spec[1, 0])
|
ax = fig.add_subplot(spec[1, 0])
|
||||||
ax.set_xlim(0.0, np.pi)
|
ax.set_xlim(0.0, np.pi)
|
||||||
ax.set_xticks(np.arange(0.125*np.pi, 1.*np.pi, 0.125*np.pi))
|
ax.set_xticks(np.arange(0.125*np.pi, 1.*np.pi, 0.125*np.pi))
|
||||||
ax.set_xticklabels([])
|
ax.set_xticklabels([])
|
||||||
ax.set_ylim(0.0, 3.0)
|
ax.set_ylim(-0.1, 3.5)
|
||||||
ax.yaxis.set_major_locator(plt.NullLocator())
|
ax.yaxis.set_major_locator(plt.NullLocator())
|
||||||
ax.text(-0.2, 0.5*3.5, 'Activity', rotation='vertical', va='center')
|
ax.text(-0.2, 0.5*3.5, 'Firing rate', rotation='vertical', va='center')
|
||||||
xx = np.arange(0.0, 1.0*np.pi, 0.01)
|
xx = np.arange(0.0, 1.0*np.pi, 0.01)
|
||||||
prefphases = np.arange(0.125*np.pi, 1.*np.pi, 0.125*np.pi)
|
prefphases = np.arange(0.125*np.pi, 1.*np.pi, 0.125*np.pi)
|
||||||
responses = []
|
responses = []
|
||||||
xresponse = 0.475*np.pi
|
sigmas = []
|
||||||
|
xresponse = 0.41*np.pi
|
||||||
|
ax.annotate('Orientation of bar',
|
||||||
|
xy=(xresponse, -0.1), xycoords='data',
|
||||||
|
xytext=(xresponse, 3.1), textcoords='data', ha='left', zorder=-10,
|
||||||
|
arrowprops=dict(arrowstyle="->", relpos=(0.0,0.0)) )
|
||||||
for pp, ls, ps in zip(prefphases, [lsE, lsC, lsD, lsB, lsD, lsC, lsE],
|
for pp, ls, ps in zip(prefphases, [lsE, lsC, lsD, lsB, lsD, lsC, lsE],
|
||||||
[psE, psC, psD, psB, psD, psC, psE]) :
|
[psE, psC, psD, psB, psD, psC, psE]) :
|
||||||
yy = np.exp(np.cos(2.0*(xx+pp)))
|
yy = np.exp(np.cos(2.0*(xx+pp)))
|
||||||
#ax.plot(xx, yy, color=cm.autumn(2.0*np.abs(pp/np.pi-0.5), 1))
|
|
||||||
ax.plot(xx, yy, **ls)
|
ax.plot(xx, yy, **ls)
|
||||||
y = np.exp(np.cos(2.0*(xresponse+pp)))
|
y = np.exp(np.cos(2.0*(xresponse+pp)))
|
||||||
responses.append(y + rng.randn()*0.25*y)
|
s = 1.0/(0.75+2.0*y)
|
||||||
|
responses.append(y + rng.randn()*s)
|
||||||
|
sigmas.append(s)
|
||||||
ax.plot(xresponse, y, **ps)
|
ax.plot(xresponse, y, **ps)
|
||||||
responses = np.array(responses)
|
responses = np.array(responses)
|
||||||
|
|
||||||
@ -68,25 +73,23 @@ ax = fig.add_subplot(spec[3, 0])
|
|||||||
ax.set_xlim(0.0, np.pi)
|
ax.set_xlim(0.0, np.pi)
|
||||||
ax.set_xticks(np.arange(0.125*np.pi, 1.*np.pi, 0.125*np.pi))
|
ax.set_xticks(np.arange(0.125*np.pi, 1.*np.pi, 0.125*np.pi))
|
||||||
ax.set_xticklabels([])
|
ax.set_xticklabels([])
|
||||||
ax.set_ylim(-1600, 0)
|
ax.set_ylim(-210, 0)
|
||||||
ax.yaxis.set_major_locator(plt.NullLocator())
|
ax.yaxis.set_major_locator(plt.NullLocator())
|
||||||
ax.set_xlabel('Orientation')
|
ax.set_xlabel('Orientation')
|
||||||
ax.text(-0.2, -800, 'Log-Likelihood', rotation='vertical', va='center')
|
ax.text(-0.2, -100, 'Log-Likelihood', rotation='vertical', va='center')
|
||||||
phases = np.linspace(0.0, 1.1*np.pi, 100)
|
phases = np.linspace(0.0, 1.1*np.pi, 100)
|
||||||
probs = np.zeros((len(responses), len(phases)))
|
probs = np.zeros((len(responses), len(phases)))
|
||||||
for k, (pp, r) in enumerate(zip(prefphases, responses)) :
|
for k, (pp, r, sigma) in enumerate(zip(prefphases, responses, sigmas)) :
|
||||||
y = np.exp(np.cos(2.0*(phases+pp)))
|
y = np.exp(np.cos(2.0*(phases+pp)))
|
||||||
sigma = 0.1*y
|
probs[k,:] = np.exp(-0.5*((r-y)/sigma)**2.0)/np.sqrt(2.0*np.pi*sigma**2)
|
||||||
probs[k,:] = np.exp(-0.5*((r-y)/sigma)**2.0)/np.sqrt(2.0*np.pi)/sigma
|
|
||||||
loglikelihood = np.sum(np.log(probs), 0)
|
loglikelihood = np.sum(np.log(probs), 0)
|
||||||
maxl = np.max(loglikelihood)
|
|
||||||
maxp = phases[np.argmax(loglikelihood)]
|
maxp = phases[np.argmax(loglikelihood)]
|
||||||
ax.annotate('',
|
ax.annotate('',
|
||||||
xy=(maxp, -1600), xycoords='data',
|
xy=(maxp, -210), xycoords='data',
|
||||||
xytext=(maxp, -30), textcoords='data',
|
xytext=(maxp, -10), textcoords='data',
|
||||||
arrowprops=dict(arrowstyle="->", relpos=(0.5,0.5),
|
arrowprops=dict(arrowstyle="->", relpos=(0.5,0.5),
|
||||||
connectionstyle="angle3,angleA=80,angleB=90") )
|
connectionstyle="angle3,angleA=80,angleB=90") )
|
||||||
ax.text(maxp+0.05, -1100, 'most likely\norientation\ngiven the responses')
|
ax.text(maxp+0.05, -150, 'most likely\norientation\ngiven the responses')
|
||||||
ax.plot(phases, loglikelihood, **lsA)
|
ax.plot(phases, loglikelihood, clip_on=False, **lsA)
|
||||||
|
|
||||||
plt.savefig('mlecoding.pdf')
|
plt.savefig('mlecoding.pdf')
|
||||||
|
@ -52,7 +52,7 @@ for i, theta in enumerate(thetas) :
|
|||||||
p=np.prod(ps,axis=0)
|
p=np.prod(ps,axis=0)
|
||||||
# plot it:
|
# plot it:
|
||||||
ax = fig.add_subplot(spec[1, 0])
|
ax = fig.add_subplot(spec[1, 0])
|
||||||
ax.set_xlabel(r'Parameter $\theta$')
|
ax.set_xlabel(r'Parameter $\mu$')
|
||||||
ax.set_ylabel('Likelihood')
|
ax.set_ylabel('Likelihood')
|
||||||
ax.set_xticks(np.arange(1.6, 2.5, 0.4))
|
ax.set_xticks(np.arange(1.6, 2.5, 0.4))
|
||||||
ax.annotate('Maximum',
|
ax.annotate('Maximum',
|
||||||
@ -68,7 +68,7 @@ ax.annotate('',
|
|||||||
ax.plot(thetas, p, **lsAm)
|
ax.plot(thetas, p, **lsAm)
|
||||||
|
|
||||||
ax = fig.add_subplot(spec[1, 1])
|
ax = fig.add_subplot(spec[1, 1])
|
||||||
ax.set_xlabel(r'Parameter $\theta$')
|
ax.set_xlabel(r'Parameter $\mu$')
|
||||||
ax.set_ylabel('Log-Likelihood')
|
ax.set_ylabel('Log-Likelihood')
|
||||||
ax.set_ylim(-50,-20)
|
ax.set_ylim(-50,-20)
|
||||||
ax.set_xticks(np.arange(1.6, 2.5, 0.4))
|
ax.set_xticks(np.arange(1.6, 2.5, 0.4))
|
||||||
|
156
logistic/code/logistic.py
Normal file
156
logistic/code/logistic.py
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from scipy.stats import ttest_ind, mannwhitneyu
|
||||||
|
|
||||||
|
|
||||||
|
def auc(n, dx, uniform=False, plot=False):
|
||||||
|
# loser:
|
||||||
|
if uniform:
|
||||||
|
x0 = np.random.rand(n)
|
||||||
|
else:
|
||||||
|
x0 = np.random.randn(n)*0.3
|
||||||
|
y0 = np.zeros(len(x0))
|
||||||
|
# winner:
|
||||||
|
if uniform:
|
||||||
|
x1 = np.random.rand(n) + dx
|
||||||
|
else:
|
||||||
|
x1 = np.random.randn(n)*0.3 + dx
|
||||||
|
y1 = np.ones(len(x1))
|
||||||
|
|
||||||
|
# combine into a single table:
|
||||||
|
data = np.zeros((len(x0) + len(y0), 2))
|
||||||
|
data[:len(x0),0] = x0
|
||||||
|
data[:len(x0),1] = y0
|
||||||
|
data[len(x0):,0] = x1
|
||||||
|
data[len(x0):,1] = y1
|
||||||
|
|
||||||
|
# fraction of overlapping data values:
|
||||||
|
si = np.argsort(data[:,0])
|
||||||
|
i0 = np.argmax(data[si,1] != data[si[0],1])
|
||||||
|
i1 = len(data) - 1 - np.argmax(data[si[::-1],1] != data[si[-1],1])
|
||||||
|
overlap = (i1-i0+1)/len(data)
|
||||||
|
|
||||||
|
# Cohen's d:
|
||||||
|
m0 = np.mean(data[data[:,1] < 0.5,0])
|
||||||
|
v0 = np.var(data[data[:,1] < 0.5,0])
|
||||||
|
m1 = np.mean(data[data[:,1] > 0.5,0])
|
||||||
|
v1 = np.var(data[data[:,1] > 0.5,0])
|
||||||
|
cohensd = (m1 - m0)/np.sqrt(0.5*(v0+v1))
|
||||||
|
|
||||||
|
# t-test:
|
||||||
|
ttest, p = ttest_ind(data[data[:,1] > 0.5,0], data[data[:,1] < 0.5,0])
|
||||||
|
|
||||||
|
# Mann-Whitney U:
|
||||||
|
mannu, p = mannwhitneyu(data[data[:,1] < 0.5,0], data[data[:,1] > 0.5,0])
|
||||||
|
|
||||||
|
# ROC:
|
||||||
|
thresh = np.arange(np.min(data[:,0])-0.1, np.max(data[:,0])+0.2, 0.01)
|
||||||
|
true_pos = np.zeros(len(thresh))
|
||||||
|
false_pos = np.zeros(len(thresh))
|
||||||
|
for k in range(len(thresh)):
|
||||||
|
true_pos[k] = np.sum(data[data[:,0] > thresh[k],1] > 0.5)/np.sum(data[:,1] > 0.5)
|
||||||
|
false_pos[k] = np.sum(data[data[:,0] > thresh[k],1] < 0.5)/np.sum(data[:,1] < 0.5)
|
||||||
|
|
||||||
|
# AUC:
|
||||||
|
droc = 0.001
|
||||||
|
xroc = np.arange(0.0, 1.0+droc, droc)
|
||||||
|
yroc = np.interp(xroc, false_pos[::-1], true_pos[::-1])
|
||||||
|
auc = np.sum(yroc)*droc
|
||||||
|
|
||||||
|
if plot:
|
||||||
|
fig = plt.figure()
|
||||||
|
ax = fig.add_subplot(211)
|
||||||
|
ax.axvline(data[si[i0],0], color='k')
|
||||||
|
ax.axvline(data[si[i1],0], color='k', lw=2)
|
||||||
|
ax.plot(data[:,0], data[:,1], 'o')
|
||||||
|
ax.plot(data[data[:,1] < 0.5,0], np.zeros(len(data[data[:,1] < 0.5,0]))-0.5, 'or')
|
||||||
|
ax.plot(data[data[:,1] > 0.5,0], np.zeros(len(data[data[:,1] > 0.5,0]))-0.5, 'og')
|
||||||
|
ax.text(0.5*(data[si[i0],0]+data[si[i1],0]), 0.65, 'overlap=%.0f%%' % (100.0*overlap), ha='center')
|
||||||
|
ax.text(0.5*(data[si[i0],0]+data[si[i1],0]), 0.35, "Cohen's d=%.2f" % cohensd, ha='center')
|
||||||
|
ax.set_xlabel('x')
|
||||||
|
ax.set_yticks([0, 1])
|
||||||
|
ax.set_yticklabels(['Lose', 'Win'])
|
||||||
|
if uniform:
|
||||||
|
ax.set_title('Uniformly distributed data')
|
||||||
|
else:
|
||||||
|
ax.set_title('Normally distributed data')
|
||||||
|
|
||||||
|
ax = fig.add_subplot(223)
|
||||||
|
ax.plot(thresh, true_pos, '-og', label='TP')
|
||||||
|
ax.plot(thresh, false_pos, '-or', label='FP')
|
||||||
|
ax.legend()
|
||||||
|
ax.set_xlabel('threshold')
|
||||||
|
|
||||||
|
ax = fig.add_subplot(224)
|
||||||
|
ax.plot(false_pos, true_pos, '-o')
|
||||||
|
ax.fill_between(xroc, yroc)
|
||||||
|
ax.text(0.5, 0.5, 'AUC=%.0f%%' % (100.0*auc))
|
||||||
|
ax.set_xlabel('FP')
|
||||||
|
ax.set_ylabel('TP')
|
||||||
|
fig.tight_layout()
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
return auc, overlap, cohensd, ttest, mannu
|
||||||
|
|
||||||
|
|
||||||
|
# demo:
|
||||||
|
auc(20, 0.5, True, True)
|
||||||
|
auc(20, 0.5, False, True)
|
||||||
|
|
||||||
|
# AUC versus overlap:
|
||||||
|
n = 100
|
||||||
|
aucs_uni = []
|
||||||
|
overlaps_uni = []
|
||||||
|
cohensd_uni = []
|
||||||
|
ttest_uni = []
|
||||||
|
mannu_uni = []
|
||||||
|
aucs_norm = []
|
||||||
|
overlaps_norm = []
|
||||||
|
cohensd_norm = []
|
||||||
|
ttest_norm = []
|
||||||
|
mannu_norm = []
|
||||||
|
for frac in np.arange(-1.5, 1.5, 0.02):
|
||||||
|
a, o, d, t, u = auc(n, frac, True, False)
|
||||||
|
aucs_uni.append(a)
|
||||||
|
overlaps_uni.append(o)
|
||||||
|
cohensd_uni.append(d)
|
||||||
|
ttest_uni.append(t)
|
||||||
|
mannu_uni.append(u)
|
||||||
|
a, o, d, t, u = auc(n, frac, False, False)
|
||||||
|
aucs_norm.append(a)
|
||||||
|
overlaps_norm.append(o)
|
||||||
|
cohensd_norm.append(d)
|
||||||
|
ttest_norm.append(t)
|
||||||
|
mannu_norm.append(u)
|
||||||
|
|
||||||
|
fig, axs = plt.subplots(2, 2)
|
||||||
|
ax = axs[0, 0]
|
||||||
|
ax.plot([0.0, 1.0, 0.0], [0.0, 0.5, 1.0], 'k')
|
||||||
|
ax.plot(overlaps_uni, aucs_uni, 'o', label='uniform pdfs')
|
||||||
|
ax.plot(overlaps_norm, aucs_norm, 'o', label='normal pdfs')
|
||||||
|
ax.set_ylim(0, 1)
|
||||||
|
ax.set_xlabel('fraction of overlapping data')
|
||||||
|
ax.set_ylabel('AUC')
|
||||||
|
ax.legend(loc='center left')
|
||||||
|
ax = axs[0, 1]
|
||||||
|
ax.plot(cohensd_uni, aucs_uni, 'o', label='uniform pdfs')
|
||||||
|
ax.plot(cohensd_norm, aucs_norm, 'o', label='normal pdfs')
|
||||||
|
ax.set_ylim(0, 1)
|
||||||
|
ax.set_xlabel("Cohen's d")
|
||||||
|
ax.set_ylabel('AUC')
|
||||||
|
#ax.legend(loc='center left')
|
||||||
|
ax = axs[1, 1]
|
||||||
|
ax.plot(ttest_uni, aucs_uni, 'o', label='uniform pdfs')
|
||||||
|
ax.plot(ttest_norm, aucs_norm, 'o', label='normal pdfs')
|
||||||
|
ax.set_ylim(0, 1)
|
||||||
|
ax.set_xlabel("Student t")
|
||||||
|
ax.set_ylabel('AUC')
|
||||||
|
ax = axs[1, 0]
|
||||||
|
ax.plot(mannu_uni, aucs_uni, 'o', label='uniform pdfs')
|
||||||
|
ax.plot(mannu_norm, aucs_norm, 'o', label='normal pdfs')
|
||||||
|
ax.set_ylim(0, 1)
|
||||||
|
ax.set_xlabel("Mann-Whitney U")
|
||||||
|
ax.set_ylabel('AUC')
|
||||||
|
fig.savefig('aucoverlap.pdf')
|
||||||
|
plt.show()
|
||||||
|
|
15
plotstyle.py
15
plotstyle.py
@ -22,7 +22,7 @@ colors['orange'] = '#FF9900'
|
|||||||
colors['lightorange'] = '#FFCC00'
|
colors['lightorange'] = '#FFCC00'
|
||||||
colors['yellow'] = '#FFF720'
|
colors['yellow'] = '#FFF720'
|
||||||
colors['green'] = '#99FF00'
|
colors['green'] = '#99FF00'
|
||||||
colors['blue'] = '#0010CC'
|
colors['blue'] = '#0020BB'
|
||||||
colors['gray'] = '#A7A7A7'
|
colors['gray'] = '#A7A7A7'
|
||||||
colors['black'] = '#000000'
|
colors['black'] = '#000000'
|
||||||
colors['white'] = '#FFFFFF'
|
colors['white'] = '#FFFFFF'
|
||||||
@ -33,17 +33,19 @@ colors['white'] = '#FFFFFF'
|
|||||||
# general settings for plot styles:
|
# general settings for plot styles:
|
||||||
lwthick = 3.0
|
lwthick = 3.0
|
||||||
lwthin = 1.8
|
lwthin = 1.8
|
||||||
|
edgewidth = 0.0 if xkcd_style else 1.0
|
||||||
mainline = {'linestyle': '-', 'linewidth': lwthick}
|
mainline = {'linestyle': '-', 'linewidth': lwthick}
|
||||||
minorline = {'linestyle': '-', 'linewidth': lwthin}
|
minorline = {'linestyle': '-', 'linewidth': lwthin}
|
||||||
largemarker = {'marker': 'o', 'markersize': 9, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
|
largemarker = {'marker': 'o', 'markersize': 9, 'markeredgecolor': colors['white'], 'markeredgewidth': edgewidth}
|
||||||
smallmarker = {'marker': 'o', 'markersize': 6, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
|
largeopenmarker = {'marker': 'o', 'markersize': 7, 'markerfacecolor': colors['white'], 'markeredgewidth': 2}
|
||||||
|
smallmarker = {'marker': 'o', 'markersize': 6, 'markeredgecolor': colors['white'], 'markeredgewidth': edgewidth}
|
||||||
largelinepoints = {'linestyle': '-', 'linewidth': lwthick, 'marker': 'o', 'markersize': 10, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
|
largelinepoints = {'linestyle': '-', 'linewidth': lwthick, 'marker': 'o', 'markersize': 10, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
|
||||||
smalllinepoints = {'linestyle': '-', 'linewidth': 1.4, 'marker': 'o', 'markersize': 7, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
|
smalllinepoints = {'linestyle': '-', 'linewidth': 1.4, 'marker': 'o', 'markersize': 7, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
|
||||||
filllw = 1.0
|
filllw = edgewidth
|
||||||
fillec = colors['white']
|
fillec = colors['white']
|
||||||
fillalpha = 0.4
|
fillalpha = 0.4
|
||||||
filledge = {'linewidth': filllw, 'joinstyle': 'round'}
|
filledge = {'linewidth': filllw, 'joinstyle': 'round'}
|
||||||
if int(mpl.__version__.split('.')[0]) < 2:
|
if mpl_major < 2:
|
||||||
del filledge['joinstyle']
|
del filledge['joinstyle']
|
||||||
|
|
||||||
# helper lines:
|
# helper lines:
|
||||||
@ -51,6 +53,8 @@ lsSpine = {'c': colors['black'], 'linestyle': '-', 'linewidth': 1, 'clip_on': Fa
|
|||||||
lsGrid = {'c': colors['gray'], 'linestyle': '--', 'linewidth': 1}
|
lsGrid = {'c': colors['gray'], 'linestyle': '--', 'linewidth': 1}
|
||||||
lsMarker = {'c': colors['black'], 'linestyle': '-', 'linewidth': 2}
|
lsMarker = {'c': colors['black'], 'linestyle': '-', 'linewidth': 2}
|
||||||
|
|
||||||
|
psMarker = dict({'color': colors['black'], 'linestyle': 'none'}, **largemarker)
|
||||||
|
|
||||||
# line (ls), point (ps), and fill styles (fs).
|
# line (ls), point (ps), and fill styles (fs).
|
||||||
|
|
||||||
# Each style is derived from a main color as indicated by the capital letter.
|
# Each style is derived from a main color as indicated by the capital letter.
|
||||||
@ -85,6 +89,7 @@ fsAa = {'facecolor': colors['blue'], 'edgecolor': 'none', 'alpha': fillalpha}
|
|||||||
lsB = dict({'color': colors['red']}, **mainline)
|
lsB = dict({'color': colors['red']}, **mainline)
|
||||||
lsBm = dict({'color': colors['red']}, **minorline)
|
lsBm = dict({'color': colors['red']}, **minorline)
|
||||||
psB = dict({'color': colors['red'], 'linestyle': 'none'}, **largemarker)
|
psB = dict({'color': colors['red'], 'linestyle': 'none'}, **largemarker)
|
||||||
|
psBo = dict({'markeredgecolor': colors['red'], 'linestyle': 'none'}, **largeopenmarker)
|
||||||
psBm = dict({'color': colors['red'], 'linestyle': 'none'}, **smallmarker)
|
psBm = dict({'color': colors['red'], 'linestyle': 'none'}, **smallmarker)
|
||||||
lpsB = dict({'color': colors['red']}, **largelinepoints)
|
lpsB = dict({'color': colors['red']}, **largelinepoints)
|
||||||
lpsBm = dict({'color': colors['red']}, **smalllinepoints)
|
lpsBm = dict({'color': colors['red']}, **smalllinepoints)
|
||||||
|
3
plotting/exercises/Makefile
Normal file
3
plotting/exercises/Makefile
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
TEXFILES=$(wildcard plotting-?.tex)
|
||||||
|
|
||||||
|
include ../../exercises.mk
|
@ -1,17 +1,11 @@
|
|||||||
\vspace*{-8ex}
|
\ifprintanswers%
|
||||||
\begin{center}
|
\else
|
||||||
\textbf{\Large Introduction to scientific computing}\\[1ex]
|
|
||||||
{\large Jan Grewe, Jan Benda}\\[-3ex]
|
|
||||||
Neuroethology lab \hfill --- \hfill Institute for Neurobiology \hfill --- \hfill \includegraphics[width=0.28\textwidth]{UT_WBMW_Black_RGB} \\
|
|
||||||
\end{center}
|
|
||||||
|
|
||||||
The exercises are meant for self-monitoring and revision of the
|
The exercises are meant for self-monitoring and revision of the
|
||||||
lecture. You should try to solve them on your own. In contrast
|
lecture. You should try to solve them on your own. In contrast
|
||||||
to previous exercises, the solutions can not be saved in a single file. Combine the files into a
|
to previous exercises, the solutions can not be saved in a single file. Combine the files into a
|
||||||
single zip archive and submit it via ILIAS. Name the archive according
|
single zip archive and submit it via ILIAS. Name the archive according
|
||||||
to the pattern: ``plotting\_\{surname\}.zip''.
|
to the pattern: ``plotting\_\{surname\}.zip''.
|
||||||
% \ifprintanswers%
|
|
||||||
% \else
|
|
||||||
|
|
||||||
% % Die folgenden Aufgaben dienen der Wiederholung, \"Ubung und
|
% % Die folgenden Aufgaben dienen der Wiederholung, \"Ubung und
|
||||||
% % Selbstkontrolle und sollten eigenst\"andig bearbeitet und gel\"ost
|
% % Selbstkontrolle und sollten eigenst\"andig bearbeitet und gel\"ost
|
||||||
@ -43,4 +37,4 @@ to the pattern: ``plotting\_\{surname\}.zip''.
|
|||||||
% Antworten!
|
% Antworten!
|
||||||
% \end{itemize}
|
% \end{itemize}
|
||||||
|
|
||||||
% \fi
|
\fi
|
||||||
|
@ -1,88 +1,18 @@
|
|||||||
\documentclass[12pt,a4paper,pdftex]{exam}
|
\documentclass[12pt,a4paper,pdftex]{exam}
|
||||||
|
|
||||||
\usepackage[german]{babel}
|
\newcommand{\exercisetopic}{Plotting}
|
||||||
\usepackage{pslatex}
|
\newcommand{\exercisenum}{X}
|
||||||
\usepackage[mediumspace,mediumqspace]{SIunits} % \ohm, \micro
|
\newcommand{\exercisedate}{December 14th, 2020}
|
||||||
\usepackage{xcolor}
|
|
||||||
\usepackage{graphicx}
|
|
||||||
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
|
||||||
|
|
||||||
%%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
\input{../../exercisesheader}
|
||||||
\usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
|
|
||||||
\pagestyle{headandfoot}
|
|
||||||
\ifprintanswers
|
|
||||||
\newcommand{\stitle}{: Solutions}
|
|
||||||
\else
|
|
||||||
\newcommand{\stitle}{}
|
|
||||||
\fi
|
|
||||||
\header{{\bfseries\large Exercise 7\stitle}}{{\bfseries\large Plotting}}{{\bfseries\large November 20, 2019}}
|
|
||||||
\firstpagefooter{Dr. Jan Grewe}{Phone: 29 74588}{Email:
|
|
||||||
jan.grewe@uni-tuebingen.de}
|
|
||||||
\runningfooter{}{\thepage}{}
|
|
||||||
|
|
||||||
\setlength{\baselineskip}{15pt}
|
\firstpagefooter{Dr. Jan Grewe}{}{jan.grewe@uni-tuebingen.de}
|
||||||
\setlength{\parindent}{0.0cm}
|
|
||||||
\setlength{\parskip}{0.3cm}
|
|
||||||
\renewcommand{\baselinestretch}{1.15}
|
|
||||||
|
|
||||||
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage{listings}
|
|
||||||
\lstset{
|
|
||||||
language=Matlab,
|
|
||||||
basicstyle=\ttfamily\footnotesize,
|
|
||||||
numbers=left,
|
|
||||||
numberstyle=\tiny,
|
|
||||||
title=\lstname,
|
|
||||||
showstringspaces=false,
|
|
||||||
commentstyle=\itshape\color{darkgray},
|
|
||||||
breaklines=true,
|
|
||||||
breakautoindent=true,
|
|
||||||
columns=flexible,
|
|
||||||
frame=single,
|
|
||||||
xleftmargin=1em,
|
|
||||||
xrightmargin=1em,
|
|
||||||
aboveskip=10pt
|
|
||||||
}
|
|
||||||
|
|
||||||
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage{amsmath}
|
|
||||||
\usepackage{amssymb}
|
|
||||||
\usepackage{bm}
|
|
||||||
\usepackage{dsfont}
|
|
||||||
\newcommand{\naZ}{\mathds{N}}
|
|
||||||
\newcommand{\gaZ}{\mathds{Z}}
|
|
||||||
\newcommand{\raZ}{\mathds{Q}}
|
|
||||||
\newcommand{\reZ}{\mathds{R}}
|
|
||||||
\newcommand{\reZp}{\mathds{R^+}}
|
|
||||||
\newcommand{\reZpN}{\mathds{R^+_0}}
|
|
||||||
\newcommand{\koZ}{\mathds{C}}
|
|
||||||
|
|
||||||
%%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\newcommand{\continue}{\ifprintanswers%
|
|
||||||
\else
|
|
||||||
\vfill\hspace*{\fill}$\rightarrow$\newpage%
|
|
||||||
\fi}
|
|
||||||
\newcommand{\continuepage}{\ifprintanswers%
|
|
||||||
\newpage
|
|
||||||
\else
|
|
||||||
\vfill\hspace*{\fill}$\rightarrow$\newpage%
|
|
||||||
\fi}
|
|
||||||
\newcommand{\newsolutionpage}{\ifprintanswers%
|
|
||||||
\newpage%
|
|
||||||
\else
|
|
||||||
\fi}
|
|
||||||
|
|
||||||
%%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\newcommand{\qt}[1]{\textbf{#1}\\}
|
|
||||||
\newcommand{\pref}[1]{(\ref{#1})}
|
|
||||||
\newcommand{\extra}{--- Zusatzaufgabe ---\ \mbox{}}
|
|
||||||
\newcommand{\code}[1]{\texttt{#1}}
|
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\begin{document}
|
\begin{document}
|
||||||
|
|
||||||
|
\input{../../exercisestitle}
|
||||||
|
|
||||||
\input{instructions}
|
\input{instructions}
|
||||||
|
|
||||||
\begin{questions}
|
\begin{questions}
|
@ -3,7 +3,7 @@
|
|||||||
\input{../../header}
|
\input{../../header}
|
||||||
|
|
||||||
\lstset{inputpath=../code}
|
\lstset{inputpath=../code}
|
||||||
\graphicspath{{images/}}
|
\graphicspath{{figures/}}
|
||||||
|
|
||||||
\typein[\pagenumber]{Number of first page}
|
\typein[\pagenumber]{Number of first page}
|
||||||
\typein[\chapternumber]{Chapter number}
|
\typein[\chapternumber]{Chapter number}
|
||||||
|
@ -74,20 +74,20 @@ the data was changed or the same kind of plot has to be created for a
|
|||||||
number of datasets.
|
number of datasets.
|
||||||
|
|
||||||
\begin{important}[Why manual editing should be avoided.]
|
\begin{important}[Why manual editing should be avoided.]
|
||||||
On first glance the manual editing of a figure using tools such as
|
On first glance manual editing of a figure using tools such as
|
||||||
Corel draw, Illustrator, etc.\,appears much more convenient and less
|
inkscape, Corel draw, Illustrator, etc.\,appears much more
|
||||||
complex than coding everything into the analysis scripts. This,
|
convenient and less complex than coding everything into the analysis
|
||||||
however, is not entirely true. What if the figure has to be re-drawn
|
scripts. This, however, is not entirely true. What if the figure has
|
||||||
or updated? Then the editing work starts all over again. Rather,
|
to be re-drawn or updated, because, for example, you got more data?
|
||||||
there is a great risk associated with the manual editing
|
Then the editing work starts all over again. In addition, there is a
|
||||||
approach. Axes may be shifted, fonts have not been embedded into the
|
great risk associated with the manual editing approach. Axes may be
|
||||||
final document, annotations have been copy pasted between figures
|
shifted, fonts have not been embedded into the final document,
|
||||||
and are not valid. All of these mistakes can be found in
|
annotations have been copy-pasted between figures and are not
|
||||||
publications and then require an erratum, which is not
|
valid. All of these mistakes can be found in publications and then
|
||||||
desirable. Even if it appears more cumbersome in the beginning one
|
require an erratum, which is not desirable. Even if it appears more
|
||||||
should always try to create publication-ready figures directly from
|
cumbersome in the beginning, one should always try to generate
|
||||||
the data analysis tool using scripts or functions to properly layout
|
publication-ready figures directly from the data analysis tool using
|
||||||
the plot.
|
scripts or functions to properly layout and annotate the plot.
|
||||||
\end{important}
|
\end{important}
|
||||||
|
|
||||||
\subsection{Simple plotting}
|
\subsection{Simple plotting}
|
||||||
@ -148,7 +148,7 @@ or the color. For additional options consult the help.
|
|||||||
|
|
||||||
The following listing shows a simple line plot with axis labeling and a title
|
The following listing shows a simple line plot with axis labeling and a title
|
||||||
|
|
||||||
\lstinputlisting[caption={A simple plot showing a sinewave.},
|
\pageinputlisting[caption={A simple plot showing a sinewave.},
|
||||||
label=simpleplotlisting]{simple_plot.m}
|
label=simpleplotlisting]{simple_plot.m}
|
||||||
|
|
||||||
|
|
||||||
@ -162,10 +162,10 @@ chosen, and star marker symbols is used. Finally, the name of the
|
|||||||
curve is set to \emph{plot 1} which will be displayed in a legend, if
|
curve is set to \emph{plot 1} which will be displayed in a legend, if
|
||||||
chosen.
|
chosen.
|
||||||
|
|
||||||
\begin{lstlisting}[label=settinglineprops, caption={Setting line properties when calling \varcode{plot}.}]
|
\begin{pagelisting}[label=settinglineprops, caption={Setting line properties when calling \varcode{plot}.}]
|
||||||
x = 0:0.1:2*pi; y = sin(x); plot( x, y, 'color', 'r', 'linestyle',
|
x = 0:0.1:2*pi; y = sin(x); plot( x, y, 'color', 'r', 'linestyle',
|
||||||
':', 'marker', '*', 'linewidth', 1.5, 'displayname', 'plot 1')
|
':', 'marker', '*', 'linewidth', 1.5, 'displayname', 'plot 1')
|
||||||
\end{lstlisting}
|
\end{pagelisting}
|
||||||
|
|
||||||
\begin{important}[Choosing the right color.]
|
\begin{important}[Choosing the right color.]
|
||||||
Choosing the perfect color goes a little bit beyond personal
|
Choosing the perfect color goes a little bit beyond personal
|
||||||
@ -277,7 +277,7 @@ the last one defines the output format (box\,\ref{graphicsformatbox}).
|
|||||||
listing\,\ref{niceplotlisting}.}\label{spikedetectionfig}
|
listing\,\ref{niceplotlisting}.}\label{spikedetectionfig}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
\begin{ibox}[t]{\label{graphicsformatbox}File formats for digital artwork.}
|
\begin{ibox}[tp]{\label{graphicsformatbox}File formats for digital artwork.}
|
||||||
There are two fundamentally different types of formats for digital artwork:
|
There are two fundamentally different types of formats for digital artwork:
|
||||||
\begin{enumerate}
|
\begin{enumerate}
|
||||||
\item \enterm[bitmap]{Bitmaps} (\determ{Rastergrafik})
|
\item \enterm[bitmap]{Bitmaps} (\determ{Rastergrafik})
|
||||||
@ -322,7 +322,7 @@ the last one defines the output format (box\,\ref{graphicsformatbox}).
|
|||||||
efficient.
|
efficient.
|
||||||
\end{ibox}
|
\end{ibox}
|
||||||
|
|
||||||
\lstinputlisting[caption={Script for creating the plot shown in
|
\pageinputlisting[caption={Script for creating the plot shown in
|
||||||
\figref{spikedetectionfig}.},
|
\figref{spikedetectionfig}.},
|
||||||
label=niceplotlisting]{automatic_plot.m}
|
label=niceplotlisting]{automatic_plot.m}
|
||||||
|
|
||||||
@ -380,7 +380,7 @@ draw the data. In the example we also provide further arguments to set
|
|||||||
the size, color of the dots and specify that they are filled
|
the size, color of the dots and specify that they are filled
|
||||||
(listing\,\ref{scatterlisting1}).
|
(listing\,\ref{scatterlisting1}).
|
||||||
|
|
||||||
\lstinputlisting[caption={Creating a scatter plot with red filled dots.},
|
\pageinputlisting[caption={Creating a scatter plot with red filled dots.},
|
||||||
label=scatterlisting1, firstline=9, lastline=9]{scatterplot.m}
|
label=scatterlisting1, firstline=9, lastline=9]{scatterplot.m}
|
||||||
|
|
||||||
We could have used plot for this purpose and set the marker to
|
We could have used plot for this purpose and set the marker to
|
||||||
@ -395,8 +395,7 @@ manipulate the color we need to specify a length(x)-by-3 matrix. For
|
|||||||
each dot we provide an individual color (i.e. the RGB triplet in each
|
each dot we provide an individual color (i.e. the RGB triplet in each
|
||||||
row of the color matrix, lines 2-4 in listing\,\ref{scatterlisting2})
|
row of the color matrix, lines 2-4 in listing\,\ref{scatterlisting2})
|
||||||
|
|
||||||
|
\pageinputlisting[caption={Creating a scatter plot with size and color
|
||||||
\lstinputlisting[caption={Creating a scatter plot with size and color
|
|
||||||
variations. The RGB triplets define the respective color intensity
|
variations. The RGB triplets define the respective color intensity
|
||||||
in a range 0:1. Here, we modify only the red color channel.},
|
in a range 0:1. Here, we modify only the red color channel.},
|
||||||
label=scatterlisting2, linerange={15-15, 21-23}]{scatterplot.m}
|
label=scatterlisting2, linerange={15-15, 21-23}]{scatterplot.m}
|
||||||
@ -431,7 +430,7 @@ figures\,\ref{regularsubplotsfig}, \ref{irregularsubplotsfig}).
|
|||||||
also below).}\label{regularsubplotsfig}
|
also below).}\label{regularsubplotsfig}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
\lstinputlisting[caption={Script for creating subplots in a regular
|
\pageinputlisting[caption={Script for creating subplots in a regular
|
||||||
grid \figref{regularsubplotsfig}.}, label=regularsubplotlisting,
|
grid \figref{regularsubplotsfig}.}, label=regularsubplotlisting,
|
||||||
basicstyle=\ttfamily\scriptsize]{regular_subplot.m}
|
basicstyle=\ttfamily\scriptsize]{regular_subplot.m}
|
||||||
|
|
||||||
@ -458,7 +457,7 @@ create a grid with larger numbers of columns and rows, and specify the
|
|||||||
used cells of the grid by passing a vector as the third argument to
|
used cells of the grid by passing a vector as the third argument to
|
||||||
\code{subplot()}.
|
\code{subplot()}.
|
||||||
|
|
||||||
\lstinputlisting[caption={Script for creating subplots of different
|
\pageinputlisting[caption={Script for creating subplots of different
|
||||||
sizes \figref{irregularsubplotsfig}.},
|
sizes \figref{irregularsubplotsfig}.},
|
||||||
label=irregularsubplotslisting,
|
label=irregularsubplotslisting,
|
||||||
basicstyle=\ttfamily\scriptsize]{irregular_subplot.m}
|
basicstyle=\ttfamily\scriptsize]{irregular_subplot.m}
|
||||||
@ -516,7 +515,7 @@ its properties. See the \matlab{} help for more information.
|
|||||||
listing\,\ref{errorbarlisting} for A and C and listing\,\ref{errorbarlisting2} }\label{errorbarplot}
|
listing\,\ref{errorbarlisting} for A and C and listing\,\ref{errorbarlisting2} }\label{errorbarplot}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
\lstinputlisting[caption={Illustrating estimation errors using error bars. Script that
|
\pageinputlisting[caption={Illustrating estimation errors using error bars. Script that
|
||||||
creates \figref{errorbarplot}. A, B},
|
creates \figref{errorbarplot}. A, B},
|
||||||
label=errorbarlisting, firstline=13, lastline=31,
|
label=errorbarlisting, firstline=13, lastline=31,
|
||||||
basicstyle=\ttfamily\scriptsize]{errorbarplot.m}
|
basicstyle=\ttfamily\scriptsize]{errorbarplot.m}
|
||||||
@ -550,7 +549,7 @@ leading to invisibility and a value of one to complete
|
|||||||
opaqueness. Finally, we use the normal plot command to draw a line
|
opaqueness. Finally, we use the normal plot command to draw a line
|
||||||
connecting the average values (line 12).
|
connecting the average values (line 12).
|
||||||
|
|
||||||
\lstinputlisting[caption={Illustrating estimation errors using a shaded area. Script that
|
\pageinputlisting[caption={Illustrating estimation errors using a shaded area. Script that
|
||||||
creates \figref{errorbarplot} C.}, label=errorbarlisting2,
|
creates \figref{errorbarplot} C.}, label=errorbarlisting2,
|
||||||
firstline=33,
|
firstline=33,
|
||||||
basicstyle=\ttfamily\scriptsize]{errorbarplot.m}
|
basicstyle=\ttfamily\scriptsize]{errorbarplot.m}
|
||||||
@ -575,7 +574,7 @@ listing\,\ref{annotationsplotlisting}. For more options consult the
|
|||||||
listing\,\ref{annotationsplotlisting}}\label{annotationsplot}
|
listing\,\ref{annotationsplotlisting}}\label{annotationsplot}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
\lstinputlisting[caption={Adding annotations to figures. Script that
|
\pageinputlisting[caption={Adding annotations to figures. Script that
|
||||||
creates \figref{annotationsplot}.},
|
creates \figref{annotationsplot}.},
|
||||||
label=annotationsplotlisting,
|
label=annotationsplotlisting,
|
||||||
basicstyle=\ttfamily\scriptsize]{annotations.m}
|
basicstyle=\ttfamily\scriptsize]{annotations.m}
|
||||||
@ -632,7 +631,7 @@ Lissajous figure. The basic steps are:
|
|||||||
\item Finally, close the file (line 31).
|
\item Finally, close the file (line 31).
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
|
|
||||||
\lstinputlisting[caption={Making animations and saving them as a
|
\pageinputlisting[caption={Making animations and saving them as a
|
||||||
movie.}, label=animationlisting, firstline=16, lastline=36,
|
movie.}, label=animationlisting, firstline=16, lastline=36,
|
||||||
basicstyle=\ttfamily\scriptsize]{movie_example.m}
|
basicstyle=\ttfamily\scriptsize]{movie_example.m}
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ function [time, rate] = binned_rate(spikes, bin_width, dt, t_max)
|
|||||||
rate = zeros(size(time));
|
rate = zeros(size(time));
|
||||||
h = hist(spikes, bins) ./ bin_width;
|
h = hist(spikes, bins) ./ bin_width;
|
||||||
for i = 2:length(bins)
|
for i = 2:length(bins)
|
||||||
rate(round(bins(i - 1) / dt) + 1:round(bins(i) / dt)) = h(i);
|
rate(round(bins(i-1)/dt) + 1:round(bins(i)/dt)) = h(i);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -1,10 +0,0 @@
|
|||||||
function pcn = colorednoisepdf( x, misi, epsilon, tau )
|
|
||||||
% returns the ISI pdf for PIF with colored noise drive
|
|
||||||
% x: the input ISI
|
|
||||||
% misis: the mean isi
|
|
||||||
% epsilon: a parameter
|
|
||||||
% tau: the correlation time of the noise
|
|
||||||
gamma1 = x/tau+exp(-x/tau)-1.0;
|
|
||||||
gamma2 = 1.0-exp(-x/tau);
|
|
||||||
pcn=exp(-(x-misi).^2./(4.0*epsilon*tau.^2.*gamma1)).*(((misi-x).*gamma2+2*gamma1*tau).^2./(2*gamma1*tau^2)-epsilon*(gamma2.^2-2*gamma1.*exp(-x/tau))) ./ (2*tau*sqrt(4*pi*epsilon*gamma1.^3));
|
|
||||||
end
|
|
@ -1,24 +0,0 @@
|
|||||||
% misi = 0.02;
|
|
||||||
% epsilon = 1.0;
|
|
||||||
% tau = 0.1;
|
|
||||||
x=0:0.002:0.1;
|
|
||||||
% pcn = colorednoisepdf( x, misi, epsilon, tau )+10.0*randn( size( x ) );
|
|
||||||
% plot( x, pcn );
|
|
||||||
|
|
||||||
spikes = lifouspikes( 10, 15, 50.0, 1.0, 1.0 );
|
|
||||||
isivec = isis( spikes );
|
|
||||||
misi = mean( isivec );
|
|
||||||
1.0/misi
|
|
||||||
isibins = 0:0.0005:0.1;
|
|
||||||
[ n, c ] = hist( isivec, isibins );
|
|
||||||
n = n / sum(n)/(isibins(2)-isibins(1));
|
|
||||||
bar( c, n );
|
|
||||||
|
|
||||||
beta0 = [ 1.0, 0.01 ];
|
|
||||||
b = nlinfit(c(1:end-2), n(1:end-2), @(b,x)(colorednoisepdf(x, misi, b(1), b(2))), beta0)
|
|
||||||
|
|
||||||
pcn = colorednoisepdf( x, misi, b(1), b(2) );
|
|
||||||
hold on
|
|
||||||
plot( x, pcn, 'r', 'LineWidth', 3 );
|
|
||||||
hold off
|
|
||||||
|
|
@ -10,19 +10,18 @@ function [time, rate] = convolution_rate(spikes, sigma, dt, t_max)
|
|||||||
% t_max : the trial duration in seconds.
|
% t_max : the trial duration in seconds.
|
||||||
%
|
%
|
||||||
% Returns:
|
% Returns:
|
||||||
two vectors containing the time and the rate.
|
% two vectors containing the time and the rate.
|
||||||
|
|
||||||
time = 0:dt:t_max - dt;
|
time = 0:dt:t_max - dt;
|
||||||
rate = zeros(size(time));
|
rate = zeros(size(time));
|
||||||
spike_indices = round(spikes / dt);
|
spike_indices = round(spikes / dt);
|
||||||
rate(spike_indices) = 1;
|
rate(spike_indices) = 1;
|
||||||
kernel = gaussKernel(sigma, dt);
|
kernel = gaussKernel(sigma, dt);
|
||||||
|
rate = conv(rate, kernel, 'same');
|
||||||
rate = conv(rate, kernel, 'same');
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
function y = gaussKernel(s, step)
|
function y = gaussKernel(s, step)
|
||||||
x = -4 * s:step:4 * s;
|
x = -4 * s:step:4 * s;
|
||||||
y = exp(-0.5 .* (x ./ s) .^ 2) ./ sqrt(2 * pi) / s;
|
y = exp(-0.5 .* (x ./ s).^ 2) ./ sqrt(2 * pi) / s;
|
||||||
end
|
end
|
||||||
|
@ -1,44 +0,0 @@
|
|||||||
function [counts, bins] = counthist(spikes, w)
|
|
||||||
% Compute and plot histogram of spike counts.
|
|
||||||
%
|
|
||||||
% [counts, bins] = counthist(spikes, w)
|
|
||||||
%
|
|
||||||
% Arguments:
|
|
||||||
% spikes: a cell array of vectors of spike times in seconds
|
|
||||||
% w: observation window duration in seconds for computing the counts
|
|
||||||
%
|
|
||||||
% Returns:
|
|
||||||
% counts: the histogram of counts normalized to probabilities
|
|
||||||
% bins: the bin centers for the histogram
|
|
||||||
|
|
||||||
% collect spike counts:
|
|
||||||
tmax = spikes{1}(end);
|
|
||||||
n = [];
|
|
||||||
r = [];
|
|
||||||
for k = 1:length(spikes)
|
|
||||||
times = spikes{k};
|
|
||||||
% alternative 1: count the number of spikes in each window:
|
|
||||||
% for tk = 0:w:tmax-w
|
|
||||||
% nn = sum((times >= tk) & (times < tk+w));
|
|
||||||
% %nn = length(find((times >= tk) & (times < tk+w)));
|
|
||||||
% n = [n nn];
|
|
||||||
% end
|
|
||||||
% alternative 2: use the hist() function to do that!
|
|
||||||
tbins = 0.5*w:w:tmax-0.5*w;
|
|
||||||
nn = hist(times, tbins);
|
|
||||||
n = [n nn];
|
|
||||||
end
|
|
||||||
|
|
||||||
% histogram of spike counts:
|
|
||||||
maxn = max(n);
|
|
||||||
[counts, bins] = hist(n, 0:1:maxn+10);
|
|
||||||
% normalize to probabilities:
|
|
||||||
counts = counts / sum(counts);
|
|
||||||
|
|
||||||
% plot:
|
|
||||||
if nargout == 0
|
|
||||||
bar( bins, counts );
|
|
||||||
xlabel( 'counts k' );
|
|
||||||
ylabel( 'P(k)' );
|
|
||||||
end
|
|
||||||
end
|
|
@ -1,55 +0,0 @@
|
|||||||
function [counts, bins] = counthist(spikes, w)
|
|
||||||
% computes count histogram and compare with Poisson distribution
|
|
||||||
%
|
|
||||||
% [counts, bins] = counthist(spikes, w)
|
|
||||||
%
|
|
||||||
% Arguments:
|
|
||||||
% spikes: a cell array of vectors of spike times in seconds
|
|
||||||
% w: observation window duration in seconds for computing the counts
|
|
||||||
%
|
|
||||||
% Returns:
|
|
||||||
% counts: the histogram of counts normalized to probabilities
|
|
||||||
% bins: the bin centers for the histogram
|
|
||||||
|
|
||||||
% collect spike counts:
|
|
||||||
tmax = spikes{1}(end);
|
|
||||||
n = [];
|
|
||||||
r = [];
|
|
||||||
for k = 1:length(spikes)
|
|
||||||
times = spikes{k};
|
|
||||||
% alternative 1: count the number of spikes in each window:
|
|
||||||
% for tk = 0:w:tmax-w
|
|
||||||
% nn = sum( ( times >= tk ) & ( times < tk+w ) );
|
|
||||||
% %nn = length( find( ( times >= tk ) & ( times < tk+w ) ) );
|
|
||||||
% n = [ n nn ];
|
|
||||||
% end
|
|
||||||
% alternative 2: use the hist function to do that!
|
|
||||||
tbins = 0.5*w:w:tmax-0.5*w;
|
|
||||||
nn = hist(times, tbins);
|
|
||||||
n = [ n nn ];
|
|
||||||
% the rate of the spikes:
|
|
||||||
rate = (length(times)-1)/(times(end) - times(1));
|
|
||||||
r = [ r rate ];
|
|
||||||
end
|
|
||||||
|
|
||||||
% histogram of spike counts:
|
|
||||||
maxn = max( n );
|
|
||||||
[counts, bins ] = hist( n, 0:1:maxn+10 );
|
|
||||||
% normalize to probabilities:
|
|
||||||
counts = counts / sum( counts );
|
|
||||||
|
|
||||||
% plot:
|
|
||||||
if nargout == 0
|
|
||||||
bar( bins, counts );
|
|
||||||
hold on;
|
|
||||||
% Poisson distribution:
|
|
||||||
rate = mean( r );
|
|
||||||
x = 0:1:maxn+10;
|
|
||||||
a = rate*w;
|
|
||||||
y = a.^x.*exp(-a)./factorial(x);
|
|
||||||
plot( x, y, 'r', 'LineWidth', 3 );
|
|
||||||
hold off;
|
|
||||||
xlabel( 'counts k' );
|
|
||||||
ylabel( 'P(k)' );
|
|
||||||
end
|
|
||||||
end
|
|
25
pointprocesses/code/counts.m
Normal file
25
pointprocesses/code/counts.m
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
function n = counts(spikes, w)
|
||||||
|
% Count spikes in time windows.
|
||||||
|
%
|
||||||
|
% Arguments:
|
||||||
|
% spikes: a cell array of vectors of spike times in seconds
|
||||||
|
% w: duration of window in seconds for computing the counts
|
||||||
|
%
|
||||||
|
% Returns:
|
||||||
|
% n: vector with spike counts
|
||||||
|
tmax = spikes{1}(end);
|
||||||
|
n = [];
|
||||||
|
for k = 1:length(spikes)
|
||||||
|
times = spikes{k};
|
||||||
|
% alternative 1: count the number of spikes in each window:
|
||||||
|
% for tk = 0:w:tmax-w
|
||||||
|
% nn = sum((times >= tk) & (times < tk+w));
|
||||||
|
% % nn = length(find((times >= tk) & (times < tk+w)));
|
||||||
|
% n = [n, nn];
|
||||||
|
% end
|
||||||
|
% alternative 2: use the hist() function to do that!
|
||||||
|
tbins = 0.5*w:w:tmax-0.5*w;
|
||||||
|
nn = hist(times, tbins);
|
||||||
|
n = [n, nn];
|
||||||
|
end
|
||||||
|
end
|
@ -1,39 +0,0 @@
|
|||||||
function fano( spikes )
|
|
||||||
% computes fano factor as a function of window size
|
|
||||||
% spikes: a cell array of vectors of spike times
|
|
||||||
|
|
||||||
tmax = spikes{1}(end);
|
|
||||||
windows = 0.01:0.05:0.01*tmax;
|
|
||||||
mc = windows;
|
|
||||||
vc = windows;
|
|
||||||
ff = windows;
|
|
||||||
fs = windows;
|
|
||||||
for j = 1:length(windows)
|
|
||||||
w = windows( j );
|
|
||||||
% collect counts:
|
|
||||||
n = [];
|
|
||||||
for k = 1:length(spikes)
|
|
||||||
for tk = 0:w:tmax-w
|
|
||||||
nn = sum( ( spikes{k} >= tk ) & ( spikes{k} < tk+w ) );
|
|
||||||
%nn = length( find( ( spikes{k} >= tk ) & ( spikes{k} < tk+w ) ) );
|
|
||||||
n = [ n nn ];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
% statistics for current window:
|
|
||||||
mc(j) = mean( n );
|
|
||||||
vc(j) = var( n );
|
|
||||||
ff(j) = vc( j )/mc( j );
|
|
||||||
fs(j) = sqrt(vc( j )/mc( j ));
|
|
||||||
end
|
|
||||||
|
|
||||||
subplot( 1, 2, 1 );
|
|
||||||
scatter( mc, vc, 'filled' );
|
|
||||||
xlabel( 'Mean count' );
|
|
||||||
ylabel( 'Count variance' );
|
|
||||||
|
|
||||||
subplot( 1, 2, 2 );
|
|
||||||
scatter( 1000.0*windows, fs, 'filled' );
|
|
||||||
xlabel( 'Window W [ms]' );
|
|
||||||
ylabel( 'Fano factor' );
|
|
||||||
end
|
|
||||||
|
|
@ -2,8 +2,6 @@ function spikes = hompoissonspikes(rate, trials, tmax)
|
|||||||
% Generate spike times of a homogeneous poisson process
|
% Generate spike times of a homogeneous poisson process
|
||||||
% using the exponential interspike interval distribution.
|
% using the exponential interspike interval distribution.
|
||||||
%
|
%
|
||||||
% spikes = hompoissonspikes(rate, trials, tmax)
|
|
||||||
%
|
|
||||||
% Arguments:
|
% Arguments:
|
||||||
% rate: the rate of the Poisson process in Hertz
|
% rate: the rate of the Poisson process in Hertz
|
||||||
% trials: number of trials that should be generated
|
% trials: number of trials that should be generated
|
||||||
@ -11,7 +9,6 @@ function spikes = hompoissonspikes(rate, trials, tmax)
|
|||||||
%
|
%
|
||||||
% Returns:
|
% Returns:
|
||||||
% spikes: a cell array of vectors of spike times in seconds
|
% spikes: a cell array of vectors of spike times in seconds
|
||||||
|
|
||||||
spikes = cell(trials, 1);
|
spikes = cell(trials, 1);
|
||||||
mu = 1.0/rate;
|
mu = 1.0/rate;
|
||||||
nintervals = 2*round(tmax/mu);
|
nintervals = 2*round(tmax/mu);
|
||||||
|
@ -19,6 +19,6 @@ function [time, rate] = instantaneous_rate(spikes, dt, t_max)
|
|||||||
spike_indices = [1 round(spikes ./ dt)];
|
spike_indices = [1 round(spikes ./ dt)];
|
||||||
|
|
||||||
for i = 2:length(spike_indices)
|
for i = 2:length(spike_indices)
|
||||||
rate(spike_indices(i - 1):spike_indices(i)) = inst_rate(i - 1);
|
rate(spike_indices(i-1):spike_indices(i)) = inst_rate(i-1);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -1,25 +1,13 @@
|
|||||||
function [pdf, centers] = isihist(isis, binwidth)
|
function [pdf, centers] = isihist(isis, binwidth)
|
||||||
% Compute normalized histogram of interspike intervals.
|
% Compute normalized histogram of interspike intervals.
|
||||||
%
|
%
|
||||||
% [pdf, centers] = isihist(isis, binwidth)
|
|
||||||
%
|
|
||||||
% Arguments:
|
% Arguments:
|
||||||
% isis: vector of interspike intervals in seconds
|
% isis: vector of interspike intervals in seconds
|
||||||
% binwidth: optional width in seconds to be used for the isi bins
|
% binwidth: width in seconds to be used for the ISI bins
|
||||||
%
|
%
|
||||||
% Returns:
|
% Returns:
|
||||||
% pdf: vector with probability density of interspike intervals in Hz
|
% pdf: vector with pdf of interspike intervals in Hertz
|
||||||
% centers: vector with centers of interspikeintervalls in seconds
|
% centers: vector with centers of interspikeintervalls in seconds
|
||||||
|
|
||||||
if nargin < 2
|
|
||||||
% compute good binwidth:
|
|
||||||
nperbin = 200; % average number of data points per bin
|
|
||||||
bins = length(isis)/nperbin; % number of bins
|
|
||||||
binwidth = max(isis)/bins;
|
|
||||||
if binwidth < 5e-4 % half a millisecond
|
|
||||||
binwidth = 5e-4;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
bins = 0.5*binwidth:binwidth:max(isis);
|
bins = 0.5*binwidth:binwidth:max(isis);
|
||||||
% histogram data:
|
% histogram data:
|
||||||
[nelements, centers] = hist(isis, bins);
|
[nelements, centers] = hist(isis, bins);
|
||||||
|
@ -1,24 +0,0 @@
|
|||||||
function isireturnmap( isis, lag2 )
|
|
||||||
% plot return maps for lag 1 and lag lag2
|
|
||||||
|
|
||||||
clf;
|
|
||||||
subplot( 1, 2, 1 );
|
|
||||||
lag = 1;
|
|
||||||
scatter( 1000.0*isis(1:end-lag)', 1000.0*isis(1+lag:end)', 'b', 'filled', 'MarkerEdgeColor', 'white' );
|
|
||||||
xlabel( 'ISI T_i [ms]' );
|
|
||||||
ylabel( 'ISI T_{i+1} [ms]' );
|
|
||||||
maxisi = max( isis );
|
|
||||||
maxy = ceil(maxisi/10)*10.0;
|
|
||||||
xlim( [0 1.5*maxy ])
|
|
||||||
ylim( [0 maxy ])
|
|
||||||
|
|
||||||
subplot( 1, 2, 2 );
|
|
||||||
lag = lag2;
|
|
||||||
scatter( 1000.0*isis(1:end-lag)', 1000.0*isis(1+lag:end)', 'b', 'filled', 'MarkerEdgeColor', 'white' );
|
|
||||||
xlabel( 'ISI T_i [ms]' );
|
|
||||||
ylabel( 'ISI T_{i+2} [ms]' );
|
|
||||||
xlim( [0 1.5*maxy ])
|
|
||||||
ylim( [0 maxy ])
|
|
||||||
|
|
||||||
end
|
|
||||||
|
|
@ -1,15 +1,11 @@
|
|||||||
function isivec = isis(spikes)
|
function isivec = isis(spikes)
|
||||||
% returns a single list of isis computed from all trials in spikes
|
% returns a single list of isis computed from all trials in spikes
|
||||||
%
|
%
|
||||||
% isivec = isis(spikes)
|
|
||||||
%
|
|
||||||
% Arguments:
|
% Arguments:
|
||||||
% spikes: a cell array of vectors of spike times in seconds
|
% spikes: a cell array of vectors of spike times in seconds
|
||||||
% isivec: a column vector with all the interspike intervalls
|
|
||||||
%
|
%
|
||||||
% Returns:
|
% Returns:
|
||||||
% isivec: a column vector with all the interspike intervalls
|
% isivec: a column vector with all the interspike intervals
|
||||||
|
|
||||||
isivec = [];
|
isivec = [];
|
||||||
for k = 1:length(spikes)
|
for k = 1:length(spikes)
|
||||||
difftimes = diff(spikes{k});
|
difftimes = diff(spikes{k});
|
||||||
|
@ -1,35 +1,21 @@
|
|||||||
function isicorr = isiserialcorr(isivec, maxlag)
|
function [isicorr, lags] = isiserialcorr(isivec, maxlag)
|
||||||
% serial correlation of interspike intervals
|
% serial correlation of interspike intervals
|
||||||
%
|
%
|
||||||
% isicorr = isiserialcorr(isivec, maxlag)
|
|
||||||
%
|
|
||||||
% Arguments:
|
% Arguments:
|
||||||
% isivec: vector of interspike intervals in seconds
|
% isivec: vector of interspike intervals in seconds
|
||||||
% maxlag: the maximum lag in seconds
|
% maxlag: the maximum lag
|
||||||
%
|
%
|
||||||
% Returns:
|
% Returns:
|
||||||
% isicorr: vector with the serial correlations for lag 0 to maxlag
|
% isicorr: vector with the serial correlations for lag 0 to maxlag
|
||||||
|
% lags: vector with the lags corresponding to isicorr
|
||||||
lags = 0:maxlag;
|
lags = 0:maxlag;
|
||||||
isicorr = zeros(size(lags));
|
isicorr = zeros(size(lags));
|
||||||
for k = 1:length(lags)
|
for k = 1:length(lags)
|
||||||
lag = lags(k);
|
lag = lags(k);
|
||||||
if length(isivec) > lag+10 % ensure "enough" data
|
if length(isivec) > lag+10 % ensure "enough" data
|
||||||
% NOTE: the arguments to corr must be column vectors!
|
% NOTE: the arguments to corr must be column vectors!
|
||||||
% We insure this in the isis() function that
|
% We insure this already in the isis() function.
|
||||||
% generates the isivec.
|
|
||||||
isicorr(k) = corr(isivec(1:end-lag), isivec(lag+1:end));
|
isicorr(k) = corr(isivec(1:end-lag), isivec(lag+1:end));
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if nargout == 0
|
|
||||||
% plot:
|
|
||||||
plot(lags, isicorr, '-b');
|
|
||||||
hold on;
|
|
||||||
scatter(lags, isicorr, 50.0, 'b', 'filled');
|
|
||||||
hold off;
|
|
||||||
xlabel('Lag k')
|
|
||||||
ylabel('\rho_k')
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -1,24 +1,14 @@
|
|||||||
w = 0.1;
|
function plotcounthist(spikes, w)
|
||||||
cmax = 8;
|
% Plot histogram of spike counts.
|
||||||
pmax = 0.5;
|
%
|
||||||
subplot(1, 3, 1);
|
% Arguments:
|
||||||
counthist(poissonspikes, w);
|
% spikes: a cell array of vectors of spike times in seconds
|
||||||
xlim([0 cmax])
|
% w: duration of window in seconds for computing the counts
|
||||||
set(gca, 'XTick', 0:2:cmax)
|
n = counts(spikes, w);
|
||||||
ylim([0 pmax])
|
maxn = max(n);
|
||||||
title('Poisson');
|
[counts, bins] = hist(n, 0:1:maxn+10);
|
||||||
|
counts = counts / sum(counts);
|
||||||
subplot(1, 3, 2);
|
bar(bins, counts);
|
||||||
counthist(pifouspikes, w);
|
xlabel('counts k');
|
||||||
xlim([0 cmax])
|
ylabel('P(k)');
|
||||||
set(gca, 'XTick', 0:2:cmax)
|
end
|
||||||
ylim([0 pmax])
|
|
||||||
title('PIF OU');
|
|
||||||
|
|
||||||
subplot(1, 3, 3);
|
|
||||||
counthist(lifadaptspikes, w);
|
|
||||||
xlim([0 cmax])
|
|
||||||
set(gca, 'XTick', 0:2:cmax)
|
|
||||||
ylim([0 pmax])
|
|
||||||
title('LIF adapt');
|
|
||||||
savefigpdf(gcf, 'counthist.pdf', 20, 7);
|
|
||||||
|
31
pointprocesses/code/plotfanofactor.m
Normal file
31
pointprocesses/code/plotfanofactor.m
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
function plotfanofactor(spikes, wmin, wmax)
|
||||||
|
% Compute and plot Fano factor as a function of window size.
|
||||||
|
%
|
||||||
|
% Arguments:
|
||||||
|
% spikes: a cell array of vectors of spike times in seconds
|
||||||
|
% wmin: minimum window size in seconds
|
||||||
|
% wmax: maximum window size in seconds
|
||||||
|
windows = logspace(log10(wmin), log10(wmax), 100);
|
||||||
|
mc = zeros(1, length(windows));
|
||||||
|
vc = zeros(1, length(windows));
|
||||||
|
for k = 1:length(windows)
|
||||||
|
w = windows(k);
|
||||||
|
n = counts(spikes, w);
|
||||||
|
mc(k) = mean(n);
|
||||||
|
vc(k) = var(n);
|
||||||
|
end
|
||||||
|
|
||||||
|
subplot(1, 2, 1);
|
||||||
|
scatter(mc, vc, 'filled');
|
||||||
|
xlabel('Mean count');
|
||||||
|
ylabel('Count variance');
|
||||||
|
|
||||||
|
subplot(1, 2, 2);
|
||||||
|
scatter(1000.0*windows, vc ./ mc, 'filled');
|
||||||
|
xlabel('Window [ms]');
|
||||||
|
ylabel('Fano factor');
|
||||||
|
xlim(1000.0*[windows(1) windows(end)])
|
||||||
|
ylim([0.0 1.1]);
|
||||||
|
set(gca, 'XScale', 'log');
|
||||||
|
end
|
||||||
|
|
@ -1,24 +1,13 @@
|
|||||||
function plotisihist(isis, binwidth)
|
function plotisihist(isis, binwidth)
|
||||||
% Plot and annotate histogram of interspike intervals.
|
% Plot and annotate histogram of interspike intervals.
|
||||||
%
|
%
|
||||||
% plotisihist(isis, binwidth)
|
|
||||||
%
|
|
||||||
% Arguments:
|
% Arguments:
|
||||||
% isis: vector of interspike intervals in seconds
|
% isis: vector of interspike intervals in seconds
|
||||||
% binwidth: optional width in seconds to be used for the isi bins
|
% binwidth: width in seconds to be used for the ISI bins
|
||||||
|
[pdf, centers] = isihist(isis, binwidth);
|
||||||
% compute normalized histogram:
|
|
||||||
if nargin < 2
|
|
||||||
[pdf, centers] = isihist(isis);
|
|
||||||
else
|
|
||||||
[pdf, centers] = isihist(isis, binwidth);
|
|
||||||
end
|
|
||||||
|
|
||||||
% plot:
|
|
||||||
bar(1000.0*centers, pdf); % milliseconds on x-axis
|
bar(1000.0*centers, pdf); % milliseconds on x-axis
|
||||||
xlabel('ISI [ms]')
|
xlabel('ISI [ms]')
|
||||||
ylabel('p(ISI) [1/s]')
|
ylabel('p(ISI) [1/s]')
|
||||||
% annotation:
|
|
||||||
misi = mean(isis);
|
misi = mean(isis);
|
||||||
sdisi = std(isis);
|
sdisi = std(isis);
|
||||||
text(0.95, 0.8, sprintf('mean=%.1f ms', 1000.0*misi), ...
|
text(0.95, 0.8, sprintf('mean=%.1f ms', 1000.0*misi), ...
|
||||||
|
14
pointprocesses/code/plotisiserialcorr.m
Normal file
14
pointprocesses/code/plotisiserialcorr.m
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
function isicorr = plotisiserialcorr(isivec, maxlag)
|
||||||
|
% plot serial correlation of interspike intervals
|
||||||
|
%
|
||||||
|
% Arguments:
|
||||||
|
% isivec: vector of interspike intervals in seconds
|
||||||
|
% maxlag: the maximum lag
|
||||||
|
[isicorr, lags] = isiserialcorr(isivec, maxlag);
|
||||||
|
plot(lags, isicorr, '-b');
|
||||||
|
hold on;
|
||||||
|
scatter(lags, isicorr, 20.0, 'b', 'filled');
|
||||||
|
hold off;
|
||||||
|
xlabel('Lag k')
|
||||||
|
ylabel('\rho_k')
|
||||||
|
end
|
@ -1,100 +0,0 @@
|
|||||||
%% load data:
|
|
||||||
clear all
|
|
||||||
% alternative 1:
|
|
||||||
% pro: no structs. contra: global unknown variables
|
|
||||||
load poisson.mat
|
|
||||||
whos
|
|
||||||
poissonspikes = spikes;
|
|
||||||
load pifou.mat;
|
|
||||||
pifouspikes = spikes;
|
|
||||||
load lifadapt.mat;
|
|
||||||
lifadaptspikes = spikes;
|
|
||||||
clear spikes;
|
|
||||||
% alternative 2:
|
|
||||||
% pro: clean code. contra: structs that we do not really know yet
|
|
||||||
clear all
|
|
||||||
x = load( 'poisson.mat' );
|
|
||||||
poissonspikes = x.spikes;
|
|
||||||
x = load( 'pifou.mat' );
|
|
||||||
pifouspikes = x.spikes;
|
|
||||||
x = load( 'lifadapt.mat' );
|
|
||||||
lifadaptspikes = x.spikes;
|
|
||||||
|
|
||||||
%% spike raster plots:
|
|
||||||
tmax = 1.0;
|
|
||||||
subplot(1, 3, 1);
|
|
||||||
spikeraster(poissonspikes, tmax);
|
|
||||||
title('Poisson');
|
|
||||||
|
|
||||||
subplot(1, 3, 2);
|
|
||||||
spikeraster(pifouspikes, tmax);
|
|
||||||
title('PIF OU');
|
|
||||||
|
|
||||||
subplot(1, 3, 3);
|
|
||||||
spikeraster(lifadaptspikes, tmax);
|
|
||||||
title('LIF adapt');
|
|
||||||
|
|
||||||
%% isi histograms:
|
|
||||||
maxisi = 300.0;
|
|
||||||
binwidth = 0.002;
|
|
||||||
subplot(1, 3, 1);
|
|
||||||
poissonisis = isis(poissonspikes);
|
|
||||||
isihist(poissonisis, binwidth);
|
|
||||||
xlim([0, maxisi])
|
|
||||||
title('Poisson');
|
|
||||||
|
|
||||||
subplot(1, 3, 2);
|
|
||||||
pifouisis = isis(pifouspikes);
|
|
||||||
isihist(pifouisis, binwidth);
|
|
||||||
xlim([0, maxisi])
|
|
||||||
title('PIF OU');
|
|
||||||
|
|
||||||
subplot(1, 3, 3);
|
|
||||||
lifadaptisis = isis(lifadaptspikes);
|
|
||||||
isihist(lifadaptisis, binwidth);
|
|
||||||
xlim([0, maxisi])
|
|
||||||
title('LIF adapt');
|
|
||||||
|
|
||||||
%% serial correlations:
|
|
||||||
maxlag = 10;
|
|
||||||
rrange = [-0.5, 1.05];
|
|
||||||
subplot(1, 3, 1);
|
|
||||||
isiserialcorr(poissonisis, maxlag);
|
|
||||||
ylim(rrange)
|
|
||||||
title('Poisson');
|
|
||||||
|
|
||||||
subplot(1, 3, 2);
|
|
||||||
isiserialcorr(pifouisis, maxlag);
|
|
||||||
ylim(rrange)
|
|
||||||
title('PIF OU');
|
|
||||||
|
|
||||||
subplot(1, 3, 3);
|
|
||||||
isiserialcorr(lifadaptisis, maxlag);
|
|
||||||
ylim(rrange)
|
|
||||||
title('LIF adapt');
|
|
||||||
|
|
||||||
%% spike counts:
|
|
||||||
w = 0.1;
|
|
||||||
cmax = 8;
|
|
||||||
pmax = 0.5;
|
|
||||||
subplot(1, 3, 1);
|
|
||||||
counthist(poissonspikes, w);
|
|
||||||
xlim([0 cmax])
|
|
||||||
set(gca, 'XTick', 0:2:cmax)
|
|
||||||
ylim([0 pmax])
|
|
||||||
title('Poisson');
|
|
||||||
|
|
||||||
subplot(1, 3, 2);
|
|
||||||
counthist(pifouspikes, w);
|
|
||||||
xlim([0 cmax])
|
|
||||||
set(gca, 'XTick', 0:2:cmax)
|
|
||||||
ylim([0 pmax])
|
|
||||||
title('PIF OU');
|
|
||||||
|
|
||||||
subplot(1, 3, 3);
|
|
||||||
counthist(lifadaptspikes, w);
|
|
||||||
xlim([0 cmax])
|
|
||||||
set(gca, 'XTick', 0:2:cmax)
|
|
||||||
ylim([0 pmax])
|
|
||||||
title('LIF adapt');
|
|
||||||
savefigpdf(gcf, 'counthist.pdf', 20, 7);
|
|
@ -1,27 +0,0 @@
|
|||||||
rate = 100.0;
|
|
||||||
trials = 50;
|
|
||||||
tmax = 100.0;
|
|
||||||
|
|
||||||
% generate spikes:
|
|
||||||
spikes = poissonspikes( trials, rate, tmax );
|
|
||||||
% interspike intervals:
|
|
||||||
isivec = isis( spikes );
|
|
||||||
% histogram
|
|
||||||
f = figure( 1 );
|
|
||||||
isihist( isivec );
|
|
||||||
hold on
|
|
||||||
% theoretical density:
|
|
||||||
xmax = 5.0/rate;
|
|
||||||
x = 0:0.0001:xmax;
|
|
||||||
y = rate*exp(-rate*x);
|
|
||||||
plot( 1000.0*x, y, 'r', 'LineWidth', 3 );
|
|
||||||
% plot details:
|
|
||||||
title( sprintf( 'Poisson spike trains, rate=%g Hz, nisi=%d', rate, length( isivec ) ) )
|
|
||||||
xlim( [ 0.0 1000.0*xmax ] )
|
|
||||||
ylim( [ 0.0 1.1*rate ] )
|
|
||||||
legend( 'data', 'poisson' )
|
|
||||||
hold off
|
|
||||||
|
|
||||||
% serial correlations:
|
|
||||||
f = figure( 2 );
|
|
||||||
isiserialcorr( isivec, 10 );
|
|
@ -1,46 +0,0 @@
|
|||||||
rates = 1:1:100;
|
|
||||||
avisi = [];
|
|
||||||
sdisi = [];
|
|
||||||
cvisi = [];
|
|
||||||
|
|
||||||
for rate = rates
|
|
||||||
spikes = poissonspikes( 10, rate, 100.0 );
|
|
||||||
isivec = isis( spikes );
|
|
||||||
av = mean( isivec );
|
|
||||||
sd = std( isivec );
|
|
||||||
cv = sd/av;
|
|
||||||
avisi = [ avisi av ];
|
|
||||||
sdisi = [ sdisi sd ];
|
|
||||||
cvisi = [ cvisi cv ];
|
|
||||||
end
|
|
||||||
|
|
||||||
f = figure;
|
|
||||||
subplot( 1, 3, 1 );
|
|
||||||
scatter( rates, 1000.0*avisi, 'b', 'filled' );
|
|
||||||
hold on;
|
|
||||||
plot( rates, 1000.0./rates, 'r' );
|
|
||||||
hold off;
|
|
||||||
xlabel( 'Rate \lambda [Hz]' );
|
|
||||||
ylim( [ 0 1000 ] );
|
|
||||||
title( 'Mean ISI [ms]' );
|
|
||||||
legend( 'simulation', 'theory 1/\lambda' );
|
|
||||||
|
|
||||||
subplot( 1, 3, 2 );
|
|
||||||
scatter( rates, 1000.0*sdisi, 'b', 'filled' );
|
|
||||||
hold on;
|
|
||||||
plot( rates, 1000.0./rates, 'r' );
|
|
||||||
hold off;
|
|
||||||
xlabel( 'Rate \lambda [Hz]' );
|
|
||||||
ylim( [ 0 1000 ] )
|
|
||||||
title( 'Standard deviation ISI [ms]' );
|
|
||||||
legend( 'simulation', 'theory 1/\lambda' );
|
|
||||||
|
|
||||||
subplot( 1, 3, 3 );
|
|
||||||
scatter( rates, cvisi, 'b', 'filled' );
|
|
||||||
hold on;
|
|
||||||
plot( rates, ones( size( rates ) ), 'r' );
|
|
||||||
hold off;
|
|
||||||
xlabel( 'Rate \lambda [Hz]' );
|
|
||||||
ylim( [ 0 2 ] )
|
|
||||||
title( 'CV' );
|
|
||||||
legend( 'simulation', 'theory' );
|
|
@ -1,14 +0,0 @@
|
|||||||
function p = psth(spikes, dt, tmax)
|
|
||||||
% plots a PSTH of the spikes with binwidth dt
|
|
||||||
t = 0.0:dt:tmax+dt;
|
|
||||||
p = zeros(1, length(t));
|
|
||||||
for k=1:length(spikes)
|
|
||||||
times = spikes{k};
|
|
||||||
[h, b] = hist(times, t);
|
|
||||||
p = p + h;
|
|
||||||
end
|
|
||||||
p = p/length(spikes)/dt;
|
|
||||||
t(end) = [];
|
|
||||||
p(end) = [];
|
|
||||||
plot(t, p);
|
|
||||||
end
|
|
@ -1,29 +1,32 @@
|
|||||||
function rasterplot(spikes, tmax)
|
function rasterplot(spikes, tmax)
|
||||||
% Display a spike raster of the spike times given in spikes.
|
% Display a spike raster of the spike times given in spikes.
|
||||||
%
|
%
|
||||||
% rasterplot(spikes, tmax)
|
% Arguments:
|
||||||
% spikes: a cell array of vectors of spike times in seconds
|
% spikes: a cell array of vectors of spike times in seconds
|
||||||
% tmax: plot spike raster upto tmax seconds
|
% tmax: plot spike raster up to tmax seconds
|
||||||
|
spiketimes = [];
|
||||||
ntrials = length(spikes);
|
trials = [];
|
||||||
for k = 1:ntrials
|
ntrials = length(spikes);
|
||||||
times = spikes{k};
|
for k = 1:ntrials
|
||||||
times = times(times<tmax);
|
times = spikes{k};
|
||||||
if tmax < 1.5
|
times = times(times<tmax);
|
||||||
times = 1000.0*times; % conversion to ms
|
% (x,y) pairs for start and stop of stroke
|
||||||
end
|
% plus nan separating strokes:
|
||||||
for i = 1:length( times )
|
spiketimes = [spiketimes, ...
|
||||||
line([times(i) times(i)],[k-0.4 k+0.4], 'Color', 'k');
|
[times(:)'; times(:)'; times(:)'*nan]];
|
||||||
|
trials = [trials, ...
|
||||||
|
[ones(1, length(times)) * (k-0.4); ...
|
||||||
|
ones(1, length(times)) * (k+0.4); ...
|
||||||
|
ones(1, length(times)) * nan]];
|
||||||
end
|
end
|
||||||
end
|
% convert matrices into column vectors of (x,y) pairs:
|
||||||
if tmax < 1.5
|
spiketimes = spiketimes(:);
|
||||||
xlabel('Time [ms]');
|
trials = trials(:);
|
||||||
xlim([0.0 1000.0*tmax]);
|
% plotting this is lightning fast:
|
||||||
else
|
plot(spiketimes, trials, 'k')
|
||||||
xlabel('Time [s]');
|
xlabel('Time [s]');
|
||||||
xlim([0.0 tmax]);
|
xlim([0.0 tmax]);
|
||||||
end
|
ylabel('Trials');
|
||||||
ylabel('Trials');
|
ylim([0.3 ntrials+0.7]);
|
||||||
ylim([0.3 ntrials+0.7 ]);
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -1,30 +0,0 @@
|
|||||||
function counts = spikecounts(spikes, w)
|
|
||||||
% Compute vector of spike counts.
|
|
||||||
%
|
|
||||||
% counts = spikecounts(spikes, w)
|
|
||||||
%
|
|
||||||
% Arguments:
|
|
||||||
% spikes: a cell array of vectors of spike times in seconds
|
|
||||||
% w: observation window duration in seconds for computing the counts
|
|
||||||
%
|
|
||||||
% Returns:
|
|
||||||
% counts: vector of spike counts
|
|
||||||
|
|
||||||
% collect spike counts:
|
|
||||||
tmax = spikes{1}(end);
|
|
||||||
counts = [];
|
|
||||||
for k = 1:length(spikes)
|
|
||||||
times = spikes{k};
|
|
||||||
% method 1: count the number of spikes in each window:
|
|
||||||
% for tk = 0:w:tmax-w
|
|
||||||
% nn = sum((times >= tk) & (times < tk+w));
|
|
||||||
% %nn = length(times((times >= tk) & (times < tk+w)));
|
|
||||||
% %nn = length(find((times >= tk) & (times < tk+w)));
|
|
||||||
% counts = [counts nn];
|
|
||||||
% end
|
|
||||||
% method 2: use the hist() function to do that!
|
|
||||||
tbins = 0.5*w:w:tmax-0.5*w;
|
|
||||||
nn = hist(times, tbins);
|
|
||||||
counts = [counts nn];
|
|
||||||
end
|
|
||||||
end
|
|
@ -1,17 +0,0 @@
|
|||||||
w = 0.1;
|
|
||||||
bins = 0.0:1.0:10.0;
|
|
||||||
|
|
||||||
counts{1} = spikecounts(poissonspikes, w);
|
|
||||||
counts{2} = spikecounts(pifouspikes, w);
|
|
||||||
counts{3} = spikecounts(lifadaptspikes, w);
|
|
||||||
titles = {'Poisson', 'PIF OU', 'LIF adapt'};
|
|
||||||
for k = 1:3
|
|
||||||
subplot(1, 3, k);
|
|
||||||
[h, b] = hist(counts{k}, bins);
|
|
||||||
bar(b, h/sum(h));
|
|
||||||
title(titles{k})
|
|
||||||
xlabel('Spike count');
|
|
||||||
xlim([-0.5, 8.5]);
|
|
||||||
ylim([0.0 0.8]);
|
|
||||||
end
|
|
||||||
savefigpdf(gcf, 'spikecountshists.pdf', 20, 7);
|
|
@ -1,12 +0,0 @@
|
|||||||
function r = spikerate(spikes, duration)
|
|
||||||
% returns the average spike rate of the spikes
|
|
||||||
% for the first duration seconds
|
|
||||||
% spikes: a cell array of vectors of spike times
|
|
||||||
|
|
||||||
rates = zeros(length(spikes),1);
|
|
||||||
for k = 1:length(spikes)
|
|
||||||
times = spikes{k};
|
|
||||||
rates(k) = sum(times<duration)/duration;
|
|
||||||
end
|
|
||||||
r = mean(rates);
|
|
||||||
end
|
|
@ -1,35 +1,3 @@
|
|||||||
BASENAME=pointprocesses
|
TEXFILES=$(wildcard pointprocesses-?.tex) psth.tex
|
||||||
TEXFILES=$(wildcard $(BASENAME)??.tex)
|
|
||||||
EXERCISES=$(TEXFILES:.tex=.pdf)
|
|
||||||
SOLUTIONS=$(EXERCISES:pointprocesses%=pointprocesses-solutions%)
|
|
||||||
|
|
||||||
.PHONY: pdf exercises solutions watch watchexercises watchsolutions clean
|
include ../../exercises.mk
|
||||||
|
|
||||||
pdf : $(SOLUTIONS) $(EXERCISES)
|
|
||||||
|
|
||||||
exercises : $(EXERCISES)
|
|
||||||
|
|
||||||
solutions : $(SOLUTIONS)
|
|
||||||
|
|
||||||
$(SOLUTIONS) : pointprocesses-solutions%.pdf : pointprocesses%.tex instructions.tex
|
|
||||||
{ echo "\\documentclass[answers,12pt,a4paper,pdftex]{exam}"; sed -e '1d' $<; } > $(patsubst %.pdf,%.tex,$@)
|
|
||||||
pdflatex -interaction=scrollmode $(patsubst %.pdf,%.tex,$@) | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $(patsubst %.pdf,%.tex,$@) || true
|
|
||||||
rm $(patsubst %.pdf,%,$@).[!p]*
|
|
||||||
|
|
||||||
$(EXERCISES) : %.pdf : %.tex instructions.tex
|
|
||||||
pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true
|
|
||||||
|
|
||||||
watch :
|
|
||||||
while true; do ! make -q pdf && make pdf; sleep 0.5; done
|
|
||||||
|
|
||||||
watchexercises :
|
|
||||||
while true; do ! make -q exercises && make exercises; sleep 0.5; done
|
|
||||||
|
|
||||||
watchsolutions :
|
|
||||||
while true; do ! make -q solutions && make solutions; sleep 0.5; done
|
|
||||||
|
|
||||||
clean :
|
|
||||||
rm -f *~ *.aux *.log *.out
|
|
||||||
|
|
||||||
cleanup : clean
|
|
||||||
rm -f $(SOLUTIONS) $(EXERCISES)
|
|
||||||
|
Binary file not shown.
25
pointprocesses/exercises/counts.m
Normal file
25
pointprocesses/exercises/counts.m
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
function n = counts(spikes, w)
|
||||||
|
% Count spikes in time windows.
|
||||||
|
%
|
||||||
|
% Arguments:
|
||||||
|
% spikes: a cell array of vectors of spike times in seconds
|
||||||
|
% w: duration of window in seconds for computing the counts
|
||||||
|
%
|
||||||
|
% Returns:
|
||||||
|
% n: vector with spike counts
|
||||||
|
tmax = spikes{1}(end);
|
||||||
|
n = [];
|
||||||
|
for k = 1:length(spikes)
|
||||||
|
times = spikes{k};
|
||||||
|
% alternative 1: count the number of spikes in each window:
|
||||||
|
% for tk = 0:w:tmax-w
|
||||||
|
% nn = sum((times >= tk) & (times < tk+w));
|
||||||
|
% % nn = length(find((times >= tk) & (times < tk+w)));
|
||||||
|
% n = [n, nn];
|
||||||
|
% end
|
||||||
|
% alternative 2: use the hist() function to do that!
|
||||||
|
tbins = 0.5*w:w:tmax-0.5*w;
|
||||||
|
nn = hist(times, tbins);
|
||||||
|
n = [n, nn];
|
||||||
|
end
|
||||||
|
end
|
@ -1,18 +1,17 @@
|
|||||||
function fanoplot(spikes, titles)
|
function fanoplot(spikes, titles)
|
||||||
% computes and plots fano factor as a function of window size
|
% Plots fano factor as a function of window size.
|
||||||
% spikes: a cell array of vectors of spike times
|
%
|
||||||
% titles: string that is used as a title for the plots
|
% Arguments:
|
||||||
|
% spikes: a cell array of vectors of spike times
|
||||||
|
% titles: string that is used as a title for the plots
|
||||||
windows = logspace(-3.0, -0.5, 100);
|
windows = logspace(-3.0, -0.5, 100);
|
||||||
mc = windows;
|
mc = zeros(1, length(windows));
|
||||||
vc = windows;
|
vc = zeros(1, length(windows));
|
||||||
ff = windows;
|
|
||||||
for j = 1:length(windows)
|
for j = 1:length(windows)
|
||||||
w = windows(j);
|
w = windows(j);
|
||||||
counts = spikecounts(spikes, w);
|
spikecounts = counts(spikes, w);
|
||||||
% statistics for current window:
|
mc(j) = mean(spikecounts);
|
||||||
mc(j) = mean(counts);
|
vc(j) = var(spikecounts);
|
||||||
vc(j) = var(counts);
|
|
||||||
ff(j) = vc(j)/mc(j);
|
|
||||||
end
|
end
|
||||||
|
|
||||||
subplot(1, 2, 1);
|
subplot(1, 2, 1);
|
||||||
@ -22,7 +21,7 @@ function fanoplot(spikes, titles)
|
|||||||
ylabel('Count variance');
|
ylabel('Count variance');
|
||||||
|
|
||||||
subplot(1, 2, 2);
|
subplot(1, 2, 2);
|
||||||
scatter(1000.0*windows, ff, 'filled');
|
scatter(1000.0*windows, vc ./ mc, 'filled');
|
||||||
title(titles);
|
title(titles);
|
||||||
xlabel('Window [ms]');
|
xlabel('Window [ms]');
|
||||||
ylabel('Fano factor');
|
ylabel('Fano factor');
|
@ -1,9 +1,9 @@
|
|||||||
spikes{1} = poissonspikes;
|
spikes{1} = poissonspikes;
|
||||||
spikes{2} = pifouspikes;
|
spikes{2} = pifouspikes;
|
||||||
spikes{3} = lifadaptspikes;
|
spikes{3} = lifadaptspikes;
|
||||||
idents = {'poisson', 'pifou', 'lifadapt'};
|
titles = {'poisson', 'pifou', 'lifadapt'};
|
||||||
for k = 1:3
|
for k = 1:3
|
||||||
figure(k)
|
figure(k)
|
||||||
fanoplot(spikes{k}, titles{k});
|
fanoplot(spikes{k}, titles{k});
|
||||||
savefigpdf(gcf, sprintf('fanoplots%s.pdf', idents{k}), 20, 7);
|
savefigpdf(gcf, sprintf('fanoplots%s.pdf', titles{k}), 20, 7);
|
||||||
end
|
end
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,18 +0,0 @@
|
|||||||
% generate spike times:
|
|
||||||
rate = 20.0;
|
|
||||||
spikes = hompoissonspikes( 10, rate, 50.0 );
|
|
||||||
% isi histogram:
|
|
||||||
isivec = isis( spikes );
|
|
||||||
isihist( isivec );
|
|
||||||
hold on
|
|
||||||
% theoretical density:
|
|
||||||
xmax = 5.0/rate;
|
|
||||||
x = 0:0.0001:xmax;
|
|
||||||
y = rate*exp(-rate*x);
|
|
||||||
plot( 1000.0*x, y, 'r', 'LineWidth', 3 );
|
|
||||||
% plot details:
|
|
||||||
title( sprintf( 'Poisson spike trains, rate=%g Hz, nisi=%d', rate, length( isivec ) ) )
|
|
||||||
xlim( [ 0.0 1000.0*xmax ] )
|
|
||||||
ylim( [ 0.0 1.1*rate ] )
|
|
||||||
legend( 'data', 'poisson' )
|
|
||||||
hold off
|
|
@ -1,46 +0,0 @@
|
|||||||
rates = 1:1:100;
|
|
||||||
avisi = [];
|
|
||||||
sdisi = [];
|
|
||||||
cvisi = [];
|
|
||||||
|
|
||||||
for rate = rates
|
|
||||||
spikes = hompoissonspikes( 10, rate, 100.0 );
|
|
||||||
isivec = isis( spikes );
|
|
||||||
av = mean( isivec );
|
|
||||||
sd = std( isivec );
|
|
||||||
cv = sd/av;
|
|
||||||
avisi = [ avisi av ];
|
|
||||||
sdisi = [ sdisi sd ];
|
|
||||||
cvisi = [ cvisi cv ];
|
|
||||||
end
|
|
||||||
|
|
||||||
f = figure;
|
|
||||||
subplot( 1, 3, 1 );
|
|
||||||
scatter( rates, 1000.0*avisi, 'b', 'filled' );
|
|
||||||
hold on;
|
|
||||||
plot( rates, 1000.0./rates, 'r' );
|
|
||||||
hold off;
|
|
||||||
xlabel( 'Rate \lambda [Hz]' );
|
|
||||||
ylim( [ 0 1000 ] );
|
|
||||||
title( 'Mean ISI [ms]' );
|
|
||||||
legend( 'simulation', 'theory 1/\lambda' );
|
|
||||||
|
|
||||||
subplot( 1, 3, 2 );
|
|
||||||
scatter( rates, 1000.0*sdisi, 'b', 'filled' );
|
|
||||||
hold on;
|
|
||||||
plot( rates, 1000.0./rates, 'r' );
|
|
||||||
hold off;
|
|
||||||
xlabel( 'Rate \lambda [Hz]' );
|
|
||||||
ylim( [ 0 1000 ] )
|
|
||||||
title( 'Standard deviation ISI [ms]' );
|
|
||||||
legend( 'simulation', 'theory 1/\lambda' );
|
|
||||||
|
|
||||||
subplot( 1, 3, 3 );
|
|
||||||
scatter( rates, cvisi, 'b', 'filled' );
|
|
||||||
hold on;
|
|
||||||
plot( rates, ones( size( rates ) ), 'r' );
|
|
||||||
hold off;
|
|
||||||
xlabel( 'Rate \lambda [Hz]' );
|
|
||||||
ylim( [ 0 2 ] )
|
|
||||||
title( 'CV' );
|
|
||||||
legend( 'simulation', 'theory' );
|
|
@ -1,19 +0,0 @@
|
|||||||
function spikes = hompoissonspikes( trials, rate, tmax )
|
|
||||||
% Generate spike times of a homogeneous poisson process
|
|
||||||
% trials: number of trials that should be generated
|
|
||||||
% rate: the rate of the Poisson process in Hertz
|
|
||||||
% tmax: the duration of each trial in seconds
|
|
||||||
% returns a cell array of vectors of spike times
|
|
||||||
|
|
||||||
dt = 3.33e-5;
|
|
||||||
p = rate*dt;
|
|
||||||
if p > 0.2
|
|
||||||
p = 0.2
|
|
||||||
dt = p/rate;
|
|
||||||
end
|
|
||||||
x = rand( trials, ceil(tmax/dt) );
|
|
||||||
spikes = cell( trials, 1 );
|
|
||||||
for k=1:trials
|
|
||||||
spikes{k} = find( x(k,:) < p ) * dt;
|
|
||||||
end
|
|
||||||
end
|
|
Binary file not shown.
Binary file not shown.
29
pointprocesses/exercises/isihist.m
Normal file
29
pointprocesses/exercises/isihist.m
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
function [pdf, centers] = isihist(isis, binwidth)
|
||||||
|
% Compute normalized histogram of interspike intervals.
|
||||||
|
%
|
||||||
|
% [pdf, centers] = isihist(isis, binwidth)
|
||||||
|
%
|
||||||
|
% Arguments:
|
||||||
|
% isis: vector of interspike intervals in seconds
|
||||||
|
% binwidth: optional width in seconds to be used for the isi bins
|
||||||
|
%
|
||||||
|
% Returns:
|
||||||
|
% pdf: vector with pdf of interspike intervals in Hertz
|
||||||
|
% centers: vector with centers of interspikeintervalls in seconds
|
||||||
|
|
||||||
|
if nargin < 2
|
||||||
|
% compute good binwidth:
|
||||||
|
nperbin = 200; % average number of data points per bin
|
||||||
|
bins = length(isis)/nperbin; % number of bins
|
||||||
|
binwidth = max(isis)/bins;
|
||||||
|
if binwidth < 5e-4 % half a millisecond
|
||||||
|
binwidth = 5e-4;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
bins = 0.5*binwidth:binwidth:max(isis);
|
||||||
|
% histogram data:
|
||||||
|
[nelements, centers] = hist(isis, bins);
|
||||||
|
% normalization (integral = 1):
|
||||||
|
pdf = nelements / sum(nelements) / binwidth;
|
||||||
|
end
|
||||||
|
|
Binary file not shown.
16
pointprocesses/exercises/isis.m
Normal file
16
pointprocesses/exercises/isis.m
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
function isivec = isis(spikes)
|
||||||
|
% returns a single list of isis computed from all trials in spikes
|
||||||
|
%
|
||||||
|
% Arguments:
|
||||||
|
% spikes: a cell array of vectors of spike times in seconds
|
||||||
|
%
|
||||||
|
% Returns:
|
||||||
|
% isivec: a column vector with all the interspike intervals
|
||||||
|
isivec = [];
|
||||||
|
for k = 1:length(spikes)
|
||||||
|
difftimes = diff(spikes{k});
|
||||||
|
% difftimes(:) ensures a column vector
|
||||||
|
% regardless of the type of vector in spikes{k}
|
||||||
|
isivec = [isivec; difftimes(:)];
|
||||||
|
end
|
||||||
|
end
|
21
pointprocesses/exercises/isiserialcorr.m
Normal file
21
pointprocesses/exercises/isiserialcorr.m
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
function [isicorr, lags] = isiserialcorr(isivec, maxlag)
|
||||||
|
% serial correlation of interspike intervals
|
||||||
|
%
|
||||||
|
% Arguments:
|
||||||
|
% isivec: vector of interspike intervals in seconds
|
||||||
|
% maxlag: the maximum lag
|
||||||
|
%
|
||||||
|
% Returns:
|
||||||
|
% isicorr: vector with the serial correlations for lag 0 to maxlag
|
||||||
|
% lags: vector with the lags corresponding to isicorr
|
||||||
|
lags = 0:maxlag;
|
||||||
|
isicorr = zeros(size(lags));
|
||||||
|
for k = 1:length(lags)
|
||||||
|
lag = lags(k);
|
||||||
|
if length(isivec) > lag+10 % ensure "enough" data
|
||||||
|
% NOTE: the arguments to corr must be column vectors!
|
||||||
|
% We insure this already in the isis() function.
|
||||||
|
isicorr(k) = corr(isivec(1:end-lag), isivec(lag+1:end));
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user