Merge branch 'master' of raven.am28.uni-tuebingen.de:scientificComputing

2014-10-15 22:25:56 +02:00 · 2014-10-15 22:25:56 +02:00 · d4b147103f
commit d4b147103f
parent 2ca39c5618 7bab4e9baa
75 changed files with 11728 additions and 32 deletions
--- a/programming/exercises/psths.m
+++ b/programming/exercises/psths.m
@ -1,16 +1,60 @@
 load('ampullary.mat')
+sample_rate = 20000; % Hz
+max_time = 0; % running maximum over all trials; stays 0 if no trial has spikes
+for i = 1:size(times,2) % one cell of 'times' per trial (presumably spike times in s -- confirm against ampullary.mat)
+    max_time = max([max_time, max(times{i})]); % an empty trial contributes [] and leaves max_time unchanged
+end
 fig = figure();
-
+set(gcf,'Color', 'white')
 %% create PSTH on the basis of the interspike intervals
-fig.sub
+subplot(3,1,1)
+hold on
 % 1. get the interspike intervals for each trial
 for i = 1:size(times,2)
-   isi = diff(times{i});
-    
+    t = times{i};
+    isi = diff(t);
+    plot(t(2:end), 1./isi)
 end

+% annotate the ISI-based panel (first subplot)
+xlabel('time [s]')
+ylabel('firing rate [Hz]')
+box('off')
+title('instantaneous firing rate')
+
 %% create PSTH using the binning method
-
-
+% PSTH via binning: count the spikes of every trial in fixed-width bins,
+% convert the counts to a rate and average across trials.
+subplot(3,1,2)
+bin_width = 0.02; % s
+edges = 0:bin_width:max_time;
+% hist() interprets a vector second argument as bin CENTERS, so hand it
+% the centers of the bins delimited by 'edges'. Spikes after the last
+% full bin are counted in the last bin.
+centers = edges(1:end-1) + bin_width/2;
+n_trials = size(times,2);
+% start from zeros so that every trial, including the first one, enters
+% the average with the same weight 1/n_trials
+firing_rate = zeros(1, length(centers));
+for i = 1:n_trials
+    n = hist(times{i}, centers);
+    % n(:)' forces a row vector regardless of the orientation of times{i}
+    firing_rate = firing_rate + n(:)' / bin_width / n_trials;
+end
+plot(centers, firing_rate)
+% box('off') has to follow plot(): with hold off, plot() resets the axes
+box('off')
+xlabel('time [s]')
+ylabel('firing rate [Hz]')
+title('binning method')

 %% create PSTH using the kernel-convolution method
+% PSTH via kernel convolution: turn every trial into a binary spike train
+% sampled at sample_rate, smooth it with a normalised Hann window and
+% average the smoothed trains across trials.
+subplot(3,1,3)
+n_trials = size(times,2);
+n_samples = round(max_time*sample_rate);
+binary_spikes = zeros(n_trials, n_samples);
+resps = zeros(size(binary_spikes));
+% round() guards against a non-integer window length caused by
+% floating-point error in bin_width/4*sample_rate
+window = hann(round(bin_width/4*sample_rate),'symmetric');
+window = window(:)'/sum(window); % unit area, row vector like the spike trains
+
+for i = 1:n_trials
+    idx = round(times{i}*sample_rate);
+    % clamp every index that rounds to 0 or below (not only the first
+    % one); logical indexing also works for trials without spikes
+    idx(idx < 1) = 1;
+    binary_spikes(i, idx) = 1;
+    % the kernel sums to 1, so the convolution is in spikes per sample;
+    % multiplying by sample_rate converts it to Hz
+    resps(i,:) = conv(binary_spikes(i,:), window, 'same') * sample_rate;
+end
+% column k of resps corresponds to time k/sample_rate; average over
+% trials (dimension 1), not over time
+time = (1:n_samples)/sample_rate;
+plot(time, mean(resps,1))
+box('off')
+xlabel('time [s]')
+ylabel('firing rate [Hz]')
+title('kernel-convolution method')
--- a/statistics/assignments/Makefile
+++ b/statistics/assignments/Makefile
@ -0,0 +1,16 @@
+# Build one zip bundle per student dataset: day1.tex is instantiated for
+# each example number (the placeholder 000 is replaced by the number),
+# compiled to PDF and zipped together with the matching CSV file.
+# 'all' and 'clean' are not files, so declare them phony.
+.PHONY: all clean
+
+all:
+	for number in 001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 ; do \
+		echo $$number ; \
+		sed "s/000/$$number/g" day1.tex > tmp.tex; \
+		pdflatex tmp.tex; \
+		mv tmp.pdf day1_$$number.pdf; \
+		cp ../data/example$$number.csv ./ ;\
+		rm tmp.* ; \
+		zip example$$number.zip example$$number.csv  day1_$$number.pdf ; \
+		rm example$$number.csv ;\
+		rm day1_$$number.pdf ; \
+	done
+
+# -f keeps 'make clean' from failing when no bundle has been built yet
+clean:
+	rm -f *.zip
+	rm -rf auto
--- a/statistics/assignments/day1.tex
+++ b/statistics/assignments/day1.tex
@ -0,0 +1,72 @@
+\documentclass[addpoints,10pt]{exam}
+\usepackage{url}
+\usepackage{color}
+\usepackage{hyperref}
+
+\pagestyle{headandfoot}
+\runningheadrule
+\firstpageheadrule
+
+\firstpageheader{Scientific Computing}{afternoon assignment day 01}{10/20/2014}
+%\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014}
+\firstpagefooter{}{}{}
+\runningfooter{}{}{}
+\pointsinmargin
+\bracketedpoints
+
+%\printanswers
+\shadedsolutions
+
+
+\begin{document}
+%%%%%%%%%%%%%%%%%%%%% Submission instructions %%%%%%%%%%%%%%%%%%%%%%%%%
+\sffamily
+%%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{questions}
+  \question To publish scientific results, you will usually need to
+  use statistical methods. Some journals provide you with a brief
+  description of how they expect you to apply statistical methods. One
+  example can be found in the author guidelines of the journal
+  Nature. 
+
+  Assume you collected the following dataset. You can download it from
+  Ilias as {\tt example000.csv}. Here is the description of the dataset:
+
+  \begin{quotation}
+    \tt 
+    \input{../examples/example000.tex}
+  \end{quotation}
+
+  \begin{parts}
+    \part Download the dataset and write a script that loads it into
+    matlab. 
+
+    \part Think about the type of your data (I might ask you that
+    tomorrow).
+
+    \part Produce a plot that displays the data in an appropriate
+    way. Make sure to respect all elements of good plotting we
+    discussed today.
+
+    \part Download the statistical checklist from Nature. Produce {\bf
+      one} slide that contains the plot and a concise summary of your
+    data which respects the requirements made by Nature (assume you
+    are producing a figure legend for the figure in Nature). It is
+    good style to avoid expressions like ``the plot shows'' or
+    similar. 
+
+    \part Upload your code, the data, and the slide as a zip to
+    Ilias. Deadline is 19h00. Structure the zip such that you can
+    present your program in front of the class. Several students will
+    be asked to present their slide and their code tomorrow morning. 
+
+  \end{parts}
+  
+\end{questions}
+
+
+
+
+
+\end{document}
--- a/statistics/assignments/day2.tex
+++ b/statistics/assignments/day2.tex
@ -0,0 +1,46 @@
+\documentclass[addpoints,10pt]{exam}
+\usepackage{url}
+\usepackage{color}
+\usepackage{hyperref}
+
+\pagestyle{headandfoot}
+\runningheadrule
+\firstpageheadrule
+
+\firstpageheader{Scientific Computing}{afternoon assignment day 02}{10/21/2014}
+%\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014}
+\firstpagefooter{}{}{}
+\runningfooter{}{}{}
+\pointsinmargin
+\bracketedpoints
+
+%\printanswers
+\shadedsolutions
+
+
+\begin{document}
+%%%%%%%%%%%%%%%%%%%%% Submission instructions %%%%%%%%%%%%%%%%%%%%%%%%%
+\sffamily
+%%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{questions}
+  \question Download example002 from yesterday (brain weights). 
+  \begin{parts}
+    \part Simulate a null distribution via permutation.
+    \part Determine whether you can reject ``means are equal'' on a
+    5\% significance level using the simulated null distribution.
+    \part Check whether the means are different with a two sample
+    t-test in matlab ({\tt ttest2}).
+    \part Plot the data appropriately and generate a single slide that
+    contains the plot and a short discussion of the test that respects
+    the Nature statistical checklist (ignore all questions of whether
+    the assumptions of the test are satisfied).
+    \part Upload the slide and the code to Ilias. Deadline is 19h00.
+  \end{parts}
+\end{questions}
+
+
+
+
+
+\end{document}
--- a/statistics/data/example001.csv
+++ b/statistics/data/example001.csv
@ -0,0 +1,43 @@
+MAO,Diagnosis
+6.8,I
+4.1,I
+7.3,I
+14.2,I
+18.8,I
+9.9,I
+7.4,I
+11.9,I
+5.2,I
+7.8,I
+7.8,I
+8.7,I
+12.7,I
+14.5,I
+10.7,I
+8.4,I
+9.7,I
+10.6,I
+7.8,II
+4.4,II
+11.4,II
+3.1,II
+4.3,II
+10.1,II
+1.5,II
+7.4,II
+5.2,II
+10,II
+3.7,II
+5.5,II
+8.5,II
+7.7,II
+6.8,II
+3.1,II
+6.4,III
+10.8,III
+1.1,III
+2.9,III
+4.5,III
+5.8,III
+9.4,III
+6.8,III
--- a/statistics/data/example002.csv
+++ b/statistics/data/example002.csv
@ -0,0 +1,186 @@
+Weight,Sex
+1607,m
+1157,m
+1248,m
+1310,m
+1398,m
+1237,m
+1232,m
+1343,m
+1380,m
+1274,m
+1245,m
+1286,m
+1508,m
+1105,m
+1123,m
+1198,m
+1300,m
+1249,m
+1185,m
+915,m
+1345,m
+1107,m
+1357,m
+1227,m
+1205,m
+1435,m
+1289,m
+1093,m
+1211,m
+1260,m
+1193,m
+1330,m
+1130,m
+1357,m
+1193,m
+1232,m
+1321,m
+1260,m
+1380,m
+1230,m
+1136,m
+1029,m
+1223,m
+1240,m
+1264,m
+1020,m
+1415,m
+1410,m
+1275,m
+1230,m
+1085,m
+1048,m
+1181,m
+1103,m
+1165,m
+1547,m
+1173,m
+1660,m
+1307,m
+1535,m
+1315,m
+1257,m
+1424,m
+1309,m
+1170,m
+1412,m
+1270,m
+1230,m
+1233,m
+1561,m
+1193,m
+1272,m
+1355,m
+1137,m
+1354,m
+1110,m
+1265,m
+1407,m
+1227,m
+1330,m
+1222,m
+1305,m
+1475,m
+1177,m
+1337,m
+1145,m
+1070,m
+1305,m
+1085,m
+1303,m
+1390,m
+1532,m
+1238,m
+1233,m
+1280,m
+1245,m
+1459,m
+1157,m
+1302,m
+1385,m
+1310,m
+1342,m
+1303,m
+1248,m
+1115,m
+1365,m
+1227,m
+1353,m
+1125,f
+1027,f
+1112,f
+983,f
+1090,f
+1247,f
+1045,f
+983,f
+972,f
+1045,f
+937,f
+1245,f
+1200,f
+1270,f
+1200,f
+1145,f
+1090,f
+1040,f
+1343,f
+1010,f
+1095,f
+1180,f
+1168,f
+1095,f
+1040,f
+1235,f
+1050,f
+1038,f
+1046,f
+1255,f
+1228,f
+1000,f
+1225,f
+1220,f
+1085,f
+1067,f
+1006,f
+1138,f
+1175,f
+1252,f
+1037,f
+958,f
+1020,f
+1068,f
+1107,f
+1317,f
+952,f
+1056,f
+1203,f
+1183,f
+1392,f
+1130,f
+1284,f
+996,f
+1228,f
+1087,f
+1035,f
+1170,f
+1064,f
+1250,f
+1129,f
+1088,f
+1037,f
+1117,f
+1095,f
+1027,f
+1027,f
+1190,f
+1153,f
+1037,f
+1120,f
+1212,f
+1024,f
+1135,f
+1177,f
+1096,f
+1114,f
--- a/statistics/data/example003.csv
+++ b/statistics/data/example003.csv
@ -0,0 +1,52 @@
+singtime
+4.3
+24.1
+6.6
+7.3
+4
+2.6
+4
+3.9
+9.4
+6.2
+1.6
+6.5
+0.2
+2.7
+17.4
+5.6
+2
+3.8
+1.2
+0.7
+1.6
+2.3
+3.7
+0.8
+0.5
+4.5
+11.5
+3.5
+0.8
+5.2
+2
+0.7
+1.7
+5
+2.8
+1.5
+3.9
+3.7
+4.5
+1.8
+1.2
+0.7
+0.7
+4.2
+4.7
+2.2
+1.4
+14.1
+8.6
+3.7
+3.5
--- a/statistics/data/example004.csv
+++ b/statistics/data/example004.csv
@ -0,0 +1,29 @@
+Pulse
+97
+111
+93
+98
+107
+77
+121
+88
+96
+123
+119
+91
+99
+95
+99
+102
+77
+85
+104
+106
+114
+85
+112
+102
+104
+94
+104
+98
--- a/statistics/data/example005.csv
+++ b/statistics/data/example005.csv
@ -0,0 +1,37 @@
+Branches
+23
+30
+54
+28
+31
+29
+34
+35
+30
+27
+21
+43
+51
+35
+51
+49
+35
+24
+26
+29
+21
+29
+37
+27
+28
+33
+33
+23
+37
+27
+40
+48
+41
+20
+30
+57
--- a/statistics/data/example006.csv
+++ b/statistics/data/example006.csv
@ -0,0 +1,32 @@
+Glucose
+81
+85
+93
+93
+99
+76
+75
+84
+78
+84
+81
+82
+89
+81
+96
+82
+74
+70
+84
+86
+80
+70
+131
+75
+88
+102
+115
+89
+82
+79
+106
--- a/statistics/data/example007.csv
+++ b/statistics/data/example007.csv
@ -0,0 +1,24 @@
+NerveCells
+35
+19
+33
+34
+17
+26
+16
+40
+28
+30
+23
+12
+27
+33
+22
+31
+28
+28
+35
+23
+23
+19
+29
--- a/statistics/data/example008.csv
+++ b/statistics/data/example008.csv
@ -0,0 +1,21 @@
+RateChange,Treatment
+28,Caffeine
+11,Caffeine
+-3,Caffeine
+14,Caffeine
+-2,Caffeine
+-4,Caffeine
+18,Caffeine
+2,Caffeine
+2,Caffeine
+26,Decaf
+1,Decaf
+0,Decaf
+-4,Decaf
+-4,Decaf
+14,Decaf
+16,Decaf
+8,Decaf
+0,Decaf
+18,Decaf
+-10,Decaf
--- a/statistics/data/example009.csv
+++ b/statistics/data/example009.csv
@ -0,0 +1,12 @@
+NEConcentration,Treatment
+543,Toluene
+523,Toluene
+431,Toluene
+635,Toluene
+564,Toluene
+549,Toluene
+535,Control
+385,Control
+502,Control
+412,Control
+387,Control
--- a/statistics/data/example010.csv
+++ b/statistics/data/example010.csv
@ -0,0 +1,13 @@
+Dopamine,Group
+3420,toluene
+2314,toluene
+1911,toluene
+2464,toluene
+2781,toluene
+2803,toluene
+1820,control
+1843,control
+1397,control
+1803,control
+2539,control
+1990,control
--- a/statistics/data/example011.csv
+++ b/statistics/data/example011.csv
@ -0,0 +1,10 @@
+Animal,Site I,Site II
+1,50.6,38
+2,39.2,18.6
+3,35.2,23.2
+4,17,19
+5,11.2,6.6
+6,14.2,16.4
+7,24.2,14.4
+8,37.4,37.6
+9,35.2,24.4
--- a/statistics/data/example012.csv
+++ b/statistics/data/example012.csv
@ -0,0 +1,10 @@
+Subject,mCPP,Placebo
+1,1.1,0
+2,1.3,-0.3
+3,1,0.6
+4,1.7,0.3
+5,1.4,-0.7
+6,0.1,-0.2
+7,0.5,0.6
+8,1.6,0.9
+9,-0.5,-2
--- a/statistics/data/example013.csv
+++ b/statistics/data/example013.csv
@ -0,0 +1,9 @@
+Animal,Control,Regenerating
+1,16.3,11.5
+2,4.8,3.6
+3,10.9,12.5
+4,14.2,6.3
+5,16.3,15.2
+6,9.9,8.1
+7,29.2,16.6
+8,22.4,13.1
--- a/statistics/data/example014.csv
+++ b/statistics/data/example014.csv
@ -0,0 +1,16 @@
+BodyTempDrop,AlcoholDose
+0.2,1.5
+1.9,1.5
+-0.1,1.5
+0.5,1.5
+0.8,1.5
+4,3
+3.2,3
+2.3,3
+2.9,3
+3.8,3
+3.3,6
+5.1,6
+5.3,6
+6.7,6
+5.9,6
--- a/statistics/data/example015.csv
+++ b/statistics/data/example015.csv
@ -0,0 +1,18 @@
+PeakFlow,Height
+733,174
+572,183
+500,176
+738,169
+616,183
+787,186
+866,178
+670,175
+550,172
+660,179
+575,171
+577,184
+783,200
+625,195
+470,176
+642,176
+856,190
--- a/statistics/data/example016.csv
+++ b/statistics/data/example016.csv
@ -0,0 +1,19 @@
+Patient,Before,After
+1,98,75
+2,100,60
+3,82,25
+4,100,55
+5,93,78
+6,119,102
+7,70,58
+8,78,70
+9,104,90
+10,70,50
+11,60,65
+12,88,45
+13,45,36
+14,159,144
+15,65,27
+16,98,90
+17,66,16
+18,67,53
--- a/statistics/data/example017.csv
+++ b/statistics/data/example017.csv
@ -0,0 +1,21 @@
+LegStrength,UpperBodyStrength
+55,low
+70,low
+45,low
+246,low
+240,low
+96,low
+225,low
+40,middle
+200,middle
+250,middle
+192,middle
+117,middle
+215,middle
+181,high
+85,high
+416,high
+228,high
+257,high
+316,high
+134,high
--- a/statistics/examples/example001.tex
+++ b/statistics/examples/example001.tex
@ -0,0 +1,6 @@
+MAO and Schizophrenia: Monoamine oxidase (MAO) is an enzyme that is
+thought to play a role in the regulation of behavior. To see whether
+different categories of schizophrenic patients have different levels
+of MAO activity, researchers collected blood specimens from 42
+patients and measured the MAO activity in the platelets. Values are
+expressed as nmol benzylaldehyde product per $10^8$ platelets per hour.
--- a/statistics/examples/example002.tex
+++ b/statistics/examples/example002.tex
@ -0,0 +1,3 @@
+Brain Weight: In 1888, P. Topinard published data on the brain weights
+of hundreds of French men and women. Brain weights are given in
+gram. 
--- a/statistics/examples/example003.tex
+++ b/statistics/examples/example003.tex
@ -0,0 +1,4 @@
+Cricket Singing Times: Male Mormon crickets (Anabrus simplex) sing to attract mates.
+A field researcher measured the duration of 51 unsuccessful songs---that is, the time
+until the singing male gave up and left his perch. The data is given
+in minutes.
--- a/statistics/examples/example004.tex
+++ b/statistics/examples/example004.tex
@ -0,0 +1,3 @@
+Pulse after Exercise: A group of 28 adults did some moderate exercise
+for five minutes and then measured their pulses. Data is given in
+beats/minute.
--- a/statistics/examples/example005.tex
+++ b/statistics/examples/example005.tex
@ -0,0 +1,5 @@
+A dendritic tree is a branched structure that emanates from the body
+of a nerve cell. As part of a study of brain development, 36 nerve
+cells were taken from the brains of newborn guinea pigs. The
+investigators counted the number of dendritic branch segments
+emanating from each nerve cell.
--- a/statistics/examples/example006.tex
+++ b/statistics/examples/example006.tex
@ -0,0 +1,4 @@
+For each of 31 healthy dogs, a veterinarian measured the glucose
+concentration in the anterior chamber of the right eye and also in the
+blood serum. The following data are the anterior chamber glucose
+measurements, expressed as a percentage of the blood glucose.
--- a/statistics/examples/example007.tex
+++ b/statistics/examples/example007.tex
@ -0,0 +1,5 @@
+A veterinary anatomist investigated the spatial arrangement of the
+nerve cells in the intestine of a pony.  He removed a block of tissue
+from the intestinal wall, cut the block into many equal sections, and
+counted the number of nerve cells in each of 23 randomly selected
+sections.
--- a/statistics/examples/example008.tex
+++ b/statistics/examples/example008.tex
@ -0,0 +1,8 @@
+Researchers were interested in the short-term effect that caffeine has
+on heart rate. They enlisted a group of volunteers and measured each
+person's resting heart rate. Then they had each subject drink 6 ounces
+of coffee. Nine of the subjects were given coffee containing caffeine
+and 11 were given decaffeinated coffee. After 10 minutes each person's
+heart rate was measured again. The data in the table contains the
+change in heart rate; a positive number means that heart rate went up
+and a negative number means that heart rate went down.
--- a/statistics/examples/example009.tex
+++ b/statistics/examples/example009.tex
@ -0,0 +1,9 @@
+Toluene and the Brain: Abuse of substances containing toluene (for
+example, glue) can produce various neurological symptoms. In an
+investigation of the mechanism of these toxic effects, researchers
+measured the concentrations of various chemicals in the brains of rats
+that had been exposed to a toluene-laden atmosphere, and also in
+unexposed control rats. The concentrations of the brain chemical
+norepinephrine (NE) in the medulla region of the brain, for six
+toluene-exposed rats and five control rats, are given in the
+accompanying data file in ng/g.
--- a/statistics/examples/example010.tex
+++ b/statistics/examples/example010.tex
@ -0,0 +1,3 @@
+In a pharmacological study, researchers measured the concentration of
+the brain chemical dopamine in six rats exposed to toluene and six
+control rats. Numbers are specified in ng/g.
--- a/statistics/examples/example011.tex
+++ b/statistics/examples/example011.tex
@ -0,0 +1,6 @@
+Nerve Cell Density: For each of nine horses, a veterinary anatomist
+measured the density of nerve cells at specified sites in the
+intestine. The results for site I (midregion of jejunum) and site II
+(mesenteric region of jejunum) are given in the accompanying dataset.
+Each density value is the average of counts of nerve cells in five
+equal sections of tissue.
--- a/statistics/examples/example012.tex
+++ b/statistics/examples/example012.tex
@ -0,0 +1,6 @@
+Hunger Rating: During a weight loss study each of nine subjects was
+given either the active drug m-chlorophenylpiperazine (mCPP) for two
+weeks and then a placebo for another two weeks, or else was given the
+placebo for the first two weeks and then mCPP for the second two
+weeks. As part of the study the subjects were asked to rate how hungry
+they were at the end of each two-week period. 
--- a/statistics/examples/example013.tex
+++ b/statistics/examples/example013.tex
@ -0,0 +1,10 @@
+Certain types of nerve cells have the ability to regenerate a part of
+the cell that has been amputated.  In an early study of this process,
+measurements were made on the nerves in the spinal cord in rhesus
+monkeys. Nerves emanating from the left side of the cord were cut,
+while nerves from the right side were kept intact. During the
+regeneration process, the content of creatine phosphate (CP) was
+measured in the left and the right portion of the spinal cord. The
+following table shows the data for the right (control) side (Y1), and
+for the left (regenerating) side (Y2). The units of measurement are mg
+CP per 100 gm tissue.
--- a/statistics/examples/example014.tex
+++ b/statistics/examples/example014.tex
@ -0,0 +1,9 @@
+ In an investigation of the physiological effects of alcohol
+(ethanol), 15 mice were randomly allocated to three treatment groups,
+each to receive a different oral dose of alcohol. The dosage levels
+were 1.5, 3.0, and 6.0 g alcohol/kg body weight. The body temperature
+of each mouse was measured immediately before the alcohol was given
+and again 20 minutes afterward. The accompanying data shows the drop
+(before minus after) in body temperature for each mouse. (The negative
+value $-0.1$ refers to a mouse whose temperature rose rather than
+fell.)
--- a/statistics/examples/example015.tex
+++ b/statistics/examples/example015.tex
@ -0,0 +1,5 @@
+The peak flow rate of a person is the fastest rate
+at which the person can expel air after taking a deep breath.
+Peak flow rate is measured in units of liters per minute and
+gives an indication of the person's respiratory health. Flow is given
+in l/min, height in cm. 
--- a/statistics/examples/example016.tex
+++ b/statistics/examples/example016.tex
@ -0,0 +1,6 @@
+An experiment was conducted to study the effect of tamoxifen on
+patients with cervical cancer. One of the measurements made, both
+before and again after tamoxifen was given, was microvessel density
+(MVD). MVD, which is measured as number of vessels per mm$^2$, is a
+measurement that relates to the formation of blood vessels that feed a
+tumor and allow it to grow and spread.
--- a/statistics/examples/example017.tex
+++ b/statistics/examples/example017.tex
@ -0,0 +1,5 @@
+A group of female college students were divided into three groups
+according to upper body strength. Their leg strength was tested by
+measuring how many consecutive times they could leg press 246 pounds
+before exhaustion. (The subjects were allowed only one second of rest
+between consecutive lifts.)
--- a/statistics/figs/2012-10-29_14-55-39_181.jpg
+++ b/statistics/figs/2012-10-29_14-55-39_181.jpg
--- a/statistics/figs/2012-10-29_14-56-59_866.jpg
+++ b/statistics/figs/2012-10-29_14-56-59_866.jpg
--- a/statistics/figs/2012-10-29_14-58-18_054.jpg
+++ b/statistics/figs/2012-10-29_14-58-18_054.jpg
--- a/statistics/figs/2012-10-29_14-59-05_984.jpg
+++ b/statistics/figs/2012-10-29_14-59-05_984.jpg
--- a/statistics/figs/2012-10-29_15-04-38_517.jpg
+++ b/statistics/figs/2012-10-29_15-04-38_517.jpg
--- a/statistics/figs/2012-10-29_15-09-25_388.jpg
+++ b/statistics/figs/2012-10-29_15-09-25_388.jpg
--- a/statistics/figs/2012-10-29_16-26-05_771.jpg
+++ b/statistics/figs/2012-10-29_16-26-05_771.jpg
--- a/statistics/figs/2012-10-29_16-29-35_312.jpg
+++ b/statistics/figs/2012-10-29_16-29-35_312.jpg
--- a/statistics/figs/2012-10-29_16-41-39_523.jpg
+++ b/statistics/figs/2012-10-29_16-41-39_523.jpg
--- a/statistics/figs/StandardErrorOrStandardDeviation.pdf
+++ b/statistics/figs/StandardErrorOrStandardDeviation.pdf
--- a/statistics/figs/bootstraptest.png
+++ b/statistics/figs/bootstraptest.png
--- a/statistics/figs/bootstraptest2.png
+++ b/statistics/figs/bootstraptest2.png
--- a/statistics/figs/example01.png
+++ b/statistics/figs/example01.png
--- a/statistics/figs/example02.png
+++ b/statistics/figs/example02.png
--- a/statistics/figs/example03.png
+++ b/statistics/figs/example03.png
--- a/statistics/figs/example04.png
+++ b/statistics/figs/example04.png
--- a/statistics/figs/hunger.png
+++ b/statistics/figs/hunger.png
--- a/statistics/figs/repetition0.png
+++ b/statistics/figs/repetition0.png
--- a/statistics/figs/repetition1.png
+++ b/statistics/figs/repetition1.png
--- a/statistics/figs/repetition2.png
+++ b/statistics/figs/repetition2.png
--- a/statistics/figs/repetition3.png
+++ b/statistics/figs/repetition3.png
--- a/statistics/figs/repetition4.png
+++ b/statistics/figs/repetition4.png
--- a/statistics/figs/repetition5.png
+++ b/statistics/figs/repetition5.png
--- a/statistics/figs/samplingDistribution.png
+++ b/statistics/figs/samplingDistribution.png
--- a/statistics/figs/samplingDistributionMedian00.pdf
+++ b/statistics/figs/samplingDistributionMedian00.pdf
--- a/statistics/figs/samplingDistributionMedian01.pdf
+++ b/statistics/figs/samplingDistributionMedian01.pdf
--- a/statistics/figs/statistic1.png
+++ b/statistics/figs/statistic1.png
--- a/statistics/figs/statistic2.png
+++ b/statistics/figs/statistic2.png
--- a/statistics/figs/statistic3.png
+++ b/statistics/figs/statistic3.png
--- a/statistics/figs/statistic4.png
+++ b/statistics/figs/statistic4.png
--- a/statistics/lecture_statistics01.tex
+++ b/statistics/lecture_statistics01.tex
@ -22,14 +22,19 @@
  % \useoutertheme{miniframes}
 }

-\AtBeginSection[]
+\AtBeginSubsection[]
 {
  \begin{frame}<beamer>
    \begin{center}
      \Huge \insertsectionhead
    \end{center}
+    \tableofcontents[ 
+    currentsubsection, 
+    hideothersubsections, 
+    sectionstyle=show/hide, 
+    subsectionstyle=show/shaded, 
+] 
    % \frametitle{\insertsectionhead}
-    % \tableofcontents[currentsection,hideothersubsections]
  \end{frame}
 }
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
@ -84,24 +89,6 @@ Bernstein Center T\"ubingen}

 \end{frame} 

-\begin{frame} 
-  \frametitle{plan}
-  \setcounter{tocdepth}{1}
-  \tableofcontents 
-
-\end{frame} 
-\begin{frame} 
-  \frametitle{information}
-  \begin{itemize}
-  \item Samuels, M. L., Wittmer, J. A., \& Schaffner,
-    A. A. (2010). Statistics for the Life Sciences (4th ed.,
-    p. 668). Prentice Hall.
-  \item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
-    Ed.)Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
-    Hall. doi:10.1037/0012764
-  \item \url{http://stats.stackexchange.com}
-  \end{itemize}
-\end{frame} 

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % errorbars (error bar paper)
@ -170,7 +157,8 @@ Bernstein Center T\"ubingen}
 \end{frame}

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section[descriptive statistics, errorbars, and plots]{Day 1 -- descriptive statistics, errorbars, and plots}
+\section{Day 1 -- descriptive statistics and plots}
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{types of data}

@ -315,8 +303,8 @@ Bernstein Center T\"ubingen}
  \frametitle{exercise}
  \begin{task}{Spearman rank correlation}
    \begin{enumerate}
-    \item Use {\tt randi} to generate two  100-dimensional vectors
-      {\tt x,y} of random integers between $0$ and $10$. 
+    \item Use {\tt randi} to generate two  vectors
+      {\tt x,y} with $100$ random integers between $0$ and $10$ each. 
    \item Find out how to compute the Spearman
      rank correlation  $$\rho = 1- {\frac {6 \sum
          d_i^2}{n(n^2 - 1)}}$$ with Matlab. $d_i = x_i - y_i$ is the
@ -358,7 +346,6 @@ correlation coefficient does not have that property.
 \end{frame}

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{description of data and plotting}
 \subsection{what makes a good plot}
 %-------------------------------------------------------------
 \begin{frame}[fragile]
@ -522,7 +509,7 @@ correlation coefficient does not have that property.
 \end{frame}

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\subsection{nominal scale}
+\subsection{plotting data}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

 %-------------------------------------------------------------
@ -723,6 +710,23 @@ hold off
  \end{center}
 \end{frame}

+%-------------------------------------------------------------
+\begin{frame}[fragile]
+  \frametitle{robust statistics}
+  \begin{task}{When is a statistic called robust (leave-one-out)?}
+    \begin{itemize}
+    \item Generate an array with $20$ random numbers using {\tt
+        randn}.
+    \item Compute $20$ means: the $i^{th}$ mean is computed from the
+      data set {\em without} the $i^{th}$ example.
+    \item Repeat this with the median.
+    \item Make a bar plot that depicts the means of the computed means
+      and medians along with an appropriate measure of dispersion.
+    \item What can you observe? Do you understand why?
+    \end{itemize}
+  \end{task}
+\end{frame}
+
 %-------------------------------------------------------------
 \begin{frame}[fragile]
  \frametitle{plotting interval/ratio/absolute data}
@ -791,7 +795,13 @@ hold off
  ordinal vs. ordinal data (why not the bar chart?). 
 \end{frame}

-
+%-------------------------------------------------------------
+\begin{frame}[fragile]
+  \begin{center}
+    \Huge
+    That's it. 
+  \end{center}
+\end{frame}


 \end{document} 
--- a/statistics/lecture_statistics02.tex
+++ b/statistics/lecture_statistics02.tex
@ -0,0 +1,772 @@
+\documentclass{beamer}
+\usepackage{xcolor}
+\usepackage{listings}
+\usepackage{pgf}
+%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade} 
+%\usepackage{multimedia}
+\usepackage[latin1]{inputenc}
+\usepackage{amsmath}
+\usepackage{bm} 
+\usepackage[T1]{fontenc}
+\usepackage{hyperref}
+\usepackage{ulem}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>
+{
+  \usetheme{Singapore}
+  \setbeamercovered{opaque}
+  \usecolortheme{tuebingen}
+  \setbeamertemplate{navigation symbols}{}
+  \usefonttheme{default}
+  \useoutertheme{infolines}
+  % \useoutertheme{miniframes}
+}
+
+\AtBeginSubsection[]
+{
+  \begin{frame}<beamer>
+    \begin{center}
+      \Huge \insertsectionhead
+    \end{center}
+    \tableofcontents[ 
+    currentsubsection, 
+    hideothersubsections, 
+    sectionstyle=show/hide, 
+    subsectionstyle=show/shaded, 
+] 
+    % \frametitle{\insertsectionhead}
+  \end{frame}
+}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
+
+\setbeamertemplate{blocks}[rounded][shadow=true]
+
+\title[]{Scientific Computing -- Statistics}
+\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
+  University T\"ubingen\\
+Bernstein Center T\"ubingen}
+
+\institute[Scientific Computing]{}
+ \date{10/20/2014}
+%\logo{\pgfuseimage{logo}}
+
+\subject{Lectures}
+
+%%%%%%%%%% configuration for code
+\lstset{
+ basicstyle=\ttfamily,
+ numbers=left,
+ showstringspaces=false,
+ language=Matlab,
+ commentstyle=\itshape\color{darkgray},
+ keywordstyle=\color{blue},
+ stringstyle=\color{green},
+ backgroundcolor=\color{blue!10},
+ breaklines=true,
+ breakautoindent=true,
+ columns=flexible,
+ frame=single,
+ captionpos=b,
+ xleftmargin=1em,
+ xrightmargin=1em,
+ aboveskip=10pt
+ }
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\newcommand{\mycite}[1]{
+\begin{flushright}
+\tiny \color{black!80} #1
+\end{flushright}
+}
+
+\input{../latex/environments.tex}
+\makeatother
+ 
+\begin{document} 
+ 
+\begin{frame} 
+  \titlepage 
+
+\end{frame} 
+
+
+\begin{frame} 
+  \frametitle{information}
+  \begin{itemize}
+  \item Samuels, M. L., Wittmer, J. A., \& Schaffner,
+    A. A. (2010). Statistics for the Life Sciences (4th ed.,
+    p. 668). Prentice Hall.
+  \item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
+    Ed.) Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
+    Hall. doi:10.1037/0012764
+  \item \url{http://stats.stackexchange.com}
+  \end{itemize}
+\end{frame} 
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Day 2 -- errorbars, confidence intervals, and tests}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Types of evidence}
+\begin{frame}
+  \scriptsize
+  \frametitle{Examples}
+  \begin{itemize}
+  \item Before new drugs are given to human subjects, it is common
+    practice to first test them in dogs or other animals. In part of
+    one study, a new investigational drug was given to eight male and
+    eight female dogs at doses of 8 mg/kg and 25 mg/kg.  Within each
+    sex, the two doses were assigned at random to the eight dogs. Many
+    ``endpoints'' were measured, such as cholesterol, sodium, glucose,
+    and so on, from blood samples, in order to screen for toxicity
+    problems in the dogs before starting studies on humans.  One
+    endpoint was alkaline phosphatase level (or APL, measured in U/l).
+    For females, the effect of increasing the dose from 8 to 25 mg/kg
+    was positive, although small (the average APL increased from 133.5
+    to 143 U/l), but for males the effect of increasing the dose from
+    8 to 25 mg/kg was negative.\pause
+  \item On 15 July 1911, 65-year-old Mrs. Jane Decker was struck by
+    lightning while in her house. She had been deaf since birth, but
+    after being struck, she recovered her hearing, which led to a
+    headline in the New York Times, ``Lightning Cures Deafness.''
+    \pause
+  \item Some research has suggested that there is a genetic basis for
+    sexual orientation. One such study involved measuring the
+    midsagittal area of the anterior commissure (AC) of the brain for
+    30 homosexual men, 30 heterosexual men, and 30 heterosexual
+    women. The researchers found that the AC tends to be larger in
+    heterosexual women than in heterosexual men and that it is even
+    larger in homosexual men.
+  \end{itemize}
+  \mycite{Samuels, Wittmer, Schaffner 2010}
+\end{frame}
+
+
+\begin{frame}
+  \scriptsize
+  \frametitle{types of evidence}
+  \begin{center}
+    \Large
+    {\em experiment} \\ is better than\\ {\em observational study}\\ is
+    better than\\ {\em anecdotal evidence}
+  \end{center}
+\end{frame}
+
+
+\subsection{What is inferential statistics?}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}
+  \frametitle{sources of error in an experiment}
+  \begin{task}{Think about it for 2 min}
+    If you repeat a scientific experiment, why do you not get the same
+    result every time you repeat it?
+  \end{task}
+  \pause
+  \begin{itemize}
+  \item sampling error (a finite subset of the population of interest
+    is selected in each experiment)
+  \item nonsampling errors (e.g. noise, uncontrolled factors)
+  \end{itemize}
+\end{frame}
+
+% ----------------------------------------------------------
+\begin{frame}[fragile]
+\frametitle{statisticians are lazy}
+\Large
+\only<1>{
+  \begin{center}
+    \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-26-05_771.jpg}
+  \end{center}
+  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
+}\pause
+\only<2>{
+  \begin{center}
+    \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-41-39_523.jpg}
+  \end{center}
+  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
+}\pause
+\only<3>{
+  \begin{center}
+    \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-29-35_312.jpg}
+  \end{center}
+  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
+}
+\end{frame}
+
+% % ----------------------------------------------------------
+\begin{frame} 
+\frametitle{illustrating examples}
+\begin{question}{lung volume of smokers}
+  Assume you know the sampling distribution of the mean lung volume
+  of smokers. Would you believe that
+  the sample came from a group of smokers?
+  \begin{center}
+    \includegraphics[width=.6\linewidth]{figs/example01.png}
+  \end{center}
+\end{question}
+\end{frame}
+
+\begin{frame} 
+\frametitle{illustrating examples}
+\begin{question}{lung volume of smokers}
+  What about now? How would the sampling distribution change if I
+  change the population to (i) athletes, (ii) old people, (iii) all people?
+  \begin{center}
+    \includegraphics[width=.6\linewidth]{figs/example02.png}
+  \end{center}
+\end{question}
+\end{frame}
+
+
+\begin{frame} 
+\frametitle{illustrating examples}
+\begin{question}{Is this diet effective?}
+  \begin{center}
+    \includegraphics[width=.6\linewidth]{figs/example03.png}
+  \end{center}
+\end{question}
+\end{frame}
+
+\begin{frame} 
+\frametitle{illustrating examples}
+\begin{question}{Is this diet effective?}
+  What do you think now? 
+  \begin{center}
+    \includegraphics[width=.6\linewidth]{figs/example04.png}
+  \end{center}
+\end{question}
+\end{frame}
+
+% ----------------------------------------------------------
+\begin{frame} 
+\frametitle{the (imaginary) meta-study}
+\begin{center}
+  \only<1>{
+    \framesubtitle{finite sampling introduces variation: the sampling distribution}
+    \includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
+    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
+      Tests} 
+  }\pause 
+  \only<2>{
+    \framesubtitle{statistic vs. population parameter}
+    \includegraphics[width=.8\linewidth]{figs/statistic1.png}
+    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
+      Tests} 
+  }\pause
+  \only<3>{
+    \framesubtitle{statistic vs. population parameter}
+    \includegraphics[width=.8\linewidth]{figs/statistic2.png}
+    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
+      Tests} 
+  }\pause
+  \only<4>{
+    \framesubtitle{what parts of this diagram do we have in real life?}
+
+    \includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
+    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
+      Tests} 
+  }\pause
+  \only<5>{
+    \framesubtitle{what parts of this diagram do we have in real life?}
+
+    \includegraphics[width=.8\linewidth]{figs/statistic3.png}
+    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
+      Tests} 
+  }\pause
+  \only<6->{
+    \framesubtitle{what statistics does }
+    \begin{minipage}{1.0\linewidth}
+      \begin{minipage}{0.5\linewidth}
+        \includegraphics[width=1.\linewidth]{figs/statistic4.png}
+        \mycite{Hesterberg et al., Bootstrap Methods and Permutation
+          Tests}
+      \end{minipage}
+      \begin{minipage}{0.5\linewidth}
+        \begin{itemize}
+        \item it assumes, derives, or simulates the sampling
+          distribution\pause
+        \item the sampling distribution makes only sense if you think
+          about it in terms of the meta study\pause
+        \item  {\color{red} the sampling distribution is the key to
+            answering questions about the population from the value of
+            the statistic}
+        \end{itemize}
+      \end{minipage}
+    \end{minipage}
+  }
+
+\end{center}
+\end{frame}
+
+
+
+\begin{frame} 
+\frametitle{summary}
+\begin{itemize}
+\item In statistics, we use finite samples from a population to reason
+  about features of the population. \pause
+\item The particular feature of the population we are interested in is called
+  {\color{blue} population parameter}. We usually measure this
+  parameter in our finite sample as well
+  ({\color{blue}statistic}).\pause
+\item Because of variations due to finite sampling the statistic
+  almost never matches the population parameter. \pause
+\item Using the {\color{blue}sampling distribution} of the statistic, we make
+  statements about the relation between our statistic and the
+  population parameter. 
+\end{itemize}
+\end{frame}
+
+\subsection{Errorbars}
+% ----------------------------------------------------------
+\begin{frame} 
+\frametitle{illustrating example}
+
+As part of a study of the development of the thymus gland, researchers
+weighed the glands of $50$ chick embryos after 14 days of
+incubation. The following plot depicts the mean thymus gland weights (in mg):
+\mycite{modified from SWS exercise 6.3.3.}
+\pause
+{\bf Which of the two bar plots is the correct way of displaying the
+  data?}
+
+\begin{columns}
+  \begin{column}[l]{.5\linewidth}
+    \includegraphics[width=\linewidth]{figs/StandardErrorOrStandardDeviation.pdf}
+  \end{column}
+  \begin{column}[r]{.5\linewidth}
+    \pause That depends on what you want to say
+    \begin{itemize}
+    \item To give a measure of variability in the data: use the
+      {\color{blue} standard deviation $\hat\sigma =
+        \sqrt{\frac{1}{n-1}\sum_{i=1}^n (x_i - \hat\mu)^2}$}
+    \item To make a statement about the variability in the mean
+      estimation: use {\color{blue}standard error $\frac{\hat\sigma}{\sqrt{n}}$}
+    \end{itemize}
+  \end{column}
+\end{columns}
+
+%%%%%%%%%%%%%%% GO ON HERE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% that depends: variability (descriptiv statistics, how variable is
+% the mean -> inferential, makes only sense in the meta-study setting)
+% first matlab exercise: simulate standard error
+% recommend paper for eyeballing test results from standard errors
+% from std of mean to confidence intervals
+% introduce bootstrapping (matlab exercise), then t-statistic
+% intervals
+% end with standard error of the median (and the thing from wikipedia)
+\end{frame}
+%------------------------------------------------------------------------------
+\begin{frame}
+  \frametitle{standard error}
+  \framesubtitle{bootstrapping}
+  
+  \begin{task}{standard error vs. standard deviation}
+
+    \begin{itemize}
+    \item Download the dataset {\tt thymusglandweights.dat} from Ilias
+    \item Write a program that loads the data into matlab, extracts
+      the first $80$ datapoints, and repeats the following steps
+      $m=500$ times:
+      \begin{enumerate}
+      \item draw $50$ data points from $x$ with replacement
+      \item compute their mean and store it
+      \end{enumerate}
+      Look at the standard deviation of the computed means.
+    \item Compare the result to the standard deviation of the original
+      $50$ data points and the standard error.
+    \end{itemize}
+  \end{task}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{standard error}
+\begin{lstlisting}
+load thymusglandweights.dat
+
+n = 80;   % number of data points used
+m = 500;  % number of bootstrap resamples
+x = thymusglandweights(1:n);
+
+% each resample: n values drawn from x with replacement
+mu = zeros(m,1);
+for i = 1:m
+    mu(i) = mean(x(randi(n,n,1)));
+end
+disp(['bootstrap standard error: ', num2str(std(mu))]);
+disp(['standard error: ', num2str(std(x)/sqrt(n))]);
+\end{lstlisting}
+\end{frame}
+%------------------------------------------------------------------------------
+\begin{frame}[fragile]
+  \frametitle{standard error}
+  \framesubtitle{bootstrapping}
+  \begin{itemize}
+  \item The sample standard error $\frac{\hat\sigma}{\sqrt{n}}$ is
+    {\color{blue}an estimate of the standard deviation of the means}
+    in repeated experiments which is computed from a single
+    experiment.
+  \item When you want to do statistical tests on the mean, it is
+    better to use the standard error, because one can eyeball
+    significance from it
+    \mycite{Cumming, G., Fidler, F., \& Vaux, D. L. (2007). Error bars
+      in experimental biology. The Journal of Cell Biology, 177(1),
+      7--11.}
+    \item {\color{blue}Bootstrapping} is a way to generate an estimate
+      of the {\color{blue}sampling distribution of any statistic}. Instead of
+      sampling from the true distribution, it samples from the
+      empirical distribution represented by your dataset.
+      \mycite{Efron, B., \& Tibshirani, R. J. (1994). An Introduction to the Bootstrap. Chapman and Hall/CRC}
+  \end{itemize}
+\end{frame}
+
+%------------------------------------------------------------------------------
+\begin{frame}[fragile]
+  \frametitle{standard error of the median?}
+  {\bf What kind of errorbars should we use for the median?}
+
+  It depends again:
+
+  {\bf Descriptive statistics}
+  \begin{itemize}
+  \item As a {\color{blue}descriptive statistic} one could use the {\em median
+      absolute deviation}: the median of the absolute differences of
+    the datapoints from the median.
+  \item Alternatively, one could bootstrap a standard error of the
+    median.
+  \end{itemize}
+  \pause
+  {\bf Inferential statistics}
+  \begin{itemize}
+  \item For {\color{blue}inferential statistics} one should use
+    something that gives the reader {\color{blue}information about
+      significance}. 
+  \item Here, {\color{blue} confidence intervals} are a better choice.
+  \end{itemize}
+\end{frame}
+
+% ----------------------------------------------------------
+\subsection{confidence intervals \& bootstrapping}
+%------------------------------------------------------------------------------
+\begin{frame} 
+\frametitle{confidence intervals}
+\begin{center}
+  \only<1>{
+    \vspace{.1cm}
+    \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-55-39_181.jpg}
+  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
+
+  }\pause
+  \only<2>{
+    \vspace{.1cm}
+    \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-56-59_866.jpg}
+  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
+  }\pause
+  \only<3>{
+    \vspace{.1cm}
+    \includegraphics[width=.4\linewidth]{figs/2012-10-29_14-58-18_054.jpg}
+  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
+  }\pause
+  \only<4>{
+    \vspace{.1cm}
+    \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-59-05_984.jpg}
+  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
+  }\pause
+  \only<5>{
+    \vspace{.1cm}
+    \includegraphics[width=.6\linewidth]{figs/2012-10-29_15-04-38_517.jpg}
+  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
+  }\pause
+  \only<6>{
+    \vspace{.1cm}
+    \includegraphics[width=.6\linewidth]{figs/2012-10-29_15-09-25_388.jpg}
+  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
+  }
+\end{center}
+\end{frame}
+
+% ----------------------------------------------------------
+\begin{frame}
+  \frametitle{confidence intervals for the median}
+  \begin{definition}{Confidence interval}
+    A $(1-\alpha)\cdot 100\%$ confidence interval for a statistic
+    $\hat\theta$ is an interval $\hat\theta \pm a$ such that the
+    population parameter $\theta$ is contained in that interval
+    $(1-\alpha)\cdot 100\%$ of the experiments.
+
+    An alternative way to put it is that $(\hat\theta - \theta) \in
+    [-a,a]$ in $(1-\alpha)\cdot 100\%$ of the cases.
+  \end{definition}
+
+
+\begin{columns}
+  \begin{column}[l]{.5\linewidth}
+  If we knew the sampling distribution of the median $\hat m$, could
+  we generate, e.g., a $95\%$ confidence interval?\pause
+  \vspace{.5cm}
+
+  Yes, we could choose the interval such that $\hat m - m$ lies in that
+  interval in $95\%$ of the cases.
+  \end{column}
+  \begin{column}[r]{.5\linewidth}
+    \only<1>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian00.pdf}}
+    \only<2>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian01.pdf}}
+  \end{column}
+\end{columns}
+
+
+
+\end{frame}
+
+% ----------------------------------------------------------
+\begin{frame}
+  \frametitle{confidence intervals for the mean via bootstrapping}
+  \framesubtitle{how to get the sampling distribution}
+
+  \begin{task}{bootstrapping a confidence interval for the mean}
+    \begin{itemize}
+    \item Use the same dataset as before.
+    \item Bootstrap $500$ means.
+    \item Plot their distribution.
+    \item Compute the $2.5\%$ and the $97.5\%$ percentile of the
+      $500$ means.
+    \item Mark them in the plot. 
+    \end{itemize}
+    These two numbers give you $\hat m -a$ and $\hat m + a$ for
+      the $95\%$ confidence interval.
+  \end{task}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{confidence intervals for the mean}
+\scriptsize
+\begin{lstlisting}
+load thymusglandweights.dat
+n = 80;   % number of data points used
+x = thymusglandweights(1:n);
+
+m = 500;  % number of bootstrap resamples
+me = zeros(m,1);
+for i = 1:m
+    me(i) = mean(x(randi(n,n,1)));
+end
+% 2.5% and 97.5% quantiles of the bootstrapped means
+disp(['bootstrap quantiles: ' , num2str(quantile(me,0.025)), ' ' ,num2str(quantile(me,1-0.025))]);
+
+\end{lstlisting}
+\end{frame}
+% ----------------------------------------------------------
+\begin{frame}
+  \frametitle{confidence intervals}
+  \framesubtitle{Notice the theme!}
+  \begin{enumerate}
+  \item choose a statistic
+  \item get the sampling distribution of the statistic (by theory or
+    simulation)
+  \item use that distribution to reason about the relation between the
+    true population parameter (e.g. $m$) and the sampled statistic
+    $\hat m$
+  \end{enumerate}
+  
+  \begin{center}
+    \color{blue}
+    This is the scaffold of most statistical techniques. Try to find
+    it and it can help you understand them.
+  \end{center}
+
+\end{frame}
+
+
+
+% ----------------------------------------------------------
+\begin{frame} 
+\frametitle{confidence interval for the mean}
+\framesubtitle{Let's search the pattern in the normal way of computing
+a confidence interval for the mean}  
+\begin{itemize}
+\item If the $x_1,...,x_n\sim \mathcal N(\mu,\sigma)$ are Gaussian, then $\hat\mu$ is Gaussian as
+  well
+\item What is the mean of $\hat\mu$? What is its standard deviation?\pause
+\item[]{\color{gray} $\langle\hat\mu\rangle_{X_1,...,X_n} = \mu$ and
+    $\mbox{std}(\hat\mu) = \frac{\sigma}{\sqrt{n}}$}\pause
+\item The problem is, that $\hat\mu \sim \mathcal N\left(\mu,
+    \frac{\sigma}{\sqrt{n}}\right)$ depends on unknown population
+  parameters.\pause
+\item However, $$\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}} \sim
+  \mbox{t-distribution with }n-1\mbox{ degrees of freedom}$$
+\item Therefore,
+\begin{align*}
+  P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
+\end{align*}
+\end{itemize}
+\end{frame}
+
+% ----------------------------------------------------------
+\begin{frame} 
+\frametitle{confidence interval for the mean}
+\begin{task}{Analytical confidence interval for the mean}
+ Extend your script to contain the analytical confidence
+ interval using
+\begin{align*}
+  P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
+\end{align*}
+Hint: Use the function {\tt tinv(0.025, n-1)} to get the value of
+$t_{2.5\%}$ and similar for $t_{97.5\%}$.
+\end{task}
+
+
+\end{frame}
+
+
+\begin{frame}[fragile]
+\frametitle{solution}
+\scriptsize
+\begin{lstlisting}
+load thymusglandweights.dat
+n = 80;   % number of data points used
+x = thymusglandweights(1:n);
+
+m = 500;  % number of bootstrap resamples
+me = zeros(m,1);
+for i = 1:m
+    me(i) = mean(x(randi(n,n,1)));
+end
+% t quantiles with n-1 degrees of freedom
+t025 = tinv(0.025, n-1);
+t975 = tinv(0.975, n-1);
+% standard error of the mean
+se = std(x)/sqrt(n);
+
+disp(['bootstrap quantiles: ' , num2str(quantile(me,0.025)), ' ' ,num2str(quantile(me,1-0.025))]);
+disp(['analytical CI: ' , num2str(mean(x)+t025*se), ' ' , num2str(mean(x)+t975*se)]);
+
+\end{lstlisting}
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{statistical tests}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}
+  \frametitle{ingredients of a test}
+  
+  \begin{itemize}
+  \item {\bf What is the goal of a test?}\pause
+  \item[] Check whether a measured
+  statistic looks different from what you would expect if there was no
+  effect.\pause
+  \item {\bf What are the ingredients of a test?}\pause
+  \item[] a test statistic (e.g. the mean, the median, ...) and a null
+    distribution\pause
+  \item {\bf What is a null distribution?}\pause
+  \item[] The sampling distribution of the statistic in case there is
+    no effect (i.e. the Null hypothesis is true).
+  \end{itemize}
+\end{frame}
+
+\begin{frame}
+  \frametitle{how tests work}
+  \begin{enumerate}
+  \item Choose a statistic.
+  \item Get a null distribution.
+  \item Compare your actually measured value with the Null
+    distribution.
+  \end{enumerate}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Example: one sample test}
+  \framesubtitle{step 2: get a Null distribution}
+  \scriptsize
+  Assume that the expected weight of a thymus gland from the
+  literature is 34.3g. We want to test whether the mean of our
+  thymus gland dataset is different from the expectation in the
+  literature. Comparing a statistic of a dataset against a fixed value
+  is called {\em one sample test}. 
+  \pause
+
+  \begin{itemize}
+  \item {\bf How could we simulate the distribution of the data if the
+      mean was really 34.3g?}\pause
+  \item[] Bootstrapping.
+  \end{itemize}
+
+  \begin{task}{generating a null distribution}
+    \begin{itemize}
+    \item Write a matlab program that bootstraps 2000 means from the
+      thymus gland dataset.
+    \item How can we adjust the data that it has mean 34.3g (remember,
+      we want to simulate the null distribution)?
+    \item Plot a histogram of these 2000 means.
+    \item Also indicate the actual mean of the data. 
+    \end{itemize}
+  \end{task}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Example: one sample test}
+  \framesubtitle{step 3: compare the actual value to the Null distribution}
+  \begin{minipage}{1.0\linewidth}
+    \begin{minipage}{0.5\linewidth}
+      The question we want to answer in this step is:
+      \begin{center}
+        \color{blue} Does the actually measured value look like it came
+        from the Null distribution?
+      \end{center}
+    \end{minipage}
+    \begin{minipage}{0.5\linewidth}
+      \includegraphics[width=\linewidth]{figs/bootstraptest.png}
+    \end{minipage}
+  \end{minipage}
+  {\bf How could we do this in our bootstrapping example?}\pause
+  \begin{itemize}
+  \item Set a threshold. \pause How do we choose the threshold? \pause Via type I error.\pause
+  \item Specify the type I error if we used the actual measured value
+    as threshold (p-value). Why is that a reasonable strategy?
+  \end{itemize}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Example: one sample test}
+  \framesubtitle{step 3: compare the actual value to the Null distribution}
+  \begin{task}{type I error and p-value}
+    Extend the script such that it
+    \begin{itemize}
+    \item computes the $5\%$ significance boundaries from the
+      distribution and plots them into the histogram.
+    \item computes a p-value.
+    \end{itemize}
+  \end{task}
+\end{frame}
+
+\begin{frame}
+  \frametitle{two sample test}
+  \framesubtitle{permutation test}
+  Brain weight: In 1888, P. Topinard published data on the brain
+  weights of hundreds of French men and women. Brain weights are given
+  in grams. The data can be downloaded from Ilias (example 002 from
+  yesterday). 
+
+  \vspace{.5cm}
+  {\bf How could we determine (similar to bootstrapping) whether the
+    mean brain weights of males and females are different?}
+  \begin{itemize}
+  \item What do we use as a statistic?
+  \item[]<2-> The difference of the means of the two groups.
+  \item How do we simulate the null distribution?
+  \item[]<3-> Shuffle the labels ``male'' and ``female'', compute
+    difference in means of two groups, and repeat. 
+  \end{itemize}
+  
+\end{frame}
+
+
+\begin{frame}
+  \begin{center}
+    \Huge That's it.
+  \end{center}
+\end{frame}
+
+\end{document} 
+ 
+ 
--- a/statistics/matlab/bootstrap_mean.m
+++ b/statistics/matlab/bootstrap_mean.m
@ -0,0 +1,13 @@
+% Compare the bootstrapped standard error of the mean with std(x)/sqrt(n).
+load thymusglandweights.dat
+x = thymusglandweights(1:50);
+m = 500;        % number of bootstrap resamples
+n = length(x);  % number of data points
+
+% each bootstrap mean is computed from n values drawn from x with replacement
+mu = zeros(m,1);
+for i = 1:m
+    mu(i) = mean(x(randi(n,n,1)));
+end
+fprintf('bootstrap standard error: %.4f\n', std(mu));  % single quotes: valid in every MATLAB release
+fprintf('standard error: %.4f\n', std(x)/sqrt(n));
--- a/statistics/matlab/ci_mean.m
+++ b/statistics/matlab/ci_mean.m
@ -0,0 +1,19 @@
+% 95% confidence interval for the mean: bootstrap percentiles vs. t-based interval.
+load thymusglandweights.dat
+
+n = 80;                       % number of data points used
+x = thymusglandweights(1:n);
+
+% bootstrap the sampling distribution of the mean (resampling with replacement)
+m = 5000;
+me = zeros(m,1);
+for i = 1:m
+    me(i) = mean(x(randi(n,n,1)));
+end
+
+% t quantiles with n-1 degrees of freedom and the standard error of the mean
+t025 = tinv(0.025, n-1);
+t975 = tinv(0.975, n-1);
+se = std(x)/sqrt(n);
+fprintf('bootstrap quantiles: %.4f, %.4f \n', quantile(me,0.025), quantile(me,0.975));
+fprintf('analytical quantile: %.4f, %.4f \n', mean(x)+t025*se, mean(x)+t975*se);
--- a/statistics/matlab/ci_media.m
+++ b/statistics/matlab/ci_media.m
@ -0,0 +1,17 @@
+% Confidence interval for the median: bootstrap percentiles vs. order statistics.
+load thymusglandweights.dat
+x = sort(thymusglandweights(1:50));   % sorted, so x(k) is the k-th smallest value
+n = length(x);
+m = 500;
+
+% order-statistic indices taken from Binomial(n, 0.5) quantiles
+a1 = binoinv(0.025, n, .5) - 1;
+a2 = binoinv(1-0.025, n, .5);
+
+% m bootstrap medians; column i of idx is the i-th resample drawn with replacement
+idx = randi(n, n, m);
+me = median(x(idx), 1)';
+
+lims = quantile(me, [0.025, 1-0.025]);
+fprintf('bootstrap quantiles: %.4f, %.4f \n', lims(1), lims(2));
+fprintf('analytical quantile: %.4f, %.4f \n', x(a1), x(a2));
--- a/statistics/matlab/tests.m
+++ b/statistics/matlab/tests.m
@ -0,0 +1,38 @@
+% One-sample bootstrap test: is the mean thymus gland weight different
+% from the literature value? The null distribution is bootstrapped from
+% the data after shifting them so that their mean equals the literature value.
+close all
+clear all
+load thymusglandweights.dat
+
+literature_mean = 34.3;
+x = thymusglandweights;
+n = length(x);
+y = x - mean(x) + literature_mean;  % same spread as x, mean moved to the null value
+
+% bootstrap the null distribution of the mean (the statistic compared below)
+m = 2000;
+me = zeros(m,1);
+for i = 1:m
+    me(i) = mean(y(randi(n,n,1)));
+end
+
+hist(me, 50);
+hold on
+mu = mean(x);                       % actually measured mean
+plot([mu,mu],[0,200],'--r','LineWidth',3);
+xlabel('thymus gland weights [g]');
+ylabel('frequency');
+title('bootstrapped null distribution');
+
+% 5% significance boundaries
+low = quantile(me,0.025);
+high =  quantile(me,0.975);
+disp(['the 5% boundaries are: ', num2str(low), ' ', num2str(high)]);
+plot([low,low],[0,200],'--g','LineWidth',3);
+plot([high,high],[0,200],'--g','LineWidth',3);
+hold off
+
+% two-sided p-value: fraction of null means farther from the literature value than mu
+pval = mean(abs(me-literature_mean) > abs(mu - literature_mean))  % no semicolon: prints the value
+legend('Null distribution','measured mean','5% significance boundaries')
--- a/statistics/matlab/thymusglandweights.dat
+++ b/statistics/matlab/thymusglandweights.dat