Merge branch 'master' of raven.am28.uni-tuebingen.de:scientificComputing

2014-10-15 18:43:10 +02:00 · 2014-10-15 18:43:10 +02:00 · 7bab4e9baa
commit 7bab4e9baa
parent 2a46741806 d9921cc9ae
74 changed files with 11678 additions and 26 deletions
--- a/statistics/assignments/Makefile
+++ b/statistics/assignments/Makefile
@ -0,0 +1,16 @@
 # Build one assignment bundle per example dataset.
 # For every dataset number the recipe:
 #   1. substitutes the placeholder 000 in day1.tex with the number,
 #   2. typesets the result with pdflatex,
 #   3. zips the PDF together with the matching CSV from ../data,
 #   4. removes all intermediate files again.
 # The list must contain each number exactly once (001 .. 017).
 .PHONY: all clean
 all:
 	for number in 001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 ; do \
 		echo $$number ; \
 		sed "s/000/$$number/g" day1.tex > tmp.tex; \
 		pdflatex tmp.tex; \
 		mv tmp.pdf day1_$$number.pdf; \
 		cp ../data/example$$number.csv ./ ;\
 		rm tmp.* ; \
 		zip example$$number.zip example$$number.csv  day1_$$number.pdf ; \
 		rm example$$number.csv ;\
 		rm day1_$$number.pdf ; \
 	done
 # Remove the generated zip bundles and the auto directory.
 # -f keeps the recipe going when no zip file exists, so the
 # auto directory is still removed on an already-clean tree.
 clean:
 	rm -f *.zip
 	rm -rf auto
--- a/statistics/assignments/day1.tex
+++ b/statistics/assignments/day1.tex
@ -0,0 +1,72 @@
 \documentclass[addpoints,10pt]{exam}
 \usepackage{url}
 \usepackage{color}
 \usepackage{hyperref}
 \pagestyle{headandfoot}
 \runningheadrule
 \firstpageheadrule
 \firstpageheader{Scientific Computing}{afternoon assignment day 01}{10/20/2014}
 %\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014}
 \firstpagefooter{}{}{}
 \runningfooter{}{}{}
 \pointsinmargin
 \bracketedpoints
 %\printanswers
 \shadedsolutions
 \begin{document}
 %%%%%%%%%%%%%%%%%%%%% Submission instructions %%%%%%%%%%%%%%%%%%%%%%%%%
 \sffamily
 %%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{questions}
  \question To publish scientific results, you will usually need to
  use statistical methods. Some journals provide you with a brief
  description of how they expect you to apply statistical methods. One
  example can be found in the author guidelines of the journal
  Nature. 
  Assume you collected the following dataset. You can download it from
  Ilias as {\tt example000.csv}. Here is the description of the dataset:
  \begin{quotation}
    \tt 
    \input{../examples/example000.tex}
  \end{quotation}
  \begin{parts}
    \part Download the dataset and write a script that loads it into
    matlab. 
    \part Think about the type of your data (I might ask you that
    tomorrow).
    \part Produce a plot that displays the data in an appropriate
    way. Make sure to respect all elements of good plotting we
    discussed today.
    \part Download the statistical checklist from Nature. Produce {\bf
      one} slide that contains the plot and a concise summary of your
    data which respects the requirements made by Nature (assume you
    are producing a figure legend for the figure in Nature). It is
    good style to avoid expressions like ``the plot shows'' or
    similar. 
    \part Upload your code, the data, and the slide as a zip to
    Ilias. Deadline is 19h00. Structure the zip such that you can
    present your program in front of the class. Several students will
    be asked to present their slide and their code tomorrow morning. 
  \end{parts}
 \end{questions}
 \end{document}
--- a/statistics/assignments/day2.tex
+++ b/statistics/assignments/day2.tex
@ -0,0 +1,46 @@
 \documentclass[addpoints,10pt]{exam}
 \usepackage{url}
 \usepackage{color}
 \usepackage{hyperref}
 \pagestyle{headandfoot}
 \runningheadrule
 \firstpageheadrule
 \firstpageheader{Scientific Computing}{afternoon assignment day 02}{10/21/2014}
 %\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014}
 \firstpagefooter{}{}{}
 \runningfooter{}{}{}
 \pointsinmargin
 \bracketedpoints
 %\printanswers
 \shadedsolutions
 \begin{document}
 %%%%%%%%%%%%%%%%%%%%% Submission instructions %%%%%%%%%%%%%%%%%%%%%%%%%
 \sffamily
 %%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{questions}
  \question Download example002 from yesterday (brain weights). 
  \begin{parts}
    \part Simulate a null distribution via permutation.
    \part Determine whether you can reject ``means are equal'' on a
    5\% significance level using the simulated null distribution.
    \part Check whether the means are different with a two sample
    t-test in matlab ({\tt ttest2}).
    \part Plot the data appropriately and generate a single slide that
    contains the plot and a short discussion of the test that respects
    the Nature statistical checklist (ignore all questions about
    whether the assumptions of the test are satisfied).
    \part Upload the slide and the code to Ilias. Deadline is 19h00.
  \end{parts}
 \end{questions}
 \end{document}
--- a/statistics/data/example001.csv
+++ b/statistics/data/example001.csv
@ -0,0 +1,43 @@
 MAO,Diagnosis
 6.8,I
 4.1,I
 7.3,I
 14.2,I
 18.8,I
 9.9,I
 7.4,I
 11.9,I
 5.2,I
 7.8,I
 7.8,I
 8.7,I
 12.7,I
 14.5,I
 10.7,I
 8.4,I
 9.7,I
 10.6,I
 7.8,II
 4.4,II
 11.4,II
 3.1,II
 4.3,II
 10.1,II
 1.5,II
 7.4,II
 5.2,II
 10,II
 3.7,II
 5.5,II
 8.5,II
 7.7,II
 6.8,II
 3.1,II
 6.4,III
 10.8,III
 1.1,III
 2.9,III
 4.5,III
 5.8,III
 9.4,III
 6.8,III
--- a/statistics/data/example002.csv
+++ b/statistics/data/example002.csv
@ -0,0 +1,186 @@
 Weight,Sex
 1607,m
 1157,m
 1248,m
 1310,m
 1398,m
 1237,m
 1232,m
 1343,m
 1380,m
 1274,m
 1245,m
 1286,m
 1508,m
 1105,m
 1123,m
 1198,m
 1300,m
 1249,m
 1185,m
 915,m
 1345,m
 1107,m
 1357,m
 1227,m
 1205,m
 1435,m
 1289,m
 1093,m
 1211,m
 1260,m
 1193,m
 1330,m
 1130,m
 1357,m
 1193,m
 1232,m
 1321,m
 1260,m
 1380,m
 1230,m
 1136,m
 1029,m
 1223,m
 1240,m
 1264,m
 1020,m
 1415,m
 1410,m
 1275,m
 1230,m
 1085,m
 1048,m
 1181,m
 1103,m
 1165,m
 1547,m
 1173,m
 1660,m
 1307,m
 1535,m
 1315,m
 1257,m
 1424,m
 1309,m
 1170,m
 1412,m
 1270,m
 1230,m
 1233,m
 1561,m
 1193,m
 1272,m
 1355,m
 1137,m
 1354,m
 1110,m
 1265,m
 1407,m
 1227,m
 1330,m
 1222,m
 1305,m
 1475,m
 1177,m
 1337,m
 1145,m
 1070,m
 1305,m
 1085,m
 1303,m
 1390,m
 1532,m
 1238,m
 1233,m
 1280,m
 1245,m
 1459,m
 1157,m
 1302,m
 1385,m
 1310,m
 1342,m
 1303,m
 1248,m
 1115,m
 1365,m
 1227,m
 1353,m
 1125,f
 1027,f
 1112,f
 983,f
 1090,f
 1247,f
 1045,f
 983,f
 972,f
 1045,f
 937,f
 1245,f
 1200,f
 1270,f
 1200,f
 1145,f
 1090,f
 1040,f
 1343,f
 1010,f
 1095,f
 1180,f
 1168,f
 1095,f
 1040,f
 1235,f
 1050,f
 1038,f
 1046,f
 1255,f
 1228,f
 1000,f
 1225,f
 1220,f
 1085,f
 1067,f
 1006,f
 1138,f
 1175,f
 1252,f
 1037,f
 958,f
 1020,f
 1068,f
 1107,f
 1317,f
 952,f
 1056,f
 1203,f
 1183,f
 1392,f
 1130,f
 1284,f
 996,f
 1228,f
 1087,f
 1035,f
 1170,f
 1064,f
 1250,f
 1129,f
 1088,f
 1037,f
 1117,f
 1095,f
 1027,f
 1027,f
 1190,f
 1153,f
 1037,f
 1120,f
 1212,f
 1024,f
 1135,f
 1177,f
 1096,f
 1114,f
--- a/statistics/data/example003.csv
+++ b/statistics/data/example003.csv
@ -0,0 +1,52 @@
 singtime
 4.3
 24.1
 6.6
 7.3
 4
 2.6
 4
 3.9
 9.4
 6.2
 1.6
 6.5
 0.2
 2.7
 17.4
 5.6
 2
 3.8
 1.2
 0.7
 1.6
 2.3
 3.7
 0.8
 0.5
 4.5
 11.5
 3.5
 0.8
 5.2
 2
 0.7
 1.7
 5
 2.8
 1.5
 3.9
 3.7
 4.5
 1.8
 1.2
 0.7
 0.7
 4.2
 4.7
 2.2
 1.4
 14.1
 8.6
 3.7
 3.5
--- a/statistics/data/example004.csv
+++ b/statistics/data/example004.csv
@ -0,0 +1,29 @@
 Pulse
 97
 111
 93
 98
 107
 77
 121
 88
 96
 123
 119
 91
 99
 95
 99
 102
 77
 85
 104
 106
 114
 85
 112
 102
 104
 94
 104
 98
--- a/statistics/data/example005.csv
+++ b/statistics/data/example005.csv
@ -0,0 +1,37 @@
 Branches
 23
 30
 54
 28
 31
 29
 34
 35
 30
 27
 21
 43
 51
 35
 51
 49
 35
 24
 26
 29
 21
 29
 37
 27
 28
 33
 33
 23
 37
 27
 40
 48
 41
 20
 30
 57
--- a/statistics/data/example006.csv
+++ b/statistics/data/example006.csv
@ -0,0 +1,32 @@
 Glucose
 81
 85
 93
 93
 99
 76
 75
 84
 78
 84
 81
 82
 89
 81
 96
 82
 74
 70
 84
 86
 80
 70
 131
 75
 88
 102
 115
 89
 82
 79
 106
--- a/statistics/data/example007.csv
+++ b/statistics/data/example007.csv
@ -0,0 +1,24 @@
 NerveCells
 35
 19
 33
 34
 17
 26
 16
 40
 28
 30
 23
 12
 27
 33
 22
 31
 28
 28
 35
 23
 23
 19
 29
--- a/statistics/data/example008.csv
+++ b/statistics/data/example008.csv
@ -0,0 +1,21 @@
 RateChange,Treatment
 28,Caffeine
 11,Caffeine
 -3,Caffeine
 14,Caffeine
 -2,Caffeine
 -4,Caffeine
 18,Caffeine
 2,Caffeine
 2,Caffeine
 26,Decaf
 1,Decaf
 0,Decaf
 -4,Decaf
 -4,Decaf
 14,Decaf
 16,Decaf
 8,Decaf
 0,Decaf
 18,Decaf
 -10,Decaf
--- a/statistics/data/example009.csv
+++ b/statistics/data/example009.csv
@ -0,0 +1,12 @@
 NEConcentration,Treatment
 543,Toluene
 523,Toluene
 431,Toluene
 635,Toluene
 564,Toluene
 549,Toluene
 535,Control
 385,Control
 502,Control
 412,Control
 387,Control
--- a/statistics/data/example010.csv
+++ b/statistics/data/example010.csv
@ -0,0 +1,13 @@
 Dopamine,Group
 3420,toluene
 2314,toluene
 1911,toluene
 2464,toluene
 2781,toluene
 2803,toluene
 1820,control
 1843,control
 1397,control
 1803,control
 2539,control
 1990,control
--- a/statistics/data/example011.csv
+++ b/statistics/data/example011.csv
@ -0,0 +1,10 @@
 Animal,Site I,Site II
 1,50.6,38
 2,39.2,18.6
 3,35.2,23.2
 4,17,19
 5,11.2,6.6
 6,14.2,16.4
 7,24.2,14.4
 8,37.4,37.6
 9,35.2,24.4
--- a/statistics/data/example012.csv
+++ b/statistics/data/example012.csv
@ -0,0 +1,10 @@
 Subject,mCPP,Placebo
 1,1.1,0
 2,1.3,-0.3
 3,1,0.6
 4,1.7,0.3
 5,1.4,-0.7
 6,0.1,-0.2
 7,0.5,0.6
 8,1.6,0.9
 9,-0.5,-2
--- a/statistics/data/example013.csv
+++ b/statistics/data/example013.csv
@ -0,0 +1,9 @@
 Animal,Control,Regenerating
 1,16.3,11.5
 2,4.8,3.6
 3,10.9,12.5
 4,14.2,6.3
 5,16.3,15.2
 6,9.9,8.1
 7,29.2,16.6
 8,22.4,13.1
--- a/statistics/data/example014.csv
+++ b/statistics/data/example014.csv
@ -0,0 +1,16 @@
 BodyTempDrop,AlcoholDose
 0.2,1.5
 1.9,1.5
 -0.1,1.5
 0.5,1.5
 0.8,1.5
 4,3
 3.2,3
 2.3,3
 2.9,3
 3.8,3
 3.3,6
 5.1,6
 5.3,6
 6.7,6
 5.9,6
--- a/statistics/data/example015.csv
+++ b/statistics/data/example015.csv
@ -0,0 +1,18 @@
 PeakFlow,Height
 733,174
 572,183
 500,176
 738,169
 616,183
 787,186
 866,178
 670,175
 550,172
 660,179
 575,171
 577,184
 783,200
 625,195
 470,176
 642,176
 856,190
--- a/statistics/data/example016.csv
+++ b/statistics/data/example016.csv
@ -0,0 +1,19 @@
 Patient,Before,After
 1,98,75
 2,100,60
 3,82,25
 4,100,55
 5,93,78
 6,119,102
 7,70,58
 8,78,70
 9,104,90
 10,70,50
 11,60,65
 12,88,45
 13,45,36
 14,159,144
 15,65,27
 16,98,90
 17,66,16
 18,67,53
--- a/statistics/data/example017.csv
+++ b/statistics/data/example017.csv
@ -0,0 +1,21 @@
 LegStrength,UpperBodyStrength
 55,low
 70,low
 45,low
 246,low
 240,low
 96,low
 225,low
 40,middle
 200,middle
 250,middle
 192,middle
 117,middle
 215,middle
 181,high
 85,high
 416,high
 228,high
 257,high
 316,high
 134,high
--- a/statistics/examples/example001.tex
+++ b/statistics/examples/example001.tex
@ -0,0 +1,6 @@
 MAO and Schizophrenia: Monoamine oxidase (MAO) is an enzyme that is
 thought to play a role in the regulation of behavior. To see whether
 different categories of schizophrenic patients have different levels
 of MAO activity, researchers collected blood specimens from 42
 patients and measured the MAO activity in the platelets. Values are
 expressed as nmol benzylaldehyde product per $10^8$ platelets per hour.
--- a/statistics/examples/example002.tex
+++ b/statistics/examples/example002.tex
@ -0,0 +1,3 @@
 Brain Weight: In 1888, P. Topinard published data on the brain weights
 of hundreds of French men and women. Brain weights are given in
 grams. 
--- a/statistics/examples/example003.tex
+++ b/statistics/examples/example003.tex
@ -0,0 +1,4 @@
 Cricket Singing Times: Male Mormon crickets (Anabrus simplex) sing to attract mates.
 A field researcher measured the duration of 51 unsuccessful songs---that is, the time
 until the singing male gave up and left his perch. The data is given
 in minutes.
--- a/statistics/examples/example004.tex
+++ b/statistics/examples/example004.tex
@ -0,0 +1,3 @@
 Pulse after Exercise: A group of 28 adults did some moderate exercise
 for five minutes and then measured their pulses. Data is given in
 beats/minute.
--- a/statistics/examples/example005.tex
+++ b/statistics/examples/example005.tex
@ -0,0 +1,5 @@
 A dendritic tree is a branched structure that emanates from the body
 of a nerve cell. As part of a study of brain development, 36 nerve
 cells were taken from the brains of newborn guinea pigs. The
 investigators counted the number of dendritic branch segments
 emanating from each nerve cell.
--- a/statistics/examples/example006.tex
+++ b/statistics/examples/example006.tex
@ -0,0 +1,4 @@
 For each of 31 healthy dogs, a veterinarian measured the glucose
 concentration in the anterior chamber of the right eye and also in the
 blood serum. The following data are the anterior chamber glucose
 measurements, expressed as a percentage of the blood glucose.
--- a/statistics/examples/example007.tex
+++ b/statistics/examples/example007.tex
@ -0,0 +1,5 @@
 A veterinary anatomist investigated the spatial arrangement of the
 nerve cells in the intestine of a pony.  He removed a block of tissue
 from the intestinal wall, cut the block into many equal sections, and
 counted the number of nerve cells in each of 23 randomly selected
 sections.
--- a/statistics/examples/example008.tex
+++ b/statistics/examples/example008.tex
@ -0,0 +1,8 @@
 Researchers were interested in the short-term effect that caffeine has
 on heart rate. They enlisted a group of volunteers and measured each
 person's resting heart rate. Then they had each subject drink 6 ounces
 of coffee. Nine of the subjects were given coffee containing caffeine
 and 11 were given decaffeinated coffee. After 10 minutes each person's
 heart rate was measured again. The data in the table contains the
 change in heart rate; a positive number means that heart rate went up
 and a negative number means that heart rate went down.
--- a/statistics/examples/example009.tex
+++ b/statistics/examples/example009.tex
@ -0,0 +1,9 @@
 Toluene and the Brain: Abuse of substances containing toluene (for
 example, glue) can produce various neurological symptoms. In an
 investigation of the mechanism of these toxic effects, researchers
 measured the concentrations of various chemicals in the brains of rats
 that had been exposed to a toluene-laden atmosphere, and also in
 unexposed control rats. The concentrations of the brain chemical
 norepinephrine (NE) in the medulla region of the brain, for six
 toluene-exposed rats and five control rats, are given in the accompanying
 data file in ng/g.
--- a/statistics/examples/example010.tex
+++ b/statistics/examples/example010.tex
@ -0,0 +1,3 @@
 In a pharmacological study, researchers measured the concentration of
 the brain chemical dopamine in six rats exposed to toluene and six
 control rats. Numbers are specified in ng/g.
--- a/statistics/examples/example011.tex
+++ b/statistics/examples/example011.tex
@ -0,0 +1,6 @@
 Nerve Cell Density: For each of nine horses, a veterinary anatomist
 measured the density of nerve cells at specified sites in the
 intestine. The results for site I (midregion of jejunum) and site II
 (mesenteric region of jejunum) are given in the accompanying dataset.
 Each density value is the average of counts of nerve cells in five
 equal sections of tissue.
--- a/statistics/examples/example012.tex
+++ b/statistics/examples/example012.tex
@ -0,0 +1,6 @@
 Hunger Rating: During a weight loss study, each of nine subjects was
 given either the active drug m-chlorophenylpiperazine (mCPP) for two
 weeks and then a placebo for another two weeks, or else was given the
 placebo for the first two weeks and then mCPP for the second two
 weeks. As part of the study the subjects were asked to rate how hungry
 they were at the end of each two-week period. 
--- a/statistics/examples/example013.tex
+++ b/statistics/examples/example013.tex
@ -0,0 +1,10 @@
 Certain types of nerve cells have the ability to regenerate a part of
 the cell that has been amputated.  In an early study of this process,
 measurements were made on the nerves in the spinal cord in rhesus
 monkeys. Nerves emanating from the left side of the cord were cut,
 while nerves from the right side were kept intact. During the
 regeneration process, the content of creatine phosphate (CP) was
 measured in the left and the right portion of the spinal cord. The
 following table shows the data for the right (control) side (Y1), and
 for the left (regenerating) side (Y2). The units of measurement are mg
 CP per 100 gm tissue.
--- a/statistics/examples/example014.tex
+++ b/statistics/examples/example014.tex
@ -0,0 +1,9 @@
 In an investigation of the physiological effects of alcohol
 (ethanol), 15 mice were randomly allocated to three treatment groups,
 each to receive a different oral dose of alcohol. The dosage levels
 were 1.5, 3.0, and 6.0 g alcohol/kg body weight. The body temperature
 of each mouse was measured immediately before the alcohol was given
 and again 20 minutes afterward. The accompanying data shows the drop
 (before minus after) in body temperature for each mouse. (The negative
 value $-0.1$ refers to a mouse whose temperature rose rather than
 fell.)
--- a/statistics/examples/example015.tex
+++ b/statistics/examples/example015.tex
@ -0,0 +1,5 @@
 The peak flow rate of a person is the fastest rate
 at which the person can expel air after taking a deep breath.
 Peak flow rate is measured in units of liters per minute and
 gives an indication of the person's respiratory health. Flow is given
 in l/min, height in cm. 
--- a/statistics/examples/example016.tex
+++ b/statistics/examples/example016.tex
@ -0,0 +1,6 @@
 An experiment was conducted to study the effect of tamoxifen on
 patients with cervical cancer. One of the measurements made, both
 before and again after tamoxifen was given, was microvessel density
 (MVD). MVD, which is measured as number of vessels per mm$^2$, is a
 measurement that relates to the formation of blood vessels that feed a
 tumor and allow it to grow and spread.
--- a/statistics/examples/example017.tex
+++ b/statistics/examples/example017.tex
@ -0,0 +1,5 @@
 A group of female college students were divided into three groups
 according to upper body strength. Their leg strength was tested by
 measuring how many consecutive times they could leg press 246 pounds
 before exhaustion. (The subjects were allowed only one second of rest
 between consecutive lifts.)
--- a/statistics/figs/2012-10-29_14-55-39_181.jpg
+++ b/statistics/figs/2012-10-29_14-55-39_181.jpg
--- a/statistics/figs/2012-10-29_14-56-59_866.jpg
+++ b/statistics/figs/2012-10-29_14-56-59_866.jpg
--- a/statistics/figs/2012-10-29_14-58-18_054.jpg
+++ b/statistics/figs/2012-10-29_14-58-18_054.jpg
--- a/statistics/figs/2012-10-29_14-59-05_984.jpg
+++ b/statistics/figs/2012-10-29_14-59-05_984.jpg
--- a/statistics/figs/2012-10-29_15-04-38_517.jpg
+++ b/statistics/figs/2012-10-29_15-04-38_517.jpg
--- a/statistics/figs/2012-10-29_15-09-25_388.jpg
+++ b/statistics/figs/2012-10-29_15-09-25_388.jpg
--- a/statistics/figs/2012-10-29_16-26-05_771.jpg
+++ b/statistics/figs/2012-10-29_16-26-05_771.jpg
--- a/statistics/figs/2012-10-29_16-29-35_312.jpg
+++ b/statistics/figs/2012-10-29_16-29-35_312.jpg
--- a/statistics/figs/2012-10-29_16-41-39_523.jpg
+++ b/statistics/figs/2012-10-29_16-41-39_523.jpg
--- a/statistics/figs/StandardErrorOrStandardDeviation.pdf
+++ b/statistics/figs/StandardErrorOrStandardDeviation.pdf
--- a/statistics/figs/bootstraptest.png
+++ b/statistics/figs/bootstraptest.png
--- a/statistics/figs/bootstraptest2.png
+++ b/statistics/figs/bootstraptest2.png
--- a/statistics/figs/example01.png
+++ b/statistics/figs/example01.png
--- a/statistics/figs/example02.png
+++ b/statistics/figs/example02.png
--- a/statistics/figs/example03.png
+++ b/statistics/figs/example03.png
--- a/statistics/figs/example04.png
+++ b/statistics/figs/example04.png
--- a/statistics/figs/hunger.png
+++ b/statistics/figs/hunger.png
--- a/statistics/figs/repetition0.png
+++ b/statistics/figs/repetition0.png
--- a/statistics/figs/repetition1.png
+++ b/statistics/figs/repetition1.png
--- a/statistics/figs/repetition2.png
+++ b/statistics/figs/repetition2.png
--- a/statistics/figs/repetition3.png
+++ b/statistics/figs/repetition3.png
--- a/statistics/figs/repetition4.png
+++ b/statistics/figs/repetition4.png
--- a/statistics/figs/repetition5.png
+++ b/statistics/figs/repetition5.png
--- a/statistics/figs/samplingDistribution.png
+++ b/statistics/figs/samplingDistribution.png
--- a/statistics/figs/samplingDistributionMedian00.pdf
+++ b/statistics/figs/samplingDistributionMedian00.pdf
--- a/statistics/figs/samplingDistributionMedian01.pdf
+++ b/statistics/figs/samplingDistributionMedian01.pdf
--- a/statistics/figs/statistic1.png
+++ b/statistics/figs/statistic1.png
--- a/statistics/figs/statistic2.png
+++ b/statistics/figs/statistic2.png
--- a/statistics/figs/statistic3.png
+++ b/statistics/figs/statistic3.png
--- a/statistics/figs/statistic4.png
+++ b/statistics/figs/statistic4.png
--- a/statistics/lecture_statistics01.tex
+++ b/statistics/lecture_statistics01.tex
@ -22,14 +22,19 @@
  % \useoutertheme{miniframes}
 }
-\AtBeginSection[]
+\AtBeginSubsection[]
 {
  \begin{frame}<beamer>
    \begin{center}
      \Huge \insertsectionhead
    \end{center}
    \tableofcontents[ 
    currentsubsection, 
    hideothersubsections, 
    sectionstyle=show/hide, 
    subsectionstyle=show/shaded, 
 ] 
    % \frametitle{\insertsectionhead}
    % \tableofcontents[currentsection,hideothersubsections]
  \end{frame}
 }
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
@ -84,25 +89,7 @@ Bernstein Center T\"ubingen}
 \end{frame} 
 \begin{frame} 
  \frametitle{plan}
  \setcounter{tocdepth}{1}
  \tableofcontents 
 \end{frame} 
 \begin{frame} 
  \frametitle{information}
  \begin{itemize}
  \item Samuels, M. L., Wittmer, J. A., \& Schaffner,
    A. A. (2010). Statistics for the Life Sciences (4th ed.,
    p. 668). Prentice Hall.
  \item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
    Ed.)Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
    Hall. doi:10.1037/0012764
  \item \url{http://stats.stackexchange.com}
  \end{itemize}
 \end{frame} 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % errorbars (error bar paper)
 % confidence intervals (sources of error)
@ -170,7 +157,8 @@ Bernstein Center T\"ubingen}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section[descriptive statistics, errorbars, and plots]{Day 1 -- descriptive statistics, errorbars, and plots}
+\section{Day 1 -- descriptive statistics and plots}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{types of data}
@ -315,8 +303,8 @@ Bernstein Center T\"ubingen}
  \frametitle{exercise}
  \begin{task}{Spearman rank correlation}
    \begin{enumerate}
-    \item Use {\tt randi} to generate two  100-dimensional vectors
+    \item Use {\tt randi} to generate two  vectors
-      {\tt x,y} of random integers between $0$ and $10$. 
+      {\tt x,y} with $100$ random integers between $0$ and $10$ each. 
    \item Find out how to compute the Spearman
      rank correlation  $$\rho = 1- {\frac {6 \sum
          d_i^2}{n(n^2 - 1)}}$$ with Matlab. $d_i = x_i - y_i$ is the
@ -358,7 +346,6 @@ correlation coefficient does not have that property.
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{description of data and plotting}
 \subsection{what makes a good plot}
 %-------------------------------------------------------------
 \begin{frame}[fragile]
@ -522,7 +509,7 @@ correlation coefficient does not have that property.
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\subsection{nominal scale}
+\subsection{plotting data}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %-------------------------------------------------------------
@ -723,6 +710,23 @@ hold off
  \end{center}
 \end{frame}
 %-------------------------------------------------------------
 \begin{frame}[fragile,fragile]
  \frametitle{robust statistics}
  \begin{task}{When is a statistic called robust (leave-one-out)?}
    \begin{itemize}
    \item Generate an array with $20$ random numbers using {\tt
        randn}.
    \item Compute $20$ means: the $i^{th}$ mean is computed from the
      data set {\em without} the $i^{th}$ example.
    \item Repeat this with the median.
    \item Make a bar plot that depicts the means of the computed means
      and medians along with an appropriate measure of dispersion.
    \item What can you observe? Do you understand why?
    \end{itemize}
  \end{task}
 \end{frame}
 %-------------------------------------------------------------
 \begin{frame}[fragile]
  \frametitle{plotting interval/ratio/absolute data}
@ -791,7 +795,13 @@ hold off
  ordinal vs. ordinal data (why not the bar chart?). 
 \end{frame}
-
+%-------------------------------------------------------------
 \begin{frame}[fragile]
  \begin{center}
    \Huge
    That's it. 
  \end{center}
 \end{frame}
 \end{document} 
--- a/statistics/lecture_statistics02.tex
+++ b/statistics/lecture_statistics02.tex
@ -0,0 +1,772 @@
 \documentclass{beamer}
 \usepackage{xcolor}
 \usepackage{listings}
 \usepackage{pgf}
 %\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade} 
 %\usepackage{multimedia}
 \usepackage[latin1]{inputenc}
 \usepackage{amsmath}
 \usepackage{bm} 
 \usepackage[T1]{fontenc}
 \usepackage{hyperref}
 \usepackage{ulem}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \mode<presentation>
 {
  \usetheme{Singapore}
  \setbeamercovered{opaque}
  \usecolortheme{tuebingen}
  \setbeamertemplate{navigation symbols}{}
  \usefonttheme{default}
  \useoutertheme{infolines}
  % \useoutertheme{miniframes}
 }
 \AtBeginSubsection[]
 {
  \begin{frame}<beamer>
    \begin{center}
      \Huge \insertsectionhead
    \end{center}
    \tableofcontents[ 
    currentsubsection, 
    hideothersubsections, 
    sectionstyle=show/hide, 
    subsectionstyle=show/shaded, 
 ] 
    % \frametitle{\insertsectionhead}
  \end{frame}
 }
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
 \setbeamertemplate{blocks}[rounded][shadow=true]
 \title[]{Scientific Computing -- Statistics}
 \author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
  University T\"ubingen\\
 Bernstein Center T\"ubingen}
 \institute[Scientific Computing]{}
 \date{10/20/2014}
 %\logo{\pgfuseimage{logo}}
 \subject{Lectures}
 %%%%%%%%%% configuration for code
 \lstset{
 basicstyle=\ttfamily,
 numbers=left,
 showstringspaces=false,
 language=Matlab,
 commentstyle=\itshape\color{darkgray},
 keywordstyle=\color{blue},
 stringstyle=\color{green},
 backgroundcolor=\color{blue!10},
 breaklines=true,
 breakautoindent=true,
 columns=flexible,
 frame=single,
 captionpos=b,
 xleftmargin=1em,
 xrightmargin=1em,
 aboveskip=10pt
 }
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \newcommand{\mycite}[1]{
 \begin{flushright}
 \tiny \color{black!80} #1
 \end{flushright}
 }
 \input{../latex/environments.tex}
 \makeatother
 \begin{document} 
 \begin{frame} 
  \titlepage 
 \end{frame} 
 \begin{frame} 
  \frametitle{information}
  \begin{itemize}
  \item Samuels, M. L., Wittmer, J. A., \& Schaffner,
    A. A. (2010). Statistics for the Life Sciences (4th ed.,
    p. 668). Prentice Hall.
  \item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
    Ed.)Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
    Hall. doi:10.1037/0012764
  \item \url{http://stats.stackexchange.com}
  \end{itemize}
 \end{frame} 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{Day 2 -- errorbars, confidence intervals, and tests}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Types of evidence}
 \begin{frame}
  \scriptsize
  \frametitle{Examples}
  \begin{itemize}
  \item Before new drugs are given to human subjects, it is common
    practice to first test them in dogs or other animals. In part of
    one study, a new investigational drug was given to eight male and
    eight female dogs at doses of 8 mg/kg and 25 mg/kg.  Within each
    sex, the two doses were assigned at random to the eight dogs. Many
    ``endpoints'' were measured, such as cholesterol, sodium, glucose,
    and so on, from blood samples, in order to screen for toxicity
    problems in the dogs before starting studies on humans.  One
    endpoint was alkaline phosphatase level (or APL, measured in U/l).
    For females, the effect of increasing the dose from 8 to 25 mg/kg
    was positive, although small (the average APL increased from 133.5
    to 143 U/l), but for males the effect of increasing the dose from
    8 to 25 mg/kg was negative.\pause
  \item On 15 July 1911, 65-year-old Mrs. Jane Decker was struck by
    lightning while in her house. She had been deaf since birth, but
    after being struck, she recovered her hearing, which led to a
    headline in the New York Times, ``Lightning Cures Deafness.''
    \pause
  \item Some research has suggested that there is a genetic basis for
    sexual orientation. One such study involved measuring the
    midsagittal area of the anterior commissure (AC) of the brain for
    30 homosexual men, 30 heterosexual men, and 30 heterosexual
    women. The researchers found that the AC tends to be larger in
    heterosexual women than in heterosexual men and that it is even
    larger in homosexual men.
  \end{itemize}
  \mycite{Samuels, Wittmer, Schaffner 2010}
 \end{frame}
 \begin{frame}
  \scriptsize
  \frametitle{types of evidence}
  \begin{center}
    \Large
    {\em experiment} \\ is better than\\ {\em observational study}\\ is
    better than\\ {\em anecdotal evidence}
  \end{center}
 \end{frame}
 \subsection{What is inferential statistics?}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
  \frametitle{sources of error in an experiment}
  \begin{task}{Think about it for 2 min}
    If you repeat a scientific experiment, why do you not get the same
    result every time you repeat it?
  \end{task}
  \pause
  \begin{itemize}
  \item sampling error (a finite subset of the population of interest
    is selected in each experiment)
  \item nonsampling errors (e.g. noise, uncontrolled factors)
  \end{itemize}
 \end{frame}
 % ----------------------------------------------------------
 \begin{frame}[fragile]
 \frametitle{statisticians are lazy}
 \Large
 \only<1>{
  \begin{center}
    \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-26-05_771.jpg}
  \end{center}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
 }\pause
 \only<2>{
  \begin{center}
    \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-41-39_523.jpg}
  \end{center}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
 }\pause
 \only<3>{
  \begin{center}
    \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-29-35_312.jpg}
  \end{center}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
 }
 \end{frame}
 % % ----------------------------------------------------------
 \begin{frame} 
 \frametitle{illustrating examples}
 \begin{question}{lung volume of smokers}
  Assume you know the sampling distribution of the mean lung volume
  of smokers. Would you believe that
  the sample came from a group of smokers?
  \begin{center}
    \includegraphics[width=.6\linewidth]{figs/example01.png}
  \end{center}
 \end{question}
 \end{frame}
 \begin{frame} 
 \frametitle{illustrating examples}
 \begin{question}{lung volume of smokers}
  What about now? How would the sampling distribution change if I
  change the population to (i) athletes, (ii) old people, (iii) all people?
  \begin{center}
    \includegraphics[width=.6\linewidth]{figs/example02.png}
  \end{center}
 \end{question}
 \end{frame}
 \begin{frame} 
 \frametitle{illustrating examples}
 \begin{question}{Is this diet effective?}
  \begin{center}
    \includegraphics[width=.6\linewidth]{figs/example03.png}
  \end{center}
 \end{question}
 \end{frame}
 \begin{frame} 
 \frametitle{illustrating examples}
 \begin{question}{Is this diet effective?}
  What do you think now? 
  \begin{center}
    \includegraphics[width=.6\linewidth]{figs/example04.png}
  \end{center}
 \end{question}
 \end{frame}
 % ----------------------------------------------------------
 \begin{frame} 
 \frametitle{the (imaginary) meta-study}
 \begin{center}
  \only<1>{
    \framesubtitle{finite sampling introduces variation: the sampling distribution}
    \includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
      Tests} 
  }\pause 
  \only<2>{
    \framesubtitle{statistic vs. population parameter}
    \includegraphics[width=.8\linewidth]{figs/statistic1.png}
    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
      Tests} 
  }\pause
  \only<3>{
    \framesubtitle{statistic vs. population parameter}
    \includegraphics[width=.8\linewidth]{figs/statistic2.png}
    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
      Tests} 
  }\pause
  \only<4>{
    \framesubtitle{what parts of this diagram do we have in real life?}
    \includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
      Tests} 
  }\pause
  \only<5>{
    \framesubtitle{what parts of this diagram do we have in real life?}
    \includegraphics[width=.8\linewidth]{figs/statistic3.png}
    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
      Tests} 
  }\pause
  \only<6->{
    \framesubtitle{what statistics does }
    \begin{minipage}{1.0\linewidth}
      \begin{minipage}{0.5\linewidth}
        \includegraphics[width=1.\linewidth]{figs/statistic4.png}
        \mycite{Hesterberg et al., Bootstrap Methods and Permutation
          Tests}
      \end{minipage}
      \begin{minipage}{0.5\linewidth}
        \begin{itemize}
        \item it assumes, derives, or simulates the sampling
          distribution\pause
        \item the sampling distribution makes only sense if you think
          about it in terms of the meta study\pause
        \item  {\color{red} the sampling distribution is the key to
            answering questions about the population from the value of
            the statistic}
        \end{itemize}
      \end{minipage}
    \end{minipage}
  }
 \end{center}
 \end{frame}
 \begin{frame} 
 \frametitle{summary}
 \begin{itemize}
 \item In statistics, we use finite samples from a population to reason
  about features of the population. \pause
 \item The particular feature of the population we are interested in is called
  {\color{blue} population parameter}. We usually measure this
  parameter in our finite sample as well
  ({\color{blue}statistic}).\pause
 \item Because of variations due to finite sampling the statistic
  almost never matches the population parameter. \pause
 \item Using the {\color{blue}sampling distribution} of the statistic, we make
  statements about the relation between our statistic and the
  population parameter. 
 \end{itemize}
 \end{frame}
 \subsection{Errorbars}
 % ----------------------------------------------------------
 \begin{frame} 
 \frametitle{illustrating example}
 As part of a study of the development of the thymus gland, researchers
 weighed the glands of $50$ chick embryos after 14 days of
 incubation. The following plot depicts the mean thymus gland weights (in mg): 
 \mycite{modified from SWS exercise 6.3.3.}
 \pause
 {\bf Which of the two bar plots is the correct way of displaying the
  data?}
 \begin{columns}
  \begin{column}[l]{.5\linewidth}
    \includegraphics[width=\linewidth]{figs/StandardErrorOrStandardDeviation.pdf}
  \end{column}
  \begin{column}[r]{.5\linewidth}
    \pause That depends on what you want to say
    \begin{itemize}
    \item To give a measure of variability in the data: use the
      {\color{blue} standard deviation $\hat\sigma =
        \sqrt{\frac{1}{n-1}\sum_{i=1}^n (x_i - \hat\mu)^2}$}
    \item To make a statement about the variability in the mean
      estimation: use {\color{blue}standard error $\frac{\hat\sigma}{\sqrt{n}}$}
    \end{itemize}
  \end{column}
 \end{columns}
 %%%%%%%%%%%%%%% GO ON HERE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % that depends: variability (descriptiv statistics, how variable is
 % the mean -> inferential, makes only sense in the meta-study setting)
 % first matlab exercise: simulate standard error
 % recommend paper for eyeballing test results from standard errors
 % from std of mean to confidence intervals
 % introduce bootstrapping (matlab exercise), then t-statistic
 % intervals
 % end with standard error of the median (and the thing from wikipedia)
 \end{frame}
 %------------------------------------------------------------------------------
 \begin{frame}
  \frametitle{standard error}
  \framesubtitle{bootstrapping}
  \begin{task}{standard error vs. standard deviation}
    \begin{itemize}
    \item Download the dataset {\tt thymusglandweights.dat} from Ilias
    \item Write a program that loads the data into matlab, extracts
      the first $80$ data points, and repeats the following steps
      $m=500$ times:
      \begin{enumerate}
      \item draw $80$ data points from $x$ with replacement
      \item compute their mean and store it
      \end{enumerate}
      Look at the standard deviation of the computed means.
    \item Compare the result to the standard deviation of the original
      $80$ data points and the standard error.
    \end{itemize}
  \end{task}
 \end{frame}
 \begin{frame}[fragile]
  \frametitle{standard error}
 \begin{lstlisting}
 % bootstrap the standard error of the mean
 load thymusglandweights.dat
 n = 80;
 m = 500;
 x = thymusglandweights(1:n);
 mu = zeros(m,1);
 for i = 1:m
    % mean of n values drawn from x with replacement
    mu(i) = mean(x(randi(n,n,1)));
 end
 disp(['bootstrap standard error: ', num2str(std(mu))]);
 disp(['standard error: ', num2str(std(x)/sqrt(n))]);
 \end{lstlisting}
 \end{frame}
 %------------------------------------------------------------------------------
 \begin{frame}[fragile]
  \frametitle{standard error}
  \framesubtitle{bootstrapping}
  \begin{itemize}
  \item The sample standard error $\frac{\hat\sigma}{\sqrt{n}}$ is
    {\color{blue}an estimate of the standard deviation of the means}
    in repeated experiments which is computed from a single
    experiment.
  \item When you want to do statistical tests on the mean, it is
    better to use the standard error, because one can eyeball
    significance from it
    \mycite{Cumming, G., Fidler, F., \& Vaux, D. L. (2007). Error bars
      in experimental biology. The Journal of Cell Biology, 177(1),
      7--11.}
    \item {\color{blue}Bootstrapping} is a way to generate an estimate
      of the {\color{blue}sampling distribution of any statistic}. Instead of
      sampling from the true distribution, it samples from the
      empirical distribution represented by your dataset.
      \mycite{Efron, B., \& Tibshirani, R. J. (1994). An Introduction to the Bootstrap. Chapman and Hall/CRC}
  \end{itemize}
 \end{frame}
 %------------------------------------------------------------------------------
 \begin{frame}[fragile]
  \frametitle{standard error of the median?}
  {\bf What kind of errorbars should we use for the median?}
  It depends again:
  {\bf Descriptive statistics}
  \begin{itemize}
  \item As a {\color{blue}descriptive statistic} one could use the {\em median
      absolute deviation}: the median of the absolute differences of
    the datapoints from the median.
  \item Alternatively, one could bootstrap a standard error of the
    median.
  \end{itemize}
  \pause
  {\bf Inferential statistics}
  \begin{itemize}
  \item For {\color{blue}inferential statistics} one should use
    something that gives the reader {\color{blue}information about
      significance}. 
  \item Here, {\color{blue} confidence intervals} are a better choice.
  \end{itemize}
 \end{frame}
 % ----------------------------------------------------------
 \subsection{confidence intervals \& bootstrapping}
 %------------------------------------------------------------------------------
 \begin{frame} 
 \frametitle{confidence intervals}
 \begin{center}
  \only<1>{
    \vspace{.1cm}
    \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-55-39_181.jpg}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
  }\pause
  \only<2>{
    \vspace{.1cm}
    \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-56-59_866.jpg}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
  }\pause
  \only<3>{
    \vspace{.1cm}
    \includegraphics[width=.4\linewidth]{figs/2012-10-29_14-58-18_054.jpg}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
  }\pause
  \only<4>{
    \vspace{.1cm}
    \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-59-05_984.jpg}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
  }\pause
  \only<5>{
    \vspace{.1cm}
    \includegraphics[width=.6\linewidth]{figs/2012-10-29_15-04-38_517.jpg}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
  }\pause
  \only<6>{
    \vspace{.1cm}
    \includegraphics[width=.6\linewidth]{figs/2012-10-29_15-09-25_388.jpg}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
  }
 \end{center}
 \end{frame}
 % ----------------------------------------------------------
 \begin{frame}
  \frametitle{confidence intervals for the median}
  \begin{definition}{Confidence interval}
    A $(1-\alpha)\cdot 100\%$ confidence interval for a statistic
    $\hat\theta$ is an interval $\hat\theta \pm a$ such that the
    population parameter $\theta$ is contained in that interval
    in $(1-\alpha)\cdot 100\%$ of the experiments.
    An alternative way to put it is that $(\hat\theta - \theta) \in
    [-a,a]$ in $(1-\alpha)\cdot 100\%$ of the cases.
  \end{definition}
 \begin{columns}
  \begin{column}[l]{.5\linewidth}
  If we knew the sampling distribution of the median $\hat m$, could
  we generate, e.g., a $95\%$ confidence interval?\pause
  \vspace{.5cm}
  Yes, we could choose the interval such that $\hat m - m$ lies in that
  interval in $95\%$ of the cases.
  \end{column}
  \begin{column}[r]{.5\linewidth}
    \only<1>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian00.pdf}}
    \only<2>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian01.pdf}}
  \end{column}
 \end{columns}
 \end{frame}
 % ----------------------------------------------------------
 \begin{frame}
  \frametitle{confidence intervals for the mean via bootstrapping}
  \framesubtitle{how to get the sampling distribution}
  \begin{task}{bootstrapping a confidence interval for the mean}
    \begin{itemize}
    \item Use the same dataset as before.
    \item Bootstrap $500$ means.
    \item Plot their distribution.
    \item Compute the $2.5\%$ and the $97.5\%$ percentile of the
      $500$ means.
    \item Mark them in the plot. 
    \end{itemize}
    These two numbers give you $\hat m -a$ and $\hat m + a$ for
      the $95\%$ confidence interval.
  \end{task}
 \end{frame}
 \begin{frame}[fragile]
  \frametitle{confidence intervals for the mean}
 \scriptsize
 \begin{lstlisting}
 % bootstrap a 95% interval for the mean
 load thymusglandweights.dat
 n = 80;
 x = thymusglandweights(1:n);
 m = 500;
 me = zeros(m,1);
 for i = 1:m
    % mean of n values drawn from x with replacement
    me(i) = mean(x(randi(n,n,1)));
 end
 % 2.5% and 97.5% quantiles of the bootstrapped means
 disp(['bootstrap quantiles: ' , num2str(quantile(me,0.025)), ' ' ,num2str(quantile(me,1-0.025))]);
 \end{lstlisting}
 \end{frame}
 % ----------------------------------------------------------
 \begin{frame}
  \frametitle{confidence intervals}
  \framesubtitle{Notice the theme!}
  \begin{enumerate}
  \item choose a statistic
  \item get the sampling distribution of the statistic (by theory or
    simulation)
  \item use that distribution to reason about the relation between the
    true population parameter (e.g. $m$) and the sampled statistic
    $\hat m$
  \end{enumerate}
  \begin{center}
    \color{blue}
    This is the scaffold of most statistical techniques. Try to find
    it and it can help you understand them.
  \end{center}
 \end{frame}
 % ----------------------------------------------------------
 \begin{frame} 
 \frametitle{confidence interval for the mean}
 \framesubtitle{Let's search the pattern in the normal way of computing
 a confidence interval for the mean}  
 \begin{itemize}
 \item If the $x_1,...,x_n\sim \mathcal N(\mu,\sigma)$ are Gaussian, then $\hat\mu$ is Gaussian as
  well
 \item What is the mean of $\hat\mu$? What is its standard deviation?\pause
 \item[]{\color{gray} $\langle\hat\mu\rangle_{X_1,...,X_n} = \mu$ and
    $\mbox{std}(\hat\mu) = \frac{\sigma}{\sqrt{n}}$}\pause
 \item The problem is, that $\hat\mu \sim \mathcal N\left(\mu,
    \frac{\sigma}{\sqrt{n}}\right)$ depends on unknown population
  parameters.\pause
 \item However, $$\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}} \sim
  \mbox{t-distribution with }n-1\mbox{ degrees of freedom}$$
 \item Therefore,
 \begin{align*}
  P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
 \end{align*}
 \end{itemize}
 \end{frame}
 % ----------------------------------------------------------
 \begin{frame} 
 \frametitle{confidence interval for the mean}
 \begin{task}{Bootstrapping a confidence interval for the mean}
 Extend your script to contain the analytical confidence
 interval using
 \begin{align*}
  P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
 \end{align*}
 Hint: Use the function {\tt tinv(0.025, n-1)} to get the value of
 $t_{2.5\%}$ and similar for $t_{97.5\%}$.
 \end{task}
 \end{frame}
 \begin{frame}[fragile]
 \frametitle{solution}
 \scriptsize
 \begin{lstlisting}
 % bootstrap interval vs. t-based interval for the mean
 load thymusglandweights.dat
 n = 80;
 x = thymusglandweights(1:n);
 m = 500;
 me = zeros(m,1);
 for i = 1:m
    % mean of n values drawn from x with replacement
    me(i) = mean(x(randi(n,n,1)));
 end
 % t quantiles with n-1 degrees of freedom
 t025 = tinv(0.025, n-1);
 t975 = tinv(0.975, n-1);
 % standard error of the mean
 se = std(x)/sqrt(n);
 disp(['bootstrap quantiles: ' , num2str(quantile(me,0.025)), ' ' ,num2str(quantile(me,1-0.025))]);
 disp(['analytical CI: ' , num2str(mean(x)+t025*se), ' ' , num2str(mean(x)+t975*se)]);
 \end{lstlisting}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{statistical tests}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
  \frametitle{ingredients into a test}
  \begin{itemize}
  \item {\bf What is the goal of a test?}\pause
  \item[] Check whether a measured
  statistic looks different from what you would expect if there was no
  effect.\pause
  \item {\bf What are the ingredients into a test?}\pause
  \item[] a test statistic (e.g. the mean, the median, ...) and a null
    distribution\pause
  \item {\bf What is a null distribution?}\pause
  \item[] The sampling distribution of the statistic in case there is
    no effect (i.e. the Null hypothesis is true).
  \end{itemize}
 \end{frame}
 \begin{frame}
  \frametitle{how tests work}
  \begin{enumerate}
  \item Choose a statistic.
  \item Get a null distribution.
  \item Compare your actually measured value with the Null
    distribution.
  \end{enumerate}
 \end{frame}
 \begin{frame}
  \frametitle{Example: one sample test}
  \framesubtitle{step 2: get a Null distribution}
  \scriptsize
  Assume that the expected weight of a thymus gland from the
  literature is 34.3g. We want to test whether the mean of our
  thymus gland dataset is different from the expectation in the
  literature. Comparing a statistic of a dataset against a fixed value
  is called {\em one sample test}. 
  \pause
  \begin{itemize}
  \item {\bf How could we simulate the distribution of the data if the
      mean was really 34.3g?}\pause
  \item[] Bootstrapping.
  \end{itemize}
  \begin{task}{generating a null distribution}
    \begin{itemize}
    \item Write a matlab program that bootstraps 2000 means from the
      thymus gland dataset.
    \item How can we adjust the data that it has mean 34.3g (remember,
      we want to simulate the null distribution)?
    \item Plot a histogram of these 2000 means.
    \item Also indicate the actual mean of the data. 
    \end{itemize}
  \end{task}
 \end{frame}
 \begin{frame}
  \frametitle{Example: one sample test}
  \framesubtitle{step 3: compare the actual value to the Null distribution}
  \begin{minipage}{1.0\linewidth}
    \begin{minipage}{0.5\linewidth}
      The question we want to answer in this step is:
      \begin{center}
        \color{blue} Does the actually measured value look like it came
        from the Null distribution?
      \end{center}
    \end{minipage}
    \begin{minipage}{0.5\linewidth}
      \includegraphics[width=\linewidth]{figs/bootstraptest.png}
    \end{minipage}
  \end{minipage}
  {\bf How could we do this in our bootstrapping example?}\pause
  \begin{itemize}
  \item Set a threshold. \pause How do we choose the threshold? \pause Via type I error.\pause
  \item Specify the type I error if we used the actual measured value
    as threshold (p-value). Why is that a reasonable strategy?
  \end{itemize}
 \end{frame}
 \begin{frame}
  \frametitle{Example: one sample test}
  \framesubtitle{step 3: compare the actual value to the Null distribution}
  \begin{task}{type I error and p-value}
    Extend the script such that it
    \begin{itemize}
    \item computes the $5\%$ significance boundaries from the
      distribution and plots them into the histogram.
    \item computes a p-value.
    \end{itemize}
  \end{task}
 \end{frame}
 \begin{frame}
  \frametitle{two sample test}
  \framesubtitle{permutation test}
  Brain weight: In 1888, P. Topinard published data on the brain
  weights of hundreds of French men and women. Brain weights are given
  in gram. The data can be downloaded from Ilias (example 002 from
  yesterday). 
  \vspace{.5cm}
  {\bf How could we determine (similar to bootstrapping) whether the
    mean brain weights of males and females are different?}
  \begin{itemize}
  \item What do we use as a statistic?
  \item[]<2-> The difference of the means of the two groups.
  \item How do we simulate the null distribution?
  \item[]<3-> Shuffle the labels ``male'' and ``female'', compute
    difference in means of two groups, and repeat. 
  \end{itemize}
 \end{frame}
 \begin{frame}
  \begin{center}
    \Huge That's it.
  \end{center}
 \end{frame}
 \end{document} 
--- a/statistics/matlab/bootstrap_mean.m
+++ b/statistics/matlab/bootstrap_mean.m
@ -0,0 +1,13 @@
 % BOOTSTRAP_MEAN  Compare the bootstrapped standard error of the mean with
 % the analytical standard error std(x)/sqrt(n).
 %
 % Loads thymusglandweights.dat from the current directory and uses its
 % first 50 values as the sample.
 load thymusglandweights.dat
 x = thymusglandweights(1:50);
 m = 500;               % number of bootstrap resamples
 n = length(x);         % sample size
 mu = zeros(m,1);       % one bootstrapped mean per resample
 for i = 1:m
    % draw n indices uniformly from 1..n (i.e. sample x with replacement)
    mu(i) = mean(x(randi(n,n,1)));
 end
 % Format strings are single-quoted character arrays, like in the other
 % scripts; double-quoted literals are not accepted by older MATLAB releases.
 fprintf('bootstrap standard error: %.4f\n', std(mu));
 fprintf('standard error: %.4f\n', std(x)/sqrt(n));
--- a/statistics/matlab/ci_mean.m
+++ b/statistics/matlab/ci_mean.m
@ -0,0 +1,19 @@
 % CI_MEAN  95% confidence interval for the mean: bootstrap quantiles vs.
 % the analytical interval based on the t-distribution.
 %
 % Loads thymusglandweights.dat from the current directory and uses its
 % first n values as the sample.
 load thymusglandweights.dat
 n = 80;                % sample size
 x = thymusglandweights(1:n);
 m = 5000;              % number of bootstrap resamples
 me = zeros(m,1);       % one bootstrapped mean per resample
 for i = 1:m
    % The statistic has to be the mean (not the median) so that the
    % bootstrap quantiles are comparable with the t-interval printed below.
    me(i) = mean(x(randi(n,n,1)));
 end
 % t quantiles with n-1 degrees of freedom
 t025 = tinv(0.025, n-1);
 t975 = tinv(0.975, n-1);
 % standard error of the mean
 se = std(x)/sqrt(n);
 fprintf('bootstrap quantiles: %.4f, %.4f \n', quantile(me,0.025), quantile(me,0.975));
 fprintf('analytical quantile: %.4f, %.4f \n', mean(x)+t025*se, mean(x)+t975*se);
--- a/statistics/matlab/ci_media.m
+++ b/statistics/matlab/ci_media.m
@ -0,0 +1,17 @@
 % Confidence interval for the median: bootstrap quantiles vs. an interval
 % read off the sorted sample at binomial-quantile positions.
 load thymusglandweights.dat
 x = thymusglandweights(1:50);
 m = 500;               % number of bootstrap resamples
 n = length(x);         % sample size
 % sorted so that x(k) is the k-th smallest value (used for x(a1), x(a2) below)
 x = sort(x);
 me = zeros(m,1);       % one bootstrapped median per resample
 for i = 1:m
    % median of n values drawn from x with replacement
    me(i) = median(x(randi(n,n,1)));
 end
 % Positions in the sorted sample taken from Binomial(n, 0.5) quantiles.
 % NOTE(review): presumably the order-statistic confidence interval for the
 % median; the -1 offset on the lower index only should be checked against
 % the intended convention.
 a1 = binoinv(0.025,n,.5)-1;
 a2 = binoinv(1-0.025,n,.5);
 fprintf('bootstrap quantiles: %.4f, %.4f \n', quantile(me,0.025), quantile(me,1-0.025));
 fprintf('analytical quantile: %.4f, %.4f \n', x(a1),x(a2));
--- a/statistics/matlab/tests.m
+++ b/statistics/matlab/tests.m
@ -0,0 +1,38 @@
 % TESTS  One-sample bootstrap test: is the mean of the thymus gland data
 % different from the literature value?
 %
 % Builds a null distribution of the mean by bootstrapping from data that
 % was shifted to have the literature mean, plots it together with the
 % measured mean and the 5% significance boundaries, and prints a two-sided
 % p-value. Loads thymusglandweights.dat from the current directory.
 close all
 clear all
 load thymusglandweights.dat
 literature_mean = 34.3;
 x = thymusglandweights;
 n = length(x);
 % shift the data so that its mean equals the literature value (null hypothesis)
 y = x - mean(x) + literature_mean;
 m = 2000;              % number of bootstrap resamples
 me = zeros(m,1);       % bootstrapped means under the null hypothesis
 for i = 1:m
    % The test statistic is the mean: y was centred via its mean, and the
    % observed value mu and the p-value below are mean-based as well, so
    % the null distribution must be built from means, not medians.
    me(i) = mean(y(randi(n,n,1)));
 end
 hist(me, 50);
 hold on
 % measured mean of the original (unshifted) data
 mu = mean(x);
 plot([mu,mu],[0,200],'--r','LineWidth',3);
 xlabel('thymus gland weights [g]');
 ylabel('frequency');
 title('bootstrapped null distribution');
 hold off
 % 5% significance boundaries
 low = quantile(me,0.025);
 high =  quantile(me,0.975);
 disp(['the 5% boundaries are: ', num2str(low), ' ', num2str(high)]);
 hold on
 plot([low,low],[0,200],'--g','LineWidth',3);
 plot([high,high],[0,200],'--g','LineWidth',3);
 hold off
 % two-sided p-value: fraction of null means at least as far from the
 % literature value as the measured mean (no semicolon, so it is displayed)
 pval = mean(abs(me-literature_mean) > abs(mu - literature_mean))
 legend('Null distribution','measured mean','5% significance boundaries')
--- a/statistics/matlab/thymusglandweights.dat
+++ b/statistics/matlab/thymusglandweights.dat
		`@ -0,0 +1,29 @@`
							`Pulse`
							`97`
							`111`
							`93`
							`98`
							`107`
							`77`
							`121`
							`88`
							`96`
							`123`
							`119`
							`91`
							`99`
							`95`
							`99`
							`102`
							`77`
							`85`
							`104`
							`106`
							`114`
							`85`
							`112`
							`102`
							`104`
							`94`
							`104`
							`98`
		`@ -0,0 +1,37 @@`
							`Branches`
							`23`
							`30`
							`54`
							`28`
							`31`
							`29`
							`34`
							`35`
							`30`
							`27`
							`21`
							`43`
							`51`
							`35`
							`51`
							`49`
							`35`
							`24`
							`26`
							`29`
							`21`
							`29`
							`37`
							`27`
							`28`
							`33`
							`33`
							`23`
							`37`
							`27`
							`40`
							`48`
							`41`
							`20`
							`30`
							`57`
		`@ -0,0 +1,32 @@`
							`Glucose`
							`81`
							`85`
							`93`
							`93`
							`99`
							`76`
							`75`
							`84`
							`78`
							`84`
							`81`
							`82`
							`89`
							`81`
							`96`
							`82`
							`74`
							`70`
							`84`
							`86`
							`80`
							`70`
							`131`
							`75`
							`88`
							`102`
							`115`
							`89`
							`82`
							`79`
							`106`
		`@ -0,0 +1,24 @@`
							`NerveCells`
							`35`
							`19`
							`33`
							`34`
							`17`
							`26`
							`16`
							`40`
							`28`
							`30`
							`23`
							`12`
							`27`
							`33`
							`22`
							`31`
							`28`
							`28`
							`35`
							`23`
							`23`
							`19`
							`29`