Merge branch 'master' of raven.am28.uni-tuebingen.de:scientificComputing

This commit is contained in:
Jan Grewe 2014-10-15 18:43:10 +02:00
commit 7bab4e9baa
74 changed files with 11678 additions and 26 deletions

View File

@ -0,0 +1,16 @@
# Build one assignment bundle per example dataset.
#
# For every dataset number the recipe
#   1. substitutes the placeholder "000" in day1.tex with the dataset number,
#   2. compiles the personalised sheet with pdflatex,
#   3. copies the matching CSV from ../data,
#   4. zips CSV + PDF into example<number>.zip, and
#   5. removes all intermediate files again.
#
# NOTE: the list previously contained "007" twice and no "008", so the
# bundle for example008 was never produced.
.PHONY: all clean

all:
	for number in 001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 ; do \
		echo $$number ; \
		sed "s/000/$$number/g" day1.tex > tmp.tex; \
		pdflatex tmp.tex; \
		mv tmp.pdf day1_$$number.pdf; \
		cp ../data/example$$number.csv ./ ;\
		rm tmp.* ; \
		zip example$$number.zip example$$number.csv day1_$$number.pdf ; \
		rm example$$number.csv ;\
		rm day1_$$number.pdf ; \
	done

# Remove the generated bundles and the editor's "auto" directory.
# -f keeps clean from aborting (and skipping the second rm) when no
# zip files exist yet.
clean:
	rm -f *.zip
	rm -rf auto

72
statistics/assignments/day1.tex Executable file
View File

@ -0,0 +1,72 @@
\documentclass[addpoints,10pt]{exam}
\usepackage{url}
\usepackage{color}
\usepackage{hyperref}
\pagestyle{headandfoot}
\runningheadrule
\firstpageheadrule
\firstpageheader{Scientific Computing}{afternoon assignment day 01}{10/20/2014}
%\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014}
\firstpagefooter{}{}{}
\runningfooter{}{}{}
\pointsinmargin
\bracketedpoints
%\printanswers
\shadedsolutions
\begin{document}
%%%%%%%%%%%%%%%%%%%%% Submission instructions %%%%%%%%%%%%%%%%%%%%%%%%%
\sffamily
%%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%%
\begin{questions}
\question To publish scientific results, you will usually need to
use statistical methods. Some journals provide you with a brief
description of how they expect you to apply statistical methods. One
example can be found in the author guidelines of the journal
Nature.
Assume you collected the following dataset. You can download it from
Ilias as {\tt example000.csv}. Here is the description of the dataset:
\begin{quotation}
\tt
\input{../examples/example000.tex}
\end{quotation}
\begin{parts}
\part Download the dataset and write a script that loads it into
matlab.
\part Think about the type of your data (I might ask you that
tomorrow).
\part Produce a plot that displays the data in an appropriate
way. Make sure to respect all elements of good plotting we
discussed today.
\part Download the statistical checklist from nature. Produce {\bf
one} slide that contains the plot and a concise summary of your
data which respects the requirements made by nature (assume you
are producing a figure legend for the figure in nature). It is
good style to avoid expressions like ``the plot shows'' or
similar.
\part Upload your code, the data, and the slide as a zip to
Ilias. Deadline is 19h00. Structure the zip such that you can
present your program in front of the class. Several students will
be asked to present their slide and their code tomorrow morning.
\end{parts}
\end{questions}
\end{document}

View File

@ -0,0 +1,46 @@
\documentclass[addpoints,10pt]{exam}
\usepackage{url}
\usepackage{color}
\usepackage{hyperref}
\pagestyle{headandfoot}
\runningheadrule
\firstpageheadrule
\firstpageheader{Scientific Computing}{afternoon assignment day 02}{10/21/2014}
%\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014}
\firstpagefooter{}{}{}
\runningfooter{}{}{}
\pointsinmargin
\bracketedpoints
%\printanswers
\shadedsolutions
\begin{document}
%%%%%%%%%%%%%%%%%%%%% Submission instructions %%%%%%%%%%%%%%%%%%%%%%%%%
\sffamily
%%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%%
\begin{questions}
\question Download example002 from yesterday (brain weights).
\begin{parts}
\part Simulate a null distribution via permutation.
\part Determine whether you can reject ``means are equal'' on a
5\% significance level using the simulated null distribution.
\part Check whether the means are different with a two sample
t-test in matlab ({\tt ttest2}).
\part Plot the data appropriately and generate a single slide that
contains the plot and short discussion of the test that respects
the nature statistical checklist (ignore all questions of whether the
assumptions of the test are satisfied).
\part Upload the slide and the code to Ilias. Deadline is 19h00.
\end{parts}
\end{questions}
\end{document}

43
statistics/data/example001.csv Executable file
View File

@ -0,0 +1,43 @@
MAO,Diagnosis
6.8,I
4.1,I
7.3,I
14.2,I
18.8,I
9.9,I
7.4,I
11.9,I
5.2,I
7.8,I
7.8,I
8.7,I
12.7,I
14.5,I
10.7,I
8.4,I
9.7,I
10.6,I
7.8,II
4.4,II
11.4,II
3.1,II
4.3,II
10.1,II
1.5,II
7.4,II
5.2,II
10,II
3.7,II
5.5,II
8.5,II
7.7,II
6.8,II
3.1,II
6.4,III
10.8,III
1.1,III
2.9,III
4.5,III
5.8,III
9.4,III
6.8,III
1 MAO Diagnosis
2 6.8 I
3 4.1 I
4 7.3 I
5 14.2 I
6 18.8 I
7 9.9 I
8 7.4 I
9 11.9 I
10 5.2 I
11 7.8 I
12 7.8 I
13 8.7 I
14 12.7 I
15 14.5 I
16 10.7 I
17 8.4 I
18 9.7 I
19 10.6 I
20 7.8 II
21 4.4 II
22 11.4 II
23 3.1 II
24 4.3 II
25 10.1 II
26 1.5 II
27 7.4 II
28 5.2 II
29 10 II
30 3.7 II
31 5.5 II
32 8.5 II
33 7.7 II
34 6.8 II
35 3.1 II
36 6.4 III
37 10.8 III
38 1.1 III
39 2.9 III
40 4.5 III
41 5.8 III
42 9.4 III
43 6.8 III

186
statistics/data/example002.csv Executable file
View File

@ -0,0 +1,186 @@
Weight,Sex
1607,m
1157,m
1248,m
1310,m
1398,m
1237,m
1232,m
1343,m
1380,m
1274,m
1245,m
1286,m
1508,m
1105,m
1123,m
1198,m
1300,m
1249,m
1185,m
915,m
1345,m
1107,m
1357,m
1227,m
1205,m
1435,m
1289,m
1093,m
1211,m
1260,m
1193,m
1330,m
1130,m
1357,m
1193,m
1232,m
1321,m
1260,m
1380,m
1230,m
1136,m
1029,m
1223,m
1240,m
1264,m
1020,m
1415,m
1410,m
1275,m
1230,m
1085,m
1048,m
1181,m
1103,m
1165,m
1547,m
1173,m
1660,m
1307,m
1535,m
1315,m
1257,m
1424,m
1309,m
1170,m
1412,m
1270,m
1230,m
1233,m
1561,m
1193,m
1272,m
1355,m
1137,m
1354,m
1110,m
1265,m
1407,m
1227,m
1330,m
1222,m
1305,m
1475,m
1177,m
1337,m
1145,m
1070,m
1305,m
1085,m
1303,m
1390,m
1532,m
1238,m
1233,m
1280,m
1245,m
1459,m
1157,m
1302,m
1385,m
1310,m
1342,m
1303,m
1248,m
1115,m
1365,m
1227,m
1353,m
1125,f
1027,f
1112,f
983,f
1090,f
1247,f
1045,f
983,f
972,f
1045,f
937,f
1245,f
1200,f
1270,f
1200,f
1145,f
1090,f
1040,f
1343,f
1010,f
1095,f
1180,f
1168,f
1095,f
1040,f
1235,f
1050,f
1038,f
1046,f
1255,f
1228,f
1000,f
1225,f
1220,f
1085,f
1067,f
1006,f
1138,f
1175,f
1252,f
1037,f
958,f
1020,f
1068,f
1107,f
1317,f
952,f
1056,f
1203,f
1183,f
1392,f
1130,f
1284,f
996,f
1228,f
1087,f
1035,f
1170,f
1064,f
1250,f
1129,f
1088,f
1037,f
1117,f
1095,f
1027,f
1027,f
1190,f
1153,f
1037,f
1120,f
1212,f
1024,f
1135,f
1177,f
1096,f
1114,f
1 Weight Sex
2 1607 m
3 1157 m
4 1248 m
5 1310 m
6 1398 m
7 1237 m
8 1232 m
9 1343 m
10 1380 m
11 1274 m
12 1245 m
13 1286 m
14 1508 m
15 1105 m
16 1123 m
17 1198 m
18 1300 m
19 1249 m
20 1185 m
21 915 m
22 1345 m
23 1107 m
24 1357 m
25 1227 m
26 1205 m
27 1435 m
28 1289 m
29 1093 m
30 1211 m
31 1260 m
32 1193 m
33 1330 m
34 1130 m
35 1357 m
36 1193 m
37 1232 m
38 1321 m
39 1260 m
40 1380 m
41 1230 m
42 1136 m
43 1029 m
44 1223 m
45 1240 m
46 1264 m
47 1020 m
48 1415 m
49 1410 m
50 1275 m
51 1230 m
52 1085 m
53 1048 m
54 1181 m
55 1103 m
56 1165 m
57 1547 m
58 1173 m
59 1660 m
60 1307 m
61 1535 m
62 1315 m
63 1257 m
64 1424 m
65 1309 m
66 1170 m
67 1412 m
68 1270 m
69 1230 m
70 1233 m
71 1561 m
72 1193 m
73 1272 m
74 1355 m
75 1137 m
76 1354 m
77 1110 m
78 1265 m
79 1407 m
80 1227 m
81 1330 m
82 1222 m
83 1305 m
84 1475 m
85 1177 m
86 1337 m
87 1145 m
88 1070 m
89 1305 m
90 1085 m
91 1303 m
92 1390 m
93 1532 m
94 1238 m
95 1233 m
96 1280 m
97 1245 m
98 1459 m
99 1157 m
100 1302 m
101 1385 m
102 1310 m
103 1342 m
104 1303 m
105 1248 m
106 1115 m
107 1365 m
108 1227 m
109 1353 m
110 1125 f
111 1027 f
112 1112 f
113 983 f
114 1090 f
115 1247 f
116 1045 f
117 983 f
118 972 f
119 1045 f
120 937 f
121 1245 f
122 1200 f
123 1270 f
124 1200 f
125 1145 f
126 1090 f
127 1040 f
128 1343 f
129 1010 f
130 1095 f
131 1180 f
132 1168 f
133 1095 f
134 1040 f
135 1235 f
136 1050 f
137 1038 f
138 1046 f
139 1255 f
140 1228 f
141 1000 f
142 1225 f
143 1220 f
144 1085 f
145 1067 f
146 1006 f
147 1138 f
148 1175 f
149 1252 f
150 1037 f
151 958 f
152 1020 f
153 1068 f
154 1107 f
155 1317 f
156 952 f
157 1056 f
158 1203 f
159 1183 f
160 1392 f
161 1130 f
162 1284 f
163 996 f
164 1228 f
165 1087 f
166 1035 f
167 1170 f
168 1064 f
169 1250 f
170 1129 f
171 1088 f
172 1037 f
173 1117 f
174 1095 f
175 1027 f
176 1027 f
177 1190 f
178 1153 f
179 1037 f
180 1120 f
181 1212 f
182 1024 f
183 1135 f
184 1177 f
185 1096 f
186 1114 f

52
statistics/data/example003.csv Executable file
View File

@ -0,0 +1,52 @@
singtime
4.3
24.1
6.6
7.3
4
2.6
4
3.9
9.4
6.2
1.6
6.5
0.2
2.7
17.4
5.6
2
3.8
1.2
0.7
1.6
2.3
3.7
0.8
0.5
4.5
11.5
3.5
0.8
5.2
2
0.7
1.7
5
2.8
1.5
3.9
3.7
4.5
1.8
1.2
0.7
0.7
4.2
4.7
2.2
1.4
14.1
8.6
3.7
3.5
1 singtime
2 4.3
3 24.1
4 6.6
5 7.3
6 4
7 2.6
8 4
9 3.9
10 9.4
11 6.2
12 1.6
13 6.5
14 0.2
15 2.7
16 17.4
17 5.6
18 2
19 3.8
20 1.2
21 0.7
22 1.6
23 2.3
24 3.7
25 0.8
26 0.5
27 4.5
28 11.5
29 3.5
30 0.8
31 5.2
32 2
33 0.7
34 1.7
35 5
36 2.8
37 1.5
38 3.9
39 3.7
40 4.5
41 1.8
42 1.2
43 0.7
44 0.7
45 4.2
46 4.7
47 2.2
48 1.4
49 14.1
50 8.6
51 3.7
52 3.5

29
statistics/data/example004.csv Executable file
View File

@ -0,0 +1,29 @@
Pulse
97
111
93
98
107
77
121
88
96
123
119
91
99
95
99
102
77
85
104
106
114
85
112
102
104
94
104
98
1 Pulse
2 97
3 111
4 93
5 98
6 107
7 77
8 121
9 88
10 96
11 123
12 119
13 91
14 99
15 95
16 99
17 102
18 77
19 85
20 104
21 106
22 114
23 85
24 112
25 102
26 104
27 94
28 104
29 98

37
statistics/data/example005.csv Executable file
View File

@ -0,0 +1,37 @@
Branches
23
30
54
28
31
29
34
35
30
27
21
43
51
35
51
49
35
24
26
29
21
29
37
27
28
33
33
23
37
27
40
48
41
20
30
57
1 Branches
2 23
3 30
4 54
5 28
6 31
7 29
8 34
9 35
10 30
11 27
12 21
13 43
14 51
15 35
16 51
17 49
18 35
19 24
20 26
21 29
22 21
23 29
24 37
25 27
26 28
27 33
28 33
29 23
30 37
31 27
32 40
33 48
34 41
35 20
36 30
37 57

32
statistics/data/example006.csv Executable file
View File

@ -0,0 +1,32 @@
Glucose
81
85
93
93
99
76
75
84
78
84
81
82
89
81
96
82
74
70
84
86
80
70
131
75
88
102
115
89
82
79
106
1 Glucose
2 81
3 85
4 93
5 93
6 99
7 76
8 75
9 84
10 78
11 84
12 81
13 82
14 89
15 81
16 96
17 82
18 74
19 70
20 84
21 86
22 80
23 70
24 131
25 75
26 88
27 102
28 115
29 89
30 82
31 79
32 106

24
statistics/data/example007.csv Executable file
View File

@ -0,0 +1,24 @@
NerveCells
35
19
33
34
17
26
16
40
28
30
23
12
27
33
22
31
28
28
35
23
23
19
29
1 NerveCells
2 35
3 19
4 33
5 34
6 17
7 26
8 16
9 40
10 28
11 30
12 23
13 12
14 27
15 33
16 22
17 31
18 28
19 28
20 35
21 23
22 23
23 19
24 29

21
statistics/data/example008.csv Executable file
View File

@ -0,0 +1,21 @@
RateChange,Treatment
28,Caffeine
11,Caffeine
-3,Caffeine
14,Caffeine
-2,Caffeine
-4,Caffeine
18,Caffeine
2,Caffeine
2,Caffeine
26,Decaf
1,Decaf
0,Decaf
-4,Decaf
-4,Decaf
14,Decaf
16,Decaf
8,Decaf
0,Decaf
18,Decaf
-10,Decaf
1 RateChange Treatment
2 28 Caffeine
3 11 Caffeine
4 -3 Caffeine
5 14 Caffeine
6 -2 Caffeine
7 -4 Caffeine
8 18 Caffeine
9 2 Caffeine
10 2 Caffeine
11 26 Decaf
12 1 Decaf
13 0 Decaf
14 -4 Decaf
15 -4 Decaf
16 14 Decaf
17 16 Decaf
18 8 Decaf
19 0 Decaf
20 18 Decaf
21 -10 Decaf

12
statistics/data/example009.csv Executable file
View File

@ -0,0 +1,12 @@
NEConcentration,Treatment
543,Toluene
523,Toluene
431,Toluene
635,Toluene
564,Toluene
549,Toluene
535,Control
385,Control
502,Control
412,Control
387,Control
1 NEConcentration Treatment
2 543 Toluene
3 523 Toluene
4 431 Toluene
5 635 Toluene
6 564 Toluene
7 549 Toluene
8 535 Control
9 385 Control
10 502 Control
11 412 Control
12 387 Control

13
statistics/data/example010.csv Executable file
View File

@ -0,0 +1,13 @@
Dopamine,Group
3420,toluene
2314,toluene
1911,toluene
2464,toluene
2781,toluene
2803,toluene
1820,control
1843,control
1397,control
1803,control
2539,control
1990,control
1 Dopamine Group
2 3420 toluene
3 2314 toluene
4 1911 toluene
5 2464 toluene
6 2781 toluene
7 2803 toluene
8 1820 control
9 1843 control
10 1397 control
11 1803 control
12 2539 control
13 1990 control

10
statistics/data/example011.csv Executable file
View File

@ -0,0 +1,10 @@
Animal,Site I,Site II
1,50.6,38
2,39.2,18.6
3,35.2,23.2
4,17,19
5,11.2,6.6
6,14.2,16.4
7,24.2,14.4
8,37.4,37.6
9,35.2,24.4
1 Animal Site I Site II
2 1 50.6 38
3 2 39.2 18.6
4 3 35.2 23.2
5 4 17 19
6 5 11.2 6.6
7 6 14.2 16.4
8 7 24.2 14.4
9 8 37.4 37.6
10 9 35.2 24.4

10
statistics/data/example012.csv Executable file
View File

@ -0,0 +1,10 @@
Subject,mCPP,Placebo
1,1.1,0
2,1.3,-0.3
3,1,0.6
4,1.7,0.3
5,1.4,-0.7
6,0.1,-0.2
7,0.5,0.6
8,1.6,0.9
9,-0.5,-2
1 Subject mCPP Placebo
2 1 1.1 0
3 2 1.3 -0.3
4 3 1 0.6
5 4 1.7 0.3
6 5 1.4 -0.7
7 6 0.1 -0.2
8 7 0.5 0.6
9 8 1.6 0.9
10 9 -0.5 -2

9
statistics/data/example013.csv Executable file
View File

@ -0,0 +1,9 @@
Animal,Control,Regenerating
1,16.3,11.5
2,4.8,3.6
3,10.9,12.5
4,14.2,6.3
5,16.3,15.2
6,9.9,8.1
7,29.2,16.6
8,22.4,13.1
1 Animal Control Regenerating
2 1 16.3 11.5
3 2 4.8 3.6
4 3 10.9 12.5
5 4 14.2 6.3
6 5 16.3 15.2
7 6 9.9 8.1
8 7 29.2 16.6
9 8 22.4 13.1

16
statistics/data/example014.csv Executable file
View File

@ -0,0 +1,16 @@
BodyTempDrop,AlcoholDose
0.2,1.5
1.9,1.5
-0.1,1.5
0.5,1.5
0.8,1.5
4,3
3.2,3
2.3,3
2.9,3
3.8,3
3.3,6
5.1,6
5.3,6
6.7,6
5.9,6
1 BodyTempDrop AlcoholDose
2 0.2 1.5
3 1.9 1.5
4 -0.1 1.5
5 0.5 1.5
6 0.8 1.5
7 4 3
8 3.2 3
9 2.3 3
10 2.9 3
11 3.8 3
12 3.3 6
13 5.1 6
14 5.3 6
15 6.7 6
16 5.9 6

18
statistics/data/example015.csv Executable file
View File

@ -0,0 +1,18 @@
PeakFlow,Height
733,174
572,183
500,176
738,169
616,183
787,186
866,178
670,175
550,172
660,179
575,171
577,184
783,200
625,195
470,176
642,176
856,190
1 PeakFlow Height
2 733 174
3 572 183
4 500 176
5 738 169
6 616 183
7 787 186
8 866 178
9 670 175
10 550 172
11 660 179
12 575 171
13 577 184
14 783 200
15 625 195
16 470 176
17 642 176
18 856 190

19
statistics/data/example016.csv Executable file
View File

@ -0,0 +1,19 @@
Patient,Before,After
1,98,75
2,100,60
3,82,25
4,100,55
5,93,78
6,119,102
7,70,58
8,78,70
9,104,90
10,70,50
11,60,65
12,88,45
13,45,36
14,159,144
15,65,27
16,98,90
17,66,16
18,67,53
1 Patient Before After
2 1 98 75
3 2 100 60
4 3 82 25
5 4 100 55
6 5 93 78
7 6 119 102
8 7 70 58
9 8 78 70
10 9 104 90
11 10 70 50
12 11 60 65
13 12 88 45
14 13 45 36
15 14 159 144
16 15 65 27
17 16 98 90
18 17 66 16
19 18 67 53

21
statistics/data/example017.csv Executable file
View File

@ -0,0 +1,21 @@
LegStrength,UpperBodyStrength
55,low
70,low
45,low
246,low
240,low
96,low
225,low
40,middle
200,middle
250,middle
192,middle
117,middle
215,middle
181,high
85,high
416,high
228,high
257,high
316,high
134,high
1 LegStrength UpperBodyStrength
2 55 low
3 70 low
4 45 low
5 246 low
6 240 low
7 96 low
8 225 low
9 40 middle
10 200 middle
11 250 middle
12 192 middle
13 117 middle
14 215 middle
15 181 high
16 85 high
17 416 high
18 228 high
19 257 high
20 316 high
21 134 high

View File

@ -0,0 +1,6 @@
MAO and Schizophrenia Monoamine oxidase (MAO) is an enzyme that is
thought to play a role in the regulation of behavior. To see whether
different categories of schizophrenic patients have different levels
of MAO activity, researchers collected blood specimens from 42
patients and measured the MAO activity in the platelets. Values are
expressed as nmol benzylaldehyde product per $10^8$ platelets per hour.

View File

@ -0,0 +1,3 @@
Brain Weight: In 1888, P. Topinard published data on the brain weights
of hundreds of French men and women. Brain weights are given in
grams.

View File

@ -0,0 +1,4 @@
Cricket Singing Times Male Mormon crickets (Anabrus simplex) sing to attract mates.
A field researcher measured the duration of 51 unsuccessful songs--that is, the time
until the singing male gave up and left his perch. The data is given
in minutes.

View File

@ -0,0 +1,3 @@
Pulse after Exercise: A group of 28 adults did some moderate exercise
for five minutes and then measured their pulses. Data is given in
beats/minute.

View File

@ -0,0 +1,5 @@
A dendritic tree is a branched structure that emanates from the body
of a nerve cell. As part of a study of brain development, 36 nerve
cells were taken from the brains of newborn guinea pigs. The
investigators counted the number of dendritic branch segments
emanating from each nerve cell.

View File

@ -0,0 +1,4 @@
For each of 31 healthy dogs, a veterinarian measured the glucose
concentration in the anterior chamber of the right eye and also in the
blood serum. The following data are the anterior chamber glucose
measurements, expressed as a percentage of the blood glucose.

View File

@ -0,0 +1,5 @@
A veterinary anatomist investigated the spatial arrangement of the
nerve cells in the intestine of a pony. He removed a block of tissue
from the intestinal wall, cut the block into many equal sections, and
counted the number of nerve cells in each of 23 randomly selected
sections.

View File

@ -0,0 +1,8 @@
Researchers were interested in the short-term effect that caffeine has
on heart rate. They enlisted a group of volunteers and measured each
person's resting heart rate. Then they had each subject drink 6 ounces
of coffee. Nine of the subjects were given coffee containing caffeine
and 11 were given decaffeinated coffee. After 10 minutes each person's
heart rate was measured again. The data in the table contains the
change in heart rate; a positive number means that heart rate went up
and a negative number means that heart rate went down.

View File

@ -0,0 +1,9 @@
Toluene and the Brain Abuse of substances containing toluene (for
example, glue) can produce various neurological symptoms. In an
investigation of the mechanism of these toxic effects, researchers
measured the concentrations of various chemicals in the brains of rats
that had been exposed to a toluene-laden atmosphere, and also in
unexposed control rats. The concentrations of the brain chemical
norepinephrine (NE) in the medulla region of the brain, for six
toluene-exposed rats and five control rats, are given in the accompanying
data file in ng/g.

View File

@ -0,0 +1,3 @@
In a pharmacological study, researchers measured the concentration of
the brain chemical dopamine in six rats exposed to toluene and six
control rats. Numbers are specified in ng/g.

View File

@ -0,0 +1,6 @@
Nerve Cell Density For each of nine horses, a veterinary anatomist
measured the density of nerve cells at specified sites in the
intestine. The results for site I (midregion of jejunum) and site II
(mesenteric region of jejunum) are given in the accompanying dataset.
Each density value is the average of counts of nerve cells in five
equal sections of tissue.

View File

@ -0,0 +1,6 @@
Hunger Rating During a weight loss study each of nine subjects was
given either the active drug m-chlorophenylpiperazine (mCPP) for two
weeks and then a placebo for another two weeks, or else was given the
placebo for the first two weeks and then mCPP for the second two
weeks. As part of the study the subjects were asked to rate how hungry
they were at the end of each two-week period.

View File

@ -0,0 +1,10 @@
Certain types of nerve cells have the ability to regenerate a part of
the cell that has been amputated. In an early study of this process,
measurements were made on the nerves in the spinal cord in rhesus
monkeys. Nerves emanating from the left side of the cord were cut,
while nerves from the right side were kept intact. During the
regeneration process, the content of creatine phosphate (CP) was
measured in the left and the right portion of the spinal cord. The
following table shows the data for the right (control) side (Y1), and
for the left (regenerating) side (Y2). The units of measurement are mg
CP per 100 gm tissue.

View File

@ -0,0 +1,9 @@
In an investigation of the physiological effects of alcohol
(ethanol), 15 mice were randomly allocated to three treatment groups,
each to receive a different oral dose of alcohol. The dosage levels
were 1.5, 3.0, and 6.0 g alcohol/kg body weight. The body temperature
of each mouse was measured immediately before the alcohol was given
and again 20 minutes afterward. The accompanying data shows the drop
(before minus after) in body temperature for each mouse. (The negative
value -0.1 refers to a mouse whose temperature rose rather than
fell.)

View File

@ -0,0 +1,5 @@
The peak flow rate of a person is the fastest rate
at which the person can expel air after taking a deep breath.
Peak flow rate is measured in units of liters per minute and
gives an indication of the person's respiratory health. Flow is given
in l/min, height in cm.

View File

@ -0,0 +1,6 @@
An experiment was conducted to study the effect of tamoxifen on
patients with cervical cancer. One of the measurements made, both
before and again after tamoxifen was given, was microvessel density
(MVD). MVD, which is measured as number of vessels per mm$^2$, is a
measurement that relates to the formation of blood vessels that feed a
tumor and allow it to grow and spread.

View File

@ -0,0 +1,5 @@
A group of female college students were divided into three groups
according to upper body strength. Their leg strength was tested by
measuring how many consecutive times they could leg press 246 pounds
before exhaustion. (The subjects were allowed only one second of rest
between consecutive lifts.)

Binary file not shown.

After

Width:  |  Height:  |  Size: 546 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 575 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 385 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 865 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 425 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 582 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 724 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 386 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 461 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

BIN
statistics/figs/example01.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
statistics/figs/hunger.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

Binary file not shown.

BIN
statistics/figs/statistic1.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 116 KiB

BIN
statistics/figs/statistic2.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 125 KiB

BIN
statistics/figs/statistic3.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

BIN
statistics/figs/statistic4.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

View File

@ -22,14 +22,19 @@
% \useoutertheme{miniframes} % \useoutertheme{miniframes}
} }
\AtBeginSection[] \AtBeginSubsection[]
{ {
\begin{frame}<beamer> \begin{frame}<beamer>
\begin{center} \begin{center}
\Huge \insertsectionhead \Huge \insertsectionhead
\end{center} \end{center}
\tableofcontents[
currentsubsection,
hideothersubsections,
sectionstyle=show/hide,
subsectionstyle=show/shaded,
]
% \frametitle{\insertsectionhead} % \frametitle{\insertsectionhead}
% \tableofcontents[currentsection,hideothersubsections]
\end{frame} \end{frame}
} }
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
@ -84,25 +89,7 @@ Bernstein Center T\"ubingen}
\end{frame} \end{frame}
\begin{frame}
\frametitle{plan}
\setcounter{tocdepth}{1}
\tableofcontents
\end{frame}
\begin{frame}
\frametitle{information}
\begin{itemize}
\item Samuels, M. L., Wittmer, J. A., \& Schaffner,
A. A. (2010). Statistics for the Life Sciences (4th ed.,
p. 668). Prentice Hall.
\item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
Ed.)Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
Hall. doi:10.1037/0012764
\item \url{http://stats.stackexchange.com}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% errorbars (error bar paper) % errorbars (error bar paper)
% confidence intervals (sources of error) % confidence intervals (sources of error)
@ -170,7 +157,8 @@ Bernstein Center T\"ubingen}
\end{frame} \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section[descriptive statistics, errorbars, and plots]{Day 1 -- descriptive statistics, errorbars, and plots} \section{Day 1 -- descriptive statistics and plots}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{types of data} \subsection{types of data}
@ -315,8 +303,8 @@ Bernstein Center T\"ubingen}
\frametitle{exercise} \frametitle{exercise}
\begin{task}{Spearman rank correlation} \begin{task}{Spearman rank correlation}
\begin{enumerate} \begin{enumerate}
\item Use {\tt randi} to generate two 100-dimensional vectors \item Use {\tt randi} to generate two vectors
{\tt x,y} of random integers between $0$ and $10$. {\tt x,y} with $100$ random integers between $0$ and $10$ each.
\item Find out how to compute the Spearman \item Find out how to compute the Spearman
rank correlation $$\rho = 1- {\frac {6 \sum rank correlation $$\rho = 1- {\frac {6 \sum
d_i^2}{n(n^2 - 1)}}$$ with Matlab. $d_i = x_i - y_i$ is the d_i^2}{n(n^2 - 1)}}$$ with Matlab. $d_i = x_i - y_i$ is the
@ -358,7 +346,6 @@ correlation coefficient does not have that property.
\end{frame} \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{description of data and plotting}
\subsection{what makes a good plot} \subsection{what makes a good plot}
%------------------------------------------------------------- %-------------------------------------------------------------
\begin{frame}[fragile] \begin{frame}[fragile]
@ -522,7 +509,7 @@ correlation coefficient does not have that property.
\end{frame} \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{nominal scale} \subsection{plotting data}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%------------------------------------------------------------- %-------------------------------------------------------------
@ -723,6 +710,23 @@ hold off
\end{center} \end{center}
\end{frame} \end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile,fragile]
\frametitle{robust statistics}
\begin{task}{When is a statistic called robust (leave-one-out)?}
\begin{itemize}
\item Generate an array with $20$ random numbers using {\tt
randn}.
\item Compute $20$ means: the $i^{th}$ mean is computed from the
data set {\em without} the $i^{th}$ example.
\item Repeat this with the median.
\item Make a bar plot that depicts the means of the computed means
and medians along with an appropriate measure of dispersion.
\item What can you observe? Do you understand why?
\end{itemize}
\end{task}
\end{frame}
%------------------------------------------------------------- %-------------------------------------------------------------
\begin{frame}[fragile] \begin{frame}[fragile]
\frametitle{plotting interval/ratio/absolute data} \frametitle{plotting interval/ratio/absolute data}
@ -791,7 +795,13 @@ hold off
ordinal vs. ordinal data (why not the bar chart?). ordinal vs. ordinal data (why not the bar chart?).
\end{frame} \end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\begin{center}
\Huge
That's it.
\end{center}
\end{frame}
\end{document} \end{document}

View File

@ -0,0 +1,772 @@
\documentclass{beamer}
\usepackage{xcolor}
\usepackage{listings}
\usepackage{pgf}
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
%\usepackage{multimedia}
\usepackage[latin1]{inputenc}
\usepackage{amsmath}
\usepackage{bm}
\usepackage[T1]{fontenc}
\usepackage{hyperref}
\usepackage{ulem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>
{
\usetheme{Singapore}
\setbeamercovered{opaque}
\usecolortheme{tuebingen}
\setbeamertemplate{navigation symbols}{}
\usefonttheme{default}
\useoutertheme{infolines}
% \useoutertheme{miniframes}
}
% Hook executed at the start of every \subsection: inserts a divider
% frame (restricted to beamer mode by the <beamer> specification) that
% shows the current section title in \Huge type, followed by a table of
% contents configured through the options listed below.
\AtBeginSubsection[]
{
\begin{frame}<beamer>
\begin{center}
\Huge \insertsectionhead
\end{center}
\tableofcontents[
currentsubsection,
hideothersubsections,
sectionstyle=show/hide,
subsectionstyle=show/shaded,
]
% \frametitle{\insertsectionhead}
\end{frame}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
\setbeamertemplate{blocks}[rounded][shadow=true]
\title[]{Scientific Computing -- Statistics}
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
University T\"ubingen\\
Bernstein Center T\"ubingen}
\institute[Scientific Computing]{}
\date{10/20/2014}
%\logo{\pgfuseimage{logo}}
\subject{Lectures}
%%%%%%%%%% configuration for code
% Global defaults for every lstlisting environment in this document:
% Matlab syntax highlighting in a monospaced font, line numbers on the
% left, a light blue background with a single-line frame, and automatic
% breaking of long lines.
\lstset{
basicstyle=\ttfamily,
numbers=left,
showstringspaces=false,
language=Matlab,
commentstyle=\itshape\color{darkgray},
keywordstyle=\color{blue},
stringstyle=\color{green},
backgroundcolor=\color{blue!10},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
captionpos=b,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \mycite{<text>}: typesets <text> as a source credit, right-aligned,
% in \tiny size and 80% black.
\newcommand{\mycite}[1]{
\begin{flushright}
\tiny \color{black!80} #1
\end{flushright}
}
\input{../latex/environments.tex}
\makeatother
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}
\frametitle{information}
\begin{itemize}
\item Samuels, M. L., Wittmer, J. A., \& Schaffner,
A. A. (2010). Statistics for the Life Sciences (4th ed.,
p. 668). Prentice Hall.
\item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
Ed.) Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
Hall. doi:10.1037/0012764
\item \url{http://stats.stackexchange.com}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Day 2 -- errorbars, confidence intervals, and tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Types of evidence}
\begin{frame}
\scriptsize
\frametitle{Examples}
\begin{itemize}
\item Before new drugs are given to human subjects, it is common
practice to first test them in dogs or other animals. In part of
one study, a new investigational drug was given to eight male and
eight female dogs at doses of 8 mg/kg and 25 mg/kg. Within each
sex, the two doses were assigned at random to the eight dogs. Many
``endpoints'' were measured, such as cholesterol, sodium, glucose,
and so on, from blood samples, in order to screen for toxicity
problems in the dogs before starting studies on humans. One
endpoint was alkaline phosphatase level (or APL, measured in U/l).
For females, the effect of increasing the dose from 8 to 25 mg/kg
was positive, although small (the average APL increased from 133.5
to 143 U/l), but for males the effect of increasing the dose from
8 to 25 mg/kg was negative.\pause
\item On 15 July 1911, 65-year-old Mrs. Jane Decker was struck by
lightning while in her house. She had been deaf since birth, but
after being struck, she recovered her hearing, which led to a
headline in the New York Times, ``Lightning Cures Deafness.''
\pause
\item Some research has suggested that there is a genetic basis for
sexual orientation. One such study involved measuring the
midsagittal area of the anterior commissure (AC) of the brain for
30 homosexual men, 30 heterosexual men, and 30 heterosexual
women. The researchers found that the AC tends to be larger in
heterosexual women than in heterosexual men and that it is even
larger in homosexual men.
\end{itemize}
\mycite{Samuels, Wittmer, Schaffner 2010}
\end{frame}
\begin{frame}
\scriptsize
\frametitle{types of evidence}
\begin{center}
\Large
{\em experiment} \\ is better than\\ {\em observational study}\\ is
better than\\ {\em anecdotal evidence}
\end{center}
\end{frame}
\subsection{What is inferential statistics?}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{sources of error in an experiment}
\begin{task}{Think about it for 2 min}
If you repeat a scientific experiment, why do you not get the same
result every time you repeat it?
\end{task}
\pause
\begin{itemize}
\item sampling error (a finite subset of the population of interest
is selected in each experiment)
\item nonsampling errors (e.g. noise, uncontrolled factors)
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{statisticians are lazy}
\Large
\only<1>{
\begin{center}
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-26-05_771.jpg}
\end{center}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<2>{
\begin{center}
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-41-39_523.jpg}
\end{center}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<3>{
\begin{center}
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-29-35_312.jpg}
\end{center}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}
\end{frame}
% % ----------------------------------------------------------
\begin{frame}
\frametitle{illustrating examples}
\begin{question}{lung volume of smokers}
Assume you know the sampling distribution of the mean lung volume
of smokers. Would you believe that
the sample came from a group of smokers?
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example01.png}
\end{center}
\end{question}
\end{frame}
\begin{frame}
\frametitle{illustrating examples}
\begin{question}{lung volume of smokers}
What about now? How would the sampling distribution change if I
change the population to (i) athletes, (ii) old people, (iii) all people?
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example02.png}
\end{center}
\end{question}
\end{frame}
\begin{frame}
\frametitle{illustrating examples}
\begin{question}{Is this diet effective?}
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example03.png}
\end{center}
\end{question}
\end{frame}
\begin{frame}
\frametitle{illustrating examples}
\begin{question}{Is this diet effective?}
What do you think now?
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example04.png}
\end{center}
\end{question}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{the (imaginary) meta-study}
\begin{center}
\only<1>{
\framesubtitle{finite sampling introduces variation: the sampling distribution}
\includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<2>{
\framesubtitle{statistic vs. population parameter}
\includegraphics[width=.8\linewidth]{figs/statistic1.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<3>{
\framesubtitle{statistic vs. population parameter}
\includegraphics[width=.8\linewidth]{figs/statistic2.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<4>{
\framesubtitle{what parts of this diagram do we have in real life?}
\includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<5>{
\framesubtitle{what parts of this diagram do we have in real life?}
\includegraphics[width=.8\linewidth]{figs/statistic3.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<6->{
\framesubtitle{what statistics does }
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\includegraphics[width=1.\linewidth]{figs/statistic4.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\begin{itemize}
\item it assumes, derives, or simulates the sampling
distribution\pause
\item the sampling distribution only makes sense if you think
about it in terms of the meta study\pause
\item {\color{red} the sampling distribution is the key to
answering questions about the population from the value of
the statistic}
\end{itemize}
\end{minipage}
\end{minipage}
}
\end{center}
\end{frame}
\begin{frame}
\frametitle{summary}
\begin{itemize}
\item In statistics, we use finite samples from a population to reason
about features of the population. \pause
\item The particular feature of the population we are interested in is called
{\color{blue} population parameter}. We usually measure this
parameter in our finite sample as well
({\color{blue}statistic}).\pause
\item Because of variations due to finite sampling the statistic
almost never matches the population parameter. \pause
\item Using the {\color{blue}sampling distribution} of the statistic, we make
statements about the relation between our statistic and the
population parameter.
\end{itemize}
\end{frame}
\subsection{Errorbars}
% ----------------------------------------------------------
\begin{frame}
\frametitle{illustrating example}
As part of a study of the development of the thymus gland, researchers
weighed the glands of $50$ chick embryos after 14 days of
incubation. The following plot depicts the mean thymus gland weights (in mg):
\mycite{modified from SWS exercise 6.3.3.}
\pause
{\bf Which of the two bar plots is the correct way of displaying the
data?}
\begin{columns}
\begin{column}[l]{.5\linewidth}
\includegraphics[width=\linewidth]{figs/StandardErrorOrStandardDeviation.pdf}
\end{column}
\begin{column}[r]{.5\linewidth}
\pause That depends on what you want to say
\begin{itemize}
\item To give a measure of variability in the data: use the
{\color{blue} standard deviation $\hat\sigma =
\sqrt{\frac{1}{n-1}\sum_{i=1}^n (x_i - \hat\mu)^2}$}
\item To make a statement about the variability in the mean
estimation: use {\color{blue}standard error $\frac{\hat\sigma}{\sqrt{n}}$}
\end{itemize}
\end{column}
\end{columns}
%%%%%%%%%%%%%%% GO ON HERE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% that depends: variability (descriptiv statistics, how variable is
% the mean -> inferential, makes only sense in the meta-study setting)
% first matlab exercise: simulate standard error
% recommend paper for eyeballing test results from standard errors
% from std of mean to confidence intervals
% introduce bootstrapping (matlab exercise), then t-statistic
% intervals
% end with standard error of the median (and the thing from wikipedia)
\end{frame}
%------------------------------------------------------------------------------
\begin{frame}
\frametitle{standard error}
\framesubtitle{bootstrapping}
\begin{task}{standard error vs. standard deviation}
\begin{itemize}
\item Download the dataset {\tt thymusglandweights.dat} from Ilias
\item Write a program that loads the data into matlab, extracts
the first $80$ datapoints, and repeats the following steps
$m=500$ times:
\begin{enumerate}
\item draw $50$ data points from $x$ with replacement
\item compute their mean and store it
\end{enumerate}
Look at the standard deviation of the computed means.
\item Compare the result to the standard deviation of the original
$50$ data points and the standard error.
\end{itemize}
\end{task}
\end{frame}
\begin{frame}[fragile]
\frametitle{standard error}
\begin{lstlisting}
load thymusglandweights.dat
n = 80;
m = 500;
x = thymusglandweights(1:n);
mu = zeros(m,1);
for i = 1:m
mu(i) = mean(x(randi(n,n,1)));
end
disp(['bootstrap standard error: ', num2str(std(mu))]);
disp(['standard error: ', num2str(std(x)/sqrt(n))]);
\end{lstlisting}
\end{frame}
%------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{standard error}
\framesubtitle{bootstrapping}
\begin{itemize}
\item The sample standard error $\frac{\hat\sigma}{\sqrt{n}}$ is
{\color{blue}an estimate of the standard deviation of the means}
in repeated experiments which is computed from a single
experiment.
\item When you want to do statistical tests on the mean, it is
better to use the standard error, because one can eyeball
significance from it
\mycite{Cumming, G., Fidler, F., \& Vaux, D. L. (2007). Error bars
in experimental biology. The Journal of Cell Biology, 177(1),
7--11.}
\item {\color{blue}Bootstrapping} is a way to generate an estimate
of the {\color{blue}sampling distribution of any statistic}. Instead of
sampling from the true distribution, it samples from the
empirical distribution represented by your dataset.
\mycite{Efron, B., \& Tibshirani, R. J. (1994). An Introduction to the Bootstrap. Chapman and Hall/CRC}
\end{itemize}
\end{frame}
%------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{standard error of the median?}
{\bf What kind of errorbars should we use for the median?}
It depends again:
{\bf Descriptive statistics}
\begin{itemize}
\item As a {\color{blue}descriptive statistic} one could use the {\em median
absolute deviation}: the median of the absolute differences of
the datapoints from the median.
\item Alternatively, one could bootstrap a standard error of the
median.
\end{itemize}
\pause
{\bf Inferential statistics}
\begin{itemize}
\item For {\color{blue}inferential statistics} one should use
something that gives the reader {\color{blue}information about
significance}.
\item Here, {\color{blue} confidence intervals} are a better choice.
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\subsection{confidence intervals \& bootstrapping}
%------------------------------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals}
\begin{center}
\only<1>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-55-39_181.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<2>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-56-59_866.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<3>{
\vspace{.1cm}
\includegraphics[width=.4\linewidth]{figs/2012-10-29_14-58-18_054.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<4>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-59-05_984.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<5>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_15-04-38_517.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<6>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_15-09-25_388.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}
\end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals for the median}
\begin{definition}{Confidence interval}
A $(1-\alpha)\cdot 100\%$ confidence interval for a statistic
$\hat\theta$ is an interval $\hat\theta \pm a$ such that the
population parameter $\theta$ is contained in that interval in
$(1-\alpha)\cdot 100\%$ of the experiments.
An alternative way to put it is that $(\hat\theta - \theta) \in
[-a,a]$ in $(1-\alpha)\cdot 100\%$ of the cases.
\end{definition}
\begin{columns}
\begin{column}[l]{.5\linewidth}
If we knew the sampling distribution of the median $\hat m$, could
we generate, e.g., a $95\%$ confidence interval?\pause
\vspace{.5cm}
Yes, we could choose the interval such that $\hat m - m$ lies in that
interval in $95\%$ of the cases.
\end{column}
\begin{column}[r]{.5\linewidth}
\only<1>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian00.pdf}}
\only<2>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian01.pdf}}
\end{column}
\end{columns}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals for the mean via bootstrapping}
\framesubtitle{how to get the sampling distribution}
\begin{task}{bootstrapping a confidence interval for the mean}
\begin{itemize}
\item Use the same dataset as before.
\item Bootstrap $500$ means.
\item Plot their distribution.
\item Compute the $2.5\%$ and the $97.5\%$ percentile of the
$500$ means.
\item Mark them in the plot.
\end{itemize}
These two numbers give you $\hat m -a$ and $\hat m + a$ for
the $95\%$ confidence interval.
\end{task}
\end{frame}
\begin{frame}[fragile]
\frametitle{confidence intervals for the mean}
\scriptsize
\begin{lstlisting}
load thymusglandweights.dat
n = 80;
x = thymusglandweights(1:n);
m = 500;
me = zeros(m,1);
for i = 1:m
me(i) = mean(x(randi(n,n,1)));
end
disp(['bootstrap quantiles: ' , num2str(quantile(me,0.025)), ' ' ,num2str(quantile(me,1-0.025))]);
\end{lstlisting}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals}
\framesubtitle{Notice the theme!}
\begin{enumerate}
\item choose a statistic
\item get the sampling distribution of the statistic (by theory or
simulation)
\item use that distribution to reason about the relation between the
true population parameter (e.g. $m$) and the sampled statistic
$\hat m$
\end{enumerate}
\begin{center}
\color{blue}
This is the scaffold of most statistical techniques. Try to find
it and it can help you understand them.
\end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence interval for the mean}
\framesubtitle{Let's search the pattern in the normal way of computing
a confidence interval for the mean}
\begin{itemize}
\item If the $x_1,...,x_n\sim \mathcal N(\mu,\sigma)$ are Gaussian, then $\hat\mu$ is Gaussian as
well
\item What is the mean of $\hat\mu$? What is its standard deviation?\pause
\item[]{\color{gray} $\langle\hat\mu\rangle_{X_1,...,X_n} = \mu$ and
$\mbox{std}(\hat\mu) = \frac{\sigma}{\sqrt{n}}$}\pause
\item The problem is, that $\hat\mu \sim \mathcal N\left(\mu,
\frac{\sigma}{\sqrt{n}}\right)$ depends on unknown population
parameters.\pause
\item However, $$\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}} \sim
\mbox{t-distribution with }n-1\mbox{ degrees of freedom}$$
\item Therefore,
\begin{align*}
P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
\end{align*}
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence interval for the mean}
\begin{task}{Bootstrapping a confidence interval for the mean}
Extend your script to contain the analytical confidence
interval using
\begin{align*}
P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
\end{align*}
Hint: Use the function {\tt tinv(0.025, n-1)} to get the value of
$t_{2.5\%}$ and similar for $t_{97.5\%}$.
\end{task}
\end{frame}
\begin{frame}[fragile]
\frametitle{solution}
\scriptsize
\begin{lstlisting}
load thymusglandweights.dat
n = 80;
x = thymusglandweights(1:n);
m = 500;
me = zeros(m,1);
for i = 1:m
me(i) = mean(x(randi(n,n,1)));
end
t025 = tinv(0.025, n-1);
t975 = tinv(0.975, n-1);
se = std(x)/sqrt(n);
disp(['bootstrap quantiles: ' , num2str(quantile(me,0.025)), ' ' ,num2str(quantile(me,1-0.025))]);
disp(['analytical CI: ' , num2str(mean(x)+t025*se), ' ' , num2str(mean(x)+t975*se)]);
\end{lstlisting}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{statistical tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{ingredients into a test}
\begin{itemize}
\item {\bf What is the goal of a test?}\pause
\item[] Check whether a measured
statistic looks different from what you would expect if there was no
effect.\pause
\item {\bf What are the ingredients into a test?}\pause
\item[] a test statistic (e.g. the mean, the median, ...) and a null
distribution\pause
\item {\bf What is a null distribution?}\pause
\item[] The sampling distribution of the statistic in case there is
no effect (i.e. the Null hypothesis is true).
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{how tests work}
\begin{enumerate}
\item Choose a statistic.
\item Get a null distribution.
\item Compare your actually measured value with the Null
distribution.
\end{enumerate}
\end{frame}
\begin{frame}
\frametitle{Example: one sample test}
\framesubtitle{step 2: get a Null distribution}
\scriptsize
Assume that the expected weight of a thymus gland from the
literature is 34.3g. We want to test whether the mean of our
thymus gland dataset is different from the expectation in the
literature. Comparing a statistic of a dataset against a fixed value
is called {\em one sample test}.
\pause
\begin{itemize}
\item {\bf How could we simulate the distribution of the data if the
mean was really 34.3g?}\pause
\item[] Bootstrapping.
\end{itemize}
\begin{task}{generating a null distribution}
\begin{itemize}
\item Write a matlab program that bootstraps 2000 means from the
thymus gland dataset.
\item How can we adjust the data so that it has mean 34.3g (remember,
we want to simulate the null distribution)?
\item Plot a histogram of these 2000 means.
\item Also indicate the actual mean of the data.
\end{itemize}
\end{task}
\end{frame}
\begin{frame}
\frametitle{Example: one sample test}
\framesubtitle{step 3: compare the actual value to the Null distribution}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
The question we want to answer in this step is:
\begin{center}
\color{blue} Does the actually measured value look like it came
from the Null distribution?
\end{center}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\includegraphics[width=\linewidth]{figs/bootstraptest.png}
\end{minipage}
\end{minipage}
{\bf How could we do this in our bootstrapping example?}\pause
\begin{itemize}
\item Set a threshold. \pause How do we choose the threshold? \pause Via type I error.\pause
\item Specify the type I error if we used the actual measured value
as threshold (p-value). Why is that a reasonable strategy?
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Example: one sample test}
\framesubtitle{step 3: compare the actual value to the Null distribution}
\begin{task}{type I error and p-value}
Extend the script such that it
\begin{itemize}
\item computes the $5\%$ significance boundaries from the
distribution and plots them into the histogram.
\item computes a p-value.
\end{itemize}
\end{task}
\end{frame}
\begin{frame}
\frametitle{two sample test}
\framesubtitle{permutation test}
Brain weight: In 1888, P. Topinard published data on the brain
weights of hundreds of French men and women. Brain weights are given
in grams. The data can be downloaded from Ilias (example 002 from
yesterday).
\vspace{.5cm}
{\bf How could we determine (similar to bootstrapping) whether the
mean brain weights of males and females are different?}
\begin{itemize}
\item What do we use as a statistic?
\item[]<2-> The difference of the means of the two groups.
\item How do we simulate the null distribution?
\item[]<3-> Shuffle the labels ``male'' and ``female'', compute
difference in means of two groups, and repeat.
\end{itemize}
\end{frame}
\begin{frame}
\begin{center}
\Huge That's it.
\end{center}
\end{frame}
\end{document}

View File

@ -0,0 +1,13 @@
% Bootstrap estimate of the standard error of the mean.
%
% Resamples the first 50 thymus gland weights with replacement m times,
% stores the mean of every resample, and prints the standard deviation
% of those means next to the analytical standard error std(x)/sqrt(n).
load thymusglandweights.dat
x = thymusglandweights(1:50);

m = 500;          % number of bootstrap resamples
n = length(x);    % sample size; every resample has the same size
mu = zeros(m,1);  % mean of each bootstrap resample
for i = 1:m
    % randi(n,n,1) draws n indices from 1..n, i.e. sampling with replacement
    mu(i) = mean(x(randi(n,n,1)));
end

% Single-quoted format strings: double-quoted literals are not accepted
% by MATLAB releases before R2017a.
fprintf('bootstrap standard error: %.4f\n', std(mu));
fprintf('standard error: %.4f\n', std(x)/sqrt(n));

View File

@ -0,0 +1,19 @@
% Bootstrap versus analytical 95% confidence interval for the mean.
%
% Resamples the first n thymus gland weights with replacement m times,
% prints the 2.5% and 97.5% quantiles of the bootstrapped means, and
% prints the t-distribution based interval mean(x) + t*se for comparison.
load thymusglandweights.dat
n = 80;
x = thymusglandweights(1:n);

m = 5000;          % number of bootstrap resamples
me = zeros(m,1);   % statistic of each bootstrap resample
for i = 1:m
    % The bootstrapped statistic has to be the mean: the analytical
    % interval printed below is the t-interval for the mean, so
    % bootstrapping the median would compare two different statistics.
    me(i) = mean(x(randi(n,n,1)));
end

% t quantiles with n-1 degrees of freedom and the standard error of the mean
t025 = tinv(0.025, n-1);
t975 = tinv(0.975, n-1);
se = std(x)/sqrt(n);

fprintf('bootstrap quantiles: %.4f, %.4f \n', quantile(me,0.025), quantile(me,0.975));
fprintf('analytical quantile: %.4f, %.4f \n', mean(x)+t025*se, mean(x)+t975*se);

View File

@ -0,0 +1,17 @@
% Interval for the median of the first 50 thymus gland weights, computed
% in two ways: from the quantiles of bootstrapped medians, and from order
% statistics of the sorted sample whose ranks are taken from quantiles of
% a Binomial(n, 0.5) distribution.
load thymusglandweights.dat
sortedWeights = sort(thymusglandweights(1:50));
nObs = length(sortedWeights);
nBoot = 500;
tailProb = 0.025;

% ranks of the order statistics used as interval limits
lowerRank = binoinv(tailProb, nObs, .5) - 1;
upperRank = binoinv(1 - tailProb, nObs, .5);

% medians of nBoot resamples drawn with replacement
bootMedians = zeros(nBoot, 1);
for b = 1:nBoot
    resampleIdx = randi(nObs, nObs, 1);
    bootMedians(b) = median(sortedWeights(resampleIdx));
end

bootLow = quantile(bootMedians, tailProb);
bootHigh = quantile(bootMedians, 1 - tailProb);
fprintf('bootstrap quantiles: %.4f, %.4f \n', bootLow, bootHigh);
fprintf('analytical quantile: %.4f, %.4f \n', sortedWeights(lowerRank), sortedWeights(upperRank));

38
statistics/matlab/tests.m Normal file
View File

@ -0,0 +1,38 @@
% One-sample bootstrap test: does the mean thymus gland weight differ
% from the value reported in the literature?
%
% Steps:
%   1. shift the data so that its mean equals the literature value
%      (this simulates the null hypothesis),
%   2. bootstrap m means from the shifted data (the null distribution),
%   3. plot the null distribution with the measured mean and the 5%
%      significance boundaries, and compute a two-sided p-value.
close all
clear all
load thymusglandweights.dat

literature_mean = 34.3;
x = thymusglandweights;
n = length(x);
% data with the same spread as x but with mean equal to literature_mean
y = x - mean(x) + literature_mean;

m = 2000;          % number of bootstrap resamples
me = zeros(m,1);   % bootstrapped means under the null hypothesis
for i = 1:m
    % The null distribution must be built from the same statistic as the
    % measured value. The data were shifted to have the literature *mean*
    % and the measured statistic below is mean(x), so bootstrap the mean
    % (the median of y is in general not equal to literature_mean).
    me(i) = mean(y(randi(n,n,1)));
end

hist(me, 50);
hold on
mu = mean(x);      % actually measured statistic
plot([mu,mu],[0,200],'--r','LineWidth',3);
xlabel('thymus gland weights [g]');
ylabel('frequency');
title('bootstrapped null distribution');
hold off

% 5% significance boundaries
low = quantile(me,0.025);
high = quantile(me,0.975);
disp(['the 5% boundaries are: ', num2str(low), ' ', num2str(high)]);
hold on
plot([low,low],[0,200],'--g','LineWidth',3);
plot([high,high],[0,200],'--g','LineWidth',3);
hold off

% two-sided p-value: fraction of null means that lie further away from
% the literature value than the measured mean (no semicolon: displayed)
pval = mean(abs(me-literature_mean) > abs(mu - literature_mean))
legend('Null distribution','measured mean','5% significance boundaries')

File diff suppressed because it is too large Load Diff