first day done

This commit is contained in:
Fabian Sinz 2014-10-15 11:24:18 +02:00
parent 1e7f07a2d2
commit 7e4d79a2c5
38 changed files with 1589 additions and 5 deletions

View File

@ -0,0 +1,16 @@
# Dataset numbers for which an assignment bundle is built. Each entry N
# needs ../data/exampleN.csv; day1.tex additionally \input's a matching
# description file once its placeholder number has been substituted.
# (The list previously named 007 twice and skipped 008.)
NUMBERS = 001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017

# Neither target produces a file of its own name.
.PHONY: all clean

# For every dataset number: instantiate day1.tex by replacing the 000
# placeholder, typeset it, and zip the resulting PDF together with the
# CSV. Intermediate files are removed again after each iteration.
all:
	for number in $(NUMBERS) ; do \
		echo $$number ; \
		sed "s/000/$$number/g" day1.tex > tmp.tex; \
		pdflatex tmp.tex; \
		mv tmp.pdf day1_$$number.pdf; \
		cp ../data/example$$number.csv ./ ;\
		rm tmp.* ; \
		zip example$$number.zip example$$number.csv day1_$$number.pdf ; \
		rm example$$number.csv ;\
		rm day1_$$number.pdf ; \
	done

# Remove the generated bundles and the auto directory; -f keeps the
# target from failing when no zip file exists yet.
clean:
	rm -f *.zip
	rm -rf auto

72
statistics/assignments/day1.tex Executable file
View File

@ -0,0 +1,72 @@
% Afternoon assignment sheet, typeset with the exam class.
% NOTE: the accompanying Makefile pipes this file through sed and replaces
% every occurrence of the three-zero placeholder with a dataset number, so
% that digit sequence must not appear anywhere else in this file.
% addpoints: let the class keep track of point totals.
\documentclass[addpoints,10pt]{exam}
\usepackage{url}
% color is needed for the shaded solution boxes requested below.
\usepackage{color}
\usepackage{hyperref}
% Header and footer on every page, with a rule under the header.
\pagestyle{headandfoot}
\runningheadrule
\firstpageheadrule
% Arguments are the left, center, and right parts of the first-page header.
\firstpageheader{Scientific Computing}{afternoon assignment day 01}{10/20/2014}
%\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014}
% Footers are deliberately left empty.
\firstpagefooter{}{}{}
\runningfooter{}{}{}
% Print point values in the margin, enclosed in square brackets.
\pointsinmargin
\bracketedpoints
% Uncomment \printanswers to typeset the solutions (on a shaded background).
%\printanswers
\shadedsolutions
\begin{document}
%%%%%%%%%%%%%%%%%%%%% Submission instructions %%%%%%%%%%%%%%%%%%%%%%%%%
\sffamily
%%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%%
\begin{questions}
\question To publish scientific results, you will usually need to
use statistical methods. Some journals provide you with a brief
description of how they expect you to apply statistical methods. One
example can be found in the author guidelines of the journal
Nature.
Assume you collected the following dataset. You can download it from
Ilias as {\tt example000.csv}. Here is the description of the dataset:
\begin{quotation}
\tt
\input{../examples/example000.tex}
\end{quotation}
\begin{parts}
\part Download the dataset and write a script that loads it into
Matlab.
\part Think about the type of your data (I might ask you that
tomorrow).
\part Produce a plot that displays the data in an appropriate
way. Make sure to respect all elements of good plotting we
discussed today.
\part Download the statistical checklist from Nature. Produce {\bf
one} slide that contains the plot and a concise summary of your
data which respects the requirements made by Nature (assume you
are producing a figure legend for the figure in Nature). It is
good style to avoid expressions like ``the plot shows'' or
similar.
\part Upload your code, the data, and the slide as a zip to
Ilias. Deadline is 19h00. Structure the zip such that you can
present your program in front of the class. Several students will
be asked to present their slide and their code tomorrow morning.
\end{parts}
\end{questions}
\end{document}

43
statistics/data/example001.csv Executable file
View File

@ -0,0 +1,43 @@
MAO,Diagnosis
6.8,I
4.1,I
7.3,I
14.2,I
18.8,I
9.9,I
7.4,I
11.9,I
5.2,I
7.8,I
7.8,I
8.7,I
12.7,I
14.5,I
10.7,I
8.4,I
9.7,I
10.6,I
7.8,II
4.4,II
11.4,II
3.1,II
4.3,II
10.1,II
1.5,II
7.4,II
5.2,II
10,II
3.7,II
5.5,II
8.5,II
7.7,II
6.8,II
3.1,II
6.4,III
10.8,III
1.1,III
2.9,III
4.5,III
5.8,III
9.4,III
6.8,III
1 MAO Diagnosis
2 6.8 I
3 4.1 I
4 7.3 I
5 14.2 I
6 18.8 I
7 9.9 I
8 7.4 I
9 11.9 I
10 5.2 I
11 7.8 I
12 7.8 I
13 8.7 I
14 12.7 I
15 14.5 I
16 10.7 I
17 8.4 I
18 9.7 I
19 10.6 I
20 7.8 II
21 4.4 II
22 11.4 II
23 3.1 II
24 4.3 II
25 10.1 II
26 1.5 II
27 7.4 II
28 5.2 II
29 10 II
30 3.7 II
31 5.5 II
32 8.5 II
33 7.7 II
34 6.8 II
35 3.1 II
36 6.4 III
37 10.8 III
38 1.1 III
39 2.9 III
40 4.5 III
41 5.8 III
42 9.4 III
43 6.8 III

186
statistics/data/example002.csv Executable file
View File

@ -0,0 +1,186 @@
Weight,Sex
1607,m
1157,m
1248,m
1310,m
1398,m
1237,m
1232,m
1343,m
1380,m
1274,m
1245,m
1286,m
1508,m
1105,m
1123,m
1198,m
1300,m
1249,m
1185,m
915,m
1345,m
1107,m
1357,m
1227,m
1205,m
1435,m
1289,m
1093,m
1211,m
1260,m
1193,m
1330,m
1130,m
1357,m
1193,m
1232,m
1321,m
1260,m
1380,m
1230,m
1136,m
1029,m
1223,m
1240,m
1264,m
1020,m
1415,m
1410,m
1275,m
1230,m
1085,m
1048,m
1181,m
1103,m
1165,m
1547,m
1173,m
1660,m
1307,m
1535,m
1315,m
1257,m
1424,m
1309,m
1170,m
1412,m
1270,m
1230,m
1233,m
1561,m
1193,m
1272,m
1355,m
1137,m
1354,m
1110,m
1265,m
1407,m
1227,m
1330,m
1222,m
1305,m
1475,m
1177,m
1337,m
1145,m
1070,m
1305,m
1085,m
1303,m
1390,m
1532,m
1238,m
1233,m
1280,m
1245,m
1459,m
1157,m
1302,m
1385,m
1310,m
1342,m
1303,m
1248,m
1115,m
1365,m
1227,m
1353,m
1125,f
1027,f
1112,f
983,f
1090,f
1247,f
1045,f
983,f
972,f
1045,f
937,f
1245,f
1200,f
1270,f
1200,f
1145,f
1090,f
1040,f
1343,f
1010,f
1095,f
1180,f
1168,f
1095,f
1040,f
1235,f
1050,f
1038,f
1046,f
1255,f
1228,f
1000,f
1225,f
1220,f
1085,f
1067,f
1006,f
1138,f
1175,f
1252,f
1037,f
958,f
1020,f
1068,f
1107,f
1317,f
952,f
1056,f
1203,f
1183,f
1392,f
1130,f
1284,f
996,f
1228,f
1087,f
1035,f
1170,f
1064,f
1250,f
1129,f
1088,f
1037,f
1117,f
1095,f
1027,f
1027,f
1190,f
1153,f
1037,f
1120,f
1212,f
1024,f
1135,f
1177,f
1096,f
1114,f
1 Weight Sex
2 1607 m
3 1157 m
4 1248 m
5 1310 m
6 1398 m
7 1237 m
8 1232 m
9 1343 m
10 1380 m
11 1274 m
12 1245 m
13 1286 m
14 1508 m
15 1105 m
16 1123 m
17 1198 m
18 1300 m
19 1249 m
20 1185 m
21 915 m
22 1345 m
23 1107 m
24 1357 m
25 1227 m
26 1205 m
27 1435 m
28 1289 m
29 1093 m
30 1211 m
31 1260 m
32 1193 m
33 1330 m
34 1130 m
35 1357 m
36 1193 m
37 1232 m
38 1321 m
39 1260 m
40 1380 m
41 1230 m
42 1136 m
43 1029 m
44 1223 m
45 1240 m
46 1264 m
47 1020 m
48 1415 m
49 1410 m
50 1275 m
51 1230 m
52 1085 m
53 1048 m
54 1181 m
55 1103 m
56 1165 m
57 1547 m
58 1173 m
59 1660 m
60 1307 m
61 1535 m
62 1315 m
63 1257 m
64 1424 m
65 1309 m
66 1170 m
67 1412 m
68 1270 m
69 1230 m
70 1233 m
71 1561 m
72 1193 m
73 1272 m
74 1355 m
75 1137 m
76 1354 m
77 1110 m
78 1265 m
79 1407 m
80 1227 m
81 1330 m
82 1222 m
83 1305 m
84 1475 m
85 1177 m
86 1337 m
87 1145 m
88 1070 m
89 1305 m
90 1085 m
91 1303 m
92 1390 m
93 1532 m
94 1238 m
95 1233 m
96 1280 m
97 1245 m
98 1459 m
99 1157 m
100 1302 m
101 1385 m
102 1310 m
103 1342 m
104 1303 m
105 1248 m
106 1115 m
107 1365 m
108 1227 m
109 1353 m
110 1125 f
111 1027 f
112 1112 f
113 983 f
114 1090 f
115 1247 f
116 1045 f
117 983 f
118 972 f
119 1045 f
120 937 f
121 1245 f
122 1200 f
123 1270 f
124 1200 f
125 1145 f
126 1090 f
127 1040 f
128 1343 f
129 1010 f
130 1095 f
131 1180 f
132 1168 f
133 1095 f
134 1040 f
135 1235 f
136 1050 f
137 1038 f
138 1046 f
139 1255 f
140 1228 f
141 1000 f
142 1225 f
143 1220 f
144 1085 f
145 1067 f
146 1006 f
147 1138 f
148 1175 f
149 1252 f
150 1037 f
151 958 f
152 1020 f
153 1068 f
154 1107 f
155 1317 f
156 952 f
157 1056 f
158 1203 f
159 1183 f
160 1392 f
161 1130 f
162 1284 f
163 996 f
164 1228 f
165 1087 f
166 1035 f
167 1170 f
168 1064 f
169 1250 f
170 1129 f
171 1088 f
172 1037 f
173 1117 f
174 1095 f
175 1027 f
176 1027 f
177 1190 f
178 1153 f
179 1037 f
180 1120 f
181 1212 f
182 1024 f
183 1135 f
184 1177 f
185 1096 f
186 1114 f

52
statistics/data/example003.csv Executable file
View File

@ -0,0 +1,52 @@
singtime
4.3
24.1
6.6
7.3
4
2.6
4
3.9
9.4
6.2
1.6
6.5
0.2
2.7
17.4
5.6
2
3.8
1.2
0.7
1.6
2.3
3.7
0.8
0.5
4.5
11.5
3.5
0.8
5.2
2
0.7
1.7
5
2.8
1.5
3.9
3.7
4.5
1.8
1.2
0.7
0.7
4.2
4.7
2.2
1.4
14.1
8.6
3.7
3.5
1 singtime
2 4.3
3 24.1
4 6.6
5 7.3
6 4
7 2.6
8 4
9 3.9
10 9.4
11 6.2
12 1.6
13 6.5
14 0.2
15 2.7
16 17.4
17 5.6
18 2
19 3.8
20 1.2
21 0.7
22 1.6
23 2.3
24 3.7
25 0.8
26 0.5
27 4.5
28 11.5
29 3.5
30 0.8
31 5.2
32 2
33 0.7
34 1.7
35 5
36 2.8
37 1.5
38 3.9
39 3.7
40 4.5
41 1.8
42 1.2
43 0.7
44 0.7
45 4.2
46 4.7
47 2.2
48 1.4
49 14.1
50 8.6
51 3.7
52 3.5

29
statistics/data/example004.csv Executable file
View File

@ -0,0 +1,29 @@
Pulse
97
111
93
98
107
77
121
88
96
123
119
91
99
95
99
102
77
85
104
106
114
85
112
102
104
94
104
98
1 Pulse
2 97
3 111
4 93
5 98
6 107
7 77
8 121
9 88
10 96
11 123
12 119
13 91
14 99
15 95
16 99
17 102
18 77
19 85
20 104
21 106
22 114
23 85
24 112
25 102
26 104
27 94
28 104
29 98

37
statistics/data/example005.csv Executable file
View File

@ -0,0 +1,37 @@
Branches
23
30
54
28
31
29
34
35
30
27
21
43
51
35
51
49
35
24
26
29
21
29
37
27
28
33
33
23
37
27
40
48
41
20
30
57
1 Branches
2 23
3 30
4 54
5 28
6 31
7 29
8 34
9 35
10 30
11 27
12 21
13 43
14 51
15 35
16 51
17 49
18 35
19 24
20 26
21 29
22 21
23 29
24 37
25 27
26 28
27 33
28 33
29 23
30 37
31 27
32 40
33 48
34 41
35 20
36 30
37 57

32
statistics/data/example006.csv Executable file
View File

@ -0,0 +1,32 @@
Glucose
81
85
93
93
99
76
75
84
78
84
81
82
89
81
96
82
74
70
84
86
80
70
131
75
88
102
115
89
82
79
106
1 Glucose
2 81
3 85
4 93
5 93
6 99
7 76
8 75
9 84
10 78
11 84
12 81
13 82
14 89
15 81
16 96
17 82
18 74
19 70
20 84
21 86
22 80
23 70
24 131
25 75
26 88
27 102
28 115
29 89
30 82
31 79
32 106

24
statistics/data/example007.csv Executable file
View File

@ -0,0 +1,24 @@
NerveCells
35
19
33
34
17
26
16
40
28
30
23
12
27
33
22
31
28
28
35
23
23
19
29
1 NerveCells
2 35
3 19
4 33
5 34
6 17
7 26
8 16
9 40
10 28
11 30
12 23
13 12
14 27
15 33
16 22
17 31
18 28
19 28
20 35
21 23
22 23
23 19
24 29

21
statistics/data/example008.csv Executable file
View File

@ -0,0 +1,21 @@
RateChange,Treatment
28,Caffeine
11,Caffeine
-3,Caffeine
14,Caffeine
-2,Caffeine
-4,Caffeine
18,Caffeine
2,Caffeine
2,Caffeine
26,Decaf
1,Decaf
0,Decaf
-4,Decaf
-4,Decaf
14,Decaf
16,Decaf
8,Decaf
0,Decaf
18,Decaf
-10,Decaf
1 RateChange Treatment
2 28 Caffeine
3 11 Caffeine
4 -3 Caffeine
5 14 Caffeine
6 -2 Caffeine
7 -4 Caffeine
8 18 Caffeine
9 2 Caffeine
10 2 Caffeine
11 26 Decaf
12 1 Decaf
13 0 Decaf
14 -4 Decaf
15 -4 Decaf
16 14 Decaf
17 16 Decaf
18 8 Decaf
19 0 Decaf
20 18 Decaf
21 -10 Decaf

12
statistics/data/example009.csv Executable file
View File

@ -0,0 +1,12 @@
NEConcentration,Treatment
543,Toluene
523,Toluene
431,Toluene
635,Toluene
564,Toluene
549,Toluene
535,Control
385,Control
502,Control
412,Control
387,Control
1 NEConcentration Treatment
2 543 Toluene
3 523 Toluene
4 431 Toluene
5 635 Toluene
6 564 Toluene
7 549 Toluene
8 535 Control
9 385 Control
10 502 Control
11 412 Control
12 387 Control

13
statistics/data/example010.csv Executable file
View File

@ -0,0 +1,13 @@
Dopamine,Group
3420,toluene
2314,toluene
1911,toluene
2464,toluene
2781,toluene
2803,toluene
1820,control
1843,control
1397,control
1803,control
2539,control
1990,control
1 Dopamine Group
2 3420 toluene
3 2314 toluene
4 1911 toluene
5 2464 toluene
6 2781 toluene
7 2803 toluene
8 1820 control
9 1843 control
10 1397 control
11 1803 control
12 2539 control
13 1990 control

10
statistics/data/example011.csv Executable file
View File

@ -0,0 +1,10 @@
Animal,Site I,Site II
1,50.6,38
2,39.2,18.6
3,35.2,23.2
4,17,19
5,11.2,6.6
6,14.2,16.4
7,24.2,14.4
8,37.4,37.6
9,35.2,24.4
1 Animal Site I Site II
2 1 50.6 38
3 2 39.2 18.6
4 3 35.2 23.2
5 4 17 19
6 5 11.2 6.6
7 6 14.2 16.4
8 7 24.2 14.4
9 8 37.4 37.6
10 9 35.2 24.4

10
statistics/data/example012.csv Executable file
View File

@ -0,0 +1,10 @@
Subject,mCPP,Placebo
1,1.1,0
2,1.3,-0.3
3,1,0.6
4,1.7,0.3
5,1.4,-0.7
6,0.1,-0.2
7,0.5,0.6
8,1.6,0.9
9,-0.5,-2
1 Subject mCPP Placebo
2 1 1.1 0
3 2 1.3 -0.3
4 3 1 0.6
5 4 1.7 0.3
6 5 1.4 -0.7
7 6 0.1 -0.2
8 7 0.5 0.6
9 8 1.6 0.9
10 9 -0.5 -2

9
statistics/data/example013.csv Executable file
View File

@ -0,0 +1,9 @@
Animal,Control,Regenerating
1,16.3,11.5
2,4.8,3.6
3,10.9,12.5
4,14.2,6.3
5,16.3,15.2
6,9.9,8.1
7,29.2,16.6
8,22.4,13.1
1 Animal Control Regenerating
2 1 16.3 11.5
3 2 4.8 3.6
4 3 10.9 12.5
5 4 14.2 6.3
6 5 16.3 15.2
7 6 9.9 8.1
8 7 29.2 16.6
9 8 22.4 13.1

16
statistics/data/example014.csv Executable file
View File

@ -0,0 +1,16 @@
BodyTempDrop,AlcoholDose
0.2,1.5
1.9,1.5
-0.1,1.5
0.5,1.5
0.8,1.5
4,3
3.2,3
2.3,3
2.9,3
3.8,3
3.3,6
5.1,6
5.3,6
6.7,6
5.9,6
1 BodyTempDrop AlcoholDose
2 0.2 1.5
3 1.9 1.5
4 -0.1 1.5
5 0.5 1.5
6 0.8 1.5
7 4 3
8 3.2 3
9 2.3 3
10 2.9 3
11 3.8 3
12 3.3 6
13 5.1 6
14 5.3 6
15 6.7 6
16 5.9 6

18
statistics/data/example015.csv Executable file
View File

@ -0,0 +1,18 @@
PeakFlow,Height
733,174
572,183
500,176
738,169
616,183
787,186
866,178
670,175
550,172
660,179
575,171
577,184
783,200
625,195
470,176
642,176
856,190
1 PeakFlow Height
2 733 174
3 572 183
4 500 176
5 738 169
6 616 183
7 787 186
8 866 178
9 670 175
10 550 172
11 660 179
12 575 171
13 577 184
14 783 200
15 625 195
16 470 176
17 642 176
18 856 190

19
statistics/data/example016.csv Executable file
View File

@ -0,0 +1,19 @@
Patient,Before,After
1,98,75
2,100,60
3,82,25
4,100,55
5,93,78
6,119,102
7,70,58
8,78,70
9,104,90
10,70,50
11,60,65
12,88,45
13,45,36
14,159,144
15,65,27
16,98,90
17,66,16
18,67,53
1 Patient Before After
2 1 98 75
3 2 100 60
4 3 82 25
5 4 100 55
6 5 93 78
7 6 119 102
8 7 70 58
9 8 78 70
10 9 104 90
11 10 70 50
12 11 60 65
13 12 88 45
14 13 45 36
15 14 159 144
16 15 65 27
17 16 98 90
18 17 66 16
19 18 67 53

21
statistics/data/example017.csv Executable file
View File

@ -0,0 +1,21 @@
LegStrength,UpperBodyStrength
55,low
70,low
45,low
246,low
240,low
96,low
225,low
40,middle
200,middle
250,middle
192,middle
117,middle
215,middle
181,high
85,high
416,high
228,high
257,high
316,high
134,high
1 LegStrength UpperBodyStrength
2 55 low
3 70 low
4 45 low
5 246 low
6 240 low
7 96 low
8 225 low
9 40 middle
10 200 middle
11 250 middle
12 192 middle
13 117 middle
14 215 middle
15 181 high
16 85 high
17 416 high
18 228 high
19 257 high
20 316 high
21 134 high

View File

@ -0,0 +1,6 @@
MAO and Schizophrenia: Monoamine oxidase (MAO) is an enzyme that is
thought to play a role in the regulation of behavior. To see whether
different categories of schizophrenic patients have different levels
of MAO activity, researchers collected blood specimens from 42
patients and measured the MAO activity in the platelets. Values are
expressed as nmol benzylaldehyde product per $10^8$ platelets per hour.

View File

@ -0,0 +1,3 @@
Brain Weight: In 1888, P. Topinard published data on the brain weights
of hundreds of French men and women. Brain weights are given in
grams.

View File

@ -0,0 +1,4 @@
Cricket Singing Times: Male Mormon crickets (Anabrus simplex) sing to attract mates.
A field researcher measured the duration of 51 unsuccessful songs--that is, the time
until the singing male gave up and left his perch. The data is given
in minutes.

View File

@ -0,0 +1,3 @@
Pulse after Exercise: A group of 28 adults did some moderate exercise
for five minutes and then measured their pulses. Data is given in
beats/minute.

View File

@ -0,0 +1,5 @@
A dendritic tree is a branched structure that emanates from the body
of a nerve cell. As part of a study of brain development, 36 nerve
cells were taken from the brains of newborn guinea pigs. The
investigators counted the number of dendritic branch segments
emanating from each nerve cell.

View File

@ -0,0 +1,4 @@
For each of 31 healthy dogs, a veterinarian measured the glucose
concentration in the anterior chamber of the right eye and also in the
blood serum. The following data are the anterior chamber glucose
measurements, expressed as a percentage of the blood glucose.

View File

@ -0,0 +1,5 @@
A veterinary anatomist investigated the spatial arrangement of the
nerve cells in the intestine of a pony. He removed a block of tissue
from the intestinal wall, cut the block into many equal sections, and
counted the number of nerve cells in each of 23 randomly selected
sections.

View File

@ -0,0 +1,8 @@
Researchers were interested in the short-term effect that caffeine has
on heart rate. They enlisted a group of volunteers and measured each
person's resting heart rate. Then they had each subject drink 6 ounces
of coffee. Nine of the subjects were given coffee containing caffeine
and 11 were given decaffeinated coffee. After 10 minutes each person's
heart rate was measured again. The data in the table contains the
change in heart rate; a positive number means that heart rate went up
and a negative number means that heart rate went down.

View File

@ -0,0 +1,9 @@
Toluene and the Brain: Abuse of substances containing toluene (for
example, glue) can produce various neurological symptoms. In an
investigation of the mechanism of these toxic effects, researchers
measured the concentrations of various chemicals in the brains of rats
that had been exposed to a toluene-laden atmosphere, and also in
unexposed control rats. The concentrations of the brain chemical
norepinephrine (NE) in the medulla region of the brain, for six
toluene-exposed rats and five control rats, are given in the
accompanying data file in ng/g.

View File

@ -0,0 +1,3 @@
In a pharmacological study, researchers measured the concentration of
the brain chemical dopamine in six rats exposed to toluene and six
control rats. Numbers are specified in ng/g.

View File

@ -0,0 +1,6 @@
Nerve Cell Density: For each of nine horses, a veterinary anatomist
measured the density of nerve cells at specified sites in the
intestine. The results for site I (midregion of jejunum) and site II
(mesenteric region of jejunum) are given in the accompanying dataset.
Each density value is the average of counts of nerve cells in five
equal sections of tissue.

View File

@ -0,0 +1,6 @@
Hunger Rating: During a weight loss study each of nine subjects was
given either the active drug m-chlorophenylpiperazine (mCPP) for two
weeks and then a placebo for another two weeks, or else was given the
placebo for the first two weeks and then mCPP for the second two
weeks. As part of the study the subjects were asked to rate how hungry
they were at the end of each two-week period.

View File

@ -0,0 +1,10 @@
Certain types of nerve cells have the ability to regenerate a part of
the cell that has been amputated. In an early study of this process,
measurements were made on the nerves in the spinal cord in rhesus
monkeys. Nerves emanating from the left side of the cord were cut,
while nerves from the right side were kept intact. During the
regeneration process, the content of creatine phosphate (CP) was
measured in the left and the right portion of the spinal cord. The
following table shows the data for the right (control) side (Y1), and
for the left (regenerating) side (Y2). The units of measurement are mg
CP per 100 gm tissue.

View File

@ -0,0 +1,9 @@
In an investigation of the physiological effects of alcohol
(ethanol), 15 mice were randomly allocated to three treatment groups,
each to receive a different oral dose of alcohol. The dosage levels
were 1.5, 3.0, and 6.0 g alcohol/kg body weight. The body temperature
of each mouse was measured immediately before the alcohol was given
and again 20 minutes afterward. The accompanying data shows the drop
(before minus after) in body temperature for each mouse. (The negative
value $-0.1$ refers to a mouse whose temperature rose rather than
fell.)

View File

@ -0,0 +1,5 @@
The peak flow rate of a person is the fastest rate
at which the person can expel air after taking a deep breath.
Peak flow rate is measured in units of liters per minute and
gives an indication of the person's respiratory health. Flow is given
in l/min, height in cm.

View File

@ -0,0 +1,6 @@
An experiment was conducted to study the effect of tamoxifen on
patients with cervical cancer. One of the measurements made, both
before and again after tamoxifen was given, was microvessel density
(MVD). MVD, which is measured as number of vessels per mm$^2$, is a
measurement that relates to the formation of blood vessels that feed a
tumor and allow it to grow and spread.

View File

@ -0,0 +1,5 @@
A group of female college students were divided into three groups
according to upper body strength. Their leg strength was tested by
measuring how many consecutive times they could leg press 246 pounds
before exhaustion. (The subjects were allowed only one second of rest
between consecutive lifts.)

View File

@ -170,7 +170,9 @@ Bernstein Center T\"ubingen}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section[descriptive statistics, errorbars, and plots]{Day 1 -- descriptive statistics, errorbars, and plots}
\section[descriptive statistics, errorbars, and plots]{Day 1 --
descriptive statistics and plots}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{types of data}
@ -315,8 +317,8 @@ Bernstein Center T\"ubingen}
\frametitle{exercise}
\begin{task}{Spearman rank correlation}
\begin{enumerate}
\item Use {\tt randi} to generate two 100-dimensional vectors
{\tt x,y} of random integers between $0$ and $10$.
\item Use {\tt randi} to generate two vectors
{\tt x,y} with $100$ random integers between $0$ and $10$ each.
\item Find out how to compute the Spearman
rank correlation $$\rho = 1- {\frac {6 \sum
d_i^2}{n(n^2 - 1)}}$$ with Matlab. $d_i = x_i - y_i$ is the
@ -358,7 +360,6 @@ correlation coefficient does not have that property.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{description of data and plotting}
\subsection{what makes a good plot}
%-------------------------------------------------------------
\begin{frame}[fragile]
@ -723,6 +724,23 @@ hold off
\end{center}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{robust statistics}
\begin{task}{When is a statistic called robust (leave-one-out)?}
\begin{itemize}
\item Generate an array with $20$ random numbers using {\tt
randn}.
\item Compute $20$ means: the $i^{th}$ mean is computed from the
data set {\em without} the $i^{th}$ example.
\item Repeat this with the median.
\item Make a bar plot that depicts the means of the computed means
and medians along with an appropriate measure of dispersion.
\item What can you observe? Do you understand why?
\end{itemize}
\end{task}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting interval/ratio/absolute data}
@ -791,7 +809,13 @@ hold off
ordinal vs. ordinal data (why not the bar chart?).
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\begin{center}
\Huge
That's it.
\end{center}
\end{frame}
\end{document}

View File

@ -0,0 +1,823 @@
\documentclass{beamer}
\usepackage{xcolor}
\usepackage{listings}
\usepackage{pgf}
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
%\usepackage{multimedia}
\usepackage[latin1]{inputenc}
\usepackage{amsmath}
\usepackage{bm}
\usepackage[T1]{fontenc}
\usepackage{hyperref}
\usepackage{ulem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>
{
\usetheme{Singapore}
\setbeamercovered{opaque}
\usecolortheme{tuebingen}
\setbeamertemplate{navigation symbols}{}
\usefonttheme{default}
\useoutertheme{infolines}
% \useoutertheme{miniframes}
}
% At the start of every \section, insert a divider frame that shows the
% section heading centered in \Huge type. The empty optional argument
% means nothing is inserted for starred sections; <beamer> restricts the
% frame to beamer mode.
\AtBeginSection[]
{
\begin{frame}<beamer>
\begin{center}
\Huge \insertsectionhead
\end{center}
% Alternative kept for reference: show the outline instead of the heading.
% \frametitle{\insertsectionhead}
% \tableofcontents[currentsection,hideothersubsections]
\end{frame}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
\setbeamertemplate{blocks}[rounded][shadow=true]
\title[]{Scientific Computing -- Statistics}
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
University T\"ubingen\\
Bernstein Center T\"ubingen}
\institute[Scientific Computing]{}
\date{10/20/2014}
%\logo{\pgfuseimage{logo}}
\subject{Lectures}
%%%%%%%%%% configuration for code
% Global defaults for all lstlisting environments in the slides:
% Matlab syntax highlighting in a typewriter font with line numbers on the
% left; comments in dark-gray italics, keywords blue, strings green; a
% single-line frame around a light-blue background; long lines are broken
% and continuation lines keep the indentation; the listing is inset by 1em
% on both sides and preceded by 10pt of vertical space.
\lstset{
basicstyle=\ttfamily,
numbers=left,
showstringspaces=false,
language=Matlab,
commentstyle=\itshape\color{darkgray},
keywordstyle=\color{blue},
stringstyle=\color{green},
backgroundcolor=\color{blue!10},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
captionpos=b,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \mycite{<source>}: typeset a short source attribution, flushed right,
% in tiny dark-gray type.
% The trailing % signs keep the line ends inside the definition from
% turning into spurious spaces when the macro is used mid-paragraph.
\newcommand{\mycite}[1]{%
\begin{flushright}%
\tiny \color{black!80} #1%
\end{flushright}%
}
\input{../latex/environments.tex}
\makeatother
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}
\frametitle{plan}
\setcounter{tocdepth}{1}
\tableofcontents
\end{frame}
\begin{frame}
\frametitle{information}
\begin{itemize}
\item Samuels, M. L., Wittmer, J. A., \& Schaffner,
A. A. (2010). Statistics for the Life Sciences (4th ed.,
p. 668). Prentice Hall.
\item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
Ed.)Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
Hall. doi:10.1037/0012764
\item \url{http://stats.stackexchange.com}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% errorbars (error bar paper)
% confidence intervals (sources of error)
% plotting (the right plot for the right data, Dan plotting paper)
% statistical test structure (bootstrapping, resampling, permutation)
% Don'ts: repeated testing, exclude data points
% study design
% PCA
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section[Prelude]{Prelude}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}
\frametitle{my expectations for this course}
\begin{itemize}
\item interest and participation
\item motivation to understand and question concepts
\item high scientific standard
\item intellectual honesty
\item sincere cooperation
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{this week will be ...}
\only<1>{
\framesubtitle{... no \sout{fun} piece of cake}
\begin{center}
\includegraphics[height=0.7\textheight]{figs/feeding.jpg}
\end{center}
}
\only<2>{
\framesubtitle{... no \sout{fun} piece of cake}
\begin{center}
\includegraphics[height=0.7\textheight]{figs/nacho-trainer.jpg}
\end{center}
}
\only<3>{
\framesubtitle{... no lecture (please!)}
\begin{center}
\includegraphics[height=0.7\textheight]{figs/soccer.jpg}
\end{center}
}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{What you should learn this week}
\begin{itemize}
\item What makes good plots?
\item What is descriptive/inferential statistics?
\item What is the general structure of a statistical test?
\item What does a p-value mean?
\item How can I build my own tests?
\item How large should my $n$ be?
\item What is {\em maximum likelihood} and why is it important?
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section[descriptive statistics, errorbars, and plots]{Day 1 --
descriptive statistics and plots}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{types of data}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{data scales}
\framesubtitle{What data types are distinguished in statistics?}
\Large
{\bf Why are data types important?}
\pause
\begin{itemize}
\item selection of statistics
\item selection of plots
\item selection of correct tests
\end{itemize}
\end{frame}
%-------------------------------------------------------------
\begin{frame}
\frametitle{data scales}
\framesubtitle{nominal/categorical scale}
\begin{itemize}
\item properties like cell type, experimental group (e.g.\ treatment
1, treatment 2, control)
\item each observation/sample is put into one category
\item there is no reasonable order among the categories
\item example: [rods, cones] vs. [cones, rods]
\end{itemize}
\end{frame}
%-------------------------------------------------------------
\begin{frame}
\frametitle{data scales}
\framesubtitle{ordinal scale}
\begin{itemize}
\item like nominal scale, but there is an order
\item {\bf but:} there is no reasonable measure of {\em distance}
between the classes
\item examples: ranks, ratings
\end{itemize}
\end{frame}
%-------------------------------------------------------------
\begin{frame}
\frametitle{data scales}
\framesubtitle{interval scale}
\begin{itemize}
\item quantitative/metric values
\item reasonable measure of distance between values but no absolute zero
\item examples: temperature in $^\circ$C
\end{itemize}
\end{frame}
%-------------------------------------------------------------
\begin{frame}
\frametitle{data scales}
\framesubtitle{absolute/ratio scale}
\begin{itemize}
\item like interval scale but with absolute zero
\item example: temperature in kelvin (K)
\end{itemize}
\pause
\begin{emphasize}{relationships between scales}
\begin{itemize}
\item scales exhibit increasing information content from nominal
to absolute
\item conversion ``downwards'' always possible
\end{itemize}
\end{emphasize}
\end{frame}
%-------------------------------------------------------------
\begin{frame}
\frametitle{examples from neuroscience and psychology}
\begin{itemize}
\item {\bf nominal:}\pause
\begin{itemize}
\item treatment group
\item stimulus class
\item cell type
\end{itemize}
\item {\bf ordinal:} \pause
\begin{itemize}
\item ratings
\item clinical stages of a disease
\item states of an ion channel
\end{itemize}
\item {\bf absolute/ratio:}\pause
\begin{itemize}
\item firing rate
\item membrane potential
\item ion concentration
\end{itemize}
\end{itemize}
\end{frame}
%-------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{statistics}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%-------------------------------------------------------------
\begin{frame}
\frametitle{What is ``a statistic''?}
\begin{definition}{statistic}
A statistic (singular) is a single measure of some attribute of a
sample (e.g., its arithmetic mean value). It is calculated by
applying a function (statistical algorithm) to the values of the
items of the sample, which are known together as a set of data.
\source{http://en.wikipedia.org/wiki/Statistic}
\end{definition}
\end{frame}
%-------------------------------------------------------------
\begin{frame}
\frametitle{examples of test statistics}
\begin{itemize}
\item {\bf nominal:}\pause
\begin{itemize}
\item count
\item relative frequency/proportion
\end{itemize}
\item {\bf ordinal:} \pause
\begin{itemize}
\item median
\item quantile/percentile
\item rank correlation
\end{itemize}
\item {\bf absolute/ratio:}\pause
\begin{itemize}
\item mean
\item variance/ standard deviation
\item Pearson correlation
\end{itemize}
\end{itemize}
\end{frame}
%-------------------------------------------------------------
\begin{frame}
\frametitle{exercise}
\begin{task}{Spearman rank correlation}
\begin{enumerate}
\item Use {\tt randi} to generate two vectors
{\tt x,y} with $100$ random integers between $0$ and $10$ each.
\item Find out how to compute the Spearman
rank correlation $$\rho = 1- {\frac {6 \sum
d_i^2}{n(n^2 - 1)}}$$ with Matlab. $d_i = x_i - y_i$ is the
difference in the rank between the single data points.
\item Compute $\rho$ between $x$ and $y$, between $x$ and
$y^2$, between $\log(x+1)$ and $y^2$.
\item Compute the "standard" (Pearson) correlation coefficient
between these values.
\item What can you observe and why does it make sense?
\end{enumerate}
\end{task}
\end{frame}
%-------------------------------------------------------------
% Solution slide for the Spearman rank correlation exercise.
% The vectors are drawn with randi([0 10], ...) so that they contain
% integers from 0 to 10, as the exercise asks; randi(10, ...) would only
% produce values from 1 to 10. The printed correlation values are the
% illustrative output of one random draw.
\begin{frame}[fragile]
\frametitle{solution}
\begin{solution}{Spearman rank correlation }
\scriptsize
\begin{lstlisting}
>>> x = randi([0 10], 100, 1);
>>> y = randi([0 10], 100, 1);
>>> corr(x,y,'type','Spearman')
ans =
0.1220
>>> corr(x,y.^2,'type','Spearman')
ans =
0.1220
>>> corr(x,y,'type','Pearson')
ans =
0.1074
>>> corr(x,y.^2,'type','Pearson')
ans =
0.0551
\end{lstlisting}
The rank correlation does not change under a monotone transformation
of the data. Therefore, it can be used for ordinal data. The Pearson
correlation coefficient does not have that property.
\end{solution}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{what makes a good plot}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{}
\begin{center}
\Huge What makes a good plot?
\end{center}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{features of a good plot}
A good plot
\begin{itemize}
\item helps the reader to clearly understand your point.\pause
\item is not misleading and lets the reader judge the information
on her own (different y-axis/length scales in two related plots,
"squeezing" via log-plots). \pause
\item contains information about the data (a comic might be
illustrative, but does not contain information about the
data).\pause
\item adheres to the principle of {\em ink minimization}.
\end{itemize}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{features of a good plot}
\framesubtitle{design/organization}
\begin{itemize}
\item Is the display consistent with the model or hypothesis
being tested?\pause
\item Are there ``empty dimensions'' in the display that could be
removed (A 3D pie chart for 2D categorical data, extraneous colors
that do not encode meaningful information)?\pause
\item Does the display provide an honest and transparent portrayal
of the data (hiding, smoothing, modifying data points should be
avoided or explicitly mentioned)?
\end{itemize}
\mycite{Allen et al. 2012, Neuron}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{features of a good plot}
\framesubtitle{axes}
\begin{itemize}
\item Are axes scales defined as linear, log, or radial?\pause
\item Does each axis label describe the variable and its units (use
``a.u.'' for arbitrary units)?\pause
\item Are axes limits appropriate for the data (The graphic should
not be bounded at zero if the data can take on both positive and
negative values.)?\pause
\item Is the aspect ratio appropriate for the data (When x and y
axes contrast the same variable under different conditions the
graphic should be square.)?
\end{itemize}
\mycite{Allen et al. 2012, Neuron}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{features of a good plot}
\framesubtitle{color mapping}
\begin{itemize}
\item Is a color bar provided?\pause
\item Is the color map sensible for the data type (does the data
extend to both $\pm$, does it live in an interval, is it
circular)?\pause
\item Are contrasting colors consistent with a natural interpretation?
\item Can features be discriminated when printed in grayscale?
\item Has red/green contrast been avoided to accommodate common
forms of colorblindness?
\end{itemize}
\mycite{Allen et al. 2012, Neuron}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{features of a good plot}
\framesubtitle{uncertainty}
\begin{itemize}
\item Does the display indicate the uncertainty of estimated parameters?\pause
\item Is the type of error surface appropriate for the data?
\begin{itemize}
\item Use standard deviations to describe variability in the population.\pause
\item Use standard errors or confidence intervals to make inferences
about parameters estimated from a sample.\pause
\item Parametric confidence intervals should only be used if data
meet the assumptions of the underlying model.\pause
\end{itemize}
\item Are the units of uncertainty defined (is it standard error, is
it $95\%$ confidence interval)?
\end{itemize}
\mycite{Allen et al. 2012, Neuron}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{features of a good plot}
\framesubtitle{annotation}
\begin{itemize}
\item Are all symbols defined, preferably by directly labeling objects?\pause
\item Is the directionality of a contrast between conditions obvious?\pause
\item Is the number of samples or independent experiments indicated?\pause
\item Are statistical procedures and criteria for significance described?\pause
\item Are uncommon abbreviations avoided or clearly defined?\pause
\item Are abbreviations consistent with those used in the text?
\end{itemize}
\mycite{Allen et al. 2012, Neuron}
\end{frame}
\subsection{bad examples}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{suboptimal example}
\begin{center}
\includegraphics[width=.5\linewidth]{figs/nobelbad}
\end{center}
\mycite{Hafting et al. 2005, Nature}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{suboptimal example}
\begin{center}
\includegraphics[width=.5\linewidth]{figs/badbarright.png}
\end{center}
\source{http://en.wikipedia.org/wiki/Misleading\_graph}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{suboptimal example}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/yaxisscalingleft.png}
\hspace{.5cm}
\includegraphics[width=.4\linewidth]{figs/yaxisscalingright.png}
\end{center}
\source{http://en.wikipedia.org/wiki/Misleading\_graph}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{suboptimal example}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/badscatterleft.png}
\hspace{.5cm}
\includegraphics[width=.4\linewidth]{figs/badscatterright.png}
\end{center}
\source{http://en.wikipedia.org/wiki/Misleading\_graph}
\end{frame}
%-------------------------------------------------------------
\begin{frame}
\frametitle{suboptimal example}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/badbarplot}
\end{center}
\source{www.enfovis.com}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{nominal scale}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting nominal data}
\framesubtitle{bar plot for count and relative frequency}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/nominaldataplot}
\end{center}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting nominal data}
\framesubtitle{bar plot for count and relative frequency}
\scriptsize
\begin{lstlisting}
% plot
bar([1,2], [50, 90], 'facecolor', 'k')
% labels axes
ylabel('cell count')
xlabel('cell type')
% cosmetics
xlim([0.5,2.5])
ylim([0, 100])
box('off')
set(gca,'XTick',1:2,'XTickLabel',{'pyramidal','interneuron'},'FontSize',20)
% settings for saving the figure
set(gcf, 'PaperUnits', 'centimeters');
set(gcf, 'PaperSize', [11.7 9.0]);
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
\end{lstlisting}
\end{frame}
%----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting nominal data}
\framesubtitle{pie chart for count and relative frequency}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/nominaldataplot2}
\end{center}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting nominal data}
\framesubtitle{exercise}
\begin{task}{pie chart}
Plot the same data ($n_{py}=50$, $n_{in}=90$) as a pie chart in Matlab.
\end{task}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting nominal data}
\framesubtitle{pie chart for relative frequency}
\scriptsize
\begin{lstlisting}
data = [50, 90];
h = pie(data, [1,0], {'pyramidal (n=50)', 'interneuron (n=90)'})
hText = findobj(h,'Type','text') % text object handles
set(h(1), 'FaceColor', [.2,.2,.2]);
set(h(2), 'Rotation', 45);
set(h(3), 'FaceColor', [.8,.8,.8]);
set(h(4), 'Rotation', 45);
title('cell count')
set(gca,'XTick',1:2,'XTickLabel',{'pyramidal', 'interneuron'})
box('off')
set(gcf, 'PaperUnits', 'centimeters');
set(gcf, 'PaperSize', [11.7 9.0]);
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
\end{lstlisting}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting interval/ratio/absolute data}
\framesubtitle{histogram}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/histogram}
\end{center}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting interval/ratio/absolute data}
\framesubtitle{bad choice of bins}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/histogrambad}
\includegraphics[width=.4\linewidth]{figs/histogrambad2}
\end{center}
\begin{summary}{Rule of thumb}
Choose the number of bins as $b\approx n/20$, where $n$ is the number of data points.
\end{summary}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting interval/ratio/absolute data}
\framesubtitle{how to do in Matlab}
\scriptsize
\begin{lstlisting}
x = randn(2000,1); % generate Gaussian data
hist(x, 50); % generate histogram
% set facecolor to gray
h = findobj(gca, 'Type','patch');
set(h(1), 'FaceColor',[.2,.2,.2], 'EdgeColor','w', 'linewidth',2)
% plot a white grid over it
h = gridxy([],get(gca,'ytick'),'color','w','linewidth',2)
uistack(h, 'top')
% cosmetics
box('off');
xlabel('Data')
ylabel('Count')
\end{lstlisting}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting interval/ratio/absolute data}
\framesubtitle{bar plot}
There are several ways to plot a sample $x_1, \dots, x_n$ of interval/ratio/absolute
scale with a bar plot
\begin{center}
\includegraphics[width=.6\linewidth]{figs/barplots.png}
\end{center}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting interval/ratio/absolute data}
\framesubtitle{bar plot}
\scriptsize
\begin{lstlisting}
% bar plot
x = rand(10,1);
gray = [.5,.5,.5];
bar(1, mean(x), 'EdgeColor','w','FaceColor', gray);
hold on
bar(2, mean(x), 'EdgeColor','w','FaceColor', gray);
plot(0*x + 2, x, 'ok');
bar(3, mean(x), 'EdgeColor','w','FaceColor', gray);
errorbar(3, mean(x), std(x), 'ok');
bar(4, mean(x), 'EdgeColor','w','FaceColor', gray);
errorbar(4, mean(x), std(x)/sqrt(length(x)), 'ok');
set(gca, 'xtick',[])
ylabel('uniformly distributed random data in [0,1]')
box('off')
title('different forms of bar plots')
hold off
\end{lstlisting}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting interval/ratio/absolute data}
\framesubtitle{bar plot and measure of central tendency and spread}
\begin{itemize}
\item A bar plot collapses real data onto a single number and some
measure of spread. This number is usually a {\em measure of central
tendency}, i.e. a typical/central value for the probability
distribution of the data.\pause
\item What measures of central tendency can you think of?\pause
\begin{itemize}
\item mean
\item median
\item geometric mean (the nth root of the product of the data values)
\item weighted mean
\item midrange (mean of the maximum and minimum values of a data set)
\end{itemize}\pause
\item Additionally, the bar plot is equipped with a measure of {\em
spread} or {\em dispersion}. What measure of spread can you think of?\pause
\begin{itemize}
\item standard deviation
\item range (maximum minus minimum of a dataset)
\item inter-quartile range
\end{itemize}
\end{itemize}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting interval/ratio/absolute data}
\framesubtitle{measure of central tendency and spread}
\Large
\begin{center}
\bf The part of statistics that summarizes data in a small number
of values is called {\em descriptive statistics}.
\end{center}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{robust statistics}
\begin{task}{When is a statistic called robust (leave-one-out)?}
\begin{itemize}
\item Generate an array with $20$ random numbers using {\tt
randn}.
\item Compute $20$ means: the $i^{th}$ mean is computed from the
data set {\em without} the $i^{th}$ example.
\item Repeat this with the median.
\item Make a bar plot that depicts the means of the computed means
and medians along with an appropriate measure of dispersion.
\item What can you observe? Do you understand why?
\end{itemize}
\end{task}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting interval/ratio/absolute data}
\framesubtitle{boxplot}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\begin{center}
\includegraphics[width=\linewidth]{figs/boxplot.png}
\end{center}
\end{minipage}
\begin{minipage}{0.5\linewidth}
Who knows what the elements mean?\pause
\begin{itemize}
\item the box depicts the inter-quartile range
\item the line denotes the median
\item the whiskers denote the most extreme values of the data that
are not considered outliers
\item outliers are plotted separately
\end{itemize}
\begin{task}{Outliers}
\begin{itemize}
\item Find out how an outlier is defined in a matlab boxplot.
\item Can you remove an outlier from the dataset?
\end{itemize}
\end{task}
\end{minipage}
\end{minipage}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting interval/ratio/absolute data}
\framesubtitle{violinplot}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/violinplots.png}
\end{center}
\begin{itemize}
\item Violinplots depict the distribution of the data by a
smoothed histogram.
\item Additional information (data points, median,
inter-quartile range) is plotted inside.
\end{itemize}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting combinations of scales}
What could we use for a combination of categorical/nominal and
interval/ratio/absolute?
\pause
\begin{center}
\includegraphics[width=.5\linewidth]{figs/factorplot.png}
\end{center}
Each category is a single bar.
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{plotting combinations of scales}
What could we use for a combination of interval/ratio/absolute and
interval/ratio/absolute, e.g.\ $(x_1, y_1), \dots, (x_n,y_n)$? \pause
\begin{center}
\includegraphics[width=.8\linewidth]{figs/paireddata.png}
\end{center}
A scatter plot or a paired bar chart. A scatter plot can also be used for
ordinal vs.\ ordinal data (why not the bar chart?).
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
\begin{center}
\Huge
That's it.
\end{center}
\end{frame}
\end{document}