first day done
This commit is contained in:
parent
1e7f07a2d2
commit
7e4d79a2c5
16
statistics/assignments/Makefile
Normal file
16
statistics/assignments/Makefile
Normal file
@ -0,0 +1,16 @@
|
||||
# Build one assignment bundle per dataset.
#
# For each dataset number NNN the recipe
#   1. replaces the placeholder "000" in day1.tex with NNN (-> tmp.tex),
#   2. compiles tmp.tex with pdflatex and renames the PDF to day1_NNN.pdf,
#   3. copies ../data/exampleNNN.csv into this directory,
#   4. zips the CSV and the PDF into exampleNNN.zip,
#   5. removes the intermediate files so that only the zip remains.
#
# The list must name every dataset exactly once; 008 was previously
# missing (007 was listed twice), so no bundle was built for dataset 008.
all:
	for number in 001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 ; do \
		echo $$number ; \
		sed "s/000/$$number/g" day1.tex > tmp.tex; \
		pdflatex tmp.tex; \
		mv tmp.pdf day1_$$number.pdf; \
		cp ../data/example$$number.csv ./ ;\
		rm tmp.* ; \
		zip example$$number.zip example$$number.csv day1_$$number.pdf ; \
		rm example$$number.csv ;\
		rm day1_$$number.pdf ; \
	done
|
||||
|
||||
# Remove the generated zip bundles and the "auto" directory.
# -f keeps rm from failing when no zip file exists, so the second
# command still runs and `make clean` succeeds on a clean tree.
clean:
	rm -f *.zip
	rm -rf auto
|
72
statistics/assignments/day1.tex
Executable file
72
statistics/assignments/day1.tex
Executable file
@ -0,0 +1,72 @@
|
||||
\documentclass[addpoints,10pt]{exam}
|
||||
\usepackage{url}
|
||||
\usepackage{color}
|
||||
\usepackage{hyperref}
|
||||
|
||||
\pagestyle{headandfoot}
|
||||
\runningheadrule
|
||||
\firstpageheadrule
|
||||
|
||||
\firstpageheader{Scientific Computing}{afternoon assignment day 01}{10/20/2014}
|
||||
%\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014}
|
||||
\firstpagefooter{}{}{}
|
||||
\runningfooter{}{}{}
|
||||
\pointsinmargin
|
||||
\bracketedpoints
|
||||
|
||||
%\printanswers
|
||||
\shadedsolutions
|
||||
|
||||
|
||||
\begin{document}
|
||||
%%%%%%%%%%%%%%%%%%%%% Submission instructions %%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\sffamily
|
||||
%%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
\begin{questions}
|
||||
\question To publish scientific results, you will usually need to
|
||||
use statistical methods. Some journals provide you with a brief
|
||||
description of how they expect you to apply statistical methods. One
|
||||
example can be found in the author guidelines of the journal
|
||||
Nature.
|
||||
|
||||
Assume you collected the following dataset. You can download it from
|
||||
Ilias as {\tt example000.csv}. Here is the description of the dataset:
|
||||
|
||||
\begin{quotation}
|
||||
\tt
|
||||
\input{../examples/example000.tex}
|
||||
\end{quotation}
|
||||
|
||||
\begin{parts}
|
||||
\part Download the dataset and write a script that loads it into
|
||||
Matlab.
|
||||
|
||||
\part Think about the type of your data (I might ask you that
|
||||
tomorrow).
|
||||
|
||||
\part Produce a plot that displays the data in an appropriate
|
||||
way. Make sure to respect all elements of good plotting we
|
||||
discussed today.
|
||||
|
||||
\part Download the statistical checklist from Nature. Produce {\bf
|
||||
one} slide that contains the plot and a concise summary of your
|
||||
data which respects the requirements made by Nature (assume you
|
||||
are producing a figure legend for the figure in Nature). It is
|
||||
good style to avoid expressions like ``the plot shows'' or
|
||||
similar.
|
||||
|
||||
\part Upload your code, the data, and the slide as a zip to
|
||||
Ilias. Deadline is 19h00. Structure the zip such that you can
|
||||
present your program in front of the class. Several students will
|
||||
be asked to present their slide and their code tomorrow morning.
|
||||
|
||||
\end{parts}
|
||||
|
||||
\end{questions}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
\end{document}
|
43
statistics/data/example001.csv
Executable file
43
statistics/data/example001.csv
Executable file
@ -0,0 +1,43 @@
|
||||
MAO,Diagnosis
|
||||
6.8,I
|
||||
4.1,I
|
||||
7.3,I
|
||||
14.2,I
|
||||
18.8,I
|
||||
9.9,I
|
||||
7.4,I
|
||||
11.9,I
|
||||
5.2,I
|
||||
7.8,I
|
||||
7.8,I
|
||||
8.7,I
|
||||
12.7,I
|
||||
14.5,I
|
||||
10.7,I
|
||||
8.4,I
|
||||
9.7,I
|
||||
10.6,I
|
||||
7.8,II
|
||||
4.4,II
|
||||
11.4,II
|
||||
3.1,II
|
||||
4.3,II
|
||||
10.1,II
|
||||
1.5,II
|
||||
7.4,II
|
||||
5.2,II
|
||||
10,II
|
||||
3.7,II
|
||||
5.5,II
|
||||
8.5,II
|
||||
7.7,II
|
||||
6.8,II
|
||||
3.1,II
|
||||
6.4,III
|
||||
10.8,III
|
||||
1.1,III
|
||||
2.9,III
|
||||
4.5,III
|
||||
5.8,III
|
||||
9.4,III
|
||||
6.8,III
|
|
186
statistics/data/example002.csv
Executable file
186
statistics/data/example002.csv
Executable file
@ -0,0 +1,186 @@
|
||||
Weight,Sex
|
||||
1607,m
|
||||
1157,m
|
||||
1248,m
|
||||
1310,m
|
||||
1398,m
|
||||
1237,m
|
||||
1232,m
|
||||
1343,m
|
||||
1380,m
|
||||
1274,m
|
||||
1245,m
|
||||
1286,m
|
||||
1508,m
|
||||
1105,m
|
||||
1123,m
|
||||
1198,m
|
||||
1300,m
|
||||
1249,m
|
||||
1185,m
|
||||
915,m
|
||||
1345,m
|
||||
1107,m
|
||||
1357,m
|
||||
1227,m
|
||||
1205,m
|
||||
1435,m
|
||||
1289,m
|
||||
1093,m
|
||||
1211,m
|
||||
1260,m
|
||||
1193,m
|
||||
1330,m
|
||||
1130,m
|
||||
1357,m
|
||||
1193,m
|
||||
1232,m
|
||||
1321,m
|
||||
1260,m
|
||||
1380,m
|
||||
1230,m
|
||||
1136,m
|
||||
1029,m
|
||||
1223,m
|
||||
1240,m
|
||||
1264,m
|
||||
1020,m
|
||||
1415,m
|
||||
1410,m
|
||||
1275,m
|
||||
1230,m
|
||||
1085,m
|
||||
1048,m
|
||||
1181,m
|
||||
1103,m
|
||||
1165,m
|
||||
1547,m
|
||||
1173,m
|
||||
1660,m
|
||||
1307,m
|
||||
1535,m
|
||||
1315,m
|
||||
1257,m
|
||||
1424,m
|
||||
1309,m
|
||||
1170,m
|
||||
1412,m
|
||||
1270,m
|
||||
1230,m
|
||||
1233,m
|
||||
1561,m
|
||||
1193,m
|
||||
1272,m
|
||||
1355,m
|
||||
1137,m
|
||||
1354,m
|
||||
1110,m
|
||||
1265,m
|
||||
1407,m
|
||||
1227,m
|
||||
1330,m
|
||||
1222,m
|
||||
1305,m
|
||||
1475,m
|
||||
1177,m
|
||||
1337,m
|
||||
1145,m
|
||||
1070,m
|
||||
1305,m
|
||||
1085,m
|
||||
1303,m
|
||||
1390,m
|
||||
1532,m
|
||||
1238,m
|
||||
1233,m
|
||||
1280,m
|
||||
1245,m
|
||||
1459,m
|
||||
1157,m
|
||||
1302,m
|
||||
1385,m
|
||||
1310,m
|
||||
1342,m
|
||||
1303,m
|
||||
1248,m
|
||||
1115,m
|
||||
1365,m
|
||||
1227,m
|
||||
1353,m
|
||||
1125,f
|
||||
1027,f
|
||||
1112,f
|
||||
983,f
|
||||
1090,f
|
||||
1247,f
|
||||
1045,f
|
||||
983,f
|
||||
972,f
|
||||
1045,f
|
||||
937,f
|
||||
1245,f
|
||||
1200,f
|
||||
1270,f
|
||||
1200,f
|
||||
1145,f
|
||||
1090,f
|
||||
1040,f
|
||||
1343,f
|
||||
1010,f
|
||||
1095,f
|
||||
1180,f
|
||||
1168,f
|
||||
1095,f
|
||||
1040,f
|
||||
1235,f
|
||||
1050,f
|
||||
1038,f
|
||||
1046,f
|
||||
1255,f
|
||||
1228,f
|
||||
1000,f
|
||||
1225,f
|
||||
1220,f
|
||||
1085,f
|
||||
1067,f
|
||||
1006,f
|
||||
1138,f
|
||||
1175,f
|
||||
1252,f
|
||||
1037,f
|
||||
958,f
|
||||
1020,f
|
||||
1068,f
|
||||
1107,f
|
||||
1317,f
|
||||
952,f
|
||||
1056,f
|
||||
1203,f
|
||||
1183,f
|
||||
1392,f
|
||||
1130,f
|
||||
1284,f
|
||||
996,f
|
||||
1228,f
|
||||
1087,f
|
||||
1035,f
|
||||
1170,f
|
||||
1064,f
|
||||
1250,f
|
||||
1129,f
|
||||
1088,f
|
||||
1037,f
|
||||
1117,f
|
||||
1095,f
|
||||
1027,f
|
||||
1027,f
|
||||
1190,f
|
||||
1153,f
|
||||
1037,f
|
||||
1120,f
|
||||
1212,f
|
||||
1024,f
|
||||
1135,f
|
||||
1177,f
|
||||
1096,f
|
||||
1114,f
|
|
52
statistics/data/example003.csv
Executable file
52
statistics/data/example003.csv
Executable file
@ -0,0 +1,52 @@
|
||||
singtime
|
||||
4.3
|
||||
24.1
|
||||
6.6
|
||||
7.3
|
||||
4
|
||||
2.6
|
||||
4
|
||||
3.9
|
||||
9.4
|
||||
6.2
|
||||
1.6
|
||||
6.5
|
||||
0.2
|
||||
2.7
|
||||
17.4
|
||||
5.6
|
||||
2
|
||||
3.8
|
||||
1.2
|
||||
0.7
|
||||
1.6
|
||||
2.3
|
||||
3.7
|
||||
0.8
|
||||
0.5
|
||||
4.5
|
||||
11.5
|
||||
3.5
|
||||
0.8
|
||||
5.2
|
||||
2
|
||||
0.7
|
||||
1.7
|
||||
5
|
||||
2.8
|
||||
1.5
|
||||
3.9
|
||||
3.7
|
||||
4.5
|
||||
1.8
|
||||
1.2
|
||||
0.7
|
||||
0.7
|
||||
4.2
|
||||
4.7
|
||||
2.2
|
||||
1.4
|
||||
14.1
|
||||
8.6
|
||||
3.7
|
||||
3.5
|
|
29
statistics/data/example004.csv
Executable file
29
statistics/data/example004.csv
Executable file
@ -0,0 +1,29 @@
|
||||
Pulse
|
||||
97
|
||||
111
|
||||
93
|
||||
98
|
||||
107
|
||||
77
|
||||
121
|
||||
88
|
||||
96
|
||||
123
|
||||
119
|
||||
91
|
||||
99
|
||||
95
|
||||
99
|
||||
102
|
||||
77
|
||||
85
|
||||
104
|
||||
106
|
||||
114
|
||||
85
|
||||
112
|
||||
102
|
||||
104
|
||||
94
|
||||
104
|
||||
98
|
|
37
statistics/data/example005.csv
Executable file
37
statistics/data/example005.csv
Executable file
@ -0,0 +1,37 @@
|
||||
Branches
|
||||
23
|
||||
30
|
||||
54
|
||||
28
|
||||
31
|
||||
29
|
||||
34
|
||||
35
|
||||
30
|
||||
27
|
||||
21
|
||||
43
|
||||
51
|
||||
35
|
||||
51
|
||||
49
|
||||
35
|
||||
24
|
||||
26
|
||||
29
|
||||
21
|
||||
29
|
||||
37
|
||||
27
|
||||
28
|
||||
33
|
||||
33
|
||||
23
|
||||
37
|
||||
27
|
||||
40
|
||||
48
|
||||
41
|
||||
20
|
||||
30
|
||||
57
|
|
32
statistics/data/example006.csv
Executable file
32
statistics/data/example006.csv
Executable file
@ -0,0 +1,32 @@
|
||||
Glucose
|
||||
81
|
||||
85
|
||||
93
|
||||
93
|
||||
99
|
||||
76
|
||||
75
|
||||
84
|
||||
78
|
||||
84
|
||||
81
|
||||
82
|
||||
89
|
||||
81
|
||||
96
|
||||
82
|
||||
74
|
||||
70
|
||||
84
|
||||
86
|
||||
80
|
||||
70
|
||||
131
|
||||
75
|
||||
88
|
||||
102
|
||||
115
|
||||
89
|
||||
82
|
||||
79
|
||||
106
|
|
24
statistics/data/example007.csv
Executable file
24
statistics/data/example007.csv
Executable file
@ -0,0 +1,24 @@
|
||||
NerveCells
|
||||
35
|
||||
19
|
||||
33
|
||||
34
|
||||
17
|
||||
26
|
||||
16
|
||||
40
|
||||
28
|
||||
30
|
||||
23
|
||||
12
|
||||
27
|
||||
33
|
||||
22
|
||||
31
|
||||
28
|
||||
28
|
||||
35
|
||||
23
|
||||
23
|
||||
19
|
||||
29
|
|
21
statistics/data/example008.csv
Executable file
21
statistics/data/example008.csv
Executable file
@ -0,0 +1,21 @@
|
||||
RateChange,Treatment
|
||||
28,Caffeine
|
||||
11,Caffeine
|
||||
-3,Caffeine
|
||||
14,Caffeine
|
||||
-2,Caffeine
|
||||
-4,Caffeine
|
||||
18,Caffeine
|
||||
2,Caffeine
|
||||
2,Caffeine
|
||||
26,Decaf
|
||||
1,Decaf
|
||||
0,Decaf
|
||||
-4,Decaf
|
||||
-4,Decaf
|
||||
14,Decaf
|
||||
16,Decaf
|
||||
8,Decaf
|
||||
0,Decaf
|
||||
18,Decaf
|
||||
-10,Decaf
|
|
12
statistics/data/example009.csv
Executable file
12
statistics/data/example009.csv
Executable file
@ -0,0 +1,12 @@
|
||||
NEConcentration,Treatment
|
||||
543,Toluene
|
||||
523,Toluene
|
||||
431,Toluene
|
||||
635,Toluene
|
||||
564,Toluene
|
||||
549,Toluene
|
||||
535,Control
|
||||
385,Control
|
||||
502,Control
|
||||
412,Control
|
||||
387,Control
|
|
13
statistics/data/example010.csv
Executable file
13
statistics/data/example010.csv
Executable file
@ -0,0 +1,13 @@
|
||||
Dopamine,Group
|
||||
3420,toluene
|
||||
2314,toluene
|
||||
1911,toluene
|
||||
2464,toluene
|
||||
2781,toluene
|
||||
2803,toluene
|
||||
1820,control
|
||||
1843,control
|
||||
1397,control
|
||||
1803,control
|
||||
2539,control
|
||||
1990,control
|
|
10
statistics/data/example011.csv
Executable file
10
statistics/data/example011.csv
Executable file
@ -0,0 +1,10 @@
|
||||
Animal,Site I,Site II
|
||||
1,50.6,38
|
||||
2,39.2,18.6
|
||||
3,35.2,23.2
|
||||
4,17,19
|
||||
5,11.2,6.6
|
||||
6,14.2,16.4
|
||||
7,24.2,14.4
|
||||
8,37.4,37.6
|
||||
9,35.2,24.4
|
|
10
statistics/data/example012.csv
Executable file
10
statistics/data/example012.csv
Executable file
@ -0,0 +1,10 @@
|
||||
Subject,mCPP,Placebo
|
||||
1,1.1,0
|
||||
2,1.3,-0.3
|
||||
3,1,0.6
|
||||
4,1.7,0.3
|
||||
5,1.4,-0.7
|
||||
6,0.1,-0.2
|
||||
7,0.5,0.6
|
||||
8,1.6,0.9
|
||||
9,-0.5,-2
|
|
9
statistics/data/example013.csv
Executable file
9
statistics/data/example013.csv
Executable file
@ -0,0 +1,9 @@
|
||||
Animal,Control,Regenerating
|
||||
1,16.3,11.5
|
||||
2,4.8,3.6
|
||||
3,10.9,12.5
|
||||
4,14.2,6.3
|
||||
5,16.3,15.2
|
||||
6,9.9,8.1
|
||||
7,29.2,16.6
|
||||
8,22.4,13.1
|
|
16
statistics/data/example014.csv
Executable file
16
statistics/data/example014.csv
Executable file
@ -0,0 +1,16 @@
|
||||
BodyTempDrop,AlcoholDose
|
||||
0.2,1.5
|
||||
1.9,1.5
|
||||
-0.1,1.5
|
||||
0.5,1.5
|
||||
0.8,1.5
|
||||
4,3
|
||||
3.2,3
|
||||
2.3,3
|
||||
2.9,3
|
||||
3.8,3
|
||||
3.3,6
|
||||
5.1,6
|
||||
5.3,6
|
||||
6.7,6
|
||||
5.9,6
|
|
18
statistics/data/example015.csv
Executable file
18
statistics/data/example015.csv
Executable file
@ -0,0 +1,18 @@
|
||||
PeakFlow,Height
|
||||
733,174
|
||||
572,183
|
||||
500,176
|
||||
738,169
|
||||
616,183
|
||||
787,186
|
||||
866,178
|
||||
670,175
|
||||
550,172
|
||||
660,179
|
||||
575,171
|
||||
577,184
|
||||
783,200
|
||||
625,195
|
||||
470,176
|
||||
642,176
|
||||
856,190
|
|
19
statistics/data/example016.csv
Executable file
19
statistics/data/example016.csv
Executable file
@ -0,0 +1,19 @@
|
||||
Patient,Before,After
|
||||
1,98,75
|
||||
2,100,60
|
||||
3,82,25
|
||||
4,100,55
|
||||
5,93,78
|
||||
6,119,102
|
||||
7,70,58
|
||||
8,78,70
|
||||
9,104,90
|
||||
10,70,50
|
||||
11,60,65
|
||||
12,88,45
|
||||
13,45,36
|
||||
14,159,144
|
||||
15,65,27
|
||||
16,98,90
|
||||
17,66,16
|
||||
18,67,53
|
|
21
statistics/data/example017.csv
Executable file
21
statistics/data/example017.csv
Executable file
@ -0,0 +1,21 @@
|
||||
LegStrength,UpperBodyStrength
|
||||
55,low
|
||||
70,low
|
||||
45,low
|
||||
246,low
|
||||
240,low
|
||||
96,low
|
||||
225,low
|
||||
40,middle
|
||||
200,middle
|
||||
250,middle
|
||||
192,middle
|
||||
117,middle
|
||||
215,middle
|
||||
181,high
|
||||
85,high
|
||||
416,high
|
||||
228,high
|
||||
257,high
|
||||
316,high
|
||||
134,high
|
|
6
statistics/examples/example001.tex
Normal file
6
statistics/examples/example001.tex
Normal file
@ -0,0 +1,6 @@
|
||||
MAO and Schizophrenia: Monoamine oxidase (MAO) is an enzyme that is
|
||||
thought to play a role in the regulation of behavior. To see whether
|
||||
different categories of schizophrenic patients have different levels
|
||||
of MAO activity, researchers collected blood specimens from 42
|
||||
patients and measured the MAO activity in the platelets. Values are
|
||||
expressed as nmol benzylaldehyde product per $10^8$ platelets per hour.
|
3
statistics/examples/example002.tex
Normal file
3
statistics/examples/example002.tex
Normal file
@ -0,0 +1,3 @@
|
||||
Brain Weight: In 1888, P. Topinard published data on the brain weights
|
||||
of hundreds of French men and women. Brain weights are given in
|
||||
grams.
|
4
statistics/examples/example003.tex
Normal file
4
statistics/examples/example003.tex
Normal file
@ -0,0 +1,4 @@
|
||||
Cricket Singing Times: Male Mormon crickets (Anabrus simplex) sing to attract mates.
|
||||
A field researcher measured the duration of 51 unsuccessful songs---that is, the time
|
||||
until the singing male gave up and left his perch. The data is given
|
||||
in minutes.
|
3
statistics/examples/example004.tex
Normal file
3
statistics/examples/example004.tex
Normal file
@ -0,0 +1,3 @@
|
||||
Pulse after Exercise: A group of 28 adults did some moderate exercise
|
||||
for five minutes and then measured their pulses. Data is given in
|
||||
beats/minute.
|
5
statistics/examples/example005.tex
Normal file
5
statistics/examples/example005.tex
Normal file
@ -0,0 +1,5 @@
|
||||
A dendritic tree is a branched structure that emanates from the body
|
||||
of a nerve cell. As part of a study of brain development, 36 nerve
|
||||
cells were taken from the brains of newborn guinea pigs. The
|
||||
investigators counted the number of dendritic branch segments
|
||||
emanating from each nerve cell.
|
4
statistics/examples/example006.tex
Normal file
4
statistics/examples/example006.tex
Normal file
@ -0,0 +1,4 @@
|
||||
For each of 31 healthy dogs, a veterinarian measured the glucose
|
||||
concentration in the anterior chamber of the right eye and also in the
|
||||
blood serum. The following data are the anterior chamber glucose
|
||||
measurements, expressed as a percentage of the blood glucose.
|
5
statistics/examples/example007.tex
Normal file
5
statistics/examples/example007.tex
Normal file
@ -0,0 +1,5 @@
|
||||
A veterinary anatomist investigated the spatial arrangement of the
|
||||
nerve cells in the intestine of a pony. He removed a block of tissue
|
||||
from the intestinal wall, cut the block into many equal sections, and
|
||||
counted the number of nerve cells in each of 23 randomly selected
|
||||
sections.
|
8
statistics/examples/example008.tex
Normal file
8
statistics/examples/example008.tex
Normal file
@ -0,0 +1,8 @@
|
||||
Researchers were interested in the short-term effect that caffeine has
|
||||
on heart rate. They enlisted a group of volunteers and measured each
|
||||
person's resting heart rate. Then they had each subject drink 6 ounces
|
||||
of coffee. Nine of the subjects were given coffee containing caffeine
|
||||
and 11 were given decaffeinated coffee. After 10 minutes each person's
|
||||
heart rate was measured again. The data in the table contains the
|
||||
change in heart rate; a positive number means that heart rate went up
|
||||
and a negative number means that heart rate went down.
|
9
statistics/examples/example009.tex
Normal file
9
statistics/examples/example009.tex
Normal file
@ -0,0 +1,9 @@
|
||||
Toluene and the Brain: Abuse of substances containing toluene (for
|
||||
example, glue) can produce various neurological symptoms. In an
|
||||
investigation of the mechanism of these toxic effects, researchers
|
||||
measured the concentrations of various chemicals in the brains of rats
|
||||
that had been exposed to a toluene-laden atmosphere, and also in
|
||||
unexposed control rats. The concentrations of the brain chemical
|
||||
norepinephrine (NE) in the medulla region of the brain, for six
|
||||
toluene-exposed rats and five control rats, are given in the accompanying
|
||||
data file in ng/g.
|
3
statistics/examples/example010.tex
Normal file
3
statistics/examples/example010.tex
Normal file
@ -0,0 +1,3 @@
|
||||
In a pharmacological study, researchers measured the concentration of
|
||||
the brain chemical dopamine in six rats exposed to toluene and six
|
||||
control rats. Numbers are specified in ng/g.
|
6
statistics/examples/example011.tex
Normal file
6
statistics/examples/example011.tex
Normal file
@ -0,0 +1,6 @@
|
||||
Nerve Cell Density: For each of nine horses, a veterinary anatomist
|
||||
measured the density of nerve cells at specified sites in the
|
||||
intestine. The results for site I (midregion of jejunum) and site II
|
||||
(mesenteric region of jejunum) are given in the accompanying dataset.
|
||||
Each density value is the average of counts of nerve cells in five
|
||||
equal sections of tissue.
|
6
statistics/examples/example012.tex
Normal file
6
statistics/examples/example012.tex
Normal file
@ -0,0 +1,6 @@
|
||||
Hunger Rating: During a weight loss study each of nine subjects was
|
||||
given either the active drug m-chlorophenylpiperazine (mCPP) for two
|
||||
weeks and then a placebo for another two weeks, or else was given the
|
||||
placebo for the first two weeks and then mCPP for the second two
|
||||
weeks. As part of the study the subjects were asked to rate how hungry
|
||||
they were at the end of each two-week period.
|
10
statistics/examples/example013.tex
Normal file
10
statistics/examples/example013.tex
Normal file
@ -0,0 +1,10 @@
|
||||
Certain types of nerve cells have the ability to regenerate a part of
|
||||
the cell that has been amputated. In an early study of this process,
|
||||
measurements were made on the nerves in the spinal cord in rhesus
|
||||
monkeys. Nerves emanating from the left side of the cord were cut,
|
||||
while nerves from the right side were kept intact. During the
|
||||
regeneration process, the content of creatine phosphate (CP) was
|
||||
measured in the left and the right portion of the spinal cord. The
|
||||
following table shows the data for the right (control) side (Y1), and
|
||||
for the left (regenerating) side (Y2). The units of measurement are mg
|
||||
CP per 100 gm tissue.
|
9
statistics/examples/example014.tex
Normal file
9
statistics/examples/example014.tex
Normal file
@ -0,0 +1,9 @@
|
||||
In an investigation of the physiological effects of alcohol
|
||||
(ethanol), 15 mice were randomly allocated to three treatment groups,
|
||||
each to receive a different oral dose of alcohol. The dosage levels
|
||||
were 1.5, 3.0, and 6.0 g alcohol/kg body weight. The body temperature
|
||||
of each mouse was measured immediately before the alcohol was given
|
||||
and again 20 minutes afterward. The accompanying data shows the drop
|
||||
(before minus after) in body temperature for each mouse. (The negative
|
||||
value $-0.1$ refers to a mouse whose temperature rose rather than
|
||||
fell.)
|
5
statistics/examples/example015.tex
Normal file
5
statistics/examples/example015.tex
Normal file
@ -0,0 +1,5 @@
|
||||
The peak flow rate of a person is the fastest rate
|
||||
at which the person can expel air after taking a deep breath.
|
||||
Peak flow rate is measured in units of liters per minute and
|
||||
gives an indication of the person's respiratory health. Flow is given
|
||||
in l/min, height in cm.
|
6
statistics/examples/example016.tex
Normal file
6
statistics/examples/example016.tex
Normal file
@ -0,0 +1,6 @@
|
||||
An experiment was conducted to study the effect of tamoxifen on
|
||||
patients with cervical cancer. One of the measurements made, both
|
||||
before and again after tamoxifen was given, was microvessel density
|
||||
(MVD). MVD, which is measured as number of vessels per mm$^2$, is a
|
||||
measurement that relates to the formation of blood vessels that feed a
|
||||
tumor and allow it to grow and spread.
|
5
statistics/examples/example017.tex
Normal file
5
statistics/examples/example017.tex
Normal file
@ -0,0 +1,5 @@
|
||||
A group of female college students were divided into three groups
|
||||
according to upper body strength. Their leg strength was tested by
|
||||
measuring how many consecutive times they could leg press 246 pounds
|
||||
before exhaustion. (The subjects were allowed only one second of rest
|
||||
between consecutive lifts.)
|
34
statistics/lecture_statistics.tex → statistics/lecture_statistics01.tex
Executable file → Normal file
34
statistics/lecture_statistics.tex → statistics/lecture_statistics01.tex
Executable file → Normal file
@ -170,7 +170,9 @@ Bernstein Center T\"ubingen}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section[descriptive statistics, errorbars, and plots]{Day 1 -- descriptive statistics, errorbars, and plots}
|
||||
\section[descriptive statistics, errorbars, and plots]{Day 1 --
|
||||
descriptive statistics and plots}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{types of data}
|
||||
|
||||
@ -315,8 +317,8 @@ Bernstein Center T\"ubingen}
|
||||
\frametitle{exercise}
|
||||
\begin{task}{Spearman rank correlation}
|
||||
\begin{enumerate}
|
||||
\item Use {\tt randi} to generate two 100-dimensional vectors
|
||||
{\tt x,y} of random integers between $0$ and $10$.
|
||||
\item Use {\tt randi} to generate two vectors
|
||||
{\tt x,y} with $100$ random integers between $0$ and $10$ each.
|
||||
\item Find out how to compute the Spearman
|
||||
rank correlation $$\rho = 1- {\frac {6 \sum
|
||||
d_i^2}{n(n^2 - 1)}}$$ with Matlab. $d_i = x_i - y_i$ is the
|
||||
@ -358,7 +360,6 @@ correlation coefficient does not have that property.
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{description of data and plotting}
|
||||
\subsection{what makes a good plot}
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
@ -723,6 +724,23 @@ hold off
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{robust statistics}
|
||||
\begin{task}{When is a statistic called robust (leave-one-out)?}
|
||||
\begin{itemize}
|
||||
\item Generate an array with $20$ random numbers using {\tt
|
||||
randn}.
|
||||
\item Compute $20$ means: the $i^{th}$ mean is computed from the
|
||||
data set {\em without} the $i^{th}$ example.
|
||||
\item Repeat this with the median.
|
||||
\item Make a bar plot that depicts the means of the computed means
|
||||
and medians along with an appropriate measure of dispersion.
|
||||
\item What can you observe? Do you understand why?
|
||||
\end{itemize}
|
||||
\end{task}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting interval/ratio/absolute data}
|
||||
@ -791,7 +809,13 @@ hold off
|
||||
ordinal vs. ordinal data (why not the bar chart?).
|
||||
\end{frame}
|
||||
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\begin{center}
|
||||
\Huge
|
||||
That's it.
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\end{document}
|
823
statistics/lecture_statistics02.tex
Normal file
823
statistics/lecture_statistics02.tex
Normal file
@ -0,0 +1,823 @@
|
||||
\documentclass{beamer}
|
||||
\usepackage{xcolor}
|
||||
\usepackage{listings}
|
||||
\usepackage{pgf}
|
||||
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
|
||||
%\usepackage{multimedia}
|
||||
\usepackage[latin1]{inputenc}
|
||||
\usepackage{amsmath}
|
||||
\usepackage{bm}
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage{hyperref}
|
||||
\usepackage{ulem}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\mode<presentation>
|
||||
{
|
||||
\usetheme{Singapore}
|
||||
\setbeamercovered{opaque}
|
||||
\usecolortheme{tuebingen}
|
||||
\setbeamertemplate{navigation symbols}{}
|
||||
\usefonttheme{default}
|
||||
\useoutertheme{infolines}
|
||||
% \useoutertheme{miniframes}
|
||||
}
|
||||
|
||||
\AtBeginSection[]
|
||||
{
|
||||
\begin{frame}<beamer>
|
||||
\begin{center}
|
||||
\Huge \insertsectionhead
|
||||
\end{center}
|
||||
% \frametitle{\insertsectionhead}
|
||||
% \tableofcontents[currentsection,hideothersubsections]
|
||||
\end{frame}
|
||||
}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
|
||||
|
||||
\setbeamertemplate{blocks}[rounded][shadow=true]
|
||||
|
||||
\title[]{Scientific Computing -- Statistics}
|
||||
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
|
||||
University T\"ubingen\\
|
||||
Bernstein Center T\"ubingen}
|
||||
|
||||
\institute[Scientific Computing]{}
|
||||
\date{10/20/2014}
|
||||
%\logo{\pgfuseimage{logo}}
|
||||
|
||||
\subject{Lectures}
|
||||
|
||||
%%%%%%%%%% configuration for code
|
||||
\lstset{
|
||||
basicstyle=\ttfamily,
|
||||
numbers=left,
|
||||
showstringspaces=false,
|
||||
language=Matlab,
|
||||
commentstyle=\itshape\color{darkgray},
|
||||
keywordstyle=\color{blue},
|
||||
stringstyle=\color{green},
|
||||
backgroundcolor=\color{blue!10},
|
||||
breaklines=true,
|
||||
breakautoindent=true,
|
||||
columns=flexible,
|
||||
frame=single,
|
||||
captionpos=b,
|
||||
xleftmargin=1em,
|
||||
xrightmargin=1em,
|
||||
aboveskip=10pt
|
||||
}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
\newcommand{\mycite}[1]{
|
||||
\begin{flushright}
|
||||
\tiny \color{black!80} #1
|
||||
\end{flushright}
|
||||
}
|
||||
|
||||
\input{../latex/environments.tex}
|
||||
\makeatother
|
||||
|
||||
\begin{document}
|
||||
|
||||
\begin{frame}
|
||||
\titlepage
|
||||
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{plan}
|
||||
\setcounter{tocdepth}{1}
|
||||
\tableofcontents
|
||||
|
||||
\end{frame}
|
||||
\begin{frame}
|
||||
\frametitle{information}
|
||||
\begin{itemize}
|
||||
\item Samuels, M. L., Wittmer, J. A., \& Schaffner,
|
||||
A. A. (2010). Statistics for the Life Sciences (4th ed.,
|
||||
p. 668). Prentice Hall.
|
||||
\item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
|
||||
Ed.) Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
|
||||
Hall. doi:10.1037/0012764
|
||||
\item \url{http://stats.stackexchange.com}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% errorbars (error bar paper)
|
||||
% confidence intervals (sources of error)
|
||||
% plotting (the right plot for the right data, Dan plotting paper)
|
||||
% statistical test structure (bootstrapping, resampling, permutation)
|
||||
% Don'ts: repeated testing, exclude data points
|
||||
% study design
|
||||
% PCA
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section[Prelude]{Prelude}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
% ----------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{my expectations for this course}
|
||||
\begin{itemize}
|
||||
\item interest and participation
|
||||
\item motivation to understand and question concepts
|
||||
\item high scientific standard
|
||||
\item intellectual honesty
|
||||
\item sincere cooperation
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% ----------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{this week will be ...}
|
||||
|
||||
\only<1>{
|
||||
\framesubtitle{... no \sout{fun} piece of cake}
|
||||
\begin{center}
|
||||
\includegraphics[height=0.7\textheight]{figs/feeding.jpg}
|
||||
\end{center}
|
||||
}
|
||||
|
||||
\only<2>{
|
||||
\framesubtitle{... no \sout{fun} piece of cake}
|
||||
\begin{center}
|
||||
\includegraphics[height=0.7\textheight]{figs/nacho-trainer.jpg}
|
||||
\end{center}
|
||||
}
|
||||
|
||||
\only<3>{
|
||||
\framesubtitle{... no lecture (please!)}
|
||||
\begin{center}
|
||||
\includegraphics[height=0.7\textheight]{figs/soccer.jpg}
|
||||
\end{center}
|
||||
}
|
||||
|
||||
\end{frame}
|
||||
|
||||
% ----------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{What you should learn this week}
|
||||
\begin{itemize}
|
||||
\item What makes good plots?
|
||||
\item What is descriptive/inferential statistics?
|
||||
\item What is the general structure of a statistical test?
|
||||
\item What does a p-value mean?
|
||||
\item How can I build my own tests?
|
||||
\item How large should my $n$ be?
|
||||
\item What is {\em maximum likelihood} and why is it important?
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section[descriptive statistics, errorbars, and plots]{Day 1 --
|
||||
descriptive statistics and plots}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{types of data}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{data scales}
|
||||
\framesubtitle{What data types are distinguished in statistics?}
|
||||
\Large
|
||||
{\bf Why are data types important?}
|
||||
\pause
|
||||
\begin{itemize}
|
||||
\item selection of statistics
|
||||
\item selection of plots
|
||||
\item selection of correct tests
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
%-------------------------------------------------------------
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{data scales}
|
||||
\framesubtitle{nominal/categorical scale}
|
||||
\begin{itemize}
|
||||
\item properties like cell type, experimental group (e.g.\ treatment
|
||||
1, treatment 2, control)
|
||||
\item each observation/sample is put into one category
|
||||
\item there is no reasonable order among the categories
|
||||
\item example: [rods, cones] vs. [cones, rods]
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
%-------------------------------------------------------------
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{data scales}
|
||||
\framesubtitle{ordinal scale}
|
||||
\begin{itemize}
|
||||
\item like nominal scale, but there is an order
|
||||
\item {\bf but:} there is no reasonable measure of {\em distance}
|
||||
between the classes
|
||||
\item examples: ranks, ratings
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
%-------------------------------------------------------------
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{data scales}
|
||||
\framesubtitle{interval scale}
|
||||
\begin{itemize}
|
||||
\item quantitative/metric values
|
||||
\item reasonable measure of distance between values but no absolute zero
|
||||
\item examples: temperature in $^\circ$C
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
%-------------------------------------------------------------
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{data scales}
|
||||
\framesubtitle{absolute/ratio scale}
|
||||
\begin{itemize}
|
||||
\item like interval scale but with absolute zero
|
||||
\item example: temperature in K (kelvin)
|
||||
\end{itemize}
|
||||
\pause
|
||||
\begin{emphasize}{relationships between scales}
|
||||
\begin{itemize}
|
||||
\item scales exhibit increasing information content from nominal
|
||||
to absolute
|
||||
\item conversion ``downwards'' always possible
|
||||
\end{itemize}
|
||||
\end{emphasize}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{examples from neuroscience and psychology}
|
||||
\begin{itemize}
|
||||
\item {\bf nominal:}\pause
|
||||
\begin{itemize}
|
||||
\item treatment group
|
||||
\item stimulus class
|
||||
\item cell type
|
||||
\end{itemize}
|
||||
|
||||
\item {\bf ordinal:} \pause
|
||||
\begin{itemize}
|
||||
\item ratings
|
||||
\item clinical stages of a disease
|
||||
\item states of an ion channel
|
||||
\end{itemize}
|
||||
\item {\bf absolute/ratio:}\pause
|
||||
\begin{itemize}
|
||||
\item firing rate
|
||||
\item membrane potential
|
||||
\item ion concentration
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
%-------------------------------------------------------------
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{statistics}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{What is ``a statistic''?}
|
||||
\begin{definition}{statistic}
|
||||
A statistic (singular) is a single measure of some attribute of a
|
||||
sample (e.g., its arithmetic mean value). It is calculated by
|
||||
applying a function (statistical algorithm) to the values of the
|
||||
items of the sample, which are known together as a set of data.
|
||||
|
||||
\source{http://en.wikipedia.org/wiki/Statistic}
|
||||
\end{definition}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{examples of test statistics}
|
||||
\begin{itemize}
|
||||
\item {\bf nominal:}\pause
|
||||
\begin{itemize}
|
||||
\item count
|
||||
\item relative frequency/proportion
|
||||
\end{itemize}
|
||||
|
||||
\item {\bf ordinal:} \pause
|
||||
\begin{itemize}
|
||||
\item median
|
||||
\item quantile/percentile
|
||||
\item rank correlation
|
||||
\end{itemize}
|
||||
\item {\bf absolute/ratio:}\pause
|
||||
\begin{itemize}
|
||||
\item mean
|
||||
\item variance/standard deviation
|
||||
\item Pearson correlation
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{exercise}
|
||||
\begin{task}{Spearman rank correlation}
|
||||
\begin{enumerate}
|
||||
\item Use {\tt randi} to generate two vectors
|
||||
{\tt x,y} with $100$ random integers between $1$ and $10$ each.
|
||||
\item Find out how to compute the Spearman
|
||||
rank correlation $$\rho = 1- {\frac {6 \sum
|
||||
d_i^2}{n(n^2 - 1)}}$$ with Matlab. $d_i = \mathrm{rank}(x_i) - \mathrm{rank}(y_i)$ is the
|
||||
difference in the rank between the single data points.
|
||||
\item Compute $\rho$ between $x$ and $y$, between $x$ and
|
||||
$y^2$, between $\log(x+1)$ and $y^2$.
|
||||
\item Compute the ``standard'' (Pearson) correlation coefficient
|
||||
between these values.
|
||||
\item What can you observe and why does it make sense?
|
||||
\end{enumerate}
|
||||
\end{task}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{solution}
|
||||
\begin{solution}{Spearman rank correlation }
|
||||
\scriptsize
|
||||
\begin{lstlisting}
|
||||
>>> x = randi(10, 100, 1);
|
||||
>>> y = randi(10, 100, 1);
|
||||
>>> corr(x,y,'type','Spearman')
|
||||
ans =
|
||||
0.1220
|
||||
>>> corr(x,y.^2,'type','Spearman')
|
||||
ans =
|
||||
0.1220
|
||||
>>> corr(x,y,'type','Pearson')
|
||||
ans =
|
||||
0.1074
|
||||
>>> corr(x,y.^2,'type','Pearson')
|
||||
ans =
|
||||
0.0551
|
||||
\end{lstlisting}
|
||||
The rank correlation does not change under a monotone transformation
|
||||
of the data. Therefore, it can be used for ordinal data. The Pearson
|
||||
correlation coefficient does not have that property.
|
||||
\end{solution}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{what makes a good plot}
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{}
|
||||
\begin{center}
|
||||
\Huge What makes a good plot?
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{features of a good plot}
|
||||
A good plot
|
||||
\begin{itemize}
|
||||
\item helps the reader to clearly understand your point.\pause
|
||||
\item is not misleading and lets the reader judge the information
|
||||
on her own (different y-axis/length scales in two related plots,
|
||||
``squeezing'' via log-plots). \pause
|
||||
\item contains information about the data (a comic might be
|
||||
illustrative, but does not contain information about the
|
||||
data).\pause
|
||||
\item adheres to the principle of {\em ink minimization}.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{features of a good plot}
|
||||
\framesubtitle{design/organization}
|
||||
\begin{itemize}
|
||||
\item Is the display consistent with the model or hypothesis
|
||||
being tested?\pause
|
||||
\item Are there ``empty dimensions'' in the display that could be
|
||||
removed (A 3D pie chart for 2D categorical data, extraneous colors
|
||||
that do not encode meaningful information)?\pause
|
||||
\item Does the display provide an honest and transparent portrayal
|
||||
of the data (hiding, smoothing, modifying data points should be
|
||||
avoided or explicitly mentioned)?
|
||||
\end{itemize}
|
||||
\mycite{Allen et al. 2012, Neuron}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{features of a good plot}
|
||||
\framesubtitle{axes}
|
||||
\begin{itemize}
|
||||
\item Are axes scales defined as linear, log, or radial?\pause
|
||||
\item Does each axis label describe the variable and its units (use
|
||||
``a.u.'' for arbitrary units)?\pause
|
||||
\item Are axes limits appropriate for the data (The graphic should
|
||||
not be bounded at zero if the data can take on both positive and
|
||||
negative values.)?\pause
|
||||
\item Is the aspect ratio appropriate for the data (When x and y
|
||||
axes contrast the same variable under different conditions the
|
||||
graphic should be square.)?
|
||||
\end{itemize}
|
||||
\mycite{Allen et al. 2012, Neuron}
|
||||
\end{frame}
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{features of a good plot}
|
||||
\framesubtitle{color mapping}
|
||||
\begin{itemize}
|
||||
\item Is a color bar provided?\pause
|
||||
\item Is the color map sensible for the data type (does the data
|
||||
extend to both $\pm$, does it live in an interval, is it
|
||||
circular)?\pause
|
||||
\item Are contrasting colors consistent with a natural interpretation?
|
||||
\item Can features be discriminated when printed in grayscale?
|
||||
\item Has red/green contrast been avoided to accommodate common
|
||||
forms of colorblindness?
|
||||
\end{itemize}
|
||||
\mycite{Allen et al. 2012, Neuron}
|
||||
\end{frame}
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{features of a good plot}
|
||||
\framesubtitle{uncertainty}
|
||||
\begin{itemize}
|
||||
\item Does the display indicate the uncertainty of estimated parameters?\pause
|
||||
\item Is the type of error surface appropriate for the data?
|
||||
\begin{itemize}
|
||||
\item Use standard deviations to describe variability in the population.\pause
|
||||
\item Use standard errors or confidence intervals to make inferences
|
||||
about parameters estimated from a sample.\pause
|
||||
\item Parametric confidence intervals should only be used if data
|
||||
meet the assumptions of the underlying model.\pause
|
||||
\end{itemize}
|
||||
\item Are the units of uncertainty defined (is it standard error, is
|
||||
it $95\%$ confidence interval)?
|
||||
\end{itemize}
|
||||
\mycite{Allen et al. 2012, Neuron}
|
||||
\end{frame}
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{features of a good plot}
|
||||
\framesubtitle{annotation}
|
||||
\begin{itemize}
|
||||
\item Are all symbols defined, preferably by directly labeling objects?\pause
|
||||
\item Is the directionality of a contrast between conditions obvious?\pause
|
||||
\item Is the number of samples or independent experiments indicated?\pause
|
||||
\item Are statistical procedures and criteria for significance described?\pause
|
||||
\item Are uncommon abbreviations avoided or clearly defined?\pause
|
||||
\item Are abbreviations consistent with those used in the text?
|
||||
\end{itemize}
|
||||
\mycite{Allen et al. 2012, Neuron}
|
||||
\end{frame}
|
||||
|
||||
\subsection{bad examples}
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{suboptimal example}
|
||||
\begin{center}
|
||||
\includegraphics[width=.5\linewidth]{figs/nobelbad}
|
||||
\end{center}
|
||||
\mycite{Hafting et al. 2005, Nature}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{suboptimal example}
|
||||
\begin{center}
|
||||
\includegraphics[width=.5\linewidth]{figs/badbarright.png}
|
||||
\end{center}
|
||||
\source{http://en.wikipedia.org/wiki/Misleading\_graph}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{suboptimal example}
|
||||
\begin{center}
|
||||
\includegraphics[width=.4\linewidth]{figs/yaxisscalingleft.png}
|
||||
\hspace{.5cm}
|
||||
\includegraphics[width=.4\linewidth]{figs/yaxisscalingright.png}
|
||||
\end{center}
|
||||
\source{http://en.wikipedia.org/wiki/Misleading\_graph}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{suboptimal example}
|
||||
\begin{center}
|
||||
\includegraphics[width=.4\linewidth]{figs/badscatterleft.png}
|
||||
\hspace{.5cm}
|
||||
\includegraphics[width=.4\linewidth]{figs/badscatterright.png}
|
||||
\end{center}
|
||||
\source{http://en.wikipedia.org/wiki/Misleading\_graph}
|
||||
\end{frame}
|
||||
|
||||
|
||||
%-------------------------------------------------------------
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{suboptimal example}
|
||||
\begin{center}
|
||||
\includegraphics[width=.8\linewidth]{figs/badbarplot}
|
||||
\end{center}
|
||||
\source{www.enfovis.com}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{nominal scale}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting nominal data}
|
||||
\framesubtitle{bar plot for count and relative frequency}
|
||||
\begin{center}
|
||||
\includegraphics[width=.8\linewidth]{figs/nominaldataplot}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting nominal data}
|
||||
\framesubtitle{bar plot for count and relative frequency}
|
||||
\scriptsize
|
||||
\begin{lstlisting}
|
||||
% plot
|
||||
bar([1,2], [50, 90], 'facecolor', 'k')
|
||||
|
||||
% labels axes
|
||||
ylabel('cell count')
|
||||
xlabel('cell type')
|
||||
|
||||
% cosmetics
|
||||
xlim([0.5,2.5])
|
||||
ylim([0, 100])
|
||||
box('off')
|
||||
set(gca,'XTick',1:2,'XTickLabel',{'pyramidal','interneuron'},'FontSize',20)
|
||||
|
||||
% settings for saving the figure
|
||||
set(gcf, 'PaperUnits', 'centimeters');
|
||||
set(gcf, 'PaperSize', [11.7 9.0]);
|
||||
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
|
||||
\end{lstlisting}
|
||||
\end{frame}
|
||||
|
||||
%----------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting nominal data}
|
||||
\framesubtitle{pie chart for count and relative frequency}
|
||||
\begin{center}
|
||||
\includegraphics[width=.8\linewidth]{figs/nominaldataplot2}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting nominal data}
|
||||
\framesubtitle{exercise}
|
||||
\begin{task}{pie chart}
|
||||
Plot the same data ($n_{py}=50$, $n_{in}=90$) as a pie chart in Matlab.
|
||||
\end{task}
|
||||
\end{frame}
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting nominal data}
|
||||
\framesubtitle{pie chart for relative frequency}
|
||||
\scriptsize
|
||||
\begin{lstlisting}
|
||||
data = [50, 90];
|
||||
h = pie(data, [1,0], {'pyramidal (n=50)', 'interneuron (n=90)'})
|
||||
hText = findobj(h,'Type','text') % text object handles
|
||||
|
||||
set(h(1), 'FaceColor', [.2,.2,.2]);
|
||||
set(h(2), 'Rotation', 45);
|
||||
set(h(3), 'FaceColor', [.8,.8,.8]);
|
||||
set(h(4), 'Rotation', 45);
|
||||
|
||||
title('cell count')
|
||||
set(gca,'XTick',1:2,'XTickLabel',{'pyramidal', 'interneuron'})
|
||||
box('off')
|
||||
set(gcf, 'PaperUnits', 'centimeters');
|
||||
set(gcf, 'PaperSize', [11.7 9.0]);
|
||||
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
|
||||
\end{lstlisting}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting interval/ratio/absolute data}
|
||||
\framesubtitle{histogram}
|
||||
\begin{center}
|
||||
\includegraphics[width=.8\linewidth]{figs/histogram}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting interval/ratio/absolute data}
|
||||
\framesubtitle{bad choice of bins}
|
||||
\begin{center}
|
||||
\includegraphics[width=.4\linewidth]{figs/histogrambad}
|
||||
\includegraphics[width=.4\linewidth]{figs/histogrambad2}
|
||||
\end{center}
|
||||
\begin{summary}{Rule of thumb}
|
||||
Choose the number of bins $b \approx n/20$.
|
||||
\end{summary}
|
||||
\end{frame}
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting interval/ratio/absolute data}
|
||||
\framesubtitle{how to do it in Matlab}
|
||||
\scriptsize
|
||||
\begin{lstlisting}
|
||||
x = randn(2000,1); % generate Gaussian data
|
||||
|
||||
hist(x, 50); % generate histogram
|
||||
|
||||
% set facecolor to gray
|
||||
h = findobj(gca, 'Type','patch');
|
||||
set(h(1), 'FaceColor',[.2,.2,.2], 'EdgeColor','w', 'linewidth',2)
|
||||
|
||||
% plot a white grid over it
|
||||
h = gridxy([],get(gca,'ytick'),'color','w','linewidth',2)
|
||||
uistack(h, 'top')
|
||||
|
||||
% cosmetics
|
||||
box('off');
|
||||
xlabel('Data')
|
||||
ylabel('Count')
|
||||
\end{lstlisting}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting interval/ratio/absolute data}
|
||||
\framesubtitle{bar plot}
|
||||
There are several ways to plot a sample $x_1, \dots, x_n$ of interval/ratio/absolute
|
||||
scale with a bar plot
|
||||
\begin{center}
|
||||
\includegraphics[width=.6\linewidth]{figs/barplots.png}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting interval/ratio/absolute data}
|
||||
\framesubtitle{bar plot}
|
||||
\scriptsize
|
||||
\begin{lstlisting}
|
||||
% bar plot
|
||||
x = rand(10,1);
|
||||
gray = [.5,.5,.5];
|
||||
|
||||
bar(1, mean(x), 'EdgeColor','w','FaceColor', gray);
|
||||
hold on
|
||||
|
||||
bar(2, mean(x), 'EdgeColor','w','FaceColor', gray);
|
||||
plot(0*x + 2, x, 'ok');
|
||||
|
||||
bar(3, mean(x), 'EdgeColor','w','FaceColor', gray);
|
||||
errorbar(3, mean(x), std(x), 'ok');
|
||||
|
||||
bar(4, mean(x), 'EdgeColor','w','FaceColor', gray);
|
||||
errorbar(4, mean(x), std(x)/sqrt(length(x)), 'ok');
|
||||
set(gca, 'xtick',[])
|
||||
ylabel('uniformly distributed random data in [0,1]')
|
||||
box('off')
|
||||
title('different forms of bar plots')
|
||||
hold off
|
||||
\end{lstlisting}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting interval/ratio/absolute data}
|
||||
\framesubtitle{bar plot and measure of central tendency and spread}
|
||||
|
||||
\begin{itemize}
|
||||
\item A bar plot collapses real data onto a single number and some
|
||||
measure of spread. This number is usually a {\em measure of central
|
||||
tendency}, i.e. a typical/central value for the probability
|
||||
distribution of the data.\pause
|
||||
\item What measures of central tendency can you think of?\pause
|
||||
\begin{itemize}
|
||||
\item mean
|
||||
\item median
|
||||
\item geometric mean (the nth root of the product of the data values)
|
||||
\item weighted mean
|
||||
\item midrange (mean of the maximum and minimum values of a data set)
|
||||
\end{itemize}\pause
|
||||
\item Additionally, the bar plot is equipped with a measure of {\em
|
||||
spread} or {\em dispersion}. What measures of spread can you think of?\pause
|
||||
\begin{itemize}
|
||||
\item standard deviation
|
||||
\item range (maximum minus minimum of a dataset)
|
||||
\item inter-quartile range
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting interval/ratio/absolute data}
|
||||
\framesubtitle{measure of central tendency and spread}
|
||||
\Large
|
||||
\begin{center}
|
||||
\bf The part of statistics that summarizes data in a small number
|
||||
of values is called {\em descriptive statistics}.
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{robust statistics}
|
||||
\begin{task}{When is a statistic called robust (leave-one-out)?}
|
||||
\begin{itemize}
|
||||
\item Generate an array with $20$ random numbers using {\tt
|
||||
randn}.
|
||||
\item Compute $20$ means: the $i^{\mathrm{th}}$ mean is computed from the
|
||||
data set {\em without} the $i^{\mathrm{th}}$ example.
|
||||
\item Repeat this with the median.
|
||||
\item Make a bar plot that depicts the means of the computed means
|
||||
and medians along with an appropriate measure of dispersion.
|
||||
\item What can you observe? Do you understand why?
|
||||
\end{itemize}
|
||||
\end{task}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting interval/ratio/absolute data}
|
||||
\framesubtitle{boxplot}
|
||||
\begin{minipage}{1.0\linewidth}
|
||||
\begin{minipage}{0.5\linewidth}
|
||||
\begin{center}
|
||||
\includegraphics[width=\linewidth]{figs/boxplot.png}
|
||||
\end{center}
|
||||
\end{minipage}
|
||||
\begin{minipage}{0.5\linewidth}
|
||||
Who knows what the elements mean?\pause
|
||||
\begin{itemize}
|
||||
\item the box depicts the inter-quartile range
|
||||
\item the line denotes the median
|
||||
\item the whiskers denote the most extreme values of the data not
|
||||
considered outliers
|
||||
\item outliers are plotted separately
|
||||
\end{itemize}
|
||||
\begin{task}{Outliers}
|
||||
\begin{itemize}
|
||||
\item Find out how an outlier is defined in a Matlab boxplot.
|
||||
\item Can you remove an outlier from the dataset?
|
||||
\end{itemize}
|
||||
\end{task}
|
||||
\end{minipage}
|
||||
\end{minipage}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting interval/ratio/absolute data}
|
||||
\framesubtitle{violinplot}
|
||||
\begin{center}
|
||||
\includegraphics[width=.8\linewidth]{figs/violinplots.png}
|
||||
\end{center}
|
||||
\begin{itemize}
|
||||
\item Violinplots depict the distribution of the data by a
|
||||
smoothed histogram.
|
||||
\item Additional information (data points, median,
|
||||
inter-quartile range) is plotted inside.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting combinations of scales}
|
||||
What could we use for a combination of categorical/nominal and
|
||||
interval/ratio/absolute?
|
||||
\pause
|
||||
\begin{center}
|
||||
\includegraphics[width=.5\linewidth]{figs/factorplot.png}
|
||||
\end{center}
|
||||
Each category is a single bar.
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting combinations of scales}
|
||||
What could we use for a combination of interval/ratio/absolute and
|
||||
interval/ratio/absolute, e.g.\ $(x_1, y_1), \dots, (x_n, y_n)$? \pause
|
||||
\begin{center}
|
||||
\includegraphics[width=.8\linewidth]{figs/paireddata.png}
|
||||
\end{center}
|
||||
Scatter plot or paired bar chart. A scatter plot can also be used for
|
||||
ordinal vs. ordinal data (why not the bar chart?).
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\begin{center}
|
||||
\Huge
|
||||
That's it.
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\end{document}
|
||||
|
||||
|
Reference in New Issue
Block a user