Merge branch 'master' of raven.am28.uni-tuebingen.de:scientificComputing
@ -1,16 +1,60 @@
|
||||
%% load the data and set up the figure
% ampullary.mat is expected to provide the cell array `times` with one
% vector of spike times per trial (presumably in seconds -- TODO confirm).
load('ampullary.mat')

sample_rate = 20000; % Hz

% latest spike time across all trials; the later sections use it to size
% the bin edges and the binary spike matrix
max_time = 0;
for i = 1:size(times,2)
    max_time = max([max_time, max(times{i})]);
end

fig = figure();
set(gcf,'Color', 'white')

%% create PSTH on the basis of the interspike intervals
subplot(3,1,1)
hold on
% instantaneous rate of each trial: the inverse of every interspike
% interval, drawn at the time of the spike that closes the interval
for i = 1:size(times,2)
    t = times{i};
    isi = diff(t);
    plot(t(2:end), 1./isi)
end

xlabel('time [s]')
ylabel('firing rate [Hz]')
box('off')
title('instantaneous firing rate')
|
||||
|
||||
%% create PSTH using the binning method
subplot(3,1,2)
bin_width = 0.02; % s
% hist() interprets a vector second argument as bin CENTERS, so the
% rate is plotted against these positions.
edges = 0:bin_width:max_time;
n_trials = size(times,2);

% Sum the per-trial rates first and divide once afterwards, so that every
% trial (including the first one) carries the same weight in the average.
firing_rate = zeros(1, numel(edges));
for i = 1:n_trials
    n = hist(times{i}, edges);
    firing_rate = firing_rate + n(:).' / bin_width;
end
firing_rate = firing_rate / n_trials;

plot(edges, firing_rate)
% box('off') has to follow plot(): without hold, plot() resets the axes
box('off')
xlabel('time [s]')
ylabel('firing rate [Hz]')
title('binning method')
|
||||
|
||||
%% create PSTH using the kernel-convolution method
subplot(3,1,3)
n_trials = size(times,2);
n_samples = round(max_time*sample_rate);

% one row per trial, one column per sample; a 1 marks a spike
binary_spikes = zeros(n_trials, n_samples);
resps = zeros(size(binary_spikes));

% Hann kernel a quarter of the bin width long, normalized to unit area so
% that the convolution preserves the spike count
window = hann(round(bin_width/4*sample_rate),'symmetric');
window = window/sum(window);

for i = 1:n_trials
    idx = round(times{i}*sample_rate);
    % spikes that round to sample 0 (or below) go to the first sample;
    % the mask also copes with trials that contain no spikes
    idx(idx <= 0) = 1;
    binary_spikes(i, idx) = 1;
    resps(i,:) = conv(binary_spikes(i,:), window, 'same');
end

% Average across trials (dimension 1), not across time, and scale from
% spikes per sample to spikes per second.
% NOTE(review): the scaling to Hz and the axis labels are new -- confirm
% they match the intended presentation.
time_axis = (0:n_samples-1)/sample_rate;
plot(time_axis, mean(resps,1)*sample_rate)
box('off')
xlabel('time [s]')
ylabel('firing rate [Hz]')
title('kernel-convolution method')
|
||||
|
16
statistics/assignments/Makefile
Normal file
@ -0,0 +1,16 @@
|
||||
# Build one hand-out package per example data set (001 .. 017).
# For every number: replace the 000 placeholder in day1.tex, typeset the
# result, and zip the PDF together with the matching CSV from ../data.
# The temporary files and the unzipped copies are removed afterwards.
.PHONY: all
all:
	for number in 001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 ; do \
		echo $$number ; \
		sed "s/000/$$number/g" day1.tex > tmp.tex; \
		pdflatex tmp.tex; \
		mv tmp.pdf day1_$$number.pdf; \
		cp ../data/example$$number.csv ./ ;\
		rm tmp.* ; \
		zip example$$number.zip example$$number.csv day1_$$number.pdf ; \
		rm example$$number.csv ;\
		rm day1_$$number.pdf ; \
	done
|
||||
|
||||
# Remove the generated zip packages and the editor's auto directory.
# -f keeps the target from failing (and skipping the second command)
# when no zip file exists yet.
.PHONY: clean
clean:
	rm -f *.zip
	rm -rf auto
|
72
statistics/assignments/day1.tex
Executable file
@ -0,0 +1,72 @@
|
||||
\documentclass[addpoints,10pt]{exam}
|
||||
\usepackage{url}
|
||||
\usepackage{color}
|
||||
\usepackage{hyperref}
|
||||
|
||||
\pagestyle{headandfoot}
|
||||
\runningheadrule
|
||||
\firstpageheadrule
|
||||
|
||||
\firstpageheader{Scientific Computing}{afternoon assignment day 01}{10/20/2014}
|
||||
%\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014}
|
||||
\firstpagefooter{}{}{}
|
||||
\runningfooter{}{}{}
|
||||
\pointsinmargin
|
||||
\bracketedpoints
|
||||
|
||||
%\printanswers
|
||||
\shadedsolutions
|
||||
|
||||
|
||||
\begin{document}
|
||||
%%%%%%%%%%%%%%%%%%%%% Submission instructions %%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\sffamily
|
||||
%%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
\begin{questions}
|
||||
\question To publish scientific results, you will usually need to
|
||||
use statistical methods. Some journals provide you with a brief
|
||||
description of how they expect you to apply statistical methods. One
|
||||
example can be found in the author guidelines of the journal
|
||||
Nature.
|
||||
|
||||
Assume you collected the following dataset. You can download it from
|
||||
Ilias as {\tt example000.csv}. Here is the description of the dataset:
|
||||
|
||||
\begin{quotation}
|
||||
\tt
|
||||
\input{../examples/example000.tex}
|
||||
\end{quotation}
|
||||
|
||||
\begin{parts}
|
||||
\part Download the dataset and write a script that loads it into
|
||||
matlab.
|
||||
|
||||
\part Think about the type of your data (I might ask you that
|
||||
tomorrow).
|
||||
|
||||
\part Produce a plot that displays the data in an appropriate
|
||||
way. Make sure to respect all elements of good plotting we
|
||||
discussed today.
|
||||
|
||||
\part Download the statistical checklist from Nature. Produce {\bf
|
||||
one} slide that contains the plot and a concise summary of your
|
||||
data which respects the requirements made by Nature (assume you
|
||||
are producing a figure legend for the figure in Nature). It is
|
||||
good style to avoid expressions like ``the plot shows'' or
|
||||
similar.
|
||||
|
||||
\part Upload your code, the data, and the slide as a zip to
|
||||
Ilias. Deadline is 19h00. Structure the zip such that you can
|
||||
present your program in front of the class. Several students will
|
||||
be asked to present their slide and their code tomorrow morning.
|
||||
|
||||
\end{parts}
|
||||
|
||||
\end{questions}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
\end{document}
|
46
statistics/assignments/day2.tex
Normal file
@ -0,0 +1,46 @@
|
||||
\documentclass[addpoints,10pt]{exam}
|
||||
\usepackage{url}
|
||||
\usepackage{color}
|
||||
\usepackage{hyperref}
|
||||
|
||||
\pagestyle{headandfoot}
|
||||
\runningheadrule
|
||||
\firstpageheadrule
|
||||
|
||||
\firstpageheader{Scientific Computing}{afternoon assignment day 02}{10/21/2014}
|
||||
%\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014}
|
||||
\firstpagefooter{}{}{}
|
||||
\runningfooter{}{}{}
|
||||
\pointsinmargin
|
||||
\bracketedpoints
|
||||
|
||||
%\printanswers
|
||||
\shadedsolutions
|
||||
|
||||
|
||||
\begin{document}
|
||||
%%%%%%%%%%%%%%%%%%%%% Submission instructions %%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\sffamily
|
||||
%%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
\begin{questions}
|
||||
\question Download example002 from yesterday (brain weights).
|
||||
\begin{parts}
|
||||
\part Simulate a null distribution via permutation.
|
||||
\part Determine whether you can reject ``means are equal'' on a
|
||||
5\% significance level using the simulated null distribution.
|
||||
\part Check whether the means are different with a two sample
|
||||
t-test in matlab ({\tt ttest2}).
|
||||
\part Plot the data appropriately and generate a single slide that
|
||||
contains the plot and short discussion of the test that respects
|
||||
the Nature statistical checklist (ignore all questions of whether the
|
||||
assumptions of the test are satisfied).
|
||||
\part Upload the slide and the code to Ilias. Deadline is 19h00.
|
||||
\end{parts}
|
||||
\end{questions}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
\end{document}
|
43
statistics/data/example001.csv
Executable file
@ -0,0 +1,43 @@
|
||||
MAO,Diagnosis
|
||||
6.8,I
|
||||
4.1,I
|
||||
7.3,I
|
||||
14.2,I
|
||||
18.8,I
|
||||
9.9,I
|
||||
7.4,I
|
||||
11.9,I
|
||||
5.2,I
|
||||
7.8,I
|
||||
7.8,I
|
||||
8.7,I
|
||||
12.7,I
|
||||
14.5,I
|
||||
10.7,I
|
||||
8.4,I
|
||||
9.7,I
|
||||
10.6,I
|
||||
7.8,II
|
||||
4.4,II
|
||||
11.4,II
|
||||
3.1,II
|
||||
4.3,II
|
||||
10.1,II
|
||||
1.5,II
|
||||
7.4,II
|
||||
5.2,II
|
||||
10,II
|
||||
3.7,II
|
||||
5.5,II
|
||||
8.5,II
|
||||
7.7,II
|
||||
6.8,II
|
||||
3.1,II
|
||||
6.4,III
|
||||
10.8,III
|
||||
1.1,III
|
||||
2.9,III
|
||||
4.5,III
|
||||
5.8,III
|
||||
9.4,III
|
||||
6.8,III
|
|
186
statistics/data/example002.csv
Executable file
@ -0,0 +1,186 @@
|
||||
Weight,Sex
|
||||
1607,m
|
||||
1157,m
|
||||
1248,m
|
||||
1310,m
|
||||
1398,m
|
||||
1237,m
|
||||
1232,m
|
||||
1343,m
|
||||
1380,m
|
||||
1274,m
|
||||
1245,m
|
||||
1286,m
|
||||
1508,m
|
||||
1105,m
|
||||
1123,m
|
||||
1198,m
|
||||
1300,m
|
||||
1249,m
|
||||
1185,m
|
||||
915,m
|
||||
1345,m
|
||||
1107,m
|
||||
1357,m
|
||||
1227,m
|
||||
1205,m
|
||||
1435,m
|
||||
1289,m
|
||||
1093,m
|
||||
1211,m
|
||||
1260,m
|
||||
1193,m
|
||||
1330,m
|
||||
1130,m
|
||||
1357,m
|
||||
1193,m
|
||||
1232,m
|
||||
1321,m
|
||||
1260,m
|
||||
1380,m
|
||||
1230,m
|
||||
1136,m
|
||||
1029,m
|
||||
1223,m
|
||||
1240,m
|
||||
1264,m
|
||||
1020,m
|
||||
1415,m
|
||||
1410,m
|
||||
1275,m
|
||||
1230,m
|
||||
1085,m
|
||||
1048,m
|
||||
1181,m
|
||||
1103,m
|
||||
1165,m
|
||||
1547,m
|
||||
1173,m
|
||||
1660,m
|
||||
1307,m
|
||||
1535,m
|
||||
1315,m
|
||||
1257,m
|
||||
1424,m
|
||||
1309,m
|
||||
1170,m
|
||||
1412,m
|
||||
1270,m
|
||||
1230,m
|
||||
1233,m
|
||||
1561,m
|
||||
1193,m
|
||||
1272,m
|
||||
1355,m
|
||||
1137,m
|
||||
1354,m
|
||||
1110,m
|
||||
1265,m
|
||||
1407,m
|
||||
1227,m
|
||||
1330,m
|
||||
1222,m
|
||||
1305,m
|
||||
1475,m
|
||||
1177,m
|
||||
1337,m
|
||||
1145,m
|
||||
1070,m
|
||||
1305,m
|
||||
1085,m
|
||||
1303,m
|
||||
1390,m
|
||||
1532,m
|
||||
1238,m
|
||||
1233,m
|
||||
1280,m
|
||||
1245,m
|
||||
1459,m
|
||||
1157,m
|
||||
1302,m
|
||||
1385,m
|
||||
1310,m
|
||||
1342,m
|
||||
1303,m
|
||||
1248,m
|
||||
1115,m
|
||||
1365,m
|
||||
1227,m
|
||||
1353,m
|
||||
1125,f
|
||||
1027,f
|
||||
1112,f
|
||||
983,f
|
||||
1090,f
|
||||
1247,f
|
||||
1045,f
|
||||
983,f
|
||||
972,f
|
||||
1045,f
|
||||
937,f
|
||||
1245,f
|
||||
1200,f
|
||||
1270,f
|
||||
1200,f
|
||||
1145,f
|
||||
1090,f
|
||||
1040,f
|
||||
1343,f
|
||||
1010,f
|
||||
1095,f
|
||||
1180,f
|
||||
1168,f
|
||||
1095,f
|
||||
1040,f
|
||||
1235,f
|
||||
1050,f
|
||||
1038,f
|
||||
1046,f
|
||||
1255,f
|
||||
1228,f
|
||||
1000,f
|
||||
1225,f
|
||||
1220,f
|
||||
1085,f
|
||||
1067,f
|
||||
1006,f
|
||||
1138,f
|
||||
1175,f
|
||||
1252,f
|
||||
1037,f
|
||||
958,f
|
||||
1020,f
|
||||
1068,f
|
||||
1107,f
|
||||
1317,f
|
||||
952,f
|
||||
1056,f
|
||||
1203,f
|
||||
1183,f
|
||||
1392,f
|
||||
1130,f
|
||||
1284,f
|
||||
996,f
|
||||
1228,f
|
||||
1087,f
|
||||
1035,f
|
||||
1170,f
|
||||
1064,f
|
||||
1250,f
|
||||
1129,f
|
||||
1088,f
|
||||
1037,f
|
||||
1117,f
|
||||
1095,f
|
||||
1027,f
|
||||
1027,f
|
||||
1190,f
|
||||
1153,f
|
||||
1037,f
|
||||
1120,f
|
||||
1212,f
|
||||
1024,f
|
||||
1135,f
|
||||
1177,f
|
||||
1096,f
|
||||
1114,f
|
|
52
statistics/data/example003.csv
Executable file
@ -0,0 +1,52 @@
|
||||
singtime
|
||||
4.3
|
||||
24.1
|
||||
6.6
|
||||
7.3
|
||||
4
|
||||
2.6
|
||||
4
|
||||
3.9
|
||||
9.4
|
||||
6.2
|
||||
1.6
|
||||
6.5
|
||||
0.2
|
||||
2.7
|
||||
17.4
|
||||
5.6
|
||||
2
|
||||
3.8
|
||||
1.2
|
||||
0.7
|
||||
1.6
|
||||
2.3
|
||||
3.7
|
||||
0.8
|
||||
0.5
|
||||
4.5
|
||||
11.5
|
||||
3.5
|
||||
0.8
|
||||
5.2
|
||||
2
|
||||
0.7
|
||||
1.7
|
||||
5
|
||||
2.8
|
||||
1.5
|
||||
3.9
|
||||
3.7
|
||||
4.5
|
||||
1.8
|
||||
1.2
|
||||
0.7
|
||||
0.7
|
||||
4.2
|
||||
4.7
|
||||
2.2
|
||||
1.4
|
||||
14.1
|
||||
8.6
|
||||
3.7
|
||||
3.5
|
|
29
statistics/data/example004.csv
Executable file
@ -0,0 +1,29 @@
|
||||
Pulse
|
||||
97
|
||||
111
|
||||
93
|
||||
98
|
||||
107
|
||||
77
|
||||
121
|
||||
88
|
||||
96
|
||||
123
|
||||
119
|
||||
91
|
||||
99
|
||||
95
|
||||
99
|
||||
102
|
||||
77
|
||||
85
|
||||
104
|
||||
106
|
||||
114
|
||||
85
|
||||
112
|
||||
102
|
||||
104
|
||||
94
|
||||
104
|
||||
98
|
|
37
statistics/data/example005.csv
Executable file
@ -0,0 +1,37 @@
|
||||
Branches
|
||||
23
|
||||
30
|
||||
54
|
||||
28
|
||||
31
|
||||
29
|
||||
34
|
||||
35
|
||||
30
|
||||
27
|
||||
21
|
||||
43
|
||||
51
|
||||
35
|
||||
51
|
||||
49
|
||||
35
|
||||
24
|
||||
26
|
||||
29
|
||||
21
|
||||
29
|
||||
37
|
||||
27
|
||||
28
|
||||
33
|
||||
33
|
||||
23
|
||||
37
|
||||
27
|
||||
40
|
||||
48
|
||||
41
|
||||
20
|
||||
30
|
||||
57
|
|
32
statistics/data/example006.csv
Executable file
@ -0,0 +1,32 @@
|
||||
Glucose
|
||||
81
|
||||
85
|
||||
93
|
||||
93
|
||||
99
|
||||
76
|
||||
75
|
||||
84
|
||||
78
|
||||
84
|
||||
81
|
||||
82
|
||||
89
|
||||
81
|
||||
96
|
||||
82
|
||||
74
|
||||
70
|
||||
84
|
||||
86
|
||||
80
|
||||
70
|
||||
131
|
||||
75
|
||||
88
|
||||
102
|
||||
115
|
||||
89
|
||||
82
|
||||
79
|
||||
106
|
|
24
statistics/data/example007.csv
Executable file
@ -0,0 +1,24 @@
|
||||
NerveCells
|
||||
35
|
||||
19
|
||||
33
|
||||
34
|
||||
17
|
||||
26
|
||||
16
|
||||
40
|
||||
28
|
||||
30
|
||||
23
|
||||
12
|
||||
27
|
||||
33
|
||||
22
|
||||
31
|
||||
28
|
||||
28
|
||||
35
|
||||
23
|
||||
23
|
||||
19
|
||||
29
|
|
21
statistics/data/example008.csv
Executable file
@ -0,0 +1,21 @@
|
||||
RateChange,Treatment
|
||||
28,Caffeine
|
||||
11,Caffeine
|
||||
-3,Caffeine
|
||||
14,Caffeine
|
||||
-2,Caffeine
|
||||
-4,Caffeine
|
||||
18,Caffeine
|
||||
2,Caffeine
|
||||
2,Caffeine
|
||||
26,Decaf
|
||||
1,Decaf
|
||||
0,Decaf
|
||||
-4,Decaf
|
||||
-4,Decaf
|
||||
14,Decaf
|
||||
16,Decaf
|
||||
8,Decaf
|
||||
0,Decaf
|
||||
18,Decaf
|
||||
-10,Decaf
|
|
12
statistics/data/example009.csv
Executable file
@ -0,0 +1,12 @@
|
||||
NEConcentration,Treatment
|
||||
543,Toluene
|
||||
523,Toluene
|
||||
431,Toluene
|
||||
635,Toluene
|
||||
564,Toluene
|
||||
549,Toluene
|
||||
535,Control
|
||||
385,Control
|
||||
502,Control
|
||||
412,Control
|
||||
387,Control
|
|
13
statistics/data/example010.csv
Executable file
@ -0,0 +1,13 @@
|
||||
Dopamine,Group
|
||||
3420,toluene
|
||||
2314,toluene
|
||||
1911,toluene
|
||||
2464,toluene
|
||||
2781,toluene
|
||||
2803,toluene
|
||||
1820,control
|
||||
1843,control
|
||||
1397,control
|
||||
1803,control
|
||||
2539,control
|
||||
1990,control
|
|
10
statistics/data/example011.csv
Executable file
@ -0,0 +1,10 @@
|
||||
Animal,Site I,Site II
|
||||
1,50.6,38
|
||||
2,39.2,18.6
|
||||
3,35.2,23.2
|
||||
4,17,19
|
||||
5,11.2,6.6
|
||||
6,14.2,16.4
|
||||
7,24.2,14.4
|
||||
8,37.4,37.6
|
||||
9,35.2,24.4
|
|
10
statistics/data/example012.csv
Executable file
@ -0,0 +1,10 @@
|
||||
Subject,mCPP,Placebo
|
||||
1,1.1,0
|
||||
2,1.3,-0.3
|
||||
3,1,0.6
|
||||
4,1.7,0.3
|
||||
5,1.4,-0.7
|
||||
6,0.1,-0.2
|
||||
7,0.5,0.6
|
||||
8,1.6,0.9
|
||||
9,-0.5,-2
|
|
9
statistics/data/example013.csv
Executable file
@ -0,0 +1,9 @@
|
||||
Animal,Control,Regenerating
|
||||
1,16.3,11.5
|
||||
2,4.8,3.6
|
||||
3,10.9,12.5
|
||||
4,14.2,6.3
|
||||
5,16.3,15.2
|
||||
6,9.9,8.1
|
||||
7,29.2,16.6
|
||||
8,22.4,13.1
|
|
16
statistics/data/example014.csv
Executable file
@ -0,0 +1,16 @@
|
||||
BodyTempDrop,AlcoholDose
|
||||
0.2,1.5
|
||||
1.9,1.5
|
||||
-0.1,1.5
|
||||
0.5,1.5
|
||||
0.8,1.5
|
||||
4,3
|
||||
3.2,3
|
||||
2.3,3
|
||||
2.9,3
|
||||
3.8,3
|
||||
3.3,6
|
||||
5.1,6
|
||||
5.3,6
|
||||
6.7,6
|
||||
5.9,6
|
|
18
statistics/data/example015.csv
Executable file
@ -0,0 +1,18 @@
|
||||
PeakFlow,Height
|
||||
733,174
|
||||
572,183
|
||||
500,176
|
||||
738,169
|
||||
616,183
|
||||
787,186
|
||||
866,178
|
||||
670,175
|
||||
550,172
|
||||
660,179
|
||||
575,171
|
||||
577,184
|
||||
783,200
|
||||
625,195
|
||||
470,176
|
||||
642,176
|
||||
856,190
|
|
19
statistics/data/example016.csv
Executable file
@ -0,0 +1,19 @@
|
||||
Patient,Before,After
|
||||
1,98,75
|
||||
2,100,60
|
||||
3,82,25
|
||||
4,100,55
|
||||
5,93,78
|
||||
6,119,102
|
||||
7,70,58
|
||||
8,78,70
|
||||
9,104,90
|
||||
10,70,50
|
||||
11,60,65
|
||||
12,88,45
|
||||
13,45,36
|
||||
14,159,144
|
||||
15,65,27
|
||||
16,98,90
|
||||
17,66,16
|
||||
18,67,53
|
|
21
statistics/data/example017.csv
Executable file
@ -0,0 +1,21 @@
|
||||
LegStrength,UpperBodyStrength
|
||||
55,low
|
||||
70,low
|
||||
45,low
|
||||
246,low
|
||||
240,low
|
||||
96,low
|
||||
225,low
|
||||
40,middle
|
||||
200,middle
|
||||
250,middle
|
||||
192,middle
|
||||
117,middle
|
||||
215,middle
|
||||
181,high
|
||||
85,high
|
||||
416,high
|
||||
228,high
|
||||
257,high
|
||||
316,high
|
||||
134,high
|
|
6
statistics/examples/example001.tex
Normal file
@ -0,0 +1,6 @@
|
||||
MAO and Schizophrenia: Monoamine oxidase (MAO) is an enzyme that is
|
||||
thought to play a role in the regulation of behavior. To see whether
|
||||
different categories of schizophrenic patients have different levels
|
||||
of MAO activity, researchers collected blood specimens from 42
|
||||
patients and measured the MAO activity in the platelets. Values are
|
||||
expressed as nmol benzylaldehyde product per $10^8$ platelets per hour.
|
3
statistics/examples/example002.tex
Normal file
@ -0,0 +1,3 @@
|
||||
Brain Weight: In 1888, P. Topinard published data on the brain weights
|
||||
of hundreds of French men and women. Brain weights are given in
|
||||
gram.
|
4
statistics/examples/example003.tex
Normal file
@ -0,0 +1,4 @@
|
||||
Cricket Singing Times: Male Mormon crickets (Anabrus simplex) sing to attract mates.
|
||||
A field researcher measured the duration of 51 unsuccessful songs--that is, the time
|
||||
until the singing male gave up and left his perch. The data is given
|
||||
in minutes.
|
3
statistics/examples/example004.tex
Normal file
@ -0,0 +1,3 @@
|
||||
Pulse after Exercise: A group of 28 adults did some moderate exercise
|
||||
for five minutes and then measured their pulses. Data is given in
|
||||
beats/minute.
|
5
statistics/examples/example005.tex
Normal file
@ -0,0 +1,5 @@
|
||||
A dendritic tree is a branched structure that emanates from the body
|
||||
of a nerve cell. As part of a study of brain development, 36 nerve
|
||||
cells were taken from the brains of newborn guinea pigs. The
|
||||
investigators counted the number of dendritic branch segments
|
||||
emanating from each nerve cell.
|
4
statistics/examples/example006.tex
Normal file
@ -0,0 +1,4 @@
|
||||
For each of 31 healthy dogs, a veterinarian measured the glucose
|
||||
concentration in the anterior chamber of the right eye and also in the
|
||||
blood serum. The following data are the anterior chamber glucose
|
||||
measurements, expressed as a percentage of the blood glucose.
|
5
statistics/examples/example007.tex
Normal file
@ -0,0 +1,5 @@
|
||||
A veterinary anatomist investigated the spatial arrangement of the
|
||||
nerve cells in the intestine of a pony. He removed a block of tissue
|
||||
from the intestinal wall, cut the block into many equal sections, and
|
||||
counted the number of nerve cells in each of 23 randomly selected
|
||||
sections.
|
8
statistics/examples/example008.tex
Normal file
@ -0,0 +1,8 @@
|
||||
Researchers were interested in the short-term effect that caffeine has
|
||||
on heart rate. They enlisted a group of volunteers and measured each
|
||||
person's resting heart rate. Then they had each subject drink 6 ounces
|
||||
of coffee. Nine of the subjects were given coffee containing caffeine
|
||||
and 11 were given decaffeinated coffee. After 10 minutes each person's
|
||||
heart rate was measured again. The data in the table contains the
|
||||
change in heart rate; a positive number means that heart rate went up
|
||||
and a negative number means that heart rate went down.
|
9
statistics/examples/example009.tex
Normal file
@ -0,0 +1,9 @@
|
||||
Toluene and the Brain: Abuse of substances containing toluene (for
|
||||
example, glue) can produce various neurological symptoms. In an
|
||||
investigation of the mechanism of these toxic effects, researchers
|
||||
measured the concentrations of various chemicals in the brains of rats
|
||||
that had been exposed to a toluene-laden atmosphere, and also in
|
||||
unexposed control rats. The concentrations of the brain chemical
|
||||
norepinephrine (NE) in the medulla region of the brain, for six
|
||||
toluene-exposed rats and five control rats, are given in the accompanying
|
||||
data file in ng/g.
|
3
statistics/examples/example010.tex
Normal file
@ -0,0 +1,3 @@
|
||||
In a pharmacological study, researchers measured the concentration of
|
||||
the brain chemical dopamine in six rats exposed to toluene and six
|
||||
control rats. Numbers are specified in ng/g.
|
6
statistics/examples/example011.tex
Normal file
@ -0,0 +1,6 @@
|
||||
Nerve Cell Density: For each of nine horses, a veterinary anatomist
|
||||
measured the density of nerve cells at specified sites in the
|
||||
intestine. The results for site I (midregion of jejunum) and site II
|
||||
(mesenteric region of jejunum) are given in the accompanying dataset.
|
||||
Each density value is the average of counts of nerve cells in five
|
||||
equal sections of tissue.
|
6
statistics/examples/example012.tex
Normal file
@ -0,0 +1,6 @@
|
||||
Hunger Rating: During a weight loss study each of nine subjects was
|
||||
given either the active drug m-chlorophenylpiperazine (mCPP) for two
|
||||
weeks and then a placebo for another two weeks, or else was given the
|
||||
placebo for the first two weeks and then mCPP for the second two
|
||||
weeks. As part of the study the subjects were asked to rate how hungry
|
||||
they were at the end of each two-week period.
|
10
statistics/examples/example013.tex
Normal file
@ -0,0 +1,10 @@
|
||||
Certain types of nerve cells have the ability to regenerate a part of
|
||||
the cell that has been amputated. In an early study of this process,
|
||||
measurements were made on the nerves in the spinal cord in rhesus
|
||||
monkeys. Nerves emanating from the left side of the cord were cut,
|
||||
while nerves from the right side were kept intact. During the
|
||||
regeneration process, the content of creatine phosphate (CP) was
|
||||
measured in the left and the right portion of the spinal cord. The
|
||||
following table shows the data for the right (control) side (Y1), and
|
||||
for the left (regenerating) side (Y2). The units of measurement are mg
|
||||
CP per 100 gm tissue.
|
9
statistics/examples/example014.tex
Normal file
@ -0,0 +1,9 @@
|
||||
In an investigation of the physiological effects of alcohol
|
||||
(ethanol), 15 mice were randomly allocated to three treatment groups,
|
||||
each to receive a different oral dose of alcohol. The dosage levels
|
||||
were 1.5, 3.0, and 6.0 g alcohol/kg body weight. The body temperature
|
||||
of each mouse was measured immediately before the alcohol was given
|
||||
and again 20 minutes afterward. The accompanying data shows the drop
|
||||
(before minus after) in body temperature for each mouse. (The negative
|
||||
value - 0.1 refers to a mouse whose temperature rose rather than
|
||||
fell.)
|
5
statistics/examples/example015.tex
Normal file
@ -0,0 +1,5 @@
|
||||
The peak flow rate of a person is the fastest rate
|
||||
at which the person can expel air after taking a deep breath.
|
||||
Peak flow rate is measured in units of liters per minute and
|
||||
gives an indication of the person's respiratory health. Flow is given
|
||||
in l/min, height in cm.
|
6
statistics/examples/example016.tex
Normal file
@ -0,0 +1,6 @@
|
||||
An experiment was conducted to study the effect of tamoxifen on
|
||||
patients with cervical cancer. One of the measurements made, both
|
||||
before and again after tamoxifen was given, was microvessel density
|
||||
(MVD). MVD, which is measured as number of vessels per mm$^2$, is a
|
||||
measurement that relates to the formation of blood vessels that feed a
|
||||
tumor and allow it to grow and spread.
|
5
statistics/examples/example017.tex
Normal file
@ -0,0 +1,5 @@
|
||||
A group of female college students were divided into three groups
|
||||
according to upper body strength. Their leg strength was tested by
|
||||
measuring how many consecutive times they could leg press 246 pounds
|
||||
before exhaustion. (The subjects were allowed only one second of rest
|
||||
between consecutive lifts.)
|
BIN
statistics/figs/2012-10-29_14-55-39_181.jpg
Normal file
After Width: | Height: | Size: 546 KiB |
BIN
statistics/figs/2012-10-29_14-56-59_866.jpg
Normal file
After Width: | Height: | Size: 575 KiB |
BIN
statistics/figs/2012-10-29_14-58-18_054.jpg
Normal file
After Width: | Height: | Size: 385 KiB |
BIN
statistics/figs/2012-10-29_14-59-05_984.jpg
Normal file
After Width: | Height: | Size: 865 KiB |
BIN
statistics/figs/2012-10-29_15-04-38_517.jpg
Normal file
After Width: | Height: | Size: 425 KiB |
BIN
statistics/figs/2012-10-29_15-09-25_388.jpg
Normal file
After Width: | Height: | Size: 582 KiB |
BIN
statistics/figs/2012-10-29_16-26-05_771.jpg
Executable file
After Width: | Height: | Size: 724 KiB |
BIN
statistics/figs/2012-10-29_16-29-35_312.jpg
Executable file
After Width: | Height: | Size: 386 KiB |
BIN
statistics/figs/2012-10-29_16-41-39_523.jpg
Executable file
After Width: | Height: | Size: 461 KiB |
BIN
statistics/figs/StandardErrorOrStandardDeviation.pdf
Normal file
BIN
statistics/figs/bootstraptest.png
Normal file
After Width: | Height: | Size: 4.5 KiB |
BIN
statistics/figs/bootstraptest2.png
Normal file
After Width: | Height: | Size: 5.5 KiB |
BIN
statistics/figs/example01.png
Executable file
After Width: | Height: | Size: 21 KiB |
BIN
statistics/figs/example02.png
Normal file
After Width: | Height: | Size: 20 KiB |
BIN
statistics/figs/example03.png
Normal file
After Width: | Height: | Size: 14 KiB |
BIN
statistics/figs/example04.png
Normal file
After Width: | Height: | Size: 22 KiB |
BIN
statistics/figs/hunger.png
Normal file
After Width: | Height: | Size: 39 KiB |
BIN
statistics/figs/repetition0.png
Normal file
After Width: | Height: | Size: 27 KiB |
BIN
statistics/figs/repetition1.png
Normal file
After Width: | Height: | Size: 36 KiB |
BIN
statistics/figs/repetition2.png
Normal file
After Width: | Height: | Size: 48 KiB |
BIN
statistics/figs/repetition3.png
Normal file
After Width: | Height: | Size: 50 KiB |
BIN
statistics/figs/repetition4.png
Normal file
After Width: | Height: | Size: 50 KiB |
BIN
statistics/figs/repetition5.png
Normal file
After Width: | Height: | Size: 49 KiB |
BIN
statistics/figs/samplingDistribution.png
Executable file
After Width: | Height: | Size: 68 KiB |
BIN
statistics/figs/samplingDistributionMedian00.pdf
Normal file
BIN
statistics/figs/samplingDistributionMedian01.pdf
Normal file
BIN
statistics/figs/statistic1.png
Executable file
After Width: | Height: | Size: 116 KiB |
BIN
statistics/figs/statistic2.png
Executable file
After Width: | Height: | Size: 125 KiB |
BIN
statistics/figs/statistic3.png
Executable file
After Width: | Height: | Size: 40 KiB |
BIN
statistics/figs/statistic4.png
Executable file
After Width: | Height: | Size: 56 KiB |
62
statistics/lecture_statistics.tex → statistics/lecture_statistics01.tex
Executable file → Normal file
@ -22,14 +22,19 @@
|
||||
% \useoutertheme{miniframes}
|
||||
}
|
||||
|
||||
\AtBeginSection[]
|
||||
\AtBeginSubsection[]
|
||||
{
|
||||
\begin{frame}<beamer>
|
||||
\begin{center}
|
||||
\Huge \insertsectionhead
|
||||
\end{center}
|
||||
\tableofcontents[
|
||||
currentsubsection,
|
||||
hideothersubsections,
|
||||
sectionstyle=show/hide,
|
||||
subsectionstyle=show/shaded,
|
||||
]
|
||||
% \frametitle{\insertsectionhead}
|
||||
% \tableofcontents[currentsection,hideothersubsections]
|
||||
\end{frame}
|
||||
}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
|
||||
@ -84,24 +89,6 @@ Bernstein Center T\"ubingen}
|
||||
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{plan}
|
||||
\setcounter{tocdepth}{1}
|
||||
\tableofcontents
|
||||
|
||||
\end{frame}
|
||||
\begin{frame}
|
||||
\frametitle{information}
|
||||
\begin{itemize}
|
||||
\item Samuels, M. L., Wittmer, J. A., \& Schaffner,
|
||||
A. A. (2010). Statistics for the Life Sciences (4th ed.,
|
||||
p. 668). Prentice Hall.
|
||||
\item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
|
||||
Ed.)Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
|
||||
Hall. doi:10.1037/0012764
|
||||
\item \url{http://stats.stackexchange.com}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% errorbars (error bar paper)
|
||||
@ -170,7 +157,8 @@ Bernstein Center T\"ubingen}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section[descriptive statistics, errorbars, and plots]{Day 1 -- descriptive statistics, errorbars, and plots}
|
||||
\section{Day 1 -- descriptive statistics and plots}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{types of data}
|
||||
|
||||
@ -315,8 +303,8 @@ Bernstein Center T\"ubingen}
|
||||
\frametitle{exercise}
|
||||
\begin{task}{Spearman rank correlation}
|
||||
\begin{enumerate}
|
||||
\item Use {\tt randi} to generate two 100-dimensional vectors
|
||||
{\tt x,y} of random integers between $0$ and $10$.
|
||||
\item Use {\tt randi} to generate two vectors
|
||||
{\tt x,y} with $100$ random integers between $0$ and $10$ each.
|
||||
\item Find out how to compute the Spearman
|
||||
rank correlation $$\rho = 1- {\frac {6 \sum
|
||||
d_i^2}{n(n^2 - 1)}}$$ with Matlab. $d_i = x_i - y_i$ is the
|
||||
@ -358,7 +346,6 @@ correlation coefficient does not have that property.
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{description of data and plotting}
|
||||
\subsection{what makes a good plot}
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
@ -522,7 +509,7 @@ correlation coefficient does not have that property.
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{nominal scale}
|
||||
\subsection{plotting data}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
%-------------------------------------------------------------
|
||||
@ -723,6 +710,23 @@ hold off
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{robust statistics}
|
||||
\begin{task}{When is a statistic called robust (leave-one-out)?}
|
||||
\begin{itemize}
|
||||
\item Generate an array with $20$ random numbers using {\tt
|
||||
randn}.
|
||||
\item Compute $20$ means: the $i^{th}$ mean is computed from the
|
||||
data set {\em without} the $i^{th}$ example.
|
||||
\item Repeat this with the median.
|
||||
\item Make a bar plot that depicts the means of the computed means
|
||||
and medians along with an appropriate measure of dispersion.
|
||||
\item What can you observe? Do you understand why?
|
||||
\end{itemize}
|
||||
\end{task}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{plotting interval/ratio/absolute data}
|
||||
@ -791,7 +795,13 @@ hold off
|
||||
ordinal vs. ordinal data (why not the bar chart?).
|
||||
\end{frame}
|
||||
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\begin{center}
|
||||
\Huge
|
||||
That's it.
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\end{document}
|
772
statistics/lecture_statistics02.tex
Normal file
@ -0,0 +1,772 @@
|
||||
\documentclass{beamer}
|
||||
\usepackage{xcolor}
|
||||
\usepackage{listings}
|
||||
\usepackage{pgf}
|
||||
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
|
||||
%\usepackage{multimedia}
|
||||
\usepackage[latin1]{inputenc}
|
||||
\usepackage{amsmath}
|
||||
\usepackage{bm}
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage{hyperref}
|
||||
\usepackage{ulem}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\mode<presentation>
|
||||
{
|
||||
\usetheme{Singapore}
|
||||
\setbeamercovered{opaque}
|
||||
\usecolortheme{tuebingen}
|
||||
\setbeamertemplate{navigation symbols}{}
|
||||
\usefonttheme{default}
|
||||
\useoutertheme{infolines}
|
||||
% \useoutertheme{miniframes}
|
||||
}
|
||||
|
||||
\AtBeginSubsection[]
|
||||
{
|
||||
\begin{frame}<beamer>
|
||||
\begin{center}
|
||||
\Huge \insertsectionhead
|
||||
\end{center}
|
||||
\tableofcontents[
|
||||
currentsubsection,
|
||||
hideothersubsections,
|
||||
sectionstyle=show/hide,
|
||||
subsectionstyle=show/shaded,
|
||||
]
|
||||
% \frametitle{\insertsectionhead}
|
||||
\end{frame}
|
||||
}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
|
||||
|
||||
\setbeamertemplate{blocks}[rounded][shadow=true]
|
||||
|
||||
\title[]{Scientific Computing -- Statistics}
|
||||
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
|
||||
University T\"ubingen\\
|
||||
Bernstein Center T\"ubingen}
|
||||
|
||||
\institute[Scientific Computing]{}
|
||||
\date{10/20/2014}
|
||||
%\logo{\pgfuseimage{logo}}
|
||||
|
||||
\subject{Lectures}
|
||||
|
||||
%%%%%%%%%% configuration for code
|
||||
\lstset{
|
||||
basicstyle=\ttfamily,
|
||||
numbers=left,
|
||||
showstringspaces=false,
|
||||
language=Matlab,
|
||||
commentstyle=\itshape\color{darkgray},
|
||||
keywordstyle=\color{blue},
|
||||
stringstyle=\color{green},
|
||||
backgroundcolor=\color{blue!10},
|
||||
breaklines=true,
|
||||
breakautoindent=true,
|
||||
columns=flexible,
|
||||
frame=single,
|
||||
captionpos=b,
|
||||
xleftmargin=1em,
|
||||
xrightmargin=1em,
|
||||
aboveskip=10pt
|
||||
}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
\newcommand{\mycite}[1]{
|
||||
\begin{flushright}
|
||||
\tiny \color{black!80} #1
|
||||
\end{flushright}
|
||||
}
|
||||
|
||||
\input{../latex/environments.tex}
|
||||
\makeatother
|
||||
|
||||
\begin{document}
|
||||
|
||||
\begin{frame}
|
||||
\titlepage
|
||||
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{information}
|
||||
\begin{itemize}
|
||||
\item Samuels, M. L., Wittmer, J. A., \& Schaffner,
|
||||
A. A. (2010). Statistics for the Life Sciences (4th ed.,
|
||||
p. 668). Prentice Hall.
|
||||
\item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
|
||||
Ed.) Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
|
||||
Hall. doi:10.1037/0012764
|
||||
\item \url{http://stats.stackexchange.com}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Day 2 -- errorbars, confidence intervals, and tests}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Types of evidence}
|
||||
\begin{frame}
|
||||
\scriptsize
|
||||
\frametitle{Examples}
|
||||
\begin{itemize}
|
||||
\item Before new drugs are given to human subjects, it is common
|
||||
practice to first test them in dogs or other animals. In part of
|
||||
one study, a new investigational drug was given to eight male and
|
||||
eight female dogs at doses of 8 mg/kg and 25 mg/kg. Within each
|
||||
sex, the two doses were assigned at random to the eight dogs. Many
|
||||
``endpoints'' were measured, such as cholesterol, sodium, glucose,
|
||||
and so on, from blood samples, in order to screen for toxicity
|
||||
problems in the dogs before starting studies on humans. One
|
||||
endpoint was alkaline phosphatase level (or APL, measured in U/l).
|
||||
For females, the effect of increasing the dose from 8 to 25 mg/kg
|
||||
was positive, although small (the average APL increased from 133.5
|
||||
to 143 U/l), but for males the effect of increasing the dose from
|
||||
8 to 25 mg/kg was negative.\pause
|
||||
\item On 15 July 1911, 65-year-old Mrs. Jane Decker was struck by
|
||||
lightning while in her house. She had been deaf since birth, but
|
||||
after being struck, she recovered her hearing, which led to a
|
||||
headline in the New York Times, ``Lightning Cures Deafness.''
|
||||
\pause
|
||||
\item Some research has suggested that there is a genetic basis for
|
||||
sexual orientation. One such study involved measuring the
|
||||
midsagittal area of the anterior commissure (AC) of the brain for
|
||||
30 homosexual men, 30 heterosexual men, and 30 heterosexual
|
||||
women. The researchers found that the AC tends to be larger in
|
||||
heterosexual women than in heterosexual men and that it is even
|
||||
larger in homosexual men.
|
||||
\end{itemize}
|
||||
\mycite{Samuels, Wittmer, Schaffner 2010}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}
|
||||
\scriptsize
|
||||
\frametitle{types of evidence}
|
||||
\begin{center}
|
||||
\Large
|
||||
{\em experiment} \\ is better than\\ {\em observational study}\\ is
|
||||
better than\\ {\em anecdotal evidence}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\subsection{What is inferential statistics?}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{frame}
|
||||
\frametitle{sources of error in an experiment}
|
||||
\begin{task}{Think about it for 2 min}
|
||||
If you repeat a scientific experiment, why do you not get the same
|
||||
result every time you repeat it?
|
||||
\end{task}
|
||||
\pause
|
||||
\begin{itemize}
|
||||
\item sampling error (a finite subset of the population of interest
|
||||
is selected in each experiment)
|
||||
\item nonsampling errors (e.g. noise, uncontrolled factors)
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% ----------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{statisticians are lazy}
|
||||
\Large
|
||||
\only<1>{
|
||||
\begin{center}
|
||||
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-26-05_771.jpg}
|
||||
\end{center}
|
||||
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
|
||||
}\pause
|
||||
\only<2>{
|
||||
\begin{center}
|
||||
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-41-39_523.jpg}
|
||||
\end{center}
|
||||
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
|
||||
}\pause
|
||||
\only<3>{
|
||||
\begin{center}
|
||||
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-29-35_312.jpg}
|
||||
\end{center}
|
||||
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
|
||||
}
|
||||
\end{frame}
|
||||
|
||||
% % ----------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{illustrating examples}
|
||||
\begin{question}{lung volume of smokers}
|
||||
Assume you know the sampling distribution of the mean lung volume
|
||||
of smokers. Would you believe that
|
||||
the sample came from a group of smokers?
|
||||
\begin{center}
|
||||
\includegraphics[width=.6\linewidth]{figs/example01.png}
|
||||
\end{center}
|
||||
\end{question}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{illustrating examples}
|
||||
\begin{question}{lung volume of smokers}
|
||||
What about now? How would the sampling distribution change if I
|
||||
change the population to (i) athletes, (ii) old people, (iii) all people?
|
||||
\begin{center}
|
||||
\includegraphics[width=.6\linewidth]{figs/example02.png}
|
||||
\end{center}
|
||||
\end{question}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{illustrating examples}
|
||||
\begin{question}{Is this diet effective?}
|
||||
\begin{center}
|
||||
\includegraphics[width=.6\linewidth]{figs/example03.png}
|
||||
\end{center}
|
||||
\end{question}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{illustrating examples}
|
||||
\begin{question}{Is this diet effective?}
|
||||
What do you think now?
|
||||
\begin{center}
|
||||
\includegraphics[width=.6\linewidth]{figs/example04.png}
|
||||
\end{center}
|
||||
\end{question}
|
||||
\end{frame}
|
||||
|
||||
% ----------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{the (imaginary) meta-study}
|
||||
\begin{center}
|
||||
\only<1>{
|
||||
\framesubtitle{finite sampling introduces variation: the sampling distribution}
|
||||
\includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
|
||||
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
|
||||
Tests}
|
||||
}\pause
|
||||
\only<2>{
|
||||
\framesubtitle{statistic vs. population parameter}
|
||||
\includegraphics[width=.8\linewidth]{figs/statistic1.png}
|
||||
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
|
||||
Tests}
|
||||
}\pause
|
||||
\only<3>{
|
||||
\framesubtitle{statistic vs. population parameter}
|
||||
\includegraphics[width=.8\linewidth]{figs/statistic2.png}
|
||||
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
|
||||
Tests}
|
||||
}\pause
|
||||
\only<4>{
|
||||
\framesubtitle{what parts of this diagram do we have in real life?}
|
||||
|
||||
\includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
|
||||
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
|
||||
Tests}
|
||||
}\pause
|
||||
\only<5>{
|
||||
\framesubtitle{what parts of this diagram do we have in real life?}
|
||||
|
||||
\includegraphics[width=.8\linewidth]{figs/statistic3.png}
|
||||
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
|
||||
Tests}
|
||||
}\pause
|
||||
\only<6->{
|
||||
\framesubtitle{what statistics does }
|
||||
\begin{minipage}{1.0\linewidth}
|
||||
\begin{minipage}{0.5\linewidth}
|
||||
\includegraphics[width=1.\linewidth]{figs/statistic4.png}
|
||||
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
|
||||
Tests}
|
||||
\end{minipage}
|
||||
\begin{minipage}{0.5\linewidth}
|
||||
\begin{itemize}
|
||||
\item it assumes, derives, or simulates the sampling
|
||||
distribution\pause
|
||||
\item the sampling distribution makes only sense if you think
|
||||
about it in terms of the meta study\pause
|
||||
\item {\color{red} the sampling distribution is the key to
|
||||
answering questions about the population from the value of
|
||||
the statistic}
|
||||
\end{itemize}
|
||||
\end{minipage}
|
||||
\end{minipage}
|
||||
}
|
||||
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{summary}
|
||||
\begin{itemize}
|
||||
\item In statistics, we use finite samples from a population to reason
|
||||
about features of the population. \pause
|
||||
\item The particular feature of the population we are interested in is called
|
||||
{\color{blue} population parameter}. We usually measure this
|
||||
parameter in our finite sample as well
|
||||
({\color{blue}statistic}).\pause
|
||||
\item Because of variations due to finite sampling the statistic
|
||||
almost never matches the population parameter. \pause
|
||||
\item Using the {\color{blue}sampling distribution} of the statistic, we make
|
||||
statements about the relation between our statistic and the
|
||||
population parameter.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\subsection{Errorbars}
|
||||
% ----------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{illustrating example}
|
||||
|
||||
As part of a study of the development of the thymus gland, researchers
|
||||
weighed the glands of $50$ chick embryos after 14 days of
|
||||
incubation. The following plot depicts the mean thymus gland weights (in mg):
|
||||
\mycite{modified from SWS exercise 6.3.3.}
|
||||
\pause
|
||||
{\bf Which of the two bar plots is the correct way of displaying the
|
||||
data?}
|
||||
|
||||
\begin{columns}
|
||||
\begin{column}[l]{.5\linewidth}
|
||||
\includegraphics[width=\linewidth]{figs/StandardErrorOrStandardDeviation.pdf}
|
||||
\end{column}
|
||||
\begin{column}[r]{.5\linewidth}
|
||||
\pause That depends on what you want to say
|
||||
\begin{itemize}
|
||||
\item To give a measure of variability in the data: use the
|
||||
{\color{blue} standard deviation $\hat\sigma =
|
||||
\sqrt{\frac{1}{n-1}\sum_{i=1}^n (x_i - \hat\mu)^2}$}
|
||||
\item To make a statement about the variability in the mean
|
||||
estimation: use {\color{blue}standard error $\frac{\hat\sigma}{\sqrt{n}}$}
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
|
||||
%%%%%%%%%%%%%%% GO ON HERE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% that depends: variability (descriptiv statistics, how variable is
|
||||
% the mean -> inferential, makes only sense in the meta-study setting)
|
||||
% first matlab exercise: simulate standard error
|
||||
% recommend paper for eyeballing test results from standard errors
|
||||
% from std of mean to confidence intervals
|
||||
% introduce bootstrapping (matlab exercise), then t-statistic
|
||||
% intervals
|
||||
% end with standard error of the median (and the thing from wikipedia)
|
||||
\end{frame}
|
||||
%------------------------------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{standard error}
|
||||
\framesubtitle{bootstrapping}
|
||||
|
||||
\begin{task}{standard error vs. standard deviation}
|
||||
|
||||
\begin{itemize}
|
||||
\item Download the dataset {\tt thymusglandweights.dat} from Ilias
|
||||
\item Write a program that loads the data into matlab, extracts
|
||||
the first $80$ datapoints, and repeat the following steps
|
||||
$m=500$ times:
|
||||
\begin{enumerate}
|
||||
\item draw $50$ data points from $x$ with replacement
|
||||
\item compute their mean and store it
|
||||
\end{enumerate}
|
||||
Look at the standard deviation of the computed means.
|
||||
\item Compare the result to the standard deviation of the original
|
||||
$50$ data points and the standard error.
|
||||
\end{itemize}
|
||||
\end{task}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{standard error}
|
||||
\begin{lstlisting}
|
||||
load thymusglandweights.dat
|
||||
|
||||
n = 80;
|
||||
m = 500;
|
||||
x = thymusglandweights(1:n);
|
||||
|
||||
|
||||
mu = zeros(m,1);
|
||||
for i = 1:m
|
||||
mu(i) = mean(x(randi(n,n,1)));
|
||||
end
|
||||
disp(['bootstrap standard error: ', num2str(std(mu))]);
|
||||
disp(['standard error: ', num2str(std(x)/sqrt(n))]);
|
||||
\end{lstlisting}
|
||||
\end{frame}
|
||||
%------------------------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{standard error}
|
||||
\framesubtitle{bootstrapping}
|
||||
\begin{itemize}
|
||||
\item The sample standard error $\frac{\hat\sigma}{\sqrt{n}}$ is
|
||||
{\color{blue}an estimate of the standard deviation of the means}
|
||||
in repeated experiments which is computed from a single
|
||||
experiment.
|
||||
\item When you want to do statistical tests on the mean, it is
|
||||
better to use the standard error, because one can eyeball
|
||||
significance from it
|
||||
\mycite{Cumming, G., Fidler, F., \& Vaux, D. L. (2007). Error bars
|
||||
in experimental biology. The Journal of Cell Biology, 177(1),
|
||||
7--11.}
|
||||
\item {\color{blue}Bootstrapping} is a way to generate an estimate
|
||||
of the {\color{blue}sampling distribution of any statistic}. Instead of
|
||||
sampling from the true distribution, it samples from the
|
||||
empirical distribution represented by your dataset.
|
||||
\mycite{Efron, B., \& Tibshirani, R. J. (1994). An Introduction to the Bootstrap. Chapman and Hall/CRC}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
%------------------------------------------------------------------------------
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{standard error of the median?}
|
||||
{\bf What kind of errorbars should we use for the median?}
|
||||
|
||||
It depends again:
|
||||
|
||||
{\bf Descriptive statistics}
|
||||
\begin{itemize}
|
||||
\item As a {\color{blue}descriptive statistic} one could use the {\em median
|
||||
absolute deviation}: the median of the absolute differences of
|
||||
the datapoints from the median.
|
||||
\item Alternatively, one could bootstrap a standard error of the
|
||||
median.
|
||||
\end{itemize}
|
||||
\pause
|
||||
{\bf Inferential statistics}
|
||||
\begin{itemize}
|
||||
\item For {\color{blue}inferential statistics} one should use
|
||||
something that gives the reader {\color{blue}information about
|
||||
significance}.
|
||||
\item Here, {\color{blue} confidence intervals} are a better choice.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% ----------------------------------------------------------
|
||||
\subsection{confidence intervals \& bootstrapping}
|
||||
%------------------------------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{confidence intervals}
|
||||
\begin{center}
|
||||
\only<1>{
|
||||
\vspace{.1cm}
|
||||
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-55-39_181.jpg}
|
||||
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
|
||||
|
||||
}\pause
|
||||
\only<2>{
|
||||
\vspace{.1cm}
|
||||
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-56-59_866.jpg}
|
||||
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
|
||||
}\pause
|
||||
\only<3>{
|
||||
\vspace{.1cm}
|
||||
\includegraphics[width=.4\linewidth]{figs/2012-10-29_14-58-18_054.jpg}
|
||||
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
|
||||
}\pause
|
||||
\only<4>{
|
||||
\vspace{.1cm}
|
||||
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-59-05_984.jpg}
|
||||
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
|
||||
}\pause
|
||||
\only<5>{
|
||||
\vspace{.1cm}
|
||||
\includegraphics[width=.6\linewidth]{figs/2012-10-29_15-04-38_517.jpg}
|
||||
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
|
||||
}\pause
|
||||
\only<6>{
|
||||
\vspace{.1cm}
|
||||
\includegraphics[width=.6\linewidth]{figs/2012-10-29_15-09-25_388.jpg}
|
||||
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
|
||||
}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
% ----------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{confidence intervals for the median}
|
||||
\begin{definition}{Confidence interval}
|
||||
A $(1-\alpha)\cdot 100\%$ confidence interval for a statistic
|
||||
$\hat\theta$ is an interval $\hat\theta \pm a$ such that the
|
||||
population parameter $\theta$ is contained in that interval
|
||||
in $(1-\alpha)\cdot 100\%$ of the experiments.
|
||||
|
||||
An alternative way to put it is that $(\hat\theta - \theta) \in
|
||||
[-a,a]$ in $(1-\alpha)\cdot 100\%$ of the cases.
|
||||
\end{definition}
|
||||
|
||||
|
||||
\begin{columns}
|
||||
\begin{column}[l]{.5\linewidth}
|
||||
If we knew the sampling distribution of the median $\hat m$, could
|
||||
we generate, e.g., a $95\%$ confidence interval?\pause
|
||||
\vspace{.5cm}
|
||||
|
||||
Yes, we could choose the interval such that $\hat m - m$ lies in that
|
||||
interval in $95\%$ of the cases.
|
||||
\end{column}
|
||||
\begin{column}[r]{.5\linewidth}
|
||||
\only<1>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian00.pdf}}
|
||||
\only<2>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian01.pdf}}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
|
||||
|
||||
|
||||
\end{frame}
|
||||
|
||||
% ----------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{confidence intervals for the mean via bootstrapping}
|
||||
\framesubtitle{how to get the sampling distribution}
|
||||
|
||||
\begin{task}{bootstrapping a confidence interval for the mean}
|
||||
\begin{itemize}
|
||||
\item Use the same dataset as before.
|
||||
\item Bootstrap $500$ means.
|
||||
\item Plot their distribution.
|
||||
\item Compute the $2.5\%$ and the $97.5\%$ percentile of the
|
||||
$500$ means.
|
||||
\item Mark them in the plot.
|
||||
\end{itemize}
|
||||
These two numbers give you $\hat m -a$ and $\hat m + a$ for
|
||||
the $95\%$ confidence interval.
|
||||
\end{task}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{confidence intervals for the mean}
|
||||
\scriptsize
|
||||
\begin{lstlisting}
|
||||
load thymusglandweights.dat
|
||||
n = 80;
|
||||
x = thymusglandweights(1:n);
|
||||
|
||||
m = 500;
|
||||
me = zeros(m,1);
|
||||
for i = 1:m
|
||||
me(i) = mean(x(randi(n,n,1)));
|
||||
end
|
||||
|
||||
disp(['bootstrap quantiles: ' , num2str(quantile(me,0.025)), ' ' ,num2str(quantile(me,1-0.025))]);
|
||||
|
||||
\end{lstlisting}
|
||||
\end{frame}
|
||||
% ----------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{confidence intervals}
|
||||
\framesubtitle{Notice the theme!}
|
||||
\begin{enumerate}
|
||||
\item choose a statistic
|
||||
\item get the sampling distribution of the statistic (by theory or
|
||||
simulation)
|
||||
\item use that distribution to reason about the relation between the
|
||||
true population parameter (e.g. $m$) and the sampled statistic
|
||||
$\hat m$
|
||||
\end{enumerate}
|
||||
|
||||
\begin{center}
|
||||
\color{blue}
|
||||
This is the scaffold of most statistical techniques. Try to find
|
||||
it and it can help you understand them.
|
||||
\end{center}
|
||||
|
||||
\end{frame}
|
||||
|
||||
|
||||
|
||||
% ----------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{confidence interval for the mean}
|
||||
\framesubtitle{Let's search the pattern in the normal way of computing
|
||||
a confidence interval for the mean}
|
||||
\begin{itemize}
|
||||
\item If the $x_1,...,x_n\sim \mathcal N(\mu,\sigma)$ are Gaussian, then $\hat\mu$ is Gaussian as
|
||||
well
|
||||
\item What is the mean of $\hat\mu$? What is its standard deviation?\pause
|
||||
\item[]{\color{gray} $\langle\hat\mu\rangle_{X_1,...,X_n} = \mu$ and
|
||||
$\mbox{std}(\hat\mu) = \frac{\sigma}{\sqrt{n}}$}\pause
|
||||
\item The problem is, that $\hat\mu \sim \mathcal N\left(\mu,
|
||||
\frac{\sigma}{\sqrt{n}}\right)$ depends on unknown population
|
||||
parameters.\pause
|
||||
\item However, $$\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}} \sim
|
||||
\mbox{t-distribution with }n-1\mbox{ degrees of freedom}$$
|
||||
\item Therefore,
|
||||
\begin{align*}
|
||||
P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
|
||||
\end{align*}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% ----------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{confidence interval for the mean}
|
||||
\begin{task}{Bootstrapping a confidence interval for the mean}
|
||||
Extend your script to contain the analytical confidence
|
||||
interval using
|
||||
\begin{align*}
|
||||
P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
|
||||
\end{align*}
|
||||
Hint: Use the function {\tt tinv(0.025, n-1)} to get the value of
|
||||
$t_{2.5\%}$ and similar for $t_{97.5\%}$.
|
||||
\end{task}
|
||||
|
||||
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}[fragile]
|
||||
\frametitle{solution}
|
||||
\scriptsize
|
||||
\begin{lstlisting}
|
||||
load thymusglandweights.dat
|
||||
n = 80;
|
||||
x = thymusglandweights(1:n);
|
||||
|
||||
m = 500;
|
||||
me = zeros(m,1);
|
||||
for i = 1:m
|
||||
me(i) = mean(x(randi(n,n,1)));
|
||||
end
|
||||
|
||||
t025 = tinv(0.025, n-1);
|
||||
t975 = tinv(0.975, n-1);
|
||||
|
||||
se = std(x)/sqrt(n);
|
||||
|
||||
disp(['bootstrap quantiles: ' , num2str(quantile(me,0.025)), ' ' ,num2str(quantile(me,1-0.025))]);
|
||||
disp(['analytical CI: ' , num2str(mean(x)+t025*se), ' ' , num2str(mean(x)+t975*se)]);
|
||||
|
||||
\end{lstlisting}
|
||||
\end{frame}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{statistical tests}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{frame}
|
||||
\frametitle{ingredients into a test}
|
||||
|
||||
\begin{itemize}
|
||||
\item {\bf What is the goal of a test?}\pause
|
||||
\item[] Check whether a measured
|
||||
statistic looks different from what you would expect if there was no
|
||||
effect.\pause
|
||||
\item {\bf What are the ingredients into a test?}\pause
|
||||
\item[] a test statistic (e.g. the mean, the median, ...) and a null
|
||||
distribution\pause
|
||||
\item {\bf What is a null distribution?}\pause
|
||||
\item[] The sampling distribution of the statistic in case there is
|
||||
no effect (i.e. the Null hypothesis is true).
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{how tests work}
|
||||
\begin{enumerate}
|
||||
\item Choose a statistic.
|
||||
\item Get a null distribution.
|
||||
\item Compare your actually measured value with the Null
|
||||
distribution.
|
||||
\end{enumerate}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Example: one sample test}
|
||||
\framesubtitle{step 2: get a Null distribution}
|
||||
\scriptsize
|
||||
Assume that the expected weight of a thymus gland from the
|
||||
literature is 34.3g. We want to test whether the mean of our
|
||||
thymus gland dataset is different from the expectation in the
|
||||
literature. Comparing a statistic of a dataset against a fixed value
|
||||
is called {\em one sample test}.
|
||||
\pause
|
||||
|
||||
\begin{itemize}
|
||||
\item {\bf How could we simulate the distribution of the data if the
|
||||
mean was really 34.3g?}\pause
|
||||
\item[] Bootstrapping.
|
||||
\end{itemize}
|
||||
|
||||
\begin{task}{generating a null distribution}
|
||||
\begin{itemize}
|
||||
\item Write a matlab program that bootstraps 2000 means from the
|
||||
thymus gland dataset.
|
||||
\item How can we adjust the data such that it has mean 34.3g (remember,
|
||||
we want to simulate the null distribution)?
|
||||
\item Plot a histogram of these 2000 means.
|
||||
\item Also indicate the actual mean of the data.
|
||||
\end{itemize}
|
||||
\end{task}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Example: one sample test}
|
||||
\framesubtitle{step 3: compare the actual value to the Null distribution}
|
||||
\begin{minipage}{1.0\linewidth}
|
||||
\begin{minipage}{0.5\linewidth}
|
||||
The question we want to answer in this step is:
|
||||
\begin{center}
|
||||
\color{blue} Does the actually measured value look like it came
|
||||
from the Null distribution?
|
||||
\end{center}
|
||||
\end{minipage}
|
||||
\begin{minipage}{0.5\linewidth}
|
||||
\includegraphics[width=\linewidth]{figs/bootstraptest.png}
|
||||
\end{minipage}
|
||||
\end{minipage}
|
||||
{\bf How could we do this in our bootstrapping example?}\pause
|
||||
\begin{itemize}
|
||||
\item Set a threshold. \pause How do we choose the threshold? \pause Via type I error.\pause
|
||||
\item Specify the type I error if we used the actual measured value
|
||||
as threshold (p-value). Why is that a reasonable strategy?
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Example: one sample test}
|
||||
\framesubtitle{step 3: compare the actual value to the Null distribution}
|
||||
\begin{task}{type I error and p-value}
|
||||
Extend the script such that it
|
||||
\begin{itemize}
|
||||
\item computes the $5\%$ significance boundaries from the
|
||||
distribution and plot it into the histogram.
|
||||
\item computes a p-value.
|
||||
\end{itemize}
|
||||
\end{task}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{two sample test}
|
||||
\framesubtitle{permutation test}
|
||||
{\bf Brain weight:} In 1888, P. Topinard published data on the brain
|
||||
weights of hundreds of French men and women. Brain weights are given
|
||||
in grams. The data can be downloaded from Ilias (example 002 from
|
||||
yesterday).
|
||||
|
||||
\vspace{.5cm}
|
||||
{\bf How could we determine (similar to bootstrapping) whether the
|
||||
mean brain weight of males and females are different?}
|
||||
\begin{itemize}
|
||||
\item What do we use as a statistic?
|
||||
\item[]<2-> The difference of the means of the two groups.
|
||||
\item How do we simulate the null distribution?
|
||||
\item[]<3-> Shuffle the labels ``male'' and ``female'', compute
|
||||
difference in means of two groups, and repeat.
|
||||
\end{itemize}
|
||||
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}
|
||||
\begin{center}
|
||||
\Huge That's it.
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
\end{document}
|
||||
|
||||
|
13
statistics/matlab/bootstrap_mean.m
Normal file
@ -0,0 +1,13 @@
|
||||
% BOOTSTRAP_MEAN  Bootstrapped vs. analytical standard error of the mean.
%
% Takes the first 50 values from thymusglandweights.dat, resamples them
% m times with replacement, and compares the standard deviation of the
% resampled means with the analytical standard error std(x)/sqrt(n).
load thymusglandweights.dat
x = thymusglandweights(1:50);

m = 500;         % number of bootstrap resamples
n = length(x);   % sample size; every resample draws n points

mu = zeros(m,1); % bootstrapped means
for i = 1:m
    % draw n indices uniformly from 1..n (with replacement) and store
    % the mean of the resampled data
    mu(i) = mean(x(randi(n,n,1)));
end

% Single-quoted format strings: double-quoted strings are a syntax error
% in MATLAB releases before R2017a, and the other scripts in this
% directory use single quotes as well.
fprintf('bootstrap standard error: %.4f\n', std(mu));
fprintf('standard error: %.4f\n', std(x)/sqrt(n));
|
||||
|
19
statistics/matlab/ci_mean.m
Normal file
@ -0,0 +1,19 @@
|
||||
% CI_MEAN  95% confidence interval for the mean: bootstrap vs. t-distribution.
%
% Bootstraps the sampling distribution of the mean of the first n values
% from thymusglandweights.dat and prints its 2.5% and 97.5% quantiles
% next to the analytical interval mean(x) + t*se, where t are the
% corresponding quantiles of the t-distribution with n-1 degrees of
% freedom and se is the standard error of the mean.
load thymusglandweights.dat

n = 80;
x = thymusglandweights(1:n);

m = 5000;          % number of bootstrap resamples
me = zeros(m,1);   % bootstrapped means
for i = 1:m
    % The bootstrapped statistic has to be the mean (not the median):
    % the analytical interval printed below is a confidence interval
    % for the mean, so only then are the two lines comparable.
    me(i) = mean(x(randi(n,n,1)));
end

% quantiles of the t-distribution with n-1 degrees of freedom
t025 = tinv(0.025, n-1);
t975 = tinv(0.975, n-1);

se = std(x)/sqrt(n);   % standard error of the mean

fprintf('bootstrap quantiles: %.4f, %.4f \n', quantile(me,0.025), quantile(me,0.975));
fprintf('analytical quantile: %.4f, %.4f \n', mean(x)+t025*se, mean(x)+t975*se);
|
||||
|
17
statistics/matlab/ci_media.m
Normal file
@ -0,0 +1,17 @@
|
||||
% CI_MEDIA  95% confidence interval for the median: bootstrap vs. order statistics.
%
% Resamples the first 50 values from thymusglandweights.dat with
% replacement, collects the median of every resample, and prints the
% 2.5% / 97.5% quantiles of these medians. For comparison it prints the
% two order statistics of the sorted data whose ranks are taken from the
% binomial(n, 0.5) quantiles.
load thymusglandweights.dat

alpha = 0.025;                          % tail probability on each side
nboot = 500;                            % number of bootstrap resamples

% sorted sample, so that x(k) is the k-th order statistic
x = sort(thymusglandweights(1:50));
n = numel(x);

medians = zeros(nboot,1);
for k = 1:nboot
    resample = x(randi(n,n,1));         % n draws with replacement
    medians(k) = median(resample);
end

% ranks of the order statistics used as interval bounds
lo_idx = binoinv(alpha,n,.5) - 1;
hi_idx = binoinv(1-alpha,n,.5);

fprintf('bootstrap quantiles: %.4f, %.4f \n', quantile(medians,alpha), quantile(medians,1-alpha));
fprintf('analytical quantile: %.4f, %.4f \n', x(lo_idx), x(hi_idx));
|
||||
|
38
statistics/matlab/tests.m
Normal file
@ -0,0 +1,38 @@
|
||||
% TESTS  Bootstrap one-sample test of the mean against a literature value.
%
% 1. Shift the data so that its mean equals the literature value; the
%    shifted data then represents the null hypothesis.
% 2. Bootstrap m means from the shifted data to obtain the null
%    distribution of the mean.
% 3. Plot the null distribution together with the measured mean and the
%    5% significance boundaries, and compute a two-sided p-value.
close all
clear all
load thymusglandweights.dat

literature_mean = 34.3;

x = thymusglandweights;
n = length(x);
y = x - mean(x) + literature_mean;   % data with mean == literature_mean

m = 2000;          % number of bootstrap resamples
me = zeros(m,1);   % bootstrapped means under the null hypothesis
for i = 1:m
    % The test statistic is the mean: the measured value mu and the
    % p-value below are both based on the mean, so the null distribution
    % has to be built from means as well (not from medians).
    me(i) = mean(y(randi(n,n,1)));
end

hist(me, 50);
hold on
mu = mean(x);      % actually measured statistic
% the line height of 200 is hard-coded and not derived from the histogram
plot([mu,mu],[0,200],'--r','LineWidth',3);
xlabel('thymus gland weights [g]');
ylabel('frequency');
title('bootstrapped null distribution');
hold off

% 5% significance boundaries
low = quantile(me,0.025);
high = quantile(me,0.975);
disp(['the 5% boundaries are: ', num2str(low), ' ', num2str(high)]);

hold on
plot([low,low],[0,200],'--g','LineWidth',3);
plot([high,high],[0,200],'--g','LineWidth',3);
hold off

% two-sided p-value: fraction of null means that deviate more from the
% literature mean than the measured mean does (left without a semicolon,
% so the value is printed)
pval = mean(abs(me-literature_mean) > abs(mu - literature_mean))

legend('Null distribution','measured mean','5% significance boundaries')
|