diff --git a/statistics/assignments/Makefile b/statistics/assignments/Makefile new file mode 100644 index 0000000..66ca3d7 --- /dev/null +++ b/statistics/assignments/Makefile @@ -0,0 +1,18 @@ +# Build one zip per example dataset: the dataset CSV plus a PDF of day1.tex with every "000" replaced by the dataset number. +all: + for number in 001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 ; do \ + echo $$number ; \ + sed "s/000/$$number/g" day1.tex > tmp.tex; \ + pdflatex tmp.tex; \ + mv tmp.pdf day1_$$number.pdf; \ + cp ../data/example$$number.csv ./ ;\ + rm tmp.* ; \ + zip example$$number.zip example$$number.csv day1_$$number.pdf ; \ + rm example$$number.csv ;\ + rm day1_$$number.pdf ; \ + done + +# Remove the generated zip archives and the auto directory; -f keeps clean from failing when no zip exists. +clean: + rm -f *.zip + rm -rf auto diff --git a/statistics/assignments/day1.tex b/statistics/assignments/day1.tex new file mode 100755 index 0000000..ed772cb --- /dev/null +++ b/statistics/assignments/day1.tex @@ -0,0 +1,72 @@ +\documentclass[addpoints,10pt]{exam} +\usepackage{url} +\usepackage{color} +\usepackage{hyperref} + +\pagestyle{headandfoot} +\runningheadrule +\firstpageheadrule + +\firstpageheader{Scientific Computing}{afternoon assignment day 01}{10/20/2014} +%\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014} +\firstpagefooter{}{}{} +\runningfooter{}{}{} +\pointsinmargin +\bracketedpoints + +%\printanswers +\shadedsolutions + + +\begin{document} +%%%%%%%%%%%%%%%%%%%%% Submission instructions %%%%%%%%%%%%%%%%%%%%%%%%% +\sffamily +%%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{questions} + \question To publish scientific results, you will usually need to + use statistical methods. Some journals provide you with a brief + description of how they expect you to apply statistical methods. One + example can be found in the author guidelines of the journal + Nature. + + Assume you collected the following dataset. You can download it from + Ilias as {\tt example000.csv}. 
Here is the description of the dataset: + + \begin{quotation} + \tt + \input{../examples/example000.tex} + \end{quotation} + + \begin{parts} + \part Download the dataset and write a script that loads it into + Matlab. + + \part Think about the type of your data (I might ask you that + tomorrow). + + \part Produce a plot that displays the data in an appropriate + way. Make sure to respect all elements of good plotting we + discussed today. + + \part Download the statistical checklist from Nature. Produce {\bf + one} slide that contains the plot and a concise summary of your + data which respects the requirements made by Nature (assume you + are producing a figure legend for the figure in Nature). It is + good style to avoid expressions like ``the plot shows'' or + similar. + + \part Upload your code, the data, and the slide as a zip to + Ilias. Deadline is 19h00. Structure the zip such that you can + present your program in front of the class. Several students will + be asked to present their slide and their code tomorrow morning. 
+ + \end{parts} + +\end{questions} + + + + + +\end{document} diff --git a/statistics/data/example001.csv b/statistics/data/example001.csv new file mode 100755 index 0000000..3f3d985 --- /dev/null +++ b/statistics/data/example001.csv @@ -0,0 +1,43 @@ +MAO,Diagnosis +6.8,I +4.1,I +7.3,I +14.2,I +18.8,I +9.9,I +7.4,I +11.9,I +5.2,I +7.8,I +7.8,I +8.7,I +12.7,I +14.5,I +10.7,I +8.4,I +9.7,I +10.6,I +7.8,II +4.4,II +11.4,II +3.1,II +4.3,II +10.1,II +1.5,II +7.4,II +5.2,II +10,II +3.7,II +5.5,II +8.5,II +7.7,II +6.8,II +3.1,II +6.4,III +10.8,III +1.1,III +2.9,III +4.5,III +5.8,III +9.4,III +6.8,III diff --git a/statistics/data/example002.csv b/statistics/data/example002.csv new file mode 100755 index 0000000..dd1ba9c --- /dev/null +++ b/statistics/data/example002.csv @@ -0,0 +1,186 @@ +Weight,Sex +1607,m +1157,m +1248,m +1310,m +1398,m +1237,m +1232,m +1343,m +1380,m +1274,m +1245,m +1286,m +1508,m +1105,m +1123,m +1198,m +1300,m +1249,m +1185,m +915,m +1345,m +1107,m +1357,m +1227,m +1205,m +1435,m +1289,m +1093,m +1211,m +1260,m +1193,m +1330,m +1130,m +1357,m +1193,m +1232,m +1321,m +1260,m +1380,m +1230,m +1136,m +1029,m +1223,m +1240,m +1264,m +1020,m +1415,m +1410,m +1275,m +1230,m +1085,m +1048,m +1181,m +1103,m +1165,m +1547,m +1173,m +1660,m +1307,m +1535,m +1315,m +1257,m +1424,m +1309,m +1170,m +1412,m +1270,m +1230,m +1233,m +1561,m +1193,m +1272,m +1355,m +1137,m +1354,m +1110,m +1265,m +1407,m +1227,m +1330,m +1222,m +1305,m +1475,m +1177,m +1337,m +1145,m +1070,m +1305,m +1085,m +1303,m +1390,m +1532,m +1238,m +1233,m +1280,m +1245,m +1459,m +1157,m +1302,m +1385,m +1310,m +1342,m +1303,m +1248,m +1115,m +1365,m +1227,m +1353,m +1125,f +1027,f +1112,f +983,f +1090,f +1247,f +1045,f +983,f +972,f +1045,f +937,f +1245,f +1200,f +1270,f +1200,f +1145,f +1090,f +1040,f +1343,f +1010,f +1095,f +1180,f +1168,f +1095,f +1040,f +1235,f +1050,f +1038,f +1046,f +1255,f +1228,f +1000,f +1225,f +1220,f +1085,f +1067,f +1006,f +1138,f +1175,f +1252,f +1037,f +958,f 
+1020,f +1068,f +1107,f +1317,f +952,f +1056,f +1203,f +1183,f +1392,f +1130,f +1284,f +996,f +1228,f +1087,f +1035,f +1170,f +1064,f +1250,f +1129,f +1088,f +1037,f +1117,f +1095,f +1027,f +1027,f +1190,f +1153,f +1037,f +1120,f +1212,f +1024,f +1135,f +1177,f +1096,f +1114,f diff --git a/statistics/data/example003.csv b/statistics/data/example003.csv new file mode 100755 index 0000000..91b4cb6 --- /dev/null +++ b/statistics/data/example003.csv @@ -0,0 +1,52 @@ +singtime +4.3 +24.1 +6.6 +7.3 +4 +2.6 +4 +3.9 +9.4 +6.2 +1.6 +6.5 +0.2 +2.7 +17.4 +5.6 +2 +3.8 +1.2 +0.7 +1.6 +2.3 +3.7 +0.8 +0.5 +4.5 +11.5 +3.5 +0.8 +5.2 +2 +0.7 +1.7 +5 +2.8 +1.5 +3.9 +3.7 +4.5 +1.8 +1.2 +0.7 +0.7 +4.2 +4.7 +2.2 +1.4 +14.1 +8.6 +3.7 +3.5 diff --git a/statistics/data/example004.csv b/statistics/data/example004.csv new file mode 100755 index 0000000..c987182 --- /dev/null +++ b/statistics/data/example004.csv @@ -0,0 +1,29 @@ +Pulse +97 +111 +93 +98 +107 +77 +121 +88 +96 +123 +119 +91 +99 +95 +99 +102 +77 +85 +104 +106 +114 +85 +112 +102 +104 +94 +104 +98 diff --git a/statistics/data/example005.csv b/statistics/data/example005.csv new file mode 100755 index 0000000..c36806e --- /dev/null +++ b/statistics/data/example005.csv @@ -0,0 +1,37 @@ +Branches +23 +30 +54 +28 +31 +29 +34 +35 +30 +27 +21 +43 +51 +35 +51 +49 +35 +24 +26 +29 +21 +29 +37 +27 +28 +33 +33 +23 +37 +27 +40 +48 +41 +20 +30 +57 diff --git a/statistics/data/example006.csv b/statistics/data/example006.csv new file mode 100755 index 0000000..ed8e69d --- /dev/null +++ b/statistics/data/example006.csv @@ -0,0 +1,32 @@ +Glucose +81 +85 +93 +93 +99 +76 +75 +84 +78 +84 +81 +82 +89 +81 +96 +82 +74 +70 +84 +86 +80 +70 +131 +75 +88 +102 +115 +89 +82 +79 +106 diff --git a/statistics/data/example007.csv b/statistics/data/example007.csv new file mode 100755 index 0000000..90b3b6d --- /dev/null +++ b/statistics/data/example007.csv @@ -0,0 +1,24 @@ +NerveCells +35 +19 +33 +34 +17 +26 +16 +40 +28 +30 +23 +12 +27 +33 +22 +31 +28 +28 +35 +23 
+23 +19 +29 diff --git a/statistics/data/example008.csv b/statistics/data/example008.csv new file mode 100755 index 0000000..04cfeb6 --- /dev/null +++ b/statistics/data/example008.csv @@ -0,0 +1,21 @@ +RateChange,Treatment +28,Caffeine +11,Caffeine +-3,Caffeine +14,Caffeine +-2,Caffeine +-4,Caffeine +18,Caffeine +2,Caffeine +2,Caffeine +26,Decaf +1,Decaf +0,Decaf +-4,Decaf +-4,Decaf +14,Decaf +16,Decaf +8,Decaf +0,Decaf +18,Decaf +-10,Decaf diff --git a/statistics/data/example009.csv b/statistics/data/example009.csv new file mode 100755 index 0000000..22a117e --- /dev/null +++ b/statistics/data/example009.csv @@ -0,0 +1,12 @@ +NEConcentration,Treatment +543,Toluene +523,Toluene +431,Toluene +635,Toluene +564,Toluene +549,Toluene +535,Control +385,Control +502,Control +412,Control +387,Control diff --git a/statistics/data/example010.csv b/statistics/data/example010.csv new file mode 100755 index 0000000..c9f6b89 --- /dev/null +++ b/statistics/data/example010.csv @@ -0,0 +1,13 @@ +Dopamine,Group +3420,toluene +2314,toluene +1911,toluene +2464,toluene +2781,toluene +2803,toluene +1820,control +1843,control +1397,control +1803,control +2539,control +1990,control diff --git a/statistics/data/example011.csv b/statistics/data/example011.csv new file mode 100755 index 0000000..f589813 --- /dev/null +++ b/statistics/data/example011.csv @@ -0,0 +1,10 @@ +Animal,Site I,Site II +1,50.6,38 +2,39.2,18.6 +3,35.2,23.2 +4,17,19 +5,11.2,6.6 +6,14.2,16.4 +7,24.2,14.4 +8,37.4,37.6 +9,35.2,24.4 diff --git a/statistics/data/example012.csv b/statistics/data/example012.csv new file mode 100755 index 0000000..089d315 --- /dev/null +++ b/statistics/data/example012.csv @@ -0,0 +1,10 @@ +Subject,mCPP,Placebo +1,1.1,0 +2,1.3,-0.3 +3,1,0.6 +4,1.7,0.3 +5,1.4,-0.7 +6,0.1,-0.2 +7,0.5,0.6 +8,1.6,0.9 +9,-0.5,-2 diff --git a/statistics/data/example013.csv b/statistics/data/example013.csv new file mode 100755 index 0000000..a22bbbb --- /dev/null +++ b/statistics/data/example013.csv @@ -0,0 +1,9 @@ 
+Animal,Control,Regenerating +1,16.3,11.5 +2,4.8,3.6 +3,10.9,12.5 +4,14.2,6.3 +5,16.3,15.2 +6,9.9,8.1 +7,29.2,16.6 +8,22.4,13.1 diff --git a/statistics/data/example014.csv b/statistics/data/example014.csv new file mode 100755 index 0000000..0811241 --- /dev/null +++ b/statistics/data/example014.csv @@ -0,0 +1,16 @@ +BodyTempDrop,AlcoholDose +0.2,1.5 +1.9,1.5 +-0.1,1.5 +0.5,1.5 +0.8,1.5 +4,3 +3.2,3 +2.3,3 +2.9,3 +3.8,3 +3.3,6 +5.1,6 +5.3,6 +6.7,6 +5.9,6 diff --git a/statistics/data/example015.csv b/statistics/data/example015.csv new file mode 100755 index 0000000..6665d64 --- /dev/null +++ b/statistics/data/example015.csv @@ -0,0 +1,18 @@ +PeakFlow,Height +733,174 +572,183 +500,176 +738,169 +616,183 +787,186 +866,178 +670,175 +550,172 +660,179 +575,171 +577,184 +783,200 +625,195 +470,176 +642,176 +856,190 diff --git a/statistics/data/example016.csv b/statistics/data/example016.csv new file mode 100755 index 0000000..ee63231 --- /dev/null +++ b/statistics/data/example016.csv @@ -0,0 +1,19 @@ +Patient,Before,After +1,98,75 +2,100,60 +3,82,25 +4,100,55 +5,93,78 +6,119,102 +7,70,58 +8,78,70 +9,104,90 +10,70,50 +11,60,65 +12,88,45 +13,45,36 +14,159,144 +15,65,27 +16,98,90 +17,66,16 +18,67,53 diff --git a/statistics/data/example017.csv b/statistics/data/example017.csv new file mode 100755 index 0000000..10fd825 --- /dev/null +++ b/statistics/data/example017.csv @@ -0,0 +1,21 @@ +LegStrength,UpperBodyStrength +55,low +70,low +45,low +246,low +240,low +96,low +225,low +40,middle +200,middle +250,middle +192,middle +117,middle +215,middle +181,high +85,high +416,high +228,high +257,high +316,high +134,high diff --git a/statistics/examples/example001.tex b/statistics/examples/example001.tex new file mode 100644 index 0000000..ccbdfe7 --- /dev/null +++ b/statistics/examples/example001.tex @@ -0,0 +1,6 @@ +MAO and Schizophrenia Monoamine oxidase (MAO) is an enzyme that is +thought to play a role in the regulation of behavior. 
To see whether +different categories of schizophrenic patients have different levels +of MAO activity, researchers collected blood specimens from 42 +patients and measured the MAO activity in the platelets. Values are +expressed as nmol benzylaldehyde product per $10^8$ platelets per hour. diff --git a/statistics/examples/example002.tex b/statistics/examples/example002.tex new file mode 100644 index 0000000..3aa5596 --- /dev/null +++ b/statistics/examples/example002.tex @@ -0,0 +1,3 @@ +Brain Weight In 1888, P. Topinard published data on the brain weights +of hundreds of French men and women. Brain weights are given in +grams. diff --git a/statistics/examples/example003.tex b/statistics/examples/example003.tex new file mode 100644 index 0000000..f630388 --- /dev/null +++ b/statistics/examples/example003.tex @@ -0,0 +1,4 @@ +Cricket Singing Times Male Mormon crickets (Anabrus simplex) sing to attract mates. +A field researcher measured the duration of 51 unsuccessful songs--that is, the time +until the singing male gave up and left his perch. The data is given +in minutes. \ No newline at end of file diff --git a/statistics/examples/example004.tex b/statistics/examples/example004.tex new file mode 100644 index 0000000..f41b7a5 --- /dev/null +++ b/statistics/examples/example004.tex @@ -0,0 +1,3 @@ +Pulse after Exercise: A group of 28 adults did some moderate exercise +for five minutes and then measured their pulses. Data is given in +beats/minute. diff --git a/statistics/examples/example005.tex b/statistics/examples/example005.tex new file mode 100644 index 0000000..df09295 --- /dev/null +++ b/statistics/examples/example005.tex @@ -0,0 +1,5 @@ +A dendritic tree is a branched structure that emanates from the body +of a nerve cell. As part of a study of brain development, 36 nerve +cells were taken from the brains of newborn guinea pigs. The +investigators counted the number of dendritic branch segments +emanating from each nerve cell. 
diff --git a/statistics/examples/example006.tex b/statistics/examples/example006.tex new file mode 100644 index 0000000..304b593 --- /dev/null +++ b/statistics/examples/example006.tex @@ -0,0 +1,4 @@ +For each of 31 healthy dogs, a veterinarian measured the glucose +concentration in the anterior chamber of the right eye and also in the +blood serum. The following data are the anterior chamber glucose +measurements, expressed as a percentage of the blood glucose. diff --git a/statistics/examples/example007.tex b/statistics/examples/example007.tex new file mode 100644 index 0000000..e49bd6a --- /dev/null +++ b/statistics/examples/example007.tex @@ -0,0 +1,5 @@ +A veterinary anatomist investigated the spatial arrangement of the +nerve cells in the intestine of a pony. He removed a block of tissue +from the intestinal wall, cut the block into many equal sections, and +counted the number of nerve cells in each of 23 randomly selected +sections. diff --git a/statistics/examples/example008.tex b/statistics/examples/example008.tex new file mode 100644 index 0000000..bb303c4 --- /dev/null +++ b/statistics/examples/example008.tex @@ -0,0 +1,8 @@ +Researchers were interested in the short-term effect that caffeine has +on heart rate. They enlisted a group of volunteers and measured each +person's resting heart rate. Then they had each subject drink 6 ounces +of coffee. Nine of the subjects were given coffee containing caffeine +and 11 were given decaffeinated coffee. After 10 minutes each person's +heart rate was measured again. The data in the table contains the +change in heart rate; a positive number means that heart rate went up +and a negative number means that heart rate went down. 
diff --git a/statistics/examples/example009.tex b/statistics/examples/example009.tex new file mode 100644 index 0000000..1cae5cd --- /dev/null +++ b/statistics/examples/example009.tex @@ -0,0 +1,9 @@ +Toluene and the Brain Abuse of substances containing toluene (for +example, glue) can produce various neurological symptoms. In an +investigation of the mechanism of these toxic effects, researchers +measured the concentrations of various chemicals in the brains of rats +that had been exposed to a toluene-laden atmosphere, and also in +unexposed control rats. The concentrations of the brain chemical +norepinephrine (NE) in the medulla region of the brain, for six +toluene-exposed rats and five control rats, are given in the accompanying +data file in ng/g. diff --git a/statistics/examples/example010.tex b/statistics/examples/example010.tex new file mode 100644 index 0000000..60e3e9b --- /dev/null +++ b/statistics/examples/example010.tex @@ -0,0 +1,3 @@ +In a pharmacological study, researchers measured the concentration of +the brain chemical dopamine in six rats exposed to toluene and six +control rats. Numbers are specified in ng/g. diff --git a/statistics/examples/example011.tex b/statistics/examples/example011.tex new file mode 100644 index 0000000..d3d88a9 --- /dev/null +++ b/statistics/examples/example011.tex @@ -0,0 +1,6 @@ +Nerve Cell Density For each of nine horses, a veterinary anatomist +measured the density of nerve cells at specified sites in the +intestine. The results for site I (midregion of jejunum) and site II +(mesenteric region of jejunum) are given in the accompanying dataset. +Each density value is the average of counts of nerve cells in five +equal sections of tissue. 
diff --git a/statistics/examples/example012.tex b/statistics/examples/example012.tex new file mode 100644 index 0000000..980c6e2 --- /dev/null +++ b/statistics/examples/example012.tex @@ -0,0 +1,6 @@ +Hunger Rating During a weight loss study each of nine subjects was +given either the active drug m-chlorophenylpiperazine (mCPP) for two +weeks and then a placebo for another two weeks, or else was given the +placebo for the first two weeks and then mCPP for the second two +weeks. As part of the study the subjects were asked to rate how hungry +they were at the end of each two-week period. \ No newline at end of file diff --git a/statistics/examples/example013.tex b/statistics/examples/example013.tex new file mode 100644 index 0000000..10f7e8f --- /dev/null +++ b/statistics/examples/example013.tex @@ -0,0 +1,10 @@ +Certain types of nerve cells have the ability to regenerate a part of +the cell that has been amputated. In an early study of this process, +measurements were made on the nerves in the spinal cord in rhesus +monkeys. Nerves emanating from the left side of the cord were cut, +while nerves from the right side were kept intact. During the +regeneration process, the content of creatine phosphate (CP) was +measured in the left and the right portion of the spinal cord. The +following table shows the data for the right (control) side (Y1), and +for the left (regenerating) side (Y2). The units of measurement are mg +CP per 100 gm tissue. \ No newline at end of file diff --git a/statistics/examples/example014.tex b/statistics/examples/example014.tex new file mode 100644 index 0000000..a07b90d --- /dev/null +++ b/statistics/examples/example014.tex @@ -0,0 +1,9 @@ + In an investigation of the physiological effects of alcohol +(ethanol), 15 mice were randomly allocated to three treatment groups, +each to receive a different oral dose of alcohol. The dosage levels +were 1.5, 3.0, and 6.0 g alcohol/kg body weight. 
The body temperature +of each mouse was measured immediately before the alcohol was given +and again 20 minutes afterward. The accompanying data shows the drop +(before minus after) in body temperature for each mouse. (The negative +value - 0.1 refers to a mouse whose temperature rose rather than +fell.) diff --git a/statistics/examples/example015.tex b/statistics/examples/example015.tex new file mode 100644 index 0000000..f265826 --- /dev/null +++ b/statistics/examples/example015.tex @@ -0,0 +1,5 @@ +The peak flow rate of a person is the fastest rate +at which the person can expel air after taking a deep breath. +Peak flow rate is measured in units of liters per minute and +gives an indication of the person's respiratory health. Flow is given +in l/min, height in cm. diff --git a/statistics/examples/example016.tex b/statistics/examples/example016.tex new file mode 100644 index 0000000..e2c0358 --- /dev/null +++ b/statistics/examples/example016.tex @@ -0,0 +1,6 @@ +An experiment was conducted to study the effect of tamoxifen on +patients with cervical cancer. One of the measurements made, both +before and again after tamoxifen was given, was microvessel density +(MVD). MVD, which is measured as number of vessels per mm$^2$, is a +measurement that relates to the formation of blood vessels that feed a +tumor and allow it to grow and spread. diff --git a/statistics/examples/example017.tex b/statistics/examples/example017.tex new file mode 100644 index 0000000..98c34dd --- /dev/null +++ b/statistics/examples/example017.tex @@ -0,0 +1,5 @@ +A group of female college students were divided into three groups +according to upper body strength. Their leg strength was tested by +measuring how many consecutive times they could leg press 246 pounds +before exhaustion. (The subjects were allowed only one second of rest +between consecutive lifts.) 
diff --git a/statistics/lecture_statistics.tex b/statistics/lecture_statistics01.tex old mode 100755 new mode 100644 similarity index 92% rename from statistics/lecture_statistics.tex rename to statistics/lecture_statistics01.tex index a54dcef..ac11763 --- a/statistics/lecture_statistics.tex +++ b/statistics/lecture_statistics01.tex @@ -170,7 +170,9 @@ Bernstein Center T\"ubingen} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section[descriptive statistics, errorbars, and plots]{Day 1 -- descriptive statistics, errorbars, and plots} +\section[descriptive statistics, errorbars, and plots]{Day 1 -- + descriptive statistics and plots} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{types of data} @@ -315,8 +317,8 @@ Bernstein Center T\"ubingen} \frametitle{exercise} \begin{task}{Spearman rank correlation} \begin{enumerate} - \item Use {\tt randi} to generate two 100-dimensional vectors - {\tt x,y} of random integers between $0$ and $10$. + \item Use {\tt randi} to generate two vectors + {\tt x,y} with $100$ random integers between $0$ and $10$ each. \item Find out how to compute the Spearman rank correlation $$\rho = 1- {\frac {6 \sum d_i^2}{n(n^2 - 1)}}$$ with Matlab. $d_i = x_i - y_i$ is the @@ -358,7 +360,6 @@ correlation coefficient does not have that property. \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{description of data and plotting} \subsection{what makes a good plot} %------------------------------------------------------------- \begin{frame}[fragile] @@ -723,6 +724,23 @@ hold off \end{center} \end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{robust statistics} + \begin{task}{When is a statistic called robust (leave-one-out)?} + \begin{itemize} + \item Generate an array with $20$ random numbers using {\tt + randn}. 
+ \item Compute $20$ means: the $i^{th}$ mean is computed from the + data set {\em without} the $i^{th}$ example. + \item Repeat this with the median. + \item Make a bar plot that depicts the means of the computed means + and medians along with an appropriate measure of dispersion. + \item What can you observe? Do you understand why? + \end{itemize} + \end{task} +\end{frame} + %------------------------------------------------------------- \begin{frame}[fragile] \frametitle{plotting interval/ratio/absolute data} @@ -791,7 +809,13 @@ hold off ordinal vs. ordinal data (why not the bar chart?). \end{frame} - +%------------------------------------------------------------- +\begin{frame}[fragile] + \begin{center} + \Huge + That's it. + \end{center} +\end{frame} \end{document} diff --git a/statistics/lecture_statistics02.tex b/statistics/lecture_statistics02.tex new file mode 100644 index 0000000..ac11763 --- /dev/null +++ b/statistics/lecture_statistics02.tex @@ -0,0 +1,823 @@ +\documentclass{beamer} +\usepackage{xcolor} +\usepackage{listings} +\usepackage{pgf} +%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade} +%\usepackage{multimedia} +\usepackage[latin1]{inputenc} +\usepackage{amsmath} +\usepackage{bm} +\usepackage[T1]{fontenc} +\usepackage{hyperref} +\usepackage{ulem} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode +{ + \usetheme{Singapore} + \setbeamercovered{opaque} + \usecolortheme{tuebingen} + \setbeamertemplate{navigation symbols}{} + \usefonttheme{default} + \useoutertheme{infolines} + % \useoutertheme{miniframes} +} + +\AtBeginSection[] +{ + \begin{frame} + \begin{center} + \Huge \insertsectionhead + \end{center} + % \frametitle{\insertsectionhead} + % \tableofcontents[currentsection,hideothersubsections] + \end{frame} +} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5 + +\setbeamertemplate{blocks}[rounded][shadow=true] + +\title[]{Scientific Computing -- Statistics} +\author[Statistics]{Fabian Sinz\\Dept. 
Neuroethology, + University T\"ubingen\\ +Bernstein Center T\"ubingen} + +\institute[Scientific Computing]{} + \date{10/20/2014} +%\logo{\pgfuseimage{logo}} + +\subject{Lectures} + +%%%%%%%%%% configuration for code +\lstset{ + basicstyle=\ttfamily, + numbers=left, + showstringspaces=false, + language=Matlab, + commentstyle=\itshape\color{darkgray}, + keywordstyle=\color{blue}, + stringstyle=\color{green}, + backgroundcolor=\color{blue!10}, + breaklines=true, + breakautoindent=true, + columns=flexible, + frame=single, + captionpos=b, + xleftmargin=1em, + xrightmargin=1em, + aboveskip=10pt + } +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\newcommand{\mycite}[1]{ +\begin{flushright} +\tiny \color{black!80} #1 +\end{flushright} +} + +\input{../latex/environments.tex} +\makeatother + +\begin{document} + +\begin{frame} + \titlepage + +\end{frame} + +\begin{frame} + \frametitle{plan} + \setcounter{tocdepth}{1} + \tableofcontents + +\end{frame} +\begin{frame} + \frametitle{information} + \begin{itemize} + \item Samuels, M. L., Wittmer, J. A., \& Schaffner, + A. A. (2010). Statistics for the Life Sciences (4th ed., + p. 668). Prentice Hall. + \item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch, + Ed.)Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice + Hall. 
doi:10.1037/0012764 + \item \url{http://stats.stackexchange.com} + \end{itemize} +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% errorbars (error bar paper) +% confidence intervals (sources of error) +% plotting (the right plot for the right data, Dan plotting paper) +% statistical test structure (bootstrapping, resampling, permutation) +% Don'ts: repeated testing, exclude data points +% study design +% PCA + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section[Prelude]{Prelude} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% ---------------------------------------------------------- +\begin{frame} +\frametitle{my expectations to this course} +\begin{itemize} +\item interest and participation +\item motivation to understand and question concepts +\item high scientific standard +\item intellectual honesty +\item sincere cooperation +\end{itemize} +\end{frame} + +% ---------------------------------------------------------- +\begin{frame} +\frametitle{this week will be ...} + +\only<1>{ +\framesubtitle{... no \sout{fun} piece of cake} +\begin{center} + \includegraphics[height=0.7\textheight]{figs/feeding.jpg} +\end{center} +} + +\only<2>{ +\framesubtitle{... no \sout{fun} piece of cake} +\begin{center} + \includegraphics[height=0.7\textheight]{figs/nacho-trainer.jpg} +\end{center} +} + +\only<3>{ +\framesubtitle{... no lecture (please!)} +\begin{center} + \includegraphics[height=0.7\textheight]{figs/soccer.jpg} +\end{center} +} + +\end{frame} + +% ---------------------------------------------------------- +\begin{frame} +\frametitle{What you should learn this week} +\begin{itemize} +\item What makes good plots? +\item What is descriptive/inferential statistics? +\item What is the general structure of a statistical test? +\item What does a p-value mean? +\item How can I build my own tests? +\item How large should my $n$ be? +\item What is {\em maximum likelihood} and why is it important? 
+\end{itemize} +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section[descriptive statistics, errorbars, and plots]{Day 1 -- + descriptive statistics and plots} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{types of data} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} + \frametitle{data scales} + \framesubtitle{What data types are distinguished in statistics?} + \Large + {\bf Why are data types important?} + \pause + \begin{itemize} + \item selection of statistics + \item selection of plots + \item selection of correct tests + \end{itemize} +\end{frame} +%------------------------------------------------------------- + +\begin{frame} + \frametitle{data scales} + \framesubtitle{nominal/categorial scale} + \begin{itemize} + \item properties like cell type, experimental group (i.e. treatment + 1, treatment 2, control) + \item each observation/sample is put into one category + \item there is no reasonable order among the categories + \item example: [rods, cones] vs. 
[cones, rods] + \end{itemize} +\end{frame} +%------------------------------------------------------------- + +\begin{frame} + \frametitle{data scales} + \framesubtitle{ordinal scale} + \begin{itemize} + \item like nominal scale, but there is an order + \item {\bf but:} there is no reasonable measure of {\em distance} + between the classes + \item examples: ranks, ratings + \end{itemize} +\end{frame} +%------------------------------------------------------------- + +\begin{frame} + \frametitle{data scales} + \framesubtitle{interval scale} + \begin{itemize} + \item quantitative/metric values + \item reasonable measure of distance between values but no absolute zero + \item examples: temperature in $^\circ$C + \end{itemize} +\end{frame} +%------------------------------------------------------------- + +\begin{frame} + \frametitle{data scales} + \framesubtitle{absolute/ratio scale} + \begin{itemize} + \item like interval scale but with absolute zero + \item example: temperature in K + \end{itemize} + \pause + \begin{emphasize}{relationships between scales} + \begin{itemize} + \item scales exhibit increasing information content from nominal + to absolute + \item conversion ``downwards'' always possible + \end{itemize} + \end{emphasize} +\end{frame} + +%------------------------------------------------------------- +\begin{frame} + \frametitle{examples from neuroscience and psychology} + \begin{itemize} + \item {\bf nominal:}\pause + \begin{itemize} + \item treatment group + \item stimulus class + \item cell type + \end{itemize} + + \item {\bf ordinal:} \pause + \begin{itemize} + \item ratings + \item clinical stages of a disease + \item states of an ion channel + \end{itemize} + \item {\bf absolute/ratio:}\pause + \begin{itemize} + \item firing rate + \item membrane potential + \item ion concentration + \end{itemize} + \end{itemize} +\end{frame} +%------------------------------------------------------------- +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+\subsection{statistics} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%------------------------------------------------------------- +\begin{frame} + \frametitle{What is ``a statistic''?} + \begin{definition}{statistic} + A statistic (singular) is a single measure of some attribute of a + sample (e.g., its arithmetic mean value). It is calculated by + applying a function (statistical algorithm) to the values of the + items of the sample, which are known together as a set of data. + + \source{http://en.wikipedia.org/wiki/Statistic} + \end{definition} +\end{frame} + +%------------------------------------------------------------- +\begin{frame} + \frametitle{examples of statistics} + \begin{itemize} + \item {\bf nominal:}\pause + \begin{itemize} + \item count + \item relative frequency/proportion + \end{itemize} + + \item {\bf ordinal:} \pause + \begin{itemize} + \item median + \item quantile/percentile + \item rank correlation + \end{itemize} + \item {\bf absolute/ratio:}\pause + \begin{itemize} + \item mean + \item variance/ standard deviation + \item Pearson correlation + \end{itemize} + \end{itemize} +\end{frame} + +%------------------------------------------------------------- +\begin{frame} + \frametitle{exercise} + \begin{task}{Spearman rank correlation} + \begin{enumerate} + \item Use {\tt randi} to generate two vectors + {\tt x,y} with $100$ random integers between $0$ and $10$ each. + \item Find out how to compute the Spearman + rank correlation $$\rho = 1- {\frac {6 \sum + d_i^2}{n(n^2 - 1)}}$$ with Matlab. $d_i$ is the + difference between the ranks of $x_i$ and $y_i$. + \item Compute $\rho$ between $x$ and $y$, between $x$ and + $y^2$, between $\log(x+1)$ and $y^2$. + \item Compute the "standard" (Pearson) correlation coefficient + between these values. + \item What can you observe and why does it make sense? 
+ \end{enumerate} + \end{task} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{solution} + \begin{solution}{Spearman rank correlation } +\scriptsize +\begin{lstlisting} +>>> x = randi(10, 100, 1); +>>> y = randi(10, 100, 1); +>>> corr(x,y,'type','Spearman') +ans = + 0.1220 +>>> corr(x,y.^2,'type','Spearman') +ans = + 0.1220 +>>> corr(x,y,'type','Pearson') +ans = + 0.1074 +>>> corr(x,y.^2,'type','Pearson') +ans = + 0.0551 +\end{lstlisting} +The rank correlation does not change under a monotone transformation +of the data. Therefore, it can be used for ordinal data. The Pearson +correlation coefficient does not have that property. + \end{solution} +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{what makes a good plot} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{} + \begin{center} + \Huge What makes a good plot? + \end{center} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{features of a good plot} + A good plot + \begin{itemize} + \item helps the reader to clearly understand your point.\pause + \item is not misleading and lets the reader judge the information + on her own (different y-axis/length scales in two related plots, + ``squeezing'' via log-plots). \pause + \item contains information about the data (a comic might be + illustrative, but does not contain information about the + data).\pause + \item adheres to the principle of {\em ink minimization}. 
+ \end{itemize} +\end{frame} + + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{features of a good plot} + \framesubtitle{design/organization} + \begin{itemize} + \item Is the display consistent with the model or hypothesis + being tested?\pause + \item Are there ``empty dimensions'' in the display that could be + removed (A 3D pie chart for 2D categorical data, extraneous colors + that do not encode meaningful information)?\pause + \item Does the display provide an honest and transparent portrayal + of the data (hiding, smoothing, modifying data points should be + avoided or explicitly mentioned)? + \end{itemize} + \mycite{Allen et al. 2012, Neuron} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{features of a good plot} + \framesubtitle{axes} + \begin{itemize} + \item Are axes scales defined as linear, log, or radial?\pause + \item Does each axis label describe the variable and its units (use + ``a.u.'' for arbitrary units)?\pause + \item Are axes limits appropriate for the data (The graphic should + not be bounded at zero if the data can take on both positive and + negative values.)?\pause + \item Is the aspect ratio appropriate for the data (When x and y + axes contrast the same variable under different conditions the + graphic should be square.)? + \end{itemize} + \mycite{Allen et al. 2012, Neuron} +\end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{features of a good plot} + \framesubtitle{color mapping} + \begin{itemize} + \item Is a color bar provided?\pause + \item Is the color map sensible for the data type (does the data + extend to both $\pm$, does it live in an interval, is it + circular)?\pause + \item Are contrasting colors consistent with a natural interpretation? + \item Can features be discriminated when printed in grayscale? 
+ \item Has red/green contrast been avoided to accommodate common + forms of colorblindness? + \end{itemize} + \mycite{Allen et al. 2012, Neuron} +\end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{features of a good plot} + \framesubtitle{uncertainty} + \begin{itemize} + \item Does the display indicate the uncertainty of estimated parameters?\pause + \item Is the type of error surface appropriate for the data? + \begin{itemize} + \item Use standard deviations to describe variability in the population.\pause + \item Use standard errors or confidence intervals to make inferences + about parameters estimated from a sample.\pause + \item Parametric confidence intervals should only be used if data + meet the assumptions of the underlying model.\pause + \end{itemize} + \item Are the units of uncertainty defined (is it standard error, is + it $95\%$ confidence interval)? + \end{itemize} + \mycite{Allen et al. 2012, Neuron} +\end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{features of a good plot} + \framesubtitle{annotation} + \begin{itemize} + \item Are all symbols defined, preferably by directly labeling objects?\pause + \item Is the directionality of a contrast between conditions obvious?\pause + \item Is the number of samples or independent experiments indicated?\pause + \item Are statistical procedures and criteria for significance described?\pause + \item Are uncommon abbreviations avoided or clearly defined?\pause + \item Are abbreviations consistent with those used in the text? + \end{itemize} + \mycite{Allen et al. 2012, Neuron} +\end{frame} + +\subsection{bad examples} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{suboptimal example} + \begin{center} + \includegraphics[width=.5\linewidth]{figs/nobelbad} + \end{center} + \mycite{Hafting et al. 
2005, nature} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{suboptimal example} + \begin{center} + \includegraphics[width=.5\linewidth]{figs/badbarright.png} + \end{center} + \source{http://en.wikipedia.org/wiki/Misleading\_graph} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{suboptimal example} + \begin{center} + \includegraphics[width=.4\linewidth]{figs/yaxisscalingleft.png} + \hspace{.5cm} + \includegraphics[width=.4\linewidth]{figs/yaxisscalingright.png} + \end{center} + \source{http://en.wikipedia.org/wiki/Misleading\_graph} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{suboptimal example} + \begin{center} + \includegraphics[width=.4\linewidth]{figs/badscatterleft.png} + \hspace{.5cm} + \includegraphics[width=.4\linewidth]{figs/badscatterright.png} + \end{center} + \source{http://en.wikipedia.org/wiki/Misleading\_graph} +\end{frame} + + +%------------------------------------------------------------- + +\begin{frame} + \frametitle{suboptimal example} + \begin{center} + \includegraphics[width=.8\linewidth]{figs/badbarplot} + \end{center} + \source{www.enfovis.com} +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{nominal scale} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting nominal data} + \framesubtitle{bar plot for count and relative frequency} + \begin{center} + \includegraphics[width=.8\linewidth]{figs/nominaldataplot} + \end{center} +\end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting nominal data} + \framesubtitle{bar plot for count and relative frequency} + \scriptsize +\begin{lstlisting} +% plot +bar([1,2], [50, 90], 'facecolor', 'k') + +% labels axes 
+ylabel('cell count') +xlabel('cell type') + +% cosmetics +xlim([0.5,2.5]) +ylim([0, 100]) +box('off') +set(gca,'XTick',1:2,'XTickLabel',{'pyramidal','interneuron'},'FontSize',20) + +% settings for saving the figure +set(gcf, 'PaperUnits', 'centimeters'); +set(gcf, 'PaperSize', [11.7 9.0]); +set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]); +\end{lstlisting} +\end{frame} + +%---------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting nominal data} + \framesubtitle{pie chart for count and relative frequency} + \begin{center} + \includegraphics[width=.8\linewidth]{figs/nominaldataplot2} + \end{center} +\end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting nominal data} + \framesubtitle{exercise} + \begin{task}{pie chart} + Plot the same data ($n_{py}=50$, $n_{in}=90$) as a pie chart in Matlab. + \end{task} +\end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting nominal data} + \framesubtitle{pie chart for relative frequency} + \scriptsize +\begin{lstlisting} +data = [50, 90]; +h = pie(data, [1,0], {'pyramidal (n=50)', 'interneuron (n=90)'}) +hText = findobj(h,'Type','text') % text object handles + +set(h(1), 'FaceColor', [.2,.2,.2]); +set(h(2), 'Rotation', 45); +set(h(3), 'FaceColor', [.8,.8,.8]); +set(h(4), 'Rotation', 45); + +title('cell count') +set(gca,'XTick',1:2,'XTickLabel',{'pyramidal', 'interneuron'}) +box('off') +set(gcf, 'PaperUnits', 'centimeters'); +set(gcf, 'PaperSize', [11.7 9.0]); +set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]); +\end{lstlisting} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{histogram} + \begin{center} + \includegraphics[width=.8\linewidth]{figs/histogram} + \end{center} +\end{frame} 
+%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{bad choice of bins} + \begin{center} + \includegraphics[width=.4\linewidth]{figs/histogrambad} + \includegraphics[width=.4\linewidth]{figs/histogrambad2} + \end{center} + \begin{summary}{Rule of thumb} + Choose the number of bins $b\approx n/20$, where $n$ is the number of data points. + \end{summary} +\end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{how to do it in Matlab} + \scriptsize +\begin{lstlisting} +x = randn(2000,1); % generate Gaussian data + +hist(x, 50); % generate histogram + +% set facecolor to gray +h = findobj(gca, 'Type','patch'); +set(h(1), 'FaceColor',[.2,.2,.2], 'EdgeColor','w', 'linewidth',2) + +% plot a white grid over it +h = gridxy([],get(gca,'ytick'),'color','w','linewidth',2) +uistack(h, 'top') + +% cosmetics +box('off'); +xlabel('Data') +ylabel('Count') +\end{lstlisting} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{bar plot} + There are several ways to plot a sample $x_1, \ldots, x_n$ of interval/ratio/absolute + scale with a bar plot. + \begin{center} + \includegraphics[width=.6\linewidth]{figs/barplots.png} + \end{center} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{bar plot} +\scriptsize +\begin{lstlisting} +% bar plot +x = rand(10,1); +gray = [.5,.5,.5]; + +bar(1, mean(x), 'EdgeColor','w','FaceColor', gray); +hold on + +bar(2, mean(x), 'EdgeColor','w','FaceColor', gray); +plot(0*x + 2, x, 'ok'); + +bar(3, mean(x), 'EdgeColor','w','FaceColor', gray); +errorbar(3, mean(x), std(x), 'ok'); + +bar(4, mean(x), 'EdgeColor','w','FaceColor', gray); +errorbar(4, 
mean(x), std(x)/sqrt(length(x)), 'ok'); +set(gca, 'xtick',[]) +ylabel('uniformly distributed random data in [0,1]') +box('off') +title('different forms of bar plots') +hold off +\end{lstlisting} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{bar plot and measure of central tendency and spread} + + \begin{itemize} + \item A bar plot collapses real data onto a single number and some + measure of spread. This number is usually a {\em measure of central + tendency}, i.e. a typical/central value for the probability + distribution of the data.\pause + \item What measures of central tendency can you think of?\pause + \begin{itemize} + \item mean + \item median + \item geometric mean (the nth root of the product of the data values) + \item weighted mean + \item midrange (mean of the maximum and minimum values of a data set) + \end{itemize}\pause + \item Additionally, the bar plot is equipped with a measure of {\em + spread} or {\em dispersion}. What measures of spread can you think of?\pause + \begin{itemize} + \item standard deviation + \item range (maximum minus minimum of a dataset) + \item inter-quartile range + \end{itemize} + \end{itemize} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{measure of central tendency and spread} + \Large + \begin{center} + \bf The part of statistics that summarizes data in a small number + of values is called {\em descriptive statistics}. + \end{center} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{robust statistics} + \begin{task}{When is a statistic called robust (leave-one-out)?} + \begin{itemize} + \item Generate an array with $20$ random numbers using {\tt + randn}. 
+ \item Compute $20$ means: the $i^{th}$ mean is computed from the + data set {\em without} the $i^{th}$ example. + \item Repeat this with the median. + \item Make a bar plot that depicts the means of the computed means + and medians along with an appropriate measure of dispersion. + \item What can you observe? Do you understand why? + \end{itemize} + \end{task} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{boxplot} + \begin{minipage}{1.0\linewidth} + \begin{minipage}{0.5\linewidth} + \begin{center} + \includegraphics[width=\linewidth]{figs/boxplot.png} + \end{center} + \end{minipage} + \begin{minipage}{0.5\linewidth} + Who knows what the elements mean?\pause + \begin{itemize} + \item the box depicts the inter-quartile range + \item the line denotes the median + \item the whiskers denote the most extreme data values that are not + considered outliers + \item outliers are plotted separately + \end{itemize} + \begin{task}{Outliers} + \begin{itemize} + \item Find out how an outlier is defined in a Matlab boxplot. + \item Can you remove an outlier from the dataset? + \end{itemize} + \end{task} + \end{minipage} + \end{minipage} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{violinplot} + \begin{center} + \includegraphics[width=.8\linewidth]{figs/violinplots.png} + \end{center} + \begin{itemize} + \item Violinplots depict the distribution of the data by a + smoothed histogram. + \item Additional information (data points, median, + inter-quartile range) is plotted inside. + \end{itemize} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting combinations of scales} + What could we use for a combination of categorical/nominal and + interval/ratio/absolute? 
+ \pause + \begin{center} + \includegraphics[width=.5\linewidth]{figs/factorplot.png} + \end{center} + Each category is a single bar. +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting combinations of scales} + What could we use for a combination of interval/ratio/absolute and + interval/ratio/absolute, e.g. $(x_1, y_1), ..., (x_n,y_n)$? \pause + \begin{center} + \includegraphics[width=.8\linewidth]{figs/paireddata.png} + \end{center} + Scatter plot or paired bar chart. Scatter plot can also be used for + ordinal vs. ordinal data (why not the bar chart?). +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \begin{center} + \Huge + That's it. + \end{center} +\end{frame} + + +\end{document} + +