diff --git a/statistics/Makefile b/statistics/Makefile index a8dbb44..83311ea 100644 --- a/statistics/Makefile +++ b/statistics/Makefile @@ -3,12 +3,9 @@ DOTSOURCES = $(wildcard figs/*.dot) all: $(DOTSOURCES:dot=pdf) - python figs/generate.py - python figs/generate03.py - python figs/generateTPlots.py - pdflatex talk*.tex - pdflatex talk*.tex - pdflatex talk*.tex + pdflatex lecture_statistics*.tex + pdflatex lecture_statistics*.tex + pdflatex lecture_statistics*.tex figs/prob%.pdf : figs/prob%.dot diff --git a/statistics/certificate.lyx b/statistics/certificate.lyx deleted file mode 100644 index f502b73..0000000 --- a/statistics/certificate.lyx +++ /dev/null @@ -1,218 +0,0 @@ -#LyX 2.0 created this file. For more info see http://www.lyx.org/ -\lyxformat 413 -\begin_document -\begin_header -\textclass g-brief2 -\begin_preamble -\fenstermarken % prints address window marks -\faltmarken % prints folding marks -%\lochermarke % prints puncher marks -\trennlinien % prints striplines -%\unserzeichen % prints "our ref" instead of "my ref" -\end_preamble -\use_default_options false -\maintain_unincluded_children false -\language english -\language_package default -\inputencoding auto -\fontencoding global -\font_roman palatino -\font_sans default -\font_typewriter default -\font_default_family default -\use_non_tex_fonts false -\font_sc false -\font_osf false -\font_sf_scale 100 -\font_tt_scale 100 - -\graphics default -\default_output_format default -\output_sync 0 -\bibtex_command default -\index_command default -\paperfontsize 12 -\spacing onehalf -\use_hyperref false -\papersize default -\use_geometry false -\use_amsmath 1 -\use_esint 1 -\use_mhchem 1 -\use_mathdots 1 -\cite_engine basic -\use_bibtopic false -\use_indices false -\paperorientation portrait -\suppress_date false -\use_refstyle 0 -\index Index -\shortcut idx -\color #008000 -\end_index -\secnumdepth 4 -\tocdepth 4 -\paragraph_separation skip -\defskip medskip -\quotes_language english -\papercolumns 1 -\papersides 1 -\paperpagestyle empty -\tracking_changes false -\output_changes false -\html_math_output 0 -\html_css_as_file 0 -\html_be_strict false -\end_header - -\begin_body - -\begin_layout Standard -\begin_inset Note Note -status open - -\begin_layout Plain Layout -Note also the document preamble settings. -\end_layout - -\end_inset - - -\end_layout - -\begin_layout NameRowA -Dr. - rer. - nat. 
- Fabian Sinz -\end_layout - -\begin_layout NameRowB - -\end_layout - -\begin_layout NameRowC - -\end_layout - -\begin_layout NameRowD - -\end_layout - -\begin_layout NameRowE - -\end_layout - -\begin_layout NameRowF - -\end_layout - -\begin_layout NameRowG - -\end_layout - -\begin_layout AddressRowA -University Tübingen -\end_layout - -\begin_layout AddressRowB - -\end_layout - -\begin_layout AddressRowC -Auf der Morgenstelle 28 -\end_layout - -\begin_layout AddressRowD -72076 Tübingen -\end_layout - -\begin_layout InternetRowA -http:/ -\begin_inset Formula $\!$ -\end_inset - -/www.epagoge.de -\end_layout - -\begin_layout InternetRowB -fabian.sinz@epagoge.de -\end_layout - -\begin_layout ReturnAddress -University Tübingen -\begin_inset Formula $\cdot$ -\end_inset - - Auf der Morgenstelle 28 -\begin_inset Formula $\cdot$ -\end_inset - - 72076 Tübingen -\end_layout - -\begin_layout Date -\begin_inset ERT -status collapsed - -\begin_layout Plain Layout - - -\backslash -today -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Reference - -\end_layout - -\begin_layout Opening -To whom it may concern, -\end_layout - -\begin_layout Closing -Best regards -\end_layout - -\begin_layout Signature -Dr. - Fabian Sinz -\end_layout - -\begin_layout Encl. - -\end_layout - -\begin_layout Letter -this letter certifies that -\emph on -Lakshmi Channappa -\emph default - attended the course -\emph on -Statistics in a Nutshell -\emph default - held at the Neurochip research group at the -\emph on -Naturwissenschaftliches und Medizinisches Institut Reutlingen -\emph default -in 2013 -\emph on -. - -\emph default -The course was organized in two lectures of four hours each and covered - topics such as basics of probability theory, errorbars and confidence intervals -, statistical tests, p-values, multiple hypothesis testing, basics of study - design, and basics of ANOVA. - Small calculation and programming exercises were used to clarify selected - material. 
-\end_layout - -\end_body -\end_document diff --git a/statistics/figs/2012-10-29_14-55-39_181.jpg b/statistics/figs/2012-10-29_14-55-39_181.jpg deleted file mode 100644 index af6326d..0000000 Binary files a/statistics/figs/2012-10-29_14-55-39_181.jpg and /dev/null differ diff --git a/statistics/figs/2012-10-29_14-56-59_866.jpg b/statistics/figs/2012-10-29_14-56-59_866.jpg deleted file mode 100644 index 7931e72..0000000 Binary files a/statistics/figs/2012-10-29_14-56-59_866.jpg and /dev/null differ diff --git a/statistics/figs/2012-10-29_14-58-18_054.jpg b/statistics/figs/2012-10-29_14-58-18_054.jpg deleted file mode 100644 index 0fbbb68..0000000 Binary files a/statistics/figs/2012-10-29_14-58-18_054.jpg and /dev/null differ diff --git a/statistics/figs/2012-10-29_14-59-05_984.jpg b/statistics/figs/2012-10-29_14-59-05_984.jpg deleted file mode 100644 index cffb482..0000000 Binary files a/statistics/figs/2012-10-29_14-59-05_984.jpg and /dev/null differ diff --git a/statistics/figs/2012-10-29_15-04-38_517.jpg b/statistics/figs/2012-10-29_15-04-38_517.jpg deleted file mode 100644 index f9272c7..0000000 Binary files a/statistics/figs/2012-10-29_15-04-38_517.jpg and /dev/null differ diff --git a/statistics/figs/2012-10-29_15-09-25_388.jpg b/statistics/figs/2012-10-29_15-09-25_388.jpg deleted file mode 100644 index d4d4444..0000000 Binary files a/statistics/figs/2012-10-29_15-09-25_388.jpg and /dev/null differ diff --git a/statistics/figs/2012-10-29_16-26-05_771.jpg b/statistics/figs/2012-10-29_16-26-05_771.jpg deleted file mode 100755 index a997cdd..0000000 Binary files a/statistics/figs/2012-10-29_16-26-05_771.jpg and /dev/null differ diff --git a/statistics/figs/2012-10-29_16-29-35_312.jpg b/statistics/figs/2012-10-29_16-29-35_312.jpg deleted file mode 100755 index 9f8843c..0000000 Binary files a/statistics/figs/2012-10-29_16-29-35_312.jpg and /dev/null differ diff --git a/statistics/figs/2012-10-29_16-41-39_523.jpg b/statistics/figs/2012-10-29_16-41-39_523.jpg deleted file mode 100755 index 88892ea..0000000 Binary files a/statistics/figs/2012-10-29_16-41-39_523.jpg and /dev/null differ diff --git a/statistics/figs/Bernoulli.pdf b/statistics/figs/Bernoulli.pdf deleted file mode 100644 index 2021b5a..0000000 Binary files a/statistics/figs/Bernoulli.pdf and /dev/null differ diff --git a/statistics/figs/Binomial.pdf b/statistics/figs/Binomial.pdf deleted file mode 100644 index 3fa0e22..0000000 Binary files a/statistics/figs/Binomial.pdf and /dev/null differ diff --git a/statistics/figs/Binomial00.pdf b/statistics/figs/Binomial00.pdf deleted file mode 100644 index 5f53474..0000000 Binary files a/statistics/figs/Binomial00.pdf and /dev/null differ diff --git a/statistics/figs/Binomial01.pdf b/statistics/figs/Binomial01.pdf deleted file mode 100644 index 2b5de46..0000000 Binary files a/statistics/figs/Binomial01.pdf and /dev/null differ diff --git a/statistics/figs/BinomialCdf00.pdf b/statistics/figs/BinomialCdf00.pdf deleted file mode 100644 index 6f32c24..0000000 Binary files a/statistics/figs/BinomialCdf00.pdf and /dev/null differ diff --git a/statistics/figs/BinomialCdf01.pdf b/statistics/figs/BinomialCdf01.pdf deleted file mode 100644 index 60be56a..0000000 Binary files a/statistics/figs/BinomialCdf01.pdf and /dev/null differ diff --git a/statistics/figs/BinomialExample00.pdf b/statistics/figs/BinomialExample00.pdf deleted file mode 100644 index 6bea5f0..0000000 Binary files a/statistics/figs/BinomialExample00.pdf and /dev/null differ diff --git a/statistics/figs/Fdistribution00.pdf 
b/statistics/figs/Fdistribution00.pdf deleted file mode 100644 index b0072e7..0000000 Binary files a/statistics/figs/Fdistribution00.pdf and /dev/null differ diff --git a/statistics/figs/Gaussian00.pdf b/statistics/figs/Gaussian00.pdf deleted file mode 100644 index 7c7ed3d..0000000 Binary files a/statistics/figs/Gaussian00.pdf and /dev/null differ diff --git a/statistics/figs/HE0.png b/statistics/figs/HE0.png deleted file mode 100644 index 53c83ef..0000000 Binary files a/statistics/figs/HE0.png and /dev/null differ diff --git a/statistics/figs/HE0Solution.png b/statistics/figs/HE0Solution.png deleted file mode 100644 index 4a0eeb2..0000000 Binary files a/statistics/figs/HE0Solution.png and /dev/null differ diff --git a/statistics/figs/HE1.png b/statistics/figs/HE1.png deleted file mode 100644 index 03ba81c..0000000 Binary files a/statistics/figs/HE1.png and /dev/null differ diff --git a/statistics/figs/HE1Solution.png b/statistics/figs/HE1Solution.png deleted file mode 100644 index b3f9663..0000000 Binary files a/statistics/figs/HE1Solution.png and /dev/null differ diff --git a/statistics/figs/HE2.png b/statistics/figs/HE2.png deleted file mode 100644 index 23e32d5..0000000 Binary files a/statistics/figs/HE2.png and /dev/null differ diff --git a/statistics/figs/HE2Solution.png b/statistics/figs/HE2Solution.png deleted file mode 100644 index 43ec354..0000000 Binary files a/statistics/figs/HE2Solution.png and /dev/null differ diff --git a/statistics/figs/HE3.png b/statistics/figs/HE3.png deleted file mode 100644 index cfd465d..0000000 Binary files a/statistics/figs/HE3.png and /dev/null differ diff --git a/statistics/figs/HE3Solution.png b/statistics/figs/HE3Solution.png deleted file mode 100644 index 46bdd17..0000000 Binary files a/statistics/figs/HE3Solution.png and /dev/null differ diff --git a/statistics/figs/Joint00.pdf b/statistics/figs/Joint00.pdf deleted file mode 100644 index 7f5666b..0000000 Binary files a/statistics/figs/Joint00.pdf and /dev/null differ diff --git a/statistics/figs/Joint01.pdf b/statistics/figs/Joint01.pdf deleted file mode 100644 index 7821a22..0000000 Binary files a/statistics/figs/Joint01.pdf and /dev/null differ diff --git a/statistics/figs/Joint02.pdf b/statistics/figs/Joint02.pdf deleted file mode 100644 index 2bd5b18..0000000 Binary files a/statistics/figs/Joint02.pdf and /dev/null differ diff --git a/statistics/figs/Poisson00.pdf b/statistics/figs/Poisson00.pdf deleted file mode 100644 index 4dc54d3..0000000 Binary files a/statistics/figs/Poisson00.pdf and /dev/null differ diff --git a/statistics/figs/Poisson01.pdf b/statistics/figs/Poisson01.pdf deleted file mode 100644 index 6801c3e..0000000 Binary files a/statistics/figs/Poisson01.pdf and /dev/null differ diff --git a/statistics/figs/PoissonConfidence.pdf b/statistics/figs/PoissonConfidence.pdf deleted file mode 100644 index 6b0dfd1..0000000 Binary files a/statistics/figs/PoissonConfidence.pdf and /dev/null differ diff --git a/statistics/figs/Posterior00.pdf b/statistics/figs/Posterior00.pdf deleted file mode 100644 index c71c350..0000000 Binary files a/statistics/figs/Posterior00.pdf and /dev/null differ diff --git a/statistics/figs/StandardErrorOrStandardDeviation.pdf b/statistics/figs/StandardErrorOrStandardDeviation.pdf deleted file mode 100644 index 0d26db2..0000000 Binary files a/statistics/figs/StandardErrorOrStandardDeviation.pdf and /dev/null differ diff --git a/statistics/figs/Uniform.pdf b/statistics/figs/Uniform.pdf deleted file mode 100644 index 92f0202..0000000 Binary files 
a/statistics/figs/Uniform.pdf and /dev/null differ diff --git a/statistics/figs/chirpqqplot.pdf b/statistics/figs/chirpqqplot.pdf deleted file mode 100644 index 6325416..0000000 Binary files a/statistics/figs/chirpqqplot.pdf and /dev/null differ diff --git a/statistics/figs/decision00.pdf b/statistics/figs/decision00.pdf deleted file mode 100644 index c156306..0000000 Binary files a/statistics/figs/decision00.pdf and /dev/null differ diff --git a/statistics/figs/decision01.pdf b/statistics/figs/decision01.pdf deleted file mode 100644 index 7016fa8..0000000 Binary files a/statistics/figs/decision01.pdf and /dev/null differ diff --git a/statistics/figs/decision02.pdf b/statistics/figs/decision02.pdf deleted file mode 100644 index cf4d6aa..0000000 Binary files a/statistics/figs/decision02.pdf and /dev/null differ diff --git a/statistics/figs/decision03.pdf b/statistics/figs/decision03.pdf deleted file mode 100644 index a4041dd..0000000 Binary files a/statistics/figs/decision03.pdf and /dev/null differ diff --git a/statistics/figs/dopamineqqplot.pdf b/statistics/figs/dopamineqqplot.pdf deleted file mode 100644 index 0eeed56..0000000 Binary files a/statistics/figs/dopamineqqplot.pdf and /dev/null differ diff --git a/statistics/figs/example01.png b/statistics/figs/example01.png deleted file mode 100755 index 964e117..0000000 Binary files a/statistics/figs/example01.png and /dev/null differ diff --git a/statistics/figs/example02.png b/statistics/figs/example02.png deleted file mode 100644 index 97efbd4..0000000 Binary files a/statistics/figs/example02.png and /dev/null differ diff --git a/statistics/figs/example03.png b/statistics/figs/example03.png deleted file mode 100644 index 91e0947..0000000 Binary files a/statistics/figs/example03.png and /dev/null differ diff --git a/statistics/figs/example04.png b/statistics/figs/example04.png deleted file mode 100644 index 4ca8d21..0000000 Binary files a/statistics/figs/example04.png and /dev/null differ diff --git a/statistics/figs/experimentalDesign00.pdf b/statistics/figs/experimentalDesign00.pdf deleted file mode 100644 index b822e61..0000000 Binary files a/statistics/figs/experimentalDesign00.pdf and /dev/null differ diff --git a/statistics/figs/experimentalDesign01.pdf b/statistics/figs/experimentalDesign01.pdf deleted file mode 100644 index 2e82177..0000000 Binary files a/statistics/figs/experimentalDesign01.pdf and /dev/null differ diff --git a/statistics/figs/feeding.jpg b/statistics/figs/feeding.jpg new file mode 100644 index 0000000..424aa6d Binary files /dev/null and b/statistics/figs/feeding.jpg differ diff --git a/statistics/figs/fig0.dot b/statistics/figs/fig0.dot deleted file mode 100644 index 1a79d45..0000000 --- a/statistics/figs/fig0.dot +++ /dev/null @@ -1,15 +0,0 @@ -digraph G { - rankdir=TB; - node [fontsize=12, shape=rectangle, style=filled]; - edge [penwidth=2, fontsize=10 ]; - - - data[label="type of data?"]; - IR[label="data normal distributed\nor n large?"]; - ordinal[label="?"]; - nominal[label="?"]; - - data->IR[label="interval/ratio"] - data->nominal[label="nominal/discrete"] - data->ordinal[label="ordinal"] -} diff --git a/statistics/figs/fig0.pdf b/statistics/figs/fig0.pdf deleted file mode 100644 index 14be473..0000000 Binary files a/statistics/figs/fig0.pdf and /dev/null differ diff --git a/statistics/figs/fig01.dot b/statistics/figs/fig01.dot deleted file mode 100755 index 4bb21a0..0000000 --- a/statistics/figs/fig01.dot +++ /dev/null @@ -1,94 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, 
shape=rectangle, style=filled, nodesep=0.95,ranksep=0.95]; - edge [penwidth=2, fontsize=10 ]; - - - subgraph cluster_IR { - label = "interval/ ratio"; - bgcolor=lightblue; - - IR[label="data normal distributed\nor n large?"]; - - normal[label="1, 2, or >2 groups?"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="1, 2, or >2 groups?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - normal->onesamp[label="1 group"]; - - ttest[label="t-test"]; - twosamp->ttest[label="independent"]; - - - twosamp[label="paired or\nindependent?"]; - pairedttest[label="paired\nt-test"]; - normal->twosamp[label="2 groups"]; - twosamp->pairedttest[label="paired"]; - - twosampNN[label="paired or\nindependent?"]; - - trulynotnormal->twosampNN[label="2 groups"]; - - signrank[label="Wilcoxon signed\nrank test"]; - - trulynotnormal->signrank[label="1 group\n(fix other\ngroup to\n one value)"]; - - - } - - trulynotnormal->signtest[label="1 group\n(fix other\ngroup to\n one value)"]; - - twosampNN->signrank[label="paired"]; - twosampNN->indepTwosampNN[label="independent"]; - - data->ordinal[label="ordinal"]; - - subgraph cluster_O { - label = "ordinal"; - bgcolor=lightblue; - indepTwosampNN[label="Wilcoxon-Mann-Whitney\ntest"]; - - - - twosampOrd[label="paired or\nindependent?"]; - ordinal->twosampOrd[label="2 groups"]; - twosampOrd->indepTwosampNN[label="independent"]; - - signtest[label="sign test"]; - ordinal[label="1, 2, or >2 groups?"]; - ordinal->signtest[label="1 group\n(fix other\ngroup to\n one value)"]; - } - - - subgraph cluster_ND { - label = "nominal/discrete"; - bgcolor=lightblue; - nd_test_type[label="1, 2, or >2 variables",color="green"]; - - } - - - - - - data[label="type of data?"]; - - data->IR[label="interval/ratio"]; - data->nd_test_type[label="nominal/discrete",color="red"]; - - - - twosampNN->signtest[label="paired"]; - twosampOrd->signtest[label="paired"]; - - -} - - diff --git a/statistics/figs/fig01.pdf b/statistics/figs/fig01.pdf deleted file mode 100644 index be531dd..0000000 Binary files a/statistics/figs/fig01.pdf and /dev/null differ diff --git a/statistics/figs/fig02.dot b/statistics/figs/fig02.dot deleted file mode 100644 index 36d99f4..0000000 --- a/statistics/figs/fig02.dot +++ /dev/null @@ -1,96 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.95,ranksep=0.95]; - edge [penwidth=2, fontsize=10 ]; - - - subgraph cluster_IR { - label = "interval/ ratio"; - bgcolor=lightblue; - - IR[label="data normal distributed\nor n large?"]; - - normal[label="1, 2, or >2 groups?"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="1, 2, or >2 groups?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - normal->onesamp[label="1 group"]; - - ttest[label="t-test"]; - twosamp->ttest[label="independent"]; - - - twosamp[label="paired or\nindependent?"]; - pairedttest[label="paired\nt-test"]; - normal->twosamp[label="2 groups"]; - twosamp->pairedttest[label="paired"]; - - twosampNN[label="paired or\nindependent?"]; - - trulynotnormal->twosampNN[label="2 groups"]; - - signrank[label="Wilcoxon signed\nrank test"]; - - trulynotnormal->signrank[label="1 group\n(fix other\ngroup to\n one value)"]; - - - } - - 
trulynotnormal->signtest[label="1 group\n(fix other\ngroup to\n one value)"]; - - twosampNN->signrank[label="paired"]; - twosampNN->indepTwosampNN[label="independent"]; - - data->ordinal[label="ordinal"]; - - subgraph cluster_O { - label = "ordinal"; - bgcolor=lightblue; - indepTwosampNN[label="Wilcoxon-Mann-Whitney\ntest"]; - - - - twosampOrd[label="paired or\nindependent?"]; - ordinal->twosampOrd[label="2 groups"]; - twosampOrd->indepTwosampNN[label="independent"]; - - signtest[label="sign test"]; - ordinal[label="1, 2, or >2 groups?"]; - ordinal->signtest[label="1 group\n(fix other\ngroup to\n one value)"]; - } - - - subgraph cluster_ND { - label = "nominal/discrete"; - bgcolor=lightblue; - nd_test_type[label="1, 2, or >2 variables",color="green"]; - onesampND[label="chi square for\ngoodness of fit"]; - - nd_test_type->onesampND[label="1 variable"]; - } - - - - - - data[label="type of data?"]; - - data->IR[label="interval/ratio"]; - data->nd_test_type[label="nominal/discrete",color="red"]; - - - - twosampNN->signtest[label="paired"]; - twosampOrd->signtest[label="paired"]; - - -} - - diff --git a/statistics/figs/fig02.pdf b/statistics/figs/fig02.pdf deleted file mode 100644 index 5c50172..0000000 Binary files a/statistics/figs/fig02.pdf and /dev/null differ diff --git a/statistics/figs/fig03.dot b/statistics/figs/fig03.dot deleted file mode 100644 index 16af84d..0000000 --- a/statistics/figs/fig03.dot +++ /dev/null @@ -1,98 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.95,ranksep=0.95]; - edge [penwidth=2, fontsize=10 ]; - - - subgraph cluster_IR { - label = "interval/ ratio"; - bgcolor=lightblue; - - IR[label="data normal distributed\nor n large?"]; - - normal[label="1, 2, or >2 groups?"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="1, 2, or >2 groups?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - normal->onesamp[label="1 group"]; - - ttest[label="t-test"]; - twosamp->ttest[label="independent"]; - - - twosamp[label="paired or\nindependent?"]; - pairedttest[label="paired\nt-test"]; - normal->twosamp[label="2 groups"]; - twosamp->pairedttest[label="paired"]; - - twosampNN[label="paired or\nindependent?"]; - - trulynotnormal->twosampNN[label="2 groups"]; - - signrank[label="Wilcoxon signed\nrank test"]; - - trulynotnormal->signrank[label="1 group\n(fix other\ngroup to\n one value)"]; - - - } - - trulynotnormal->signtest[label="1 group\n(fix other\ngroup to\n one value)"]; - - twosampNN->signrank[label="paired"]; - twosampNN->indepTwosampNN[label="independent"]; - - data->ordinal[label="ordinal"]; - - subgraph cluster_O { - label = "ordinal"; - bgcolor=lightblue; - indepTwosampNN[label="Wilcoxon-Mann-Whitney\ntest"]; - - - - twosampOrd[label="paired or\nindependent?"]; - ordinal->twosampOrd[label="2 groups"]; - twosampOrd->indepTwosampNN[label="independent"]; - - signtest[label="sign test"]; - ordinal[label="1, 2, or >2 groups?"]; - ordinal->signtest[label="1 group\n(fix other\ngroup to\n one value)"]; - } - - - subgraph cluster_ND { - label = "nominal/discrete"; - bgcolor=lightblue; - nd_test_type[label="1, 2, or >2 variables",color="green"]; - onesampND[label="chi square for\ngoodness of fit"]; - twosampND[label="chi square for\nindependence"]; - - nd_test_type->onesampND[label="1 variable"]; - nd_test_type->twosampND[label="2 variables"]; - } - - - - - - 
data[label="type of data?"]; - - data->IR[label="interval/ratio"]; - data->nd_test_type[label="nominal/discrete",color="red"]; - - - - twosampNN->signtest[label="paired"]; - twosampOrd->signtest[label="paired"]; - - -} - - diff --git a/statistics/figs/fig03.pdf b/statistics/figs/fig03.pdf deleted file mode 100644 index b8b2a85..0000000 Binary files a/statistics/figs/fig03.pdf and /dev/null differ diff --git a/statistics/figs/fig04.dot b/statistics/figs/fig04.dot deleted file mode 100644 index 2826662..0000000 --- a/statistics/figs/fig04.dot +++ /dev/null @@ -1,99 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.95,ranksep=0.95]; - edge [penwidth=2, fontsize=10 ]; - - - subgraph cluster_IR { - label = "interval/ ratio"; - bgcolor=lightblue; - - IR[label="data normal distributed\nor n large?"]; - - normal[label="1, 2, or >2 groups?"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="1, 2, or >2 groups?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - normal->onesamp[label="1 group"]; - - ttest[label="t-test"]; - twosamp->ttest[label="independent"]; - - - twosamp[label="paired or\nindependent?"]; - pairedttest[label="paired\nt-test"]; - normal->twosamp[label="2 groups"]; - twosamp->pairedttest[label="paired"]; - - twosampNN[label="paired or\nindependent?"]; - - trulynotnormal->twosampNN[label="2 groups"]; - - signrank[label="Wilcoxon signed\nrank test"]; - - trulynotnormal->signrank[label="1 group\n(fix other\ngroup to\n one value)"]; - - - } - - trulynotnormal->signtest[label="1 group\n(fix other\ngroup to\n one value)"]; - - twosampNN->signrank[label="paired"]; - twosampNN->indepTwosampNN[label="independent"]; - - data->ordinal[label="ordinal"]; - - subgraph cluster_O { - label = "ordinal"; - bgcolor=lightblue; - indepTwosampNN[label="Wilcoxon-Mann-Whitney\ntest"]; - - - - twosampOrd[label="paired or\nindependent?"]; - ordinal->twosampOrd[label="2 groups"]; - twosampOrd->indepTwosampNN[label="independent"]; - - signtest[label="sign test"]; - ordinal[label="1, 2, or >2 groups?"]; - ordinal->signtest[label="1 group\n(fix other\ngroup to\n one value)"]; - } - - - subgraph cluster_ND { - label = "nominal/discrete"; - bgcolor=lightblue; - nd_test_type[label="1, 2, or >2 variables",color="green"]; - onesampND[label="chi square for\ngoodness of fit"]; - twosampND[label="chi square for\nindependence"]; - - nd_test_type->onesampND[label="1 variable"]; - nd_test_type->onesampND[label="n variables",color="red"]; - nd_test_type->twosampND[label="2 variables"]; - } - - - - - - data[label="type of data?"]; - - data->IR[label="interval/ratio"]; - data->nd_test_type[label="nominal/discrete"]; - - - - twosampNN->signtest[label="paired"]; - twosampOrd->signtest[label="paired"]; - - -} - - diff --git a/statistics/figs/fig04.pdf b/statistics/figs/fig04.pdf deleted file mode 100644 index e1fa183..0000000 Binary files a/statistics/figs/fig04.pdf and /dev/null differ diff --git a/statistics/figs/fig05.dot b/statistics/figs/fig05.dot deleted file mode 100644 index 154be46..0000000 --- a/statistics/figs/fig05.dot +++ /dev/null @@ -1,99 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.95,ranksep=0.95]; - edge [penwidth=2, fontsize=10 ]; - - - subgraph cluster_IR { - label = "interval/ ratio"; - bgcolor=lightblue; - - IR[label="data 
normal distributed\nor n large?"]; - - normal[label="1, 2, or >2 groups?",color="green"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="1, 2, or >2 groups?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - normal->onesamp[label="1 group"]; - - ttest[label="t-test"]; - twosamp->ttest[label="independent"]; - - - twosamp[label="paired or\nindependent?"]; - pairedttest[label="paired\nt-test"]; - normal->twosamp[label="2 groups"]; - twosamp->pairedttest[label="paired"]; - - twosampNN[label="paired or\nindependent?"]; - - trulynotnormal->twosampNN[label="2 groups"]; - - signrank[label="Wilcoxon signed\nrank test"]; - - trulynotnormal->signrank[label="1 group\n(fix other\ngroup to\n one value)"]; - - - } - - trulynotnormal->signtest[label="1 group\n(fix other\ngroup to\n one value)"]; - - twosampNN->signrank[label="paired"]; - twosampNN->indepTwosampNN[label="independent"]; - - data->ordinal[label="ordinal"]; - - subgraph cluster_O { - label = "ordinal"; - bgcolor=lightblue; - indepTwosampNN[label="Wilcoxon-Mann-Whitney\ntest"]; - - - - twosampOrd[label="paired or\nindependent?"]; - ordinal->twosampOrd[label="2 groups"]; - twosampOrd->indepTwosampNN[label="independent"]; - - signtest[label="sign test"]; - ordinal[label="1, 2, or >2 groups?"]; - ordinal->signtest[label="1 group\n(fix other\ngroup to\n one value)"]; - } - - - subgraph cluster_ND { - label = "nominal/discrete"; - bgcolor=lightblue; - nd_test_type[label="1, 2, or >2 variables"]; - onesampND[label="chi square for\ngoodness of fit"]; - twosampND[label="chi square for\nindependence"]; - - nd_test_type->onesampND[label="1 variable"]; - nd_test_type->onesampND[label="n variables"]; - nd_test_type->twosampND[label="2 variables"]; - } - - - - - - data[label="type of data?"]; - - data->IR[label="interval/ratio"]; - data->nd_test_type[label="nominal/discrete"]; - - - - twosampNN->signtest[label="paired"]; - twosampOrd->signtest[label="paired"]; - - -} - - diff --git a/statistics/figs/fig05.pdf b/statistics/figs/fig05.pdf deleted file mode 100644 index 5b326c5..0000000 Binary files a/statistics/figs/fig05.pdf and /dev/null differ diff --git a/statistics/figs/fig06.dot b/statistics/figs/fig06.dot deleted file mode 100644 index 513a9fc..0000000 --- a/statistics/figs/fig06.dot +++ /dev/null @@ -1,101 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.95,ranksep=0.95]; - edge [penwidth=2, fontsize=10 ]; - - - subgraph cluster_IR { - label = "interval/ ratio"; - bgcolor=lightblue; - - IR[label="data normal distributed\nor n large?"]; - - normal[label="1, 2, or >2 groups?",color="green"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="1, 2, or >2 groups?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - normal->onesamp[label="1 group"]; - - ttest[label="t-test"]; - twosamp->ttest[label="independent"]; - - - twosamp[label="paired or\nindependent?"]; - pairedttest[label="paired\nt-test"]; - normal->twosamp[label="2 groups"]; - twosamp->pairedttest[label="paired"]; - - twosampNN[label="paired or\nindependent?"]; - - trulynotnormal->twosampNN[label="2 groups"]; - - signrank[label="Wilcoxon signed\nrank test"]; - - trulynotnormal->signrank[label="1 group\n(fix other\ngroup 
to\n one value)"]; - - ANOVA; - normal->ANOVA[color="red"]; - - } - - trulynotnormal->signtest[label="1 group\n(fix other\ngroup to\n one value)"]; - - twosampNN->signrank[label="paired"]; - twosampNN->indepTwosampNN[label="independent"]; - - data->ordinal[label="ordinal"]; - - subgraph cluster_O { - label = "ordinal"; - bgcolor=lightblue; - indepTwosampNN[label="Wilcoxon-Mann-Whitney\ntest"]; - - - - twosampOrd[label="paired or\nindependent?"]; - ordinal->twosampOrd[label="2 groups"]; - twosampOrd->indepTwosampNN[label="independent"]; - - signtest[label="sign test"]; - ordinal[label="1, 2, or >2 groups?"]; - ordinal->signtest[label="1 group\n(fix other\ngroup to\n one value)"]; - } - - - subgraph cluster_ND { - label = "nominal/discrete"; - bgcolor=lightblue; - nd_test_type[label="1, 2, or >2 variables"]; - onesampND[label="chi square for\ngoodness of fit"]; - twosampND[label="chi square for\nindependence"]; - - nd_test_type->onesampND[label="1 variable"]; - nd_test_type->onesampND[label="n variables"]; - nd_test_type->twosampND[label="2 variables"]; - } - - - - - - data[label="type of data?"]; - - data->IR[label="interval/ratio"]; - data->nd_test_type[label="nominal/discrete"]; - - - - twosampNN->signtest[label="paired"]; - twosampOrd->signtest[label="paired"]; - - -} - - diff --git a/statistics/figs/fig06.pdf b/statistics/figs/fig06.pdf deleted file mode 100644 index 278d70e..0000000 Binary files a/statistics/figs/fig06.pdf and /dev/null differ diff --git a/statistics/figs/fig1.dot b/statistics/figs/fig1.dot deleted file mode 100644 index 13bc03f..0000000 --- a/statistics/figs/fig1.dot +++ /dev/null @@ -1,22 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled]; - edge [penwidth=2, fontsize=10 ]; - - - data[label="type of data?"]; - IR[label="data normal distributed\nor n large?"]; - ordinal[label="?"]; - nominal[label="?"]; - - data->IR[label="interval/ratio"]; - data->nominal[label="nominal/discrete"]; - data->ordinal[label="ordinal"]; - - normal[label="?"]; - - IR->normal[label="normal"]; - - -} diff --git a/statistics/figs/fig1.pdf b/statistics/figs/fig1.pdf deleted file mode 100644 index f42272d..0000000 Binary files a/statistics/figs/fig1.pdf and /dev/null differ diff --git a/statistics/figs/fig10.dot b/statistics/figs/fig10.dot deleted file mode 100644 index 4edc47f..0000000 --- a/statistics/figs/fig10.dot +++ /dev/null @@ -1,58 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.95,ranksep=0.95]; - edge [penwidth=2, fontsize=10 ]; - - - data[label="type of data?"]; - IR[label="data normal distributed\nor n large?"]; - ordinal[label="1, 2, or >2 groups?"]; - nominal[label="?"]; - - data->IR[label="interval/ratio"]; - data->nominal[label="nominal/discrete"]; - data->ordinal[label="ordinal"]; - - normal[label="1, 2, or >2 groups?"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="1, 2, or >2 groups?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - normal->onesamp[label="1 group"]; - - - twosamp[label="paired or\nindependent?"]; - pairedttest[label="paired\nt-test"]; - normal->twosamp[label="2 groups"]; - twosamp->pairedttest[label="paired"]; - - twosampNN[label="paired or\nindependent?"]; - indepTwosampNN[label="Wilcoxon-Mann-Whitney\ntest"]; - - trulynotnormal->twosampNN[label="2 groups"]; - 
twosampNN->indepTwosampNN[label="independent"]; - - twosampOrd[label="paired or\nindependent?"]; - ordinal->twosampOrd[label="2 groups"]; - twosampOrd->indepTwosampNN[label="independent"]; - - - ttest[label="t-test"]; - twosamp->ttest[label="independent"]; - - signtest[label="sign test"]; - signrank[label="Wilcoxon signed\nrank test",color="lightblue"]; - twosampNN->signtest[label="paired"]; - twosampOrd->signtest[label="paired"]; - - twosampNN->signrank[label="paired"]; - -} - - diff --git a/statistics/figs/fig10.pdf b/statistics/figs/fig10.pdf deleted file mode 100644 index a87553b..0000000 Binary files a/statistics/figs/fig10.pdf and /dev/null differ diff --git a/statistics/figs/fig11.dot b/statistics/figs/fig11.dot deleted file mode 100644 index 84d5fe2..0000000 --- a/statistics/figs/fig11.dot +++ /dev/null @@ -1,81 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.95,ranksep=0.95]; - edge [penwidth=2, fontsize=10 ]; - - - subgraph cluster_IR { - label = "interval/ ratio"; - bgcolor=lightblue; - - IR[label="data normal distributed\nor n large?"]; - - normal[label="1, 2, or >2 groups?"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="1, 2, or >2 groups?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - normal->onesamp[label="1 group"]; - - ttest[label="t-test"]; - twosamp->ttest[label="independent"]; - - - twosamp[label="paired or\nindependent?"]; - pairedttest[label="paired\nt-test"]; - normal->twosamp[label="2 groups"]; - twosamp->pairedttest[label="paired"]; - - twosampNN[label="paired or\nindependent?"]; - - trulynotnormal->twosampNN[label="2 groups"]; - - signrank[label="Wilcoxon signed\nrank test"]; - - - - } - - - twosampNN->signrank[label="paired"]; - twosampNN->indepTwosampNN[label="independent"]; - - data->ordinal[label="ordinal"]; - - subgraph cluster_O { - label = "ordinal"; - bgcolor=lightblue; - indepTwosampNN[label="Wilcoxon-Mann-Whitney\ntest"]; - - - - twosampOrd[label="paired or\nindependent?"]; - ordinal->twosampOrd[label="2 groups"]; - twosampOrd->indepTwosampNN[label="independent"]; - - signtest[label="sign test"]; - - } - - data[label="type of data?"]; - ordinal[label="1, 2, or >2 groups?"]; - nominal[label="?"]; - - data->IR[label="interval/ratio"]; - data->nominal[label="nominal/discrete"]; - - - - twosampNN->signtest[label="paired"]; - twosampOrd->signtest[label="paired"]; - - -} - - diff --git a/statistics/figs/fig11.pdf b/statistics/figs/fig11.pdf deleted file mode 100644 index f2f7587..0000000 Binary files a/statistics/figs/fig11.pdf and /dev/null differ diff --git a/statistics/figs/fig12.dot b/statistics/figs/fig12.dot deleted file mode 100644 index d9913b5..0000000 --- a/statistics/figs/fig12.dot +++ /dev/null @@ -1,83 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.95,ranksep=0.95]; - edge [penwidth=2, fontsize=10 ]; - - - subgraph cluster_IR { - label = "interval/ ratio"; - bgcolor=lightblue; - - IR[label="data normal distributed\nor n large?"]; - - normal[label="1, 2, or >2 groups?"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="1, 2, or >2 groups?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - 
normal->onesamp[label="1 group"]; - - ttest[label="t-test"]; - twosamp->ttest[label="independent"]; - - - twosamp[label="paired or\nindependent?"]; - pairedttest[label="paired\nt-test"]; - normal->twosamp[label="2 groups"]; - twosamp->pairedttest[label="paired"]; - - twosampNN[label="paired or\nindependent?"]; - - trulynotnormal->twosampNN[label="2 groups"]; - - signrank[label="Wilcoxon signed\nrank test"]; - - trulynotnormal->signrank[label="1 group\n(fix other\ngroup to\n one value)",color="red"]; - - - } - - trulynotnormal->signtest[label="1 group\n(fix other\ngroup to\n one value)",color="red"]; - - twosampNN->signrank[label="paired"]; - twosampNN->indepTwosampNN[label="independent"]; - - data->ordinal[label="ordinal"]; - - subgraph cluster_O { - label = "ordinal"; - bgcolor=lightblue; - indepTwosampNN[label="Wilcoxon-Mann-Whitney\ntest"]; - - - - twosampOrd[label="paired or\nindependent?"]; - ordinal->twosampOrd[label="2 groups"]; - twosampOrd->indepTwosampNN[label="independent"]; - - signtest[label="sign test"]; - ordinal[label="1, 2, or >2 groups?"]; - ordinal->signtest[label="1 group\n(fix other\ngroup to\n one value)",color="red"]; - } - - data[label="type of data?"]; - nominal[label="?"]; - - data->IR[label="interval/ratio"]; - data->nominal[label="nominal/discrete"]; - - - - twosampNN->signtest[label="paired"]; - twosampOrd->signtest[label="paired"]; - - -} - - diff --git a/statistics/figs/fig12.pdf b/statistics/figs/fig12.pdf deleted file mode 100644 index 4552386..0000000 Binary files a/statistics/figs/fig12.pdf and /dev/null differ diff --git a/statistics/figs/fig2.dot b/statistics/figs/fig2.dot deleted file mode 100644 index b2c6d31..0000000 --- a/statistics/figs/fig2.dot +++ /dev/null @@ -1,28 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled]; - edge [penwidth=2, fontsize=10 ]; - - - data[label="type of data?"]; - IR[label="data normal distributed\nor n large?"]; - ordinal[label="?"]; - nominal[label="?"]; - - data->IR[label="interval/ratio"]; - data->nominal[label="nominal/discrete"]; - data->ordinal[label="ordinal"]; - - normal[label="?",color="lightblue"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - -} - diff --git a/statistics/figs/fig2.pdf b/statistics/figs/fig2.pdf deleted file mode 100644 index 11de021..0000000 Binary files a/statistics/figs/fig2.pdf and /dev/null differ diff --git a/statistics/figs/fig3.dot b/statistics/figs/fig3.dot deleted file mode 100644 index 577c4dc..0000000 --- a/statistics/figs/fig3.dot +++ /dev/null @@ -1,28 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.75,ranksep=0.75]; - edge [penwidth=2, fontsize=10 ]; - - - data[label="type of data?"]; - IR[label="data normal distributed\nor n large?"]; - ordinal[label="?"]; - nominal[label="?"]; - - data->IR[label="interval/ratio"]; - data->nominal[label="nominal/discrete"]; - data->ordinal[label="ordinal"]; - - normal[label="1, 2, or >2 groups?"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - -} - diff --git a/statistics/figs/fig3.pdf b/statistics/figs/fig3.pdf deleted file mode 100644 index 666bc25..0000000 Binary files 
a/statistics/figs/fig3.pdf and /dev/null differ diff --git a/statistics/figs/fig4.dot b/statistics/figs/fig4.dot deleted file mode 100644 index 6cf6236..0000000 --- a/statistics/figs/fig4.dot +++ /dev/null @@ -1,30 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.75,ranksep=0.75]; - edge [penwidth=2, fontsize=10 ]; - - - data[label="type of data?"]; - IR[label="data normal distributed\nor n large?"]; - ordinal[label="?"]; - nominal[label="?"]; - - data->IR[label="interval/ratio"]; - data->nominal[label="nominal/discrete"]; - data->ordinal[label="ordinal"]; - - normal[label="1, 2, or >2 groups?"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - normal->onesamp[label="1 group"]; - -} - diff --git a/statistics/figs/fig4.pdf b/statistics/figs/fig4.pdf deleted file mode 100644 index b86ba78..0000000 Binary files a/statistics/figs/fig4.pdf and /dev/null differ diff --git a/statistics/figs/fig5.dot b/statistics/figs/fig5.dot deleted file mode 100644 index 03f82d7..0000000 --- a/statistics/figs/fig5.dot +++ /dev/null @@ -1,38 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.75,ranksep=0.75]; - edge [penwidth=2, fontsize=10 ]; - - - data[label="type of data?"]; - IR[label="data normal distributed\nor n large?"]; - ordinal[label="?"]; - nominal[label="?"]; - - data->IR[label="interval/ratio"]; - data->nominal[label="nominal/discrete"]; - data->ordinal[label="ordinal"]; - - normal[label="1, 2, or >2 groups?"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - normal->onesamp[label="1 group"]; - - - twosamp[label="paired or\nnot paired?"]; - pairedttest[label="paired\nt-test"]; - normal->twosamp[label="2 groups"]; - twosamp->pairedttest[label="paired"]; - - -} - - diff --git a/statistics/figs/fig5.pdf b/statistics/figs/fig5.pdf deleted file mode 100644 index 4868ae5..0000000 Binary files a/statistics/figs/fig5.pdf and /dev/null differ diff --git a/statistics/figs/fig6.dot b/statistics/figs/fig6.dot deleted file mode 100644 index 46664b1..0000000 --- a/statistics/figs/fig6.dot +++ /dev/null @@ -1,49 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.75,ranksep=0.75]; - edge [penwidth=2, fontsize=10 ]; - - - data[label="type of data?"]; - IR[label="data normal distributed\nor n large?"]; - ordinal[label="1, 2, or >2 groups?",color="lightblue"]; - nominal[label="?"]; - - data->IR[label="interval/ratio"]; - data->nominal[label="nominal/discrete"]; - data->ordinal[label="ordinal"]; - - normal[label="1, 2, or >2 groups?"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="1, 2, or >2 groups?",color="lightblue"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - normal->onesamp[label="1 group"]; - - - twosamp[label="paired or\nindependent?"]; - pairedttest[label="paired\nt-test"]; - normal->twosamp[label="2 groups"]; - 
twosamp->pairedttest[label="paired"]; - - twosampNN[label="paired or\nindependent?",color="lightblue"]; - indepTwosampNN[label="Wilcoxon-Mann-Whitney\ntest",color="lightblue"]; - - trulynotnormal->twosampNN[label="2 groups"]; - twosampNN->indepTwosampNN[label="independent"]; - - twosampOrd[label="paired or\nindependent?",color="lightblue"]; - ordinal->twosampOrd[label="2 groups"]; - twosampOrd->indepTwosampNN[label="independent"]; - - - -} - - diff --git a/statistics/figs/fig6.pdf b/statistics/figs/fig6.pdf deleted file mode 100644 index bb6f7a4..0000000 Binary files a/statistics/figs/fig6.pdf and /dev/null differ diff --git a/statistics/figs/fig7.dot b/statistics/figs/fig7.dot deleted file mode 100644 index c6f3bed..0000000 --- a/statistics/figs/fig7.dot +++ /dev/null @@ -1,52 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.75,ranksep=0.75]; - edge [penwidth=2, fontsize=10 ]; - - - data[label="type of data?"]; - IR[label="data normal distributed\nor n large?"]; - ordinal[label="1, 2, or >2 groups?"]; - nominal[label="?"]; - - data->IR[label="interval/ratio"]; - data->nominal[label="nominal/discrete"]; - data->ordinal[label="ordinal"]; - - normal[label="1, 2, or >2 groups?"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="1, 2, or >2 groups?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - normal->onesamp[label="1 group"]; - - - twosamp[label="paired or\nindependent?"]; - pairedttest[label="paired\nt-test"]; - normal->twosamp[label="2 groups"]; - twosamp->pairedttest[label="paired"]; - - twosampNN[label="paired or\nindependent?"]; - indepTwosampNN[label="Wilcoxon-Mann-Whitney\ntest"]; - - trulynotnormal->twosampNN[label="2 groups"]; - twosampNN->indepTwosampNN[label="independent"]; - - twosampOrd[label="paired or\nindependent?"]; - ordinal->twosampOrd[label="2 groups"]; - twosampOrd->indepTwosampNN[label="indepdendent"]; - - - ttest[label="?",color="lightblue"]; - twosamp->ttest[label="independent"]; - - -} - - diff --git a/statistics/figs/fig7.pdf b/statistics/figs/fig7.pdf deleted file mode 100644 index a5fe491..0000000 Binary files a/statistics/figs/fig7.pdf and /dev/null differ diff --git a/statistics/figs/fig8.dot b/statistics/figs/fig8.dot deleted file mode 100644 index 3901b30..0000000 --- a/statistics/figs/fig8.dot +++ /dev/null @@ -1,54 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.75,ranksep=0.75]; - edge [penwidth=2, fontsize=10 ]; - - - data[label="type of data?"]; - IR[label="data normal distributed\nor n large?"]; - ordinal[label="1, 2, or >2 groups?"]; - nominal[label="?"]; - - data->IR[label="interval/ratio"]; - data->nominal[label="nominal/discrete"]; - data->ordinal[label="ordinal"]; - - normal[label="1, 2, or >2 groups?"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="1, 2, or >2 groups?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - normal->onesamp[label="1 group"]; - - - twosamp[label="paired or\nindependent?"]; - pairedttest[label="paired\nt-test"]; - normal->twosamp[label="2 groups"]; - twosamp->pairedttest[label="paired"]; - - twosampNN[label="paired or\nindependent?"]; - 
indepTwosampNN[label="Wilcoxon-Mann-Whitney\ntest"]; - - trulynotnormal->twosampNN[label="2 groups"]; - twosampNN->indepTwosampNN[label="independent"]; - - twosampOrd[label="paired or\nindependent?"]; - ordinal->twosampOrd[label="2 groups"]; - twosampOrd->indepTwosampNN[label="indepdendent"]; - - - ttest[label="t-test",color="lightblue"]; - twosamp->ttest[label="independent"]; - - pairedTwosampNN[label="?",color="lightblue"]; - twosampNN->pairedTwosampNN[label="paired"]; - twosampOrd->pairedTwosampNN[label="paired"]; -} - - diff --git a/statistics/figs/fig8.pdf b/statistics/figs/fig8.pdf deleted file mode 100644 index 88b042e..0000000 Binary files a/statistics/figs/fig8.pdf and /dev/null differ diff --git a/statistics/figs/fig9.dot b/statistics/figs/fig9.dot deleted file mode 100644 index 9af76f5..0000000 --- a/statistics/figs/fig9.dot +++ /dev/null @@ -1,54 +0,0 @@ -digraph G { - rankdir=TB; - ranksep=0.2; - node [fontsize=12, shape=rectangle, style=filled, nodesep=0.75,ranksep=0.95]; - edge [penwidth=2, fontsize=10 ]; - - - data[label="type of data?"]; - IR[label="data normal distributed\nor n large?"]; - ordinal[label="1, 2, or >2 groups?"]; - nominal[label="?"]; - - data->IR[label="interval/ratio"]; - data->nominal[label="nominal/discrete"]; - data->ordinal[label="ordinal"]; - - normal[label="1, 2, or >2 groups?"]; - notnormal[label="transform into\nnormal?"]; - trulynotnormal[label="1, 2, or >2 groups?"]; - - IR->normal[label="normal"]; - IR->notnormal[label="not normal"]; - notnormal->normal[label="yes"]; - notnormal->trulynotnormal[label="no"]; - - onesamp[label="one-sample\nt-test"]; - normal->onesamp[label="1 group"]; - - - twosamp[label="paired or\nindependent?"]; - pairedttest[label="paired\nt-test"]; - normal->twosamp[label="2 groups"]; - twosamp->pairedttest[label="paired"]; - - twosampNN[label="paired or\nindependent?"]; - indepTwosampNN[label="Wilcoxon-Mann-Whitney\ntest"]; - - trulynotnormal->twosampNN[label="2 groups"]; - twosampNN->indepTwosampNN[label="independent"]; - - twosampOrd[label="paired or\nindependent?"]; - ordinal->twosampOrd[label="2 groups"]; - twosampOrd->indepTwosampNN[label="independent"]; - - - ttest[label="t-test"]; - twosamp->ttest[label="independent"]; - - signtest[label="sign test"]; - twosampNN->signtest[label="paired"]; - twosampOrd->signtest[label="paired"]; -} - - diff --git a/statistics/figs/fig9.pdf b/statistics/figs/fig9.pdf deleted file mode 100644 index 5d721bd..0000000 Binary files a/statistics/figs/fig9.pdf and /dev/null differ diff --git a/statistics/figs/frequentistsvsbayesians.png b/statistics/figs/frequentistsvsbayesians.png deleted file mode 100644 index 735ee5b..0000000 Binary files a/statistics/figs/frequentistsvsbayesians.png and /dev/null differ diff --git a/statistics/figs/generate.py b/statistics/figs/generate.py deleted file mode 100644 index 1c14641..0000000 --- a/statistics/figs/generate.py +++ /dev/null @@ -1,106 +0,0 @@ -from __future__ import division -import seaborn as sns -import sys -sys.path.append('/home/fabee/code/') -from matplotlib.pyplot import * -from fabee.Plotting import * -from scipy import stats -from numpy import * - -sns.set_context("talk", font_scale=1.5, rc={"lines.linewidth": 2.5}) - -# --------------------------------------------------------------------------- -fig, ax = subplots() -fig.subplots_adjust(bottom=.3, left=.3) -n = 50 -x = loadtxt('scripts/thymusglandweights.dat')[:n] -ax.bar([0,1],[mean(x),mean(x)],yerr = [std(x,ddof=1), std(x,ddof=1)/sqrt(n)], - facecolor='dodgerblue', alpha=.8,width=.7, 
align='center', - error_kw={'color':'k','lw':2}, capsize=10, ecolor='k') -ax.set_title('standard deviation or standard error?',fontsize=14, fontweight='bold') - -ax.set_xlim([-.5,1.5]) -box_off(ax) -#disjoint_axes(ax) -ax.set_xticks([0,1]) -ax.set_xticklabels([r'$\hat\sigma$', r'$\frac{\hat\sigma}{\sqrt{n}}$'], fontsize=30) - -ax.set_ylabel(r'$\frac{1}{n}\sum_{i=1}^n x_i$',fontsize=30, fontweight='bold') - -fig.savefig('figs/StandardErrorOrStandardDeviation.pdf') - -# --------------------------------------------------------------------------- -fig, ax = subplots() - -t = linspace(-5,5,1000) -t2 = linspace(stats.laplace.ppf(0.025),stats.laplace.ppf(1-0.025),1000) - -ax.fill_between(t,stats.laplace.pdf(t),color='dodgerblue') -ax.set_xticks([]) -ax.text(5,-0.05, r'$\hat m$',fontsize=30) -ax.text(0,0.7, r'$m$',fontsize=30) -ax.set_yticks([]) -#disjoint_axes(ax) -box_off(ax) - -ax.set_title('putative sampling distribution of the median',fontsize=14, fontweight='bold') -ax.axis([-5,5,0,.8]) -ax.plot([0,0],[0,.7],'--k',lw=2) - -fig.savefig('figs/samplingDistributionMedian00.pdf') - -ax.fill_between(t2,stats.laplace.pdf(t2),color='crimson') - -fig.savefig('figs/samplingDistributionMedian01.pdf') - -# --------------------------------------------------------------------------- -fig, ax = subplots() -k = 7 -N = 21 -F = stats.f -t = linspace(1e-6,8,1000) -t2= linspace(F.ppf(0.95,k-1,N-k),8,1000) - -ax.fill_between(t,F.pdf(t,k-1,N-k),color='dodgerblue') -ax.fill_between(t2,F.pdf(t2,k-1,N-k),color='crimson') -ax.set_xlabel('group MS/ error MS') -ax.set_ylabel(r'p(group MS/ error MS| $H_0$)') -ax.set_title('F-distribution',fontsize=14, fontweight='bold') -ax.set_ylim((0,0.8)) -box_off(ax) -fig.savefig('figs/Fdistribution00.pdf') - -# --------------------------------------------------------------------------- -fig, ax = subplots() -n = 5 -p = stats.t.pdf -t = linspace(-5,8,1000) -t0 = 1.5 -t00 = 1. 
- -mu0 = 3 -t1 = linspace(-5,t00,1000) -t2 = linspace(t0,8,1000) -t3 = linspace(-5,-t0,1000) -ax.fill_between(t,p(t,n-1),color='dodgerblue',alpha=1) -ax.fill_between(t2,p(t2,n-1),color='indigo',alpha=1) -ax.fill_between(t3,p(t3,n-1),color='indigo',alpha=1) -ax.set_xlabel('t') -ax.set_ylabel(r'sampling distribution') -ax.set_ylim((0,0.8)) -box_off(ax) -fig.savefig('figs/experimentalDesign00.pdf') - - -ax.fill_between(t,p(t,n-1,loc=mu0),color='lime',alpha=.5) -ax.fill_between(t1,p(t1,n-1,loc=mu0),color='magenta',alpha=1) -ax.arrow(0,.4,mu0,0,head_width=0.05) -ax.arrow(mu0,.4,-mu0,0,head_width=0.05) -ax.text(mu0/2,.45,r'$\delta$',fontsize=20) -ax.set_xlabel('t') -ax.set_ylabel(r'sampling distribution') -ax.set_ylim((0,0.8)) -box_off(ax) -fig.savefig('figs/experimentalDesign01.pdf') - - diff --git a/statistics/figs/generate03.py b/statistics/figs/generate03.py deleted file mode 100644 index 56137d2..0000000 --- a/statistics/figs/generate03.py +++ /dev/null @@ -1,265 +0,0 @@ -import sys -import seaborn as sns -sys.path.append('/home/fabee/code/') -from matplotlib.pyplot import * -from fabee.Plotting import * -from scipy import stats -from numpy import * - -sns.set_context("talk", font_scale=1.5, rc={"lines.linewidth": 2.5}) - -def hinton(matrix, max_weight=None, ax=None): - """Draw Hinton diagram for visualizing a weight matrix.""" - ax = ax if ax is not None else gca() - - if not max_weight: - max_weight = 2**np.ceil(np.log(np.abs(matrix).max())/np.log(2)) - - ax.patch.set_facecolor('gray') - ax.set_aspect('equal', 'box') - ax.xaxis.set_major_locator(NullLocator()) - ax.yaxis.set_major_locator(NullLocator()) - - for (x,y),w in np.ndenumerate(matrix): - color = 'white' if w > 0 else 'black' - size = np.sqrt(np.abs(w)) - rect = Rectangle([x - size / 2, y - size / 2], size, size, - facecolor=color, edgecolor=color) - ax.add_patch(rect) - - ax.autoscale_view() - ax.invert_yaxis() - -# --------------------------------------------------------------------------- -fig, ax = subplots() -fig.subplots_adjust(bottom=.2) - -ax.bar([0,1],[.2,.8],facecolor='dodgerblue', alpha=.8,width=.7, align='center') -ax.set_title('Bernoulli distribution',fontsize=16, fontweight='bold') - -ax.set_xlim([-.5,1.5]) -box_off(ax) -#disjoint_axes(ax) -ax.set_xlabel('outcomes',fontsize=14, fontweight='bold') -ax.set_ylabel('P(outcome)',fontsize=14, fontweight='bold') -ax.set_xticks([0,1]) -ax.set_xticklabels([0,1]) -ax.set_ylim((0,1)) - -fig.savefig('figs/Bernoulli.pdf') - -# --------------------------------------------------------------------------- -fig, ax = subplots() -fig.subplots_adjust(bottom=.2) -n = 5 -k = arange(0,n) -ax.bar(k,0*k+1./n,facecolor='dodgerblue', alpha=.8,width=.7, align='center') -ax.set_title('uniform distribution',fontsize=16, fontweight='bold') - -box_off(ax) -#disjoint_axes(ax) -ax.set_xlabel('k',fontsize=14, fontweight='bold') -ax.set_ylabel('P(X=k)',fontsize=14, fontweight='bold') -ax.set_xticks(k) -ax.set_xticklabels(k+1) -ax.set_ylim((0,1)) -fig.savefig('figs/Uniform.pdf') - -# --------------------------------------------------------------------------- - -for i,(n,p) in enumerate(zip([10,20],[.5,.8])): - fig, ax = subplots() - - fig.subplots_adjust(bottom=.2) - k = arange(n+1) - - ax.bar(k,stats.binom.pmf(k,n,p),facecolor='dodgerblue', alpha=.8,width=.7, align='center') - ax.set_title(r'binomial distribution $B\left(%.2f, %i\right)$' % (p,n),fontsize=16, fontweight='bold') - - box_off(ax) - #disjoint_axes(ax) - ax.set_xlabel('k',fontsize=14, fontweight='bold') - ax.set_ylabel('P(k)',fontsize=14, 
fontweight='bold') - ax.set_xticks(k) - ax.set_xticklabels(k) - ax.set_xlim((-1,n+1)) - ax.set_ylim((0,1)) - fig.savefig('figs/Binomial%02i.pdf' % (i,)) - - -# --------------------------------------------------------------------------- -n = 20 -for i, lam in enumerate([5, 0.05]): - fig, ax = subplots() - - fig.subplots_adjust(bottom=.2) - k = arange(n+1) - - ax.bar(k,stats.poisson.pmf(k,lam),facecolor='dodgerblue', alpha=.8,width=.7, align='center') - ax.set_title(r'Poisson distribution $\lambda=%.2f$' % (lam,),fontsize=16, fontweight='bold') - - box_off(ax) - #disjoint_axes(ax) - ax.set_xlabel('k',fontsize=14, fontweight='bold') - ax.set_ylabel('P(k)',fontsize=14, fontweight='bold') - ax.set_xticks(k) - ax.set_xticklabels(k) - ax.set_xlim((-1,n+1)) - ax.set_ylim((0,1)) - fig.savefig('figs/Poisson%02i.pdf' % (i,)) - -# --------------------------------------------------------------------------- -fig, ax = subplots() - -fig.subplots_adjust(bottom=.2) -t = linspace(-3,3,200) -ax.fill_between(t,stats.norm.pdf(t),facecolor='dodgerblue', alpha=.8) -ax.set_title(r'Gaussian/Normal distribution $N(\mu,\sigma)$',fontsize=16, fontweight='bold') - -box_off(ax) -#disjoint_axes(ax) -ax.set_xlabel('x',fontsize=14, fontweight='bold') -ax.set_ylabel('p(x)',fontsize=14, fontweight='bold') -fig.savefig('figs/Gaussian00.pdf') - -# --------------------------------------------------------------------------- - -fig, ax = subplots() -n = 10 -kk = 5 -p = .5 -fig.subplots_adjust(bottom=.2) -k = arange(n+1) - -ax.bar(k,stats.binom.pmf(k,n,p),facecolor='dodgerblue', alpha=.8,width=.7, align='center') -ax.bar(k[:kk+1],stats.binom.pmf(k[:kk+1],n,p),facecolor='crimson', alpha=.5,width=.7, align='center') -ax.set_title(r'binomial distribution $B\left(\frac{1}{2}, %i\right)$' % (n,), fontsize=16, fontweight='bold') - -box_off(ax) -#disjoint_axes(ax) -ax.set_xlabel('k',fontsize=14, fontweight='bold') -ax.set_ylabel('P(k)',fontsize=14, fontweight='bold') -ax.set_xticks(k) -ax.set_xticklabels(k) -ax.set_xlim((-1,n+1)) -ax.set_ylim((0,1)) -fig.savefig('figs/BinomialCdf00.pdf' ) - - -fig, ax = subplots() -n = 10 -kk = 5 -p = .5 -fig.subplots_adjust(bottom=.2) -k = arange(n+1) - -ax.bar(k,stats.binom.pmf(k,n,p),facecolor='dodgerblue', alpha=.8,width=.7, align='center',label='p.m.f.') -ax.bar(k[:kk+1],stats.binom.pmf(k[:kk+1],n,p),facecolor='crimson', alpha=.5,width=.7, align='center') -ax.plot(k,stats.binom.cdf(k,n,p),'ok',mfc='crimson', alpha=1.,label='c.d.f.', ms=15) - -ax.set_title(r'binomial distribution $B\left(\frac{1}{2}, %i\right)$' % (n,), fontsize=16, fontweight='bold') -ax.legend(frameon=False, loc='best') -box_off(ax) -#disjoint_axes(ax) -ax.set_xlabel('k',fontsize=14, fontweight='bold') -ax.set_ylabel('P(k)',fontsize=14, fontweight='bold') -ax.set_xticks(k) -ax.set_xticklabels(k) -ax.set_xlim((-1,n+1)) -ax.set_ylim((0,1.1)) -fig.savefig('figs/BinomialCdf01.pdf' ) - -fig, ax = subplots() -n = 10 -kk = 2 -p = .5 -fig.subplots_adjust(bottom=.2) -k = arange(n+1) - -ax.bar(k,stats.binom.pmf(k,n,p),facecolor='dodgerblue', alpha=.8,width=.7, align='center',label='p.m.f.') -ax.bar(k[:kk+1],stats.binom.pmf(k[:kk+1],n,p),facecolor='crimson', alpha=.5,width=.7, align='center') -ax.bar(k[-kk-1:],stats.binom.pmf(k[-kk-1:],n,p),facecolor='crimson', alpha=.5,width=.7, align='center') - -ax.set_title(r'binomial distribution $B\left(\frac{1}{2}, %i\right)$' % (n,), fontsize=16, fontweight='bold') -ax.legend(frameon=False, loc='best') -box_off(ax) -#disjoint_axes(ax) -ax.set_xlabel('k',fontsize=14, fontweight='bold') 
-ax.set_ylabel('P(k)',fontsize=14, fontweight='bold') -ax.set_xticks(k) -ax.set_xticklabels(k) -ax.set_xlim((-1,n+1)) -ax.set_ylim((0,1.1)) -fig.savefig('figs/BinomialExample00.pdf' ) - - -#------------------------------------------------------ -fig = figure(figsize=(10,3.5)) -ax = fig.add_axes([.1,.13,.6,.3]) - -n = 10 -p = [.5,.8] -q = [.7, .3] -fig.subplots_adjust(bottom=0.2) -k = arange(n+1) -P = vstack((stats.binom.pmf(k,n,p[0])*q[0], stats.binom.pmf(k,n,p[1])*q[1])).T - -hinton(P, ax = None) - -#disjoint_axes(ax) -ax.set_xticks(k) -ax.set_xticklabels(k) -ax.set_yticks([0,1]) -ax.set_ylim((-.5,1.5)) -ax.set_xlim((-.5,n+.5)) -ax.set_yticklabels(['subject #1', 'subject #2']) -fig.savefig('figs/Joint00.pdf' ) - -ax = fig.add_axes([.75,.13,.2,.3]) -ax.barh([0,1],q, facecolor='dodgerblue',alpha=.8, align='center') -box_off(ax) -#disjoint_axes(ax) -ax.set_xticks([0,.5,1.]) -ax.set_yticks([]) -ax.set_ylim((-.5,1.5)) -fig.savefig('figs/Joint01.pdf' ) - -ax = fig.add_axes([.1,.6,.6,.2]) -ax.bar(k,sum(P,axis=1), facecolor='dodgerblue',alpha=.8, align='center') -a = .7 -ax.axis([-a,n-a+1.5,0,1]) -box_off(ax) -#disjoint_axes(ax) -ax.set_xticks([]) -ax.set_yticks([0,.3]) -ax.set_ylim((0,.3)) -fig.savefig('figs/Joint02.pdf' ) - - -#------------------------------------------------------ -n = 10 -k = arange(n+1) -p = [.5,.8] -q = [.7, .3] -P = vstack((stats.binom.pmf(k,n,p[0])*q[0], stats.binom.pmf(k,n,p[1])*q[1])) -Pk = sum(P,axis=0) - -fig = figure() -for i,kk in enumerate(k): - ax = fig.add_subplot(3,4,i+1) - fig.subplots_adjust(bottom=0.2) - - ax.bar([0,1],P[:,i]/Pk[i], facecolor='dodgerblue',alpha=.8, align='center') - - - #disjoint_axes(ax) - ax.set_xticks([0,1]) - ax.set_xticklabels(['#1','#2'], fontsize=8) - ax.set_yticks([0,.5,1]) - ax.set_yticklabels([0,.5,1],fontsize=8) - ax.set_xlim((-.5,1.5)) - ax.set_ylim((0,1)) - ax.set_title('P({#1,#2}| %i successes)' % (i,), fontsize=8) - -fig.subplots_adjust(wspace=.8, hspace=.8) -fig.savefig('figs/Posterior00.pdf') diff --git a/statistics/figs/generatePlots.py b/statistics/figs/generatePlots.py deleted file mode 100644 index 309eba7..0000000 --- a/statistics/figs/generatePlots.py +++ /dev/null @@ -1,216 +0,0 @@ -import sys -sys.path.append('/home/fabee/code/') -import seaborn as sns -from matplotlib.pyplot import * -from scipy import stats -from numpy import * - -from matplotlib.ticker import NullFormatter - -sns.set_context("talk", font_scale=1.5, rc={"lines.linewidth": 2.5}) - -# --------------- PLOT 1 ------------------------- -# the random data -distr = stats.uniform -col = '+*0<>v' - -for k,distr in enumerate([stats.laplace, stats.norm, stats.expon,stats.uniform]): - col = [col[i] for i in random.permutation(6)] - x = random.randn(5000) - - nullfmt = NullFormatter() # no labels - - # definitions for the axes - left, width = 0.1, 0.65 - bottom, height = 0.1, 0.65 - bottom_h = left_h = left+width+0.02 - - rect_scatter = [left + 0.22, bottom + 0.22 , width, height] - rect_histx = [left + 0.22, bottom, width, 0.2] - rect_histy = [left, bottom + 0.22 , 0.2, height] - - # start with a rectangular Figure - fig = figure(figsize=(8,8)) - - axQQ = axes(rect_scatter) - axHistx = axes(rect_histx) - axHisty = axes(rect_histy) - - # no labels - axHistx.yaxis.set_major_formatter(nullfmt) - axHisty.xaxis.set_major_formatter(nullfmt) - axQQ.xaxis.set_major_formatter(nullfmt) - axQQ.yaxis.set_major_formatter(nullfmt) - - - - # the scatter plot: - z = distr.ppf(stats.norm.cdf(x)) - y = linspace(amin(z),amax(z),1000) - - - z = distr.ppf(stats.norm.cdf(x)) - if 
distr != stats.norm: - if distr == stats.uniform: - axQQ.plot(x, z,'ok',marker=col[0],ms=5,label='c.d.f.') - else: - axQQ.plot(x, z,'ok',marker=col[0],ms=5,label='correct') - - if distr != stats.expon: - axQQ.plot((z-amin(z))/(amax(z)-amin(z))*(amax(x)-amin(x)) + amin(x),\ - (x-amin(x))/(amax(x)-amin(x))*(amax(z)-amin(z)) + amin(z),'ok',marker=col[1],ms=5) - axQQ.plot(x, (x-amin(x))/(amax(x)-amin(x))*(amax(z)-amin(z)) + amin(z),'ok',marker=col[2],ms=5) - - - # now determine nice limits by hand: - axHistx.hist(x, bins=100,normed=True) - if distr != stats.expon: - axHisty.plot(distr.pdf(y),y) - z2 = distr.pdf(y) - y = hstack((y[0],y,y[-1])) - z2 = hstack((0,z2,0)) - axHisty.fill(z2,y,color=(.0,.0,1.)) - - axQQ.set_xlim(axHistx.get_xlim()) - axQQ.set_ylim(axHisty.get_ylim()) - - if distr == stats.uniform: - axQQ.set_ylim((-.1,1.1)) - axHisty.set_ylim((-.1,1.1)) - axHisty.set_xlim((.0,1.1)) - - axHistx.set_xlabel('x',fontsize=16) - axHistx.set_ylabel('p(x)',fontsize=16) - axHisty.set_ylabel('y',fontsize=16) - axHisty.set_xlabel('p(y)',fontsize=16) - - fig.savefig('figs/HE%i.png' % (k,)) - if distr == stats.norm: - axQQ.plot(x, z,'ok',marker=col[0],ms=5) - elif distr == stats.expon: - axHisty.plot(distr.pdf(y),y) - z2 = distr.pdf(y) - y = hstack((y[0],y,y[-1])) - z2 = hstack((0,z2,0)) - axHisty.fill(z2,y,color=(.0,.0,1.)) - - else: - axQQ.legend(loc=2) - fig.savefig('figs/HE%iSolution.png' % (k,)) - -# ####################################################3 -fig = figure() - - -ax = fig.add_subplot(111) -xx = linspace(-3.,stats.norm.ppf(1-0.2),1000) - -x = linspace(-3.,3.,1000) -y = stats.norm.pdf(x,scale=1) -yy = stats.norm.pdf(xx,scale=1) -yy[0] = 0 -yy[-1] = 0 - -ax.plot(x,y,'k-',lw=2) -ax.plot(x,stats.norm.pdf(x),'k-',lw=1) -ax.set_xlabel('x',fontsize=16) -ax.set_ylabel('pdf',fontsize=16) -ax.fill(xx,yy,'b') - -ax.set_xlim(-3.,3.) - -ax.text(xx[-1],-.1,'b'); - -ax.text(xx[-1],.4,'p(x)',color='k'); -ax.text(xx[0],.3,'F(b) = P(x <= b)',color='b'); - -#XKCDify(ax, expand_axes=True,yaxis_loc=0,xaxis_loc=0) - -fig.savefig('figs/cdf.png') - -#----------------------------- -fig = figure() - - -ax = fig.add_subplot(111) -xx = linspace(-3.,stats.norm.ppf(1-0.2),1000) - -x = linspace(-3.,3.,1000) -y = stats.norm.pdf(x,scale=1) -yy = stats.norm.pdf(xx,scale=1) -yy[0] = 0 -yy[-1] = 0 - -ax.plot(x,y,'k-',lw=2) -ax.plot(x,stats.norm.cdf(x),'b-',lw=1) -ax.set_xlabel('x/b',fontsize=16) -ax.set_ylabel('pdf/cdf',fontsize=16) - -ax.set_xlim(-3.,3.) 
- -ax.text(xx[-1],.4,'p(x)',color='k'); -ax.text(xx[0],.3,'F(b) = P(x <= b)',color='b'); - -#XKCDify(ax, expand_axes=True,yaxis_loc=0,xaxis_loc=0) - -fig.savefig('figs/cdf2.png') - -# ####################################################3 -fig = figure() - - -ax = fig.add_subplot(111) - -x = hstack((linspace(-3.,stats.norm.ppf(0.13),1000),\ - linspace(stats.norm.ppf(1-0.13),3.,1000))) - -xx = hstack((linspace(-3.,stats.norm.ppf(0.2),1000),\ - linspace(stats.norm.ppf(1-0.2),3.,1000))) - -y = stats.norm.pdf(x,scale=1) -yy = stats.norm.pdf(xx,scale=1) - -y[[0,999,1000,-1]] = 0 -yy[[0,999,1000,-1]] = 0 - -t = linspace(-3.,3.,1000) -ax.plot(t,stats.norm.pdf(t),'k-',lw=2) - -ax.fill(xx[:1000],yy[:1000],'b') -ax.fill(xx[1000:],yy[1000:],'b') -ax.text(xx[1000],-.1,'b') -ax.text(xx[999],-.1,'-b') -ax.text(.2,.7,'P(|x|>b) =$\\alpha$',color='b'); - -#XKCDify(ax, expand_axes=True,yaxis_loc=0,xaxis_loc=0) - -fig.savefig('figs/pval0.png') - -#--------------------------------------------------- -fig = figure() - -ax = fig.add_subplot(111) - -t = linspace(-3.,3.,1000) -ax.plot(t,stats.norm.pdf(t),'k-',lw=2) - - -ax.fill(x[:1000],y[:1000],'r') -ax.fill(x[1000:],y[1000:],'r') - - -ax.text(x[1000],-.1,'t') -ax.text(x[999],-.1,'-t') - -ax.text(.2,.5,'P(|x| > t) = p-value',color='r'); - -#XKCDify(ax, expand_axes=True,yaxis_loc=0,xaxis_loc=0) - -fig.savefig('figs/ pval1.png') - - - -# show() - - -#----------------------------- diff --git a/statistics/figs/generateTPlots.py b/statistics/figs/generateTPlots.py deleted file mode 100644 index 36f10dd..0000000 --- a/statistics/figs/generateTPlots.py +++ /dev/null @@ -1,184 +0,0 @@ -import sys -import seaborn as sns -sys.path.append('/home/fabee/code') -from matplotlib.pyplot import * -from scipy import stats -from numpy import * - -sns.set_context("talk", font_scale=1.5, rc={"lines.linewidth": 2.5}) - -# define the curves -x = np.linspace(2, 20, 200) -n = 16. 
- - -X =random.randn(n)*4.+12.5 -fig = figure() -ax = fig.add_subplot(111) -ax.set_xlim(5, 18) -#ax.set_ylim(0, .5) -ax.plot([10,10],[-.2,.2],'k-',lw=2) -ax.text(10,.3,r'stimulus position',rotation=-30); -ax.plot([12.5,12.5],[-.2,.2],'b-',lw=2) -ax.text(12.5,.3,r'$\hat\mu$',rotation=-45); - -ax.set_xlabel('x eye position') -#XKCDify(ax, expand_axes=True,yaxis_loc=0,xaxis_loc=0) -ax.plot(X,0*X,'ob',label='fixations',mfc='orange',ms=10) -fig.savefig('figs/repetition0.png') - -# ####################################################3 -fig = figure() - - -ax = fig.add_subplot(111) - -ax.plot(x,-stats.norm.pdf(x,loc=10,scale=4),'orange',label=r'Null distribution of x') - - -ax.set_xlim(5, 18) -# ax.set_ylim(0, .5) - -ax.plot([10,10],[-.2,.2],'k-',lw=2) -ax.text(10,.3,r'stimulus position',rotation=-30); -ax.plot([12.5,12.5],[-.2,.2],'b-',lw=2) -ax.text(12.5,.3,r'$\hat\mu$',rotation=-45); -ax.legend() - -ax.set_xlabel('x eye position') -#XKCDify(ax, expand_axes=True,yaxis_loc=0,xaxis_loc=0) -ax.plot(X,0*X,'ob',label='fixations',mfc='orange',ms=10) - -fig.savefig('figs/repetition1.png') - - -# ####################################################3 -fig = figure() - - -ax = fig.add_subplot(111) - -ax.plot(x,-stats.norm.pdf(x,loc=10,scale=4),'orange',label=r'Null distribution of x') -ax.plot(x,-stats.t.pdf(x,n-1,loc=10,scale=1),'b',label=r'Null distribution of $t$') - -ax.set_xlim(5, 18) -# ax.set_ylim(0, .5) - -ax.plot([10,10],[-.2,.2],'k-',lw=2) -ax.text(10,.3,r'stimulus position',rotation=-30); -ax.plot([12.5,12.5],[-.2,.2],'b-',lw=2) -ax.text(12.5,.3,r'$\hat\mu$',rotation=-45); -ax.legend() - -ax.set_xlabel('x eye position') -#XKCDify(ax, expand_axes=True,yaxis_loc=0,xaxis_loc=0) -ax.plot(X,0*X,'ob',label='fixations',mfc='orange',ms=10) - -fig.savefig('figs/repetition2.png') - -# ####################################################3 -fig = figure() - - -ax = fig.add_subplot(111) -xx = linspace(stats.norm.ppf(0.05),stats.norm.ppf(1-0.05),100) -xx += 10. - -yy = -stats.norm.pdf(xx,loc=10.,scale=1) -xx = hstack((xx[0],xx,xx[-1])) -yy = hstack((0,yy,0)) - -ax.plot(x,-stats.norm.pdf(x,loc=10,scale=4),'orange',label=r'Null distribution of x') -ax.plot(x,-stats.t.pdf(x,n-1,loc=10,scale=1),'b',label=r'Null distribution of $t$') - -ax.fill(xx,yy,'c') - -ax.set_xlim(5, 18) -# ax.set_ylim(0, .5) - -ax.plot([10,10],[-.2,.2],'k-',lw=2) -ax.text(10,.3,r'stimulus position',rotation=-30); -ax.plot([12.5,12.5],[-.2,.2],'b-',lw=2) -ax.text(12.5,.3,r'$\hat\mu$',rotation=-45); -ax.legend() - -ax.set_xlabel('x eye position') -#XKCDify(ax, expand_axes=True,yaxis_loc=0,xaxis_loc=0) -ax.plot(X,0*X,'ob',label='fixations',mfc='orange',ms=10) - -fig.savefig('figs/repetition3.png') - - -# ####################################################3 -fig = figure() - - -ax = fig.add_subplot(111) -xx = linspace(stats.norm.ppf(0.05),stats.norm.ppf(1-0.05),100) -xx += 10. 
- -yy = -stats.norm.pdf(xx,loc=10.,scale=1) -xx = hstack((xx[0],xx,xx[-1])) -yy = hstack((0,yy,0)) - -ax.plot(x,-stats.norm.pdf(x,loc=10,scale=4),'orange',label=r'Null distribution of x') -ax.plot(x,-stats.t.pdf(x,n-1,loc=10,scale=1),'b',label=r'Null distribution of $t$') - -ax.fill(xx,yy,'c') - -ax.set_xlim(5, 18) -# ax.set_ylim(0, .5) - -ax.plot([10,10],[-.2,.2],'k-',lw=2) -ax.text(10,.3,r'stimulus position',rotation=-30); -ax.plot([12.5,12.5],[-.2,.2],'b-',lw=2) -ax.text(12.5,.3,r'$\hat\mu$',rotation=-45) - -ax.plot([xx[0],xx[-1]],[0,0],'-g',label=r'$H_0$',lw=4) -ax.plot([0,xx[0]],[0,0],'-r',label=r'$H_1$',lw=4) -ax.plot([xx[-1],20],[0,0],'-r',lw=4) - -ax.legend() - -ax.set_xlabel('x eye position') -#XKCDify(ax, expand_axes=True,yaxis_loc=0,xaxis_loc=0) -ax.plot(X,0*X,'ob',label='fixations',mfc='orange',ms=10) - -fig.savefig('figs/repetition4.png') - - -# ####################################################3 -fig = figure() - - -ax = fig.add_subplot(111) - -ax.plot(x,-stats.norm.pdf(x,loc=10,scale=4),'orange',label=r'Null distribution of x') -ax.plot(x,-stats.t.pdf(x,n-1,loc=10,scale=1),'b',label=r'Null distribution of $t$') - -xx = linspace(0,stats.norm.ppf(0.05)+10.,100) -yy = -stats.norm.pdf(xx,loc=10.,scale=1) -xx = hstack((xx[0],xx,xx[-1])) -yy = hstack((0,yy,0)) -ax.fill(xx,yy,'magenta') - -xx = linspace(stats.norm.ppf(1-0.05)+10.,20,100) -yy = -stats.norm.pdf(xx,loc=10.,scale=1) -xx = hstack((xx[0],xx,xx[-1])) -yy = hstack((0,yy,0)) -ax.fill(xx,yy,'magenta') - -ax.set_xlim(5, 18) -# ax.set_ylim(0, .5) - -ax.plot([10,10],[-.2,.2],'k-',lw=2) -ax.text(10,.3,r'stimulus position',rotation=-30); -ax.plot([12.5,12.5],[-.2,.2],'b-',lw=2) -ax.text(12.5,.3,r'$\hat\mu$',rotation=-45); -ax.legend() - -ax.set_xlabel('x eye position') -#XKCDify(ax, expand_axes=True,yaxis_loc=0,xaxis_loc=0) -ax.plot(X,0*X,'ob',label='fixations',mfc='orange',ms=10) - -fig.savefig('figs/repetition5.png') diff --git a/statistics/figs/hunger.png b/statistics/figs/hunger.png deleted file mode 100644 index 445ad6e..0000000 Binary files a/statistics/figs/hunger.png and /dev/null differ diff --git a/statistics/figs/mensqqplot.pdf b/statistics/figs/mensqqplot.pdf deleted file mode 100644 index ce49ba0..0000000 Binary files a/statistics/figs/mensqqplot.pdf and /dev/null differ diff --git a/statistics/figs/multipletesting.pdf b/statistics/figs/multipletesting.pdf deleted file mode 100644 index a2c4cd2..0000000 Binary files a/statistics/figs/multipletesting.pdf and /dev/null differ diff --git a/statistics/figs/multipletesting.py b/statistics/figs/multipletesting.py deleted file mode 100644 index 03e019e..0000000 --- a/statistics/figs/multipletesting.py +++ /dev/null @@ -1,72 +0,0 @@ -from __future__ import division -from numpy import * -from scipy import stats -from matplotlib.pyplot import * - -N = random.randn - -m = 2000 -n = 20 - -T = zeros((m,)) -R = zeros((m,)) -pT = zeros((m,)) -pR = zeros((m,)) - -for k in xrange(m): - x = N(n) - y = N(n) - - T[k], pT[k] = stats.ttest_ind(x,y) - R[k], pR[k] = stats.ranksums(x,y) - -a = stats.t.ppf([0.025,1.-0.025], n-1) -b = stats.norm.ppf([0.025,1.-0.025]) - - -fig = figure(figsize=(8,8),dpi=100) -ax = fig.add_axes([.3,.3,.6,.6]) -axb = fig.add_axes([.3,.1,.6,.2]) -axl = fig.add_axes([.1,.3,.2,.6]) - -ax.plot(T,R,'ok',mfc=(.7,.7,.7)) -axb.hist(T,bins=50,facecolor=(1.,.7,.7),normed=True) -axl.hist(R,bins=50,facecolor=(.7,.7,1.),normed=True,orientation='horizontal') -axl.axis([0,1,-5,5]) -axb.plot([a[0],a[0]],[0,1],'k--',lw=2) -axb.plot([a[1],a[1]],[0,1],'k--',lw=2) - 
-axl.plot([0,1],[b[0],b[0]],'k--',lw=2) -axl.plot([0,1],[b[1],b[1]],'k--',lw=2) -axl.set_ylabel('standardized U statistic', fontsize=16) -axb.set_xlabel('t statistic', fontsize=16) - -# print sum(1.*(T < a[0] ))/m + sum(1.*(T > a[1]))/m -# print sum(1.*(R < b[0] ))/m + sum(1.*(R > b[1]))/m - -ax.fill([-5,a[0],a[0],-5],[-5,-5,5,5],color=(1.,.7,.7),alpha=.5) -ax.fill([a[1],5,5,a[1]],[-5,-5,5,5],color=(1.,.7,.7),alpha=.5) -axb.fill([-5,a[0],a[0],-5],[0,0,1,1],color=(1.,.7,.7),alpha=.5) -axb.fill([a[1],5,5,a[1]],[0,0,1,1],color=(1.,.7,.7),alpha=.5) - - -ax.fill([-5,-5,5,5],[-5,b[0],b[0],-5],color=(.7,.7,1.),alpha=.5) -ax.fill([-5,-5,5,5],[b[1],5,5,b[1]],color=(.7,.7,1.),alpha=.5) -axl.fill([0,0,1,1],[-5,b[0],b[0],-5],color=(.7,.7,1.),alpha=.5) -axl.fill([0,0,1,1],[b[1],5,5,b[1]],color=(.7,.7,1.),alpha=.5) - - - -axb.axis([-5,5,0,1]) -ax.axis([-5,5,-5,5]) - -axl.set_xticks([]) -axb.set_yticks([]) -axl = axl.twiny() -axb = axb.twinx() -axl.set_xticks([0,.5,1.]) -axb.set_yticks([0,.5,1.]) - - - -fig.savefig('multipletesting.pdf') diff --git a/statistics/figs/nacho-trainer.jpg b/statistics/figs/nacho-trainer.jpg new file mode 100644 index 0000000..20b278d Binary files /dev/null and b/statistics/figs/nacho-trainer.jpg differ diff --git a/statistics/figs/nnqqplot.pdf b/statistics/figs/nnqqplot.pdf deleted file mode 100644 index 9942613..0000000 Binary files a/statistics/figs/nnqqplot.pdf and /dev/null differ diff --git a/statistics/figs/onetailed.png b/statistics/figs/onetailed.png deleted file mode 100644 index 155aa64..0000000 Binary files a/statistics/figs/onetailed.png and /dev/null differ diff --git a/statistics/figs/power.pdf b/statistics/figs/power.pdf deleted file mode 100644 index 5089bbd..0000000 Binary files a/statistics/figs/power.pdf and /dev/null differ diff --git a/statistics/figs/probtree00.dot b/statistics/figs/probtree00.dot deleted file mode 100644 index 506c640..0000000 --- a/statistics/figs/probtree00.dot +++ /dev/null @@ -1,29 +0,0 @@ -digraph subject { - size="6,6"; - node [color=lightblue2, style=filled]; - rankdir=LR; - - - s10[label=""]; - s11[label=""]; - s1other[label="...", style=none, border=0, color=white]; - - s20[label=""]; - s21[label=""]; - s2other[label="...", style=none, border=0, color=white]; - - F1[label=""]; - F2[label=""]; - - - ""->F1; - ""->F2; - F1->s10; - F1->s11; - F1->s1other; - - F2->s20; - F2->s21; - F2->s2other; - -} diff --git a/statistics/figs/probtree00.pdf b/statistics/figs/probtree00.pdf deleted file mode 100644 index c156306..0000000 Binary files a/statistics/figs/probtree00.pdf and /dev/null differ diff --git a/statistics/figs/probtree01.dot b/statistics/figs/probtree01.dot deleted file mode 100644 index bdcae34..0000000 --- a/statistics/figs/probtree01.dot +++ /dev/null @@ -1,29 +0,0 @@ -digraph subject { - size="6,6"; - node [color=lightblue2, style=filled]; - rankdir=LR; - - - s10[label="k=0"]; - s11[label="k=1"]; - s1other[label="...", style=none, border=0, color=white]; - - s20[label="k=0"]; - s21[label="k=1"]; - s2other[label="...", style=none, border=0, color=white]; - - F1[label="subject #1"]; - F2[label="subject #2"]; - - - ""->F1; - ""->F2; - F1->s10; - F1->s11; - F1->s1other; - - F2->s20; - F2->s21; - F2->s2other; - -} diff --git a/statistics/figs/probtree01.pdf b/statistics/figs/probtree01.pdf deleted file mode 100644 index 7016fa8..0000000 Binary files a/statistics/figs/probtree01.pdf and /dev/null differ diff --git a/statistics/figs/probtree02.dot b/statistics/figs/probtree02.dot deleted file mode 100644 index 124f640..0000000 
--- a/statistics/figs/probtree02.dot +++ /dev/null @@ -1,29 +0,0 @@ -digraph fish { - size="6,6"; - node [color=lightblue2, style=filled]; - rankdir=LR; - - - s10[label="k=0"]; - s11[label="k=1"]; - s1other[label="...", style=none, border=0, color=white]; - - s20[label="k=0"]; - s21[label="k=1"]; - s2other[label="...", style=none, border=0, color=white]; - - F1[label="fish #1"]; - F2[label="fish #2"]; - - - ""->F1[label="p=0.7"]; - ""->F2[label="p=0.3"]; - F1->s10[label="P(k=0|#1)"]; - F1->s11[label="P(k=1|#1)"]; - F1->s1other[label="..."]; - - F2->s20[label="P(k=0|#2)"]; - F2->s21[label="P(k=1|#2)"]; - F2->s2other[label="..."]; - -} diff --git a/statistics/figs/probtree02.pdf b/statistics/figs/probtree02.pdf deleted file mode 100644 index cf4d6aa..0000000 Binary files a/statistics/figs/probtree02.pdf and /dev/null differ diff --git a/statistics/figs/probtree03.dot b/statistics/figs/probtree03.dot deleted file mode 100644 index 7e0b34e..0000000 --- a/statistics/figs/probtree03.dot +++ /dev/null @@ -1,29 +0,0 @@ -digraph subject { - size="6,6"; - node [color=lightblue2, style=filled]; - rankdir=LR; - - - s10[label="P(k=0, subject#1)\n=0.7 P(k=0|#1)"]; - s11[label="P(k=1, subject#1)\n=0.7 P(k=1|#1)"]; - s1other[label="...", style=none, border=0, color=white]; - - s20[label="P(k=0, subject#2)\n=0.3 P(k=0|#2)"]; - s21[label="P(k=1, subject#2)\n=0.3 P(k=1|#2)"]; - s2other[label="...", style=none, border=0, color=white]; - - F1[label="subject #1"]; - F2[label="subject #2"]; - - - ""->F1[label="p=0.7"]; - ""->F2[label="p=0.3"]; - F1->s10[label="P(k=0|#1)"]; - F1->s11[label="P(k=1|#1)"]; - F1->s1other[label="..."]; - - F2->s20[label="P(k=0|#2)"]; - F2->s21[label="P(k=1|#2)"]; - F2->s2other[label="..."]; - -} diff --git a/statistics/figs/probtree03.pdf b/statistics/figs/probtree03.pdf deleted file mode 100644 index 56862f6..0000000 Binary files a/statistics/figs/probtree03.pdf and /dev/null differ diff --git a/statistics/figs/pval0.png b/statistics/figs/pval0.png deleted file mode 100644 index 85a02ac..0000000 Binary files a/statistics/figs/pval0.png and /dev/null differ diff --git a/statistics/figs/pval1.png b/statistics/figs/pval1.png deleted file mode 100644 index 3751baa..0000000 Binary files a/statistics/figs/pval1.png and /dev/null differ diff --git a/statistics/figs/qqplot.pdf b/statistics/figs/qqplot.pdf deleted file mode 100644 index b349363..0000000 Binary files a/statistics/figs/qqplot.pdf and /dev/null differ diff --git a/statistics/figs/regression01.pdf b/statistics/figs/regression01.pdf deleted file mode 100644 index 8381bc6..0000000 Binary files a/statistics/figs/regression01.pdf and /dev/null differ diff --git a/statistics/figs/regression02.pdf b/statistics/figs/regression02.pdf deleted file mode 100644 index f834706..0000000 Binary files a/statistics/figs/regression02.pdf and /dev/null differ diff --git a/statistics/figs/regression03.pdf b/statistics/figs/regression03.pdf deleted file mode 100644 index 7e15e99..0000000 Binary files a/statistics/figs/regression03.pdf and /dev/null differ diff --git a/statistics/figs/regression04.pdf b/statistics/figs/regression04.pdf deleted file mode 100644 index 0857dbd..0000000 Binary files a/statistics/figs/regression04.pdf and /dev/null differ diff --git a/statistics/figs/regression05.pdf b/statistics/figs/regression05.pdf deleted file mode 100644 index bcbf9de..0000000 Binary files a/statistics/figs/regression05.pdf and /dev/null differ diff --git a/statistics/figs/regression06.pdf b/statistics/figs/regression06.pdf deleted file mode 100644 
index 20e6000..0000000 Binary files a/statistics/figs/regression06.pdf and /dev/null differ diff --git a/statistics/figs/repetition0.png b/statistics/figs/repetition0.png deleted file mode 100644 index 751bcaa..0000000 Binary files a/statistics/figs/repetition0.png and /dev/null differ diff --git a/statistics/figs/repetition1.png b/statistics/figs/repetition1.png deleted file mode 100644 index f0e9be8..0000000 Binary files a/statistics/figs/repetition1.png and /dev/null differ diff --git a/statistics/figs/repetition2.png b/statistics/figs/repetition2.png deleted file mode 100644 index 3356959..0000000 Binary files a/statistics/figs/repetition2.png and /dev/null differ diff --git a/statistics/figs/repetition3.png b/statistics/figs/repetition3.png deleted file mode 100644 index a67e0e9..0000000 Binary files a/statistics/figs/repetition3.png and /dev/null differ diff --git a/statistics/figs/repetition4.png b/statistics/figs/repetition4.png deleted file mode 100644 index 550b7dd..0000000 Binary files a/statistics/figs/repetition4.png and /dev/null differ diff --git a/statistics/figs/repetition5.png b/statistics/figs/repetition5.png deleted file mode 100644 index 4f9e2f7..0000000 Binary files a/statistics/figs/repetition5.png and /dev/null differ diff --git a/statistics/figs/samplingDistribution.png b/statistics/figs/samplingDistribution.png deleted file mode 100755 index 51a43fd..0000000 Binary files a/statistics/figs/samplingDistribution.png and /dev/null differ diff --git a/statistics/figs/samplingDistributionMedian.pdf b/statistics/figs/samplingDistributionMedian.pdf deleted file mode 100644 index e5c6a21..0000000 Binary files a/statistics/figs/samplingDistributionMedian.pdf and /dev/null differ diff --git a/statistics/figs/samplingDistributionMedian00.pdf b/statistics/figs/samplingDistributionMedian00.pdf deleted file mode 100644 index 56f25cf..0000000 Binary files a/statistics/figs/samplingDistributionMedian00.pdf and /dev/null differ diff --git a/statistics/figs/samplingDistributionMedian01.pdf b/statistics/figs/samplingDistributionMedian01.pdf deleted file mode 100644 index d7eb541..0000000 Binary files a/statistics/figs/samplingDistributionMedian01.pdf and /dev/null differ diff --git a/statistics/figs/samuels.jpg b/statistics/figs/samuels.jpg new file mode 100755 index 0000000..9ba5f83 Binary files /dev/null and b/statistics/figs/samuels.jpg differ diff --git a/statistics/figs/soccer.jpg b/statistics/figs/soccer.jpg new file mode 100644 index 0000000..bce5fa4 Binary files /dev/null and b/statistics/figs/soccer.jpg differ diff --git a/statistics/figs/statistic0.png b/statistics/figs/statistic0.png deleted file mode 100755 index 86d3571..0000000 Binary files a/statistics/figs/statistic0.png and /dev/null differ diff --git a/statistics/figs/statistic1.png b/statistics/figs/statistic1.png deleted file mode 100755 index c358821..0000000 Binary files a/statistics/figs/statistic1.png and /dev/null differ diff --git a/statistics/figs/statistic2.png b/statistics/figs/statistic2.png deleted file mode 100755 index fbe09ee..0000000 Binary files a/statistics/figs/statistic2.png and /dev/null differ diff --git a/statistics/figs/statistic3.png b/statistics/figs/statistic3.png deleted file mode 100755 index 2efd4bb..0000000 Binary files a/statistics/figs/statistic3.png and /dev/null differ diff --git a/statistics/figs/statistic4.png b/statistics/figs/statistic4.png deleted file mode 100755 index f3a1885..0000000 Binary files a/statistics/figs/statistic4.png and /dev/null differ diff --git 
a/statistics/figs/statisticalInference.png b/statistics/figs/statisticalInference.png deleted file mode 100755 index 06dc857..0000000 Binary files a/statistics/figs/statisticalInference.png and /dev/null differ diff --git a/statistics/figs/tdistribution5.png b/statistics/figs/tdistribution5.png deleted file mode 100755 index 17f0c8d..0000000 Binary files a/statistics/figs/tdistribution5.png and /dev/null differ diff --git a/statistics/figs/testframework00.dot b/statistics/figs/testframework00.dot deleted file mode 100644 index cc4a73e..0000000 --- a/statistics/figs/testframework00.dot +++ /dev/null @@ -1,25 +0,0 @@ -digraph G { - rankdir=KR; - node [fontsize=12, shape=oval, style=filled, nodesep=0.95,ranksep=0.95, color="dodgerblue"]; - edge [penwidth=2, fontsize=10 ]; - - root[label=""]; - H0[label="H0 is true"]; - HA[label="HA is true"]; - TN[label="true negative:\naccept H0"]; - FP[label="false positive:\nreject H0"]; - TP[label="true positive:\nreject H0"]; - FN[label="false negative:\naccept H0"]; - - - root->H0[label="P(H0)"]; - root->HA[label="1-P(H0)"]; - - H0->TN[label="P(accept H0| H0 true)=1-alpha"]; - H0->FP[label="P(reject H0| H0 true)=alpha\n=type I"]; - - HA->TP[label="P(accept HA| HA true)=1-\beta\n=power"]; - HA->FN[label="P(reject HA| HA true)=\beta\n=type II"]; - - -} diff --git a/statistics/figs/testframework00.pdf b/statistics/figs/testframework00.pdf deleted file mode 100644 index a44f348..0000000 Binary files a/statistics/figs/testframework00.pdf and /dev/null differ diff --git a/statistics/figs/testframework01.dot b/statistics/figs/testframework01.dot deleted file mode 100644 index 845d0ba..0000000 --- a/statistics/figs/testframework01.dot +++ /dev/null @@ -1,25 +0,0 @@ -digraph G { - rankdir=KR; - node [fontsize=12, shape=oval, style=filled, nodesep=0.95,ranksep=0.95, color="dodgerblue"]; - edge [penwidth=2, fontsize=10 ]; - - root[label=""]; - H0[label="H0 is true",color="green"]; - HA[label="HA is true"]; - TN[label="true negative:\naccept H0",color="green"]; - FP[label="false positive:\nreject H0",color="green"]; - TP[label="true positive:\nreject H0"]; - FN[label="false negative:\naccept H0"]; - - - root->H0[label="P(H0)"]; - root->HA[label="1-P(H0)"]; - - H0->TN[label="P(accept H0| H0 true)=1-alpha"]; - H0->FP[label="P(reject H0| H0 true)=alpha\n=type I"]; - - HA->TP[label="P(accept HA| HA true)=1-\beta\n=power"]; - HA->FN[label="P(reject HA| HA true)=\beta\n=type II"]; - - -} diff --git a/statistics/figs/testframework01.pdf b/statistics/figs/testframework01.pdf deleted file mode 100644 index 05070bb..0000000 Binary files a/statistics/figs/testframework01.pdf and /dev/null differ diff --git a/statistics/figs/twotailed.png b/statistics/figs/twotailed.png deleted file mode 100644 index 382a0a2..0000000 Binary files a/statistics/figs/twotailed.png and /dev/null differ diff --git a/statistics/figs/typeingqqplot.pdf b/statistics/figs/typeingqqplot.pdf deleted file mode 100644 index fdc24d4..0000000 Binary files a/statistics/figs/typeingqqplot.pdf and /dev/null differ diff --git a/statistics/progressionbar.tex b/statistics/progressionbar.tex deleted file mode 100644 index 836f5d1..0000000 --- a/statistics/progressionbar.tex +++ /dev/null @@ -1,123 +0,0 @@ - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: - - -\newenvironment<>{description}[1]{% - \begin{actionenv}#2% - \def\insertblocktitle{#1}% - \par% - \mode{% - \setbeamercolor{block title}{fg=white,bg=gray} - \setbeamercolor{block body}{fg=black,bg=gray!30} - % \setbeamercolor{itemize 
item}{fg=orange!20!black} - % \setbeamertemplate{itemize item}[triangle] - \setbeamerfont{block title}{family=\sffamily, series=\bfseries} - \setbeamerfont{block body}{family=\ttfamily} - }% - \usebeamertemplate{block begin}} - {\par\usebeamertemplate{block end}\end{actionenv}} - -\newenvironment<>{task}[1]{% - \begin{actionenv}#2% - \def\insertblocktitle{#1}% - \par% - \mode{% - \setbeamercolor{block title}{fg=black,bg=cyan!40} - \setbeamercolor{block body}{fg=black,bg=cyan!20} - % \setbeamercolor{itemize item}{fg=orange!20!black} - % \setbeamertemplate{itemize item}[triangle] - \setbeamerfont{block title}{series=\bfseries} - % \setbeamerfont{block body}{family=\ttfamily} - }% - \usebeamertemplate{block begin}} - {\par\usebeamertemplate{block end}\end{actionenv}} - -\newenvironment<>{summary}[1]{% - \begin{actionenv}#2% - \def\insertblocktitle{#1}% - \par% - \mode{% - \setbeamercolor{block title}{fg=black,bg=blue!40} - \setbeamercolor{block body}{fg=black,bg=blue!20} - % \setbeamercolor{itemize item}{fg=orange!20!black} - % \setbeamertemplate{itemize item}[triangle] - \setbeamerfont{block title}{series=\bfseries} - % \setbeamerfont{block body}{family=\ttfamily} - }% - \usebeamertemplate{block begin}} - {\par\usebeamertemplate{block end}\end{actionenv}} -%%%%%%%%%%%%%%%%%%% PROGRESSBAR %%%%%%%%%%%%%%%%%%%%%%%%%% - -\definecolor{pbblue}{HTML}{0A75A8}% filling color for the progress bar -\definecolor{pbgray}{HTML}{575757}% background color for the progress bar -\definecolor{pbgreen}{HTML}{57EE57}% green color for the progress bar - -\newcounter{slideminutes} -\newcounter{minutes} -\newcounter{totalminutes} -\setcounter{totalminutes}{0} -\setcounter{totalminutes}{105} - -\makeatletter -\def\progressbar@progressbar{} % the progress bar -\newcount\progressbar@tmpcounta% auxiliary counter -\newcount\progressbar@tmpcountb% auxiliary counter -\newcount\progressbar@tmpcountc% auxiliary counter -\newdimen\progressbar@pbht %progressbar height -\newdimen\progressbar@pbwd %progressbar width -\newdimen\progressbar@pbwda %progressbar width -\newdimen\progressbar@tmpdim % auxiliary dimension -\newdimen\progressbar@tmpdima % auxiliary dimension - -\progressbar@pbwd=\linewidth -\progressbar@pbht=1.5ex - - - -% the progress bar -\def\progressbar@progressbar{% - - % \progressbar@tmpcounta=\insertframenumber - % \progressbar@tmpcountb=\inserttotalframenumber - \progressbar@tmpcounta=\theminutes - \progressbar@tmpcountb=\thetotalminutes - \progressbar@tmpcountc=\theslideminutes - - \progressbar@tmpdim=\progressbar@pbwd - \divide\progressbar@tmpdim by \progressbar@tmpcountb - \multiply\progressbar@tmpdim by \progressbar@tmpcounta - - \progressbar@tmpdima=\progressbar@pbwd - \divide\progressbar@tmpdima by \progressbar@tmpcountb - \multiply\progressbar@tmpdima by \progressbar@tmpcountc - - \begin{tikzpicture}[rounded corners=2pt,very thin] - - \shade[top color=pbgray!20,bottom color=pbgray!20,middle color=pbgray!50] - (0pt, 0pt) rectangle ++ (\progressbar@pbwd, \progressbar@pbht); - - \shade[draw=pbblue,top color=pbblue!50,bottom color=pbblue!50,middle color=pbblue] % - (0pt, 0pt) rectangle ++ (\progressbar@tmpdim, \progressbar@pbht); - - \shade[draw=pbblue,top color=pbblue!50,bottom color=pbblue!50,middle color=pbgreen] % - (\progressbar@tmpdim, 0) rectangle ++ (\progressbar@tmpdima, \progressbar@pbht); - - \draw[color=normal text.fg!50] - (0pt, 0pt) rectangle (\progressbar@pbwd, \progressbar@pbht) - node[pos=0.5,color=normal text.fg] {\textnormal{\theminutes / - \thetotalminutes~ min done | the next 
\theslideminutes~ min will - be on } - }; - - \end{tikzpicture}% -} - -\addtobeamertemplate{headline}{} -{% - \begin{beamercolorbox}[wd=\paperwidth,ht=4ex,center,dp=1ex]{white}% - \progressbar@progressbar% - \end{beamercolorbox}% -} diff --git a/statistics/talk.tex b/statistics/talk.tex deleted file mode 100755 index 8161694..0000000 --- a/statistics/talk.tex +++ /dev/null @@ -1,2914 +0,0 @@ -\documentclass{beamer} -\usepackage{xcolor} -\usepackage{listings} -\usepackage{pgf} -%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade} -%\usepackage{multimedia} - -\usepackage[english]{babel} -\usepackage{movie15} -\usepackage[latin1]{inputenc} -\usepackage{times} -\usepackage{amsmath} -\usepackage{bm} -\usepackage[T1]{fontenc} -\usepackage[scaled=.90]{helvet} -\usepackage{scalefnt} -\usepackage{tikz} -\usepackage{ textcomp } -\usepackage{soul} -\usepackage{hyperref} -\definecolor{lightblue}{rgb}{.7,.7,1.} -\definecolor{mygreen}{rgb}{0,1.,0} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode -{ - \usetheme{Singapore} - \setbeamercovered{opaque} - \usecolortheme{tuebingen} - \setbeamertemplate{navigation symbols}{} - \usefonttheme{default} - \useoutertheme{infolines} - % \useoutertheme{miniframes} -} - -\AtBeginSection[] -{ - \begin{frame} - \begin{center} - \Huge \insertsectionhead - \end{center} - % \frametitle{\insertsectionhead} - % \tableofcontents[currentsection,hideothersubsections] - \end{frame} -} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5 - -\setbeamertemplate{blocks}[rounded][shadow=true] - -\title[]{Scientific Computing -- Statistics} -\author[Statistics]{Fabian Sinz\\Dept. Neuroethology, - University T\"ubingen\\ -Bernstein Center T\"ubingen} - -\institute[Scientific Computing]{} - \date{11/27/2013} -%\logo{\pgfuseimage{logo}} - -\subject{Lectures} - -%%%%%%%%%% configuration for code -\lstset{ - basicstyle=\ttfamily, - numbers=left, - showstringspaces=false, - language=Matlab, - commentstyle=\itshape\color{darkgray}, - keywordstyle=\color{blue}, - stringstyle=\color{green}, - backgroundcolor=\color{blue!10}, - breaklines=true, - breakautoindent=true, - columns=flexible, - frame=single, - captionpos=b, - xleftmargin=1em, - xrightmargin=1em, - aboveskip=10pt - } -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\newcommand{\mycite}[1]{ -\begin{flushright} -\tiny \color{black!80} #1 -\end{flushright} -} - -\input{../latex/environments.tex} -\makeatother - -\begin{document} - -\begin{frame} - \titlepage - -\end{frame} - -\begin{frame} - \frametitle{Plan} - \setcounter{tocdepth}{1} - \tableofcontents - -\end{frame} -\begin{frame} - \frametitle{Information \"uber Statistik} - \begin{itemize} - \item Samuels, M. L., Wittmer, J. A., \& Schaffner, - A. A. (2010). Statistics for the Life Sciences (4th ed., - p. 668). Prentice Hall. - \item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch, - Ed.)Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice - Hall. 
doi:10.1037/0012764 - \item \url{http://stats.stackexchange.com} - \end{itemize} -\end{frame} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section[meta-study]{how statisticians think - the meta-study} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% ---------------------------------------------------------- -\begin{frame}[fragile] -\frametitle{statisticians are lazy} -\Large -\only<1>{ - \begin{center} - \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-26-05_771.jpg} - \end{center} - \mycite{Larry Gonick, The Cartoon Guide to Statistics} -}\pause -\only<2>{ - \begin{center} - \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-41-39_523.jpg} - \end{center} - \mycite{Larry Gonick, The Cartoon Guide to Statistics} -}\pause -\only<3>{ - \begin{center} - \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-29-35_312.jpg} - \end{center} - \mycite{Larry Gonick, The Cartoon Guide to Statistics} -} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame} -\frametitle{the (imaginary) meta-study} -\begin{center} - \only<1>{ - \framesubtitle{finite sampling introduces variation: the sampling distribution} - \includegraphics[width=.8\linewidth]{figs/samplingDistribution.png} - \mycite{Hesterberg et al., Bootstrap Methods and Permutation - Tests} - }\pause - \only<2>{ - \framesubtitle{statistic vs. population parameter} - \includegraphics[width=.8\linewidth]{figs/statistic1.png} - \mycite{Hesterberg et al., Bootstrap Methods and Permutation - Tests} - }\pause - \only<3>{ - \framesubtitle{statistic vs. population parameter} - \includegraphics[width=.8\linewidth]{figs/statistic2.png} - \mycite{Hesterberg et al., Bootstrap Methods and Permutation - Tests} - }\pause - \only<4>{ - \framesubtitle{what parts of this diagram do we have in real life?} - - \includegraphics[width=.8\linewidth]{figs/samplingDistribution.png} - \mycite{Hesterberg et al., Bootstrap Methods and Permutation - Tests} - }\pause - \only<5>{ - \framesubtitle{what parts of this diagram do we have in real life?} - - \includegraphics[width=.8\linewidth]{figs/statistic3.png} - \mycite{Hesterberg et al., Bootstrap Methods and Permutation - Tests} - }\pause - \only<6->{ - \framesubtitle{what statistics does } - \begin{minipage}{1.0\linewidth} - \begin{minipage}{0.5\linewidth} - \includegraphics[width=1.\linewidth]{figs/statistic4.png} - \mycite{Hesterberg et al., Bootstrap Methods and Permutation - Tests} - \end{minipage} - \begin{minipage}{0.5\linewidth} - \begin{itemize} - \item it assumes, derives, or simulates the sampling - distribution\pause - \item the sampling distribution only makes sense if you think - about it in terms of the meta-study\pause - \item {\color{red} the sampling distribution is the key to - answering questions about the population from the value of - the statistic} - \end{itemize} - \end{minipage} - \end{minipage} - } - -\end{center} -\end{frame} - -% % ---------------------------------------------------------- -\begin{frame} -\frametitle{illustrating examples} -\begin{question}{lung volume of smokers} - Assume you know the sampling distribution of the mean lung volume - of smokers. Would you believe that - the sample came from a group of smokers? - \begin{center} - \includegraphics[width=.6\linewidth]{figs/example01.png} - \end{center} -\end{question} -\end{frame} - -\begin{frame} -\frametitle{illustrating examples} -\begin{question}{lung volume of smokers} - What about now?
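The meta-study that the deleted slides draw as a diagram can also be simulated directly. The sketch below is an addition, not part of the original slides: the population parameters and sample sizes are made up purely for illustration, and numpy stands in for the Matlab used elsewhere in the course.

# Added sketch (not from the slides): simulate the "meta-study" by repeating
# an experiment many times; all population parameters here are hypothetical.
import numpy as np

rng = np.random.default_rng(0)
pop_mean, pop_std = 4.5, 1.2     # hypothetical lung volumes (liters) in the population
n = 20                           # size of one (imaginary) experiment
n_experiments = 10000            # how often the meta-study repeats it

# each row is one experiment; the row means form the sampling distribution of the mean
sample_means = rng.normal(pop_mean, pop_std, size=(n_experiments, n)).mean(axis=1)

print("spread of the simulated means:", sample_means.std())
print("theoretical standard error:   ", pop_std / np.sqrt(n))

The spread of sample_means is the sampling distribution the slides keep referring to; asking whether an observed mean is plausible under that spread is exactly the question posed in the lung-volume example.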
How would the sampling distribution change if I - change the population to (i) athletes or (ii) old people? - \begin{center} - \includegraphics[width=.6\linewidth]{figs/example02.png} - \end{center} -\end{question} -\end{frame} - - -\begin{frame} -\frametitle{illustrating examples} -\begin{question}{Is this diet effective?} - \begin{center} - \includegraphics[width=.6\linewidth]{figs/example03.png} - \end{center} -\end{question} -\end{frame} - -\begin{frame} -\frametitle{illustrating examples} -\begin{question}{Is this diet effective?} - What do you think now? - \begin{center} - \includegraphics[width=.6\linewidth]{figs/example04.png} - \end{center} -\end{question} -\end{frame} - -\begin{frame} -\frametitle{summary} -\begin{itemize} -\item In statistics, we use finite samples from a population to reason - about features of the population. \pause -\item The particular feature of the population we are interested in is called - {\color{blue} population parameter}. We usually measure this - parameter in our finite sample as well - ({\color{blue}statistic}).\pause -\item Because of variations due to finite sampling the statistic - almost never matches the population parameter. \pause -\item Using the {\color{blue}sampling distribution} of the statistic, we make - statements about the relation between our statistic and the - population parameter. -\end{itemize} -\end{frame} - -\begin{frame} -\frametitle{outlook} -{\bf Questions to be addressed} -\begin{itemize} -\item How do we choose the statistic? -\item How do we get the sampling distribution? -\item How does statistical reasoning work in practice? -\end{itemize} -{\bf Perspective} -\begin{itemize} -\item We start by looking at a few standard distribution. -\item We will use those in the statistical tests that follow. -\item For each statistical test, I also try to provide a - non-parametric method. -\end{itemize} -\end{frame} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{probability primer} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{probability models} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\begin{frame} -\frametitle{getting the model right} -In statistics/probability it is important to select the correct -distribution. Models are easier to remember if you remember a -``standard situation''. - -\begin{itemize} -\item What is the distribution corresponding to throwing a coin? \pause -\item What in neuroscience/psychology is like throwing a coin (fair or - unfair)?\pause -\item What is the distribution of counting heads in repeated - independent coin tosses?\pause -\item What in neuroscience/psychology corresponds to counting heads in - repeated independent coin tosses? 
-\end{itemize} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame} -\frametitle{the different models} -\only<1>{ - \framesubtitle{Bernoulli distribution} - \begin{center} - \includegraphics[width=.4\linewidth]{figs/Bernoulli.pdf} - - \end{center} - -\begin{itemize} - \item single coin toss (success/ failure) - \item distribution $p(X=1)=p$ - \end{itemize} -}\pause -\only<2>{ - \framesubtitle{uniform distribution} - \begin{center} - \includegraphics[width=.4\linewidth]{figs/Uniform.pdf} - - \end{center} - -\begin{itemize} - \item $n$ items with the same probability of occurence - \item distribution $p(X=k)=\frac{1}{n}$ - \end{itemize} -}\pause -\only<3>{ - \framesubtitle{binomial distribution} - - \begin{center} - \includegraphics[width=.4\linewidth]{figs/Binomial00.pdf} - \includegraphics[width=.4\linewidth]{figs/Binomial01.pdf} - \end{center} - - \begin{itemize} - \item number of $k$ successes/heads in $n$ trials - \item distribution $P(X=k)= {n \choose - k} p^k (1-p)^{n-k}$ - \item parameters $n,p$ - \end{itemize} -}\pause -\only<4>{ - \framesubtitle{Poisson distribution} - - \begin{center} - \includegraphics[width=.4\linewidth]{figs/Poisson00.pdf} - \includegraphics[width=.4\linewidth]{figs/Poisson01.pdf} - \end{center} - - \begin{itemize} - \item successes per time unit for (very) large $n$ and small $p$ - \item distribution $P(X=k) = \frac{\lambda^k - e^{-\lambda}}{k!}$ - \item parameter: success rate $\lambda$ - \end{itemize} -} -\only<5>{ - \framesubtitle{Gaussian/ normal distribution} - - \begin{center} - \includegraphics[width=.4\linewidth]{figs/Gaussian00.pdf} - \end{center} - - \begin{itemize} - \item shows up everywhere (central limit theorem) - \item distribution $p(x) = \frac{1}{\sigma\sqrt{2\pi}}\operatorname{exp}\left\{-\frac{\left(x-\mu\right)^2}{2\sigma^2}\right\}$ - \item parameter: mean $\mu$, standard deviation $\sigma$ - \end{itemize} -} -\only<6>{ - \framesubtitle{caveat} - \begin{question}{important distinction} - \begin{itemize} - \item For {\em discrete} random variables $P(X=k)$ makes sense - (probabilities are like ``single weights''). - \item For {\em continuous} random variables $p(X=x)=0$ (probabilities - are like ``water''). - \item For {\em continuous} random variables it makes only sense to - ask for the probability that they take values in a particular - range. - \end{itemize} - \end{question} - -} - -\end{frame} - - -% ---------------------------------------------------------- - -\begin{frame} -\frametitle{example} -You place a mouse in a circular maze and place some food on the -opposite side. In each trial you record whether the mouse went {\em - left} (``L'') or {\em right} (``R'') to get the food. -\vspace{.5cm} - -\begin{minipage}{1.0\linewidth} - \begin{minipage}{0.59\linewidth} - \begin{itemize} - \item What kind of distribution would you expect for the number of - ``R'' in $10$ trials? What is the distribution of the number of - ``L''?\pause - \item Here is the result of $10$ trials: ``LLLLLLLLLL''. What is - the probability of that? - \item What do you conclude from that? 
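The mouse-maze question above can be checked numerically. This is an added sketch, not part of the original slides; it assumes a fair left/right choice (p = 0.5) and uses scipy.stats, the same library as the deleted figure scripts.

# Added sketch (not from the slides): probability of observing "LLLLLLLLLL",
# i.e. zero "R" choices in n = 10 trials, if left and right were equally likely.
from scipy import stats

n, p = 10, 0.5
p_all_left = stats.binom.pmf(0, n, p)   # same as 0.5**10
print(p_all_left)                        # ~0.00098

Under the fair model, ten left turns in a row have probability of roughly 0.001, which is what makes the observed sequence suspicious.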
- \end{itemize} - \end{minipage} - \begin{minipage}{0.4\linewidth} - \only<1->{ - \begin{center} - \includegraphics[width=1.\linewidth]{figs/Binomial00.pdf} - \end{center} - } - \end{minipage} -\end{minipage} -\end{frame} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{cumulative distribution function} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -% ---------------------------------------------------------- -\begin{frame}[fragile] - \frametitle{cumulative distribution function (c.d.f.)} - \framesubtitle{we will need that a lot in statistics} - \begin{itemize} - \item The c.d.f. is used to compute the probability that a random - variable is in a particular range. - - \item It is defined as $F(y) = P(X \le y)$ - - \item For the binomial distribution this would be - $$F(k) = P(\mbox{no. of - successes} \le k)\mbox{ in } n \mbox{ trials}$$ - - \item Where could I - see that probability in that plot for $k=5$ and $n=10$? - \begin{center} - \only<1>{ - \includegraphics[width=.5\linewidth]{figs/Binomial00.pdf} - } - \only<2>{ - \includegraphics[width=.5\linewidth]{figs/BinomialCdf00.pdf} - }\pause - \only<3>{ - \includegraphics[width=.5\linewidth]{figs/BinomialCdf01.pdf} - } - - \end{center} - \end{itemize} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame}[fragile] - \frametitle{cumulative distribution function (c.d.f.)} - \framesubtitle{example} - \small - You want to find out whether a subject performs significantly - different from chance in $10$ trials that either are successful or not. - \begin{itemize}[<+->] - \item What would be a good decision rule? - \item[] {\color{gray} We set thresholds on the number of successes - and decide that (s)he is performing at chance if the performance - falls within the thresholds.} - \item What is the distribution of the number of successes in $n=10$ - trials if the subject performs at chance? - \item[] {\color{gray} Binomial with $n=10$ and $p=\frac{1}{2}$} - \item Let's say we set the threshold at $k=2$ and $k=8$, what is the - probability that we think (s)he is {\em not} performing at chance, - even though (s)he is? - \end{itemize} -\end{frame} - -\begin{frame}[fragile] - \frametitle{cumulative distribution function (c.d.f.)} - \framesubtitle{example} - \small - \begin{itemize}[<+->] - \item Let's say we set the threshold at $k=2$ and $k=8$, what is the - probability that we think (s)he is {\em not} performing at chance, - even though (s)he is? - \item[] {\color{gray} The probability for that is $P(X \le 2 \mbox{ - or } X \ge 8)$. Using the c.d.f. that is - \begin{align*} - P(X \le 2 \mbox{ or } X \ge 8) &= P(X \le 2) + P(X \ge 8) - = P(X \le 2) + (1-P(X \le 7)) - \end{align*} - } - \end{itemize} - \only<2>{ - \begin{center} - \includegraphics[width=.5\linewidth]{figs/BinomialExample00.pdf} - \end{center} - } -\end{frame} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{joint and conditional distributions} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\begin{frame}[fragile] - \frametitle{conditional and marginal $\rightarrow$ joint distribution} - \framesubtitle{Bayes' rule} - \begin{itemize} -\small - \item Assume you ran decision experiments with two subject. Subject \#1 had a success - probability of $50\%$, while subject \#2 achieved $80\%$. - \item $70\%$ of the trials were run with the first subject, $30\%$ of - the trials with the other. 
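The chance-performance example above (thresholds at k = 2 and k = 8 successes out of n = 10) reduces to two c.d.f. evaluations. This is an added sketch, not from the original slides, assuming X ~ Binomial(10, 1/2) under the null hypothesis of chance performance.

# Added sketch (not from the slides): probability of wrongly concluding the
# subject is not at chance, P(X <= 2 or X >= 8) = P(X <= 2) + (1 - P(X <= 7)).
from scipy import stats

n, p = 10, 0.5
p_false_alarm = stats.binom.cdf(2, n, p) + (1 - stats.binom.cdf(7, n, p))
print(p_false_alarm)   # ~0.109

So with these thresholds the subject would be flagged as different from chance in roughly 11% of experiments even when truly performing at chance.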
- \item Each trial gets saved in a file on the hard disk.\pause - \item Now, let's assume your recording software had a bug and did not - store the subject ID in the file. - \item For a given file, we have two random variables now: subject ID $X$, - number of successes $Y$. - \end{itemize} - \begin{center} - \includegraphics[height=.32\linewidth]{figs/decision01.pdf} - \end{center} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame}[fragile] - \frametitle{joint and conditional distributions} - \framesubtitle{definitions} - \begin{definition}{Joint, marginal, and conditional distribution} - \begin{itemize} - \item The {\bf joint distribution $P(X,Y)$} gives the probability - that a particular combination of $X$ and $Y$ occur at the same - time. \pause - \item The {\bf marginal distributions $P(X)$ and $P(Y)$} specify - the probabilities that a particular value occurs if the value of - the other variable is ignored. \pause - \item The {\bf conditional distribution $P(X|Y)$} gives the - probability of particular values of $X$ given that $Y$ has - particular values. - \end{itemize}\pause - \end{definition} - \begin{center} {\color{blue} joint distribution - $\stackrel{\mbox{Bayes' Rule}}{\leftrightarrow}$ - marginal and conditional distribution} - \end{center} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame}[fragile] - \frametitle{conditional and marginal $\rightarrow$ joint distribution} - \framesubtitle{Bayes' rule} - \begin{itemize} -\small - \item Assume you ran decision experiments with two subject. Subject \#1 had a success - probability of $50\%$, while subject \#2 achieved $80\%$. - \item $70\%$ of the trials were run with the first subject, $30\%$ of - the trials with the other. - \item What probabilities do I need to write at the edges? - \item What distribution do I use for the subjects ID ($X$)? - \item What distribution do I use for the conditional distribution $Y|X$? - \end{itemize} - \begin{center} - \only<1>{\includegraphics[height=.32\linewidth]{figs/decision01.pdf}} - \only<2>{\includegraphics[height=.32\linewidth]{figs/decision02.pdf}} - \only<3>{\includegraphics[height=.32\linewidth]{figs/decision03.pdf}} - \end{center} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame}[fragile] - \frametitle{conditional and marginal $\rightarrow$ joint distribution} - \framesubtitle{Bayes' rule} - \begin{itemize} -\small - \item The joint probability are obtained by multiplying the - probabilities along the paths from the root note to the leaves. - \begin{center} - \includegraphics[height=.32\linewidth]{figs/decision03.pdf} - \end{center}\pause - \item In algebraic terms, this is known as {\em Bayes' rule} (very important!) - $$\color{red} P(Y|X)P(X) = P(X|Y)P(Y) = P(X,Y)$$\pause - \item You can remember it as ``moving variables in front of the - bar'' - $$P(X|Y) P(Y) = P(X,Y|\_)$$ - \end{itemize} - -\end{frame} - -% ---------------------------------------------------------- -\begin{frame}[fragile] - \frametitle{Bayes' rule} - $$P(X|Y)P(Y) = P(Y|X)P(X) = P(X,Y)$$ - - \begin{task}{Independent random variables} - If two random variables are independent, the joint distribution is - the product of their marginals $$ P(X,Y) =P(X) P(Y)$$ - How can you see that from Bayes' rule? 
- \end{task} - \pause - - \begin{solution}{Solution} - If the variables are independent $P(X|Y) = P(X)$ and $P(Y|X) = - P(Y)$: The probability of $X$ is the same as the probability of - $X$ given that I know $Y$, because knowing $Y$ does not help. - \end{solution} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame}[fragile] - \frametitle{Joint $\rightarrow$ marginal and conditional distribution} - \begin{itemize} -\small - \item The plot shows the joint distribution $P(X,Y)$, where $X$ is - the subject id and $Y$ the number of successes in $n=10$ trials. - \begin{center} - \only<-1>{\includegraphics[width=.83\linewidth]{figs/Joint00.pdf}} - \only<2>{\includegraphics[width=.83\linewidth]{figs/Joint01.pdf}} - \only<3>{\includegraphics[width=.83\linewidth]{figs/Joint02.pdf}} - \end{center} - -\only<-1>{ \vspace{2cm}} -\only<2-3>{ \item We can get the marginal distributions via {\em - marginalization} (very important!): - $$\color{red} P(Y) =\sum_{i=1}^2P(X=i, Y) \mbox{ and } P(X) = - \sum_{j=0}^{n} P(X, Y=j)$$} -\only<3->{ \item We can get the conditional distribution via Bayes' rule: - $$P(X|Y)P(Y) = P(X,Y) \Leftrightarrow P(X|Y) = \frac{P(X,Y)}{P(Y)}$$} -\only<-2>{ \vspace{2cm}} - \end{itemize} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame}[fragile] - \frametitle{The posterior} - \begin{itemize} - \small - \item Could we use the probability distribution to get an idea which - subject the number of successes came from?\pause - \item Use Bayes' rule to ``invert'' the conditional distribution - $$P(X|Y=k) = P(X,Y=k)/P(Y=k)$$ - \end{itemize} - \begin{center} - \only<-2>{\includegraphics[height=.28\linewidth]{figs/Joint02.pdf}} - \only<3->{\includegraphics[height=.53\linewidth]{figs/Posterior00.pdf}} - \end{center} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{summary} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -% ---------------------------------------------------------- -\begin{frame}[fragile] - \frametitle{summary} - \begin{itemize} - \item We need to know certain distributions to use them as sampling - distribution. \pause - \item For many distributions one can use a ``standard situation'' to - remember them. \pause - \item When dealing with two or more random variables one deals with - {\color{blue}joint, marginal}, and {\color{blue}conditional - distributions}.\pause - \item Marginal and conditional distributions can be converted into - the joint distribution via {\color{blue}Bayes' rule}.\pause - \item The conversion in the other direction can be done via - {\color{blue}marginalization} and {\color{blue}Bayes' rule}. - \end{itemize} -\end{frame} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{error bars \& confidence intervals} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% ---------------------------------------------------------- -\subsection{errorbars} -% ---------------------------------------------------------- -\begin{frame} -\frametitle{illustrating example} - -As part of a study of the development of the thymus gland, researcher -weighed the glands of $50$ chick embyos after 14 days of -incubation. 
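The two-subject example of the preceding frames (success probabilities 50% and 80%, with 70% and 30% of the trials) can be tabulated numerically. The sketch below is an addition, not part of the original slides; it only re-implements the joint/marginal/posterior construction described there.

# Added sketch (not from the slides, but using their numbers): joint, marginal
# and posterior for the two-subject example, X = subject id, Y = #successes in 10 trials.
import numpy as np
from scipy import stats

n = 10
k = np.arange(n + 1)
prior = np.array([0.7, 0.3])      # P(X): fraction of trials per subject
success = np.array([0.5, 0.8])    # success probability of each subject

# joint P(X, Y) via Bayes' rule: P(Y|X) * P(X), one row per subject
joint = np.vstack([stats.binom.pmf(k, n, p) * q for p, q in zip(success, prior)])

p_y = joint.sum(axis=0)           # marginal P(Y) by marginalization over subjects
posterior = joint / p_y           # P(X | Y=k), each column sums to one

print(posterior[:, 8])            # which subject does a file with 8 successes point to?

Each column of posterior is P(subject | k successes), i.e. the quantity plotted in the deleted Posterior00.pdf figure.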
The following plot depicts the mean thymus gland weights in (mg): -\mycite{modified from SWS exercise 6.3.3.} -\pause -{\bf Which of the two bar plots is the correct way of displaying the - data?} - -\begin{columns} - \begin{column}[l]{.5\linewidth} - \includegraphics[width=\linewidth]{figs/StandardErrorOrStandardDeviation.pdf} - \end{column} - \begin{column}[r]{.5\linewidth} - \pause That depends on what you want to say - \begin{itemize} - \item To give a measure of variability in the data: use the - {\color{blue} standard deviation $\hat\sigma = - \sqrt{\frac{1}{n-1}\sum_{i=1}^n (x_i - \hat\mu)^2}$} - \item To make a statement about the variability in the mean - estimation: use {\color{blue}standard error $\frac{\hat\sigma}{\sqrt{n}}$} - \end{itemize} - \end{column} -\end{columns} - -%%%%%%%%%%%%%%% GO ON HERE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% that depends: variability (descriptiv statistics, how variable is -% the mean -> inferential, makes only sense in the meta-study setting) -% first matlab exercise: simulate standard error -% recommend paper for eyeballing test results from standard errors -% from std of mean to confidence intervals -% introduce bootstrapping (matlab exercise), then t-statistic -% intervals -% end with standard error of the median (and the thing from wikipedia) -\end{frame} -%------------------------------------------------------------------------------ -\begin{frame} - \frametitle{standard error} - \framesubtitle{bootstrapping} - - \begin{task}{quantifying the variability in the mean} - Download \url{https://www.dropbox.com/s/20l7ptrdc4kkceq/materialNMI.zip} - - Load the dataset {\tt thymusglandweights.dat} into matlab and use - the first $50$ datapoints as your dataset. Repeat the following - steps $m=500$ times: - \begin{enumerate} - \item sample $50$ data points from $x$ with replacement - \item compute their mean and store it - \end{enumerate} - Look at the standard deviation of the computed means and compare - it to the standard error. - \end{task} -\end{frame} - -%------------------------------------------------------------------------------ -\begin{frame}[fragile] - \frametitle{standard error} - \framesubtitle{bootstrapping} - \begin{itemize} - \item The sample standard error $\frac{\hat\sigma}{\sqrt{n}}$ is - {\color{blue}an estimate of the standard deviation of the means} - in repeated experiments which is computed form a single - experiment. - \item When you want to do statistical tests on the mean, it is - better to use the standard error, because one can eyeball - significance from it - \mycite{Cumming, G., Fidler, F., \& Vaux, D. L. (2007). Error bars - in experimental biology. The Journal of Cell Biology, 177(1), - 7--11.} - \item {\color{blue}Bootstrapping} is a way to generate an estimate - of the {\color{blue}sampling distribution of any statistic}. Instead of - sampling from the true distribution, it samples from the - empirical distribution represented by your dataset. - \mycite{Efron, B., \& Tibshirani, R. J. (1994). An Introduction to the Bootstrap. 
Chapman and Hall/CRC} - \end{itemize} -\end{frame} - -%------------------------------------------------------------------------------ -\begin{frame}[fragile] - \frametitle{standard error of the median?} - {\bf What kind of errorbars should we use for the median?} - - It depends again: - - {\bf Descriptive statistics} - \begin{itemize} - \item As a {\color{blue}descriptive statistic} one could use the {\em median - absolute deviation}: the median of the absolute differences of - the datapoints from the median. - \item Alternatively, one could bootstrap a standard deviation of the - median. - \end{itemize} - \pause - {\bf Inferential statistics} - \begin{itemize} - \item For {\color{blue}inferential statistics} one should use - something that gives the reader {\color{blue}information about - significance}. - \item Here, {\color{blue} confidence intervals} are a better choice. - \end{itemize} -\end{frame} - -% ---------------------------------------------------------- -\subsection{confidence intervals \& bootstrapping} -%------------------------------------------------------------------------------ -\begin{frame} -\frametitle{confidence intervals} -\begin{center} - \only<1>{ - \vspace{.1cm} - \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-55-39_181.jpg} - \mycite{Larry Gonick, The Cartoon Guide to Statistics} - - }\pause - \only<2>{ - \vspace{.1cm} - \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-56-59_866.jpg} - \mycite{Larry Gonick, The Cartoon Guide to Statistics} - }\pause - \only<3>{ - \vspace{.1cm} - \includegraphics[width=.4\linewidth]{figs/2012-10-29_14-58-18_054.jpg} - \mycite{Larry Gonick, The Cartoon Guide to Statistics} - }\pause - \only<4>{ - \vspace{.1cm} - \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-59-05_984.jpg} - \mycite{Larry Gonick, The Cartoon Guide to Statistics} - }\pause - \only<5>{ - \vspace{.1cm} - \includegraphics[width=.6\linewidth]{figs/2012-10-29_15-04-38_517.jpg} - \mycite{Larry Gonick, The Cartoon Guide to Statistics} - }\pause - \only<6>{ - \vspace{.1cm} - \includegraphics[width=.6\linewidth]{figs/2012-10-29_15-09-25_388.jpg} - \mycite{Larry Gonick, The Cartoon Guide to Statistics} - } -\end{center} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame} - \frametitle{confidence intervals for the median} - \begin{definition}{Confidence interval} - A confidence $(1-\alpha)\cdot 100\%$ interval for a statistic - $\hat\theta$ is an interval $\hat\theta \pm a$ such that the - population parameter $\theta$ is contained in that interval - $(1-\alpha)\cdot 100\%$ of the experiments. - - An alternative way to put it is that $(\hat\theta - \theta) \in - [-a,a]$ in $(1-\alpha)\cdot 100\%$ of the cases. - \end{definition} - - -\begin{columns} - \begin{column}[l]{.5\linewidth} - If we knew the sampling distribution of the median $\hat m$, could - we generate a e.g. a $95\%$ confidence interval?\pause - \vspace{.5cm} - - Yes, we could choose the interval such that $\hat m - m$ in that - interval in $95\%$ of the cases. - \end{column} - \begin{column}[r]{.5\linewidth} - \only<1>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian00.pdf}} - \only<2>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian01.pdf}} - \end{column} -\end{columns} - - - % \begin{task}{Bootstrapping a confidence interval for the median} - % \begin{itemize} - % \item Use the same dataset as before. - % \item Bootstrap $500$ medians. 
- % \item Compute the $2.5\%$ and the $97.5\%$ percentile of the - % $500$ medians. - % \end{itemize} - % \end{task} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame} - \frametitle{confidence intervals for the median} - \framesubtitle{how to get the sampling distribution} - - \begin{task}{Bootstrapping a confidence interval for the median} - \begin{itemize} - \item Use the same dataset as before. - \item Bootstrap $500$ medians. - \item Compute the $2.5\%$ and the $97.5\%$ percentile of the - $500$ medians. - \end{itemize} - These two numbers give you $\hat m -a$ and $\hat m + a$ for - the $95\%$ confidence interval. - \end{task} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame} - \frametitle{confidence intervals for the median} - \framesubtitle{how to get it analytically} - There is also an analytical estimation oft the confidence interval - for the median: Use the $\frac{\alpha}{2}$ and $1 - \frac{\alpha}{2}$ - quantile of a binomial distribution. - - - \begin{task}{Comparing the analytical interval to the bootstrapped} - \begin{itemize} - \item Get the $\frac{\alpha}{2}$ quantile minus one and $1 - - \frac{\alpha}{2}$ quantile of a binomial distribution using {\tt - binoinv}. - \item Sort you data points and use the data points at the position - corresponding to the quantiles. - \item Compare that to the bootstrapped confidence interval. - \end{itemize} - \end{task} - \tiny The idea behind this: - \begin{itemize} - \item The probability that the true median $m$ is covered by the - interval between $x_r$ and $x_{r+1}$ is binomial $${n \choose r} - \left(\frac{1}{2}\right)^r \left(\frac{1}{2}\right)^{n-r}$$ - \item No we take enough intervals in the ``middle'' of our sample - that we cover the true median with at least $1-\alpha$ - probability. - \mycite{David, H. A., \& Nagaraja, H. N. (2003). Order Statistics. MES (Vol. 1, p. 482). Wiley. doi:10.1016/j.bpj.2010.07.012} - \end{itemize} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame} - \frametitle{confidence intervals} - \framesubtitle{Notice the theme!} - \begin{enumerate} - \item choose a statistic - \item get a the sampling distribution of the statistic (by theory or - simulation) - \item use that distribution to reason about the relation between the - true population parameter (e.g. $m$) and the sampled statistic - $\hat m$ - \end{enumerate} - - \begin{center} - \color{blue} - This is the scaffold of most statistical techniques. Try to find - it and it can help you understand them. - \end{center} - -\end{frame} - - -% ---------------------------------------------------------- -\begin{frame} -\frametitle{let's practice that again} -\framesubtitle{confidence interval for the mean} - -\begin{task}{Bootstrapping a confidence interval for the mean} - \begin{itemize} - \item Use the same dataset as before. - \item Use bootstrapping to get a $95\%$ confidence interval for - the mean. - \end{itemize} -\end{task} - -\end{frame} - -% ---------------------------------------------------------- -\begin{frame} -\frametitle{confidence interval for the mean} -\framesubtitle{confidence interval for the mean} -Getting a convenient sampling distribution is (a little bit) more -difficult: -\begin{itemize} -\item If the $x_1,...,x_n\sim \mathcal N(\mu,\sigma)$ are Gaussian, then $\hat\mu$ is Gaussian as - well -\item What is the mean of $\hat\mu$? 
What is its standard deviation?\pause -\item[]{\color{gray} $\langle\hat\mu\rangle_{X_1,...,X_n} = \mu$ and - $\mbox{std}(\hat\mu) = \frac{\sigma}{\sqrt{n}}$}\pause -\item The problem is, that $\hat\mu \sim \mathcal N\left(\mu, - \frac{\sigma}{\sqrt{n}}\right)$ depends on unknown population - parameters.\pause -\item However, $$\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}} \sim - \mbox{t-distribution with }n-1\mbox{ degrees of freedom}$$ -\item Therefore, -\begin{align*} - P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right) -\end{align*} -\end{itemize} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame} -\frametitle{confidence interval for the mean} -\begin{task}{Bootstrapping a confidence interval for the mean} - Extend your script to contain the analytical confidence - interval using -\begin{align*} - P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right) -\end{align*} -\end{task} - -\end{frame} - -% ---------------------------------------------------------- -\subsection{summary} -% ---------------------------------------------------------- - -\begin{frame} -\frametitle{summary} -\begin{emphasize}{Which errorbars should I choose?} - Always use errorbars to help the reader see your point. -\end{emphasize} -\pause - \begin{itemize} - \item Errorbars can {\color{blue} describe the variability} in a dataset - ({\color{blue}descriptive statistics}). Example: {\em standard deviation, inter-quartile - range, ...} - \item {\color{blue}Errorbars yield information about significance in testing - (inferential statistics)}. Examples: {\em standard error of the mean, confidence - intervals, ...} - \item Other possible ways of displaying variability: {\em - boxplots, violin plots, histograms, ...} - \end{itemize} -\end{frame} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{statistical tests} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{one-sample test on the mean} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% ---------------------------------------------------------- -\begin{frame} -\frametitle{from confidence intervals to one-sample test} - -\begin{task}{example: eye movements} - \small - In an experiment you measure eye movements of subjects on the - screen. You want be sure that the subject fixates a certain target - (at $x=0$). During the fixation period, you aquire $n=16$ - measurements. The measurements have a mean of $\hat\mu=2.5$ and a - standard deviation of $\hat\sigma=4$. Assuming that the single - fixation locations are Gaussian distributed, can you be $95\%$ - confident that the subject focused the target (x-Position)? -\end{task} -\pause -\begin{solution}{use confidence intervals} - \small - Compute a $95\%$ confidence interval: Does it contain - $\mu=0$? Yes? Then we are $95\%$ confident! - - From the table we get $t_{0.025}=2.131$, the standard error is - $\frac{\hat\sigma}{\sqrt{n}} = \frac{4}{\sqrt{16}}=1$ which means - that $$0\pm t_{0.025}\frac{\hat\sigma}{\sqrt{n}} = 0 \pm 2.131$$ - is our confidence interval. Therefore we cannot be $95$\% - confident in this case. 
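% A minimal Matlab sketch for the bootstrap tasks above (standard error and
% confidence intervals of the mean). It assumes thymusglandweights.dat is a
% plain text column of numbers; m = 500 resamples as in the exercise.
%
%   x = load('thymusglandweights.dat');  x = x(1:50);  n = numel(x);
%   m = 500;  bootmeans = zeros(m, 1);
%   for i = 1:m
%       bootmeans(i) = mean(x(randi(n, n, 1)));   % resample with replacement
%   end
%   std(bootmeans)                                % compare to the standard error:
%   std(x)/sqrt(n)
%   ciBoot = prctile(bootmeans, [2.5 97.5]);      % bootstrapped 95% interval
%   ciT = mean(x) + tinv([0.025 0.975], n-1)*std(x)/sqrt(n);   % t-based interval
%   % the same recipe with median() instead of mean() gives the median interval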
-\end{solution} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame} -\frametitle{from confidence intervals to one-sample test} -\begin{task}{example: eye movements} - Could we put the interval on $\mu=0$ as well? -\end{task} -\pause -\begin{solution}{Example: eye movements} - Yes, if the interval around $\hat\mu$ contains $\mu$, then the - interval around $\mu$ also contains $\hat\mu$. -\end{solution} - - -\end{frame} - -% ---------------------------------------------------------- -\begin{frame} -\frametitle{One-sample t-test} - -\begin{task}{example 2: eye movements again} - \small - Now assume that there is a fixation target at $x=0$. You are - running the experiment with a monkey and you want to discard all - trials in which the monkey was not fixating the target. - - During the trial, you aquire again $n=16$ measurements with mean - $\hat\mu=2.5$ and standard deviation $\hat\sigma=4$. How can you be - confident that the monkey did not fixate the target if you are - willing to be wrong in $5\%$ of the cases if ``wrong'' means that - you believe the subject was not fixating when in fact it was. -\end{task} -\pause -\begin{solution}{Example 2: eye movements again} - \small -The steps to the solution is exactly the same, only the logic is -different. -\begin{itemize} -\item We make a $95\%$ confidence around the fixation target - $\mu=0$. This means that if the monkey was actually fixating the - target, $95\%$ of the measured averaged positions $\hat\mu$ would - fall into that interval. -\item $5\%$ of the measured would fall outside the interval even - though the monkey fixated and we would falsely treat them as not as ``not - fixated''. -\end{itemize} -\end{solution} - -\end{frame} - -% ---------------------------------------------------------- -\begin{frame} -\frametitle{one-sample t-test} -\framesubtitle{Notice the theme again!} -\only<1>{ - \begin{center} - \includegraphics[width=0.4\linewidth]{figs/repetition0.png} - \end{center} - \begin{enumerate} - \small - \item Choose a statistic! We take the standardized mean $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$. - \end{enumerate} -}\pause -\only<2>{ - \begin{center} - \includegraphics[width=0.4\linewidth]{figs/repetition1.png} - \end{center} - \begin{enumerate} - \small - \item Choose a statistic! We take the standardized mean $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$. - \item Get a sampling distribution! Here, we get it by assuming that - the positions $x_1,...,x_{16}$ are Gaussian. - \end{enumerate} -}\pause -\only<3>{ - \begin{center} - \includegraphics[width=0.4\linewidth]{figs/repetition2.png} - \end{center} - \begin{enumerate} - \small - \item Choose a statistic! We take the standardized mean $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$. - \item Get a sampling distribution! Here, we get it by assuming that - the positions $x_1,...,x_{16}$ are Gaussian. The resulting - distribution of $t$ is a t-distribution. - \end{enumerate} -}\pause -\only<4>{ - \begin{center} - \includegraphics[width=0.4\linewidth]{figs/repetition3.png} - \end{center} - \begin{enumerate} - \small - \item Choose a statistic! We take the standardized mean $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$. - \item Get a {\color{blue}null distribution}! Here, we get it by assuming that - the positions $x_1,...,x_{16}$ are Gaussian. The resulting - distribution of $t$ is a t-distribution. - \item Get an interval around $\mu=0$ in which values of $\hat\mu$ - are assumed typical for $\mu=0$, the {\color{blue}null hypothesis - $H_0$}. 
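% The eye-movement example above can be checked numerically; a short sketch
% using the values given on the slide (n = 16, mean 2.5, standard deviation 4):
%
%   n = 16;  mu_hat = 2.5;  sigma_hat = 4;
%   t  = (mu_hat - 0) / (sigma_hat / sqrt(n));   % standardized mean, t = 2.5
%   tc = tinv(0.975, n - 1);                     % critical value, about 2.131
%   reject = abs(t) > tc;                        % true: reject H_0 at alpha = 0.05
%   p = 2 * (1 - tcdf(abs(t), n - 1));           % two-tailed p-value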
- \end{enumerate} -} -\pause -\only<5>{ - \begin{center} - \includegraphics[width=0.4\linewidth]{figs/repetition5.png} - \end{center} - \begin{enumerate} - \small - \item Choose a statistic! We take the standardized mean - $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$. - \item Get a {\color{blue}null distribution}! Here, we get it by assuming that - the positions $x_1,...,x_{16}$ are Gaussian. The resulting - distribution of $t$ is a t-distribution. - \item Get an interval around $\mu=0$ in which values of $\hat\mu$ - are assumed typical for $\mu=0$, the {\color{blue}null hypothesis - $H_0$}. This is done by fixing the {\color{blue}type I error} probability. - \end{enumerate} -} -\pause -\only<6>{ - \begin{center} - \includegraphics[width=0.4\linewidth]{figs/repetition4.png} - \end{center} - \begin{enumerate} - \small - \item Choose a statistic! We take the standardized mean - $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$. - \item Get a {\color{blue}null distribution}! Here, we get it by assuming that - the positions $x_1,...,x_{16}$ are Gaussian. The resulting - distribution of $t$ is a t-distribution. - \item Get an interval around $\mu=0$ in which values of $\hat\mu$ - are assumed typical for $\mu=0$, the {\color{blue}null hypothesis - $H_0$}. This is done by fixing the {\color{blue}type I error} probability. - \item Outside that interval we consider $\mu=0$ as implausible and - reject $H_0$. - \end{enumerate} -} - -\end{frame} - - -% ---------------------------------------------------------- -\subsection{another one-sample test} -% ---------------------------------------------------------- -\begin{frame} -\frametitle{another one-sample test} -\begin{task}{Fair coin?} - \small - Assume you carry out the following test to determine whether a coin - is fair or not: - - You throw the coin $n=3$ times. If the result is either $3\times$ - head or $3\times$ tail, you conclude that the coin is not fair. - - Answer the following questions (for yourself first): - \begin{enumerate} - \item What is the meta-study? \pause {\em Repeated experiments of 3 throws - with this the coin.}\pause - \item What is the statistic used? \pause {\em The number of heads (could also - be tails).}\pause - \item What is $H_0$? \pause {\em The coin is fair.}\pause - \item What is the Null distribution? \pause {\em The distribution is - binomial $$p(k \mbox{heads in }n \mbox{ throws})={n \choose k} - \left(\frac{1}{2}\right)^k \left(\frac{1}{2}\right)^{n-k} $$}\pause - \item What is the Type I error of this test? \pause {\em $p(HHH|H_0) + p(TTT|H_0) = \frac{2}{8}$} - \end{enumerate} -\end{task} -\end{frame} - -% ---------------------------------------------------------- -\subsection{paired sample t-test} -% ---------------------------------------------------------- -\begin{frame} -\frametitle{paired sample t-test} -\begin{task}{Hunger Rating (SWS, Example 3.2.4)} - \begin{minipage}{1.0\linewidth} - \begin{minipage}{0.5\linewidth} -\small During a weight loss study each of nine subjects was given either the -active drug m-chlorophenylpiperazine (mCPP) for two weeks and then a placebo -for another two weeks, or else was given the placebo for the first two weeks and -then mCPP for the second two weeks. Can we say that there was an -effect with significance level $5$\%? - \end{minipage} - \begin{minipage}{0.5\linewidth} -\begin{center} - \includegraphics[width=0.8\linewidth]{figs/hunger.png} -\end{center} - \end{minipage} - - \end{minipage} - \vspace{.5cm} - - What could we use as statistic? - What is $H_0$? 
- Is the difference significant?
-\end{task}
-\end{frame}
-
-\begin{frame}
-\frametitle{paired sample t-test}
-\begin{solution}{Hunger Rating (SWS, Example 3.2.4)}
-  \begin{minipage}{1.0\linewidth}
-    \begin{minipage}{0.5\linewidth}
-      \small
-      \begin{enumerate}
-      \item The statistic is the mean of the differences between drug and placebo.\pause
-      \item $H_0$ is ``there is no difference'', i.e. the true mean of
-        the differences is zero. \pause
-      \item The standard error is $33/\sqrt{9}=11$.\pause
-      \item $n-1=8$ DoF yields (t-distribution table) $t_{0.025}=2.306$, so we
-        would reject $H_0$ if $\hat\mu$ is not in $0\pm t_{0.025}\cdot 11 = \pm
-        25.366$. \pause
-      \item This means the difference is significant with $\alpha=0.05$.
-      \end{enumerate}
-    \end{minipage}
-    \begin{minipage}{0.5\linewidth}
-\begin{center}
-  \includegraphics[width=0.8\linewidth]{figs/hunger.png}
-\end{center}
-    \end{minipage}
-
-  \end{minipage}
-\end{solution}
-\end{frame}
-
-
-\begin{frame}
-\frametitle{paired sample t-test}
-\begin{itemize}
-\item a paired sample consists of a number of {\em paired}
-  measurements (e.g. before/after)\pause
-\item compute the differences (either there should be many of them, or
-  check that they are approx. Gaussian distributed)\pause
-\item use a one-sample t-test on the differences
-\end{itemize}
-\end{frame}
-% ----------------------------------------------------------
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\subsection{sign test}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% ----------------------------------------------------------
-\begin{frame}
-\frametitle{sign test}
-\begin{task}{Hunger Rating (SWS, Example 3.2.4)}
-  \small
-  \begin{minipage}{1.0\linewidth}
-    \begin{minipage}{0.5\linewidth}
-      \small Consider again the example data from before. Instead of
-      taking the difference, we now consider only whether ``drug'' was
-      smaller or greater than ``placebo''. We then count the number of
-      times for which ``drug''$<$``placebo'' and the number of times
-      ``drug''$>$``placebo''.
-    \end{minipage}
-    \begin{minipage}{0.5\linewidth}
-\begin{center}
-  \includegraphics[width=0.5\linewidth]{figs/hunger.png}
-\end{center}
-    \end{minipage}
-  \end{minipage}
-  \begin{itemize}
-  \item What is the statistic?\pause {\em The number $N_+$ of ``>''
-      or the number $N_-$ of ``<''.} \pause
-
-  \item What is $H_0$? \pause {\em $N_+ = N/2$}
-    \pause
-  \item What is $H_A$? \pause {\em $N_+ > N/2$ or $N_+ < N/2$}
-    \pause
-  \item What is the Null distribution? \pause {\em Binomial with $p=0.5$}
-    \pause
-  \item Given $\alpha$, how is the region determined in which we
-    reject $H_0$? \pause {\em Choose $a$ such that
-      $P(k>a|H_0) + P(k<n-a|H_0) \le \alpha$.}
-  \end{itemize}
-\end{task}
-\end{frame}
-
-% ----------------------------------------------------------
-\begin{frame}
-\frametitle{the testing framework}
-  \begin{center}
-    \only<1>{\includegraphics[width=\linewidth]{figs/testframework00.pdf}}
-    \only<2>{\includegraphics[width=\linewidth]{figs/testframework01.pdf}}
-  \end{center}
-\small
-\begin{columns}
-  \begin{column}[l]{.5\linewidth}
-{\bf You want:}
-\begin{itemize}
-\item large power
-\item small type I \& II error probability ($\alpha$ and $\beta$)
-\end{itemize}
-  \end{column}
-  \begin{column}[r]{.5\linewidth}
-\begin{itemize}
-\item \hyperlink{sec:power}{\color{magenta}detour II: statistical power} \hypertarget{back:power}{}
-\item \hyperlink{sec:bayesian}{\color{magenta}detour III: Bayes rule
-    and statistical tests} \hypertarget{back:bayesian}{}
-\end{itemize}
-  \end{column}
-\end{columns}
-
-Which of the above can {\bf you} choose?
\pause {\em the type I error
-    probability $\alpha$}
-
-
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\subsection{zoo of statistical tests}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-
-
-
-\begin{frame}
-\hypertarget{back:detourIV}{}
-\frametitle{how to choose the statistical test}
-\begin{center}
-  \includegraphics[height=.38\linewidth]{figs/fig0.pdf}
-\end{center}
-\begin{itemize}
-\item Normality can be checked with a QQ-plot
-  (\hyperlink{sec:qqplots}{\color{magenta} detour IV: QQ-plots}).
-\item If $n$ is large and the variance of the data distribution is
-  finite, the central limit theorem guarantees normality for
-  ``summed statistics''.
-\end{itemize}
-\end{frame}
-
-
-
-% ------------
-
-\begin{frame}
-\frametitle{}
-\begin{center}
-  \includegraphics[height=.6\linewidth]{figs/fig2.pdf}
-\end{center}
-
-\end{frame}
-% ------------
-
-\begin{frame}
-\frametitle{}
-\begin{center}
-  \includegraphics[height=.6\linewidth]{figs/fig3.pdf}
-\end{center}
-
-\end{frame}
-% ------------
-
-%-----------------------------------------------------------------
-%-----------------------------------------------------------------
-\begin{frame}
-\frametitle{tests for normal data}
-\begin{task}{menstrual cycle}
-  The data set {\tt menstrual.dat} contains the lengths of the
-  menstrual cycles in a random sample of 15 women. Assume we want to
-  test the hypothesis that the mean length of the human menstrual
-  cycle is equal to a lunar month ($29.5$ days). Consider the data to
-  be sufficiently normal.
-
-  Questions:
-  \begin{itemize}
-  \item What is $H_0$? What is $H_A$? \pause $H_0: \hat\mu=29.5$,
-    $H_A: \hat\mu\not=29.5$ \pause
-  \item What is the test statistic? \pause $t=\frac{\hat\mu -
-      29.5}{\hat\sigma/\sqrt{n}}$ \pause
-  \item Which test should you use and why? {\em One sample t-test: data
-      normal, one sample against a fixed mean.}
-  \end{itemize}
-\end{task}
-
-\hyperlink{sec:twotailed}{\color{magenta}detour I: one- vs. two-tailed}
-\hypertarget{back:twotailed}{}
-\end{frame}
-
-%-----------------------------------------------------------------
-
-\begin{frame}
-\frametitle{}
-\begin{center}
-  \includegraphics[height=.6\linewidth]{figs/fig4.pdf}
-\end{center}
-
-\end{frame}
-
-
-% ----------------------------------------------------------
-\begin{frame}
-  \frametitle{}
-  \begin{task}{chirping}
-    A scientist conducted a study of how often her pet parakeet
-    chirps. She recorded the number of distinct chirps the parakeet
-    made in a 30-minute period, sometimes when the room was silent and
-    sometimes when music was playing. The data are shown in the
-    following table. Test whether the bird changes its chirping
-    behavior when music is playing (data set {\tt
-      chirping.dat}; columns: day, with, without).
-
-    Questions:
-    \begin{itemize}
-    \item What is $H_0$? What is $H_A$? \pause
-      $d_i=x_{\mbox{with}}-x_{\mbox{without}}$. $H_0: \hat\mu_d=0$,
-      $H_A: \hat\mu_d\not=0$ \pause
-    \item What is the test statistic? \pause $t=\frac{\hat\mu_d -
-      0}{\hat\sigma_d/\sqrt{n}}$ \pause
-    \item Which test should you use and why?
\pause {\em Paired t-test: data
-      sufficiently normal, measurements are paired by day.}
-    \end{itemize}
-  \end{task}
-
-\end{frame}
-
-%-----------------------------------------------------------------
-
-\begin{frame}
-\frametitle{}
-\begin{center}
-  \includegraphics[height=.7\linewidth]{figs/fig5.pdf}
-\end{center}
-
-\end{frame}
-
-%-----------------------------------------------------------------
-
-
-\begin{frame}
-\frametitle{}
-\begin{center}
-  \includegraphics[width=.8\linewidth]{figs/fig6.pdf}
-\end{center}
-
-\end{frame}
-
-% ----------------------------------------------------------
-\begin{frame}
-\frametitle{two independent sample test}
-\begin{task}{Brain Weights (permutation test)}
-  The dataset {\tt brainweight.dat} contains brain weights of males
-  and females. It consists of {\bf (i) two samples (male/female)}
-  which are {\bf (ii) not paired}. We want to test whether the mean
-  brain weights of males and females are different.
-  \begin{itemize}
-  \item What could we use as statistic?\pause {\em~the difference in the
-      means} \pause
-  \item What would be $H_0$?\pause {\em~the difference is zero} \pause
-  \item Can you think of a way to generate an estimate of the Null
-    distribution with Matlab? \pause {\em~Permutation test: Shuffle the
-      labels, compute difference in means, repeat ... (see the commented
-      sketch further below)}. \pause
-  \end{itemize}
-
-\end{task}
-\begin{itemize}
-\item The {\color{blue}two-sample independent t-test} is the parametric test
-  for this dataset.
-\item If normality does not hold, you can use the
-  {\color{blue}Wilcoxon-Mann-Whitney test}.
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{one- and two-sample t-test and sign test}
-\begin{center}
-  \tiny
-\bgroup
-\def\arraystretch{2}
-\begin{tabular}{|l|c|c|c|}
-  \hline
-  \textbf{name} & \textbf{statistic} & $\boldsymbol{H_{0}}$ & \textbf{Null distribution}\tabularnewline
-  \hline
-  \hline
-  one sample t-test & $t=\frac{\overline{x}-0}{\mbox{SE}_x}$ & mean of $x$ is zero & t-distr. with $n-1$ DoF\tabularnewline
-  \hline
-  paired sample t-test & $t=\frac{\overline{d}-0}{\mbox{SE}_d},\, d=x_{i}-y_{i}$ & mean of $d$ is zero & t-distr. with $n-1$ DoF\tabularnewline
-  \hline
-  sign test & $t=\#\left[x_{i}<y_{i}\right]$ & median of $x_{i}-y_{i}$ is zero & binomial with $p=0.5$\tabularnewline
-  \hline
-  two sample t-test & $t=\frac{\overline{x}-\overline{y}}{\mbox{SE}_{x-y}}$ & means are equal & t-distr. with $n_1+n_2-2$ DoF\tabularnewline
-  \hline
-\end{tabular}
-\egroup
-\end{center}
-\end{frame}
-
-%---------------------------------------------------------
-\begin{frame}
-\frametitle{the mother of all statistics: the p-value}
-\only<1>{
-  So far, we chose a particular threshold $b$ by fixing the type I error
-  rate $\alpha$.
-  \begin{center}
-    \includegraphics[width=.7\linewidth]{figs/pval0.png}
-  \end{center}
-}
-\only<2>{
-  \begin{itemize}
-  \item The {\color{blue}p-value} is the type I error rate if you use
-    your {\color{blue} actually measured statistic} as threshold.
-  \item In other words: The p-value is the minimal type I error rate
-    you have to accept if you call your result significant.
-  \end{itemize}
-  \begin{center}
-    \includegraphics[width=.7\linewidth]{figs/pval1.png}
-  \end{center}
-}
-\end{frame}
-%---------------------------------------------------------
-
-\begin{frame}
-\frametitle{the mother of all statistics: the p-value}
-\framesubtitle{Why is it a universal measure?}
-
-The p-value is the minimal type I error rate you have to accept if you
-call your result significant.
-
-\begin{itemize}
-\item If you have a personal $\alpha$-level that is larger than the
-  p-value, you automatically know that the decision threshold lies
-  ``further inside'' than your measured statistic.
-\item This means you {\color{blue}can simply compare your $\alpha$-level with the
-  p-value}: if the p-value is smaller, then you call that result
-  significant, otherwise you don't.
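% A commented sketch of the permutation test for the Brain Weights task above.
% The two-column layout of brainweight.dat (weight, sex coded 0/1) is an
% assumption here; adapt the loading to the actual file.
%
%   d = load('brainweight.dat');
%   w = d(:, 1);  male = d(:, 2) == 1;
%   dobs = mean(w(male)) - mean(w(~male));        % observed difference of means
%   m = 5000;  dperm = zeros(m, 1);
%   for i = 1:m
%       shuffled = male(randperm(numel(w)));      % shuffle the group labels
%       dperm(i) = mean(w(shuffled)) - mean(w(~shuffled));
%   end
%   p = mean(abs(dperm) >= abs(dobs));            % two-sided permutation p-value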
-\end{itemize} - -\begin{center} - \includegraphics[width=.45\linewidth]{figs/pval0.png} - \includegraphics[width=.45\linewidth]{figs/pval1.png} -\end{center} -\end{frame} -%--------------------------------------------------------- - -\begin{frame} -\frametitle{the mother of all statistics: the p-value} - -\begin{task}{p-values if $H_0$ is true} - Is the following procedure correct? - - \vspace{.5cm} - - In order to show that a sample $x_1,...,x_n$ follows a Normal - distribution with mean zero, you perform a t-test. If the p-value is - large, you conclude that there is evidence for $H_0$, i.e. accept - that $x_1,...,x_n$ has mean zero and is normally distributed. - - \vspace{.5cm} - To find the answer, simulate normally distributed random variables - with {\tt randn} in Matlab and compute the p-value with a one-sample - t-test. Repeat that several times and plot a histogram of the p-value. - -\end{task} -\pause -\begin{itemize} -\item If $H_0$ is true, the p-value is uniformly distributed between 0 - and 1. Why?\pause -\pause -\item Think about the beginning of this lecture - $$p=P(|x| > |t|) = 1 - P(|x| \le |t|) = 1 - \mbox{c.d.f.}(|t|) \sim U([0,1])$$ -\end{itemize} -\end{frame} - -%-------------------------------------------------- -\begin{frame} -\frametitle{the mother of all statistics: the p-value} - -\begin{task}{Study design} - Is the following procedure statistically sound? - - \vspace{.5cm} - - Psychophysical experiments with human subjects can be time-consuming - and costly. In order to get a significant effect with minimal effort - you use the following procedure: You start with a few subjects. If - your statistical test for the effect returns a p-value smaller than - $0.05$ you stop and publish. Otherwise you repeat adding subjects - and computing p-values until you get a significant results (or run - out of time and money). - -\end{task} -\pause - -\begin{solution}{Answer} - No, the procedure is not sound. Even if $H_0$ is true, you will - eventually get a p-value smaller than $0.05$ since it is uniformly - distributed between $0$ and $1$ in this case. -\end{solution} -\end{frame} - -%-------------------------------------------------- -\begin{frame} -\frametitle{the mother of all statistics: the p-value} - -\begin{task}{p-values over studies} - If there is no effect, how many studies would yield a significant - p-value (for $\alpha=0.05$)? -\end{task} -\pause -\begin{solution}{Answer} - $5\%$ -\end{solution} -\pause -\begin{task}{p-values in publications} - Do you think that only publishing positive findings poses a problem? -\end{task} -\pause -\begin{solution}{Answer} -Yes. If I only publish significant positive findings, then I can -publish anything if I just repeat the study long enough. -\end{solution} - -\end{frame} - -%--------------------------------------------------------- -\begin{frame} - \frametitle{the mother of all statistics: the p-value} - \begin{task}{true or false?} - \begin{itemize} - \item From $p<0.01$ you can deduce that your result is of - biological importance.\pause - - - \item {\color{gray} False. A small p-value doesn't say anything - about biological importance. It just indicates that the data - and $H_0$ are not very compatible.} \pause - - \item The p-value is the probability of observing a dataset - resulting in a test-statistic more extreme than the one at hand, - assuming the null hypothesis is true.\pause - - \item {\color{gray} True.} \pause - \item $1-p$ is the probability of the alternative hypothesis being - true. 
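% A sketch for the simulation task above (p-values when H_0 is true); the
% number of repetitions and the sample size are arbitrary choices here.
%
%   m = 1000;  n = 20;  p = zeros(m, 1);
%   for i = 1:m
%       [~, p(i)] = ttest(randn(n, 1));   % one-sample t-test against mu = 0
%   end
%   hist(p, 20)                           % roughly flat between 0 and 1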
\pause - - \item {\color{gray} False. The p-value cannot tell us anything - about whether one of the hypotheses are true or not.} - \end{itemize} - \end{task} -\end{frame} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{multiple hypothesis testing} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%--------------------------------------------------------------- -\begin{frame} -\frametitle{two tests} -\begin{task}{Correct or not?} - You have two independent samples from a treatment group and a - control group. You are not sure whether your data meets the - requirement of a t-test. Therefore, you carry out a t-test and a - ranksum test. If one of them rejects $H_0$ you use this one to - report your findings in a paper. - -\vspace{.5cm} -\footnotesize - -To approach an answer, use Matlab and -\begin{itemize} -\item repeatedly sample two datasets from the same Normal distribution - $\mathcal N(0,1)$. -\item for each pair of datasets compute the test statistic of a - ranksum test (use {\tt ranksum}) and a t-test (use {\tt ttest2}) -\item Plot the values of the statistics against each other (using {\tt - plot(T, R, 'k.')}). What can you observe? -\item Count the number of times at least one of the tests gives a - p-value smaller than $0.05$. What can you observe? -\end{itemize} -\end{task} - - - -\end{frame} - -%--------------------------------------------------------------- -\begin{frame} -\frametitle{two tests} - -\begin{minipage}{1.\linewidth} - \begin{minipage}{0.6\linewidth} - \begin{center} - \includegraphics[width=1.\linewidth]{figs/multipletesting.pdf} - \end{center} - \end{minipage} - \begin{minipage}{0.39\linewidth} - \small - \only<1-4>{ - \begin{itemize} - \item the two statistics are clearly correlated\pause - \item What is the type I error rate for each single test?\pause - \item Where is the type I error area in the combined plot? \pause - \item Is the type I error rate in the combined strategy lower or - larger compared to using just a single test?\pause - \end{itemize} - } - \only<5>{ - \small - \color{blue} The combined strategy has a higher error rate! This gets - worse for more tests. For that reason we have to account for multiple - testing! - } - - \end{minipage} -\end{minipage} -\end{frame} - - -%--------------------------------------------------------------- -\begin{frame} -\frametitle{two tests} - -\begin{minipage}{1.\linewidth} - \begin{minipage}{0.49\linewidth} - \begin{center} - \includegraphics[width=1.\linewidth]{figs/multipletesting.pdf} - \end{center} - \end{minipage} - \begin{minipage}{0.5\linewidth} - \small - \begin{itemize} - \item When is something called multiple testing?\pause - \item[]{\color{gray} If a hypothesis is a compound of single - hypotheses.}\pause - \item If I test $\mu_1 = \mu_2 = \mu_3$ by testing $\mu_i = \mu_j$ - for all $i\not= j$ and reject as soon as one of the test rejects, - does the type I error increase or decrease?\pause - \item[]{\color{gray} It increases, because a have the chance to make - an error in all conditions.}\pause - \item Can the type I error also go in the other direction?\pause - \item[]{\color{gray} Yes, it could. 
For example if the single - hypotheses are combined with ``and''.} - \end{itemize} - - \end{minipage} -\end{minipage} -\end{frame} -%--------------------------------------------------------------- - -\begin{frame} - \frametitle{summary} - \begin{itemize} - \item Multiple testing tests a {\color{blue}compound hypothesis} by - testing several single hypotheses.\pause - \item {\color{blue}Multiple testing can decrease or increase type I/II error} - dependening on how the single hypothese are combined (``or'' type - I up, ``and'' type I down).\pause - \item This can be accounted for (e.g. by {\em Bonferroni correction: - divide $\alpha$ by number of tests}). However, better is to have - a test that directly tests the compound hypothesis. ANOVA is a - typical example for that. - \end{itemize} - -\end{frame} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{study design} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame} - \frametitle{general theme} - \begin{enumerate} - \item make an educated guess about the true parameters - \item state how accurate/powerful you want to be - \item select $n$ based on that - \end{enumerate} -\end{frame} - -\begin{frame} - \frametitle{estimating a single mean} - \framesubtitle{standard error and $\alpha$} - \begin{itemize} - \item Assume you want to make estimate the mean of some quantity.\pause - \item From a pilot study or the literature, you have an estimate $s$ - of the standard deviation and $\tilde\mu$ of the mean of that - quantity.\pause - \item $\tilde \mu$ could also be chosen to set a minimal detectable difference.\pause - \item In order to test whether your mean $\hat\mu$ is different from - a fixed mean $\mu_0$ on an $\alpha$-level of $5\%$ you know that - the $95\%$ confidence interval around $\tilde\mu$ should not - contain $\mu_0$: $$\underbrace{|\tilde\mu - \mu_0|}_{=:\delta} \ge - t_{0.025, \nu}\frac{s}{\sqrt{n}}$$ -\pause -\item This mean you should set $n$ to be -$$n \ge \left(\frac{t_{0.025, \nu}\cdot s}{\delta}\right)^2 $$ - \end{itemize} - -\end{frame} - -\begin{frame} - \frametitle{estimating means} - \framesubtitle{type I and type II error} - {\bf one can also take the desired power $1-\beta$ into account} - $$n \ge \frac{s^2}{\delta^2}\left(t_{\alpha,\nu}, + t_{\beta(1),\nu}\right)^2$$ - \only<1>{ - \includegraphics[width=.5\linewidth]{figs/experimentalDesign00.pdf} - \includegraphics[width=.5\linewidth]{figs/experimentalDesign01.pdf} - } - \pause - - {\bf rearranging the formula yields an estimate for minimal - detectable difference} - $$\delta \ge \sqrt{\frac{s^2}{n}}\left(t_{\alpha,\nu}, + t_{\beta(1),\nu}\right)$$ - \pause - - {\bf for two means, this formula becomes} - $$n \ge \frac{2s^2}{\delta^2}\left(t_{\alpha,\nu}, + t_{\beta(1),\nu}\right)^2$$ - - \pause - - \begin{emphasize}{iterative estimation} - Since $\nu$ depends on $n$ (i.e. $\nu=n-1$), we need to estimate - $n$ iteratively. - \end{emphasize} - - \mycite{Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch, - Ed.)Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice - Hall. doi:10.1037/0012764} - - -\end{frame} - -\begin{frame} - \frametitle{example} - \framesubtitle{Zar, example 7.2} - \small - Researches observed the weight changes in twelve rats after being - subjected to forced exercise. The mean difference is - $\hat\mu=-0.65g$, the sample variance is $\hat\sigma^2=1.5682 - g^2$. 
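% The iterative sample-size estimation from the previous frame can be scripted
% directly; a sketch using the numbers of the example worked out below
% (Zar, example 7.2):
%
%   s2 = 1.5682;  delta = 1.0;          % error variance and minimal difference
%   alpha = 0.05;  beta = 0.10;
%   n = 20;                             % initial guess
%   for it = 1:10                       % iterate until n stops changing
%       nu = n - 1;
%       n  = ceil(s2/delta^2 * (tinv(1 - alpha/2, nu) + tinv(1 - beta, nu))^2);
%   end
%   n                                   % converges to 19 for these numbers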
We wish to test the difference to $\mu=0$ with $\alpha=0.05$ - and a $1-\beta=0.9\cdot 100\%$ chance of detecting a population mean - different from $\mu_0=0$ by as little as $1.0g$. - -\pause - - Let's guess that a sample size of $n=20$ would be required. Then - $\nu=19$, $t_{0.025,19}=2.093$, $\beta=1-0.9=0.1$, and - $t_{0.1,19}=1.328$. This means - $$n=\frac{1.5682}{1^2}(2.093+1.3828)^2 = 18.4.$$ - -\pause - -Now let's us $n=19$ as an estimate, in which case $\nu=18$, -$t_{0.025,18}=2.101$, $t_{0.1,18}=1.330$, -and $$n=\frac{1.5682}{1^2}(2.101+1.330)^2=18.5.$$ -Thus we need a sample size of at least $19$. -\end{frame} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{ANOVA} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{from linear regression to ANOVA} -\begin{frame} -\frametitle{from linear regression to ANOVA} - -\small The following table contains the impulse frequency of the -electric field from electric fish measured at several temperatures -(data for project 03). - -\begin{center} - \tiny -\begin{tabular}{lccccccc} -{\bf temperature C${}^\circ$} & \multicolumn{3}{c}{\bf impulse frequency [number/sec]} \\ \hline\\ -20.00 & 225.00 & 230.00 & 239.00 \\ -22.00 & 251.00 & 259.00 & 265.00 \\ -23.00 & 266.00 & 273.00 & 280.00 \\ -25.00 & 287.00 & 295.00 & 302.00 \\ -27.00 & 301.00 & 310.00 & 317.00 \\ -28.00 & 307.00 & 313.00 & 325.00 \\ -30.00 & 324.00 & 330.00 & 338.00 -\end{tabular} - -\end{center} - -\begin{itemize} -\item Our goal will be to test whether $\mu_{20}=...=\mu_{30}$. -\item Note that ANOVA is not the method to analyze this - dataset. Linear regression is because temperature is on an interval - scale. We will just use the ideas here for illustration. -\end{itemize} -\end{frame} - - - -% ---------------------------------------------------------- -\begin{frame} -\frametitle{from linear regression to ANOVA} -\begin{center} - \includegraphics[width=.8\linewidth]{figs/regression01.pdf} -\end{center} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame} -\frametitle{from linear regression to ANOVA} -\begin{center} - \includegraphics[width=.7\linewidth]{figs/regression02.pdf} -\end{center} -What kind of regression line would we expect if the means were equal? -\pause {\em One with slope $\alpha=0$.} -\end{frame} - -% ---------------------------------------------------------- -\begin{frame} -\begin{minipage}{1.0\linewidth} - \begin{minipage}{0.5\linewidth} - \includegraphics[width=1.\linewidth]{figs/regression02.pdf} - \end{minipage} - \begin{minipage}{0.5\linewidth} - \begin{itemize} - \item For linear regression data, we would test whether - $\alpha=0$. - \item For categorial inputs (x-axis), we cannot compute a - regression line. Therefore, we need a different approach. 
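% For later reference, the sum-of-squares decomposition introduced on the ANOVA
% slides can be computed for the fish table above in a few lines. This is only
% a sketch; the slide itself notes that regression, not ANOVA, would be the
% proper analysis for these data.
%
%   f = [225 230 239; 251 259 265; 266 273 280; 287 295 302; ...
%        301 310 317; 307 313 325; 324 330 338];     % one row per temperature
%   [k, ni] = size(f);  N = k*ni;
%   mu = mean(f(:));  mui = mean(f, 2);
%   SSgroup = ni * sum((mui - mu).^2);                % between-group SS
%   SSerror = sum(sum((f - repmat(mui, 1, ni)).^2));  % within-group SS
%   F = (SSgroup/(k-1)) / (SSerror/(N-k));
%   p = 1 - fcdf(F, k-1, N-k);                        % or: p = anova1(f', [], 'off')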
- \end{itemize} - \end{minipage} -\end{minipage} -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{law of total variance} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\begin{frame} -\frametitle{law of total variance} -\only<1>{ - Approach law of total variance - $$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] + - \color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$ - \begin{center} - \includegraphics[width=.7\linewidth]{figs/regression02.pdf} - \end{center} -}\pause -\only<2>{ - Approach law of total variance - $$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] + - \color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$ - \begin{center} - \includegraphics[width=.7\linewidth]{figs/regression03.pdf} - \end{center} -}\pause -\only<3>{ - Approach law of total variance - $$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] + - \color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$ - Data generation model for regression $f_{ij} = {\color{mygreen} \alpha t_i} + \beta + {\color{lightblue}\varepsilon_{ij}}$ - \begin{center} - \includegraphics[width=.6\linewidth]{figs/regression04.pdf} - \end{center} -}\pause -\only<4>{ - Approach law of total variance - $$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] + - \color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$ - Data generation model for regression - $f_{ij} = {\color{mygreen} \alpha t_i} + \beta + - {\color{lightblue}\varepsilon_{ij}}: $ $${\color{mygreen} \alpha=0} - \Rightarrow {\color{mygreen} \mathbb V[\mu] = 0} \Rightarrow \mu_{20} = \mu_{22} = ... = \mu_{30}$$ - \begin{center} - \includegraphics[width=.6\linewidth]{figs/regression04.pdf} - \end{center} -} -\end{frame} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{single factor ANOVA} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%--------------------------------------------------------------- -\begin{frame} -\frametitle{data model for single factor ANOVA} - Approach law of total variance - $$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] + - \color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$ - Data generation model for single factor ANOVA - $f_{ij} = \overline{\mu} + {\color{mygreen} \tau_{i}} + - {\color{lightblue}\varepsilon_{ij}}$: - $${\color{mygreen} \tau_i=\tau_j=0} - \Rightarrow {\color{mygreen} \mathbb V[\mu] = 0} \Rightarrow \mu_{20} = \mu_{22} = ... 
= \mu_{30}$$ - \begin{center} - \includegraphics[width=.6\linewidth]{figs/regression05.pdf} - \end{center} -\end{frame} -%--------------------------------------------------------------- -\begin{frame} -\frametitle{statistic of ANOVA} -\begin{columns} - \begin{column}{0.43\linewidth} - \begin{center} - \includegraphics[width=1.\linewidth]{figs/regression02.pdf} - - \vspace{-.2cm} - - \includegraphics[width=1.\linewidth]{figs/Fdistribution00.pdf} - \end{center} - \end{column} - \begin{column}{0.55\linewidth} - \begin{align*} - \:&\mbox{\color{lightblue} error SS}&=\color{lightblue}\sum_{ij}\left(x_{ij}-\mu_{i}\right)^{2}\\ - +\:&\mbox{\color{mygreen} group SS}&=\color{mygreen}\sum_{i}n_{i}\left(\hat{\mu}_{i}-\mu\right)^{2}\\\hline - \:&\mbox{\color{red} total SS}&=\color{red}\sum_{ij}\left(x_{ij}-\mu\right)^{2} - \end{align*} - \pause - \begin{align*} - \mbox{\color{mygreen}groups MS}=\frac{\mbox{\color{mygreen}group SS}}{\mbox{\color{mygreen}groups DF}}&=\color{mygreen}\frac{\sum_{i}n_{i}\left(\hat{\mu}_{i}-\mu\right)^{2}}{k-1}\\\mbox{\color{lightblue}error MS}=\frac{\mbox{\color{lightblue}error SS}}{\mbox{\color{lightblue}error DF}}&=\color{lightblue}\frac{\sum_{ij}\left(x_{ij}-\hat{\mu_{i}}\right)^{2}}{N-k}\\\color{dodgerblue}F&=\frac{\mbox{\color{mygreen}group MS}}{\mbox{\color{lightblue}error MS}} - \end{align*} - \end{column} -\end{columns} -\end{frame} - -%--------------------------------------------------------------- -\begin{frame} -\frametitle{summary single factor ANOVA} -\begin{itemize} -\item {\bf Goal:} Test whether several means are equal or not.\pause -\item {\bf Strategy:} Use law of total variance to explain the overall - variance with the {\em variance of the means} and the {\em variance - within groups}\pause -\item If the total variance can be solely explained from {\em variance - within groups}, then the means do not vary and must be the same. \pause -\item Since a statistic should be large if the data does not fit to - $H_0$, we use $\frac{MS(between)}{MS(within)}$ which can be shown to - have an F-distribution under certain ...\pause -\item {\bf Assumptions:} - \begin{itemize} - \item The groups must be independent of each other. - \item In each group, the specimen must be i.i.d. from the particular - population distribution $f_{ij} \sim p(f|\mu_i) $. - \item The standard deviations of the groups are equal - ($\sigma_\varepsilon$ is the same for all groups). - \item The residuals $\varepsilon$ must be Normally distributed - \end{itemize} -\end{itemize} -\end{frame} - -\subsection{study design for ANOVA} -\begin{frame} - \frametitle{study design for ANOVA} - \begin{itemize} - \item If the means are different (but all other assumptions are - satisfied), then $F$ follows a non-central F-distribution. - \item Like in the case of one- and two-sample t-tests, this can be - used to adjust $n$ for the desired power. - \item Alternatively, one can estimate the minimal detectable - difference $\delta$ from estimates of the {\em error MS} $s^2$ - and $n$, or $n$ from $\delta$ and $s^2$, respectively. - \end{itemize} - \mycite{Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch, - Ed.)Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice - Hall. doi:10.1037/0012764} - -\end{frame} - -\subsection{non-parametric ANOVA} -\begin{frame} - \frametitle{Kruskal-Wallis test} - \begin{itemize} - \item Can be applied if the data is not normally distributed. - \item Is equivalent to Mann-Whitney/Wilcoxon rank sum test for two - factor levels. 
- \item Needs the variances to be equal as well. - \item Instead of testing equality of means/medians it tests for - equality of distributions. - \item For more details see {\em Biostatistical Analysis}. - \end{itemize} -\end{frame} - -\begin{frame} - \frametitle{Testing the difference among several medians} - \begin{itemize} - \item Can be applied if the data is not normally distributed. - \item Does not need the variances to be equal. - \item For more details see {\em Biostatistical Analysis}. - \end{itemize} -\end{frame} - -\section{more complex ANOVAs} -\subsection{blocking} -% ---------------------------------------------------------- -\begin{frame} -\frametitle{blocking} -\footnotesize -{\bf Blocking} -How does experience affect the anatomy of the brain? In a typical -experiment to study this question, young rats are placed in one of -three environments for 80 days: - -\begin{itemize} -\item[T1] Standard environment.The rat is housed with a single - companion in a standard lab cage. -\item[T2] Enriched environment. The rat is housed with several - companions in a large cage, furnished with various playthings. -\item[T3] Impoverished environment.The rat lives alone in a standard - lab cage. -\end{itemize} - -At the end of the 80-day experience, various anatomical measurements -are made on the rats' brains. Suppose a researcher plans to conduct -the above experiment using 30 rats. To minimize variation in response, -all 30 animals will be male, of the same age and strain. To reduce -variation even further, the researcher can take advantage of the -similarity of animals from the same litter. In this approach, the -researcher would obtain three male rats from each of 10 litters. The -three littermates from each litter would be assigned at random: one to -T1, one to T2, and one to T3. -\end{frame} - -%--------------------------------------------------------------- -\begin{frame} - \frametitle{How to create blocks} - - Try to create blocks that are as homogeneous within themselves as - possible, so that the inherent variation between experimental units - becomes, as far as possible, variation between blocks rather than - within blocks (see SWS chapter 11.6). - - {\bf Fish data:} - \begin{itemize} - \item each fish is a block - \item the different categories are the factor of interest - \item note that we have one measurement per block and factor, but - there could be more - \end{itemize} - -\end{frame} - - -%--------------------------------------------------------------- -\begin{frame} -\frametitle{data model for block randomized ANOVA} - - Data generation model for randomized block factor ANOVA - $f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \varepsilon_{ijk}$: - - \vspace{.5cm} - - How do we know that there is no interaction $\gamma_{ij}$ between - the blocks and the factors? - \begin{itemize} - \item {\bf a priori knowledge:} why should temperature be dependent on - fish identity - \item {\bf additivity:} for each factor $i$, the values differ by - the {\em same} amount $\beta_j$. 
\pause - \end{itemize} - - \begin{minipage}{1.0\linewidth} - \begin{minipage}{0.5\linewidth} - \begin{center} - \includegraphics[width=1.\linewidth]{figs/regression06.pdf} - \end{center} - \end{minipage} - \begin{minipage}{0.5\linewidth} - \only<2>{\color{red} Would that also be the case if the values cross at the point?} - \end{minipage} - \end{minipage} -\end{frame} - -\subsection{two factor ANOVA} -%--------------------------------------------------------------- -\begin{frame} -\frametitle{What's the funny way to write down the data model in ANOVA?} - - Data generation model for a two factor ANOVA with interaction - $$f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \gamma_{ij} + \varepsilon_{ijk}$$ - - {\bf Note that:} - \begin{itemize} - \item The sum over the $\tau_i$, $\beta_j$, $\gamma_{ij}$, and - $\varepsilon_{ijk}$ terms are always zero. They model the {\em deviation} - from the grand mean. \pause - \item The directly correspond to the available SS/ MS terms. For - example, in the block randomized ANOVA - \begin{itemize} - \item $f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \varepsilon_{ijk}$ - \item $SS(total) = SS(temperature) + SS(blocks) + SS(within)$ - \end{itemize} - \end{itemize} - -\end{frame} - -%--------------------------------------------------------------- -\begin{frame} -\frametitle{different hypotheses from a 2-factor ANOVA} -\small - Data generation model for a two factor ANOVA with interaction - $$f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \gamma_{ij} + \varepsilon_{ijk}$$ - - \begin{itemize} - \item {\bf Blocking: } Assume $\gamma_{ij}=0$. Test - $$F=\frac{\mbox{temperature MS} (\tau_i)}{\mbox{error MS} - (\varepsilon_{ijk})}$$\pause - \item {\bf Repeated Measures: } Assume $\gamma_{ij}=0$. Entity - which was repeatedly measured becomes block.\pause - \item {\bf Two factor testing factor influence: } Assume $\gamma_{ij}\not=0$. Test - $$F = \frac{\mbox{temperature MS} (\tau_i)}{\mbox{error MS} - (\varepsilon_{ijk})}$$\pause - \item {\bf Two factor testing interaction: } Assume $\gamma_{ij}\not=0$. Test - $$F=\frac{\mbox{interaction MS}(\gamma_{ij})} {\mbox{error MS} - (\varepsilon_{ijk})}$$ - - \end{itemize} - -\end{frame} - - -%--------------------------------------------------------------- -\begin{frame} -\frametitle{summary} -\begin{itemize} -\small -\item ANOVA is a very flexible method to study the interactions of - categorial variables (factors) and ratio/ interval data \pause -\item Works by checking whether a certain factor/ interaction between - factors, ... is needed to explain the variability in the data \pause -\item Relies on assumptions that need to be checked - \begin{itemize} - \item equal variance for each factor level - \item the residuals are Normally distributed - \item number of points $n_i$ should be the same - \end{itemize}\pause -\item There is a whole zoo of ANOVA techniques, for all kinds of - situations. This is just the tip of the iceberg. -\item One can often get away with violating some of the - assumptions. For more details on that check {\em Biostatistical Analysis} -\end{itemize} - -\end{frame} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{detour I: One-tailed vs. 
two-tailed} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%--------------------------------------------------------------- -\begin{frame} -\hypertarget{sec:twotailed}{} - -\frametitle{one-tailed tests} -\begin{task}{Correct or not?} - Imagine a pharmaceutical company runs clinical trials for a drug - that enhances the ability to focus. To that end they apply the drug - to a treatment and measure scores in a standardized test. From the - literature it is known that normal subjects have a score of about 0. - - Since the company want to test whether the drug {\em enhances (score - > 0)} the ability to focus, they choose a one-tailed test ($H_A:$ - treatment group performs better than the performance from the - literature). -\end{task} -\end{frame} - - -%------------------------------------------------------------- - -\begin{frame} -\frametitle{one tailed test} -\begin{minipage}{1.0\linewidth} - \begin{minipage}{0.5\linewidth} - {\bf two tailed test} - - \includegraphics[width=\linewidth]{figs/twotailed.png} - \footnotesize - \vspace{-1cm} - - e.g. - - \begin{itemize} - \item $H_0: \mu = 0$ - \item $H_A: \mu \not= 0$ - \vspace{1.8cm} - \end{itemize} - \end{minipage} - \begin{minipage}{0.5\linewidth} - {\bf one tailed test} - - \includegraphics[width=\linewidth]{figs/onetailed.png} - \footnotesize - \vspace{-1cm} - e.g. - - \begin{itemize} - \item $H_0: \mu = 0$ - \item $H_A: \mu > 0$ - \item $\hat\mu < 0$ must directly imply $\hat\mu$ came from - $P(\hat\mu|H_0)$ - \item if that is not the case, using one-tailed is cheating - \end{itemize} - \end{minipage} -\end{minipage} -\hyperlink{back:twotailed}{\color{gray}go back} -\end{frame} - - -% ---------------------------------------------------------- - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{detour II: Statistical Power} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame} -\frametitle{Why is it hard to assess the power of a test?} -\begin{minipage}{1.\linewidth} - \begin{minipage}{.5\linewidth} - \includegraphics[width=.8\linewidth]{figs/power.pdf} - \end{minipage} - \begin{minipage}{.5\linewidth} - \begin{itemize} - \item Power = 1 - P(type II error)\\ - = P(reject $H_0$| $H_A$ is true)\pause - \item in general the distribution - \begin{center} - P(test statistic|$H_A$ is true) - \end{center} - is not available to us. - \pause - \item Therefore, the power can often only be specified for a - specific $H_A$. - - \end{itemize} - - \end{minipage} -\end{minipage} -\mycite{J. H. 
Zar, Biostatistical Analysis} -\hypertarget{sec:power}{} -\hyperlink{back:power}{\color{gray}go back} - -\end{frame} - - -% ---------------------------------------------------------- - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{detour III: Bayes rule and statistical tests} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% ---------------------------------------------------------- -\begin{frame} -\hypertarget{sec:bayesian}{} - -\frametitle{Why is this funny (or sad)?} -\begin{center} - \includegraphics[width=.4\linewidth]{figs/frequentistsvsbayesians.png} -\end{center} -\mycite{http://xkcd.com/1132/} -\end{frame} - -%----------------------------------------------------------------- -\begin{frame} -\frametitle{Why is this funny (or sad)?} -\begin{minipage}{1.\linewidth} - \begin{minipage}{.5\linewidth} - \includegraphics[width=.7\linewidth]{figs/frequentistsvsbayesians.png} - \mycite{http://xkcd.com/1132/} - \end{minipage} - \begin{minipage}{.5\linewidth} - \begin{itemize} - \item $H_0:$ the sun has not gone nova - \item $H_A:$ the sun has gone nova \pause - \item test procedure: we believe the detector \pause - \item Null distribution: multinomial $n=2, p_1 = \frac{1}{6}, ..., p_6 = \frac{1}{6}$ \pause - \item the probability of making a type I error is $p(2\times - 6)=\frac{1}{6}\cdot \frac{1}{6} \approx 0.028$ - \end{itemize} - \pause - So ... what is wrong? - \end{minipage} -\end{minipage} -\end{frame} - -%----------------------------------------------------------------- -\begin{frame} -\frametitle{A similar example} -\begin{minipage}{1.\linewidth} - \begin{minipage}{.5\linewidth} - {\bf sensitivity \& specificity of a HIV test} - - \begin{tabular}{ccc} - & HIV & no HIV\tabularnewline - test + & 99.7\% & 1.5\%\tabularnewline - test - & 0.03\% & 98.5\%\tabularnewline - \end{tabular} - - \vspace{1cm} - - {\bf HIV prevalence (Germany)} - - \begin{tabular}{cc} - HIV & no HIV\tabularnewline - 0.1\% & 99.9\%\tabularnewline - \end{tabular} - - - \end{minipage} - \begin{minipage}{.5\linewidth} - \begin{task}{} - What is the probability that you are HIV+ if you test positive? - \end{task}\pause - In order to answer that question, you need two rules for - probability.\pause - - \vspace{1cm} - - What is the power, what is the type I error of the test? - \end{minipage} -\end{minipage} -\end{frame} - -%----------------------------------------------------------------- -\begin{frame} -\frametitle{Bayes rule and marginalization} -{\bf Bayes rule} -$$p(A|B)p(B) = p(B|A)p(A)$$ - -{\bf joint probability} -$$p(A,B) = p(A|B)p(B) = p(B|A)p(A)$$ - -{\bf marginalization} -$$p(B) = \sum_{\mbox{possible values a of }A}p(a,B)$$ -\end{frame} - -%----------------------------------------------------------------- -\begin{frame} -\frametitle{probability/Bayesian nomenclature} -\frametitle{repetition} -Let $T\in \{+, -\}$ be the test result and $H\in \{+,-\}$ whether you -are HIV positive or not. -\begin{itemize} -\item $p(T|H)$ is the {\em likelihood} \pause -\item $p(H)$ is the {\em prior} \pause -\item $p(H|T)$ is the {\em posterior} -\end{itemize} -\pause -Given the prior and the likelihood, we can compute the posterior. 
-
-%-----------------------------------------------------------------
-\begin{frame}
-\frametitle{Why is this funny (or sad)?}
-\begin{minipage}{1.\linewidth}
-  \begin{minipage}{.5\linewidth}
-    \includegraphics[width=.7\linewidth]{figs/frequentistsvsbayesians.png}
-    \mycite{http://xkcd.com/1132/}
-  \end{minipage}
-  \begin{minipage}{.5\linewidth}
-    {\bf Why is it funny?} Because it points to the fact that
-    statistical tests usually look at the likelihood only and ignore
-    the prior.
-
-    \vspace{1cm}
-
-    {\bf Why is it sad?} Because statistical tests usually look at
-    the likelihood and ignore the prior.
-  \end{minipage}
-\end{minipage}
-\hyperlink{back:bayesian}{\color{gray}go back}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{detour IV: Assessing normality with QQ plots}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\hypertarget{sec:qqplots}{}
-\frametitle{histogram equalization}
-\begin{minipage}{1.0\linewidth}
-  \begin{minipage}{0.5\linewidth}
-    \begin{task}{histogram equalization}
-      Which function $y = f(x)$ transforms $x$ such that $y$ has the
-      distribution $p(y)$?
-    \end{task}
-  \end{minipage}
-  \begin{minipage}{0.5\linewidth}
-    \only<1>{
-      \begin{center}
-        \includegraphics[width=1.\linewidth]{figs/HE0.png}
-      \end{center}
-    }\pause
-    \only<2>{
-      \begin{center}
-        \includegraphics[width=1.\linewidth]{figs/HE0Solution.png}
-      \end{center}
-    }
-  \end{minipage}
-\end{minipage}
-\end{frame}
-% ----------------------------------------------------------
-\begin{frame}
-\frametitle{histogram equalization}
-\begin{minipage}{1.0\linewidth}
-  \begin{minipage}{0.4\linewidth}
-    \begin{task}{histogram equalization}
-      What would the function look like if the target were a Normal
-      distribution?
-    \end{task}
-  \end{minipage}
-  \begin{minipage}{0.6\linewidth}
-    \only<1>{
-      \begin{center}
-        \includegraphics[width=1.\linewidth]{figs/HE1.png}
-      \end{center}
-    }\pause
-    \only<2>{
-      \begin{center}
-        \includegraphics[width=1.\linewidth]{figs/HE1Solution.png}
-      \end{center}
-    }
-  \end{minipage}
-\end{minipage}
-\end{frame}
-% ----------------------------------------------------------
-\begin{frame}
-\frametitle{histogram equalization}
-\begin{minipage}{1.0\linewidth}
-  \begin{minipage}{0.4\linewidth}
-    \begin{task}{histogram equalization}
-      Is the target distribution a Normal distribution?
-    \end{task}
-  \end{minipage}
-  \begin{minipage}{0.6\linewidth}
-    \only<1>{
-      \begin{center}
-        \includegraphics[width=1.\linewidth]{figs/HE2.png}
-      \end{center}
-    }\pause
-    \only<2>{
-      \begin{center}
-        \includegraphics[width=1.\linewidth]{figs/HE2Solution.png}
-      \end{center}
-    }
-  \end{minipage}
-\end{minipage}
-\end{frame}
-% ----------------------------------------------------------
-
-\begin{frame}
-\frametitle{QQ-plots}
-  \begin{itemize}
-  \item QQ-plots can be used to visually assess whether a set of data
-    points might follow a certain distribution. \pause
-  \item A QQ-plot is constructed by
-    \begin{enumerate}
-    \item computing, for each data point $x_i$, the fraction $q_i$ of
-      data points that are less than or equal to $x_i$ (where do you
-      know that function from?)\pause
-    \item and plotting $x_i$ against the value $y_i$ of the other
-      distribution that has the same fraction $q_i$
-    \end{enumerate}\pause
-
-  \item If the two distributions are equal, the QQ-plot shows a
-    straight line.\pause
-  \item How would you assess the normality of data $x_1,...,x_n$ with
-    a QQ-plot? \pause {\em make the target distribution a Gaussian}
-    (see the sketch on the next slide)
-  \end{itemize}
-
-\end{frame}
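-
-% ----------------------------------------------------------
-% Illustrative sketch: a normal QQ-plot built as described on the
-% previous slide; the plotting positions (i - 0.5)/n and the
-% exponential example data are assumptions chosen for illustration.
-\begin{frame}[fragile]
-\frametitle{QQ-plots: a sketch}
-A minimal Python sketch of the construction above, assuming example
-data and the plotting positions $q_i = (i - 0.5)/n$; the library
-routine \texttt{scipy.stats.probplot} produces such a plot directly.
-\footnotesize
-\begin{verbatim}
-import numpy as np
-import matplotlib.pyplot as plt
-from scipy import stats
-
-rng = np.random.default_rng(0)
-x = np.sort(rng.exponential(size=200))         # clearly non-normal data
-q = (np.arange(1, x.size + 1) - 0.5) / x.size  # fractions q_i
-y = stats.norm.ppf(q)                          # normal values at the same q_i
-
-plt.plot(y, x, 'o', markersize=3)              # straight line <=> ~ normal
-plt.xlabel('normal quantiles')
-plt.ylabel('data quantiles')
-plt.show()
-\end{verbatim}
-\end{frame}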
-% ----------------------------------------------------------
-\begin{frame}
-\frametitle{histogram equalization}
-\begin{minipage}{1.0\linewidth}
-  \begin{minipage}{0.4\linewidth}
-    \begin{task}{special transform}
-      Which function $y = f(x)$ transforms $x$ such that $y$ has the
-      distribution $p(y)$?
-
-      Do you know that function?
-    \end{task}
-
-    \only<2>{{\bf Answer:} The cumulative distribution function $f(x) = F(x)$.}
-  \end{minipage}
-  \begin{minipage}{0.6\linewidth}
-    \only<1>{
-      \begin{center}
-        \includegraphics[width=1.\linewidth]{figs/HE3.png}
-      \end{center}
-    }\pause
-    \only<2>{
-      \begin{center}
-        \includegraphics[width=1.\linewidth]{figs/HE3Solution.png}
-      \end{center}
-    }
-  \end{minipage}
-\end{minipage}
-\hyperlink{back:detourIV}{\color{gray} back to statistical tests}
-\end{frame}
-
-\end{document}
-