\documentclass{beamer}

\usepackage{xcolor}
\usepackage{listings}
\usepackage{pgf}
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
%\usepackage{multimedia}
\usepackage[english]{babel}
\usepackage{movie15}
\usepackage[latin1]{inputenc}
\usepackage{times}
\usepackage{amsmath}
\usepackage{bm}
\usepackage[T1]{fontenc}
\usepackage[scaled=.90]{helvet}
\usepackage{scalefnt}
\usepackage{tikz}
\usepackage{textcomp}
\usepackage{soul}
\usepackage{hyperref}

\definecolor{lightblue}{rgb}{.7,.7,1.}
\definecolor{mygreen}{rgb}{0,1.,0}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>
{
  \usetheme{Singapore}
  \setbeamercovered{opaque}
  \usecolortheme{tuebingen}
  \setbeamertemplate{navigation symbols}{}
  \usefonttheme{default}
  \useoutertheme{infolines}
  % \useoutertheme{miniframes}
}

\AtBeginSection[]
{
  \begin{frame}
    \begin{center}
      \Huge \insertsectionhead
    \end{center}
    % \frametitle{\insertsectionhead}
    % \tableofcontents[currentsection,hideothersubsections]
  \end{frame}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\setbeamertemplate{blocks}[rounded][shadow=true]

\title[]{Scientific Computing -- Statistics}
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology, University T\"ubingen\\
  Bernstein Center T\"ubingen}
\institute[Scientific Computing]{}
\date{11/27/2013}
%\logo{\pgfuseimage{logo}}
\subject{Lectures}

%%%%%%%%%% configuration for code
\lstset{
  basicstyle=\ttfamily,
  numbers=left,
  showstringspaces=false,
  language=Matlab,
  commentstyle=\itshape\color{darkgray},
  keywordstyle=\color{blue},
  stringstyle=\color{green},
  backgroundcolor=\color{blue!10},
  breaklines=true,
  breakautoindent=true,
  columns=flexible,
  frame=single,
  captionpos=b,
  xleftmargin=1em,
  xrightmargin=1em,
  aboveskip=10pt
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\mycite}[1]{
  \begin{flushright}
    \tiny \color{black!80} #1
  \end{flushright}
}

\input{../latex/environments.tex}
\makeatother

\begin{document}

\begin{frame}
  \titlepage
\end{frame}

\begin{frame}
  \frametitle{Plan}
  \setcounter{tocdepth}{1}
  \tableofcontents
\end{frame}

\begin{frame}
  \frametitle{Information on statistics}
  \begin{itemize}
  \item Samuels, M. L., Wittmer, J. A., \& Schaffner, A. A. (2010).
    Statistics for the Life Sciences (4th ed.). Prentice Hall.
  \item Zar, J. H. (1999). Biostatistical Analysis (4th ed.). Prentice Hall.
  \item \url{http://stats.stackexchange.com}
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section[meta-study]{how statisticians think - the meta-study}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{statisticians are lazy}
  \Large
  \only<1>{
    \begin{center}
      \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-26-05_771.jpg}
    \end{center}
    \mycite{Larry Gonick, The Cartoon Guide to Statistics}
  }\pause
  \only<2>{
    \begin{center}
      \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-41-39_523.jpg}
    \end{center}
    \mycite{Larry Gonick, The Cartoon Guide to Statistics}
  }\pause
  \only<3>{
    \begin{center}
      \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-29-35_312.jpg}
    \end{center}
    \mycite{Larry Gonick, The Cartoon Guide to Statistics}
  }
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{the (imaginary) meta-study}
  \begin{center}
    \only<1>{
      \framesubtitle{finite sampling introduces variation: the sampling distribution}
      \includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
      \mycite{Hesterberg et al., Bootstrap Methods and Permutation Tests}
    }\pause
    \only<2>{
      \framesubtitle{statistic vs. population parameter}
      \includegraphics[width=.8\linewidth]{figs/statistic1.png}
      \mycite{Hesterberg et al., Bootstrap Methods and Permutation Tests}
    }\pause
    \only<3>{
      \framesubtitle{statistic vs. population parameter}
      \includegraphics[width=.8\linewidth]{figs/statistic2.png}
      \mycite{Hesterberg et al., Bootstrap Methods and Permutation Tests}
    }\pause
    \only<4>{
      \framesubtitle{what parts of this diagram do we have in real life?}
      \includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
      \mycite{Hesterberg et al., Bootstrap Methods and Permutation Tests}
    }\pause
    \only<5>{
      \framesubtitle{what parts of this diagram do we have in real life?}
      \includegraphics[width=.8\linewidth]{figs/statistic3.png}
      \mycite{Hesterberg et al., Bootstrap Methods and Permutation Tests}
    }\pause
    \only<6->{
      \framesubtitle{what statistics does}
      \begin{minipage}{1.0\linewidth}
        \begin{minipage}{0.5\linewidth}
          \includegraphics[width=1.\linewidth]{figs/statistic4.png}
          \mycite{Hesterberg et al., Bootstrap Methods and Permutation Tests}
        \end{minipage}
        \begin{minipage}{0.5\linewidth}
          \begin{itemize}
          \item it assumes, derives, or simulates the sampling
            distribution\pause
          \item the sampling distribution only makes sense if you think
            about it in terms of the meta-study\pause
          \item {\color{red} the sampling distribution is the key to
              answering questions about the population from the value of
              the statistic}
          \end{itemize}
        \end{minipage}
      \end{minipage}
    }
  \end{center}
\end{frame}
% % ----------------------------------------------------------
\begin{frame}
  \frametitle{illustrating examples}
  \begin{question}{lung volume of smokers}
    Assume you know the sampling distribution of the mean lung volume of
    smokers. Would you believe that the sample came from a group of
    smokers?
    \begin{center}
      \includegraphics[width=.6\linewidth]{figs/example01.png}
    \end{center}
  \end{question}
\end{frame}

\begin{frame}
  \frametitle{illustrating examples}
  \begin{question}{lung volume of smokers}
    What about now? How would the sampling distribution change if I change
    the population to (i) athletes or (ii) old people?
    \begin{center}
      \includegraphics[width=.6\linewidth]{figs/example02.png}
    \end{center}
  \end{question}
\end{frame}

\begin{frame}
  \frametitle{illustrating examples}
  \begin{question}{Is this diet effective?}
    \begin{center}
      \includegraphics[width=.6\linewidth]{figs/example03.png}
    \end{center}
  \end{question}
\end{frame}

\begin{frame}
  \frametitle{illustrating examples}
  \begin{question}{Is this diet effective?}
    What do you think now?
    \begin{center}
      \includegraphics[width=.6\linewidth]{figs/example04.png}
    \end{center}
  \end{question}
\end{frame}

\begin{frame}
  \frametitle{summary}
  \begin{itemize}
  \item In statistics, we use finite samples from a population to reason
    about features of the population. \pause
  \item The particular feature of the population we are interested in is
    called the {\color{blue}population parameter}. We usually measure this
    parameter in our finite sample as well (the
    {\color{blue}statistic}).\pause
  \item Because of variation due to finite sampling, the statistic almost
    never matches the population parameter. \pause
  \item Using the {\color{blue}sampling distribution} of the statistic, we
    make statements about the relation between our statistic and the
    population parameter.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{outlook}
  {\bf Questions to be addressed}
  \begin{itemize}
  \item How do we choose the statistic?
  \item How do we get the sampling distribution?
  \item How does statistical reasoning work in practice?
  \end{itemize}
  {\bf Perspective}
  \begin{itemize}
  \item We start by looking at a few standard distributions.
  \item We will use those in the statistical tests that follow.
  \item For each statistical test, I also try to provide a non-parametric
    method.
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{probability primer}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{probability models}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
  \frametitle{getting the model right}
  In statistics/probability it is important to select the correct
  distribution. Models are easier to remember if you associate each with a
  ``standard situation''.
  \begin{itemize}
  \item What is the distribution corresponding to throwing a coin? \pause
  \item What in neuroscience/psychology is like throwing a coin (fair or
    unfair)?\pause
  \item What is the distribution of counting heads in repeated independent
    coin tosses?\pause
  \item What in neuroscience/psychology corresponds to counting heads in
    repeated independent coin tosses?
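  \item[] {\color{gray} As a quick check in Matlab (assuming the Statistics
      Toolbox), the standard distributions on the next slides can be
      evaluated directly, e.g.:}
\begin{lstlisting}
binopdf(5, 10, 0.5)  % P(X=5) heads in n=10 fair coin tosses
poisspdf(5, 5)       % Poisson probability of k=5 at rate lambda=5
normpdf(0, 0, 1)     % Gaussian density at x=0 (mu=0, sigma=1)
\end{lstlisting}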
  \end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{the different models}
  \only<1>{
    \framesubtitle{Bernoulli distribution}
    \begin{center}
      \includegraphics[width=.4\linewidth]{figs/Bernoulli.pdf}
    \end{center}
    \begin{itemize}
    \item single coin toss (success/failure)
    \item distribution $p(X=1)=p$
    \end{itemize}
  }\pause
  \only<2>{
    \framesubtitle{uniform distribution}
    \begin{center}
      \includegraphics[width=.4\linewidth]{figs/Uniform.pdf}
    \end{center}
    \begin{itemize}
    \item $n$ items with the same probability of occurrence
    \item distribution $p(X=k)=\frac{1}{n}$
    \end{itemize}
  }\pause
  \only<3>{
    \framesubtitle{binomial distribution}
    \begin{center}
      \includegraphics[width=.4\linewidth]{figs/Binomial00.pdf}
      \includegraphics[width=.4\linewidth]{figs/Binomial01.pdf}
    \end{center}
    \begin{itemize}
    \item number $k$ of successes/heads in $n$ trials
    \item distribution $P(X=k)= {n \choose k} p^k (1-p)^{n-k}$
    \item parameters $n,p$
    \end{itemize}
  }\pause
  \only<4>{
    \framesubtitle{Poisson distribution}
    \begin{center}
      \includegraphics[width=.4\linewidth]{figs/Poisson00.pdf}
      \includegraphics[width=.4\linewidth]{figs/Poisson01.pdf}
    \end{center}
    \begin{itemize}
    \item successes per time unit for (very) large $n$ and small $p$
    \item distribution $P(X=k) = \frac{\lambda^k e^{-\lambda}}{k!}$
    \item parameter: success rate $\lambda$
    \end{itemize}
  }
  \only<5>{
    \framesubtitle{Gaussian/normal distribution}
    \begin{center}
      \includegraphics[width=.4\linewidth]{figs/Gaussian00.pdf}
    \end{center}
    \begin{itemize}
    \item shows up everywhere (central limit theorem)
    \item distribution
      $p(x) = \frac{1}{\sigma\sqrt{2\pi}}\operatorname{exp}\left\{-\frac{\left(x-\mu\right)^2}{2\sigma^2}\right\}$
    \item parameters: mean $\mu$, standard deviation $\sigma$
    \end{itemize}
  }
  \only<6>{
    \framesubtitle{caveat}
    \begin{question}{important distinction}
      \begin{itemize}
      \item For {\em discrete} random variables $P(X=k)$ makes sense
        (probabilities are like ``single weights'').
      \item For {\em continuous} random variables $p(X=x)=0$
        (probabilities are like ``water'').
      \item For {\em continuous} random variables it only makes sense to
        ask for the probability that they take values in a particular
        range.
      \end{itemize}
    \end{question}
  }
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{example}
  You place a mouse in a circular maze and place some food on the opposite
  side. In each trial you record whether the mouse went {\em left} (``L'')
  or {\em right} (``R'') to get the food.
  \vspace{.5cm}
  \begin{minipage}{1.0\linewidth}
    \begin{minipage}{0.59\linewidth}
      \begin{itemize}
      \item What kind of distribution would you expect for the number of
        ``R'' in $10$ trials? What is the distribution of the number of
        ``L''?\pause
      \item Here is the result of $10$ trials: ``LLLLLLLLLL''. What is the
        probability of that?
      \item What do you conclude from that?
      \end{itemize}
    \end{minipage}
    \begin{minipage}{0.4\linewidth}
      \only<1->{
        \begin{center}
          \includegraphics[width=1.\linewidth]{figs/Binomial00.pdf}
        \end{center}
      }
    \end{minipage}
  \end{minipage}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{cumulative distribution function}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{cumulative distribution function (c.d.f.)}
  \framesubtitle{we will need that a lot in statistics}
  \begin{itemize}
  \item The c.d.f.
    is used to compute the probability that a random variable falls in a
    particular range.
  \item It is defined as $F(y) = P(X \le y)$.
  \item For the binomial distribution this would be
    $$F(k) = P(\mbox{no. of successes} \le k)\mbox{ in } n \mbox{ trials}$$
  \item Where could I see that probability in the plot for $k=5$ and
    $n=10$?
    \begin{center}
      \only<1>{
        \includegraphics[width=.5\linewidth]{figs/Binomial00.pdf}
      }
      \only<2>{
        \includegraphics[width=.5\linewidth]{figs/BinomialCdf00.pdf}
      }\pause
      \only<3>{
        \includegraphics[width=.5\linewidth]{figs/BinomialCdf01.pdf}
      }
    \end{center}
  \end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{cumulative distribution function (c.d.f.)}
  \framesubtitle{example}
  \small
  You want to find out whether a subject performs significantly differently
  from chance in $10$ trials, each of which is either successful or not.
  \begin{itemize}[<+->]
  \item What would be a good decision rule?
  \item[] {\color{gray} We set thresholds on the number of successes and
      decide that (s)he is performing at chance if the performance falls
      within the thresholds.}
  \item What is the distribution of the number of successes in $n=10$
    trials if the subject performs at chance?
  \item[] {\color{gray} Binomial with $n=10$ and $p=\frac{1}{2}$}
  \item Let's say we set the thresholds at $k=2$ and $k=8$; what is the
    probability that we think (s)he is {\em not} performing at chance, even
    though (s)he is?
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{cumulative distribution function (c.d.f.)}
  \framesubtitle{example}
  \small
  \begin{itemize}[<+->]
  \item Let's say we set the thresholds at $k=2$ and $k=8$; what is the
    probability that we think (s)he is {\em not} performing at chance, even
    though (s)he is?
  \item[] {\color{gray} The probability for that is
      $P(X \le 2 \mbox{ or } X \ge 8)$. Using the c.d.f.\ that is
      \begin{align*}
        P(X \le 2 \mbox{ or } X \ge 8) &= P(X \le 2) + P(X \ge 8)
        = P(X \le 2) + (1-P(X \le 7))
      \end{align*}
    }
  \end{itemize}
  \only<2>{
    \begin{center}
      \includegraphics[width=.5\linewidth]{figs/BinomialExample00.pdf}
    \end{center}
  }
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{joint and conditional distributions}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
  \frametitle{conditional and marginal $\rightarrow$ joint distribution}
  \framesubtitle{Bayes' rule}
  \begin{itemize}
    \small
  \item Assume you ran decision experiments with two subjects. Subject \#1
    had a success probability of $50\%$, while subject \#2 achieved $80\%$.
  \item $70\%$ of the trials were run with the first subject, $30\%$ of the
    trials with the other.
  \item Each trial gets saved in a file on the hard disk.\pause
  \item Now, let's assume your recording software had a bug and did not
    store the subject ID in the file.
  \item For a given file, we have two random variables now: subject ID $X$,
    number of successes $Y$.
  \end{itemize}
  \begin{center}
    \includegraphics[height=.32\linewidth]{figs/decision01.pdf}
  \end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{joint and conditional distributions}
  \framesubtitle{definitions}
  \begin{definition}{Joint, marginal, and conditional distribution}
    \begin{itemize}
    \item The {\bf joint distribution $P(X,Y)$} gives the probability that
      a particular combination of $X$ and $Y$ occurs at the same time.
      \pause
    \item The {\bf marginal distributions $P(X)$ and $P(Y)$} specify the
      probabilities that a particular value occurs if the value of the
      other variable is ignored. \pause
    \item The {\bf conditional distribution $P(X|Y)$} gives the probability
      of particular values of $X$ given that $Y$ has particular values.
    \end{itemize}\pause
  \end{definition}
  \begin{center}
    {\color{blue} joint distribution
      $\stackrel{\mbox{Bayes' Rule}}{\leftrightarrow}$ marginal and
      conditional distribution}
  \end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{conditional and marginal $\rightarrow$ joint distribution}
  \framesubtitle{Bayes' rule}
  \begin{itemize}
    \small
  \item Assume you ran decision experiments with two subjects. Subject \#1
    had a success probability of $50\%$, while subject \#2 achieved $80\%$.
  \item $70\%$ of the trials were run with the first subject, $30\%$ of the
    trials with the other.
  \item What probabilities do I need to write at the edges?
  \item What distribution do I use for the subject ID ($X$)?
  \item What distribution do I use for the conditional distribution $Y|X$?
  \end{itemize}
  \begin{center}
    \only<1>{\includegraphics[height=.32\linewidth]{figs/decision01.pdf}}
    \only<2>{\includegraphics[height=.32\linewidth]{figs/decision02.pdf}}
    \only<3>{\includegraphics[height=.32\linewidth]{figs/decision03.pdf}}
  \end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{conditional and marginal $\rightarrow$ joint distribution}
  \framesubtitle{Bayes' rule}
  \begin{itemize}
    \small
  \item The joint probabilities are obtained by multiplying the
    probabilities along the paths from the root node to the leaves.
    \begin{center}
      \includegraphics[height=.32\linewidth]{figs/decision03.pdf}
    \end{center}\pause
  \item In algebraic terms, this is known as {\em Bayes' rule} (very
    important!)
    $$\color{red} P(Y|X)P(X) = P(X|Y)P(Y) = P(X,Y)$$\pause
  \item You can remember it as ``moving variables in front of the bar''
    $$P(X|Y) P(Y) = P(X,Y|\_)$$
  \end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{Bayes' rule}
  $$P(X|Y)P(Y) = P(Y|X)P(X) = P(X,Y)$$
  \begin{task}{Independent random variables}
    If two random variables are independent, the joint distribution is the
    product of their marginals
    $$ P(X,Y) =P(X) P(Y)$$
    How can you see that from Bayes' rule?
  \end{task}
  \pause
  \begin{solution}{Solution}
    If the variables are independent, $P(X|Y) = P(X)$ and $P(Y|X) = P(Y)$:
    the probability of $X$ is the same as the probability of $X$ given that
    I know $Y$, because knowing $Y$ does not help.
  \end{solution}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{Joint $\rightarrow$ marginal and conditional distribution}
  \begin{itemize}
    \small
  \item The plot shows the joint distribution $P(X,Y)$, where $X$ is the
    subject ID and $Y$ the number of successes in $n=10$ trials.
    \begin{center}
      \only<-1>{\includegraphics[width=.83\linewidth]{figs/Joint00.pdf}}
      \only<2>{\includegraphics[width=.83\linewidth]{figs/Joint01.pdf}}
      \only<3>{\includegraphics[width=.83\linewidth]{figs/Joint02.pdf}}
    \end{center}
    \only<-1>{ \vspace{2cm}}
    \only<2-3>{
  \item We can get the marginal distributions via {\em marginalization}
    (very important!):
    $$\color{red} P(Y) =\sum_{i=1}^2 P(X=i, Y) \mbox{ and }
    P(X) = \sum_{j=0}^{n} P(X, Y=j)$$}
    \only<3->{
  \item We can get the conditional distribution via Bayes' rule:
    $$P(X|Y)P(Y) = P(X,Y) \Leftrightarrow P(X|Y) = \frac{P(X,Y)}{P(Y)}$$}
    \only<-2>{ \vspace{2cm}}
  \end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{The posterior}
  \begin{itemize}
    \small
  \item Could we use the probability distribution to get an idea of which
    subject the number of successes came from?\pause
  \item Use Bayes' rule to ``invert'' the conditional distribution
    $$P(X|Y=k) = P(X,Y=k)/P(Y=k)$$
  \end{itemize}
  \begin{center}
    \only<-2>{\includegraphics[height=.28\linewidth]{figs/Joint02.pdf}}
    \only<3->{\includegraphics[height=.53\linewidth]{figs/Posterior00.pdf}}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{summary}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{summary}
  \begin{itemize}
  \item We need to know certain distributions to use them as sampling
    distributions. \pause
  \item For many distributions one can use a ``standard situation'' to
    remember them. \pause
  \item When dealing with two or more random variables one deals with
    {\color{blue}joint, marginal}, and {\color{blue}conditional
      distributions}.\pause
  \item Marginal and conditional distributions can be converted into the
    joint distribution via {\color{blue}Bayes' rule}.\pause
  \item The conversion in the other direction can be done via
    {\color{blue}marginalization} and {\color{blue}Bayes' rule}.
  \end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{error bars \& confidence intervals}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\subsection{errorbars}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{illustrating example}
  As part of a study of the development of the thymus gland, researchers
  weighed the glands of $50$ chick embryos after 14 days of incubation.
  The following plot depicts the mean thymus gland weight in mg:
  \mycite{modified from SWS exercise 6.3.3.}
  \pause
  {\bf Which of the two bar plots is the correct way of displaying the
    data?}
  \begin{columns}
    \begin{column}[t]{.5\linewidth}
      \includegraphics[width=\linewidth]{figs/StandardErrorOrStandardDeviation.pdf}
    \end{column}
    \begin{column}[t]{.5\linewidth}
      \pause
      That depends on what you want to say:
      \begin{itemize}
      \item To give a measure of variability in the data: use the
        {\color{blue} standard deviation
          $\hat\sigma = \sqrt{\frac{1}{n-1}\sum_{i=1}^n (x_i - \hat\mu)^2}$}
      \item To make a statement about the variability of the mean estimate:
        use the {\color{blue}standard error $\frac{\hat\sigma}{\sqrt{n}}$}
      \end{itemize}
    \end{column}
  \end{columns}
  %%%%%%%%%%%%%%% GO ON HERE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  % that depends: variability (descriptive statistics, how variable is
  % the mean -> inferential, makes only sense in the meta-study setting)
  % first matlab exercise: simulate standard error
  % recommend paper for eyeballing test results from standard errors
  % from std of mean to confidence intervals
  % introduce bootstrapping (matlab exercise), then t-statistic
  % intervals
  % end with standard error of the median (and the thing from wikipedia)
\end{frame}
%------------------------------------------------------------------------------
\begin{frame}
  \frametitle{standard error}
  \framesubtitle{bootstrapping}
  \begin{task}{quantifying the variability in the mean}
    Download \url{https://www.dropbox.com/s/20l7ptrdc4kkceq/materialNMI.zip}
    Load the dataset {\tt thymusglandweights.dat} into Matlab and use the
    first $50$ datapoints as your dataset. Repeat the following steps
    $m=500$ times:
    \begin{enumerate}
    \item sample $50$ data points from $x$ with replacement
    \item compute their mean and store it
    \end{enumerate}
    Look at the standard deviation of the computed means and compare it to
    the standard error.
  \end{task}
\end{frame}
%------------------------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{standard error}
  \framesubtitle{bootstrapping}
  \begin{itemize}
  \item The sample standard error $\frac{\hat\sigma}{\sqrt{n}}$ is
    {\color{blue}an estimate of the standard deviation of the means} in
    repeated experiments which is computed from a single experiment.
  \item When you want to do statistical tests on the mean, it is better to
    use the standard error, because one can eyeball significance from it.
    \mycite{Cumming, G., Fidler, F., \& Vaux, D. L. (2007). Error bars in
      experimental biology. The Journal of Cell Biology, 177(1), 7--11.}
  \item {\color{blue}Bootstrapping} is a way to generate an estimate of the
    {\color{blue}sampling distribution of any statistic}. Instead of
    sampling from the true distribution, it samples from the empirical
    distribution represented by your dataset.
    \mycite{Efron, B., \& Tibshirani, R. J. (1994). An Introduction to the
      Bootstrap. Chapman and Hall/CRC.}
  \end{itemize}
\end{frame}
%------------------------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{standard error of the median?}
  {\bf What kind of errorbars should we use for the median?}
  It depends again:
  {\bf Descriptive statistics}
  \begin{itemize}
  \item As a {\color{blue}descriptive statistic} one could use the
    {\em median absolute deviation}: the median of the absolute differences
    of the datapoints from the median.
  \item Alternatively, one could bootstrap a standard deviation of the
    median, as sketched below.
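  \item[] {\color{gray} A minimal Matlab sketch of that bootstrap (assuming
      the thymus data from before in a vector {\tt x}):}
\begin{lstlisting}
% bootstrap the standard deviation of the median
m = zeros(500, 1);
for i = 1:500
    idx = randi(numel(x), numel(x), 1);  % resample with replacement
    m(i) = median(x(idx));
end
std(m)  % bootstrapped "standard error" of the median
\end{lstlisting}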
  \end{itemize}
  \pause
  {\bf Inferential statistics}
  \begin{itemize}
  \item For {\color{blue}inferential statistics} one should use something
    that gives the reader {\color{blue}information about significance}.
  \item Here, {\color{blue} confidence intervals} are a better choice.
  \end{itemize}
\end{frame}
% ----------------------------------------------------------
\subsection{confidence intervals \& bootstrapping}
%------------------------------------------------------------------------------
\begin{frame}
  \frametitle{confidence intervals}
  \begin{center}
    \only<1>{
      \vspace{.1cm}
      \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-55-39_181.jpg}
      \mycite{Larry Gonick, The Cartoon Guide to Statistics}
    }\pause
    \only<2>{
      \vspace{.1cm}
      \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-56-59_866.jpg}
      \mycite{Larry Gonick, The Cartoon Guide to Statistics}
    }\pause
    \only<3>{
      \vspace{.1cm}
      \includegraphics[width=.4\linewidth]{figs/2012-10-29_14-58-18_054.jpg}
      \mycite{Larry Gonick, The Cartoon Guide to Statistics}
    }\pause
    \only<4>{
      \vspace{.1cm}
      \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-59-05_984.jpg}
      \mycite{Larry Gonick, The Cartoon Guide to Statistics}
    }\pause
    \only<5>{
      \vspace{.1cm}
      \includegraphics[width=.6\linewidth]{figs/2012-10-29_15-04-38_517.jpg}
      \mycite{Larry Gonick, The Cartoon Guide to Statistics}
    }\pause
    \only<6>{
      \vspace{.1cm}
      \includegraphics[width=.6\linewidth]{figs/2012-10-29_15-09-25_388.jpg}
      \mycite{Larry Gonick, The Cartoon Guide to Statistics}
    }
  \end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{confidence intervals for the median}
  \begin{definition}{Confidence interval}
    A $(1-\alpha)\cdot 100\%$ confidence interval for a statistic
    $\hat\theta$ is an interval $\hat\theta \pm a$ such that the population
    parameter $\theta$ is contained in that interval in
    $(1-\alpha)\cdot 100\%$ of the experiments. An alternative way to put
    it is that $(\hat\theta - \theta) \in [-a,a]$ in
    $(1-\alpha)\cdot 100\%$ of the cases.
  \end{definition}
  \begin{columns}
    \begin{column}[t]{.5\linewidth}
      If we knew the sampling distribution of the median $\hat m$, could we
      generate, e.g., a $95\%$ confidence interval?\pause
      \vspace{.5cm}
      Yes, we could choose the interval such that $\hat m - m$ lies in that
      interval in $95\%$ of the cases.
    \end{column}
    \begin{column}[t]{.5\linewidth}
      \only<1>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian00.pdf}}
      \only<2>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian01.pdf}}
    \end{column}
  \end{columns}
  % \begin{task}{Bootstrapping a confidence interval for the median}
  %   \begin{itemize}
  %   \item Use the same dataset as before.
  %   \item Bootstrap $500$ medians.
  %   \item Compute the $2.5\%$ and the $97.5\%$ percentile of the
  %     $500$ medians.
  %   \end{itemize}
  % \end{task}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{confidence intervals for the median}
  \framesubtitle{how to get the sampling distribution}
  \begin{task}{Bootstrapping a confidence interval for the median}
    \begin{itemize}
    \item Use the same dataset as before.
    \item Bootstrap $500$ medians.
    \item Compute the $2.5\%$ and the $97.5\%$ percentile of the $500$
      medians.
    \end{itemize}
    These two numbers give you $\hat m -a$ and $\hat m + a$ for the $95\%$
    confidence interval.
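    \medskip
    {\color{gray} A minimal Matlab sketch (again assuming the data in a
      vector {\tt x}; {\tt prctile} is part of the Statistics Toolbox):}
\begin{lstlisting}
% percentile bootstrap confidence interval for the median
med = zeros(500, 1);
for i = 1:500
    med(i) = median(x(randi(numel(x), numel(x), 1)));
end
ci = prctile(med, [2.5 97.5])  % 95% confidence interval
\end{lstlisting}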
  \end{task}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{confidence intervals for the median}
  \framesubtitle{how to get it analytically}
  There is also an analytical estimate of the confidence interval for the
  median: use the $\frac{\alpha}{2}$ and $1 - \frac{\alpha}{2}$ quantiles
  of a binomial distribution.
  \begin{task}{Comparing the analytical interval to the bootstrapped one}
    \begin{itemize}
    \item Get the $\frac{\alpha}{2}$ quantile minus one and the
      $1 - \frac{\alpha}{2}$ quantile of a binomial distribution using
      {\tt binoinv}.
    \item Sort your data points and use the data points at the positions
      corresponding to the quantiles.
    \item Compare that to the bootstrapped confidence interval.
    \end{itemize}
  \end{task}
  \tiny
  The idea behind this:
  \begin{itemize}
  \item The probability that the true median $m$ is covered by the interval
    between $x_r$ and $x_{r+1}$ is binomial
    $${n \choose r} \left(\frac{1}{2}\right)^r \left(\frac{1}{2}\right)^{n-r}$$
  \item Now we take enough intervals in the ``middle'' of our sample that
    we cover the true median with at least $1-\alpha$ probability.
    \mycite{David, H. A., \& Nagaraja, H. N. (2003). Order Statistics.
      Wiley.}
  \end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{confidence intervals}
  \framesubtitle{Notice the theme!}
  \begin{enumerate}
  \item choose a statistic
  \item get the sampling distribution of the statistic (by theory or
    simulation)
  \item use that distribution to reason about the relation between the true
    population parameter (e.g. $m$) and the sampled statistic $\hat m$
  \end{enumerate}
  \begin{center}
    \color{blue} This is the scaffold of most statistical techniques. Try
    to find it and it can help you understand them.
  \end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{let's practice that again}
  \framesubtitle{confidence interval for the mean}
  \begin{task}{Bootstrapping a confidence interval for the mean}
    \begin{itemize}
    \item Use the same dataset as before.
    \item Use bootstrapping to get a $95\%$ confidence interval for the
      mean.
    \end{itemize}
  \end{task}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{confidence interval for the mean}
  Getting a convenient sampling distribution is (a little bit) more
  difficult:
  \begin{itemize}
  \item If the $x_1,...,x_n\sim \mathcal N(\mu,\sigma)$ are Gaussian, then
    $\hat\mu$ is Gaussian as well.
  \item What is the mean of $\hat\mu$?
    What is its standard deviation?\pause
  \item[]{\color{gray} $\langle\hat\mu\rangle_{X_1,...,X_n} = \mu$ and
      $\mbox{std}(\hat\mu) = \frac{\sigma}{\sqrt{n}}$}\pause
  \item The problem is that
    $\hat\mu \sim \mathcal N\left(\mu, \frac{\sigma}{\sqrt{n}}\right)$
    depends on unknown population parameters.\pause
  \item However,
    $$\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}} \sim
    \mbox{t-distribution with }n-1\mbox{ degrees of freedom}$$
  \item Therefore,
    \begin{align*}
      P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)
      &=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le
        t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
    \end{align*}
  \end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{confidence interval for the mean}
  \begin{task}{An analytical confidence interval for the mean}
    Extend your script to contain the analytical confidence interval using
    \begin{align*}
      P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)
      &=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le
        t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
    \end{align*}
  \end{task}
\end{frame}
% ----------------------------------------------------------
\subsection{summary}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{summary}
  \begin{emphasize}{Which errorbars should I choose?}
    Always use errorbars to help the reader see your point.
  \end{emphasize}
  \pause
  \begin{itemize}
  \item Errorbars can {\color{blue} describe the variability} in a dataset
    ({\color{blue}descriptive statistics}). Examples: {\em standard
      deviation, inter-quartile range, ...}
  \item {\color{blue}Errorbars can yield information about significance in
      testing (inferential statistics)}. Examples: {\em standard error of
      the mean, confidence intervals, ...}
  \item Other possible ways of displaying variability: {\em boxplots,
      violin plots, histograms, ...}
  \end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{statistical tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{one-sample test on the mean}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{from confidence intervals to one-sample test}
  \begin{task}{example: eye movements}
    \small
    In an experiment you measure eye movements of subjects on the screen.
    You want to be sure that the subject fixates a certain target (at
    $x=0$). During the fixation period, you acquire $n=16$ measurements.
    The measurements have a mean of $\hat\mu=2.5$ and a standard deviation
    of $\hat\sigma=4$. Assuming that the single fixation locations are
    Gaussian distributed, can you be $95\%$ confident that the subject
    fixated the target (x-position)?
  \end{task}
  \pause
  \begin{solution}{use confidence intervals}
    \small
    Compute a $95\%$ confidence interval: Does it contain $\mu=0$? Yes?
    Then we are $95\%$ confident! From the table we get $t_{0.025}=2.131$;
    the standard error is
    $\frac{\hat\sigma}{\sqrt{n}} = \frac{4}{\sqrt{16}}=1$, which means that
    $$\hat\mu\pm t_{0.025}\frac{\hat\sigma}{\sqrt{n}} = 2.5 \pm 2.131$$
    is our confidence interval. It does not contain $\mu=0$, so we cannot
    be $95$\% confident in this case.
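    \medskip
    {\color{gray} A minimal Matlab check (assuming {\tt tinv} from the
      Statistics Toolbox):}
\begin{lstlisting}
n = 16; muhat = 2.5; sigmahat = 4;
% 95% confidence interval around the sample mean
ci = muhat + tinv([0.025 0.975], n-1) * sigmahat/sqrt(n)
% ci is approx. [0.37, 4.63], which does not contain 0
\end{lstlisting}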
  \end{solution}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{from confidence intervals to one-sample test}
  \begin{task}{example: eye movements}
    Could we put the interval on $\mu=0$ as well?
  \end{task}
  \pause
  \begin{solution}{Example: eye movements}
    Yes: if the interval around $\hat\mu$ contains $\mu$, then the interval
    around $\mu$ also contains $\hat\mu$.
  \end{solution}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{One-sample t-test}
  \begin{task}{example 2: eye movements again}
    \small
    Now assume that there is a fixation target at $x=0$. You are running
    the experiment with a monkey and you want to discard all trials in
    which the monkey was not fixating the target. During the trial, you
    acquire again $n=16$ measurements with mean $\hat\mu=2.5$ and standard
    deviation $\hat\sigma=4$. How can you be confident that the monkey did
    not fixate the target if you are willing to be wrong in $5\%$ of the
    cases, where ``wrong'' means that you believe the monkey was not
    fixating when in fact it was?
  \end{task}
  \pause
  \begin{solution}{Example 2: eye movements again}
    \small
    The steps of the solution are exactly the same; only the logic is
    different.
    \begin{itemize}
    \item We make a $95\%$ confidence interval around the fixation target
      $\mu=0$. This means that if the monkey was actually fixating the
      target, $95\%$ of the measured average positions $\hat\mu$ would
      fall into that interval.
    \item $5\%$ of the measured averages would fall outside the interval
      even though the monkey fixated, and we would falsely treat those
      trials as ``not fixated''.
    \end{itemize}
  \end{solution}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{one-sample t-test}
  \framesubtitle{Notice the theme again!}
  \only<1>{
    \begin{center}
      \includegraphics[width=0.4\linewidth]{figs/repetition0.png}
    \end{center}
    \begin{enumerate}
      \small
    \item Choose a statistic! We take the standardized mean
      $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
    \end{enumerate}
  }\pause
  \only<2>{
    \begin{center}
      \includegraphics[width=0.4\linewidth]{figs/repetition1.png}
    \end{center}
    \begin{enumerate}
      \small
    \item Choose a statistic! We take the standardized mean
      $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
    \item Get a sampling distribution! Here, we get it by assuming that the
      positions $x_1,...,x_{16}$ are Gaussian.
    \end{enumerate}
  }\pause
  \only<3>{
    \begin{center}
      \includegraphics[width=0.4\linewidth]{figs/repetition2.png}
    \end{center}
    \begin{enumerate}
      \small
    \item Choose a statistic! We take the standardized mean
      $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
    \item Get a sampling distribution! Here, we get it by assuming that the
      positions $x_1,...,x_{16}$ are Gaussian. The resulting distribution
      of $t$ is a t-distribution.
    \end{enumerate}
  }\pause
  \only<4>{
    \begin{center}
      \includegraphics[width=0.4\linewidth]{figs/repetition3.png}
    \end{center}
    \begin{enumerate}
      \small
    \item Choose a statistic! We take the standardized mean
      $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
    \item Get a {\color{blue}null distribution}! Here, we get it by
      assuming that the positions $x_1,...,x_{16}$ are Gaussian. The
      resulting distribution of $t$ is a t-distribution.
    \item Get an interval around $\mu=0$ in which values of $\hat\mu$ are
      assumed typical for $\mu=0$, the {\color{blue}null hypothesis $H_0$}.
    \end{enumerate}
  }
  \pause
  \only<5>{
    \begin{center}
      \includegraphics[width=0.4\linewidth]{figs/repetition5.png}
    \end{center}
    \begin{enumerate}
      \small
    \item Choose a statistic! We take the standardized mean
      $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
    \item Get a {\color{blue}null distribution}! Here, we get it by
      assuming that the positions $x_1,...,x_{16}$ are Gaussian. The
      resulting distribution of $t$ is a t-distribution.
    \item Get an interval around $\mu=0$ in which values of $\hat\mu$ are
      assumed typical for $\mu=0$, the {\color{blue}null hypothesis $H_0$}.
      This is done by fixing the {\color{blue}type I error} probability.
    \end{enumerate}
  }
  \pause
  \only<6>{
    \begin{center}
      \includegraphics[width=0.4\linewidth]{figs/repetition4.png}
    \end{center}
    \begin{enumerate}
      \small
    \item Choose a statistic! We take the standardized mean
      $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
    \item Get a {\color{blue}null distribution}! Here, we get it by
      assuming that the positions $x_1,...,x_{16}$ are Gaussian. The
      resulting distribution of $t$ is a t-distribution.
    \item Get an interval around $\mu=0$ in which values of $\hat\mu$ are
      assumed typical for $\mu=0$, the {\color{blue}null hypothesis $H_0$}.
      This is done by fixing the {\color{blue}type I error} probability.
    \item Outside that interval we consider $\mu=0$ implausible and reject
      $H_0$.
    \end{enumerate}
  }
\end{frame}
% ----------------------------------------------------------
\subsection{another one-sample test}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{another one-sample test}
  \begin{task}{Fair coin?}
    \small
    Assume you carry out the following test to determine whether a coin is
    fair or not: You throw the coin $n=3$ times. If the result is either
    $3\times$ head or $3\times$ tail, you conclude that the coin is not
    fair. Answer the following questions (for yourself first):
    \begin{enumerate}
    \item What is the meta-study? \pause {\em Repeated experiments of 3
        throws with this coin.}\pause
    \item What is the statistic used? \pause {\em The number of heads
        (could also be tails).}\pause
    \item What is $H_0$? \pause {\em The coin is fair.}\pause
    \item What is the Null distribution? \pause {\em The distribution is
        binomial
        $$p(k \mbox{ heads in } n \mbox{ throws})={n \choose k}
        \left(\frac{1}{2}\right)^k \left(\frac{1}{2}\right)^{n-k} $$}\pause
    \item What is the Type I error of this test? \pause
      {\em $p(HHH|H_0) + p(TTT|H_0) = \frac{2}{8}$}
    \end{enumerate}
  \end{task}
\end{frame}
% ----------------------------------------------------------
\subsection{paired sample t-test}
% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{paired sample t-test}
  \begin{task}{Hunger Rating (SWS, Example 3.2.4)}
    \begin{minipage}{1.0\linewidth}
      \begin{minipage}{0.5\linewidth}
        \small
        During a weight loss study each of nine subjects was given either
        the active drug m-chlorophenylpiperazine (mCPP) for two weeks and
        then a placebo for another two weeks, or else was given the
        placebo for the first two weeks and then mCPP for the second two
        weeks. Can we say that there was an effect at a significance level
        of $5$\%?
      \end{minipage}
      \begin{minipage}{0.5\linewidth}
        \begin{center}
          \includegraphics[width=0.8\linewidth]{figs/hunger.png}
        \end{center}
      \end{minipage}
    \end{minipage}
    \vspace{.5cm}
    What could we use as statistic? What is $H_0$? Is the difference
    significant?
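    \medskip
    {\color{gray} Once you have settled on a statistic, Matlab can do the
      computation; a sketch, assuming the ratings in vectors {\tt drug}
      and {\tt placebo}:}
\begin{lstlisting}
d = drug - placebo;    % paired differences
[h, p] = ttest(d, 0)   % one-sample t-test of the differences against 0
\end{lstlisting}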
  \end{task}
\end{frame}

\begin{frame}
  \frametitle{paired sample t-test}
  \begin{solution}{Hunger Rating (SWS, Example 3.2.4)}
    \begin{minipage}{1.0\linewidth}
      \begin{minipage}{0.5\linewidth}
        \small
        \begin{enumerate}
        \item The statistic is the mean difference between drug and
          placebo.\pause
        \item $H_0$ is ``there is no difference'', i.e. the true mean of
          the differences is zero. \pause
        \item The standard error is $33/\sqrt{9}=11$.\pause
        \item $n-1=8$ DoF yields (t-distribution table) $t_{0.025}=2.306$,
          so we reject $H_0$ if $\hat\mu$ falls outside
          $0\pm t_{0.025}\cdot 11 = \pm 25.366$. \pause
        \item This means the difference is significant at $\alpha=0.05$.
        \end{enumerate}
      \end{minipage}
      \begin{minipage}{0.5\linewidth}
        \begin{center}
          \includegraphics[width=0.8\linewidth]{figs/hunger.png}
        \end{center}
      \end{minipage}
    \end{minipage}
  \end{solution}
\end{frame}

\begin{frame}
  \frametitle{paired sample t-test}
  \begin{itemize}
  \item a paired sample consists of a number of {\em paired} measurements
    (e.g. before/after)\pause
  \item compute the differences (either there are many of them, or check
    that they are approximately Gaussian distributed)\pause
  \item use a one-sample t-test on the differences
  \end{itemize}
\end{frame}
% ----------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{sign rank test}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}
  \frametitle{sign rank test}
  \begin{task}{Hunger Rating (SWS, Example 3.2.4)}
    \small
    \begin{minipage}{1.0\linewidth}
      \begin{minipage}{0.5\linewidth}
        \small
        Consider again the example data from before. Instead of taking the
        difference, we consider now only whether ``drug'' was smaller or
        greater than ``placebo''. We then count the number of times for
        which ``drug''$<$``placebo'' and the number of times
        ``drug''$>$``placebo''.
      \end{minipage}
      \begin{minipage}{0.5\linewidth}
        \begin{center}
          \includegraphics[width=0.5\linewidth]{figs/hunger.png}
        \end{center}
      \end{minipage}
    \end{minipage}
    \begin{itemize}
    \item What is the statistic?\pause {\em The number $N_+$ of ``$>$'' or
        the number $N_-$ of ``$<$''.} \pause
    \item What is $H_0$? \pause {\em Both signs are equally likely, so on
        average $N_+ = N/2$.} \pause
    \item What is $H_A$? \pause {\em $N_+ > N/2$ or $N_+ < N/2$} \pause
    \item What is the Null distribution? \pause {\em Binomial with
        $p=0.5$} \pause
    \item Given $\alpha$, how is the region determined in which we reject
      $H_0$? \pause {\em Choose $a$ and $b$ such that
        $P(k < a|H_0) + P(k > b|H_0) \le \alpha$.}
    \end{itemize}
  \end{task}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{the testing framework}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}
  \frametitle{the statistical testing framework}
  \begin{center}
    \only<1>{\includegraphics[width=\linewidth]{figs/testframework00.pdf}}
    \only<2>{\includegraphics[width=\linewidth]{figs/testframework01.pdf}}
  \end{center}
  \small
  \begin{columns}
    \begin{column}[t]{.5\linewidth}
      {\bf You want:}
      \begin{itemize}
      \item large power
      \item small type I \& II error probability ($\alpha$ and $\beta$)
      \end{itemize}
    \end{column}
    \begin{column}[t]{.5\linewidth}
      \begin{itemize}
      \item \hyperlink{sec:power}{\color{magenta}detour II: statistical
          power} \hypertarget{back:power}{}
      \item \hyperlink{sec:bayesian}{\color{magenta}detour III: Bayes rule
          and statistical tests} \hypertarget{back:bayesian}{}
      \end{itemize}
    \end{column}
  \end{columns}
  Which of the above can {\bf you} choose?
  \pause {\em the type I error probability $\alpha$}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{zoo of statistical tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
  \hypertarget{back:detourIV}{}
  \frametitle{how to choose the statistical test}
  \begin{center}
    \includegraphics[height=.38\linewidth]{figs/fig0.pdf}
  \end{center}
  \begin{itemize}
  \item Normality can be checked with a QQ-plot
    (\hyperlink{sec:qqplots}{\color{magenta} detour IV: QQ-plots}).
  \item If $n$ is large and the variance of the data distribution is
    finite, the central limit theorem guarantees normality for ``summed
    statistics''.
  \end{itemize}
\end{frame}
% ------------
\begin{frame}
  \frametitle{}
  \begin{center}
    \includegraphics[height=.6\linewidth]{figs/fig2.pdf}
  \end{center}
\end{frame}
% ------------
\begin{frame}
  \frametitle{}
  \begin{center}
    \includegraphics[height=.6\linewidth]{figs/fig3.pdf}
  \end{center}
\end{frame}
% ------------
%-----------------------------------------------------------------
%-----------------------------------------------------------------
\begin{frame}
  \frametitle{tests for normal data}
  \begin{task}{menstrual cycle}
    The data set {\tt menstrual.dat} contains the lengths of the menstrual
    cycles in a random sample of 15 women. Assume we want to test the
    hypothesis that the mean length of the human menstrual cycle is equal
    to a lunar month ($29.5$ days). Consider the data to be sufficiently
    normal. Questions:
    \begin{itemize}
    \item What is $H_0$? What is $H_A$? \pause $H_0: \mu=29.5$,
      $H_A: \mu\not=29.5$ \pause
    \item What is the test statistic? \pause
      $t=\frac{\hat\mu - 29.5}{\hat\sigma/\sqrt{n}}$ \pause
    \item Which test would you use and why? {\em One-sample t-test: data
        normal, one sample against a fixed mean.}
    \end{itemize}
  \end{task}
  \hyperlink{sec:twotailed}{\color{magenta}detour I: one- vs. two-tailed}
  \hypertarget{back:twotailed}{}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
  \frametitle{}
  \begin{center}
    \includegraphics[height=.6\linewidth]{figs/fig4.pdf}
  \end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{}
  \begin{task}{chirping}
    A scientist conducted a study of how often her pet parakeet chirps.
    She recorded the number of distinct chirps the parakeet made in a
    30-minute period, sometimes when the room was silent and sometimes
    when music was playing. The data are shown in the following table.
    Test whether the bird changes its chirping behavior when music is
    playing (data set {\tt chirping.dat}; columns: day, with, without).
    Questions:
    \begin{itemize}
    \item What is $H_0$? What is $H_A$? \pause
      $d_i=x_{\mbox{with}}-x_{\mbox{without}}$. $H_0: \mu_d=0$,
      $H_A: \mu_d\not=0$ \pause
    \item What is the test statistic? \pause
      $t=\frac{\hat\mu_d - 0}{\hat\sigma_d/\sqrt{n}}$ \pause
    \item Which test would you use and why?
      \pause {\em Paired t-test: data sufficiently normal, measurements are
        paired by day.}
    \end{itemize}
  \end{task}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
  \frametitle{}
  \begin{center}
    \includegraphics[height=.7\linewidth]{figs/fig5.pdf}
  \end{center}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
  \frametitle{}
  \begin{center}
    \includegraphics[width=.8\linewidth]{figs/fig6.pdf}
  \end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{two independent sample test}
  \begin{task}{Brain Weights (permutation test)}
    The dataset {\tt brainweight.dat} contains brain weights of males and
    females. It consists of {\bf (i) two samples (male/female)} which are
    {\bf (ii) not paired}. We want to test whether the mean brain weights
    of males and females are different.
    \begin{itemize}
    \item What could we use as statistic?\pause {\em~the difference in the
        means} \pause
    \item What would be $H_0$?\pause {\em~the difference is zero} \pause
    \item Can you think of a way to generate an estimate of the Null
      distribution with Matlab? \pause {\em~Permutation test: shuffle the
        labels, compute the difference in means, repeat ...}. \pause
    \end{itemize}
  \end{task}
  \begin{itemize}
  \item The {\color{blue}two-sample independent t-test} is the parametric
    test for this dataset.
  \item If normality does not hold, you can use the
    {\color{blue}Wilcoxon-Mann-Whitney test}.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{one- and two-sample t-test and sign test}
  \begin{center}
    \tiny
    \bgroup
    \def\arraystretch{2}
    \begin{tabular}{|l|c|c|c|}
      \hline
      \textbf{name} & \textbf{statistic} & $\boldsymbol{H_{0}}$ &
      \textbf{Null distribution}\tabularnewline
      \hline
      \hline
      one sample t-test & $t=\frac{\overline{x}-0}{\mbox{SE}_x}$ &
      mean of $t$ is zero & t-distr. with $n-1$ DoF\tabularnewline
      \hline
      paired sample t-test &
      $t=\frac{\overline{d}-0}{\mbox{SE}_d},\, d=x_{i}-y_{i}$ &
      mean of $t$ is zero & t-distr. with $n-1$ DoF\tabularnewline
      \hline
      sign test & $t=\#\left[x_{i}<y_{i}\right]$ &
      $P(x_{i}<y_{i})=\frac{1}{2}$ &
      binomial distr. with $p=\frac{1}{2}$\tabularnewline
      \hline
    \end{tabular}
    \egroup
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{p-values}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%---------------------------------------------------------
\begin{frame}
  \frametitle{the mother of all statistics: the p-value}
  \only<1>{
    So far, we chose a particular threshold $b$ by fixing the type I error
    rate $\alpha$.
    \begin{center}
      \includegraphics[width=.7\linewidth]{figs/pval0.png}
    \end{center}
  }
  \only<2>{
    \begin{itemize}
    \item The {\color{blue}p-value} is the type I error rate if you use
      your {\color{blue} actually measured statistic} as threshold.
    \item In other words: The p-value is the minimal type I error rate you
      have to accept if you call your result significant.
    \end{itemize}
    \begin{center}
      \includegraphics[width=.7\linewidth]{figs/pval1.png}
    \end{center}
  }
\end{frame}
%---------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{the mother of all statistics: the p-value}
  \framesubtitle{Why is it a universal measure?}
  The p-value is the minimal type I error rate you have to accept if you
  call your result significant.
  \begin{itemize}
  \item If you have a personal $\alpha$-level that is larger than the
    p-value, you automatically know that the decision threshold lies
    ``further inside''.
  \item This means you {\color{blue}can simply compare your $\alpha$-level
      with the p-value}: if the p-value is smaller, then you call that
    result significant, otherwise you don't.
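  \item[] {\color{gray} In Matlab this comparison is a one-liner (a sketch,
      assuming a data vector {\tt x} and a one-sample test against zero):}
\begin{lstlisting}
[~, p] = ttest(x, 0);      % p-value of a one-sample t-test
significant = (p < 0.05)   % compare with your personal alpha-level
\end{lstlisting}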
  \end{itemize}
  \begin{center}
    \includegraphics[width=.45\linewidth]{figs/pval0.png}
    \includegraphics[width=.45\linewidth]{figs/pval1.png}
  \end{center}
\end{frame}
%---------------------------------------------------------
\begin{frame}
  \frametitle{the mother of all statistics: the p-value}
  \begin{task}{p-values if $H_0$ is true}
    Is the following procedure correct?
    \vspace{.5cm}

    In order to show that a sample $x_1,...,x_n$ follows a Normal
    distribution with mean zero, you perform a t-test. If the p-value is
    large, you conclude that there is evidence for $H_0$, i.e. accept that
    $x_1,...,x_n$ has mean zero and is normally distributed.
    \vspace{.5cm}

    To find the answer, simulate normally distributed random variables
    with {\tt randn} in Matlab and compute the p-value with a one-sample
    t-test. Repeat that several times and plot a histogram of the p-values
    (see the sketch on the next slide).
  \end{task}
  \pause
  \begin{itemize}
  \item If $H_0$ is true, the p-value is uniformly distributed between 0
    and 1. Why?\pause
  \item Think about the beginning of this lecture:
    $$p=P(|x| > |t|) = 1 - P(|x| \le |t|) = 1 - \mbox{c.d.f.}(|t|)
    \sim U([0,1])$$
  \end{itemize}
\end{frame}
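%---------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{the mother of all statistics: the p-value}
  \framesubtitle{simulation sketch}
  A minimal Matlab sketch of the simulation from the task (the number of
  repetitions and the sample size are arbitrary choices):
\begin{lstlisting}
% distribution of p-values when H0 is true
p = zeros(1000, 1);
for i = 1:1000
    x = randn(20, 1);          % N(0,1) data, so H0 (mean zero) holds
    [~, p(i)] = ttest(x, 0);   % two-sided one-sample t-test
end
hist(p, 20)   % approximately flat between 0 and 1
\end{lstlisting}
\end{frame}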
%--------------------------------------------------
\begin{frame}
  \frametitle{the mother of all statistics: the p-value}
  \begin{task}{Study design}
    Is the following procedure statistically sound?
    \vspace{.5cm}

    Psychophysical experiments with human subjects can be time-consuming
    and costly. In order to get a significant effect with minimal effort
    you use the following procedure: You start with a few subjects. If
    your statistical test for the effect returns a p-value smaller than
    $0.05$ you stop and publish. Otherwise you repeat adding subjects and
    computing p-values until you get a significant result (or run out of
    time and money).
  \end{task}
  \pause
  \begin{solution}{Answer}
    No, the procedure is not sound. Even if $H_0$ is true, you will
    eventually get a p-value smaller than $0.05$ since it is uniformly
    distributed between $0$ and $1$ in this case.
  \end{solution}
\end{frame}
%--------------------------------------------------
\begin{frame}
  \frametitle{the mother of all statistics: the p-value}
  \begin{task}{p-values over studies}
    If there is no effect, how many studies would yield a significant
    p-value (for $\alpha=0.05$)?
  \end{task}
  \pause
  \begin{solution}{Answer}
    $5\%$
  \end{solution}
  \pause
  \begin{task}{p-values in publications}
    Do you think that only publishing positive findings poses a problem?
  \end{task}
  \pause
  \begin{solution}{Answer}
    Yes. If I only publish significant positive findings, then I can
    publish anything if I just repeat the study long enough.
  \end{solution}
\end{frame}
%---------------------------------------------------------
\begin{frame}
  \frametitle{the mother of all statistics: the p-value}
  \begin{task}{true or false?}
    \begin{itemize}
    \item From $p<0.01$ you can deduce that your result is of biological
      importance.\pause
    \item {\color{gray} False. A small p-value doesn't say anything about
        biological importance. It just indicates that the data and $H_0$
        are not very compatible.} \pause
    \item The p-value is the probability of observing a dataset resulting
      in a test statistic more extreme than the one at hand, assuming the
      null hypothesis is true.\pause
    \item {\color{gray} True.} \pause
    \item $1-p$ is the probability of the alternative hypothesis being
      true. \pause
    \item {\color{gray} False. The p-value cannot tell us anything about
        whether one of the hypotheses is true or not.}
    \end{itemize}
  \end{task}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{multiple hypothesis testing}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%---------------------------------------------------------------
\begin{frame}
  \frametitle{two tests}
  \begin{task}{Correct or not?}
    You have two independent samples from a treatment group and a control
    group. You are not sure whether your data meet the requirements of a
    t-test. Therefore, you carry out a t-test and a ranksum test. If one
    of them rejects $H_0$, you use that one to report your findings in a
    paper.
    \vspace{.5cm}

    \footnotesize
    To approach an answer, use Matlab and
    \begin{itemize}
    \item repeatedly sample two datasets from the same Normal distribution
      $\mathcal N(0,1)$.
    \item for each pair of datasets compute the test statistic of a
      ranksum test (use {\tt ranksum}) and a t-test (use {\tt ttest2})
    \item Plot the values of the statistics against each other (using
      {\tt plot(T, R, 'k.')}). What can you observe?
    \item Count the number of times at least one of the tests gives a
      p-value smaller than $0.05$. What can you observe? A simulation
      sketch follows on the next slide.
    \end{itemize}
  \end{task}
\end{frame}
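%---------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{two tests}
  \framesubtitle{simulation sketch}
  One possible Matlab sketch of this simulation (sample size and number of
  repetitions are arbitrary choices; {\tt ranksum} and {\tt ttest2} are in
  the Statistics Toolbox):
\begin{lstlisting}
% type I error rate when reporting whichever test rejects
reject = false(1000, 1);
for i = 1:1000
    x = randn(20, 1); y = randn(20, 1);  % same distribution: H0 is true
    p1 = ranksum(x, y);                  % Wilcoxon rank sum test
    [~, p2] = ttest2(x, y);              % two-sample t-test
    reject(i) = (p1 < 0.05) | (p2 < 0.05);
end
mean(reject)   % larger than the nominal 0.05
\end{lstlisting}
\end{frame}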
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{study design}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{general theme}
\begin{enumerate}
\item make an educated guess about the true parameters
\item state how accurate/powerful you want to be
\item select $n$ based on that
\end{enumerate}
\end{frame}
\begin{frame}
\frametitle{estimating a single mean}
\framesubtitle{standard error and $\alpha$}
\begin{itemize}
\item Assume you want to estimate the mean of some quantity.\pause
\item From a pilot study or the literature, you have an estimate $s$ of the standard deviation and $\tilde\mu$ of the mean of that quantity.\pause
\item $\tilde \mu$ could also be chosen to set a minimal detectable difference.\pause
\item In order to test whether your mean $\hat\mu$ is different from a fixed mean $\mu_0$ at an $\alpha$-level of $5\%$, the $95\%$ confidence interval around $\tilde\mu$ must not contain $\mu_0$: $$\underbrace{|\tilde\mu - \mu_0|}_{=:\delta} \ge t_{0.025, \nu}\frac{s}{\sqrt{n}}$$ \pause
\item This means you should set $n$ to $$n \ge \left(\frac{t_{0.025, \nu}\cdot s}{\delta}\right)^2 $$
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{estimating means}
\framesubtitle{type I and type II error}
{\bf one can also take the desired power $1-\beta$ into account}
$$n \ge \frac{s^2}{\delta^2}\left(t_{\alpha,\nu} + t_{\beta(1),\nu}\right)^2$$
\only<1>{
\includegraphics[width=.5\linewidth]{figs/experimentalDesign00.pdf}
\includegraphics[width=.5\linewidth]{figs/experimentalDesign01.pdf}
} \pause
{\bf rearranging the formula yields an estimate of the minimal detectable difference}
$$\delta \ge \sqrt{\frac{s^2}{n}}\left(t_{\alpha,\nu} + t_{\beta(1),\nu}\right)$$ \pause
{\bf for two means, this formula becomes}
$$n \ge \frac{2s^2}{\delta^2}\left(t_{\alpha,\nu} + t_{\beta(1),\nu}\right)^2$$ \pause
\begin{emphasize}{iterative estimation}
Since $\nu$ depends on $n$ (i.e. $\nu=n-1$), we need to estimate $n$ iteratively.
\end{emphasize}
\mycite{Zar, J. H. (1999). Biostatistical Analysis (4th ed., p. 663). Prentice Hall, New Jersey.}
\end{frame}
\begin{frame}
\frametitle{example}
\framesubtitle{Zar, example 7.2}
\small
Researchers observed the weight changes of twelve rats after they were subjected to forced exercise. The mean difference is $\hat\mu=-0.65g$, the sample variance is $\hat\sigma^2=1.5682 g^2$. We wish to test the difference to $\mu_0=0$ with $\alpha=0.05$ and a $1-\beta=0.9$, i.e. $90\%$, chance of detecting a population mean that differs from $\mu_0=0$ by as little as $1.0g$.
\pause
Let's guess that a sample size of $n=20$ would be required. Then $\nu=19$, $t_{0.025,19}=2.093$, $\beta=1-0.9=0.1$, and $t_{0.1,19}=1.328$. This means $$n=\frac{1.5682}{1^2}(2.093+1.328)^2 = 18.4.$$ \pause
Now let us use $n=19$ as an estimate, in which case $\nu=18$, $t_{0.025,18}=2.101$, $t_{0.1,18}=1.330$, and $$n=\frac{1.5682}{1^2}(2.101+1.330)^2=18.5.$$ Thus we need a sample size of at least $19$ (the next slide shows this iteration in code).
\end{frame}
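%----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{example}
\framesubtitle{Zar, example 7.2 in code}
A sketch of the iteration ({\tt tinv}, the inverse t-c.d.f. from the Statistics Toolbox, replaces the table lookup):
\begin{lstlisting}
% iterate n until the sample size formula reproduces itself
s2 = 1.5682;  delta = 1.0;  alpha = 0.05;  beta = 0.1;
n = 20;                             % initial guess
for iter = 1:20
    nu   = n - 1;
    tA   = tinv(1 - alpha/2, nu);   % two-tailed alpha quantile
    tB   = tinv(1 - beta, nu);      % one-tailed beta quantile
    nNew = ceil(s2/delta^2 * (tA + tB)^2);
    if nNew == n, break; end
    n = nNew;
end
n                                   % -> 19
\end{lstlisting}
\end{frame}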
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{ANOVA}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{from linear regression to ANOVA}
\begin{frame}
\frametitle{from linear regression to ANOVA}
\small
The following table contains the impulse frequency of the electric field of electric fish, measured at several temperatures (data for project 03).
\begin{center}
\tiny
\begin{tabular}{lccc}
{\bf temperature [${}^\circ$C]} & \multicolumn{3}{c}{\bf impulse frequency [number/sec]} \\
\hline\\
20.00 & 225.00 & 230.00 & 239.00 \\
22.00 & 251.00 & 259.00 & 265.00 \\
23.00 & 266.00 & 273.00 & 280.00 \\
25.00 & 287.00 & 295.00 & 302.00 \\
27.00 & 301.00 & 310.00 & 317.00 \\
28.00 & 307.00 & 313.00 & 325.00 \\
30.00 & 324.00 & 330.00 & 338.00
\end{tabular}
\end{center}
\begin{itemize}
\item Our goal will be to test whether $\mu_{20}=...=\mu_{30}$.
\item Note that ANOVA is not the right method to analyze this dataset; linear regression is, because temperature is on an interval scale. We just use the data here to illustrate the ideas.
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{from linear regression to ANOVA}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/regression01.pdf}
\end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{from linear regression to ANOVA}
\begin{center}
\includegraphics[width=.7\linewidth]{figs/regression02.pdf}
\end{center}
What kind of regression line would we expect if the means were equal? \pause {\em One with slope $\alpha=0$.}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\includegraphics[width=1.\linewidth]{figs/regression02.pdf}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\begin{itemize}
\item For linear regression data, we would test whether $\alpha=0$ (see the code sketch on the next slide).
\item For categorical inputs (x-axis), we cannot compute a regression line. Therefore, we need a different approach.
\end{itemize}
\end{minipage}
\end{minipage}
\end{frame}
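% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{from linear regression to ANOVA}
A minimal sketch of the regression fit on the fish data (plain least squares via {\tt polyfit}; the variable names are my choice):
\begin{lstlisting}
% temperature vs. impulse frequency, 3 measurements per temperature
t = [20 22 23 25 27 28 30]';
F = [225 230 239; 251 259 265; 266 273 280; 287 295 302; ...
     301 310 317; 307 313 325; 324 330 338];  % rows match t
T = repmat(t, 1, 3);                % same shape as F
c = polyfit(T(:), F(:), 1);         % c(1) = slope, c(2) = intercept
plot(T(:), F(:), 'k.'); hold on
plot(t, polyval(c, t), 'r-')        % fitted regression line
\end{lstlisting}
If the means were all equal, the fitted slope {\tt c(1)} should be close to zero.
\end{frame}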
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{law of total variance}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{law of total variance}
\only<1>{
Approach: the law of total variance
$$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] + \color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
\begin{center}
\includegraphics[width=.7\linewidth]{figs/regression02.pdf}
\end{center}
}\pause
\only<2>{
Approach: the law of total variance
$$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] + \color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
\begin{center}
\includegraphics[width=.7\linewidth]{figs/regression03.pdf}
\end{center}
}\pause
\only<3>{
Approach: the law of total variance
$$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] + \color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
Data generation model for regression: $f_{ij} = {\color{mygreen} \alpha t_i} + \beta + {\color{lightblue}\varepsilon_{ij}}$
\begin{center}
\includegraphics[width=.6\linewidth]{figs/regression04.pdf}
\end{center}
}\pause
\only<4>{
Approach: the law of total variance
$$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] + \color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
Data generation model for regression $f_{ij} = {\color{mygreen} \alpha t_i} + \beta + {\color{lightblue}\varepsilon_{ij}}$:
$${\color{mygreen} \alpha=0} \Rightarrow {\color{mygreen} \mathbb V[\mu] = 0} \Rightarrow \mu_{20} = \mu_{22} = ... = \mu_{30}$$
\begin{center}
\includegraphics[width=.6\linewidth]{figs/regression04.pdf}
\end{center}
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{single factor ANOVA}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%---------------------------------------------------------------
\begin{frame}
\frametitle{data model for single factor ANOVA}
Approach: the law of total variance
$$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] + \color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
Data generation model for single factor ANOVA $f_{ij} = \overline{\mu} + {\color{mygreen} \tau_{i}} + {\color{lightblue}\varepsilon_{ij}}$:
$${\color{mygreen} \tau_i=0 \mbox{ for all } i} \Rightarrow {\color{mygreen} \mathbb V[\mu] = 0} \Rightarrow \mu_{20} = \mu_{22} = ... = \mu_{30}$$
\begin{center}
\includegraphics[width=.6\linewidth]{figs/regression05.pdf}
\end{center}
\end{frame}
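%---------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{law of total variance}
\framesubtitle{numerical check on the fish data}
A small sketch that checks the decomposition with plug-in estimates. With equal group sizes and the $1/N$ normalization ({\tt var(.,1)}), the identity holds exactly:
\begin{lstlisting}
F = [225 230 239; 251 259 265; 266 273 280; 287 295 302; ...
     301 310 317; 307 313 325; 324 330 338]; % rows = temperatures
vTotal  = var(F(:), 1);        % V[f]: total variance
vMeans  = var(mean(F, 2), 1);  % V[mu]: variance of group means
vWithin = mean(var(F, 1, 2));  % E[V[f|mu_i]]: mean within-group var
[vTotal, vMeans + vWithin]     % the two entries agree
\end{lstlisting}
\end{frame}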
%---------------------------------------------------------------
\begin{frame}
\frametitle{statistic of ANOVA}
\begin{columns}
\begin{column}{0.43\linewidth}
\begin{center}
\includegraphics[width=1.\linewidth]{figs/regression02.pdf}
\vspace{-.2cm}
\includegraphics[width=1.\linewidth]{figs/Fdistribution00.pdf}
\end{center}
\end{column}
\begin{column}{0.55\linewidth}
\begin{align*}
\:&\mbox{\color{lightblue} error SS}&=\color{lightblue}\sum_{ij}\left(x_{ij}-\hat{\mu}_{i}\right)^{2}\\
+\:&\mbox{\color{mygreen} group SS}&=\color{mygreen}\sum_{i}n_{i}\left(\hat{\mu}_{i}-\hat{\mu}\right)^{2}\\\hline
\:&\mbox{\color{red} total SS}&=\color{red}\sum_{ij}\left(x_{ij}-\hat{\mu}\right)^{2}
\end{align*}
\pause
\begin{align*}
\mbox{\color{mygreen}group MS}=\frac{\mbox{\color{mygreen}group SS}}{\mbox{\color{mygreen}group DF}}&=\color{mygreen}\frac{\sum_{i}n_{i}\left(\hat{\mu}_{i}-\hat{\mu}\right)^{2}}{k-1}\\
\mbox{\color{lightblue}error MS}=\frac{\mbox{\color{lightblue}error SS}}{\mbox{\color{lightblue}error DF}}&=\color{lightblue}\frac{\sum_{ij}\left(x_{ij}-\hat{\mu}_{i}\right)^{2}}{N-k}\\
\color{blue}F&=\frac{\mbox{\color{mygreen}group MS}}{\mbox{\color{lightblue}error MS}}
\end{align*}
\end{column}
\end{columns}
\end{frame}
%---------------------------------------------------------------
\begin{frame}
\frametitle{summary single factor ANOVA}
\begin{itemize}
\item {\bf Goal:} Test whether several means are equal or not.\pause
\item {\bf Strategy:} Use the law of total variance to explain the overall variance by the {\em variance of the means} and the {\em variance within groups}.\pause
\item If the total variance can be explained by the {\em variance within groups} alone, then the means do not vary and must all be the same. \pause
\item Since a statistic should be large if the data does not fit $H_0$, we use $\frac{MS(between)}{MS(within)}$, which can be shown to follow an F-distribution under the assumptions below (see the code sketch later in this section).\pause
\item {\bf Assumptions:}
\begin{itemize}
\item The groups must be independent of each other.
\item In each group, the specimens must be i.i.d. samples from the particular population distribution $f_{ij} \sim p(f|\mu_i)$.
\item The standard deviations of the groups are equal ($\sigma_\varepsilon$ is the same for all groups).
\item The residuals $\varepsilon$ must be Normally distributed.
\end{itemize}
\end{itemize}
\end{frame}
\subsection{study design for ANOVA}
\begin{frame}
\frametitle{study design for ANOVA}
\begin{itemize}
\item If the means are different (but all other assumptions are satisfied), then $F$ follows a non-central F-distribution.
\item As in the case of one- and two-sample t-tests, this can be used to adjust $n$ for the desired power.
\item Alternatively, one can estimate the minimal detectable difference $\delta$ from estimates of the {\em error MS} $s^2$ and $n$, or $n$ from $\delta$ and $s^2$, respectively.
\end{itemize}
\mycite{Zar, J. H. (1999). Biostatistical Analysis (4th ed., p. 663). Prentice Hall, New Jersey.}
\end{frame}
\subsection{non-parametric ANOVA}
\begin{frame}
\frametitle{Kruskal-Wallis test}
\begin{itemize}
\item Can be applied if the data is not normally distributed.
\item Is equivalent to the Mann-Whitney/Wilcoxon rank sum test for two factor levels.
\item Needs the variances to be equal as well.
\item Instead of testing equality of means/medians, it tests for equality of distributions.
\item For more details see {\em Biostatistical Analysis}.
\end{itemize}
\end{frame}
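%---------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{single factor ANOVA in code}
A recap sketch: the F statistic of the single factor ANOVA computed by hand on the fish data (the column layout and variable names are my choice; {\tt fcdf} and {\tt anova1} are from the Statistics Toolbox):
\begin{lstlisting}
X = [225 230 239; 251 259 265; 266 273 280; 287 295 302; ...
     301 310 317; 307 313 325; 324 330 338]'; % columns = groups
[n, k] = size(X);  N = n*k;
mui = mean(X, 1);                        % group means
mu  = mean(X(:));                        % grand mean
groupSS = n * sum((mui - mu).^2);
errorSS = sum(sum((X - repmat(mui, n, 1)).^2));
Fstat = (groupSS/(k-1)) / (errorSS/(N-k));
p = 1 - fcdf(Fstat, k-1, N-k)            % p-value
% cross-check: p2 = anova1(X, [], 'off')
\end{lstlisting}
\end{frame}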
\begin{frame}
\frametitle{Testing the difference among several medians}
\begin{itemize}
\item Can be applied if the data is not normally distributed.
\item Does not need the variances to be equal.
\item For more details see {\em Biostatistical Analysis}.
\end{itemize}
\end{frame}
\section{more complex ANOVAs}
\subsection{blocking}
% ----------------------------------------------------------
\begin{frame}
\frametitle{blocking}
\footnotesize
{\bf Blocking:} How does experience affect the anatomy of the brain? In a typical experiment to study this question, young rats are placed in one of three environments for 80 days:
\begin{itemize}
\item[T1] Standard environment. The rat is housed with a single companion in a standard lab cage.
\item[T2] Enriched environment. The rat is housed with several companions in a large cage, furnished with various playthings.
\item[T3] Impoverished environment. The rat lives alone in a standard lab cage.
\end{itemize}
At the end of the 80-day experience, various anatomical measurements are made on the rats' brains. Suppose a researcher plans to conduct the above experiment using 30 rats. To minimize variation in response, all 30 animals will be male, of the same age and strain. To reduce variation even further, the researcher can take advantage of the similarity of animals from the same litter. In this approach, the researcher would obtain three male rats from each of 10 litters. The three littermates from each litter would be assigned at random: one to T1, one to T2, and one to T3.
\end{frame}
%---------------------------------------------------------------
\begin{frame}
\frametitle{How to create blocks}
Try to create blocks that are as homogeneous within themselves as possible, so that the inherent variation between experimental units becomes, as far as possible, variation between blocks rather than within blocks (see SWS chapter 11.6).

{\bf Fish data:}
\begin{itemize}
\item each fish is a block
\item the different categories are the factor of interest
\item note that we have one measurement per block and factor level, but there could be more
\end{itemize}
\end{frame}
%---------------------------------------------------------------
\begin{frame}
\frametitle{data model for block randomized ANOVA}
Data generation model for randomized block ANOVA $f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \varepsilon_{ijk}$:
\vspace{.5cm}
How do we know that there is no interaction $\gamma_{ij}$ between the blocks and the factors?
\begin{itemize}
\item {\bf a priori knowledge:} why should the effect of temperature depend on fish identity?
\item {\bf additivity:} for each factor level $i$, the values differ by the {\em same} amount $\beta_j$ (see the code sketch after this slide).
\pause
\end{itemize}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\begin{center}
\includegraphics[width=1.\linewidth]{figs/regression06.pdf}
\end{center}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\only<2>{\color{red} Would that still be the case if the lines crossed at some point?}
\end{minipage}
\end{minipage}
\end{frame}
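%---------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{data model for block randomized ANOVA}
\framesubtitle{a sketch with {\tt anova2}}
A hypothetical sketch of a randomized block analysis with {\tt anova2} (rows as blocks, columns as factor levels; the simulated numbers and effect sizes are made up):
\begin{lstlisting}
% randomized block design: one measurement per block/level cell
nBlocks = 10;  nLevels = 7;
blockEffect = randn(nBlocks, 1);          % beta_j
levelEffect = linspace(0, 3, nLevels);    % tau_i
X = repmat(blockEffect, 1, nLevels) + ...
    repmat(levelEffect, nBlocks, 1) + 0.5*randn(nBlocks, nLevels);
p = anova2(X, 1, 'off');  % reps = 1: additive, no interaction term
% p(1): factor (columns), p(2): blocks (rows)
\end{lstlisting}
With more than one replicate per cell, {\tt anova2(X, reps)} also returns a p-value for the interaction $\gamma_{ij}$.
\end{frame}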
\subsection{two factor ANOVA}
%---------------------------------------------------------------
\begin{frame}
\frametitle{What's the funny way to write down the data model in ANOVA?}
Data generation model for a two factor ANOVA with interaction
$$f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \gamma_{ij} + \varepsilon_{ijk}$$
{\bf Note that:}
\begin{itemize}
\item The sums over the $\tau_i$, $\beta_j$, $\gamma_{ij}$, and $\varepsilon_{ijk}$ terms are always zero. They model the {\em deviation} from the grand mean. \pause
\item The terms directly correspond to the available SS/MS terms. For example, in the block randomized ANOVA
\begin{itemize}
\item $f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \varepsilon_{ijk}$
\item $SS(total) = SS(temperature) + SS(blocks) + SS(within)$
\end{itemize}
\end{itemize}
\end{frame}
%---------------------------------------------------------------
\begin{frame}
\frametitle{different hypotheses from a 2-factor ANOVA}
\small
Data generation model for a two factor ANOVA with interaction
$$f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \gamma_{ij} + \varepsilon_{ijk}$$
\begin{itemize}
\item {\bf Blocking:} Assume $\gamma_{ij}=0$. Test $$F=\frac{\mbox{temperature MS } (\tau_i)}{\mbox{error MS } (\varepsilon_{ijk})}$$\pause
\item {\bf Repeated measures:} Assume $\gamma_{ij}=0$. The entity that was repeatedly measured becomes the block.\pause
\item {\bf Two factor, testing factor influence:} Assume $\gamma_{ij}\not=0$. Test $$F = \frac{\mbox{temperature MS } (\tau_i)}{\mbox{error MS } (\varepsilon_{ijk})}$$\pause
\item {\bf Two factor, testing interaction:} Assume $\gamma_{ij}\not=0$. Test $$F=\frac{\mbox{interaction MS } (\gamma_{ij})}{\mbox{error MS } (\varepsilon_{ijk})}$$
\end{itemize}
\end{frame}
%---------------------------------------------------------------
\begin{frame}
\frametitle{summary}
\begin{itemize} \small
\item ANOVA is a very flexible method to study the influence of categorical variables (factors) on ratio/interval data. \pause
\item It works by checking whether a certain factor, or an interaction between factors, is needed to explain the variability in the data. \pause
\item It relies on assumptions that need to be checked:
\begin{itemize}
\item equal variance for each factor level
\item Normally distributed residuals
\item the number of data points $n_i$ should be the same for each factor level
\end{itemize}\pause
\item There is a whole zoo of ANOVA techniques for all kinds of situations. This is just the tip of the iceberg.
\item One can often get away with violating some of the assumptions. For more details on that, check {\em Biostatistical Analysis}.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{detour I: One-tailed vs. two-tailed}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%---------------------------------------------------------------
\begin{frame}
\hypertarget{sec:twotailed}{}
\frametitle{one-tailed tests}
\begin{task}{Correct or not?}
Imagine a pharmaceutical company runs clinical trials for a drug that enhances the ability to focus. To that end, they apply the drug to a treatment group and measure scores in a standardized test.
From the literature it is known that normal subjects have a score of about 0. Since the company wants to test whether the drug {\em enhances (score $>$ 0)} the ability to focus, they choose a one-tailed test ($H_A$: the treatment group performs better than the performance reported in the literature).
\end{task}
\end{frame}
%-------------------------------------------------------------
\begin{frame}
\frametitle{one-tailed tests}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
{\bf two-tailed test}
\includegraphics[width=\linewidth]{figs/twotailed.png}
\footnotesize
\vspace{-1cm}
e.g.
\begin{itemize}
\item $H_0: \mu = 0$
\item $H_A: \mu \not= 0$
\vspace{1.8cm}
\end{itemize}
\end{minipage}
\begin{minipage}{0.5\linewidth}
{\bf one-tailed test}
\includegraphics[width=\linewidth]{figs/onetailed.png}
\footnotesize
\vspace{-1cm}
e.g.
\begin{itemize}
\item $H_0: \mu = 0$
\item $H_A: \mu > 0$
\item $\hat\mu < 0$ must directly imply that $\hat\mu$ came from $P(\hat\mu|H_0)$
\item if that is not the case, using a one-tailed test is cheating
\end{itemize}
\end{minipage}
\end{minipage}
\hyperlink{back:twotailed}{\color{gray}go back}
\end{frame}
% ----------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{detour II: Statistical Power}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Why is it hard to assess the power of a test?}
\begin{minipage}{1.\linewidth}
\begin{minipage}{.5\linewidth}
\includegraphics[width=.8\linewidth]{figs/power.pdf}
\end{minipage}
\begin{minipage}{.5\linewidth}
\begin{itemize}
\item Power = 1 - P(type II error)\\ = P(reject $H_0$ | $H_A$ is true)\pause
\item in general, the distribution
\begin{center}
P(test statistic | $H_A$ is true)
\end{center}
is not available to us. \pause
\item Therefore, the power can often only be specified for a specific $H_A$.
\end{itemize}
\end{minipage}
\end{minipage}
\mycite{J. H. Zar, Biostatistical Analysis}
\hypertarget{sec:power}{}
\hyperlink{back:power}{\color{gray}go back}
\end{frame}
% ----------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{detour III: Bayes rule and statistical tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}
\hypertarget{sec:bayesian}{}
\frametitle{Why is this funny (or sad)?}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/frequentistsvsbayesians.png}
\end{center}
\mycite{http://xkcd.com/1132/}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
\frametitle{Why is this funny (or sad)?}
\begin{minipage}{1.\linewidth}
\begin{minipage}{.5\linewidth}
\includegraphics[width=.7\linewidth]{figs/frequentistsvsbayesians.png}
\mycite{http://xkcd.com/1132/}
\end{minipage}
\begin{minipage}{.5\linewidth}
\begin{itemize}
\item $H_0$: the sun has not gone nova
\item $H_A$: the sun has gone nova \pause
\item test procedure: we believe the detector \pause
\item Null distribution: multinomial with $n=2$ trials and $p_1 = ... = p_6 = \frac{1}{6}$ (two fair dice) \pause
\item the probability of making a type I error is $p(\mbox{double six})=\frac{1}{6}\cdot \frac{1}{6} = \frac{1}{36} \approx 0.028$
\end{itemize}
\pause
So ... what is wrong?
\end{minipage}
\end{minipage}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
\frametitle{A similar example}
\begin{minipage}{1.\linewidth}
\begin{minipage}{.5\linewidth}
{\bf sensitivity \& specificity of an HIV test}

\begin{tabular}{ccc}
 & HIV & no HIV\tabularnewline
test + & 99.7\% & 1.5\%\tabularnewline
test - & 0.3\% & 98.5\%\tabularnewline
\end{tabular}
\vspace{1cm}

{\bf HIV prevalence (Germany)}

\begin{tabular}{cc}
HIV & no HIV\tabularnewline
0.1\% & 99.9\%\tabularnewline
\end{tabular}
\end{minipage}
\begin{minipage}{.5\linewidth}
\begin{task}{}
What is the probability that you are HIV+ if you test positive?
\end{task}\pause
In order to answer that question, you need two rules of probability.\pause
\vspace{1cm}

What is the power, what is the type I error of this test?
\end{minipage}
\end{minipage}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
\frametitle{Bayes rule and marginalization}
{\bf Bayes rule}
$$p(A|B)p(B) = p(B|A)p(A)$$
{\bf joint probability}
$$p(A,B) = p(A|B)p(B) = p(B|A)p(A)$$
{\bf marginalization}
$$p(B) = \sum_{\mbox{possible values $a$ of }A}p(a,B)$$
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
\frametitle{probability/Bayesian nomenclature}
\framesubtitle{repetition}
Let $T\in \{+, -\}$ be the test result and $H\in \{+,-\}$ whether you are HIV positive or not.
\begin{itemize}
\item $p(T|H)$ is the {\em likelihood} \pause
\item $p(H)$ is the {\em prior} \pause
\item $p(H|T)$ is the {\em posterior}
\end{itemize}
\pause
Given the prior and the likelihood, we can compute the posterior.
\begin{align*}
p(H|T) &= \frac{P(T|H)P(H)}{P(T)} &\mbox{Bayes rule}\\
&= \frac{P(T|H)P(H)}{\sum_h P(T,h)} &\mbox{marginalization}\\
&= \frac{P(T|H)P(H)}{\sum_h P(T|h)p(h)} &\mbox{joint probability}
\end{align*}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
\frametitle{HIV test}
\begin{minipage}{1.\linewidth}
\begin{minipage}{.5\linewidth}
\begin{tabular}{ccc}
 & HIV & no HIV\tabularnewline
test + & 99.7\% & 1.5\%\tabularnewline
test - & 0.3\% & 98.5\%\tabularnewline
\end{tabular}
\end{minipage}
\begin{minipage}{.5\linewidth}
\begin{tabular}{cc}
HIV & no HIV\tabularnewline
0.1\% & 99.9\%\tabularnewline
\end{tabular}
\end{minipage}
\end{minipage}
\begin{align*}
p(H=+|T=+)&= \frac{P(T=+|H=+)P(H=+)}{\sum_{h\in\{+,-\}} P(T=+|H=h)p(H=h)} \\
&= \frac{0.997 \cdot 0.001}{0.997 \cdot 0.001 + 0.015 \cdot 0.999} \\
&\approx 0.062
\end{align*}
\pause
This means that with a positive HIV test you have only about a $6.2\%$ chance of actually being HIV positive. Why is this number so low? \pause
\only<3>{Because most of the people who test positive are false positives from the much larger HIV$-$ group: being HIV$+$ is relatively rare (a code sketch of the computation follows at the end of this detour).}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
\frametitle{Why is this funny (or sad)?}
\begin{minipage}{1.\linewidth}
\begin{minipage}{.5\linewidth}
\includegraphics[width=.7\linewidth]{figs/frequentistsvsbayesians.png}
\mycite{http://xkcd.com/1132/}
\end{minipage}
\begin{minipage}{.5\linewidth}
{\bf Why is it funny?} Because it points at the fact that statistical tests usually look at the likelihood only and ignore the prior.
\vspace{1cm}

{\bf Why is it sad?} Because statistical tests usually look at the likelihood only and ignore the prior.
\end{minipage}
\end{minipage}
\hyperlink{back:bayesian}{\color{gray}go back}
\end{frame}
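%-----------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{HIV test}
\framesubtitle{the posterior in code}
A small sketch of the posterior computation, using the numbers from the slides (the variable names are my choice):
\begin{lstlisting}
sens  = 0.997;   % P(T=+|H=+), the likelihood
fpr   = 0.015;   % P(T=+|H=-), false positive rate
prior = 0.001;   % P(H=+), prevalence in Germany
posterior = sens*prior / (sens*prior + fpr*(1 - prior))
% -> ~0.062: a positive test alone is weak evidence
\end{lstlisting}
\end{frame}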
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{detour IV: Assessing normality with QQ plots}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\hypertarget{sec:qqplots}{}
\frametitle{histogram equalization}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\begin{task}{histogram equalization}
Which function $y = f(x)$ transforms $x$ such that it has the distribution $p(y)$?
\end{task}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\only<1>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE0.png}
\end{center}
}\pause
\only<2>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE0Solution.png}
\end{center}
}
\end{minipage}
\end{minipage}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{histogram equalization}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.4\linewidth}
\begin{task}{histogram equalization}
What would the function look like if the target were a Normal distribution?
\end{task}
\end{minipage}
\begin{minipage}{0.6\linewidth}
\only<1>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE1.png}
\end{center}
}\pause
\only<2>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE1Solution.png}
\end{center}
}
\end{minipage}
\end{minipage}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{histogram equalization}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.4\linewidth}
\begin{task}{histogram equalization}
Is the target distribution a Normal distribution?
\end{task}
\end{minipage}
\begin{minipage}{0.6\linewidth}
\only<1>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE2.png}
\end{center}
}\pause
\only<2>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE2Solution.png}
\end{center}
}
\end{minipage}
\end{minipage}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{QQ-plots}
\begin{itemize}
\item QQ-plots can be used to visually assess whether a set of data points might follow a certain distribution. \pause
\item A QQ-plot is constructed by
\begin{enumerate}
\item computing, for each data point $x_i$, the fraction $q_i$ of data points that are less than or equal to $x_i$ (where do you know that function from?)\pause
\item and plotting $x_i$ against the value $y_i$ of the other distribution that has the same quantile fraction $q_i$
\end{enumerate}\pause
\item If the two distributions are equal, the QQ-plot shows a straight line.\pause
\item How would you assess the normality of data $x_1,...,x_n$ with a QQ-plot? \pause {\em Make the target distribution a Gaussian (see the code sketch at the end of this detour).}
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{histogram equalization}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.4\linewidth}
\begin{task}{special transform}
Which function $y = f(x)$ transforms $x$ such that it has the distribution $p(y)$? Do you know that function?
\end{task}
\only<2>{{\bf Answer:} The cumulative distribution function $f(x) = F(x)$.}
\end{minipage}
\begin{minipage}{0.6\linewidth}
\only<1>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE3.png}
\end{center}
}\pause
\only<2>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE3Solution.png}
\end{center}
}
\end{minipage}
\end{minipage}
\hyperlink{back:detourIV}{\color{gray} back to statistical tests}
\end{frame}
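% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{QQ-plots}
\framesubtitle{a hand-made QQ-plot}
A minimal sketch of the construction against a standard Normal ({\tt norminv} is the inverse Normal c.d.f.; the offset $0.5$ is one common plotting-position convention):
\begin{lstlisting}
x = sort(randn(100, 1));   % data whose normality we assess
n = length(x);
q = ((1:n)' - 0.5) / n;    % empirical quantile levels
y = norminv(q, 0, 1);      % corresponding Normal quantiles
plot(y, x, 'k.'); hold on
plot(y, y, 'r-')           % points near the line: ~Normal
\end{lstlisting}
\end{frame}
\end{document}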