\documentclass{beamer}
\usepackage{xcolor}
\usepackage{listings}
\usepackage{pgf}
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
%\usepackage{multimedia}
\usepackage[english]{babel}
\usepackage{movie15}
\usepackage[latin1]{inputenc}
\usepackage{times}
\usepackage{amsmath}
\usepackage{bm}
\usepackage[T1]{fontenc}
\usepackage[scaled=.90]{helvet}
\usepackage{scalefnt}
\usepackage{tikz}
\usepackage{textcomp}
\usepackage{soul}
\usepackage{hyperref}
\definecolor{lightblue}{rgb}{.7,.7,1.}
\definecolor{mygreen}{rgb}{0,1.,0}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>
{
\usetheme{Singapore}
\setbeamercovered{opaque}
\usecolortheme{tuebingen}
\setbeamertemplate{navigation symbols}{}
\usefonttheme{default}
\useoutertheme{infolines}
% \useoutertheme{miniframes}
}
\AtBeginSection[]
{
\begin{frame}<beamer>
\begin{center}
\Huge \insertsectionhead
\end{center}
% \frametitle{\insertsectionhead}
% \tableofcontents[currentsection,hideothersubsections]
\end{frame}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
\setbeamertemplate{blocks}[rounded][shadow=true]
\title[]{Scientific Computing -- Statistics}
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
University T\"ubingen\\
Bernstein Center T\"ubingen}
\institute[Scientific Computing]{}
\date{11/27/2013}
%\logo{\pgfuseimage{logo}}
\subject{Lectures}
%%%%%%%%%% configuration for code
\lstset{
basicstyle=\ttfamily,
numbers=left,
showstringspaces=false,
language=Matlab,
commentstyle=\itshape\color{darkgray},
keywordstyle=\color{blue},
stringstyle=\color{green},
backgroundcolor=\color{blue!10},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
captionpos=b,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\mycite}[1]{
\begin{flushright}
\tiny \color{black!80} #1
\end{flushright}
}
\input{../latex/environments.tex}
\makeatother
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}
\frametitle{Plan}
\setcounter{tocdepth}{1}
\tableofcontents
\end{frame}
\begin{frame}
\frametitle{information on statistics}
\begin{itemize}
\item Samuels, M. L., Wittmer, J. A., \& Schaffner,
A. A. (2010). Statistics for the Life Sciences (4th ed.,
p. 668). Prentice Hall.
\item Zar, J. H. (1999). Biostatistical Analysis (4th ed.,
p. 663). Prentice Hall.
\item \url{http://stats.stackexchange.com}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section[meta-study]{how statisticians think - the meta-study}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{statisticians are lazy}
\Large
\only<1>{
\begin{center}
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-26-05_771.jpg}
\end{center}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<2>{
\begin{center}
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-41-39_523.jpg}
\end{center}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<3>{
\begin{center}
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-29-35_312.jpg}
\end{center}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{the (imaginary) meta-study}
\begin{center}
\only<1>{
\framesubtitle{finite sampling introduces variation: the sampling distribution}
\includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<2>{
\framesubtitle{statistic vs. population parameter}
\includegraphics[width=.8\linewidth]{figs/statistic1.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<3>{
\framesubtitle{statistic vs. population parameter}
\includegraphics[width=.8\linewidth]{figs/statistic2.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<4>{
\framesubtitle{what parts of this diagram do we have in real life?}
\includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<5>{
\framesubtitle{what parts of this diagram do we have in real life?}
\includegraphics[width=.8\linewidth]{figs/statistic3.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<6->{
\framesubtitle{what statistics does }
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\includegraphics[width=1.\linewidth]{figs/statistic4.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\begin{itemize}
\item it assumes, derives, or simulates the sampling
distribution\pause
\item the sampling distribution only makes sense if you think
about it in terms of the meta-study\pause
\item {\color{red} the sampling distribution is the key to
answering questions about the population from the value of
the statistic}
\end{itemize}
\end{minipage}
\end{minipage}
}
\end{center}
\end{frame}
% % ----------------------------------------------------------
\begin{frame}
\frametitle{illustrating examples}
\begin{question}{lung volume of smokers}
Assume you know the sampling distribution of the mean lung volume
of smokers. Would you believe that
the sample came from a group of smokers?
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example01.png}
\end{center}
\end{question}
\end{frame}
\begin{frame}
\frametitle{illustrating examples}
\begin{question}{lung volume of smokers}
What about now? How would the sampling distribution change if I
change the population to (i) athletes or (ii) old people?
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example02.png}
\end{center}
\end{question}
\end{frame}
\begin{frame}
\frametitle{illustrating examples}
\begin{question}{Is this diet effective?}
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example03.png}
\end{center}
\end{question}
\end{frame}
\begin{frame}
\frametitle{illustrating examples}
\begin{question}{Is this diet effective?}
What do you think now?
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example04.png}
\end{center}
\end{question}
\end{frame}
\begin{frame}
\frametitle{summary}
\begin{itemize}
\item In statistics, we use finite samples from a population to reason
about features of the population. \pause
\item The particular feature of the population we are interested in is called
{\color{blue} population parameter}. We usually measure this
parameter in our finite sample as well
({\color{blue}statistic}).\pause
\item Because of variations due to finite sampling the statistic
almost never matches the population parameter. \pause
\item Using the {\color{blue}sampling distribution} of the statistic, we make
statements about the relation between our statistic and the
population parameter.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{outlook}
{\bf Questions to be addressed}
\begin{itemize}
\item How do we choose the statistic?
\item How do we get the sampling distribution?
\item How does statistical reasoning work in practice?
\end{itemize}
{\bf Perspective}
\begin{itemize}
\item We start by looking at a few standard distributions.
\item We will use those in the statistical tests that follow.
\item For each statistical test, I also try to provide a
non-parametric method.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{probability primer}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{probability models}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{getting the model right}
In statistics/probability it is important to select the correct
distribution. Models are easier to remember if you remember a
``standard situation''.
\begin{itemize}
\item What is the distribution corresponding to throwing a coin? \pause
\item What in neuroscience/psychology is like throwing a coin (fair or
unfair)?\pause
\item What is the distribution of counting heads in repeated
independent coin tosses?\pause
\item What in neuroscience/psychology corresponds to counting heads in
repeated independent coin tosses?
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{the different models}
\only<1>{
\framesubtitle{Bernoulli distribution}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/Bernoulli.pdf}
\end{center}
\begin{itemize}
\item single coin toss (success/failure)
\item distribution $p(X=1)=p$
\end{itemize}
}\pause
\only<2>{
\framesubtitle{uniform distribution}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/Uniform.pdf}
\end{center}
\begin{itemize}
\item $n$ items with the same probability of occurrence
\item distribution $p(X=k)=\frac{1}{n}$
\end{itemize}
}\pause
\only<3>{
\framesubtitle{binomial distribution}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/Binomial00.pdf}
\includegraphics[width=.4\linewidth]{figs/Binomial01.pdf}
\end{center}
\begin{itemize}
\item number of $k$ successes/heads in $n$ trials
\item distribution $P(X=k)= {n \choose
k} p^k (1-p)^{n-k}$
\item parameters $n,p$
\end{itemize}
}\pause
\only<4>{
\framesubtitle{Poisson distribution}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/Poisson00.pdf}
\includegraphics[width=.4\linewidth]{figs/Poisson01.pdf}
\end{center}
\begin{itemize}
\item successes per time unit for (very) large $n$ and small $p$
\item distribution $P(X=k) = \frac{\lambda^k
e^{-\lambda}}{k!}$
\item parameter: success rate $\lambda$
\end{itemize}
}
\only<5>{
\framesubtitle{Gaussian/ normal distribution}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/Gaussian00.pdf}
\end{center}
\begin{itemize}
\item shows up everywhere (central limit theorem)
\item distribution $p(x) = \frac{1}{\sigma\sqrt{2\pi}}\operatorname{exp}\left\{-\frac{\left(x-\mu\right)^2}{2\sigma^2}\right\}$
\item parameter: mean $\mu$, standard deviation $\sigma$
\end{itemize}
}
\only<6>{
\framesubtitle{caveat}
\begin{question}{important distinction}
\begin{itemize}
\item For {\em discrete} random variables $P(X=k)$ makes sense
(probabilities are like ``single weights'').
\item For {\em continuous} random variables $p(X=x)=0$ (probabilities
are like ``water'').
\item For {\em continuous} random variables it only makes sense to
ask for the probability that they take values in a particular
range.
\end{itemize}
\end{question}
}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{example}
You place a mouse in a circular maze and place some food on the
opposite side. In each trial you record whether the mouse went {\em
left} (``L'') or {\em right} (``R'') to get the food.
\vspace{.5cm}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.59\linewidth}
\begin{itemize}
\item What kind of distribution would you expect for the number of
``R'' in $10$ trials? What is the distribution of the number of
``L''?\pause
\item Here is the result of $10$ trials: ``LLLLLLLLLL''. What is
the probability of that?
\item What do you conclude from that?
\end{itemize}
\end{minipage}
\begin{minipage}{0.4\linewidth}
\only<1->{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/Binomial00.pdf}
\end{center}
}
\end{minipage}
\end{minipage}
\end{frame}
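% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{example}
\framesubtitle{checking the numbers in Matlab}
A quick sanity check of the example above (a sketch; it assumes the
Statistics Toolbox function {\tt binopdf} is available):
\begin{lstlisting}
% probability of zero "R" (i.e. "LLLLLLLLLL") in 10 fair trials
p = binopdf(0, 10, 0.5)   % = 0.5^10, about 0.001
\end{lstlisting}
A result this unlikely under the ``no side preference'' model is the
kind of evidence that the statistical tests later in this lecture formalize.
\end{frame}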
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{cumulative distribution function}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{cumulative distribution function (c.d.f.)}
\framesubtitle{we will need that a lot in statistics}
\begin{itemize}
\item The c.d.f. is used to compute the probability that a random
variable is in a particular range.
\item It is defined as $F(y) = P(X \le y)$
\item For the binomial distribution this would be
$$F(k) = P(\mbox{no. of
successes} \le k)\mbox{ in } n \mbox{ trials}$$
\item Where in this plot could we
see that probability for $k=5$ and $n=10$?
\begin{center}
\only<1>{
\includegraphics[width=.5\linewidth]{figs/Binomial00.pdf}
}
\only<2>{
\includegraphics[width=.5\linewidth]{figs/BinomialCdf00.pdf}
}\pause
\only<3>{
\includegraphics[width=.5\linewidth]{figs/BinomialCdf01.pdf}
}
\end{center}
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{cumulative distribution function (c.d.f.)}
\framesubtitle{example}
\small
You want to find out whether a subject performs significantly
different from chance in $10$ trials that either are successful or not.
\begin{itemize}[<+->]
\item What would be a good decision rule?
\item[] {\color{gray} We set thresholds on the number of successes
and decide that (s)he is performing at chance if the performance
falls within the thresholds.}
\item What is the distribution of the number of successes in $n=10$
trials if the subject performs at chance?
\item[] {\color{gray} Binomial with $n=10$ and $p=\frac{1}{2}$}
\item Let's say we set the threshold at $k=2$ and $k=8$, what is the
probability that we think (s)he is {\em not} performing at chance,
even though (s)he is?
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{cumulative distribution function (c.d.f.)}
\framesubtitle{example}
\small
\begin{itemize}[<+->]
\item Let's say we set the threshold at $k=2$ and $k=8$, what is the
probability that we think (s)he is {\em not} performing at chance,
even though (s)he is?
\item[] {\color{gray} The probability for that is $P(X \le 2 \mbox{
or } X \ge 8)$. Using the c.d.f. that is
\begin{align*}
P(X \le 2 \mbox{ or } X \ge 8) &= P(X \le 2) + P(X \ge 8)
= P(X \le 2) + (1-P(X \le 7))
\end{align*}
}
\end{itemize}
\only<2>{
\begin{center}
\includegraphics[width=.5\linewidth]{figs/BinomialExample00.pdf}
\end{center}
}
\end{frame}
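% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{cumulative distribution function (c.d.f.)}
\framesubtitle{the example in Matlab}
A minimal sketch of the computation above, assuming the Statistics
Toolbox function {\tt binocdf}:
\begin{lstlisting}
n = 10; p = 0.5;   % chance performance in 10 trials
% P(X <= 2 or X >= 8) = P(X <= 2) + (1 - P(X <= 7))
typeIError = binocdf(2, n, p) + (1 - binocdf(7, n, p))
% about 0.11
\end{lstlisting}
\end{frame}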
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{joint and conditional distributions}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{conditional and marginal $\rightarrow$ joint distribution}
\framesubtitle{Bayes' rule}
\begin{itemize}
\small
\item Assume you ran decision experiments with two subjects. Subject \#1 had a success
probability of $50\%$, while subject \#2 achieved $80\%$.
\item $70\%$ of the trials were run with the first subject, $30\%$ of
the trials with the other.
\item Each trial gets saved in a file on the hard disk.\pause
\item Now, let's assume your recording software had a bug and did not
store the subject ID in the file.
\item For a given file, we have two random variables now: subject ID $X$,
number of successes $Y$.
\end{itemize}
\begin{center}
\includegraphics[height=.32\linewidth]{figs/decision01.pdf}
\end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{joint and conditional distributions}
\framesubtitle{definitions}
\begin{definition}{Joint, marginal, and conditional distribution}
\begin{itemize}
\item The {\bf joint distribution $P(X,Y)$} gives the probability
that a particular combination of $X$ and $Y$ occur at the same
time. \pause
\item The {\bf marginal distributions $P(X)$ and $P(Y)$} specify
the probabilities that a particular value occurs if the value of
the other variable is ignored. \pause
\item The {\bf conditional distribution $P(X|Y)$} gives the
probability of particular values of $X$ given that $Y$ has
particular values.
\end{itemize}\pause
\end{definition}
\begin{center} {\color{blue} joint distribution
$\stackrel{\mbox{Bayes' Rule}}{\leftrightarrow}$
marginal and conditional distribution}
\end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{conditional and marginal $\rightarrow$ joint distribution}
\framesubtitle{Bayes' rule}
\begin{itemize}
\small
\item Assume you ran decision experiments with two subjects. Subject \#1 had a success
probability of $50\%$, while subject \#2 achieved $80\%$.
\item $70\%$ of the trials were run with the first subject, $30\%$ of
the trials with the other.
\item What probabilities do I need to write at the edges?
\item What distribution do I use for the subject ID ($X$)?
\item What distribution do I use for the conditional distribution $Y|X$?
\end{itemize}
\begin{center}
\only<1>{\includegraphics[height=.32\linewidth]{figs/decision01.pdf}}
\only<2>{\includegraphics[height=.32\linewidth]{figs/decision02.pdf}}
\only<3>{\includegraphics[height=.32\linewidth]{figs/decision03.pdf}}
\end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{conditional and marginal $\rightarrow$ joint distribution}
\framesubtitle{Bayes' rule}
\begin{itemize}
\small
\item The joint probabilities are obtained by multiplying the
probabilities along the paths from the root node to the leaves.
\begin{center}
\includegraphics[height=.32\linewidth]{figs/decision03.pdf}
\end{center}\pause
\item In algebraic terms, this is known as {\em Bayes' rule} (very important!)
$$\color{red} P(Y|X)P(X) = P(X|Y)P(Y) = P(X,Y)$$\pause
\item You can remember it as ``moving variables in front of the
bar''
$$P(X|Y) P(Y) = P(X,Y|\_)$$
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Bayes' rule}
$$P(X|Y)P(Y) = P(Y|X)P(X) = P(X,Y)$$
\begin{task}{Independent random variables}
If two random variables are independent, the joint distribution is
the product of their marginals $$ P(X,Y) =P(X) P(Y)$$
How can you see that from Bayes' rule?
\end{task}
\pause
\begin{solution}{Solution}
If the variables are independent $P(X|Y) = P(X)$ and $P(Y|X) =
P(Y)$: The probability of $X$ is the same as the probability of
$X$ given that I know $Y$, because knowing $Y$ does not help.
\end{solution}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Joint $\rightarrow$ marginal and conditional distribution}
\begin{itemize}
\small
\item The plot shows the joint distribution $P(X,Y)$, where $X$ is
the subject id and $Y$ the number of successes in $n=10$ trials.
\begin{center}
\only<-1>{\includegraphics[width=.83\linewidth]{figs/Joint00.pdf}}
\only<2>{\includegraphics[width=.83\linewidth]{figs/Joint01.pdf}}
\only<3>{\includegraphics[width=.83\linewidth]{figs/Joint02.pdf}}
\end{center}
\only<-1>{ \vspace{2cm}}
\only<2-3>{ \item We can get the marginal distributions via {\em
marginalization} (very important!):
$$\color{red} P(Y) =\sum_{i=1}^2P(X=i, Y) \mbox{ and } P(X) =
\sum_{j=0}^{n} P(X, Y=j)$$}
\only<3->{ \item We can get the conditional distribution via Bayes' rule:
$$P(X|Y)P(Y) = P(X,Y) \Leftrightarrow P(X|Y) = \frac{P(X,Y)}{P(Y)}$$}
\only<-2>{ \vspace{2cm}}
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{The posterior}
\begin{itemize}
\small
\item Could we use the probability distribution to get an idea of which
subject the number of successes came from?\pause
\item Use Bayes' rule to ``invert'' the conditional distribution
$$P(X|Y=k) = P(X,Y=k)/P(Y=k)$$
\end{itemize}
\begin{center}
\only<-2>{\includegraphics[height=.28\linewidth]{figs/Joint02.pdf}}
\only<3->{\includegraphics[height=.53\linewidth]{figs/Posterior00.pdf}}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{summary}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{summary}
\begin{itemize}
\item We need to know certain distributions to use them as sampling
distributions. \pause
\item For many distributions one can use a ``standard situation'' to
remember them. \pause
\item When dealing with two or more random variables one deals with
{\color{blue}joint, marginal}, and {\color{blue}conditional
distributions}.\pause
\item Marginal and conditional distributions can be converted into
the joint distribution via {\color{blue}Bayes' rule}.\pause
\item The conversion in the other direction can be done via
{\color{blue}marginalization} and {\color{blue}Bayes' rule}.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{error bars \& confidence intervals}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\subsection{errorbars}
% ----------------------------------------------------------
\begin{frame}
\frametitle{illustrating example}
As part of a study of the development of the thymus gland, researchers
weighed the glands of $50$ chick embryos after $14$ days of
incubation. The following plot depicts the mean thymus gland weight in mg:
\mycite{modified from SWS exercise 6.3.3.}
\pause
{\bf Which of the two bar plots is the correct way of displaying the
data?}
\begin{columns}
\begin{column}[t]{.5\linewidth}
\includegraphics[width=\linewidth]{figs/StandardErrorOrStandardDeviation.pdf}
\end{column}
\begin{column}[t]{.5\linewidth}
\pause That depends on what you want to say
\begin{itemize}
\item To give a measure of variability in the data: use the
{\color{blue} standard deviation $\hat\sigma =
\sqrt{\frac{1}{n-1}\sum_{i=1}^n (x_i - \hat\mu)^2}$}
\item To make a statement about the variability in the mean
estimation: use {\color{blue}standard error $\frac{\hat\sigma}{\sqrt{n}}$}
\end{itemize}
\end{column}
\end{columns}
%%%%%%%%%%%%%%% GO ON HERE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% that depends: variability (descriptiv statistics, how variable is
% the mean -> inferential, makes only sense in the meta-study setting)
% first matlab exercise: simulate standard error
% recommend paper for eyeballing test results from standard errors
% from std of mean to confidence intervals
% introduce bootstrapping (matlab exercise), then t-statistic
% intervals
% end with standard error of the median (and the thing from wikipedia)
\end{frame}
%------------------------------------------------------------------------------
\begin{frame}
\frametitle{standard error}
\framesubtitle{bootstrapping}
\begin{task}{quantifying the variability in the mean}
Download \url{https://www.dropbox.com/s/20l7ptrdc4kkceq/materialNMI.zip}.
Load the dataset {\tt thymusglandweights.dat} into Matlab and use
the first $50$ data points as your dataset. Repeat the following
steps $m=500$ times:
\begin{enumerate}
\item sample $50$ data points from $x$ with replacement
\item compute their mean and store it
\end{enumerate}
Look at the standard deviation of the computed means and compare
it to the standard error.
\end{task}
\end{frame}
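%------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{standard error}
\framesubtitle{a sketch of the bootstrap exercise}
One way the exercise could look (a sketch; it assumes {\tt
thymusglandweights.dat} is a plain ASCII file with one weight per
line):
\begin{lstlisting}
x = load('thymusglandweights.dat');
x = x(1:50);                 % our dataset
m = 500;                     % bootstrap repetitions
mu = zeros(m, 1);
for i = 1:m
    idx = randi(50, 50, 1);  % sample with replacement
    mu(i) = mean(x(idx));
end
std(mu)                      % std. dev. of bootstrapped means
std(x)/sqrt(50)              % analytical standard error
\end{lstlisting}
The two numbers should come out close to each other.
\end{frame}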
%------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{standard error}
\framesubtitle{bootstrapping}
\begin{itemize}
\item The sample standard error $\frac{\hat\sigma}{\sqrt{n}}$ is
{\color{blue}an estimate of the standard deviation of the means}
in repeated experiments, which is computed from a single
experiment.
\item When you want to do statistical tests on the mean, it is
better to use the standard error, because one can eyeball
significance from it
\mycite{Cumming, G., Fidler, F., \& Vaux, D. L. (2007). Error bars
in experimental biology. The Journal of Cell Biology, 177(1),
7--11.}
\item {\color{blue}Bootstrapping} is a way to generate an estimate
of the {\color{blue}sampling distribution of any statistic}. Instead of
sampling from the true distribution, it samples from the
empirical distribution represented by your dataset.
\mycite{Efron, B., \& Tibshirani, R. J. (1994). An Introduction to the Bootstrap. Chapman and Hall/CRC}
\end{itemize}
\end{frame}
%------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{standard error of the median?}
{\bf What kind of errorbars should we use for the median?}
It depends again:
{\bf Descriptive statistics}
\begin{itemize}
\item As a {\color{blue}descriptive statistic} one could use the {\em median
absolute deviation}: the median of the absolute differences of
the datapoints from the median.
\item Alternatively, one could bootstrap a standard deviation of the
median.
\end{itemize}
\pause
{\bf Inferential statistics}
\begin{itemize}
\item For {\color{blue}inferential statistics} one should use
something that gives the reader {\color{blue}information about
significance}.
\item Here, {\color{blue} confidence intervals} are a better choice.
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\subsection{confidence intervals \& bootstrapping}
%------------------------------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals}
\begin{center}
\only<1>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-55-39_181.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<2>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-56-59_866.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<3>{
\vspace{.1cm}
\includegraphics[width=.4\linewidth]{figs/2012-10-29_14-58-18_054.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<4>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-59-05_984.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<5>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_15-04-38_517.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<6>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_15-09-25_388.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}
\end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals for the median}
\begin{definition}{Confidence interval}
A $(1-\alpha)\cdot 100\%$ confidence interval for a statistic
$\hat\theta$ is an interval $\hat\theta \pm a$ such that the
population parameter $\theta$ is contained in that interval in
$(1-\alpha)\cdot 100\%$ of the experiments.
An alternative way to put it is that $(\hat\theta - \theta) \in
[-a,a]$ in $(1-\alpha)\cdot 100\%$ of the cases.
\end{definition}
\begin{columns}
\begin{column}[l]{.5\linewidth}
If we knew the sampling distribution of the median $\hat m$, could
we generate e.g. a $95\%$ confidence interval?\pause
\vspace{.5cm}
Yes, we could choose the interval such that $\hat m - m$ lies in that
interval in $95\%$ of the cases.
\end{column}
\begin{column}[t]{.5\linewidth}
\only<1>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian00.pdf}}
\only<2>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian01.pdf}}
\end{column}
\end{columns}
% \begin{task}{Bootstrapping a confidence interval for the median}
% \begin{itemize}
% \item Use the same dataset as before.
% \item Bootstrap $500$ medians.
% \item Compute the $2.5\%$ and the $97.5\%$ percentile of the
% $500$ medians.
% \end{itemize}
% \end{task}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals for the median}
\framesubtitle{how to get the sampling distribution}
\begin{task}{Bootstrapping a confidence interval for the median}
\begin{itemize}
\item Use the same dataset as before.
\item Bootstrap $500$ medians.
\item Compute the $2.5\%$ and the $97.5\%$ percentile of the
$500$ medians.
\end{itemize}
These two numbers give you $\hat m -a$ and $\hat m + a$ for
the $95\%$ confidence interval.
\end{task}
\end{frame}
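% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{confidence intervals for the median}
\framesubtitle{a sketch of the bootstrap}
A minimal sketch of the task (same assumptions about the data file
as before):
\begin{lstlisting}
x = load('thymusglandweights.dat');
x = x(1:50);
meds = zeros(500, 1);
for i = 1:500
    idx = randi(50, 50, 1);       % resample with replacement
    meds(i) = median(x(idx));
end
ci = prctile(meds, [2.5 97.5])    % 95% confidence interval
\end{lstlisting}
\end{frame}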
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals for the median}
\framesubtitle{how to get it analytically}
There is also an analytical estimate of the confidence interval
for the median: use the $\frac{\alpha}{2}$ and $1 - \frac{\alpha}{2}$
quantiles of a binomial distribution.
\begin{task}{Comparing the analytical interval to the bootstrapped}
\begin{itemize}
\item Get the $\frac{\alpha}{2}$ quantile minus one and the $1 -
\frac{\alpha}{2}$ quantile of a binomial distribution using {\tt
binoinv}.
\item Sort your data points and use the data points at the positions
corresponding to the quantiles.
\item Compare that to the bootstrapped confidence interval.
\end{itemize}
\end{task}
\tiny The idea behind this:
\begin{itemize}
\item The probability that the true median $m$ is covered by the
interval between $x_r$ and $x_{r+1}$ is binomial $${n \choose r}
\left(\frac{1}{2}\right)^r \left(\frac{1}{2}\right)^{n-r}$$
\item Now we take enough intervals in the ``middle'' of our sample
that we cover the true median with at least $1-\alpha$
probability.
\mycite{David, H. A., \& Nagaraja, H. N. (2003). Order Statistics (3rd ed., p. 482). Wiley.}
\end{itemize}
\end{frame}
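% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{confidence intervals for the median}
\framesubtitle{a sketch of the analytical interval}
A sketch of the order-statistic interval ({\tt x} as in the previous
sketch; note that the exact index conventions differ slightly
between textbooks):
\begin{lstlisting}
xs = sort(x);
n = numel(x); alpha = 0.05;
lo = binoinv(alpha/2, n, 0.5);         % lower index
hi = binoinv(1 - alpha/2, n, 0.5) + 1; % upper index
ci = [xs(lo) xs(hi)]  % compare to the bootstrapped interval
\end{lstlisting}
\end{frame}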
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals}
\framesubtitle{Notice the theme!}
\begin{enumerate}
\item choose a statistic
\item get the sampling distribution of the statistic (by theory or
simulation)
\item use that distribution to reason about the relation between the
true population parameter (e.g. $m$) and the sampled statistic
$\hat m$
\end{enumerate}
\begin{center}
\color{blue}
This is the scaffold of most statistical techniques. Try to find
it and it can help you understand them.
\end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{let's practice that again}
\framesubtitle{confidence interval for the mean}
\begin{task}{Bootstrapping a confidence interval for the mean}
\begin{itemize}
\item Use the same dataset as before.
\item Use bootstrapping to get a $95\%$ confidence interval for
the mean.
\end{itemize}
\end{task}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence interval for the mean}
\framesubtitle{how to get it analytically}
Getting a convenient sampling distribution is (a little bit) more
difficult:
\begin{itemize}
\item If the $x_1,...,x_n\sim \mathcal N(\mu,\sigma)$ are Gaussian, then $\hat\mu$ is Gaussian as
well
\item What is the mean of $\hat\mu$? What is its standard deviation?\pause
\item[]{\color{gray} $\langle\hat\mu\rangle_{X_1,...,X_n} = \mu$ and
$\mbox{std}(\hat\mu) = \frac{\sigma}{\sqrt{n}}$}\pause
\item The problem is, that $\hat\mu \sim \mathcal N\left(\mu,
\frac{\sigma}{\sqrt{n}}\right)$ depends on unknown population
parameters.\pause
\item However, $$\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}} \sim
\mbox{t-distribution with }n-1\mbox{ degrees of freedom}$$
\item Therefore,
\begin{align*}
P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
\end{align*}
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence interval for the mean}
\begin{task}{Analytical confidence interval for the mean}
Extend your script to contain the analytical confidence
interval using
\begin{align*}
P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
\end{align*}
\end{task}
\end{frame}
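% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{confidence interval for the mean}
\framesubtitle{a sketch of the analytical interval}
The t-quantiles can be obtained with {\tt tinv} instead of a table
(a sketch, {\tt x} as before):
\begin{lstlisting}
n = numel(x);
se = std(x)/sqrt(n);              % standard error
t = tinv([0.025 0.975], n - 1);   % t-quantiles, n-1 DoF
ci = mean(x) + t*se               % 95% confidence interval
\end{lstlisting}
\end{frame}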
% ----------------------------------------------------------
\subsection{summary}
% ----------------------------------------------------------
\begin{frame}
\frametitle{summary}
\begin{emphasize}{Which errorbars should I choose?}
Always use errorbars to help the reader see your point.
\end{emphasize}
\pause
\begin{itemize}
\item Errorbars can {\color{blue} describe the variability} in a dataset
({\color{blue}descriptive statistics}). Example: {\em standard deviation, inter-quartile
range, ...}
\item {\color{blue}Errorbars yield information about significance in testing
(inferential statistics)}. Examples: {\em standard error of the mean, confidence
intervals, ...}
\item Other possible ways of displaying variability: {\em
boxplots, violin plots, histograms, ...}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{statistical tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{one-sample test on the mean}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}
\frametitle{from confidence intervals to one-sample test}
\begin{task}{example: eye movements}
\small
In an experiment you measure eye movements of subjects on the
screen. You want to be sure that the subject fixates a certain target
(at $x=0$). During the fixation period, you acquire $n=16$
measurements. The measurements have a mean of $\hat\mu=2.5$ and a
standard deviation of $\hat\sigma=4$. Assuming that the single
fixation locations are Gaussian distributed, can you be $95\%$
confident that the subject fixated the target (x-position)?
\end{task}
\pause
\begin{solution}{use confidence intervals}
\small
Compute a $95\%$ confidence interval: Does it contain
$\mu=0$? Yes? Then we are $95\%$ confident!
From the table we get $t_{0.025}=2.131$, the standard error is
$\frac{\hat\sigma}{\sqrt{n}} = \frac{4}{\sqrt{16}}=1$ which means
that $$0\pm t_{0.025}\frac{\hat\sigma}{\sqrt{n}} = 0 \pm 2.131$$
is our confidence interval. Therefore we cannot be $95$\%
confident in this case.
\end{solution}
\end{frame}
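% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{from confidence intervals to one-sample test}
\framesubtitle{checking the numbers}
The table value used above can be reproduced with {\tt tinv} (a
quick sketch):
\begin{lstlisting}
t = tinv(0.975, 15)    % = 2.131 for n-1 = 15 DoF
se = 4/sqrt(16);       % standard error = 1
ci = 0 + [-1 1]*t*se   % interval around mu = 0
% the measured mean 2.5 lies outside [-2.131, 2.131]
\end{lstlisting}
\end{frame}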
% ----------------------------------------------------------
\begin{frame}
\frametitle{from confidence intervals to one-sample test}
\begin{task}{example: eye movements}
Could we put the interval on $\mu=0$ as well?
\end{task}
\pause
\begin{solution}{Example: eye movements}
Yes, if the interval around $\hat\mu$ contains $\mu$, then the
interval around $\mu$ also contains $\hat\mu$.
\end{solution}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{One-sample t-test}
\begin{task}{example 2: eye movements again}
\small
Now assume that there is a fixation target at $x=0$. You are
running the experiment with a monkey and you want to discard all
trials in which the monkey was not fixating the target.
During the trial, you again acquire $n=16$ measurements with mean
$\hat\mu=2.5$ and standard deviation $\hat\sigma=4$. How can you be
confident that the monkey did not fixate the target if you are
willing to be wrong in $5\%$ of the cases, where ``wrong'' means that
you believe the monkey was not fixating when in fact it was?
\end{task}
\pause
\begin{solution}{Example 2: eye movements again}
\small
The steps to the solution are exactly the same, only the logic is
different.
\begin{itemize}
\item We make a $95\%$ confidence around the fixation target
$\mu=0$. This means that if the monkey was actually fixating the
target, $95\%$ of the measured averaged positions $\hat\mu$ would
fall into that interval.
\item $5\%$ of the measured averages would fall outside the interval
even though the monkey fixated, and we would falsely treat them
as ``not fixated''.
\end{itemize}
\end{solution}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{one-sample t-test}
\framesubtitle{Notice the theme again!}
\only<1>{
\begin{center}
\includegraphics[width=0.4\linewidth]{figs/repetition0.png}
\end{center}
\begin{enumerate}
\small
\item Choose a statistic! We take the standardized mean $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
\end{enumerate}
}\pause
\only<2>{
\begin{center}
\includegraphics[width=0.4\linewidth]{figs/repetition1.png}
\end{center}
\begin{enumerate}
\small
\item Choose a statistic! We take the standardized mean $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
\item Get a sampling distribution! Here, we get it by assuming that
the positions $x_1,...,x_{16}$ are Gaussian.
\end{enumerate}
}\pause
\only<3>{
\begin{center}
\includegraphics[width=0.4\linewidth]{figs/repetition2.png}
\end{center}
\begin{enumerate}
\small
\item Choose a statistic! We take the standardized mean $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
\item Get a sampling distribution! Here, we get it by assuming that
the positions $x_1,...,x_{16}$ are Gaussian. The resulting
distribution of $t$ is a t-distribution.
\end{enumerate}
}\pause
\only<4>{
\begin{center}
\includegraphics[width=0.4\linewidth]{figs/repetition3.png}
\end{center}
\begin{enumerate}
\small
\item Choose a statistic! We take the standardized mean $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
\item Get a {\color{blue}null distribution}! Here, we get it by assuming that
the positions $x_1,...,x_{16}$ are Gaussian. The resulting
distribution of $t$ is a t-distribution.
\item Get an interval around $\mu=0$ in which values of $\hat\mu$
are assumed typical for $\mu=0$, the {\color{blue}null hypothesis
$H_0$}.
\end{enumerate}
}
\pause
\only<5>{
\begin{center}
\includegraphics[width=0.4\linewidth]{figs/repetition5.png}
\end{center}
\begin{enumerate}
\small
\item Choose a statistic! We take the standardized mean
$t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
\item Get a {\color{blue}null distribution}! Here, we get it by assuming that
the positions $x_1,...,x_{16}$ are Gaussian. The resulting
distribution of $t$ is a t-distribution.
\item Get an interval around $\mu=0$ in which values of $\hat\mu$
are assumed typical for $\mu=0$, the {\color{blue}null hypothesis
$H_0$}. This is done by fixing the {\color{blue}type I error} probability.
\end{enumerate}
}
\pause
\only<6>{
\begin{center}
\includegraphics[width=0.4\linewidth]{figs/repetition4.png}
\end{center}
\begin{enumerate}
\small
\item Choose a statistic! We take the standardized mean
$t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
\item Get a {\color{blue}null distribution}! Here, we get it by assuming that
the positions $x_1,...,x_{16}$ are Gaussian. The resulting
distribution of $t$ is a t-distribution.
\item Get an interval around $\mu=0$ in which values of $\hat\mu$
are assumed typical for $\mu=0$, the {\color{blue}null hypothesis
$H_0$}. This is done by fixing the {\color{blue}type I error} probability.
\item Outside that interval we consider $\mu=0$ as implausible and
reject $H_0$.
\end{enumerate}
}
\end{frame}
% ----------------------------------------------------------
\subsection{another one-sample test}
% ----------------------------------------------------------
\begin{frame}
\frametitle{another one-sample test}
\begin{task}{Fair coin?}
\small
Assume you carry out the following test to determine whether a coin
is fair or not:
You throw the coin $n=3$ times. If the result is either $3\times$
head or $3\times$ tail, you conclude that the coin is not fair.
Answer the following questions (for yourself first):
\begin{enumerate}
\item What is the meta-study? \pause {\em Repeated experiments of 3 throws
with this coin.}\pause
\item What is the statistic used? \pause {\em The number of heads (could also
be tails).}\pause
\item What is $H_0$? \pause {\em The coin is fair.}\pause
\item What is the Null distribution? \pause {\em The distribution is
binomial $$p(k \mbox{ heads in } n \mbox{ throws})={n \choose k}
\left(\frac{1}{2}\right)^k \left(\frac{1}{2}\right)^{n-k} $$}\pause
\item What is the Type I error of this test? \pause {\em $p(HHH|H_0) + p(TTT|H_0) = \frac{2}{8}$}
\end{enumerate}
\end{task}
\end{frame}
% ----------------------------------------------------------
\subsection{paired sample t-test}
% ----------------------------------------------------------
\begin{frame}
\frametitle{paired sample t-test}
\begin{task}{Hunger Rating (SWS, Example 3.2.4)}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\small During a weight loss study each of nine subjects was given either the
active drug m-chlorophenylpiperazine (mCPP) for two weeks and then a placebo
for another two weeks, or else was given the placebo for the first two weeks and
then mCPP for the second two weeks. Can we say that there was an
effect with significance level $5$\%?
\end{minipage}
\begin{minipage}{0.5\linewidth}
\begin{center}
\includegraphics[width=0.8\linewidth]{figs/hunger.png}
\end{center}
\end{minipage}
\end{minipage}
\vspace{.5cm}
What could we use as statistic?
What is $H_0$?
Is the difference significant?
\end{task}
\end{frame}
\begin{frame}
\frametitle{paired sample t-test}
\begin{solution}{Hunger Rating (SWS, Example 3.2.4)}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\small
\begin{enumerate}
\item The statistic is the mean difference between drug and placebo.\pause
\item $H_0$ is ``there is no difference'', i.e. the true mean of
the differences is zero. \pause
\item The standard error is $33/\sqrt{9}=11$.\pause
\item $n-1=8$ DoF yields (t-distribution table) $t_{0.025}=2.306$, so we
would reject $H_0$ if $\hat\mu$ lies outside $0\pm t_{0.025}\cdot 11 = \pm
25.366$. \pause
\item This means the difference is significant with $\alpha=0.05$.
\end{enumerate}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\begin{center}
\includegraphics[width=0.8\linewidth]{figs/hunger.png}
\end{center}
\end{minipage}
\end{minipage}
\end{solution}
\end{frame}
\begin{frame}
\frametitle{paired sample t-test}
\begin{itemize}
\item a paired sample consists of a number of {\em paired}
measurements (e.g. before/after)\pause
\item compute the differences (either there should be many of them, or you
check that they are approx. Gaussian distributed)\pause
\item use a one-sample t-test on the differences
\end{itemize}
\end{frame}
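% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{paired sample t-test}
\framesubtitle{a sketch in Matlab}
A minimal sketch with synthetic data ({\tt drug} and {\tt placebo}
are made-up vectors, not the study data):
\begin{lstlisting}
placebo = 100 + 15*randn(9, 1);    % synthetic ratings
drug = placebo - 30 + 10*randn(9, 1);
d = drug - placebo;                % paired differences
[h, p, ci] = ttest(d)              % tests mean(d) against 0
% equivalent shortcut: ttest(drug, placebo)
\end{lstlisting}
\end{frame}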
% ----------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{sign test}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}
\frametitle{sign test}
\begin{task}{Hunger Rating (SWS, Example 3.2.4)}
\small
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\small Consider again the example data from before. Instead of
taking the difference, we consider now only whether ``drug'' was
smaller or greater than ``placebo''. We then count the number of
times for which ``drug''$<$``placebo'' and the number of times
``drug''$>$``placebo''.
\end{minipage}
\begin{minipage}{0.5\linewidth}
\begin{center}
\includegraphics[width=0.5\linewidth]{figs/hunger.png}
\end{center}
\end{minipage}
\end{minipage}
\begin{itemize}
\item What is the statistic?\pause {\em The number $N_+$ of ``>''
or the number $N_-$ of ``<''.} \pause
\item What is $H_0$? \pause {\em $N_+ = N/2$}
\pause
\item What is $H_A$? \pause {\em $N_+ > N/2$ or $N_+ < N/2$}
\pause
\item What is the Null distribution? \pause {\em Binomial with $p=0.5$}
\pause
\item Given $\alpha$, how is the region determined in which we
reject $H_0$? \pause {\em Choose $a$ such that $P(k < a\,|\,H_0) + P(k > n - a\,|\,H_0) \le \alpha$}
\end{itemize}
\end{task}
\end{frame}
\begin{frame}
\frametitle{sign test vs. paired sample t-test}
\begin{itemize}
\item paired sample t-test assumes that the differences are Gaussian distributed\pause
\item the sign test makes no assumption about the distribution\pause
\item both assume that the pairs are independently drawn\pause
\item the sign test is less powerful than the t-test (you will
see in a minute what that means)
\end{itemize}
\end{frame}
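% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{sign test}
\framesubtitle{a sketch in Matlab}
The binomial null distribution written out (a sketch; {\tt drug}
and {\tt placebo} as in the synthetic example before):
\begin{lstlisting}
n = numel(drug);
nPlus = sum(drug > placebo);      % the statistic (ties ignored)
k = min(nPlus, n - nPlus);
p = min(1, 2*binocdf(k, n, 0.5))  % two-sided p-value
% built-in alternative: signtest(drug, placebo)
\end{lstlisting}
\end{frame}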
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{test nomenclature}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{test nomenclature}
\begin{center}
\only<1>{\includegraphics[width=\linewidth]{figs/testframework00.pdf}}
\only<2>{\includegraphics[width=\linewidth]{figs/testframework01.pdf}}
\end{center}
\small
\begin{columns}
\begin{column}[t]{.5\linewidth}
{\bf You want:}
\begin{itemize}
\item large power
\item small type I \& II error probability ($\alpha$ and $\beta$)
\end{itemize}
\end{column}
\begin{column}[t]{.5\linewidth}
\begin{itemize}
\item \hyperlink{sec:power}{\color{magenta}detour II: statistical power} \hypertarget{back:power}{}
\item \hyperlink{sec:bayesian}{\color{magenta}detour III: Bayes rule
and statistical tests} \hypertarget{back:bayesian}{}
\end{itemize}
\end{column}
\end{columns}
Which of the above can {\bf you} choose? \pause {\em the type I error
probability $\alpha$}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{zoo of statistical tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\hypertarget{back:detourIV}{}
\frametitle{how to choose the statistical test}
\begin{center}
\includegraphics[height=.38\linewidth]{figs/fig0.pdf}
\end{center}
\begin{itemize}
\item Normality can be checked with a QQ-plot
(\hyperlink{sec:qqplots}{\color{magenta} detour IV: QQ-plots}).
\item If $n$ is large and the variance of the data distribution is
finite, the central limit theorem guarantees normality for
``summed statistics''.
\end{itemize}
\end{frame}
% ------------
\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[height=.6\linewidth]{figs/fig2.pdf}
\end{center}
\end{frame}
% ------------
\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[height=.6\linewidth]{figs/fig3.pdf}
\end{center}
\end{frame}
% ------------
%-----------------------------------------------------------------
%-----------------------------------------------------------------
\begin{frame}
\frametitle{tests for normal data}
\begin{task}{menstrual cycle}
The data set {\tt menstrual.dat} contains the lengths of the
menstrual cycles in a random sample of 15 women. Assume we want to
test the hypothesis that the mean length of the human menstrual cycle is
equal to a lunar month ($29.5$ days). Consider the data to be
sufficiently normal.
Questions:
\begin{itemize}
\item What is $H_0$? What is $H_A$? \pause $H_0: \mu=29.5$,
$H_A: \mu\not=29.5$ \pause
\item What is the test statistic? \pause $t=\frac{\hat\mu -
29.5}{\hat\sigma/\sqrt{n}}$ \pause
\item Which test did you use and why? \pause {\em One sample t-test: data
normal, one sample against a fixed mean.}
\end{itemize}
\end{task}
\hyperlink{sec:twotailed}{\color{magenta}detour I: one- vs. two-tailed}
\hypertarget{back:twotailed}{}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[height=.6\linewidth]{figs/fig4.pdf}
\end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{}
\begin{task}{chirping}
A scientist conducted a study of how often her pet parakeet
chirps. She recorded the number of distinct chirps the parakeet
made in a 30-minute period, sometimes when the room was silent and
sometimes when music was playing. The data are shown in the
following table. Test whether the bird changes its chirping
behavior when music is playing (data set {\tt
chirping.dat}; columns: day, with, without).
Questions:
\begin{itemize}
\item What is $H_0$? What is $H_A$? \pause
$d_i=x_{\mbox{with}}-x_{\mbox{without}}$. $H_0: \mu_d=0$,
$H_A: \mu_d\not=0$ \pause
\item What is the test statistic? \pause $t=\frac{\hat\mu_d -
0}{\hat\sigma_d/\sqrt{n}}$ \pause
\item Which test did you use and why? \pause {\em Paired t-test: data
sufficiently normal, measurements are paired by day.}
\end{itemize}
\end{task}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[height=.7\linewidth]{figs/fig5.pdf}
\end{center}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/fig6.pdf}
\end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{two independent sample test}
\begin{task}{Brain Weights (permutation test)}
The dataset {\tt brainweight.dat} contains brain weights of males
and females. It consists of {\bf (i) two samples (male/female)}
which are {\bf (ii) not paired}. We want to test whether the mean
brain weights of males and females are different.
\begin{itemize}
\item What could we use as statistic?\pause {\em~the difference in the
means} \pause
\item What would be $H_0$?\pause {\em~the difference is zero} \pause
\item Think about a way to generate an estimate of the Null
distribution with Matlab. \pause {\em~Permutation test: shuffle the
labels, compute the difference in means, repeat ... (see the sketch on the next slide)}. \pause
\end{itemize}
\end{task}
\begin{itemize}
\item The {\color{blue}two-sample independent t-test} is the parametric test
for this dataset.
\item If normality does not hold, you can use the
{\color{blue}Wilcoxon-Mann-Whitney test}
\end{itemize}
\end{frame}
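% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{two independent sample test}
\framesubtitle{a sketch of the permutation test}
A minimal sketch with synthetic data in place of {\tt
brainweight.dat} (the numbers are made up for illustration):
\begin{lstlisting}
male = 1350 + 100*randn(20, 1);    % synthetic weights
female = 1250 + 100*randn(22, 1);
obs = mean(male) - mean(female);   % observed statistic
pool = [male; female]; nm = numel(male);
nullDist = zeros(5000, 1);
for i = 1:5000
    idx = randperm(numel(pool));   % shuffle the labels
    nullDist(i) = mean(pool(idx(1:nm))) ...
                  - mean(pool(idx(nm+1:end)));
end
p = mean(abs(nullDist) >= abs(obs)) % two-sided p-value
\end{lstlisting}
\end{frame}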
\begin{frame}
\frametitle{one- and two-sample t-test and sign test}
\begin{center}
\tiny
\bgroup
\def\arraystretch{2}
\begin{tabular}{|l|c|c|c|}
\hline
\textbf{name} & \textbf{statistic} & $\boldsymbol{H_{0}}$ & \textbf{Null distribution}\tabularnewline
\hline
\hline
one sample t-test & $t=\frac{\overline{x}-0}{\mbox{SE}_x}$ & mean of $t$ is zero & t-distr. with $n-1$ DoF\tabularnewline
\hline
paired sample t-test & $t=\frac{\overline{d}-0}{\mbox{SE}_d},\, d=x_{i}-y_{i}$ & mean of $t$ is zero & t-distr. with $n-1$ DoF\tabularnewline
\hline
sign test & $t=\#\left[x_{i}<y_{i}\right]$ & median of $x_{i}-y_{i}$
is zero & binomial distr. $\mathcal{B}\left(\frac{1}{2},n\right)$\tabularnewline
& & so $t$ is $\frac{n}{2}$ on average & \tabularnewline
\hline
two indep. sample t-test & $t=\frac{(\overline{x}-\overline{y})-\text{0}}{\sqrt{\frac{s_{x}^{2}}{n_{x}}+\frac{s_{y}^{2}}{n_{y}}}}$ & mean of $t$ is zero & t-distr. with DoF\tabularnewline
& & & $\frac{\left(\mbox{SE}_{x}^{2}+\mbox{SE}_{y}^{2}\right)^{2}}{\mbox{SE}_{x}^{4}/(n_{x}-1)+\mbox{SE}_{y}^{4}/(n_{y}-1)}$\tabularnewline
\hline
\end{tabular}
\egroup
\end{center}
\end{frame}
% % ----------------------------------------------------------
% \begin{frame}
% \frametitle{}
% \begin{center}
% \includegraphics[width=.8\linewidth]{figs/fig7.pdf}
% \end{center}
% \end{frame}
\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/fig8.pdf}
\end{center}
\end{frame}
\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/fig9.pdf}
\end{center}
\end{frame}
\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/fig10.pdf}
\end{center}
\end{frame}
\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=.9\linewidth]{figs/fig11.pdf}
\end{center}
\end{frame}
\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=1.\linewidth]{figs/fig12.pdf}
\end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{goodness of fit for two categorical variables}
\small
Suppose you observe two binary variables $A\in \{0,1\}$ and $B\in
\{0,1\}$ in a series of several experiments (e.g. $A$ is success or
not; $B$ indicates gender). You collect the outcomes of the
experiments in a table
\begin{center}
\begin{tabular}{l|cc|}
& \bf A=0 & \bf A=1\\\hline
\bf B=0 & $n_{00}$ & $n_{01}$\\
\bf B=1 & $n_{10}$ & $n_{11}$
\end{tabular}
\end{center}
and want to test whether it is consistent with a fixed probability
distribution you know from the literature
\begin{center}
\begin{tabular}{l|cc|l}
& \bf A=0 & \bf A=1\\\hline
\bf B=0 & $p_{00}$ & $p_{01}$ & $p(B=0)$\\
\bf B=1 & $p_{10}$ & $p_{11}$ & $p(B=1)$ \\\hline
& $p(A=0)$ & $p(A=1)$ & $1$
\end{tabular}
\end{center}
\end{frame}
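% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{goodness of fit for two categorical variables}
\framesubtitle{a sketch of the chi-square statistic}
One common statistic for this setup is Pearson's chi-square, which
compares the observed counts to the counts expected under the fixed
distribution (a sketch with made-up numbers):
\begin{lstlisting}
n = [28 22; 30 20];          % observed counts n_ij
p = [0.3 0.2; 0.25 0.25];    % fixed p_ij from the literature
E = sum(n(:)) * p;           % expected counts under H0
chi2 = sum((n(:) - E(:)).^2 ./ E(:));
df = numel(n) - 1;           % 4 cells, all p_ij fixed
pval = 1 - chi2cdf(chi2, df)
\end{lstlisting}
\end{frame}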
\begin{frame}
\begin{center}
\includegraphics[width=0.9\linewidth]{figs/fig01.pdf}
\end{center}
\end{frame}
\begin{frame}
\begin{center}
\includegraphics[width=0.9\linewidth]{figs/fig02.pdf}
\end{center}
\end{frame}
\begin{frame}
\begin{center}
\includegraphics[width=0.9\linewidth]{figs/fig03.pdf}
\end{center}
\end{frame}
\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=0.9\linewidth]{figs/fig04.pdf}
\end{center}
\end{frame}
\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=0.9\linewidth]{figs/fig05.pdf}
\end{center}
\end{frame}
\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=0.9\linewidth]{figs/fig06.pdf}
\end{center}
\end{frame}
\begin{frame}
\frametitle{zoo of statistical tests}
{\bf Don't take the diagram too seriously}
\begin{itemize}
\item is ANOVA a method for
\begin{itemize}
\item the relation between a categorical variable and an interval/ratio
variable?
\item the relation between groups of interval/ratio variables?
\end{itemize}\pause
\item is linear regression a method for
\begin{itemize}
\item the relation between two interval/ratio variables?
\item the relation between infinitely many groups of interval/ratio
variables?
\end{itemize}\pause
\item Can ANOVA be seen as some kind of linear regression?\pause
\item There are many more statistics and many more tests out
there.
\end{itemize}
\end{frame}
%----------------------------------------------------------------------
\begin{frame}
\frametitle{advice}
\begin{itemize}
\item There is no general recipe, no general way of looking at
and doing data analysis (otherwise statisticians would be unemployed
and a computer would do their job). \pause
\item Use your intelligence (and the book by Zar) to choose the right
one. \pause
\item Ask if you don't know what to take
(e.g. \url{stats.stackexchange.com}). \pause
\item Play around in Matlab with toy examples to get a feeling for a
particular method/test/idea ...
\end{itemize}
\end{frame}
%----------------------------------------------------------------------
\begin{frame}
\frametitle{summary}
\begin{itemize}
\item Statistical tests always have the same ingredients:
\begin{enumerate}
\item a {\color{blue} test statistic}
\item a default situation under which we can compute/simulate the
{\color{blue} null distribution} of the test statistic
({\color{blue}null hypothesis $H_0$})
\end{enumerate}
\pause
\item in order to make a decision between $H_0$ and $H_A$ you set
boundaries
\item these boundaries determine your {\color{blue}type I error} or
{\color{blue}false positive rate}, or vice versa\pause
\end{itemize}
\begin{emphasize}{scaffold of statistical tests}
Remembering this structure will help you to find the right
statistical test and understand it.
\end{emphasize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{p-values}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{the mother of all statistics: p-values}
\framesubtitle{why p-values?}
\begin{itemize}
\item Different people might accept more or less conservative type I error rates.
(Is $\alpha=0.01$ significant or is $\alpha=0.05$ enough?)
\item P-values are a universal way of reporting statistics such that
the type I error rate can be chosen by each person individually.
\end{itemize}
\end{frame}
%---------------------------------------------------------
\begin{frame}
\frametitle{the mother of all statistics: the p-value}
\begin{task}{true or false?}
\begin{itemize}
\item From $p<0.01$ you can deduce that your result is of
biological importance.\pause
% \item {\color{gray} False. A small p-value doesn't say
% anything
% about biological importance. It just indicates that the data
% and $H_0$ are not very compatible.} \pause
\item The p-value is the probability of observing a dataset
resulting in a test-statistic more extreme than the one at hand,
assuming the null hypothesis is true.\pause
% \item {\color{gray} True.} \pause
\item $1-p$ is the probability of the alternative hypothesis being
true.
% \item {\color{gray} False. The p-value cannot tell us anything
% about whether one of the hypotheses are true or not.}
\end{itemize}
\end{task}
\end{frame}
%---------------------------------------------------------
\begin{frame}
\frametitle{the mother of all statistics: the p-value}
\framesubtitle{What is a p-value?}
\only<1>{
So far, we chose a particular threshold $b$ by fixing the type I error
rate $\alpha$.
\begin{center}
\includegraphics[width=.7\linewidth]{figs/pval0.png}
\end{center}
}
\only<2>{
\begin{itemize}
\item The {\color{blue}p-value} is the type I error rate if you use
your {\color{blue} actually measured statistic} as threshold.
\item In other words: The p-value is the minimal type I error rate
you have to accept if you call your result significant.
\end{itemize}
\begin{center}
\includegraphics[width=.7\linewidth]{figs/pval1.png}
\end{center}
}
\end{frame}
%---------------------------------------------------------
\begin{frame}
\frametitle{the mother of all statistics: the p-value}
\framesubtitle{Why is it a universal measure?}
The p-value is the minimal type I error rate you have to accept if you
call your result significant.
\begin{itemize}
\item If you have a personal $\alpha$-level that is larger than the
p-value, you automatically know that the decision threshold lies
``further inside''
\item This means you {\color{blue}can simply compare your $\alpha$-level with the
p-value}: if the p-value is smaller, then you call that result
significant, otherwise you don't.
\end{itemize}
\begin{center}
\includegraphics[width=.45\linewidth]{figs/pval0.png}
\includegraphics[width=.45\linewidth]{figs/pval1.png}
\end{center}
\end{frame}
%---------------------------------------------------------
\begin{frame}
\frametitle{the mother of all statistics: the p-value}
\begin{task}{p-values if $H_0$ is true}
Is the following procedure correct?
\vspace{.5cm}
In order to show that a sample $x_1,...,x_n$ follows a Normal
distribution with mean zero, you perform a t-test. If the p-value is
large, you conclude that there is evidence for $H_0$, i.e. accept
that $x_1,...,x_n$ has mean zero and is normally distributed.
\vspace{.5cm}
    To find the answer, simulate normally distributed random variables
    with {\tt randn} in Matlab and compute the p-value with a one-sample
    t-test. Repeat that several times and plot a histogram of the
    p-values (a simulation sketch follows on the next slide).
\end{task}
\pause
\begin{itemize}
\item If $H_0$ is true, the p-value is uniformly distributed between 0
    and 1. Why?\pause
\item Think about the beginning of this lecture
$$p=P(|x| > |t|) = 1 - P(|x| \le |t|) = 1 - \mbox{c.d.f.}(|t|) \sim U([0,1])$$
\end{itemize}
\end{frame}
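%---------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{the mother of all statistics: the p-value}
  \framesubtitle{simulating p-values under $H_0$}
  A minimal sketch for the task above (the repetition count and sample
  size are arbitrary choices):
\begin{lstlisting}
% p-values under H0: sample from N(0,1), test mu = 0, repeat
nRepeats = 10000;
n = 20;
p = zeros(nRepeats, 1);
for i = 1:nRepeats
    x = randn(n, 1);       % H0 is true: the mean really is zero
    [~, p(i)] = ttest(x);  % one-sample t-test against mu = 0
end
hist(p, 20)                % approximately flat: p ~ U([0,1])
\end{lstlisting}
\end{frame}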
%--------------------------------------------------
\begin{frame}
\frametitle{the mother of all statistics: the p-value}
\begin{task}{Study design}
Is the following procedure statistically sound?
\vspace{.5cm}
Psychophysical experiments with human subjects can be time-consuming
and costly. In order to get a significant effect with minimal effort
you use the following procedure: You start with a few subjects. If
your statistical test for the effect returns a p-value smaller than
    $0.05$ you stop and publish. Otherwise you keep adding subjects
    and computing p-values until you get a significant result (or run
    out of time and money).
\end{task}
\pause
  \begin{solution}{Answer}
    No, the procedure is not sound. Even if $H_0$ is true, you will
    eventually get a p-value smaller than $0.05$, since the p-value is
    uniformly distributed between $0$ and $1$ in this case (the next
    slide demonstrates this in a simulation).
  \end{solution}
\end{frame}
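%--------------------------------------------------
\begin{frame}[fragile]
  \frametitle{the mother of all statistics: the p-value}
  \framesubtitle{simulating the ``stop when significant'' strategy}
  A minimal simulation of the procedure above (sample sizes and
  repetition count are arbitrary choices); even though $H_0$ is true,
  the stopping rule rejects far more often than in $5\%$ of the runs:
\begin{lstlisting}
% optional stopping inflates the type I error rate
nRepeats = 2000;
nMax = 100;
nFalsePositives = 0;
for i = 1:nRepeats
    x = randn(nMax, 1);            % H0 is true for all subjects
    for n = 5:nMax                 % add subjects one by one
        [~, p] = ttest(x(1:n));
        if p < 0.05                % stop as soon as it looks good
            nFalsePositives = nFalsePositives + 1;
            break
        end
    end
end
nFalsePositives / nRepeats         % well above the nominal 0.05
\end{lstlisting}
\end{frame}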
%--------------------------------------------------
\begin{frame}
\frametitle{the mother of all statistics: the p-value}
\begin{task}{p-values over studies}
If there is no effect, how many studies would yield a significant
p-value (for $\alpha=0.05$)?
\end{task}
\pause
\begin{solution}{Answer}
$5\%$
\end{solution}
\pause
\begin{task}{p-values in publications}
Do you think that only publishing positive findings poses a problem?
\end{task}
\pause
\begin{solution}{Answer}
Yes. If I only publish significant positive findings, then I can
publish anything if I just repeat the study long enough.
\end{solution}
\end{frame}
%---------------------------------------------------------
\begin{frame}
\frametitle{the mother of all statistics: the p-value}
\begin{task}{true or false?}
\begin{itemize}
\item From $p<0.01$ you can deduce that your result is of
biological importance.\pause
\item {\color{gray} False. A small p-value doesn't say anything
about biological importance. It just indicates that the data
and $H_0$ are not very compatible.} \pause
\item The p-value is the probability of observing a dataset
resulting in a test-statistic more extreme than the one at hand,
assuming the null hypothesis is true.\pause
\item {\color{gray} True.} \pause
\item $1-p$ is the probability of the alternative hypothesis being
true. \pause
\item {\color{gray} False. The p-value cannot tell us anything
about whether one of the hypotheses are true or not.}
\end{itemize}
\end{task}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{multiple hypothesis testing}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%---------------------------------------------------------------
\begin{frame}
\frametitle{two tests}
\begin{task}{Correct or not?}
You have two independent samples from a treatment group and a
control group. You are not sure whether your data meets the
requirement of a t-test. Therefore, you carry out a t-test and a
ranksum test. If one of them rejects $H_0$ you use this one to
report your findings in a paper.
\vspace{.5cm}
\footnotesize
    To approach an answer, use Matlab (a sketch follows on the next slide) and
\begin{itemize}
\item repeatedly sample two datasets from the same Normal distribution
$\mathcal N(0,1)$.
\item for each pair of datasets compute the test statistic of a
ranksum test (use {\tt ranksum}) and a t-test (use {\tt ttest2})
\item Plot the values of the statistics against each other (using {\tt
plot(T, R, 'k.')}). What can you observe?
\item Count the number of times at least one of the tests gives a
p-value smaller than $0.05$. What can you observe?
\end{itemize}
\end{task}
\end{frame}
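%---------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{two tests}
  \framesubtitle{simulation sketch}
  One possible solution of the task (sample size and repetition count
  are arbitrary choices):
\begin{lstlisting}
% t-test vs. ranksum statistics on the same data
nRepeats = 5000; n = 20;
T = zeros(nRepeats, 1); R = T; nRejected = 0;
for i = 1:nRepeats
    x = randn(n, 1); y = randn(n, 1);  % same distribution: H0 true
    [pR, ~, statsR] = ranksum(x, y);
    [~, pT, ~, statsT] = ttest2(x, y);
    R(i) = statsR.ranksum;             % rank sum statistic
    T(i) = statsT.tstat;               % t statistic
    nRejected = nRejected + (pR < 0.05 || pT < 0.05);
end
plot(T, R, 'k.')       % the two statistics are clearly correlated
nRejected / nRepeats   % noticeably larger than 0.05
\end{lstlisting}
\end{frame}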
%---------------------------------------------------------------
\begin{frame}
\frametitle{two tests}
\begin{minipage}{1.\linewidth}
\begin{minipage}{0.6\linewidth}
\begin{center}
\includegraphics[width=1.\linewidth]{figs/multipletesting.pdf}
\end{center}
\end{minipage}
\begin{minipage}{0.39\linewidth}
\small
\only<1-4>{
\begin{itemize}
\item the two statistics are clearly correlated\pause
\item What is the type I error rate for each single test?\pause
\item Where is the type I error area in the combined plot? \pause
      \item Is the type I error rate in the combined strategy lower or
        higher compared to using just a single test?\pause
\end{itemize}
}
\only<5>{
\small
\color{blue} The combined strategy has a higher error rate! This gets
worse for more tests. For that reason we have to account for multiple
testing!
}
\end{minipage}
\end{minipage}
\end{frame}
%---------------------------------------------------------------
\begin{frame}
\frametitle{two tests}
\begin{minipage}{1.\linewidth}
\begin{minipage}{0.49\linewidth}
\begin{center}
\includegraphics[width=1.\linewidth]{figs/multipletesting.pdf}
\end{center}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\small
\begin{itemize}
\item When is something called multiple testing?\pause
\item[]{\color{gray} If a hypothesis is a compound of single
hypotheses.}\pause
    \item If I test $\mu_1 = \mu_2 = \mu_3$ by testing $\mu_i = \mu_j$
      for all $i\not= j$ and reject as soon as one of the tests rejects,
      does the type I error increase or decrease?\pause
    \item[]{\color{gray} It increases, because there is a chance to make
      an error in each of the single tests.}\pause
\item Can the type I error also go in the other direction?\pause
\item[]{\color{gray} Yes, it could. For example if the single
hypotheses are combined with ``and''.}
\end{itemize}
\end{minipage}
\end{minipage}
\end{frame}
%---------------------------------------------------------------
\begin{frame}
\frametitle{summary}
\begin{itemize}
  \item Multiple testing tests a {\color{blue}compound hypothesis} by
    testing several single hypotheses.\pause
  \item {\color{blue}Multiple testing can decrease or increase the type I/II error}
    depending on how the single hypotheses are combined (``or'': type
    I up, ``and'': type I down).\pause
  \item This can be accounted for (e.g. by {\em Bonferroni correction:
      divide $\alpha$ by the number of tests}; see the sketch on the
    next slide). However, it is better to have a test that directly
    tests the compound hypothesis. ANOVA is a typical example of that.
\end{itemize}
\end{frame}
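%---------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{Bonferroni correction}
  A minimal sketch with made-up p-values: to keep the overall type I
  error rate below $\alpha$ when combining $m$ tests with ``or'', test
  each single hypothesis at level $\alpha/m$:
\begin{lstlisting}
alpha = 0.05;
p = [0.030 0.012 0.200];  % hypothetical p-values from m = 3 tests
m = numel(p);
reject = p < alpha/m      % only p = 0.012 survives the correction
\end{lstlisting}
\end{frame}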
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{study design}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{general theme}
\begin{enumerate}
\item make an educated guess about the true parameters
\item state how accurate/powerful you want to be
\item select $n$ based on that
\end{enumerate}
\end{frame}
\begin{frame}
\frametitle{estimating a single mean}
\framesubtitle{standard error and $\alpha$}
\begin{itemize}
  \item Assume you want to estimate the mean of some quantity.\pause
\item From a pilot study or the literature, you have an estimate $s$
of the standard deviation and $\tilde\mu$ of the mean of that
quantity.\pause
\item $\tilde \mu$ could also be chosen to set a minimal detectable difference.\pause
\item In order to test whether your mean $\hat\mu$ is different from
a fixed mean $\mu_0$ on an $\alpha$-level of $5\%$ you know that
the $95\%$ confidence interval around $\tilde\mu$ should not
contain $\mu_0$: $$\underbrace{|\tilde\mu - \mu_0|}_{=:\delta} \ge
t_{0.025, \nu}\frac{s}{\sqrt{n}}$$
\pause
  \item This means you should set $n$ to be
$$n \ge \left(\frac{t_{0.025, \nu}\cdot s}{\delta}\right)^2 $$
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{estimating means}
\framesubtitle{type I and type II error}
{\bf one can also take the desired power $1-\beta$ into account}
  $$n \ge \frac{s^2}{\delta^2}\left(t_{\alpha,\nu} + t_{\beta(1),\nu}\right)^2$$
\only<1>{
\includegraphics[width=.5\linewidth]{figs/experimentalDesign00.pdf}
\includegraphics[width=.5\linewidth]{figs/experimentalDesign01.pdf}
}
\pause
{\bf rearranging the formula yields an estimate for minimal
detectable difference}
  $$\delta \ge \sqrt{\frac{s^2}{n}}\left(t_{\alpha,\nu} + t_{\beta(1),\nu}\right)$$
\pause
{\bf for two means, this formula becomes}
  $$n \ge \frac{2s^2}{\delta^2}\left(t_{\alpha,\nu} + t_{\beta(1),\nu}\right)^2$$
\pause
\begin{emphasize}{iterative estimation}
Since $\nu$ depends on $n$ (i.e. $\nu=n-1$), we need to estimate
$n$ iteratively.
\end{emphasize}
\mycite{Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
Ed.)Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
Hall. doi:10.1037/0012764}
\end{frame}
\begin{frame}
\frametitle{example}
\framesubtitle{Zar, example 7.2}
\small
  Researchers observed the weight changes in twelve rats after being
  subjected to forced exercise. The mean difference is
  $\hat\mu=-0.65g$, the sample variance is $\hat\sigma^2=1.5682
  g^2$. We wish to test the difference to $\mu_0=0$ with $\alpha=0.05$
  and a $1-\beta=0.9$ (i.e. $90\%$) chance of detecting a population mean
  different from $\mu_0=0$ by as little as $1.0g$.
\pause
Let's guess that a sample size of $n=20$ would be required. Then
$\nu=19$, $t_{0.025,19}=2.093$, $\beta=1-0.9=0.1$, and
$t_{0.1,19}=1.328$. This means
  $$n=\frac{1.5682}{1^2}(2.093+1.328)^2 = 18.4.$$
\pause
  Now let us use $n=19$ as an estimate, in which case $\nu=18$,
  $t_{0.025,18}=2.101$, $t_{0.1,18}=1.330$,
  and $$n=\frac{1.5682}{1^2}(2.101+1.330)^2=18.5.$$
  Thus we need a sample size of at least $19$ (the next slide shows
  this iteration in Matlab).
\end{frame}
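\begin{frame}[fragile]
  \frametitle{example}
  \framesubtitle{the iteration in Matlab}
  A sketch of the iterative estimation, using {\tt tinv} for the
  t-quantiles (variable names are our own choice):
\begin{lstlisting}
% iterative sample size estimation for Zar's example 7.2
s2 = 1.5682; delta = 1.0;  % variance, minimal detectable difference
alpha = 0.05; beta = 0.1;
n = 20;                    % initial guess
for i = 1:10               % iterate until n stabilizes
    nu = n - 1;
    tAlpha = tinv(1 - alpha/2, nu);  % two-tailed t_{0.025,nu}
    tBeta  = tinv(1 - beta, nu);     % one-tailed t_{0.1,nu}
    n = ceil(s2/delta^2 * (tAlpha + tBeta)^2);
end
n                          % -> 19
\end{lstlisting}
\end{frame}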
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{ANOVA}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{from linear regression to ANOVA}
\begin{frame}
\frametitle{from linear regression to ANOVA}
\small The following table contains the impulse frequency of the
electric field from electric fish measured at several temperatures
(data for project 03).
\begin{center}
\tiny
\begin{tabular}{lccccccc}
      {\bf temperature [${}^\circ$C]} & \multicolumn{3}{c}{\bf impulse frequency [number/sec]} \\ \hline\\
20.00 & 225.00 & 230.00 & 239.00 \\
22.00 & 251.00 & 259.00 & 265.00 \\
23.00 & 266.00 & 273.00 & 280.00 \\
25.00 & 287.00 & 295.00 & 302.00 \\
27.00 & 301.00 & 310.00 & 317.00 \\
28.00 & 307.00 & 313.00 & 325.00 \\
30.00 & 324.00 & 330.00 & 338.00
\end{tabular}
\end{center}
\begin{itemize}
\item Our goal will be to test whether $\mu_{20}=...=\mu_{30}$.
  \item Note that ANOVA is not the right method to analyze this
    dataset; linear regression is, because temperature is on an interval
    scale. We just use the ideas here for illustration.
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{from linear regression to ANOVA}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/regression01.pdf}
\end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{from linear regression to ANOVA}
\begin{center}
\includegraphics[width=.7\linewidth]{figs/regression02.pdf}
\end{center}
What kind of regression line would we expect if the means were equal?
\pause {\em One with slope $\alpha=0$.}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\includegraphics[width=1.\linewidth]{figs/regression02.pdf}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\begin{itemize}
\item For linear regression data, we would test whether
$\alpha=0$.
      \item For categorical inputs (x-axis), we cannot compute a
regression line. Therefore, we need a different approach.
\end{itemize}
\end{minipage}
\end{minipage}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{law of total variance}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{law of total variance}
\only<1>{
    Approach: law of total variance
$$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] +
\color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
\begin{center}
\includegraphics[width=.7\linewidth]{figs/regression02.pdf}
\end{center}
}\pause
\only<2>{
    Approach: law of total variance
$$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] +
\color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
\begin{center}
\includegraphics[width=.7\linewidth]{figs/regression03.pdf}
\end{center}
}\pause
\only<3>{
    Approach: law of total variance
$$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] +
\color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
Data generation model for regression $f_{ij} = {\color{mygreen} \alpha t_i} + \beta + {\color{lightblue}\varepsilon_{ij}}$
\begin{center}
\includegraphics[width=.6\linewidth]{figs/regression04.pdf}
\end{center}
}\pause
\only<4>{
    Approach: law of total variance
$$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] +
\color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
Data generation model for regression
$f_{ij} = {\color{mygreen} \alpha t_i} + \beta +
{\color{lightblue}\varepsilon_{ij}}: $ $${\color{mygreen} \alpha=0}
\Rightarrow {\color{mygreen} \mathbb V[\mu] = 0} \Rightarrow \mu_{20} = \mu_{22} = ... = \mu_{30}$$
\begin{center}
\includegraphics[width=.6\linewidth]{figs/regression04.pdf}
\end{center}
}
\end{frame}
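%---------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{law of total variance}
  \framesubtitle{numerical check}
  A quick numerical check of the decomposition (group means roughly
  follow the fish data; group size and noise level are arbitrary):
\begin{lstlisting}
% V[f] = V[mu] + E[V[f|mu_i]] with equally sized groups
k = 7; n = 1000;                      % 7 groups, n samples each
mu = [232 258 273 295 309 315 331];   % group means
f = repmat(mu, n, 1) + 5*randn(n, k); % within-group sd = 5
totalVar   = var(f(:), 1);            % V[f]
betweenVar = var(mean(f, 1), 1);      % V[mu]
withinVar  = mean(var(f, 1, 1));      % E[V[f|mu_i]]
[totalVar, betweenVar + withinVar]    % identical up to round-off
\end{lstlisting}
\end{frame}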
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{single factor ANOVA}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%---------------------------------------------------------------
\begin{frame}
\frametitle{data model for single factor ANOVA}
  Approach: law of total variance
$$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] +
\color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
Data generation model for single factor ANOVA
$f_{ij} = \overline{\mu} + {\color{mygreen} \tau_{i}} +
{\color{lightblue}\varepsilon_{ij}}$:
$${\color{mygreen} \tau_i=\tau_j=0}
\Rightarrow {\color{mygreen} \mathbb V[\mu] = 0} \Rightarrow \mu_{20} = \mu_{22} = ... = \mu_{30}$$
\begin{center}
\includegraphics[width=.6\linewidth]{figs/regression05.pdf}
\end{center}
\end{frame}
%---------------------------------------------------------------
\begin{frame}
\frametitle{statistic of ANOVA}
\begin{columns}
\begin{column}{0.43\linewidth}
\begin{center}
\includegraphics[width=1.\linewidth]{figs/regression02.pdf}
\vspace{-.2cm}
\includegraphics[width=1.\linewidth]{figs/Fdistribution00.pdf}
\end{center}
\end{column}
\begin{column}{0.55\linewidth}
\begin{align*}
      \:&\mbox{\color{lightblue} error SS}&=\color{lightblue}\sum_{ij}\left(x_{ij}-\hat{\mu}_{i}\right)^{2}\\
      +\:&\mbox{\color{mygreen} group SS}&=\color{mygreen}\sum_{i}n_{i}\left(\hat{\mu}_{i}-\hat{\mu}\right)^{2}\\\hline
      \:&\mbox{\color{red} total SS}&=\color{red}\sum_{ij}\left(x_{ij}-\hat{\mu}\right)^{2}
\end{align*}
\pause
\begin{align*}
        \mbox{\color{mygreen}group MS}=\frac{\mbox{\color{mygreen}group SS}}{\mbox{\color{mygreen}group DF}}&=\color{mygreen}\frac{\sum_{i}n_{i}\left(\hat{\mu}_{i}-\hat{\mu}\right)^{2}}{k-1}\\\mbox{\color{lightblue}error MS}=\frac{\mbox{\color{lightblue}error SS}}{\mbox{\color{lightblue}error DF}}&=\color{lightblue}\frac{\sum_{ij}\left(x_{ij}-\hat{\mu}_{i}\right)^{2}}{N-k}\\\color{dodgerblue}F&=\frac{\mbox{\color{mygreen}group MS}}{\mbox{\color{lightblue}error MS}}
\end{align*}
\end{column}
\end{columns}
\end{frame}
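%---------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{statistic of ANOVA}
  \framesubtitle{computing $F$ by hand}
  A sketch that computes the SS/MS terms for the fish data and checks
  the result against Matlab's {\tt anova1}:
\begin{lstlisting}
f = [225 230 239; 251 259 265; 266 273 280; 287 295 302; ...
     301 310 317; 307 313 325; 324 330 338];  % rows = groups
[k, n] = size(f);  N = k*n;
muHat = mean(f(:));                 % grand mean
muI   = mean(f, 2);                 % group means
groupSS = n * sum((muI - muHat).^2);
errorSS = sum(sum((f - repmat(muI, 1, n)).^2));
F = (groupSS/(k - 1)) / (errorSS/(N - k)); % group MS / error MS
p = 1 - fcdf(F, k - 1, N - k)       % p-value from F-distribution
pCheck = anova1(f', [], 'off')      % columns of f' are the groups
\end{lstlisting}
\end{frame}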
%---------------------------------------------------------------
\begin{frame}
\frametitle{summary single factor ANOVA}
\begin{itemize}
\item {\bf Goal:} Test whether several means are equal or not.\pause
\item {\bf Strategy:} Use law of total variance to explain the overall
variance with the {\em variance of the means} and the {\em variance
within groups}\pause
\item If the total variance can be solely explained from {\em variance
within groups}, then the means do not vary and must be the same. \pause
  \item Since a statistic should be large if the data does not fit
    $H_0$, we use $\frac{MS(between)}{MS(within)}$, which can be shown to
    have an F-distribution under the following assumptions.\pause
\item {\bf Assumptions:}
\begin{itemize}
\item The groups must be independent of each other.
    \item In each group, the specimens must be sampled i.i.d. from the
      particular population distribution $f_{ij} \sim p(f|\mu_i)$.
    \item The standard deviations of the groups are equal
      ($\sigma_\varepsilon$ is the same for all groups).
    \item The residuals $\varepsilon$ must be Normally distributed.
\end{itemize}
\end{itemize}
\end{frame}
\subsection{study design for ANOVA}
\begin{frame}
\frametitle{study design for ANOVA}
\begin{itemize}
\item If the means are different (but all other assumptions are
satisfied), then $F$ follows a non-central F-distribution.
\item Like in the case of one- and two-sample t-tests, this can be
used to adjust $n$ for the desired power.
\item Alternatively, one can estimate the minimal detectable
difference $\delta$ from estimates of the {\em error MS} $s^2$
and $n$, or $n$ from $\delta$ and $s^2$, respectively.
\end{itemize}
\mycite{Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
Ed.)Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
Hall. doi:10.1037/0012764}
\end{frame}
\subsection{non-parametric ANOVA}
\begin{frame}
\frametitle{Kruskal-Wallis test}
\begin{itemize}
\item Can be applied if the data is not normally distributed.
\item Is equivalent to Mann-Whitney/Wilcoxon rank sum test for two
factor levels.
\item Needs the variances to be equal as well.
\item Instead of testing equality of means/medians it tests for
equality of distributions.
\item For more details see {\em Biostatistical Analysis}.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Testing the difference among several medians}
\begin{itemize}
\item Can be applied if the data is not normally distributed.
\item Does not need the variances to be equal.
\item For more details see {\em Biostatistical Analysis}.
\end{itemize}
\end{frame}
\section{more complex ANOVAs}
\subsection{blocking}
% ----------------------------------------------------------
\begin{frame}
\frametitle{blocking}
\footnotesize
  {\bf Blocking}

  How does experience affect the anatomy of the brain? In a typical
experiment to study this question, young rats are placed in one of
three environments for 80 days:
\begin{itemize}
  \item[T1] Standard environment. The rat is housed with a single
    companion in a standard lab cage.
  \item[T2] Enriched environment. The rat is housed with several
    companions in a large cage, furnished with various playthings.
  \item[T3] Impoverished environment. The rat lives alone in a standard
    lab cage.
\end{itemize}
At the end of the 80-day experience, various anatomical measurements
are made on the rats' brains. Suppose a researcher plans to conduct
the above experiment using 30 rats. To minimize variation in response,
all 30 animals will be male, of the same age and strain. To reduce
variation even further, the researcher can take advantage of the
similarity of animals from the same litter. In this approach, the
researcher would obtain three male rats from each of 10 litters. The
three littermates from each litter would be assigned at random: one to
T1, one to T2, and one to T3.
\end{frame}
%---------------------------------------------------------------
\begin{frame}
\frametitle{How to create blocks}
Try to create blocks that are as homogeneous within themselves as
possible, so that the inherent variation between experimental units
becomes, as far as possible, variation between blocks rather than
  within blocks (see SWS chapter 11.6).

  {\bf Fish data:}
\begin{itemize}
\item each fish is a block
\item the different categories are the factor of interest
\item note that we have one measurement per block and factor, but
there could be more
\end{itemize}
\end{frame}
%---------------------------------------------------------------
\begin{frame}
\frametitle{data model for block randomized ANOVA}
Data generation model for randomized block factor ANOVA
$f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \varepsilon_{ijk}$:
\vspace{.5cm}
How do we know that there is no interaction $\gamma_{ij}$ between
the blocks and the factors?
\begin{itemize}
  \item {\bf a priori knowledge:} why should the temperature effect
    depend on the fish's identity?
  \item {\bf additivity:} for each factor level $i$, block $j$ shifts the
    values by the {\em same} amount $\beta_j$. \pause
\end{itemize}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\begin{center}
\includegraphics[width=1.\linewidth]{figs/regression06.pdf}
\end{center}
\end{minipage}
\begin{minipage}{0.5\linewidth}
      \only<2>{\color{red} Would that also be the case if the lines crossed at some point?}
\end{minipage}
\end{minipage}
\end{frame}
\subsection{two factor ANOVA}
%---------------------------------------------------------------
\begin{frame}
\frametitle{What's the funny way to write down the data model in ANOVA?}
Data generation model for a two factor ANOVA with interaction
$$f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \gamma_{ij} + \varepsilon_{ijk}$$
{\bf Note that:}
\begin{itemize}
  \item The sums over the $\tau_i$, $\beta_j$, $\gamma_{ij}$, and
    $\varepsilon_{ijk}$ terms are always zero. They model the {\em deviation}
    from the grand mean. \pause
  \item They directly correspond to the available SS/MS terms. For
    example, in the block randomized ANOVA
\begin{itemize}
\item $f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \varepsilon_{ijk}$
\item $SS(total) = SS(temperature) + SS(blocks) + SS(within)$
\end{itemize}
\end{itemize}
\end{frame}
%---------------------------------------------------------------
\begin{frame}
\frametitle{different hypotheses from a 2-factor ANOVA}
\small
Data generation model for a two factor ANOVA with interaction
$$f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \gamma_{ij} + \varepsilon_{ijk}$$
\begin{itemize}
\item {\bf Blocking: } Assume $\gamma_{ij}=0$. Test
$$F=\frac{\mbox{temperature MS} (\tau_i)}{\mbox{error MS}
(\varepsilon_{ijk})}$$\pause
\item {\bf Repeated Measures: } Assume $\gamma_{ij}=0$. Entity
which was repeatedly measured becomes block.\pause
\item {\bf Two factor testing factor influence: } Assume $\gamma_{ij}\not=0$. Test
$$F = \frac{\mbox{temperature MS} (\tau_i)}{\mbox{error MS}
(\varepsilon_{ijk})}$$\pause
\item {\bf Two factor testing interaction: } Assume $\gamma_{ij}\not=0$. Test
$$F=\frac{\mbox{interaction MS}(\gamma_{ij})} {\mbox{error MS}
(\varepsilon_{ijk})}$$
\end{itemize}
\end{frame}
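%---------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{different hypotheses from a 2-factor ANOVA}
  \framesubtitle{a sketch with {\tt anovan}}
  A minimal two-factor sketch on simulated data (factor levels, effect
  sizes, and variable names are arbitrary choices):
\begin{lstlisting}
% two factors A (3 levels) and B (4 levels), 5 samples per cell
[A, B, rep] = ndgrid(1:3, 1:4, 1:5);
y = 2*A(:) + 0.5*B(:) + randn(numel(A), 1); % no true interaction
[p, tbl] = anovan(y, {A(:), B(:)}, 'model', 'interaction', ...
                  'varnames', {'A', 'B'}, 'display', 'off');
% p(1), p(2): main effects of A and B; p(3): interaction A x B
p
\end{lstlisting}
\end{frame}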
%---------------------------------------------------------------
\begin{frame}
\frametitle{summary}
\begin{itemize}
\small
  \item ANOVA is a very flexible method to study the interactions of
    categorical variables (factors) and ratio/interval data \pause
  \item Works by checking whether a certain factor (or interaction
    between factors) is needed to explain the variability in the data \pause
\item Relies on assumptions that need to be checked
\begin{itemize}
\item equal variance for each factor level
\item the residuals are Normally distributed
\item number of points $n_i$ should be the same
\end{itemize}\pause
\item There is a whole zoo of ANOVA techniques, for all kinds of
situations. This is just the tip of the iceberg.
  \item One can often get away with violating some of the
    assumptions. For more details on that check {\em Biostatistical Analysis}.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{detour I: One-tailed vs. two-tailed}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%---------------------------------------------------------------
\begin{frame}
\hypertarget{sec:twotailed}{}
\frametitle{one-tailed tests}
\begin{task}{Correct or not?}
    Imagine a pharmaceutical company runs clinical trials for a drug
    that enhances the ability to focus. To that end they apply the drug
    to a treatment group and measure scores in a standardized test. From the
    literature it is known that normal subjects have a score of about 0.
    Since the company wants to test whether the drug {\em enhances (score
      > 0)} the ability to focus, they choose a one-tailed test ($H_A:$
    treatment group performs better than the performance known from the
\end{task}
\end{frame}
%-------------------------------------------------------------
\begin{frame}
\frametitle{one tailed test}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
{\bf two tailed test}
\includegraphics[width=\linewidth]{figs/twotailed.png}
\footnotesize
\vspace{-1cm}
e.g.
\begin{itemize}
\item $H_0: \mu = 0$
\item $H_A: \mu \not= 0$
\vspace{1.8cm}
\end{itemize}
\end{minipage}
\begin{minipage}{0.5\linewidth}
{\bf one tailed test}
\includegraphics[width=\linewidth]{figs/onetailed.png}
\footnotesize
\vspace{-1cm}
e.g.
\begin{itemize}
\item $H_0: \mu = 0$
\item $H_A: \mu > 0$
      \item $\hat\mu < 0$ must directly imply that $\hat\mu$ came from
        $P(\hat\mu|H_0)$
\item if that is not the case, using one-tailed is cheating
\end{itemize}
\end{minipage}
\end{minipage}
\hyperlink{back:twotailed}{\color{gray}go back}
\end{frame}
% ----------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{detour II: Statistical Power}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Why is it hard to assess the power of a test?}
\begin{minipage}{1.\linewidth}
\begin{minipage}{.5\linewidth}
\includegraphics[width=.8\linewidth]{figs/power.pdf}
\end{minipage}
\begin{minipage}{.5\linewidth}
\begin{itemize}
\item Power = 1 - P(type II error)\\
= P(reject $H_0$| $H_A$ is true)\pause
\item in general the distribution
\begin{center}
P(test statistic|$H_A$ is true)
\end{center}
is not available to us.
\pause
\item Therefore, the power can often only be specified for a
specific $H_A$.
\end{itemize}
\end{minipage}
\end{minipage}
\mycite{J. H. Zar, Biostatistical Analysis}
\hypertarget{sec:power}{}
\hyperlink{back:power}{\color{gray}go back}
\end{frame}
% ----------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{detour III: Bayes rule and statistical tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}
\hypertarget{sec:bayesian}{}
\frametitle{Why is this funny (or sad)?}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/frequentistsvsbayesians.png}
\end{center}
\mycite{http://xkcd.com/1132/}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
\frametitle{Why is this funny (or sad)?}
\begin{minipage}{1.\linewidth}
\begin{minipage}{.5\linewidth}
\includegraphics[width=.7\linewidth]{figs/frequentistsvsbayesians.png}
\mycite{http://xkcd.com/1132/}
\end{minipage}
\begin{minipage}{.5\linewidth}
\begin{itemize}
\item $H_0:$ the sun has not gone nova
\item $H_A:$ the sun has gone nova \pause
\item test procedure: we believe the detector \pause
\item Null distribution: multinomial $n=2, p_1 = \frac{1}{6}, ..., p_6 = \frac{1}{6}$ \pause
\item the probability of making a type I error is $p(2\times
6)=\frac{1}{6}\cdot \frac{1}{6} \approx 0.028$
\end{itemize}
\pause
So ... what is wrong?
\end{minipage}
\end{minipage}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
\frametitle{A similar example}
\begin{minipage}{1.\linewidth}
\begin{minipage}{.5\linewidth}
{\bf sensitivity \& specificity of a HIV test}
\begin{tabular}{ccc}
& HIV & no HIV\tabularnewline
test + & 99.7\% & 1.5\%\tabularnewline
        test - & 0.3\% & 98.5\%\tabularnewline
\end{tabular}
\vspace{1cm}
{\bf HIV prevalence (Germany)}
\begin{tabular}{cc}
HIV & no HIV\tabularnewline
0.1\% & 99.9\%\tabularnewline
\end{tabular}
\end{minipage}
\begin{minipage}{.5\linewidth}
\begin{task}{}
What is the probability that you are HIV+ if you test positive?
\end{task}\pause
In order to answer that question, you need two rules for
probability.\pause
\vspace{1cm}
What is the power, what is the type I error of the test?
\end{minipage}
\end{minipage}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
\frametitle{Bayes rule and marginalization}
{\bf Bayes rule}
$$p(A|B)p(B) = p(B|A)p(A)$$
{\bf joint probability}
$$p(A,B) = p(A|B)p(B) = p(B|A)p(A)$$
{\bf marginalization}
$$p(B) = \sum_{\mbox{possible values a of }A}p(a,B)$$
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
\frametitle{probability/Bayesian nomenclature}
  \framesubtitle{repetition}
Let $T\in \{+, -\}$ be the test result and $H\in \{+,-\}$ whether you
are HIV positive or not.
\begin{itemize}
\item $p(T|H)$ is the {\em likelihood} \pause
\item $p(H)$ is the {\em prior} \pause
\item $p(H|T)$ is the {\em posterior}
\end{itemize}
\pause
Given the prior and the likelihood, we can compute the posterior.
\begin{align*}
p(H|T) &= \frac{P(T|H)P(H)}{P(T)} &\mbox{Bayes rule}\\
&= \frac{P(T|H)P(H)}{\sum_h P(T,h)} &\mbox{marginalization}\\
&= \frac{P(T|H)P(H)}{\sum_h P(T|h)p(h)} &\mbox{joint
probability}
\end{align*}
\end{frame}
%-----------------------------------------------------------------
\begin{frame}
\frametitle{HIV test}
\begin{minipage}{1.\linewidth}
\begin{minipage}{.5\linewidth}
\begin{tabular}{ccc}
& HIV & no HIV\tabularnewline
test + & 99.7\% & 1.5\%\tabularnewline
        test - & 0.3\% & 98.5\%\tabularnewline
\end{tabular}
\end{minipage}
\begin{minipage}{.5\linewidth}
\begin{tabular}{cc}
HIV & no HIV\tabularnewline
0.1\% & 99.9\%\tabularnewline
\end{tabular}
\end{minipage}
\end{minipage}
\begin{align*}
p(H=+|T=+)&= \frac{P(T=+|H=+)P(H=+)}{\sum_{h\in\{+,-\}} P(T=+|H=h)p(H=h)} \\
p(H=+|T=+)&= \frac{0.997 \cdot 0.001}{0.997 \cdot 0.001 + 0.015
\cdot 0.999} \\
&\approx 0.062
\end{align*}
\pause
  This means that with a positive HIV test, you have only about a $6.2$\%
  chance of actually being HIV positive. Why is this number so low? \pause
  \only<3>{Because most of the people for whom the test is positive
    are false positives from the HIV$-$ group. This is because HIV$+$ is
    relatively rare.}
\end{frame}
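%-----------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{HIV test}
  \framesubtitle{the computation in Matlab}
  The same computation as a sketch (variable names are our own choice):
\begin{lstlisting}
pHIV   = 0.001;  % prior: prevalence P(H=+)
pTposH = 0.997;  % likelihood P(T=+|H=+), sensitivity
pTposN = 0.015;  % likelihood P(T=+|H=-), false positive rate
% marginalization: P(T=+)
pTpos = pTposH*pHIV + pTposN*(1 - pHIV);
% Bayes rule: P(H=+|T=+)
pHgivenT = pTposH*pHIV / pTpos   % approx. 0.062
\end{lstlisting}
\end{frame}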
%-----------------------------------------------------------------
\begin{frame}
\frametitle{Why is this funny (or sad)?}
\begin{minipage}{1.\linewidth}
\begin{minipage}{.5\linewidth}
\includegraphics[width=.7\linewidth]{figs/frequentistsvsbayesians.png}
\mycite{http://xkcd.com/1132/}
\end{minipage}
\begin{minipage}{.5\linewidth}
{\bf Why is it funny:} Because it points at the fact that
statistical tests usually look at the likelihood only and ignore
the prior.
\vspace{1cm}
{\bf Why is it sad?} Because statistical tests usually look at
the likelihood and ignore the prior.
\end{minipage}
\end{minipage}
\hyperlink{back:bayesian}{\color{gray}go back}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{detour IV: Assessing normality with QQ plots}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\hypertarget{sec:qqplots}{}
\frametitle{histogram equalization}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\begin{task}{histogram equalization}
        Which function $y = f(x)$ transforms $x$ such that $y$ has the
        distribution $p(y)$?
\end{task}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\only<1>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE0.png}
\end{center}
}\pause
\only<2>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE0Solution.png}
\end{center}
}
\end{minipage}
\end{minipage}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{histogram equalization}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.4\linewidth}
\begin{task}{histogram equalization}
        What would the function look like if the target were a Normal
        distribution?
\end{task}
\end{minipage}
\begin{minipage}{0.6\linewidth}
\only<1>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE1.png}
\end{center}
}\pause
\only<2>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE1Solution.png}
\end{center}
}
\end{minipage}
\end{minipage}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{histogram equalization}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.4\linewidth}
\begin{task}{histogram equalization}
Is the target distribution a Normal distribution?
\end{task}
\end{minipage}
\begin{minipage}{0.6\linewidth}
\only<1>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE2.png}
\end{center}
}\pause
\only<2>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE2Solution.png}
\end{center}
}
\end{minipage}
\end{minipage}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{QQ-plots}
\begin{itemize}
\item QQ-plots can be used to visually assess whether a set of data
points might follow a certain distribution. \pause
\item A QQ-plot is constructed by
\begin{enumerate}
    \item computing, for each data point $x_i$, the fraction $q_i$ of
      data points that are less than or equal to $x_i$ (where do you
      know that function from?)\pause
    \item and plotting $x_i$ against the value $y_i$ of the other
      distribution which has the same quantile fraction $q_i$
\end{enumerate}\pause
\item If the two distributions are equal the QQ-plot shows a straight line.\pause
\item How would you assess the normality of data $x_1,...,x_n$ with
a QQ-plot? \pause {\em make the target distribution a Gaussian}
\end{itemize}
\end{frame}
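% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{QQ-plots}
  \framesubtitle{a hand-made QQ-plot}
  A sketch of the construction above (cf. Matlab's built-in {\tt
    qqplot}); the toy data are deliberately non-Normal:
\begin{lstlisting}
x = randn(1000, 1).^2;   % clearly non-Normal toy data
n = numel(x);
q = ((1:n)' - 0.5) / n;  % quantile fractions q_i
xq = sort(x);            % empirical quantiles of the data
yq = norminv(q, 0, 1);   % Normal quantiles with the same q_i
plot(yq, xq, 'k.'); hold on
plot(yq, yq, 'r-')       % straight line = perfect agreement
xlabel('Normal quantiles'); ylabel('data quantiles')
\end{lstlisting}
\end{frame}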
% ----------------------------------------------------------
\begin{frame}
\frametitle{histogram equalization}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.4\linewidth}
\begin{task}{special transform}
        Which function $y = f(x)$ transforms $x$ such that $y$ has the
        distribution $p(y)$?
Do you know that function?
\end{task}
      \only<2>{{\bf Answer:} The cumulative distribution function, $f(x) =
        F(x)$ (a sketch follows on the next slide).}
\end{minipage}
\begin{minipage}{0.6\linewidth}
\only<1>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE3.png}
\end{center}
}\pause
\only<2>{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/HE3Solution.png}
\end{center}
}
\end{minipage}
\end{minipage}
\hyperlink{back:detourIV}{\color{gray} back to statistical tests}
\end{frame}
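% ----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{histogram equalization}
  \framesubtitle{the c.d.f.\ transform in Matlab}
  A sketch of the answer above, assuming the target distribution in the
  task is the uniform one: transforming data by their own c.d.f.\ yields
  values that are uniform on $[0,1]$ (this is also why p-values are
  uniform under $H_0$):
\begin{lstlisting}
x = randn(10000, 1);    % data with known distribution N(0,1)
y = normcdf(x, 0, 1);   % transform by the data's own c.d.f.
hist(y, 20)             % approximately flat histogram
\end{lstlisting}
\end{frame}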
\end{document}