\documentclass{beamer}
\usepackage{xcolor}
\usepackage{listings}
\usepackage{pgf}
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
%\usepackage{multimedia}

\usepackage[english]{babel}
\usepackage{movie15}
\usepackage[latin1]{inputenc}
\usepackage{times}
\usepackage{amsmath}
\usepackage{bm}
\usepackage[T1]{fontenc}
\usepackage[scaled=.90]{helvet}
\usepackage{scalefnt}
\usepackage{tikz}
\usepackage{textcomp}
\usepackage{soul}
\usepackage{hyperref}

\definecolor{lightblue}{rgb}{.7,.7,1.}
\definecolor{mygreen}{rgb}{0,1.,0}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>
{
\usetheme{Singapore}
\setbeamercovered{opaque}
\usecolortheme{tuebingen}
\setbeamertemplate{navigation symbols}{}
\usefonttheme{default}
\useoutertheme{infolines}
% \useoutertheme{miniframes}
}

\AtBeginSection[]
{
\begin{frame}<beamer>
\begin{center}
\Huge \insertsectionhead
\end{center}
% \frametitle{\insertsectionhead}
% \tableofcontents[currentsection,hideothersubsections]
\end{frame}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\setbeamertemplate{blocks}[rounded][shadow=true]

\title[]{Scientific Computing -- Statistics}
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
University T\"ubingen\\
Bernstein Center T\"ubingen}

\institute[Scientific Computing]{}
\date{11/27/2013}
%\logo{\pgfuseimage{logo}}

\subject{Lectures}

%%%%%%%%%% configuration for code
\lstset{
basicstyle=\ttfamily,
numbers=left,
showstringspaces=false,
language=Matlab,
commentstyle=\itshape\color{darkgray},
keywordstyle=\color{blue},
stringstyle=\color{green},
backgroundcolor=\color{blue!10},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
captionpos=b,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\mycite}[1]{
\begin{flushright}
\tiny \color{black!80} #1
\end{flushright}
}

\input{../latex/environments.tex}
\makeatother

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Plan}
\setcounter{tocdepth}{1}
\tableofcontents
\end{frame}

\begin{frame}
\frametitle{Information about statistics}
\begin{itemize}
\item Samuels, M. L., Wittmer, J. A., \& Schaffner, A. A. (2010).
  Statistics for the Life Sciences (4th ed., p. 668). Prentice Hall.
\item Zar, J. H. (1999). Biostatistical Analysis (4th ed., p. 663).
  Prentice Hall, New Jersey.
\item \url{http://stats.stackexchange.com}
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section[meta-study]{how statisticians think -- the meta-study}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{statisticians are lazy}
\Large
\only<1>{
\begin{center}
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-26-05_771.jpg}
\end{center}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<2>{
\begin{center}
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-41-39_523.jpg}
\end{center}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<3>{
\begin{center}
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-29-35_312.jpg}
\end{center}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{the (imaginary) meta-study}
\begin{center}
\only<1>{
\framesubtitle{finite sampling introduces variation: the sampling distribution}
\includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation Tests}
}\pause
\only<2>{
\framesubtitle{statistic vs. population parameter}
\includegraphics[width=.8\linewidth]{figs/statistic1.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation Tests}
}\pause
\only<3>{
\framesubtitle{statistic vs. population parameter}
\includegraphics[width=.8\linewidth]{figs/statistic2.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation Tests}
}\pause
\only<4>{
\framesubtitle{what parts of this diagram do we have in real life?}
\includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation Tests}
}\pause
\only<5>{
\framesubtitle{what parts of this diagram do we have in real life?}
\includegraphics[width=.8\linewidth]{figs/statistic3.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation Tests}
}\pause
\only<6->{
\framesubtitle{what statistics does}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\includegraphics[width=1.\linewidth]{figs/statistic4.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation Tests}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\begin{itemize}
\item it assumes, derives, or simulates the sampling distribution\pause
\item the sampling distribution only makes sense if you think about it
  in terms of the meta-study\pause
\item {\color{red} the sampling distribution is the key to answering
  questions about the population from the value of the statistic}
\end{itemize}
\end{minipage}
\end{minipage}
}
\end{center}
\end{frame}

% % ----------------------------------------------------------
\begin{frame}
\frametitle{illustrating examples}
\begin{question}{lung volume of smokers}
Assume you know the sampling distribution of the mean lung volume of
smokers. Would you believe that the sample came from a group of
smokers?
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example01.png}
\end{center}
\end{question}
\end{frame}

\begin{frame}
\frametitle{illustrating examples}
\begin{question}{lung volume of smokers}
What about now? How would the sampling distribution change if I
changed the population to (i) athletes or (ii) old people?
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example02.png}
\end{center}
\end{question}
\end{frame}

\begin{frame}
\frametitle{illustrating examples}
\begin{question}{Is this diet effective?}
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example03.png}
\end{center}
\end{question}
\end{frame}

\begin{frame}
\frametitle{illustrating examples}
\begin{question}{Is this diet effective?}
What do you think now?
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example04.png}
\end{center}
\end{question}
\end{frame}

\begin{frame}
\frametitle{summary}
\begin{itemize}
\item In statistics, we use finite samples from a population to reason
  about features of the population. \pause
\item The particular feature of the population we are interested in is
  called the {\color{blue}population parameter}. We usually measure
  this parameter in our finite sample as well (the
  {\color{blue}statistic}).\pause
\item Because of variation due to finite sampling, the statistic
  almost never matches the population parameter. \pause
\item Using the {\color{blue}sampling distribution} of the statistic,
  we make statements about the relation between our statistic and the
  population parameter.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{outlook}
{\bf Questions to be addressed}
\begin{itemize}
\item How do we choose the statistic?
\item How do we get the sampling distribution?
\item How does statistical reasoning work in practice?
\end{itemize}
{\bf Perspective}
\begin{itemize}
\item We start by looking at a few standard distributions.
\item We will use those in the statistical tests that follow.
\item For each statistical test, I also try to provide a
  non-parametric method.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{probability primer}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{probability models}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{getting the model right}
In statistics/probability it is important to select the correct
distribution. Models are easier to remember if you remember a
``standard situation''.

\begin{itemize}
\item What is the distribution corresponding to throwing a coin? \pause
\item What in neuroscience/psychology is like throwing a coin (fair or
  unfair)?\pause
\item What is the distribution of counting heads in repeated
  independent coin tosses?\pause
\item What in neuroscience/psychology corresponds to counting heads in
  repeated independent coin tosses?
\end{itemize}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{the different models}
\only<1>{
\framesubtitle{Bernoulli distribution}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/Bernoulli.pdf}
\end{center}

\begin{itemize}
\item single coin toss (success/failure)
\item distribution $p(X=1)=p$
\end{itemize}
}\pause
\only<2>{
\framesubtitle{uniform distribution}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/Uniform.pdf}
\end{center}

\begin{itemize}
\item $n$ items with the same probability of occurrence
\item distribution $p(X=k)=\frac{1}{n}$
\end{itemize}
}\pause
\only<3>{
\framesubtitle{binomial distribution}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/Binomial00.pdf}
\includegraphics[width=.4\linewidth]{figs/Binomial01.pdf}
\end{center}

\begin{itemize}
\item number $k$ of successes/heads in $n$ trials
\item distribution $P(X=k)= {n \choose k} p^k (1-p)^{n-k}$
\item parameters $n,p$
\end{itemize}
}\pause
\only<4>{
\framesubtitle{Poisson distribution}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/Poisson00.pdf}
\includegraphics[width=.4\linewidth]{figs/Poisson01.pdf}
\end{center}

\begin{itemize}
\item successes per time unit for (very) large $n$ and small $p$
\item distribution $P(X=k) = \frac{\lambda^k e^{-\lambda}}{k!}$
\item parameter: success rate $\lambda$
\end{itemize}
}
\only<5>{
\framesubtitle{Gaussian/normal distribution}
\begin{center}
\includegraphics[width=.4\linewidth]{figs/Gaussian00.pdf}
\end{center}

\begin{itemize}
\item shows up everywhere (central limit theorem)
\item distribution $p(x) = \frac{1}{\sigma\sqrt{2\pi}}\operatorname{exp}\left\{-\frac{\left(x-\mu\right)^2}{2\sigma^2}\right\}$
\item parameters: mean $\mu$, standard deviation $\sigma$
\end{itemize}
}
\only<6>{
\framesubtitle{caveat}
\begin{question}{important distinction}
\begin{itemize}
\item For {\em discrete} random variables $P(X=k)$ makes sense
  (probabilities are like ``single weights'').
\item For {\em continuous} random variables $p(X=x)=0$ (probabilities
  are like ``water'').
\item For {\em continuous} random variables it only makes sense to ask
  for the probability that they take values in a particular range.
\end{itemize}
\end{question}
}
\end{frame}

% ----------------------------------------------------------

\begin{frame}
\frametitle{example}
You place a mouse in a circular maze and place some food on the
opposite side. In each trial you record whether the mouse went {\em
left} (``L'') or {\em right} (``R'') to get the food.
\vspace{.5cm}

\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.59\linewidth}
\begin{itemize}
\item What kind of distribution would you expect for the number of
  ``R'' in $10$ trials? What is the distribution of the number of
  ``L''?\pause
\item Here is the result of $10$ trials: ``LLLLLLLLLL''. What is the
  probability of that?
\item What do you conclude from that?
\end{itemize}
\end{minipage}
\begin{minipage}{0.4\linewidth}
\only<1->{
\begin{center}
\includegraphics[width=1.\linewidth]{figs/Binomial00.pdf}
\end{center}
}
\end{minipage}
\end{minipage}
\end{frame}
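
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{example}
\framesubtitle{checking the numbers in Matlab}
One way to check the probability of ``LLLLLLLLLL'' in Matlab (a
sketch; {\tt binopdf} assumes the Statistics Toolbox):
\begin{lstlisting}
% probability of 10 lefts if the mouse chooses like a fair coin
p = 0.5; n = 10;
pAllLeft = p^n           % (1/2)^10, roughly 0.001
% the same via the binomial distribution: k = 0 occurrences of "R"
pAllLeft2 = binopdf(0, n, p)
\end{lstlisting}
\end{frame}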

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{cumulative distribution function}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{cumulative distribution function (c.d.f.)}
\framesubtitle{we will need that a lot in statistics}
\begin{itemize}
\item The c.d.f. is used to compute the probability that a random
  variable is in a particular range.
\item It is defined as $F(y) = P(X \le y)$.
\item For the binomial distribution this would be
  $$F(k) = P(\mbox{no. of successes} \le k)\mbox{ in } n \mbox{ trials}$$
\item Where could I see that probability in that plot for $k=5$ and
  $n=10$?
\begin{center}
\only<1>{
\includegraphics[width=.5\linewidth]{figs/Binomial00.pdf}
}
\only<2>{
\includegraphics[width=.5\linewidth]{figs/BinomialCdf00.pdf}
}\pause
\only<3>{
\includegraphics[width=.5\linewidth]{figs/BinomialCdf01.pdf}
}
\end{center}
\end{itemize}
\end{frame}

% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{cumulative distribution function (c.d.f.)}
\framesubtitle{example}
\small
You want to find out whether a subject performs significantly
differently from chance in $10$ trials that are either successful or
not.
\begin{itemize}[<+->]
\item What would be a good decision rule?
\item[] {\color{gray} We set thresholds on the number of successes and
  decide that (s)he is performing at chance if the performance falls
  within the thresholds.}
\item What is the distribution of the number of successes in $n=10$
  trials if the subject performs at chance?
\item[] {\color{gray} Binomial with $n=10$ and $p=\frac{1}{2}$}
\item Let's say we set the thresholds at $k=2$ and $k=8$; what is the
  probability that we think (s)he is {\em not} performing at chance,
  even though (s)he is?
\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{cumulative distribution function (c.d.f.)}
\framesubtitle{example}
\small
\begin{itemize}[<+->]
\item Let's say we set the thresholds at $k=2$ and $k=8$; what is the
  probability that we think (s)he is {\em not} performing at chance,
  even though (s)he is?
\item[] {\color{gray} The probability for that is $P(X \le 2 \mbox{
  or } X \ge 8)$. Using the c.d.f. that is
\begin{align*}
P(X \le 2 \mbox{ or } X \ge 8) &= P(X \le 2) + P(X \ge 8)
= P(X \le 2) + (1-P(X \le 7))
\end{align*}
}
\end{itemize}
\only<2>{
\begin{center}
\includegraphics[width=.5\linewidth]{figs/BinomialExample00.pdf}
\end{center}
}
\end{frame}
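
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{cumulative distribution function (c.d.f.)}
\framesubtitle{the example in Matlab}
A quick check of the numbers (a sketch; {\tt binocdf} assumes the
Statistics Toolbox):
\begin{lstlisting}
% type I error of the rule "k <= 2 or k >= 8" under
% H0: performance at chance, i.e. binomial n = 10, p = 0.5
n = 10; p = 0.5;
alpha = binocdf(2, n, p) + (1 - binocdf(7, n, p))
% = P(X <= 2) + (1 - P(X <= 7)), roughly 0.11
\end{lstlisting}
\end{frame}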

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{joint and conditional distributions}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
\frametitle{conditional and marginal $\rightarrow$ joint distribution}
\framesubtitle{Bayes' rule}
\begin{itemize}
\small
\item Assume you ran decision experiments with two subjects. Subject
  \#1 had a success probability of $50\%$, while subject \#2 achieved
  $80\%$.
\item $70\%$ of the trials were run with the first subject, $30\%$ of
  the trials with the other.
\item Each trial gets saved in a file on the hard disk.\pause
\item Now, let's assume your recording software had a bug and did not
  store the subject ID in the file.
\item For a given file, we now have two random variables: subject ID
  $X$, number of successes $Y$.
\end{itemize}
\begin{center}
\includegraphics[height=.32\linewidth]{figs/decision01.pdf}
\end{center}
\end{frame}

% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{joint and conditional distributions}
\framesubtitle{definitions}
\begin{definition}{Joint, marginal, and conditional distribution}
\begin{itemize}
\item The {\bf joint distribution $P(X,Y)$} gives the probability that
  a particular combination of $X$ and $Y$ occurs at the same
  time. \pause
\item The {\bf marginal distributions $P(X)$ and $P(Y)$} specify the
  probabilities that a particular value occurs if the value of the
  other variable is ignored. \pause
\item The {\bf conditional distribution $P(X|Y)$} gives the
  probability of particular values of $X$ given that $Y$ has
  particular values.
\end{itemize}\pause
\end{definition}
\begin{center} {\color{blue} joint distribution
$\stackrel{\mbox{Bayes' Rule}}{\leftrightarrow}$
marginal and conditional distribution}
\end{center}
\end{frame}

% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{conditional and marginal $\rightarrow$ joint distribution}
\framesubtitle{Bayes' rule}
\begin{itemize}
\small
\item Assume you ran decision experiments with two subjects. Subject
  \#1 had a success probability of $50\%$, while subject \#2 achieved
  $80\%$.
\item $70\%$ of the trials were run with the first subject, $30\%$ of
  the trials with the other.
\item What probabilities do I need to write at the edges?
\item What distribution do I use for the subject's ID ($X$)?
\item What distribution do I use for the conditional distribution $Y|X$?
\end{itemize}
\begin{center}
\only<1>{\includegraphics[height=.32\linewidth]{figs/decision01.pdf}}
\only<2>{\includegraphics[height=.32\linewidth]{figs/decision02.pdf}}
\only<3>{\includegraphics[height=.32\linewidth]{figs/decision03.pdf}}
\end{center}
\end{frame}

% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{conditional and marginal $\rightarrow$ joint distribution}
\framesubtitle{Bayes' rule}
\begin{itemize}
\small
\item The joint probabilities are obtained by multiplying the
  probabilities along the paths from the root node to the leaves.
\begin{center}
\includegraphics[height=.32\linewidth]{figs/decision03.pdf}
\end{center}\pause
\item In algebraic terms, this is known as {\em Bayes' rule} (very important!)
  $$\color{red} P(Y|X)P(X) = P(X|Y)P(Y) = P(X,Y)$$\pause
\item You can remember it as ``moving variables in front of the bar''
  $$P(X|Y) P(Y) = P(X,Y|\_)$$
\end{itemize}
\end{frame}

% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Bayes' rule}
$$P(X|Y)P(Y) = P(Y|X)P(X) = P(X,Y)$$

\begin{task}{Independent random variables}
If two random variables are independent, the joint distribution is
the product of their marginals $$ P(X,Y) =P(X) P(Y)$$
How can you see that from Bayes' rule?
\end{task}
\pause

\begin{solution}{Solution}
If the variables are independent, $P(X|Y) = P(X)$ and $P(Y|X) = P(Y)$:
the probability of $X$ is the same as the probability of $X$ given
that I know $Y$, because knowing $Y$ does not help.
\end{solution}
\end{frame}

% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{joint $\rightarrow$ marginal and conditional distribution}
\begin{itemize}
\small
\item The plot shows the joint distribution $P(X,Y)$, where $X$ is the
  subject ID and $Y$ the number of successes in $n=10$ trials.
\begin{center}
\only<-1>{\includegraphics[width=.83\linewidth]{figs/Joint00.pdf}}
\only<2>{\includegraphics[width=.83\linewidth]{figs/Joint01.pdf}}
\only<3>{\includegraphics[width=.83\linewidth]{figs/Joint02.pdf}}
\end{center}

\only<-1>{ \vspace{2cm}}
\only<2-3>{ \item We can get the marginal distributions via {\em
  marginalization} (very important!):
  $$\color{red} P(Y) =\sum_{i=1}^2P(X=i, Y) \mbox{ and } P(X) =
  \sum_{j=0}^{n} P(X, Y=j)$$}
\only<3->{ \item We can get the conditional distribution via Bayes' rule:
  $$P(X|Y)P(Y) = P(X,Y) \Leftrightarrow P(X|Y) = \frac{P(X,Y)}{P(Y)}$$}
\only<-2>{ \vspace{2cm}}
\end{itemize}
\end{frame}

% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{the posterior}
\begin{itemize}
\small
\item Could we use the probability distribution to get an idea which
  subject the number of successes came from?\pause
\item Use Bayes' rule to ``invert'' the conditional distribution
  $$P(X|Y=k) = P(X,Y=k)/P(Y=k)$$
\end{itemize}
\begin{center}
\only<-2>{\includegraphics[height=.28\linewidth]{figs/Joint02.pdf}}
\only<3->{\includegraphics[height=.53\linewidth]{figs/Posterior00.pdf}}
\end{center}
\end{frame}
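
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{the posterior}
\framesubtitle{the example in Matlab}
The whole example in a few lines of Matlab (a sketch with the numbers
from the slides; {\tt binopdf} assumes the Statistics Toolbox):
\begin{lstlisting}
% X = subject ID (1 or 2), Y = number of successes in n trials
n = 10;
pX = [0.7, 0.3];    % marginal P(X)
pS = [0.5, 0.8];    % success probability of each subject
joint = zeros(2, n + 1);
for i = 1:2
    joint(i, :) = pX(i) * binopdf(0:n, n, pS(i)); % P(X=i, Y)
end
pY = sum(joint, 1);                      % marginalization: P(Y)
k = 9;                                   % observed successes
posterior = joint(:, k + 1) / pY(k + 1)  % Bayes: P(X | Y=k)
\end{lstlisting}
\end{frame}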

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{summary}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{summary}
\begin{itemize}
\item We need to know certain distributions to use them as sampling
  distributions. \pause
\item For many distributions one can use a ``standard situation'' to
  remember them. \pause
\item When dealing with two or more random variables one deals with
  {\color{blue}joint, marginal}, and {\color{blue}conditional
  distributions}.\pause
\item Marginal and conditional distributions can be converted into the
  joint distribution via {\color{blue}Bayes' rule}.\pause
\item The conversion in the other direction can be done via
  {\color{blue}marginalization} and {\color{blue}Bayes' rule}.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{error bars \& confidence intervals}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\subsection{errorbars}
% ----------------------------------------------------------
\begin{frame}
\frametitle{illustrating example}

As part of a study of the development of the thymus gland, researchers
weighed the glands of $50$ chick embryos after 14 days of
incubation. The following plot depicts the mean thymus gland weights
(in mg):
\mycite{modified from SWS exercise 6.3.3.}
\pause
{\bf Which of the two bar plots is the correct way of displaying the
data?}

\begin{columns}
\begin{column}[l]{.5\linewidth}
\includegraphics[width=\linewidth]{figs/StandardErrorOrStandardDeviation.pdf}
\end{column}
\begin{column}[r]{.5\linewidth}
\pause That depends on what you want to say
\begin{itemize}
\item To give a measure of variability in the data: use the
  {\color{blue} standard deviation $\hat\sigma =
  \sqrt{\frac{1}{n-1}\sum_{i=1}^n (x_i - \hat\mu)^2}$}
\item To make a statement about the variability of the mean estimate:
  use the {\color{blue}standard error $\frac{\hat\sigma}{\sqrt{n}}$}
\end{itemize}
\end{column}
\end{columns}

%%%%%%%%%%%%%%% GO ON HERE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% that depends: variability (descriptive statistics, how variable is
% the mean -> inferential, makes only sense in the meta-study setting)
% first matlab exercise: simulate standard error
% recommend paper for eyeballing test results from standard errors
% from std of mean to confidence intervals
% introduce bootstrapping (matlab exercise), then t-statistic
% intervals
% end with standard error of the median (and the thing from wikipedia)
\end{frame}

%------------------------------------------------------------------------------
\begin{frame}
\frametitle{standard error}
\framesubtitle{bootstrapping}

\begin{task}{quantifying the variability in the mean}
Download \url{https://www.dropbox.com/s/20l7ptrdc4kkceq/materialNMI.zip}

Load the dataset {\tt thymusglandweights.dat} into Matlab and use the
first $50$ data points as your dataset. Repeat the following steps
$m=500$ times:
\begin{enumerate}
\item sample $50$ data points from $x$ with replacement
\item compute their mean and store it
\end{enumerate}
Look at the standard deviation of the computed means and compare it
to the standard error.
\end{task}
\end{frame}
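
%------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{standard error}
\framesubtitle{a possible solution sketch}
One way to do the exercise in Matlab (a sketch; it assumes
{\tt thymusglandweights.dat} is a plain ASCII column of numbers):
\begin{lstlisting}
x = load('thymusglandweights.dat');
x = x(1:50);                  % first 50 data points
m = 500;                      % bootstrap repetitions
bmeans = zeros(m, 1);
for i = 1:m
    idx = randi(50, 50, 1);   % sample with replacement
    bmeans(i) = mean(x(idx));
end
bootstrapSE  = std(bmeans)          % std of the bootstrapped means
analyticalSE = std(x) / sqrt(50)    % standard error for comparison
\end{lstlisting}
\end{frame}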

%------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{standard error}
\framesubtitle{bootstrapping}
\begin{itemize}
\item The sample standard error $\frac{\hat\sigma}{\sqrt{n}}$ is
  {\color{blue}an estimate of the standard deviation of the means} in
  repeated experiments which is computed from a single experiment.
\item When you want to do statistical tests on the mean, it is better
  to use the standard error, because one can eyeball significance
  from it.
  \mycite{Cumming, G., Fidler, F., \& Vaux, D. L. (2007). Error bars
  in experimental biology. The Journal of Cell Biology, 177(1), 7--11.}
\item {\color{blue}Bootstrapping} is a way to generate an estimate of
  the {\color{blue}sampling distribution of any statistic}. Instead of
  sampling from the true distribution, it samples from the empirical
  distribution represented by your dataset.
  \mycite{Efron, B., \& Tibshirani, R. J. (1994). An Introduction to
  the Bootstrap. Chapman and Hall/CRC}
\end{itemize}
\end{frame}

%------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{standard error of the median?}
{\bf What kind of errorbars should we use for the median?}

It depends again:

{\bf Descriptive statistics}
\begin{itemize}
\item As a {\color{blue}descriptive statistic} one could use the {\em
  median absolute deviation}: the median of the absolute differences
  of the data points from the median.
\item Alternatively, one could bootstrap a standard deviation of the
  median.
\end{itemize}
\pause
{\bf Inferential statistics}
\begin{itemize}
\item For {\color{blue}inferential statistics} one should use
  something that gives the reader {\color{blue}information about
  significance}.
\item Here, {\color{blue}confidence intervals} are a better choice.
\end{itemize}
\end{frame}

% ----------------------------------------------------------
\subsection{confidence intervals \& bootstrapping}
%------------------------------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals}
\begin{center}
\only<1>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-55-39_181.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<2>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-56-59_866.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<3>{
\vspace{.1cm}
\includegraphics[width=.4\linewidth]{figs/2012-10-29_14-58-18_054.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<4>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-59-05_984.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<5>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_15-04-38_517.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<6>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_15-09-25_388.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}
\end{center}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals for the median}
\begin{definition}{Confidence interval}
A $(1-\alpha)\cdot 100\%$ confidence interval for a statistic
$\hat\theta$ is an interval $\hat\theta \pm a$ such that the
population parameter $\theta$ is contained in that interval in
$(1-\alpha)\cdot 100\%$ of the experiments.

An alternative way to put it is that $(\hat\theta - \theta) \in
[-a,a]$ in $(1-\alpha)\cdot 100\%$ of the cases.
\end{definition}

\begin{columns}
\begin{column}[l]{.5\linewidth}
If we knew the sampling distribution of the median $\hat m$, could we
generate, e.g., a $95\%$ confidence interval?\pause
\vspace{.5cm}

Yes, we could choose the interval such that $\hat m - m$ lies in that
interval in $95\%$ of the cases.
\end{column}
\begin{column}[r]{.5\linewidth}
\only<1>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian00.pdf}}
\only<2>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian01.pdf}}
\end{column}
\end{columns}

% \begin{task}{Bootstrapping a confidence interval for the median}
% \begin{itemize}
% \item Use the same dataset as before.
% \item Bootstrap $500$ medians.
% \item Compute the $2.5\%$ and the $97.5\%$ percentile of the
% $500$ medians.
% \end{itemize}
% \end{task}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals for the median}
\framesubtitle{how to get the sampling distribution}

\begin{task}{Bootstrapping a confidence interval for the median}
\begin{itemize}
\item Use the same dataset as before.
\item Bootstrap $500$ medians.
\item Compute the $2.5\%$ and the $97.5\%$ percentile of the $500$
  medians.
\end{itemize}
These two numbers give you $\hat m -a$ and $\hat m + a$ for the
$95\%$ confidence interval.
\end{task}
\end{frame}
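
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{confidence intervals for the median}
\framesubtitle{a possible solution sketch}
One way to do it in Matlab (a sketch; it reuses the vector {\tt x}
from the previous exercise, and {\tt prctile} assumes the Statistics
Toolbox):
\begin{lstlisting}
m = 500;                       % bootstrap repetitions
n = numel(x);
bmedians = zeros(m, 1);
for i = 1:m
    bmedians(i) = median(x(randi(n, n, 1)));  % resample, median
end
% 2.5% and 97.5% percentiles of the bootstrapped medians
ci = prctile(bmedians, [2.5 97.5])
\end{lstlisting}
\end{frame}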

% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals for the median}
\framesubtitle{how to get it analytically}
There is also an analytical estimate of the confidence interval for
the median: use the $\frac{\alpha}{2}$ and $1 - \frac{\alpha}{2}$
quantiles of a binomial distribution.

\begin{task}{Comparing the analytical interval to the bootstrapped one}
\begin{itemize}
\item Get the $\frac{\alpha}{2}$ quantile minus one and the $1 -
  \frac{\alpha}{2}$ quantile of a binomial distribution using {\tt
  binoinv}.
\item Sort your data points and use the data points at the positions
  corresponding to the quantiles.
\item Compare that to the bootstrapped confidence interval.
\end{itemize}
\end{task}
\tiny The idea behind this:
\begin{itemize}
\item The probability that the true median $m$ is covered by the
  interval between $x_r$ and $x_{r+1}$ is binomial $${n \choose r}
  \left(\frac{1}{2}\right)^r \left(\frac{1}{2}\right)^{n-r}$$
\item Now we take enough intervals in the ``middle'' of our sample
  that we cover the true median with at least $1-\alpha$ probability.
\mycite{David, H. A., \& Nagaraja, H. N. (2003). Order Statistics. Wiley.}
\end{itemize}
\end{frame}
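
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{confidence intervals for the median}
\framesubtitle{a possible solution sketch}
A sketch of the analytical interval in Matlab ({\tt binoinv} assumes
the Statistics Toolbox; conventions for the exact order-statistic
indices differ by one between textbooks):
\begin{lstlisting}
alpha = 0.05;
n = numel(x);
xs = sort(x);                        % order statistics
lo = binoinv(alpha/2, n, 0.5);       % lower rank
hi = binoinv(1 - alpha/2, n, 0.5);   % upper rank
ci = [xs(lo + 1), xs(hi)]            % compare to the bootstrap CI
\end{lstlisting}
\end{frame}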

% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals}
\framesubtitle{Notice the theme!}
\begin{enumerate}
\item choose a statistic
\item get the sampling distribution of the statistic (by theory or
  simulation)
\item use that distribution to reason about the relation between the
  true population parameter (e.g. $m$) and the sampled statistic
  $\hat m$
\end{enumerate}

\begin{center}
\color{blue}
This is the scaffold of most statistical techniques. Try to find it
and it can help you understand them.
\end{center}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{let's practice that again}
\framesubtitle{confidence interval for the mean}

\begin{task}{Bootstrapping a confidence interval for the mean}
\begin{itemize}
\item Use the same dataset as before.
\item Use bootstrapping to get a $95\%$ confidence interval for the
  mean.
\end{itemize}
\end{task}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence interval for the mean}
\framesubtitle{the analytical way}
Getting a convenient sampling distribution is (a little bit) more
difficult:
\begin{itemize}
\item If the $x_1,...,x_n\sim \mathcal N(\mu,\sigma)$ are Gaussian,
  then $\hat\mu$ is Gaussian as well.
\item What is the mean of $\hat\mu$? What is its standard deviation?\pause
\item[]{\color{gray} $\langle\hat\mu\rangle_{X_1,...,X_n} = \mu$ and
  $\mbox{std}(\hat\mu) = \frac{\sigma}{\sqrt{n}}$}\pause
\item The problem is that $\hat\mu \sim \mathcal N\left(\mu,
  \frac{\sigma}{\sqrt{n}}\right)$ depends on unknown population
  parameters.\pause
\item However, $$\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}} \sim
  \mbox{t-distribution with }n-1\mbox{ degrees of freedom}$$
\item Therefore,
\begin{align*}
P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
\end{align*}
\end{itemize}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence interval for the mean}
\begin{task}{Analytical confidence interval for the mean}
Extend your script to contain the analytical confidence interval
using
\begin{align*}
P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
\end{align*}
\end{task}
\end{frame}
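
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{confidence interval for the mean}
\framesubtitle{a possible solution sketch}
A sketch in Matlab ({\tt tinv} assumes the Statistics Toolbox):
\begin{lstlisting}
n  = numel(x);
mu = mean(x);
se = std(x) / sqrt(n);          % standard error
t  = tinv(0.975, n - 1);        % t quantile, n-1 DoF
ci = [mu - t * se, mu + t * se] % compare to the bootstrap CI
\end{lstlisting}
\end{frame}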

% ----------------------------------------------------------
\subsection{summary}
% ----------------------------------------------------------

\begin{frame}
\frametitle{summary}
\begin{emphasize}{Which errorbars should I choose?}
Always use errorbars to help the reader see your point.
\end{emphasize}
\pause
\begin{itemize}
\item Errorbars can {\color{blue} describe the variability} in a
  dataset ({\color{blue}descriptive statistics}). Examples: {\em
  standard deviation, inter-quartile range, ...}
\item {\color{blue}Errorbars yield information about significance in
  testing (inferential statistics)}. Examples: {\em standard error of
  the mean, confidence intervals, ...}
\item Other possible ways of displaying variability: {\em boxplots,
  violin plots, histograms, ...}
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{statistical tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{one-sample test on the mean}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}
\frametitle{from confidence intervals to one-sample test}

\begin{task}{example: eye movements}
\small
In an experiment you measure eye movements of subjects on the
screen. You want to be sure that the subject fixates a certain target
(at $x=0$). During the fixation period, you acquire $n=16$
measurements. The measurements have a mean of $\hat\mu=2.5$ and a
standard deviation of $\hat\sigma=4$. Assuming that the single
fixation locations are Gaussian distributed, can you be $95\%$
confident that the subject fixated the target (x-position)?
\end{task}
\pause
\begin{solution}{use confidence intervals}
\small
Compute a $95\%$ confidence interval: does it contain $\mu=0$? Yes?
Then we are $95\%$ confident!

From the table we get $t_{0.025}=2.131$; the standard error is
$\frac{\hat\sigma}{\sqrt{n}} = \frac{4}{\sqrt{16}}=1$, which means
that $$0\pm t_{0.025}\frac{\hat\sigma}{\sqrt{n}} = 0 \pm 2.131$$
is our confidence interval. Therefore we cannot be $95$\% confident
in this case.
\end{solution}
\end{frame}
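
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{from confidence intervals to one-sample test}
\framesubtitle{checking the numbers in Matlab}
The same numbers in Matlab (a sketch; {\tt tinv} assumes the
Statistics Toolbox):
\begin{lstlisting}
n = 16; muhat = 2.5; sigmahat = 4;
se = sigmahat / sqrt(n);        % = 1
t  = tinv(0.975, n - 1);        % = 2.131, the table value
ci = [-t * se, t * se]          % interval around mu = 0
% muhat = 2.5 lies outside, so we cannot be 95% confident
\end{lstlisting}
\end{frame}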

% ----------------------------------------------------------
\begin{frame}
\frametitle{from confidence intervals to one-sample test}
\begin{task}{example: eye movements}
Could we put the interval on $\mu=0$ as well?
\end{task}
\pause
\begin{solution}{example: eye movements}
Yes, if the interval around $\hat\mu$ contains $\mu$, then the
interval around $\mu$ also contains $\hat\mu$.
\end{solution}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{one-sample t-test}

\begin{task}{example 2: eye movements again}
\small
Now assume that there is a fixation target at $x=0$. You are running
the experiment with a monkey and you want to discard all trials in
which the monkey was not fixating the target.

During the trial, you again acquire $n=16$ measurements with mean
$\hat\mu=2.5$ and standard deviation $\hat\sigma=4$. How can you be
confident that the monkey did not fixate the target if you are
willing to be wrong in $5\%$ of the cases, where ``wrong'' means that
you believe the monkey was not fixating when in fact it was?
\end{task}
\pause
\begin{solution}{example 2: eye movements again}
\small
The steps to the solution are exactly the same; only the logic is
different.
\begin{itemize}
\item We make a $95\%$ confidence interval around the fixation target
  $\mu=0$. This means that if the monkey was actually fixating the
  target, $95\%$ of the measured averaged positions $\hat\mu$ would
  fall into that interval.
\item $5\%$ of the measurements would fall outside the interval even
  though the monkey fixated, and we would falsely treat them as ``not
  fixated''.
\end{itemize}
\end{solution}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{one-sample t-test}
\framesubtitle{Notice the theme again!}
\only<1>{
\begin{center}
\includegraphics[width=0.4\linewidth]{figs/repetition0.png}
\end{center}
\begin{enumerate}
\small
\item Choose a statistic! We take the standardized mean $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
\end{enumerate}
}\pause
\only<2>{
\begin{center}
\includegraphics[width=0.4\linewidth]{figs/repetition1.png}
\end{center}
\begin{enumerate}
\small
\item Choose a statistic! We take the standardized mean $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
\item Get a sampling distribution! Here, we get it by assuming that
  the positions $x_1,...,x_{16}$ are Gaussian.
\end{enumerate}
}\pause
\only<3>{
\begin{center}
\includegraphics[width=0.4\linewidth]{figs/repetition2.png}
\end{center}
\begin{enumerate}
\small
\item Choose a statistic! We take the standardized mean $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
\item Get a sampling distribution! Here, we get it by assuming that
  the positions $x_1,...,x_{16}$ are Gaussian. The resulting
  distribution of $t$ is a t-distribution.
\end{enumerate}
}\pause
\only<4>{
\begin{center}
\includegraphics[width=0.4\linewidth]{figs/repetition3.png}
\end{center}
\begin{enumerate}
\small
\item Choose a statistic! We take the standardized mean $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
\item Get a {\color{blue}null distribution}! Here, we get it by
  assuming that the positions $x_1,...,x_{16}$ are Gaussian. The
  resulting distribution of $t$ is a t-distribution.
\item Get an interval around $\mu=0$ in which values of $\hat\mu$ are
  assumed typical for $\mu=0$, the {\color{blue}null hypothesis
  $H_0$}.
\end{enumerate}
}
\pause
\only<5>{
\begin{center}
\includegraphics[width=0.4\linewidth]{figs/repetition5.png}
\end{center}
\begin{enumerate}
\small
\item Choose a statistic! We take the standardized mean
  $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
\item Get a {\color{blue}null distribution}! Here, we get it by
  assuming that the positions $x_1,...,x_{16}$ are Gaussian. The
  resulting distribution of $t$ is a t-distribution.
\item Get an interval around $\mu=0$ in which values of $\hat\mu$ are
  assumed typical for $\mu=0$, the {\color{blue}null hypothesis
  $H_0$}. This is done by fixing the {\color{blue}type I error}
  probability.
\end{enumerate}
}
\pause
\only<6>{
\begin{center}
\includegraphics[width=0.4\linewidth]{figs/repetition4.png}
\end{center}
\begin{enumerate}
\small
\item Choose a statistic! We take the standardized mean
  $t=\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}}$.
\item Get a {\color{blue}null distribution}! Here, we get it by
  assuming that the positions $x_1,...,x_{16}$ are Gaussian. The
  resulting distribution of $t$ is a t-distribution.
\item Get an interval around $\mu=0$ in which values of $\hat\mu$ are
  assumed typical for $\mu=0$, the {\color{blue}null hypothesis
  $H_0$}. This is done by fixing the {\color{blue}type I error}
  probability.
\item Outside that interval we consider $\mu=0$ as implausible and
  reject $H_0$.
\end{enumerate}
}
\end{frame}

% ----------------------------------------------------------
\subsection{another one-sample test}
% ----------------------------------------------------------
\begin{frame}
\frametitle{another one-sample test}
\begin{task}{Fair coin?}
\small
Assume you carry out the following test to determine whether a coin
is fair or not:

You throw the coin $n=3$ times. If the result is either $3\times$
head or $3\times$ tail, you conclude that the coin is not fair.

Answer the following questions (for yourself first):
\begin{enumerate}
\item What is the meta-study? \pause {\em Repeated experiments of 3
  throws with this coin.}\pause
\item What is the statistic used? \pause {\em The number of heads
  (could also be tails).}\pause
\item What is $H_0$? \pause {\em The coin is fair.}\pause
\item What is the Null distribution? \pause {\em The distribution is
  binomial $$p(k\mbox{ heads in }n\mbox{ throws})={n \choose k}
  \left(\frac{1}{2}\right)^k \left(\frac{1}{2}\right)^{n-k} $$}\pause
\item What is the Type I error of this test? \pause {\em $p(HHH|H_0) + p(TTT|H_0) = \frac{2}{8}$}
\end{enumerate}
\end{task}
\end{frame}
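
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{another one-sample test}
\framesubtitle{checking the type I error in Matlab}
A one-line check (a sketch; {\tt binopdf} assumes the Statistics
Toolbox):
\begin{lstlisting}
% P(TTT) + P(HHH) under H0: fair coin
n = 3; p = 0.5;
alpha = binopdf(0, n, p) + binopdf(n, n, p)   % = 2/8 = 0.25
\end{lstlisting}
\end{frame}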

% ----------------------------------------------------------
\subsection{paired sample t-test}
% ----------------------------------------------------------
\begin{frame}
\frametitle{paired sample t-test}
\begin{task}{Hunger Rating (SWS, Example 3.2.4)}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\small During a weight loss study each of nine subjects was given
either the active drug m-chlorophenylpiperazine (mCPP) for two weeks
and then a placebo for another two weeks, or else was given the
placebo for the first two weeks and then mCPP for the second two
weeks. Can we say that there was an effect with significance level
$5$\%?
\end{minipage}
\begin{minipage}{0.5\linewidth}
\begin{center}
\includegraphics[width=0.8\linewidth]{figs/hunger.png}
\end{center}
\end{minipage}
\end{minipage}
\vspace{.5cm}

What could we use as statistic?
What is $H_0$?
Is the difference significant?
\end{task}
\end{frame}

\begin{frame}
\frametitle{paired sample t-test}
\begin{solution}{Hunger Rating (SWS, Example 3.2.4)}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\small
\begin{enumerate}
\item The statistic is the mean difference between drug and placebo.\pause
\item $H_0$ is ``there is no difference'', i.e. the true mean of the
  differences is zero. \pause
\item The standard error is $33/\sqrt{9}=11$.\pause
\item $n-1=8$ DoF yields (t-distribution table) $t_{0.025}=2.306$, so
  we would reject $H_0$ if $\hat\mu$ is not in $0\pm t_{0.025}\cdot 11
  = \pm 25.366$. \pause
\item This means the difference is significant with $\alpha=0.05$.
\end{enumerate}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\begin{center}
\includegraphics[width=0.8\linewidth]{figs/hunger.png}
\end{center}
\end{minipage}
\end{minipage}
\end{solution}
\end{frame}
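
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{paired sample t-test}
\framesubtitle{in Matlab}
A sketch of the same test in Matlab ({\tt drug} and {\tt placebo}
stand for hypothetical vectors with one rating per subject, in the
same subject order; {\tt ttest} assumes the Statistics Toolbox):
\begin{lstlisting}
d = drug - placebo;               % paired differences
n = numel(d);
t = mean(d) / (std(d) / sqrt(n)); % t-statistic
tcrit = tinv(0.975, n - 1);       % reject H0 if |t| > tcrit
% or directly:
[h, pval] = ttest(drug, placebo)  % h = 1: significant at 5%
\end{lstlisting}
\end{frame}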

\begin{frame}
\frametitle{paired sample t-test}
\begin{itemize}
\item a paired sample consists of a number of {\em paired}
  measurements (e.g. before/after)\pause
\item take the differences (either there are many of them, or check
  that they are approximately Gaussian distributed)\pause
\item use a one-sample t-test on the differences
\end{itemize}
\end{frame}

% ----------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{sign rank test}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}
\frametitle{sign rank test}
\begin{task}{Hunger Rating (SWS, Example 3.2.4)}
\small
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\small Consider again the example data from before. Instead of taking
the difference, we now consider only whether ``drug'' was smaller or
greater than ``placebo''. We then count the number of times for which
``drug''$<$``placebo'' and the number of times
``drug''$>$``placebo''.
\end{minipage}
\begin{minipage}{0.5\linewidth}
\begin{center}
\includegraphics[width=0.5\linewidth]{figs/hunger.png}
\end{center}
\end{minipage}
\end{minipage}
\begin{itemize}
\item What is the statistic?\pause {\em The number $N_+$ of ``$>$''
  or the number $N_-$ of ``$<$''.} \pause
\item What is $H_0$? \pause {\em $N_+ = N/2$}
\pause
\item What is $H_A$? \pause {\em $N_+ > N/2$ or $N_+ < N/2$}
\pause
\item What is the Null distribution? \pause {\em Binomial with $p=0.5$}
\pause
\item Given $\alpha$, how is the region determined in which we reject
  $H_0$? \pause {\em Choose $a$ such that $P(k>n-a\,|\,H_0) +
  P(k<a\,|\,H_0)$ equals $\alpha$}
\end{itemize}
\end{task}
\end{frame}
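
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{sign rank test}
\framesubtitle{in Matlab}
A sketch of the sign test in Matlab ({\tt drug} and {\tt placebo} are
again hypothetical paired vectors; ties are ignored here, and {\tt
binocdf} assumes the Statistics Toolbox):
\begin{lstlisting}
n = numel(drug);
Nplus = sum(drug > placebo);        % the statistic
% two-sided p-value from the binomial null distribution
% (for Nplus ~= n/2; each tail counted once)
pval = binocdf(min(Nplus, n - Nplus), n, 0.5) ...
     + (1 - binocdf(max(Nplus, n - Nplus) - 1, n, 0.5))
\end{lstlisting}
\end{frame}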

\begin{frame}
\frametitle{sign rank test vs. paired sample t-test}
\begin{itemize}
\item the paired sample t-test assumes that the differences are
  Gaussian distributed\pause
\item the sign rank test makes no assumption about the distribution\pause
\item both assume that the pairs are independently drawn\pause
\item the sign rank test is less powerful than the t-test (you will
  see in a minute what that means)
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{test nomenclature}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{test nomenclature}
\begin{center}
\only<1>{\includegraphics[width=\linewidth]{figs/testframework00.pdf}}
\only<2>{\includegraphics[width=\linewidth]{figs/testframework01.pdf}}
\end{center}
\small
\begin{columns}
\begin{column}[l]{.5\linewidth}
{\bf You want:}
\begin{itemize}
\item large power
\item small type I \& II error probabilities ($\alpha$ and $\beta$)
\end{itemize}
\end{column}
\begin{column}[r]{.5\linewidth}
\begin{itemize}
\item \hyperlink{sec:power}{\color{magenta}detour II: statistical power} \hypertarget{back:power}{}
\item \hyperlink{sec:bayesian}{\color{magenta}detour III: Bayes' rule
  and statistical tests} \hypertarget{back:bayesian}{}
\end{itemize}
\end{column}
\end{columns}

Which of the above can {\bf you} choose? \pause {\em the type I error
probability $\alpha$}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{zoo of statistical tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\hypertarget{back:detourIV}{}
\frametitle{how to choose the statistical test}
\begin{center}
\includegraphics[height=.38\linewidth]{figs/fig0.pdf}
\end{center}
\begin{itemize}
\item Normality can be checked with a QQ-plot
  (\hyperlink{sec:qqplots}{\color{magenta} detour IV: QQ-plots}).
\item If $n$ is large and the variance of the data distribution is
  finite, the central limit theorem guarantees approximate normality
  for ``summed statistics''.
\end{itemize}
\end{frame}
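
% ------------
\begin{frame}[fragile]
\frametitle{how to choose the statistical test}
\framesubtitle{eyeballing normality in Matlab}
A quick way to eyeball normality (a sketch; {\tt qqplot} assumes the
Statistics Toolbox and a data vector {\tt x}):
\begin{lstlisting}
% points close to the straight line indicate
% approximately Gaussian data
qqplot(x);
\end{lstlisting}
\end{frame}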

% ------------

\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[height=.6\linewidth]{figs/fig2.pdf}
\end{center}
\end{frame}

% ------------

\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[height=.6\linewidth]{figs/fig3.pdf}
\end{center}
\end{frame}

% ------------

%-----------------------------------------------------------------
%-----------------------------------------------------------------
\begin{frame}
\frametitle{tests for normal data}
\begin{task}{menstrual cycle}
The data set {\tt menstrual.dat} contains the lengths of the
menstrual cycles in a random sample of 15 women. Assume we want to
test the hypothesis that the mean length of the human menstrual cycle
is equal to a lunar month ($29.5$ days). Consider the data to be
sufficiently normal.

Questions:
\begin{itemize}
\item What is $H_0$? What is $H_A$? \pause $H_0: \mu=29.5$,
  $H_A: \mu\not=29.5$ \pause
\item What is the test statistic? \pause $t=\frac{\hat\mu -
  29.5}{\hat\sigma/\sqrt{n}}$ \pause
\item Which test did you use and why? \pause {\em One-sample t-test:
  data normal, one sample against a fixed mean.}
\end{itemize}
\end{task}

\hyperlink{sec:twotailed}{\color{magenta}detour I: one- vs. two-tailed}
\hypertarget{back:twotailed}{}
\end{frame}
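
%-----------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{tests for normal data}
\framesubtitle{in Matlab}
A sketch of the test in Matlab (assuming {\tt menstrual.dat} is a
plain ASCII column; {\tt ttest} is from the Statistics Toolbox):
\begin{lstlisting}
x = load('menstrual.dat');
[h, pval] = ttest(x, 29.5)  % h = 1: reject H0 at the 5% level
% the test statistic by hand:
t = (mean(x) - 29.5) / (std(x) / sqrt(numel(x)))
\end{lstlisting}
\end{frame}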

%-----------------------------------------------------------------

\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[height=.6\linewidth]{figs/fig4.pdf}
\end{center}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{}
\begin{task}{chirping}
A scientist conducted a study of how often her pet parakeet
chirps. She recorded the number of distinct chirps the parakeet made
in a 30-minute period, sometimes when the room was silent and
sometimes when music was playing. The data are shown in the following
table. Test whether the bird changes its chirping behavior when music
is playing (data set {\tt chirping.dat}; columns: day, with, without).

Questions:
\begin{itemize}
\item What is $H_0$? What is $H_A$? \pause
  $d_i=x_{\mbox{with}}-x_{\mbox{without}}$. $H_0: \mu_d=0$,
  $H_A: \mu_d\not=0$ \pause
\item What is the test statistic? \pause $t=\frac{\hat\mu_d -
  0}{\hat\sigma_d/\sqrt{n}}$ \pause
\item Which test did you use and why? \pause {\em Paired t-test: data
  sufficiently normal, measurements are paired by day.}
\end{itemize}
\end{task}
\end{frame}

%-----------------------------------------------------------------

\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[height=.7\linewidth]{figs/fig5.pdf}
\end{center}
\end{frame}

%-----------------------------------------------------------------

\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/fig6.pdf}
\end{center}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{two independent sample test}
\begin{task}{Brain Weights (permutation test)}
The dataset {\tt brainweight.dat} contains brain weights of males and
females. It consists of {\bf (i) two samples (male/female)} which are
{\bf (ii) not paired}. We want to test whether the mean brain weights
of males and females are different.
\begin{itemize}
\item What could we use as statistic?\pause {\em~the difference in the
  means} \pause
\item What would be $H_0$?\pause {\em~the difference is zero} \pause
\item Can you think of a way to generate an estimate of the Null
  distribution with Matlab? \pause {\em~Permutation test: shuffle the
  labels, compute the difference in means, repeat ...}. \pause
\end{itemize}
\end{task}
\begin{itemize}
\item The {\color{blue}two-sample independent t-test} is the
  parametric test for this dataset.
\item If normality does not hold, you can use the
  {\color{blue}Wilcoxon-Mann-Whitney test}.
\end{itemize}
\end{frame}
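
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{two independent sample test}
\framesubtitle{a possible permutation test sketch}
One way to simulate the Null distribution in Matlab ({\tt male} and
{\tt female} stand for hypothetical vectors loaded from
{\tt brainweight.dat}):
\begin{lstlisting}
dobs = mean(male) - mean(female);   % observed statistic
pooled = [male(:); female(:)];
nm = numel(male); N = numel(pooled);
m = 5000; dnull = zeros(m, 1);
for i = 1:m
    idx = randperm(N);              % shuffle the labels
    dnull(i) = mean(pooled(idx(1:nm))) ...
             - mean(pooled(idx(nm+1:end)));
end
% two-sided p-value: fraction of shuffles at least as extreme
pval = mean(abs(dnull) >= abs(dobs))
\end{lstlisting}
\end{frame}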

\begin{frame}
\frametitle{one- and two-sample t-test and sign test}
\begin{center}
\tiny
\bgroup
\def\arraystretch{2}
\begin{tabular}{|l|c|c|c|}
\hline
\textbf{name} & \textbf{statistic} & $\boldsymbol{H_{0}}$ & \textbf{Null distribution}\tabularnewline
\hline
\hline
one sample t-test & $t=\frac{\overline{x}-0}{\mbox{SE}_x}$ & mean of $t$ is zero & t-distr. with $n-1$ DoF\tabularnewline
\hline
paired sample t-test & $t=\frac{\overline{d}-0}{\mbox{SE}_d},\, d=x_{i}-y_{i}$ & mean of $t$ is zero & t-distr. with $n-1$ DoF\tabularnewline
\hline
sign test & $t=\#\left[x_{i}<y_{i}\right]$ & median of $x_{i}-y_{i}$
is zero & binomial distr. $\mathcal{B}\left(\frac{1}{2},n\right)$\tabularnewline
 & & so $t = \frac{n}{2}$ & \tabularnewline
\hline
two indep. sample t-test & $t=\frac{(\overline{x}-\overline{y})-0}{\sqrt{\frac{s_{x}^{2}}{n_{x}}+\frac{s_{y}^{2}}{n_{y}}}}$ & mean of $t$ is zero & t-distr. with DoF\tabularnewline
 & & & $\frac{\left(\mbox{SE}_{x}^{2}+\mbox{SE}_{y}^{2}\right)^{2}}{\mbox{SE}_{x}^{4}/(n_{x}-1)+\mbox{SE}_{y}^{4}/(n_{y}-1)}$\tabularnewline
\hline
\end{tabular}
\egroup
\end{center}
\end{frame}

% % ----------------------------------------------------------
% \begin{frame}
% \frametitle{}
% \begin{center}
% \includegraphics[width=.8\linewidth]{figs/fig7.pdf}
% \end{center}
% \end{frame}

\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/fig8.pdf}
\end{center}
\end{frame}

\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/fig9.pdf}
\end{center}
\end{frame}

\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=.8\linewidth]{figs/fig10.pdf}
\end{center}
\end{frame}

\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=.9\linewidth]{figs/fig11.pdf}
\end{center}
\end{frame}

\begin{frame}
\frametitle{}
\begin{center}
\includegraphics[width=1.\linewidth]{figs/fig12.pdf}
\end{center}
\end{frame}
|
|
|
|
|
|
|
|
% ----------------------------------------------------------
\begin{frame}
\frametitle{goodness of fit for two categorical variables}
\small
Suppose you observe two binary variables $A\in \{0,1\}$ and $B\in
\{0,1\}$ in a series of several experiments (e.g. $A$ is success or
not; $B$ indicates gender). You collect the outcomes of the
experiments in a table
\begin{center}
  \begin{tabular}{l|cc|}
    & \bf A=0 & \bf A=1\\\hline
    \bf B=0 & $n_{00}$ & $n_{01}$\\
    \bf B=1 & $n_{10}$ & $n_{11}$
  \end{tabular}
\end{center}
and want to test whether it is consistent with a fixed probability
distribution you know from the literature
\begin{center}
  \begin{tabular}{l|cc|l}
    & \bf A=0 & \bf A=1\\\hline
    \bf B=0 & $p_{00}$ & $p_{01}$ & $p(B=0)$\\
    \bf B=1 & $p_{10}$ & $p_{11}$ & $p(B=1)$ \\\hline
    & $p(A=0)$ & $p(A=1)$ & $1$
  \end{tabular}
\end{center}
\end{frame}
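
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{goodness of fit for two categorical variables}
\small
A minimal sketch of the $\chi^2$ goodness-of-fit statistic in Matlab
({\tt n} and {\tt p} are hypothetical $2\times 2$ arrays holding the
observed counts $n_{ij}$ and the literature probabilities $p_{ij}$).
Since the $p_{ij}$ are fully specified, no parameter is estimated and
the statistic has $4-1=3$ degrees of freedom:
\begin{lstlisting}
N = sum(n(:));            % total number of experiments
e = N * p;                % expected counts under H0
chi2 = sum((n(:) - e(:)).^2 ./ e(:)); % chi-square statistic
pval = 1 - chi2cdf(chi2, 3);          % needs Statistics Toolbox
\end{lstlisting}
\end{frame}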
\begin{frame}
\begin{center}
  \includegraphics[width=0.9\linewidth]{figs/fig01.pdf}
\end{center}
\end{frame}

\begin{frame}
\begin{center}
  \includegraphics[width=0.9\linewidth]{figs/fig02.pdf}
\end{center}
\end{frame}

\begin{frame}
\begin{center}
  \includegraphics[width=0.9\linewidth]{figs/fig03.pdf}
\end{center}
\end{frame}

\begin{frame}
\frametitle{}
\begin{center}
  \includegraphics[width=0.9\linewidth]{figs/fig04.pdf}
\end{center}
\end{frame}

\begin{frame}
\frametitle{}
\begin{center}
  \includegraphics[width=0.9\linewidth]{figs/fig05.pdf}
\end{center}
\end{frame}

\begin{frame}
\frametitle{}
\begin{center}
  \includegraphics[width=0.9\linewidth]{figs/fig06.pdf}
\end{center}
\end{frame}

\begin{frame}
\frametitle{zoo of statistical tests}
{\bf Don't take the diagram too seriously.}
\begin{itemize}
\item is ANOVA a method for
  \begin{itemize}
  \item the relation between a categorical variable and an interval/ratio
    variable?
  \item the relation between groups of interval/ratio variables?
  \end{itemize}\pause
\item is linear regression a method for
  \begin{itemize}
  \item the relation between two interval/ratio variables?
  \item the relation between infinitely many groups of interval/ratio
    variables?
  \end{itemize}\pause
\item Can ANOVA be seen as some kind of linear regression?\pause
\item There are many more statistics and many more tests out
  there.
\end{itemize}
\end{frame}

%----------------------------------------------------------------------

\begin{frame}
\frametitle{advice}
\begin{itemize}
\item There is no general recipe, no general way of looking at
  and doing data analysis (otherwise statisticians would be unemployed
  and a computer would do their job). \pause
\item Use your intelligence (and the book by Zar) to choose the right
  one. \pause
\item Ask if you don't know what to take
  (e.g. \url{stats.stackexchange.com}). \pause
\item Play around in Matlab with toy examples to get a feeling for a
  particular method/test/idea ...
\end{itemize}
\end{frame}

%----------------------------------------------------------------------
\begin{frame}
\frametitle{summary}
\begin{itemize}
\item Statistical tests always have the same ingredients:
  \begin{enumerate}
  \item a {\color{blue} test statistic}
  \item a default situation under which we can compute/simulate the
    {\color{blue} null distribution} of the test statistic
    ({\color{blue}null hypothesis $H_0$})
  \end{enumerate}
  \pause
\item in order to make a decision between $H_0$ and $H_A$ you set
  boundaries
\item these boundaries determine your {\color{blue}type I error} or
  {\color{blue}false positive rate}, or vice versa\pause
\end{itemize}
\begin{emphasize}{scaffold of statistical tests}
  Remembering this structure will help you to find the right
  statistical test and understand it.
\end{emphasize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{p-values}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{the mother of all statistics: p-values}
\framesubtitle{why p-values?}
\begin{itemize}
\item Different people might accept more or less conservative type I error rates.
  (Is $\alpha=0.01$ significant or is $\alpha=0.05$ enough?)
\item P-values are a universal way of reporting statistics such that
  the type I error rate can be chosen by each person individually.
\end{itemize}
\end{frame}

%---------------------------------------------------------
\begin{frame}
\frametitle{the mother of all statistics: the p-value}
\begin{task}{true or false?}
  \begin{itemize}
  \item From $p<0.01$ you can deduce that your result is of
    biological importance.\pause

    % \item {\color{gray} False. A small p-value doesn't say
    %   anything
    %   about biological importance. It just indicates that the data
    %   and $H_0$ are not very compatible.} \pause

  \item The p-value is the probability of observing a dataset
    resulting in a test-statistic more extreme than the one at hand,
    assuming the null hypothesis is true.\pause

    % \item {\color{gray} True.} \pause

  \item $1-p$ is the probability of the alternative hypothesis being
    true.

    % \item {\color{gray} False. The p-value cannot tell us anything
    %   about whether one of the hypotheses are true or not.}
  \end{itemize}
\end{task}
\end{frame}

%---------------------------------------------------------
\begin{frame}
\frametitle{the mother of all statistics: the p-value}
\framesubtitle{What is a p-value?}
\only<1>{
  So far, we chose a particular threshold $b$ by fixing the type I error
  rate $\alpha$.
  \begin{center}
    \includegraphics[width=.7\linewidth]{figs/pval0.png}
  \end{center}
}
\only<2>{
  \begin{itemize}
  \item The {\color{blue}p-value} is the type I error rate if you use
    your {\color{blue} actually measured statistic} as threshold.
  \item In other words: The p-value is the minimal type I error rate
    you have to accept if you call your result significant.
  \end{itemize}
  \begin{center}
    \includegraphics[width=.7\linewidth]{figs/pval1.png}
  \end{center}
}
\end{frame}
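
%---------------------------------------------------------
\begin{frame}[fragile]
\frametitle{the mother of all statistics: the p-value}
\small
A minimal sketch of this definition in Matlab for a one-sample
t-test (the data vector {\tt x} is hypothetical): the two-tailed
p-value is the tail mass of the null distribution beyond the
measured statistic.
\begin{lstlisting}
n = length(x);
t = mean(x) / (std(x) / sqrt(n));  % measured t statistic
% type I error rate if |t| itself is used as threshold:
p = 2 * (1 - tcdf(abs(t), n - 1)); % needs Statistics Toolbox
\end{lstlisting}
\end{frame}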
%---------------------------------------------------------

\begin{frame}
\frametitle{the mother of all statistics: the p-value}
\framesubtitle{Why is it a universal measure?}

The p-value is the minimal type I error rate you have to accept if you
call your result significant.

\begin{itemize}
\item If you have a personal $\alpha$-level that is larger than the
  p-value, you automatically know that the decision threshold lies
  ``further inside''.
\item This means you {\color{blue}can simply compare your $\alpha$-level with the
    p-value}: if the p-value is smaller, then you call that result
  significant, otherwise you don't.
\end{itemize}

\begin{center}
  \includegraphics[width=.45\linewidth]{figs/pval0.png}
  \includegraphics[width=.45\linewidth]{figs/pval1.png}
\end{center}
\end{frame}

%---------------------------------------------------------

\begin{frame}
\frametitle{the mother of all statistics: the p-value}

\begin{task}{p-values if $H_0$ is true}
  Is the following procedure correct?

  \vspace{.5cm}

  In order to show that a sample $x_1,...,x_n$ follows a Normal
  distribution with mean zero, you perform a t-test. If the p-value is
  large, you conclude that there is evidence for $H_0$, i.e. accept
  that $x_1,...,x_n$ has mean zero and is normally distributed.

  \vspace{.5cm}

  To find the answer, simulate normally distributed random variables
  with {\tt randn} in Matlab and compute the p-value with a one-sample
  t-test. Repeat that several times and plot a histogram of the
  p-values (a sketch follows on the next slide).
\end{task}
\pause
\begin{itemize}
\item If $H_0$ is true, the p-value is uniformly distributed between 0
  and 1. Why?\pause
\item Think about the beginning of this lecture:
  $$p=P(|x| > |t|) = 1 - P(|x| \le |t|) = 1 - \mbox{c.d.f.}(|t|) \sim U([0,1])$$
\end{itemize}
\end{frame}
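
%--------------------------------------------------
\begin{frame}[fragile]
\frametitle{the mother of all statistics: the p-value}
\small
A minimal sketch of the simulation suggested in the task ({\tt ttest}
needs the Statistics Toolbox):
\begin{lstlisting}
nrep = 10000;  n = 20;
p = zeros(nrep, 1);
for i = 1:nrep
    x = randn(n, 1);       % H0 is true by construction
    [~, p(i)] = ttest(x);  % one-sample t-test against mu = 0
end
hist(p, 20);  % approximately flat: p is uniform under H0
\end{lstlisting}
\end{frame}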
%--------------------------------------------------
\begin{frame}
\frametitle{the mother of all statistics: the p-value}

\begin{task}{Study design}
  Is the following procedure statistically sound?

  \vspace{.5cm}

  Psychophysical experiments with human subjects can be time-consuming
  and costly. In order to get a significant effect with minimal effort
  you use the following procedure: You start with a few subjects. If
  your statistical test for the effect returns a p-value smaller than
  $0.05$ you stop and publish. Otherwise you keep adding subjects
  and computing p-values until you get a significant result (or run
  out of time and money).
\end{task}
\pause

\begin{solution}{Answer}
  No, the procedure is not sound. Even if $H_0$ is true, you will
  eventually get a p-value smaller than $0.05$ since it is uniformly
  distributed between $0$ and $1$ in this case.
\end{solution}
\end{frame}

%--------------------------------------------------
\begin{frame}
\frametitle{the mother of all statistics: the p-value}

\begin{task}{p-values over studies}
  If there is no effect, how many studies would yield a significant
  p-value (for $\alpha=0.05$)?
\end{task}
\pause
\begin{solution}{Answer}
  $5\%$
\end{solution}
\pause
\begin{task}{p-values in publications}
  Do you think that only publishing positive findings poses a problem?
\end{task}
\pause
\begin{solution}{Answer}
  Yes. If I only publish significant positive findings, then I can
  publish anything if I just repeat the study often enough.
\end{solution}
\end{frame}

%---------------------------------------------------------
\begin{frame}
\frametitle{the mother of all statistics: the p-value}
\begin{task}{true or false?}
  \begin{itemize}
  \item From $p<0.01$ you can deduce that your result is of
    biological importance.\pause

  \item {\color{gray} False. A small p-value doesn't say anything
      about biological importance. It just indicates that the data
      and $H_0$ are not very compatible.} \pause

  \item The p-value is the probability of observing a dataset
    resulting in a test-statistic more extreme than the one at hand,
    assuming the null hypothesis is true.\pause

  \item {\color{gray} True.} \pause

  \item $1-p$ is the probability of the alternative hypothesis being
    true. \pause

  \item {\color{gray} False. The p-value cannot tell us anything
      about whether one of the hypotheses is true or not.}
  \end{itemize}
\end{task}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{multiple hypothesis testing}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%---------------------------------------------------------------
\begin{frame}
\frametitle{two tests}
\begin{task}{Correct or not?}
  You have two independent samples from a treatment group and a
  control group. You are not sure whether your data meets the
  requirements of a t-test. Therefore, you carry out a t-test and a
  ranksum test. If one of them rejects $H_0$ you use that one to
  report your findings in a paper.

  \vspace{.5cm}
  \footnotesize

  To approach an answer, use Matlab (a sketch follows on the next
  slide) and
  \begin{itemize}
  \item repeatedly sample two datasets from the same Normal distribution
    $\mathcal N(0,1)$.
  \item for each pair of datasets compute the test statistic of a
    ranksum test (use {\tt ranksum}) and a t-test (use {\tt ttest2})
  \item Plot the values of the statistics against each other (using {\tt
      plot(T, R, 'k.')}). What can you observe?
  \item Count the number of times at least one of the tests gives a
    p-value smaller than $0.05$. What can you observe?
  \end{itemize}
\end{task}
\end{frame}
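
%---------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{two tests}
\small
A minimal sketch of the suggested simulation ({\tt ttest2} and {\tt
  ranksum} need the Statistics Toolbox):
\begin{lstlisting}
nrep = 1000;  n = 20;
P = zeros(nrep, 2);
for i = 1:nrep
    x = randn(n, 1);  y = randn(n, 1); % same distribution: H0 true
    [~, P(i, 1)] = ttest2(x, y);       % t-test p-value
    P(i, 2) = ranksum(x, y);           % ranksum p-value
end
% type I error rate of "reject if at least one test rejects"
mean(any(P < 0.05, 2))   % clearly larger than 0.05
\end{lstlisting}
\end{frame}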
%---------------------------------------------------------------
\begin{frame}
\frametitle{two tests}

\begin{minipage}{1.\linewidth}
  \begin{minipage}{0.6\linewidth}
    \begin{center}
      \includegraphics[width=1.\linewidth]{figs/multipletesting.pdf}
    \end{center}
  \end{minipage}
  \begin{minipage}{0.39\linewidth}
    \small
    \only<1-4>{
      \begin{itemize}
      \item the two statistics are clearly correlated\pause
      \item What is the type I error rate for each single test?\pause
      \item Where is the type I error area in the combined plot? \pause
      \item Is the type I error rate of the combined strategy lower or
        higher compared to using just a single test?\pause
      \end{itemize}
    }
    \only<5>{
      \small
      \color{blue} The combined strategy has a higher error rate! This gets
      worse for more tests. For that reason we have to account for multiple
      testing!
    }
  \end{minipage}
\end{minipage}
\end{frame}

%---------------------------------------------------------------
\begin{frame}
\frametitle{two tests}

\begin{minipage}{1.\linewidth}
  \begin{minipage}{0.49\linewidth}
    \begin{center}
      \includegraphics[width=1.\linewidth]{figs/multipletesting.pdf}
    \end{center}
  \end{minipage}
  \begin{minipage}{0.5\linewidth}
    \small
    \begin{itemize}
    \item When is something called multiple testing?\pause
    \item[]{\color{gray} If a hypothesis is a compound of single
        hypotheses.}\pause
    \item If I test $\mu_1 = \mu_2 = \mu_3$ by testing $\mu_i = \mu_j$
      for all $i\not= j$ and reject as soon as one of the tests rejects,
      does the type I error increase or decrease?\pause
    \item[]{\color{gray} It increases, because we have the chance to make
        an error in each of the comparisons.}\pause
    \item Can the type I error also go in the other direction?\pause
    \item[]{\color{gray} Yes, it could. For example if the single
        hypotheses are combined with ``and''.}
    \end{itemize}
  \end{minipage}
\end{minipage}
\end{frame}

%---------------------------------------------------------------

\begin{frame}
\frametitle{summary}
\begin{itemize}
\item Multiple testing tests a {\color{blue}compound hypothesis} by
  testing several single hypotheses.\pause
\item {\color{blue}Multiple testing can decrease or increase the type I/II error}
  depending on how the single hypotheses are combined (``or'': type
  I up, ``and'': type I down).\pause
\item This can be accounted for (e.g. by {\em Bonferroni correction:
    divide $\alpha$ by the number of tests}; see the sketch on the
  next slide). However, it is better to have a test that directly
  tests the compound hypothesis. ANOVA is a typical example of that.
\end{itemize}
\end{frame}
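
%---------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{summary}
\small
A minimal sketch of the Bonferroni correction in Matlab (the
p-values are made up for illustration):
\begin{lstlisting}
pvals = [0.030 0.012 0.044];  % p-values of m single tests
alpha = 0.05;
m = length(pvals);
reject = pvals < alpha / m;   % compare to the corrected level
\end{lstlisting}
\end{frame}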
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{study design}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{general theme}
\begin{enumerate}
\item make an educated guess about the true parameters
\item state how accurate/powerful you want to be
\item select $n$ based on that
\end{enumerate}
\end{frame}

\begin{frame}
\frametitle{estimating a single mean}
\framesubtitle{standard error and $\alpha$}
\begin{itemize}
\item Assume you want to estimate the mean of some quantity.\pause
\item From a pilot study or the literature, you have an estimate $s$
  of the standard deviation and $\tilde\mu$ of the mean of that
  quantity.\pause
\item $\tilde \mu$ could also be chosen to set a minimal detectable difference.\pause
\item In order to test whether your mean $\hat\mu$ is different from
  a fixed mean $\mu_0$ on an $\alpha$-level of $5\%$ you know that
  the $95\%$ confidence interval around $\tilde\mu$ should not
  contain $\mu_0$: $$\underbrace{|\tilde\mu - \mu_0|}_{=:\delta} \ge
  t_{0.025, \nu}\frac{s}{\sqrt{n}}$$
  \pause
\item This means you should set $n$ to be
  $$n \ge \left(\frac{t_{0.025, \nu}\cdot s}{\delta}\right)^2 $$
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{estimating means}
\framesubtitle{type I and type II error}
{\bf one can also take the desired power $1-\beta$ into account}
$$n \ge \frac{s^2}{\delta^2}\left(t_{\alpha,\nu} + t_{\beta(1),\nu}\right)^2$$
\only<1>{
  \includegraphics[width=.5\linewidth]{figs/experimentalDesign00.pdf}
  \includegraphics[width=.5\linewidth]{figs/experimentalDesign01.pdf}
}
\pause

{\bf rearranging the formula yields an estimate for the minimal
  detectable difference}
$$\delta \ge \sqrt{\frac{s^2}{n}}\left(t_{\alpha,\nu} + t_{\beta(1),\nu}\right)$$
\pause

{\bf for two means, this formula becomes}
$$n \ge \frac{2s^2}{\delta^2}\left(t_{\alpha,\nu} + t_{\beta(1),\nu}\right)^2$$

\pause

\begin{emphasize}{iterative estimation}
  Since $\nu$ depends on $n$ (i.e. $\nu=n-1$), we need to estimate
  $n$ iteratively.
\end{emphasize}

\mycite{Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
  Ed.)Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
  Hall. doi:10.1037/0012764}

\end{frame}

\begin{frame}
\frametitle{example}
\framesubtitle{Zar, example 7.2}
\small
Researchers observed the weight changes in twelve rats after being
subjected to forced exercise. The mean difference is
$\hat\mu=-0.65g$, the sample variance is $\hat\sigma^2=1.5682
g^2$. We wish to test the difference against $\mu_0=0$ with $\alpha=0.05$
and a $1-\beta=90\%$ chance of detecting a population mean
different from $\mu_0=0$ by as little as $1.0g$.

\pause

Let's guess that a sample size of $n=20$ would be required. Then
$\nu=19$, $t_{0.025,19}=2.093$, $\beta=1-0.9=0.1$, and
$t_{0.1,19}=1.328$. This means
$$n=\frac{1.5682}{1^2}(2.093+1.328)^2 = 18.4.$$

\pause

Now let's use $n=19$ as an estimate, in which case $\nu=18$,
$t_{0.025,18}=2.101$, $t_{0.1,18}=1.330$,
and $$n=\frac{1.5682}{1^2}(2.101+1.330)^2=18.5.$$
Thus we need a sample size of at least $19$.
\end{frame}
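
\begin{frame}[fragile]
\frametitle{example}
\framesubtitle{Zar, example 7.2 in Matlab}
\small
A minimal sketch of the iteration ({\tt tinv} needs the Statistics
Toolbox; $t_{0.025,\nu}$ is the two-tailed $\alpha=0.05$ quantile,
$t_{0.1,\nu}$ the one-tailed $\beta=0.1$ quantile):
\begin{lstlisting}
s2 = 1.5682;  delta = 1;  alpha = 0.05;  beta = 0.1;
n = 20;                        % initial guess
for iter = 1:20
    nu = n - 1;
    nnew = ceil(s2 / delta^2 * ...
        (tinv(1 - alpha/2, nu) + tinv(1 - beta, nu))^2);
    if nnew == n, break; end   % converged
    n = nnew;
end
n   % required sample size (19)
\end{lstlisting}
\end{frame}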
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{ANOVA}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{from linear regression to ANOVA}
\begin{frame}
\frametitle{from linear regression to ANOVA}

\small The following table contains the impulse frequency of the
electric field from electric fish measured at several temperatures
(data for project 03).

\begin{center}
  \tiny
  \begin{tabular}{lccc}
    {\bf temperature [${}^\circ$C]} & \multicolumn{3}{c}{\bf impulse frequency [number/sec]} \\ \hline\\
    20.00 & 225.00 & 230.00 & 239.00 \\
    22.00 & 251.00 & 259.00 & 265.00 \\
    23.00 & 266.00 & 273.00 & 280.00 \\
    25.00 & 287.00 & 295.00 & 302.00 \\
    27.00 & 301.00 & 310.00 & 317.00 \\
    28.00 & 307.00 & 313.00 & 325.00 \\
    30.00 & 324.00 & 330.00 & 338.00
  \end{tabular}
\end{center}

\begin{itemize}
\item Our goal will be to test whether $\mu_{20}=...=\mu_{30}$.
\item Note that ANOVA is not the method to analyze this
  dataset; linear regression is, because temperature is on an interval
  scale. We will just use the ideas here for illustration.
\end{itemize}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{from linear regression to ANOVA}
\begin{center}
  \includegraphics[width=.8\linewidth]{figs/regression01.pdf}
\end{center}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{from linear regression to ANOVA}
\begin{center}
  \includegraphics[width=.7\linewidth]{figs/regression02.pdf}
\end{center}
What kind of regression line would we expect if the means were equal?
\pause {\em One with slope $\alpha=0$.}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\begin{minipage}{1.0\linewidth}
  \begin{minipage}{0.5\linewidth}
    \includegraphics[width=1.\linewidth]{figs/regression02.pdf}
  \end{minipage}
  \begin{minipage}{0.5\linewidth}
    \begin{itemize}
    \item For linear regression data, we would test whether
      $\alpha=0$.
    \item For categorical inputs (x-axis), we cannot compute a
      regression line. Therefore, we need a different approach.
    \end{itemize}
  \end{minipage}
\end{minipage}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{law of total variance}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{law of total variance}
\only<1>{
  Approach: the law of total variance
  $$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] +
  \color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
  \begin{center}
    \includegraphics[width=.7\linewidth]{figs/regression02.pdf}
  \end{center}
}\pause
\only<2>{
  Approach: the law of total variance
  $$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] +
  \color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
  \begin{center}
    \includegraphics[width=.7\linewidth]{figs/regression03.pdf}
  \end{center}
}\pause
\only<3>{
  Approach: the law of total variance
  $$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] +
  \color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
  Data generation model for regression $f_{ij} = {\color{mygreen} \alpha t_i} + \beta + {\color{lightblue}\varepsilon_{ij}}$
  \begin{center}
    \includegraphics[width=.6\linewidth]{figs/regression04.pdf}
  \end{center}
}\pause
\only<4>{
  Approach: the law of total variance
  $$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] +
  \color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
  Data generation model for regression
  $f_{ij} = {\color{mygreen} \alpha t_i} + \beta +
  {\color{lightblue}\varepsilon_{ij}}: $ $${\color{mygreen} \alpha=0}
  \Rightarrow {\color{mygreen} \mathbb V[\mu] = 0} \Rightarrow \mu_{20} = \mu_{22} = ... = \mu_{30}$$
  \begin{center}
    \includegraphics[width=.6\linewidth]{figs/regression04.pdf}
  \end{center}
}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{single factor ANOVA}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%---------------------------------------------------------------
\begin{frame}
\frametitle{data model for single factor ANOVA}
Approach: the law of total variance
$$\color{red} \mathbb V[f] = \color{mygreen} \mathbb V[\mu] +
\color{lightblue}\mathbb E[\mathbb V[f|\mu_i]]$$
Data generation model for single factor ANOVA
$f_{ij} = \overline{\mu} + {\color{mygreen} \tau_{i}} +
{\color{lightblue}\varepsilon_{ij}}$:
$${\color{mygreen} \tau_i=0 \mbox{ for all } i}
\Rightarrow {\color{mygreen} \mathbb V[\mu] = 0} \Rightarrow \mu_{20} = \mu_{22} = ... = \mu_{30}$$
\begin{center}
  \includegraphics[width=.6\linewidth]{figs/regression05.pdf}
\end{center}
\end{frame}
%---------------------------------------------------------------
\begin{frame}
\frametitle{statistic of ANOVA}
\begin{columns}
  \begin{column}{0.43\linewidth}
    \begin{center}
      \includegraphics[width=1.\linewidth]{figs/regression02.pdf}

      \vspace{-.2cm}

      \includegraphics[width=1.\linewidth]{figs/Fdistribution00.pdf}
    \end{center}
  \end{column}
  \begin{column}{0.55\linewidth}
    \begin{align*}
      \:&\mbox{\color{lightblue} error SS}&=\color{lightblue}\sum_{ij}\left(x_{ij}-\hat{\mu}_{i}\right)^{2}\\
      +\:&\mbox{\color{mygreen} group SS}&=\color{mygreen}\sum_{i}n_{i}\left(\hat{\mu}_{i}-\mu\right)^{2}\\\hline
      \:&\mbox{\color{red} total SS}&=\color{red}\sum_{ij}\left(x_{ij}-\mu\right)^{2}
    \end{align*}
    \pause
    \begin{align*}
      \mbox{\color{mygreen}group MS}=\frac{\mbox{\color{mygreen}group SS}}{\mbox{\color{mygreen}group DF}}&=\color{mygreen}\frac{\sum_{i}n_{i}\left(\hat{\mu}_{i}-\mu\right)^{2}}{k-1}\\
      \mbox{\color{lightblue}error MS}=\frac{\mbox{\color{lightblue}error SS}}{\mbox{\color{lightblue}error DF}}&=\color{lightblue}\frac{\sum_{ij}\left(x_{ij}-\hat{\mu}_{i}\right)^{2}}{N-k}\\
      \color{dodgerblue}F&=\frac{\mbox{\color{mygreen}group MS}}{\mbox{\color{lightblue}error MS}}
    \end{align*}
  \end{column}
\end{columns}
\end{frame}
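
%---------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{statistic of ANOVA}
\small
A minimal sketch for the fish data with {\tt anova1} (Statistics
Toolbox); each column of {\tt f} is one temperature group:
\begin{lstlisting}
f = [225 251 266 287 301 307 324; ...
     230 259 273 295 310 313 330; ...
     239 265 280 302 317 325 338];
[p, tbl] = anova1(f); % F statistic, p-value, and ANOVA table
\end{lstlisting}
\end{frame}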
%---------------------------------------------------------------
\begin{frame}
\frametitle{summary single factor ANOVA}
\begin{itemize}
\item {\bf Goal:} Test whether several means are equal or not.\pause
\item {\bf Strategy:} Use the law of total variance to explain the overall
  variance with the {\em variance of the means} and the {\em variance
    within groups}\pause
\item If the total variance can be solely explained by the {\em variance
    within groups}, then the means do not vary and must be the same. \pause
\item Since a statistic should be large if the data does not fit
  $H_0$, we use $\frac{MS(between)}{MS(within)}$, which can be shown to
  have an F-distribution under certain ...\pause
\item {\bf Assumptions:}
  \begin{itemize}
  \item The groups must be independent of each other.
  \item In each group, the specimens must be i.i.d. samples from the
    particular population distribution $f_{ij} \sim p(f|\mu_i) $.
  \item The standard deviations of the groups are equal
    ($\sigma_\varepsilon$ is the same for all groups).
  \item The residuals $\varepsilon$ must be Normally distributed.
  \end{itemize}
\end{itemize}
\end{frame}

\subsection{study design for ANOVA}
\begin{frame}
\frametitle{study design for ANOVA}
\begin{itemize}
\item If the means are different (but all other assumptions are
  satisfied), then $F$ follows a non-central F-distribution.
\item Like in the case of one- and two-sample t-tests, this can be
  used to adjust $n$ for the desired power.
\item Alternatively, one can estimate the minimal detectable
  difference $\delta$ from estimates of the {\em error MS} $s^2$
  and $n$, or $n$ from $\delta$ and $s^2$, respectively.
\end{itemize}
\mycite{Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
  Ed.)Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
  Hall. doi:10.1037/0012764}

\end{frame}

\subsection{non-parametric ANOVA}
\begin{frame}
\frametitle{Kruskal-Wallis test}
\begin{itemize}
\item Can be applied if the data is not normally distributed.
\item Is equivalent to the Mann-Whitney/Wilcoxon rank sum test for two
  factor levels.
\item Needs the variances to be equal as well.
\item Instead of testing equality of means/medians it tests for
  equality of distributions.
\item For more details see {\em Biostatistical Analysis}.
\end{itemize}
\end{frame}
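
\begin{frame}[fragile]
\frametitle{Kruskal-Wallis test}
\small
In Matlab the test can be used as a drop-in replacement for {\tt
  anova1} (Statistics Toolbox); e.g. with the fish data matrix {\tt f}
from the {\tt anova1} sketch a few slides back:
\begin{lstlisting}
p = kruskalwallis(f);  % columns of f are the factor levels
\end{lstlisting}
\end{frame}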
\begin{frame}
\frametitle{Testing the difference among several medians}
\begin{itemize}
\item Can be applied if the data is not normally distributed.
\item Does not need the variances to be equal.
\item For more details see {\em Biostatistical Analysis}.
\end{itemize}
\end{frame}

\section{more complex ANOVAs}
\subsection{blocking}
% ----------------------------------------------------------
\begin{frame}
\frametitle{blocking}
\footnotesize
{\bf Blocking}
How does experience affect the anatomy of the brain? In a typical
experiment to study this question, young rats are placed in one of
three environments for 80 days:

\begin{itemize}
\item[T1] Standard environment. The rat is housed with a single
  companion in a standard lab cage.
\item[T2] Enriched environment. The rat is housed with several
  companions in a large cage, furnished with various playthings.
\item[T3] Impoverished environment. The rat lives alone in a standard
  lab cage.
\end{itemize}

At the end of the 80-day experience, various anatomical measurements
are made on the rats' brains. Suppose a researcher plans to conduct
the above experiment using 30 rats. To minimize variation in response,
all 30 animals will be male, of the same age and strain. To reduce
variation even further, the researcher can take advantage of the
similarity of animals from the same litter. In this approach, the
researcher would obtain three male rats from each of 10 litters. The
three littermates from each litter would be assigned at random: one to
T1, one to T2, and one to T3.
\end{frame}

%---------------------------------------------------------------
\begin{frame}
\frametitle{How to create blocks}

Try to create blocks that are as homogeneous within themselves as
possible, so that the inherent variation between experimental units
becomes, as far as possible, variation between blocks rather than
within blocks (see SWS chapter 11.6).

{\bf Fish data:}
\begin{itemize}
\item each fish is a block
\item the different categories are the factor of interest
\item note that we have one measurement per block and factor, but
  there could be more
\end{itemize}
\end{frame}

%---------------------------------------------------------------
\begin{frame}
\frametitle{data model for block randomized ANOVA}

Data generation model for randomized block factor ANOVA
$f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \varepsilon_{ijk}$:

\vspace{.5cm}

How do we know that there is no interaction $\gamma_{ij}$ between
the blocks and the factors?
\begin{itemize}
\item {\bf a priori knowledge:} why should the effect of temperature
  depend on fish identity?
\item {\bf additivity:} for each factor level $i$, the values differ by
  the {\em same} amount $\beta_j$. \pause
\end{itemize}

\begin{minipage}{1.0\linewidth}
  \begin{minipage}{0.5\linewidth}
    \begin{center}
      \includegraphics[width=1.\linewidth]{figs/regression06.pdf}
    \end{center}
  \end{minipage}
  \begin{minipage}{0.5\linewidth}
    \only<2>{\color{red} Would that also be the case if the lines crossed at some point?}
  \end{minipage}
\end{minipage}
\end{frame}

\subsection{two factor ANOVA}
%---------------------------------------------------------------
\begin{frame}
\frametitle{What's the funny way to write down the data model in ANOVA?}

Data generation model for a two factor ANOVA with interaction
$$f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \gamma_{ij} + \varepsilon_{ijk}$$

{\bf Note that:}
\begin{itemize}
\item The sums over the $\tau_i$, $\beta_j$, $\gamma_{ij}$, and
  $\varepsilon_{ijk}$ terms are always zero. They model the {\em deviation}
  from the grand mean. \pause
\item They directly correspond to the available SS/MS terms. For
  example, in the block randomized ANOVA
  \begin{itemize}
  \item $f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \varepsilon_{ijk}$
  \item $SS(total) = SS(temperature) + SS(blocks) + SS(within)$
  \end{itemize}
\end{itemize}
\end{frame}

%---------------------------------------------------------------
\begin{frame}
\frametitle{different hypotheses from a 2-factor ANOVA}
\small
Data generation model for a two factor ANOVA with interaction
$$f_{ijk} = \overline{\mu} + \tau_{i} + \beta_j + \gamma_{ij} + \varepsilon_{ijk}$$

\begin{itemize}
\item {\bf Blocking: } Assume $\gamma_{ij}=0$. Test
  $$F=\frac{\mbox{temperature MS} (\tau_i)}{\mbox{error MS}
    (\varepsilon_{ijk})}$$\pause
\item {\bf Repeated Measures: } Assume $\gamma_{ij}=0$. The entity
  that was repeatedly measured becomes the block.\pause
\item {\bf Two factor testing factor influence: } Assume $\gamma_{ij}\not=0$. Test
  $$F = \frac{\mbox{temperature MS} (\tau_i)}{\mbox{error MS}
    (\varepsilon_{ijk})}$$\pause
\item {\bf Two factor testing interaction: } Assume $\gamma_{ij}\not=0$. Test
  $$F=\frac{\mbox{interaction MS}(\gamma_{ij})} {\mbox{error MS}
    (\varepsilon_{ijk})}$$
\end{itemize}
\end{frame}
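
%---------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{different hypotheses from a 2-factor ANOVA}
\small
A minimal sketch with {\tt anovan} (Statistics Toolbox); {\tt y},
{\tt factorA}, and {\tt factorB} are hypothetical vectors with one
entry per measurement $f_{ijk}$:
\begin{lstlisting}
% two main effects plus their interaction gamma_ij
[p, tbl] = anovan(y, {factorA, factorB}, ...
                  'model', 'interaction');
% p(1), p(2): main effects; p(3): interaction
\end{lstlisting}
\end{frame}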
%---------------------------------------------------------------
\begin{frame}
\frametitle{summary}
\begin{itemize}
  \small
\item ANOVA is a very flexible method to study the interactions of
  categorical variables (factors) and ratio/interval data \pause
\item Works by checking whether a certain factor or interaction between
  factors is needed to explain the variability in the data \pause
\item Relies on assumptions that need to be checked
  \begin{itemize}
  \item equal variance for each factor level
  \item the residuals are Normally distributed
  \item the number of points $n_i$ should be the same
  \end{itemize}\pause
\item There is a whole zoo of ANOVA techniques, for all kinds of
  situations. This is just the tip of the iceberg.
\item One can often get away with violating some of the
  assumptions. For more details on that check {\em Biostatistical Analysis}.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{detour I: One-tailed vs. two-tailed}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%---------------------------------------------------------------
\begin{frame}
\hypertarget{sec:twotailed}{}

\frametitle{one-tailed tests}
\begin{task}{Correct or not?}
  Imagine a pharmaceutical company runs clinical trials for a drug
  that enhances the ability to focus. To that end they apply the drug
  to a treatment group and measure scores in a standardized test. From
  the literature it is known that normal subjects have a score of about 0.

  Since the company wants to test whether the drug {\em enhances (score
    $> 0$)} the ability to focus, they choose a one-tailed test ($H_A:$
  the treatment group performs better than the performance known from
  the literature).
\end{task}
\end{frame}

%-------------------------------------------------------------

\begin{frame}
\frametitle{one tailed test}
\begin{minipage}{1.0\linewidth}
  \begin{minipage}{0.5\linewidth}
    {\bf two tailed test}

    \includegraphics[width=\linewidth]{figs/twotailed.png}
    \footnotesize
    \vspace{-1cm}

    e.g.

    \begin{itemize}
    \item $H_0: \mu = 0$
    \item $H_A: \mu \not= 0$
      \vspace{1.8cm}
    \end{itemize}
  \end{minipage}
  \begin{minipage}{0.5\linewidth}
    {\bf one tailed test}

    \includegraphics[width=\linewidth]{figs/onetailed.png}
    \footnotesize
    \vspace{-1cm}
    e.g.

    \begin{itemize}
    \item $H_0: \mu = 0$
    \item $H_A: \mu > 0$
    \item $\hat\mu < 0$ must directly imply that $\hat\mu$ came from
      $P(\hat\mu|H_0)$
    \item if that is not the case, using a one-tailed test is cheating
    \end{itemize}
  \end{minipage}
\end{minipage}
\hyperlink{back:twotailed}{\color{gray}go back}
\end{frame}
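
\begin{frame}[fragile]
\frametitle{one tailed test}
\small
A minimal sketch of the difference in Matlab, for a measured t
statistic {\tt t} with {\tt nu} degrees of freedom (both
hypothetical values):
\begin{lstlisting}
p2 = 2 * (1 - tcdf(abs(t), nu)); % two-tailed: H_A mu ~= 0
p1 = 1 - tcdf(t, nu);            % one-tailed: H_A mu > 0
% for t > 0, p1 is half of p2 -- hence the temptation to cheat
\end{lstlisting}
\end{frame}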
% ----------------------------------------------------------

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{detour II: Statistical Power}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Why is it hard to assess the power of a test?}
\begin{minipage}{1.\linewidth}
  \begin{minipage}{.5\linewidth}
    \includegraphics[width=.8\linewidth]{figs/power.pdf}
  \end{minipage}
  \begin{minipage}{.5\linewidth}
    \begin{itemize}
    \item Power = 1 - P(type II error)\\
      = P(reject $H_0$| $H_A$ is true)\pause
    \item in general the distribution
      \begin{center}
        P(test statistic|$H_A$ is true)
      \end{center}
      is not available to us.
      \pause
    \item Therefore, the power can often only be specified for a
      specific $H_A$.
    \end{itemize}
  \end{minipage}
\end{minipage}
\mycite{J. H. Zar, Biostatistical Analysis}
\hypertarget{sec:power}{}
\hyperlink{back:power}{\color{gray}go back}

\end{frame}

% ----------------------------------------------------------

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{detour III: Bayes rule and statistical tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ----------------------------------------------------------
\begin{frame}
\hypertarget{sec:bayesian}{}

\frametitle{Why is this funny (or sad)?}
\begin{center}
  \includegraphics[width=.4\linewidth]{figs/frequentistsvsbayesians.png}
\end{center}
\mycite{http://xkcd.com/1132/}
\end{frame}

%-----------------------------------------------------------------
\begin{frame}
\frametitle{Why is this funny (or sad)?}
\begin{minipage}{1.\linewidth}
  \begin{minipage}{.5\linewidth}
    \includegraphics[width=.7\linewidth]{figs/frequentistsvsbayesians.png}
    \mycite{http://xkcd.com/1132/}
  \end{minipage}
  \begin{minipage}{.5\linewidth}
    \begin{itemize}
    \item $H_0:$ the sun has not gone nova
    \item $H_A:$ the sun has gone nova \pause
    \item test procedure: we believe the detector \pause
    \item Null distribution: multinomial with $n=2$ dice, $p_1 = \frac{1}{6}, ..., p_6 = \frac{1}{6}$ \pause
    \item the probability of making a type I error is $p(2\times
      6)=\frac{1}{6}\cdot \frac{1}{6} \approx 0.028$
    \end{itemize}
    \pause
    So ... what is wrong?
  \end{minipage}
\end{minipage}
\end{frame}

%-----------------------------------------------------------------
\begin{frame}
\frametitle{A similar example}
\begin{minipage}{1.\linewidth}
  \begin{minipage}{.5\linewidth}
    {\bf sensitivity \& specificity of an HIV test}

    \begin{tabular}{ccc}
      & HIV & no HIV\tabularnewline
      test + & 99.7\% & 1.5\%\tabularnewline
      test - & 0.3\% & 98.5\%\tabularnewline
    \end{tabular}

    \vspace{1cm}

    {\bf HIV prevalence (Germany)}

    \begin{tabular}{cc}
      HIV & no HIV\tabularnewline
      0.1\% & 99.9\%\tabularnewline
    \end{tabular}
  \end{minipage}
  \begin{minipage}{.5\linewidth}
    \begin{task}{}
      What is the probability that you are HIV+ if you test positive?
    \end{task}\pause
    In order to answer that question, you need two rules for
    probability.\pause

    \vspace{1cm}

    What is the power, what is the type I error of the test?
  \end{minipage}
\end{minipage}
\end{frame}

%-----------------------------------------------------------------
\begin{frame}
\frametitle{Bayes rule and marginalization}
{\bf Bayes rule}
$$p(A|B)p(B) = p(B|A)p(A)$$

{\bf joint probability}
$$p(A,B) = p(A|B)p(B) = p(B|A)p(A)$$

{\bf marginalization}
$$p(B) = \sum_{\mbox{possible values a of }A}p(a,B)$$
\end{frame}

%-----------------------------------------------------------------
\begin{frame}
\frametitle{probability/Bayesian nomenclature}
\framesubtitle{repetition}
Let $T\in \{+, -\}$ be the test result and $H\in \{+,-\}$ whether you
are HIV positive or not.
\begin{itemize}
\item $p(T|H)$ is the {\em likelihood} \pause
\item $p(H)$ is the {\em prior} \pause
\item $p(H|T)$ is the {\em posterior}
\end{itemize}
\pause
Given the prior and the likelihood, we can compute the posterior.
\begin{align*}
  p(H|T) &= \frac{P(T|H)P(H)}{P(T)} &\mbox{Bayes rule}\\
  &= \frac{P(T|H)P(H)}{\sum_h P(T,h)} &\mbox{marginalization}\\
  &= \frac{P(T|H)P(H)}{\sum_h P(T|h)p(h)} &\mbox{joint
    probability}
\end{align*}
\end{frame}

%-----------------------------------------------------------------
\begin{frame}
\frametitle{HIV test}
\begin{minipage}{1.\linewidth}
  \begin{minipage}{.5\linewidth}
    \begin{tabular}{ccc}
      & HIV & no HIV\tabularnewline
      test + & 99.7\% & 1.5\%\tabularnewline
      test - & 0.3\% & 98.5\%\tabularnewline
    \end{tabular}
  \end{minipage}
  \begin{minipage}{.5\linewidth}
    \begin{tabular}{cc}
      HIV & no HIV\tabularnewline
      0.1\% & 99.9\%\tabularnewline
    \end{tabular}
  \end{minipage}
\end{minipage}

\begin{align*}
  p(H=+|T=+)&= \frac{P(T=+|H=+)P(H=+)}{\sum_{h\in\{+,-\}} P(T=+|H=h)p(H=h)} \\
  p(H=+|T=+)&= \frac{0.997 \cdot 0.001}{0.997 \cdot 0.001 + 0.015
    \cdot 0.999} \\
  &\approx 0.062
\end{align*}
\pause
This means with a positive HIV test, you have about a $6.2$\% chance of
being HIV positive. Why is this number so low? \pause

\only<3>{Because a lot of the people for whom the test is positive
  are false positives from the HIV-negative group. This is because
  being HIV-positive is relatively rare.}
\end{frame}
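
%-----------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{HIV test}
\small
The same computation as a minimal Matlab sketch:
\begin{lstlisting}
prior = [0.001 0.999];   % P(H=+), P(H=-)
like  = [0.997 0.015];   % P(T=+|H=+), P(T=+|H=-)
post  = like .* prior / sum(like .* prior);  % Bayes rule
post(1)                  % P(H=+|T=+), approx. 0.062
\end{lstlisting}
\end{frame}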
%-----------------------------------------------------------------
\begin{frame}
\frametitle{Why is this funny (or sad)?}
\begin{minipage}{1.\linewidth}
  \begin{minipage}{.5\linewidth}
    \includegraphics[width=.7\linewidth]{figs/frequentistsvsbayesians.png}
    \mycite{http://xkcd.com/1132/}
  \end{minipage}
  \begin{minipage}{.5\linewidth}
    {\bf Why is it funny:} Because it points at the fact that
    statistical tests usually look at the likelihood only and ignore
    the prior.

    \vspace{1cm}

    {\bf Why is it sad?} Because statistical tests usually look at
    the likelihood and ignore the prior.
  \end{minipage}
\end{minipage}
\hyperlink{back:bayesian}{\color{gray}go back}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{detour IV: Assessing normality with QQ plots}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\hypertarget{sec:qqplots}{}
\frametitle{histogram equalization}
\begin{minipage}{1.0\linewidth}
  \begin{minipage}{0.5\linewidth}
    \begin{task}{histogram equalization}
      Which function $y = f(x)$ transforms $x$ such that $y$ has the
      distribution $p(y)$?
    \end{task}
  \end{minipage}
  \begin{minipage}{0.5\linewidth}
    \only<1>{
      \begin{center}
        \includegraphics[width=1.\linewidth]{figs/HE0.png}
      \end{center}
    }\pause
    \only<2>{
      \begin{center}
        \includegraphics[width=1.\linewidth]{figs/HE0Solution.png}
      \end{center}
    }
  \end{minipage}
\end{minipage}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{histogram equalization}
\begin{minipage}{1.0\linewidth}
  \begin{minipage}{0.4\linewidth}
    \begin{task}{histogram equalization}
      What would the function look like if the target were a Normal
      distribution?
    \end{task}
  \end{minipage}
  \begin{minipage}{0.6\linewidth}
    \only<1>{
      \begin{center}
        \includegraphics[width=1.\linewidth]{figs/HE1.png}
      \end{center}
    }\pause
    \only<2>{
      \begin{center}
        \includegraphics[width=1.\linewidth]{figs/HE1Solution.png}
      \end{center}
    }
  \end{minipage}
\end{minipage}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{histogram equalization}
\begin{minipage}{1.0\linewidth}
  \begin{minipage}{0.4\linewidth}
    \begin{task}{histogram equalization}
      Is the target distribution a Normal distribution?
    \end{task}
  \end{minipage}
  \begin{minipage}{0.6\linewidth}
    \only<1>{
      \begin{center}
        \includegraphics[width=1.\linewidth]{figs/HE2.png}
      \end{center}
    }\pause
    \only<2>{
      \begin{center}
        \includegraphics[width=1.\linewidth]{figs/HE2Solution.png}
      \end{center}
    }
  \end{minipage}
\end{minipage}
\end{frame}

% ----------------------------------------------------------

\begin{frame}
\frametitle{QQ-plots}
\begin{itemize}
\item QQ-plots can be used to visually assess whether a set of data
  points might follow a certain distribution. \pause
\item A QQ-plot is constructed by
  \begin{enumerate}
  \item computing for each value $x_1,...,x_n$ the fraction
    $q_1,...,q_n$ of data points that are less than or equal to it
    (where do you know that function from?)\pause
  \item and plotting each $x_i$ against the value $y_i$ of the other
    distribution that has the same quantile $q_i$
  \end{enumerate}\pause
\item If the two distributions are equal, the QQ-plot shows a straight line.\pause
\item How would you assess the normality of data $x_1,...,x_n$ with
  a QQ-plot? \pause {\em make the target distribution a Gaussian (a
    sketch follows on the next slide)}
\end{itemize}
\end{frame}
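
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{QQ-plots}
\small
A minimal sketch of a normal QQ-plot built by hand in Matlab ({\tt
  norminv} and the convenience function {\tt qqplot} need the
Statistics Toolbox); the data vector {\tt x} is hypothetical:
\begin{lstlisting}
n = length(x);
q = ((1:n)' - 0.5) / n;          % empirical quantile levels
plot(norminv(q), sort(x), 'k.'); % straight line if x is normal
% or simply:
qqplot(x);
\end{lstlisting}
\end{frame}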
% ----------------------------------------------------------
\begin{frame}
\frametitle{histogram equalization}
\begin{minipage}{1.0\linewidth}
  \begin{minipage}{0.4\linewidth}
    \begin{task}{special transform}
      Which function $y = f(x)$ transforms $x$ such that $y$ has the
      distribution $p(y)$?

      Do you know that function?
    \end{task}

    \only<2>{{\bf Answer:} The cumulative distribution function $f(x) = F(x)$.}
  \end{minipage}
  \begin{minipage}{0.6\linewidth}
    \only<1>{
      \begin{center}
        \includegraphics[width=1.\linewidth]{figs/HE3.png}
      \end{center}
    }\pause
    \only<2>{
      \begin{center}
        \includegraphics[width=1.\linewidth]{figs/HE3Solution.png}
      \end{center}
    }
  \end{minipage}
\end{minipage}
\hyperlink{back:detourIV}{\color{gray} back to statistical tests}
\end{frame}

\end{document}