% NOTE: repository-viewer artifact, commented out so the file compiles
% (no text may precede \documentclass):
% This repository has been archived on 2021-05-17. You can view files and clone it, but cannot push or open issues or pull requests.
% scientificComputing/statistics-fabian/lecture_statistics02.tex
%
% 773 lines
% 25 KiB
% TeX
\documentclass{beamer}
\usepackage{xcolor}
\usepackage{listings}
\usepackage{pgf}
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
%\usepackage{multimedia}
\usepackage[utf8]{inputenc}
\usepackage{amsmath}
\usepackage{bm}
\usepackage[T1]{fontenc}
\usepackage{hyperref}
\usepackage{ulem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>
{
\usetheme{Singapore}
\setbeamercovered{opaque}
\usecolortheme{tuebingen}
\setbeamertemplate{navigation symbols}{}
\usefonttheme{default}
\useoutertheme{infolines}
% \useoutertheme{miniframes}
}
\AtBeginSubsection[]
{
\begin{frame}<beamer>
\begin{center}
\Huge \insertsectionhead
\end{center}
\tableofcontents[
currentsubsection,
hideothersubsections,
sectionstyle=show/hide,
subsectionstyle=show/shaded,
]
% \frametitle{\insertsectionhead}
\end{frame}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
\setbeamertemplate{blocks}[rounded][shadow=true]
\title[]{Scientific Computing -- Statistics}
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
University T\"ubingen\\
Bernstein Center T\"ubingen}
\institute[Scientific Computing]{}
\date{10/21/2014}
%\logo{\pgfuseimage{logo}}
\subject{Lectures}
%%%%%%%%%% configuration for code
\lstset{
basicstyle=\ttfamily,
numbers=left,
showstringspaces=false,
language=Matlab,
commentstyle=\itshape\color{darkgray},
keywordstyle=\color{blue},
stringstyle=\color{green},
backgroundcolor=\color{blue!10},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
captionpos=b,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \mycite{<source>}: typeset a short citation/attribution flush right
% in tiny, slightly greyed type (used under figures and quotes).
\newcommand{\mycite}[1]{
\begin{flushright}
\tiny \color{black!80} #1
\end{flushright}
}
\input{../latex/environments.tex}
\makeatother
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}
\frametitle{information}
\begin{itemize}
\item Samuels, M. L., Wittmer, J. A., \& Schaffner,
A. A. (2010). Statistics for the Life Sciences (4th ed.,
p. 668). Prentice Hall.
\item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
Ed.) Prentice Hall, New Jersey (4th ed., Vol. 4, p. 663). Prentice
Hall. doi:10.1037/0012764
\item \url{http://stats.stackexchange.com}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Day 2 -- errorbars, confidence intervals, and tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Types of evidence}
\begin{frame}
\scriptsize
\frametitle{Examples}
\begin{itemize}
\item Before new drugs are given to human subjects, it is common
practice to first test them in dogs or other animals. In part of
one study, a new investigational drug was given to eight male and
eight female dogs at doses of 8 mg/kg and 25 mg/kg. Within each
sex, the two doses were assigned at random to the eight dogs. Many
``endpoints'' were measured, such as cholesterol, sodium, glucose,
and so on, from blood samples, in order to screen for toxicity
problems in the dogs before starting studies on humans. One
endpoint was alkaline phosphatase level (or APL, measured in U/l).
For females, the effect of increasing the dose from 8 to 25 mg/kg
was positive, although small (the average APL increased from 133.5
to 143 U/l), but for males the effect of increasing the dose from
8 to 25 mg/kg was negative.\pause
\item On 15 July 1911, 65-year-old Mrs. Jane Decker was struck by
lightning while in her house. She had been deaf since birth, but
after being struck, she recovered her hearing, which led to a
headline in the New York Times, ``Lightning Cures Deafness.''
\pause
\item Some research has suggested that there is a genetic basis for
sexual orientation. One such study involved measuring the
midsagittal area of the anterior commissure (AC) of the brain for
30 homosexual men, 30 heterosexual men, and 30 heterosexual
women. The researchers found that the AC tends to be larger in
heterosexual women than in heterosexual men and that it is even
larger in homosexual men.
\end{itemize}
\mycite{Samuels, Wittmer, Schaffner 2010}
\end{frame}
\begin{frame}
\scriptsize
\frametitle{types of evidence}
\begin{center}
\Large
{\em experiment} \\ is better than\\ {\em observational study}\\ is
better than\\ {\em anecdotal evidence}
\end{center}
\end{frame}
\subsection{What is inferential statistics?}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{sources of error in an experiment}
\begin{task}{Think about it for 2 min}
If you repeat a scientific experiment, why do you not get the same
result every time you repeat it?
\end{task}
\pause
\begin{itemize}
\item sampling error (a finite subset of the population of interest
is selected in each experiment)
\item nonsampling errors (e.g. noise, uncontrolled factors)
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{statisticians are lazy}
\Large
\only<1>{
\begin{center}
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-26-05_771.jpg}
\end{center}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<2>{
\begin{center}
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-41-39_523.jpg}
\end{center}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<3>{
\begin{center}
\includegraphics[width=.8\linewidth]{figs/2012-10-29_16-29-35_312.jpg}
\end{center}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}
\end{frame}
% % ----------------------------------------------------------
\begin{frame}
\frametitle{illustrating examples}
\begin{question}{lung volume of smokers}
Assume you know the sampling distribution of the mean lung volume
of smokers. Would you believe that
the sample came from a group of smokers?
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example01.png}
\end{center}
\end{question}
\end{frame}
\begin{frame}
\frametitle{illustrating examples}
\begin{question}{lung volume of smokers}
What about now? How would the sampling distribution change if I
change the population to (i) athletes, (ii) old people, (iii) all people?
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example02.png}
\end{center}
\end{question}
\end{frame}
\begin{frame}
\frametitle{illustrating examples}
\begin{question}{Is this diet effective?}
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example03.png}
\end{center}
\end{question}
\end{frame}
\begin{frame}
\frametitle{illustrating examples}
\begin{question}{Is this diet effective?}
What do you think now?
\begin{center}
\includegraphics[width=.6\linewidth]{figs/example04.png}
\end{center}
\end{question}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{the (imaginary) meta-study}
\begin{center}
\only<1>{
\framesubtitle{finite sampling introduces variation: the sampling distribution}
\includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<2>{
\framesubtitle{statistic vs. population parameter}
\includegraphics[width=.8\linewidth]{figs/statistic1.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<3>{
\framesubtitle{statistic vs. population parameter}
\includegraphics[width=.8\linewidth]{figs/statistic2.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<4>{
\framesubtitle{what parts of this diagram do we have in real life?}
\includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<5>{
\framesubtitle{what parts of this diagram do we have in real life?}
\includegraphics[width=.8\linewidth]{figs/statistic3.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
}\pause
\only<6->{
\framesubtitle{what statistics does }
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\includegraphics[width=1.\linewidth]{figs/statistic4.png}
\mycite{Hesterberg et al., Bootstrap Methods and Permutation
Tests}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\begin{itemize}
\item it assumes, derives, or simulates the sampling
distribution\pause
\item the sampling distribution only makes sense if you think
about it in terms of the meta-study\pause
\item {\color{red} the sampling distribution is the key to
answering questions about the population from the value of
the statistic}
\end{itemize}
\end{minipage}
\end{minipage}
}
\end{center}
\end{frame}
\begin{frame}
\frametitle{summary}
\begin{itemize}
\item In statistics, we use finite samples from a population to reason
about features of the population. \pause
\item The particular feature of the population we are interested in is called
{\color{blue} population parameter}. We usually measure this
parameter in our finite sample as well
({\color{blue}statistic}).\pause
\item Because of variations due to finite sampling the statistic
almost never matches the population parameter. \pause
\item Using the {\color{blue}sampling distribution} of the statistic, we make
statements about the relation between our statistic and the
population parameter.
\end{itemize}
\end{frame}
\subsection{Errorbars}
% ----------------------------------------------------------
\begin{frame}
\frametitle{illustrating example}
As part of a study of the development of the thymus gland, researchers
weighed the glands of $50$ chick embryos after 14 days of
incubation. The following plot depicts the mean thymus gland weight (in mg):
\mycite{modified from SWS exercise 6.3.3.}
\pause
{\bf Which of the two bar plots is the correct way of displaying the
data?}
\begin{columns}
\begin{column}[t]{.5\linewidth}
\includegraphics[width=\linewidth]{figs/StandardErrorOrStandardDeviation.pdf}
\end{column}
\begin{column}[t]{.5\linewidth}
\pause That depends on what you want to say
\begin{itemize}
\item To give a measure of variability in the data: use the
{\color{blue} standard deviation $\hat\sigma =
\sqrt{\frac{1}{n-1}\sum_{i=1}^n (x_i - \hat\mu)^2}$}
\item To make a statement about the variability in the mean
estimation: use {\color{blue}standard error $\frac{\hat\sigma}{\sqrt{n}}$}
\end{itemize}
\end{column}
\end{columns}
%%%%%%%%%%%%%%% GO ON HERE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% that depends: variability (descriptiv statistics, how variable is
% the mean -> inferential, makes only sense in the meta-study setting)
% first matlab exercise: simulate standard error
% recommend paper for eyeballing test results from standard errors
% from std of mean to confidence intervals
% introduce bootstrapping (matlab exercise), then t-statistic
% intervals
% end with standard error of the median (and the thing from wikipedia)
\end{frame}
%------------------------------------------------------------------------------
\begin{frame}
\frametitle{standard error}
\framesubtitle{bootstrapping}
\begin{task}{standard error vs. standard deviation}
\begin{itemize}
\item Download the dataset {\tt thymusglandweights.dat} from Ilias
\item Write a program that loads the data into Matlab, extracts
the first $80$ data points, and repeats the following steps
$m=500$ times:
\begin{enumerate}
\item draw $80$ data points from $x$ with replacement
\item compute their mean and store it
\end{enumerate}
Look at the standard deviation of the computed means.
\item Compare the result to the standard deviation of the original
$80$ data points and the standard error.
\end{itemize}
\end{task}
\end{frame}
\begin{frame}[fragile]
\frametitle{standard error}
\begin{lstlisting}
load thymusglandweights.dat
n = 80;
m = 500;
x = thymusglandweights(1:n);
mu = zeros(m,1);
for i = 1:m
mu(i) = mean(x(randi(n,n,1)));
end
disp(['bootstrap standard error: ', num2str(std(mu))]);
disp(['standard error: ', num2str(std(x)/sqrt(n))]);
\end{lstlisting}
\end{frame}
%------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{standard error}
\framesubtitle{bootstrapping}
\begin{itemize}
\item The sample standard error $\frac{\hat\sigma}{\sqrt{n}}$ is
{\color{blue}an estimate of the standard deviation of the means}
in repeated experiments, which is computed from a single
experiment.
\item When you want to do statistical tests on the mean, it is
better to use the standard error, because one can eyeball
significance from it
\mycite{Cumming, G., Fidler, F., \& Vaux, D. L. (2007). Error bars
in experimental biology. The Journal of Cell Biology, 177(1),
7--11.}
\item {\color{blue}Bootstrapping} is a way to generate an estimate
of the {\color{blue}sampling distribution of any statistic}. Instead of
sampling from the true distribution, it samples from the
empirical distribution represented by your dataset.
\mycite{Efron, B., \& Tibshirani, R. J. (1994). An Introduction to the Bootstrap. Chapman and Hall/CRC}
\end{itemize}
\end{frame}
%------------------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{standard error of the median?}
{\bf What kind of errorbars should we use for the median?}
It depends again:
{\bf Descriptive statistics}
\begin{itemize}
\item As a {\color{blue}descriptive statistic} one could use the {\em median
absolute deviation}: the median of the absolute differences of
the datapoints from the median.
\item Alternatively, one could bootstrap a standard error of the
median.
\end{itemize}
\pause
{\bf Inferential statistics}
\begin{itemize}
\item For {\color{blue}inferential statistics} one should use
something that gives the reader {\color{blue}information about
significance}.
\item Here, {\color{blue} confidence intervals} are a better choice.
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\subsection{confidence intervals \& bootstrapping}
%------------------------------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals}
\begin{center}
\only<1>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-55-39_181.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<2>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-56-59_866.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<3>{
\vspace{.1cm}
\includegraphics[width=.4\linewidth]{figs/2012-10-29_14-58-18_054.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<4>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_14-59-05_984.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<5>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_15-04-38_517.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<6>{
\vspace{.1cm}
\includegraphics[width=.6\linewidth]{figs/2012-10-29_15-09-25_388.jpg}
\mycite{Larry Gonick, The Cartoon Guide to Statistics}
}
\end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals for the median}
\begin{definition}{Confidence interval}
A $(1-\alpha)\cdot 100\%$ confidence interval for a statistic
$\hat\theta$ is an interval $\hat\theta \pm a$ such that the
population parameter $\theta$ is contained in that interval
$(1-\alpha)\cdot 100\%$ of the experiments.
An alternative way to put it is that $(\hat\theta - \theta) \in
[-a,a]$ in $(1-\alpha)\cdot 100\%$ of the cases.
\end{definition}
\begin{columns}
\begin{column}[t]{.5\linewidth}
If we knew the sampling distribution of the median $\hat m$, could
we generate, e.g., a $95\%$ confidence interval?\pause
\vspace{.5cm}
Yes, we could choose the interval such that $\hat m - m$ lies in
that interval in $95\%$ of the cases.
\end{column}
\begin{column}[t]{.5\linewidth}
\only<1>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian00.pdf}}
\only<2>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian01.pdf}}
\end{column}
\end{columns}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals for the mean via bootstrapping}
\framesubtitle{how to get the sampling distribution}
\begin{task}{bootstrapping a confidence interval for the mean}
\begin{itemize}
\item Use the same dataset as before.
\item Bootstrap $500$ means.
\item Plot their distribution.
\item Compute the $2.5\%$ and the $97.5\%$ percentile of the
$500$ means.
\item Mark them in the plot.
\end{itemize}
These two numbers give you $\hat m -a$ and $\hat m + a$ for
the $95\%$ confidence interval.
\end{task}
\end{frame}
\begin{frame}[fragile]
\frametitle{confidence intervals for the mean}
\scriptsize
\begin{lstlisting}
load thymusglandweights.dat
n = 80;
x = thymusglandweights(1:n);
m = 500;
me = zeros(m,1);
for i = 1:m
me(i) = mean(x(randi(n,n,1)));
end
disp(['bootstrap quantiles: ' , num2str(quantile(me,0.025)), ' ' ,num2str(quantile(me,1-0.025))]);
\end{lstlisting}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence intervals}
\framesubtitle{Notice the theme!}
\begin{enumerate}
\item choose a statistic
\item get the sampling distribution of the statistic (by theory or
simulation)
\item use that distribution to reason about the relation between the
true population parameter (e.g. $m$) and the sampled statistic
$\hat m$
\end{enumerate}
\begin{center}
\color{blue}
This is the scaffold of most statistical techniques. Try to find
it and it can help you understand them.
\end{center}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence interval for the mean}
\framesubtitle{Let's search the pattern in the normal way of computing
a confidence interval for the mean}
\begin{itemize}
\item If the $x_1,...,x_n\sim \mathcal N(\mu,\sigma)$ are Gaussian, then $\hat\mu$ is Gaussian as
well
\item What is the mean of $\hat\mu$? What is its standard deviation?\pause
\item[]{\color{gray} $\langle\hat\mu\rangle_{X_1,...,X_n} = \mu$ and
$\mbox{std}(\hat\mu) = \frac{\sigma}{\sqrt{n}}$}\pause
\item The problem is, that $\hat\mu \sim \mathcal N\left(\mu,
\frac{\sigma}{\sqrt{n}}\right)$ depends on unknown population
parameters.\pause
\item However, $$\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}} \sim
\mbox{t-distribution with }n-1\mbox{ degrees of freedom}$$
\item Therefore,
\begin{align*}
P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
\end{align*}
\end{itemize}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
\frametitle{confidence interval for the mean}
\begin{task}{Analytical confidence interval for the mean}
Extend your script to contain the analytical confidence
interval using
\begin{align*}
P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
\end{align*}
Hint: Use the function {\tt tinv(0.025, n-1)} to get the value of
$t_{2.5\%}$ and similar for $t_{97.5\%}$.
\end{task}
\end{frame}
\begin{frame}[fragile]
\frametitle{solution}
\scriptsize
\begin{lstlisting}
load thymusglandweights.dat
n = 80;
x = thymusglandweights(1:n);
m = 500;
me = zeros(m,1);
for i = 1:m
me(i) = mean(x(randi(n,n,1)));
end
t025 = tinv(0.025, n-1);
t975 = tinv(0.975, n-1);
se = std(x)/sqrt(n);
disp(['bootstrap quantiles: ' , num2str(quantile(me,0.025)), ' ' ,num2str(quantile(me,1-0.025))]);
disp(['analytical CI: ' , num2str(mean(x)+t025*se), ' ' , num2str(mean(x)+t975*se)]);
\end{lstlisting}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{statistical tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{ingredients into a test}
\begin{itemize}
\item {\bf What is the goal of a test?}\pause
\item[] Check whether a measured
statistic looks different from what you would expect if there was no
effect.\pause
\item {\bf What are the ingredients into a test?}\pause
\item[] a test statistic (e.g. the mean, the median, ...) and a null
distribution\pause
\item {\bf What is a null distribution?}\pause
\item[] The sampling distribution of the statistic in case there is
no effect (i.e. the Null hypothesis is true).
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{how tests work}
\begin{enumerate}
\item Choose a statistic.
\item Get a null distribution.
\item Compare your actually measured value with the Null
distribution.
\end{enumerate}
\end{frame}
\begin{frame}
\frametitle{Example: one sample test}
\framesubtitle{step 2: get a Null distribution}
\scriptsize
Assume that the expected weight of a thymus gland from the
literature is 34.3g. We want to test whether the mean of our
thymus gland dataset is different from the expectation in the
literature. Comparing a statistic of a dataset against a fixed value
is called {\em one sample test}.
\pause
\begin{itemize}
\item {\bf How could we simulate the distribution of the data if the
mean was really 34.3g?}\pause
\item[] Bootstrapping.
\end{itemize}
\begin{task}{generating a null distribution}
\begin{itemize}
\item Write a matlab program that bootstraps 2000 means from the
thymus gland dataset.
\item How can we adjust the data so that it has mean 34.3g (remember,
we want to simulate the null distribution)?
\item Plot a histogram of these 2000 means.
\item Also indicate the actual mean of the data.
\end{itemize}
\end{task}
\end{frame}
\begin{frame}
\frametitle{Example: one sample test}
\framesubtitle{step 3: compare the actual value to the Null distribution}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
The question we want to answer in this step is:
\begin{center}
\color{blue} Does the actually measured value look like it came
from the Null distribution?
\end{center}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\includegraphics[width=\linewidth]{figs/bootstraptest.png}
\end{minipage}
\end{minipage}
{\bf How could we do this in our bootstrapping example?}\pause
\begin{itemize}
\item Set a threshold. \pause How do we choose the threshold? \pause Via type I error.\pause
\item Specify the type I error if we used the actual measured value
as threshold (p-value). Why is that a reasonable strategy?
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Example: one sample test}
\framesubtitle{step 3: compare the actual value to the Null distribution}
\begin{task}{type I error and p-value}
Extend the script such that it
\begin{itemize}
\item computes the $5\%$ significance boundaries from the
distribution and plot it into the histogram.
\item computes a p-value.
\end{itemize}
\end{task}
\end{frame}
\begin{frame}
\frametitle{two sample test}
\framesubtitle{permutation test}
{\bf Brain weight:} In 1888, P. Topinard published data on the brain
weights of hundreds of French men and women. Brain weights are given
in gram. The data can be downloaded from Ilias (example 002 from
yesterday).
\vspace{.5cm}
{\bf How could we determine (similar to bootstrapping) whether the
mean brain weight of males and females are different?}
\begin{itemize}
\item What do we use as a statistic?
\item[]<2-> The difference of the means of the two groups.
\item How do we simulate the null distribution?
\item[]<3-> Shuffle the labels ``male'' and ``female'', compute
difference in means of two groups, and repeat.
\end{itemize}
\end{frame}
\begin{frame}
\begin{center}
\Huge That's it.
\end{center}
\end{frame}
\end{document}