\documentclass{beamer}
\usepackage{xcolor}
\usepackage{listings}
\usepackage{pgf}
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade} 
%\usepackage{multimedia}
\usepackage[latin1]{inputenc}
\usepackage{amsmath}
\usepackage{bm} 
\usepackage[T1]{fontenc}
\usepackage{hyperref}
\usepackage{ulem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Appearance settings that apply in presentation mode only:
% base theme, colour theme, font theme and outer (headline/footline) theme.
\mode<presentation>
{
  \usetheme{Singapore}
  % NOTE(review): confirm that `opaque` is a key \setbeamercovered accepts in
  % the installed beamer version.
  \setbeamercovered{opaque}
  % presumably a locally installed colour theme (not defined in this file) -- confirm
  \usecolortheme{tuebingen}
  % empty template body: no navigation symbols are drawn
  \setbeamertemplate{navigation symbols}{}
  \usefonttheme{default}
  \useoutertheme{infolines}
  % \useoutertheme{miniframes}
}

% At the start of every subsection, insert a frame (beamer mode only) that
% shows the current section heading in \Huge, followed by a table of contents
% with other sections hidden and the non-current subsections shaded.
% The empty optional argument [] is the text used for starred \subsection*
% commands, i.e. nothing is inserted for those.
\AtBeginSubsection[]
{
  \begin{frame}<beamer>
    \begin{center}
      \Huge \insertsectionhead
    \end{center}
    \tableofcontents[ 
    currentsubsection, 
    hideothersubsections, 
    sectionstyle=show/hide, 
    subsectionstyle=show/shaded, 
] 
    % \frametitle{\insertsectionhead}
  \end{frame}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5

% Block environments are drawn with rounded corners and a shadow.
\setbeamertemplate{blocks}[rounded][shadow=true]

% Title-page metadata. The bracketed optional arguments are the short forms
% beamer uses in headlines/footlines: empty for the title, "Statistics" for
% the author, and "Scientific Computing" for the institute (whose long form
% is left empty).
\title[]{Scientific Computing -- Statistics}
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
  University T\"ubingen\\
Bernstein Center T\"ubingen}

\institute[Scientific Computing]{}
 \date{10/21/2014}
%\logo{\pgfuseimage{logo}}

% Subject entry for the document metadata.
\subject{Lectures}

%%%%%%%%%% configuration for code
% Global defaults for every lstlisting environment in the slides.
\lstset{
 basicstyle=\ttfamily, % typewriter font for code
 numbers=left, % line numbers on the left
 showstringspaces=false, % do not mark spaces inside strings
 language=Matlab, % Matlab keyword/comment/string highlighting
 commentstyle=\itshape\color{darkgray}, % comments: italic, dark gray
 keywordstyle=\color{blue}, % keywords: blue
 stringstyle=\color{green}, % string literals: green
 backgroundcolor=\color{blue!10}, % light blue background
 breaklines=true, % wrap lines that are too long
 breakautoindent=true, % indent the wrapped part like the original line
 columns=flexible, % flexible column alignment
 frame=single, % single-line frame around the listing
 captionpos=b, % captions below the listing
 xleftmargin=1em, % extra left margin
 xrightmargin=1em, % extra right margin
 aboveskip=10pt % vertical space above the listing
 }
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% \mycite{<source>}: typeset a source attribution as a tiny, dark-grey
% (80% black), right-aligned paragraph. Line ends inside the definition are
% protected with % so that no stray space tokens enter the macro body.
\newcommand{\mycite}[1]{%
  \begin{flushright}%
    \tiny\color{black!80}#1%
  \end{flushright}%
}

% Shared definitions from the parent latex/ directory (presumably where the
% task/question environments used in the frames below are defined -- confirm).
\input{../latex/environments.tex}
% NOTE(review): no matching \makeatletter is visible in this file; presumably
% this closes one opened inside environments.tex -- confirm.
\makeatother
 
\begin{document} 
 
\begin{frame} 
  \titlepage 

\end{frame} 


\begin{frame} 
  \frametitle{information}
  \begin{itemize}
  \item Samuels, M. L., Wittmer, J. A., \& Schaffner,
    A. A. (2010). Statistics for the Life Sciences (4th ed.,
    p. 668). Prentice Hall.
  \item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
    Ed.) Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
    Hall. doi:10.1037/0012764
  \item \url{http://stats.stackexchange.com}
  \end{itemize}
\end{frame} 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Day 2 -- errorbars, confidence intervals, and tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Types of evidence}
\begin{frame}
  \scriptsize
  \frametitle{Examples}
  \begin{itemize}
  \item Before new drugs are given to human subjects, it is common
    practice to first test them in dogs or other animals. In part of
    one study, a new investigational drug was given to eight male and
    eight female dogs at doses of 8 mg/kg and 25 mg/kg.  Within each
    sex, the two doses were assigned at random to the eight dogs. Many
    ``endpoints'' were measured, such as cholesterol, sodium, glucose,
    and so on, from blood samples, in order to screen for toxicity
    problems in the dogs before starting studies on humans.  One
    endpoint was alkaline phosphatase level (or APL, measured in U/l).
    For females, the effect of increasing the dose from 8 to 25 mg/kg
    was positive, although small (the average APL increased from 133.5
    to 143 U/l), but for males the effect of increasing the dose from
    8 to 25 mg/kg was negative.\pause
  \item On 15 July 1911, 65-year-old Mrs. Jane Decker was struck by
    lightning while in her house. She had been deaf since birth, but
    after being struck, she recovered her hearing, which led to a
    headline in the New York Times, ``Lightning Cures Deafness.''
    \pause
  \item Some research has suggested that there is a genetic basis for
    sexual orientation. One such study involved measuring the
    midsagittal area of the anterior commissure (AC) of the brain for
    30 homosexual men, 30 heterosexual men, and 30 heterosexual
    women. The researchers found that the AC tends to be larger in
    heterosexual women than in heterosexual men and that it is even
    larger in homosexual men.
  \end{itemize}
  \mycite{Samuels, Wittmer, Schaffner 2010}
\end{frame}


\begin{frame}
  \scriptsize
  \frametitle{types of evidence}
  \begin{center}
    \Large
    {\em experiment} \\ is better than\\ {\em observational study}\\ is
    better than\\ {\em anecdotal evidence}
  \end{center}
\end{frame}


\subsection{What is inferential statistics?}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
  \frametitle{sources of error in an experiment}
  \begin{task}{Think about it for 2 min}
    If you repeat a scientific experiment, why do you not get the same
    result every time you repeat it?
  \end{task}
  \pause
  \begin{itemize}
  \item sampling error (a finite subset of the population of interest
    is selected in each experiment)
  \item nonsampling errors (e.g. noise, uncontrolled factors)
  \end{itemize}
\end{frame}

% ----------------------------------------------------------
\begin{frame}[fragile]
\frametitle{statisticians are lazy}
\Large
\only<1>{
  \begin{center}
    \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-26-05_771.jpg}
  \end{center}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<2>{
  \begin{center}
    \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-41-39_523.jpg}
  \end{center}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
}\pause
\only<3>{
  \begin{center}
    \includegraphics[width=.8\linewidth]{figs/2012-10-29_16-29-35_312.jpg}
  \end{center}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
}
\end{frame}

% % ----------------------------------------------------------
\begin{frame} 
\frametitle{illustrating examples}
\begin{question}{lung volume of smokers}
  Assume you know the sampling distribution of the mean lung volume
  of smokers. Would you believe that
  the sample came from a group of smokers?
  \begin{center}
    \includegraphics[width=.6\linewidth]{figs/example01.png}
  \end{center}
\end{question}
\end{frame}

\begin{frame} 
\frametitle{illustrating examples}
\begin{question}{lung volume of smokers}
  What about now? How would the sampling distribution change if I
  change the population to (i) athletes, (ii) old people, (iii) all people?
  \begin{center}
    \includegraphics[width=.6\linewidth]{figs/example02.png}
  \end{center}
\end{question}
\end{frame}


\begin{frame} 
\frametitle{illustrating examples}
\begin{question}{Is this diet effective?}
  \begin{center}
    \includegraphics[width=.6\linewidth]{figs/example03.png}
  \end{center}
\end{question}
\end{frame}

\begin{frame} 
\frametitle{illustrating examples}
\begin{question}{Is this diet effective?}
  What do you think now? 
  \begin{center}
    \includegraphics[width=.6\linewidth]{figs/example04.png}
  \end{center}
\end{question}
\end{frame}

% ----------------------------------------------------------
\begin{frame} 
\frametitle{the (imaginary) meta-study}
\begin{center}
  \only<1>{
    \framesubtitle{finite sampling introduces variation: the sampling distribution}
    \includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
      Tests} 
  }\pause 
  \only<2>{
    \framesubtitle{statistic vs. population parameter}
    \includegraphics[width=.8\linewidth]{figs/statistic1.png}
    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
      Tests} 
  }\pause
  \only<3>{
    \framesubtitle{statistic vs. population parameter}
    \includegraphics[width=.8\linewidth]{figs/statistic2.png}
    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
      Tests} 
  }\pause
  \only<4>{
    \framesubtitle{what parts of this diagram do we have in real life?}

    \includegraphics[width=.8\linewidth]{figs/samplingDistribution.png}
    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
      Tests} 
  }\pause
  \only<5>{
    \framesubtitle{what parts of this diagram do we have in real life?}

    \includegraphics[width=.8\linewidth]{figs/statistic3.png}
    \mycite{Hesterberg et al., Bootstrap Methods and Permutation
      Tests} 
  }\pause
  \only<6->{
    \framesubtitle{what statistics does }
    \begin{minipage}{1.0\linewidth}
      \begin{minipage}{0.5\linewidth}
        \includegraphics[width=1.\linewidth]{figs/statistic4.png}
        \mycite{Hesterberg et al., Bootstrap Methods and Permutation
          Tests}
      \end{minipage}
      \begin{minipage}{0.5\linewidth}
        \begin{itemize}
        \item it assumes, derives, or simulates the sampling
          distribution\pause
        \item the sampling distribution only makes sense if you think
          about it in terms of the meta study\pause
        \item  {\color{red} the sampling distribution is the key to
            answering questions about the population from the value of
            the statistic}
        \end{itemize}
      \end{minipage}
    \end{minipage}
  }

\end{center}
\end{frame}



\begin{frame} 
\frametitle{summary}
\begin{itemize}
\item In statistics, we use finite samples from a population to reason
  about features of the population. \pause
\item The particular feature of the population we are interested in is called
  {\color{blue} population parameter}. We usually measure this
  parameter in our finite sample as well
  ({\color{blue}statistic}).\pause
\item Because of variations due to finite sampling the statistic
  almost never matches the population parameter. \pause
\item Using the {\color{blue}sampling distribution} of the statistic, we make
  statements about the relation between our statistic and the
  population parameter. 
\end{itemize}
\end{frame}

\subsection{Errorbars}
% ----------------------------------------------------------
\begin{frame} 
\frametitle{illustrating example}

As part of a study of the development of the thymus gland, researchers
weighed the glands of $50$ chick embryos after 14 days of
incubation. The following plot depicts the mean thymus gland weights (in mg): 
\mycite{modified from SWS exercise 6.3.3.}
\pause
{\bf Which of the two bar plots is the correct way of displaying the
  data?}

\begin{columns}
  \begin{column}[l]{.5\linewidth}
    \includegraphics[width=\linewidth]{figs/StandardErrorOrStandardDeviation.pdf}
  \end{column}
  \begin{column}[r]{.5\linewidth}
    \pause That depends on what you want to say
    \begin{itemize}
    \item To give a measure of variability in the data: use the
      {\color{blue} standard deviation $\hat\sigma =
        \sqrt{\frac{1}{n-1}\sum_{i=1}^n (x_i - \hat\mu)^2}$}
    \item To make a statement about the variability in the mean
      estimation: use {\color{blue}standard error $\frac{\hat\sigma}{\sqrt{n}}$}
    \end{itemize}
  \end{column}
\end{columns}

%%%%%%%%%%%%%%% GO ON HERE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% that depends: variability (descriptiv statistics, how variable is
% the mean -> inferential, makes only sense in the meta-study setting)
% first matlab exercise: simulate standard error
% recommend paper for eyeballing test results from standard errors
% from std of mean to confidence intervals
% introduce bootstrapping (matlab exercise), then t-statistic
% intervals
% end with standard error of the median (and the thing from wikipedia)
\end{frame}
%------------------------------------------------------------------------------
\begin{frame}
  \frametitle{standard error}
  \framesubtitle{bootstrapping}
  
  \begin{task}{standard error vs. standard deviation}

    \begin{itemize}
    \item Download the dataset {\tt thymusglandweights.dat} from Ilias
    \item Write a program that loads the data into matlab, extracts
      the first $80$ datapoints, and repeats the following steps
      $m=500$ times:
      \begin{enumerate}
      \item draw $80$ data points from $x$ with replacement
      \item compute their mean and store it
      \end{enumerate}
      Look at the standard deviation of the computed means.
    \item Compare the result to the standard deviation of the original
      $80$ data points and the standard error.
    \end{itemize}
  \end{task}
\end{frame}

\begin{frame}[fragile]
  \frametitle{standard error}
\begin{lstlisting}
load thymusglandweights.dat
% bootstrap estimate of the standard error of the mean
n = 80;   % number of data points used
m = 500;  % number of bootstrap repetitions
x = thymusglandweights(1:n);

% draw n points from x with replacement, store their mean
mu = zeros(m,1);
for i = 1:m
    mu(i) = mean(x(randi(n,n,1)));
end
disp(['bootstrap standard error: ', num2str(std(mu))]);
disp(['standard error: ', num2str(std(x)/sqrt(n))]);
\end{lstlisting}
\end{frame}
%------------------------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{standard error}
  \framesubtitle{bootstrapping}
  \begin{itemize}
  \item The sample standard error $\frac{\hat\sigma}{\sqrt{n}}$ is
    {\color{blue}an estimate of the standard deviation of the means}
    in repeated experiments which is computed from a single
    experiment.
  \item When you want to do statistical tests on the mean, it is
    better to use the standard error, because one can eyeball
    significance from it
    \mycite{Cumming, G., Fidler, F., \& Vaux, D. L. (2007). Error bars
      in experimental biology. The Journal of Cell Biology, 177(1),
      7--11.}
    \item {\color{blue}Bootstrapping} is a way to generate an estimate
      of the {\color{blue}sampling distribution of any statistic}. Instead of
      sampling from the true distribution, it samples from the
      empirical distribution represented by your dataset.
      \mycite{Efron, B., \& Tibshirani, R. J. (1994). An Introduction to the Bootstrap. Chapman and Hall/CRC}
  \end{itemize}
\end{frame}

%------------------------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{standard error of the median?}
  {\bf What kind of errorbars should we use for the median?}

  It depends again:

  {\bf Descriptive statistics}
  \begin{itemize}
  \item As a {\color{blue}descriptive statistic} one could use the {\em median
      absolute deviation}: the median of the absolute differences of
    the datapoints from the median.
  \item Alternatively, one could bootstrap a standard error of the
    median.
  \end{itemize}
  \pause
  {\bf Inferential statistics}
  \begin{itemize}
  \item For {\color{blue}inferential statistics} one should use
    something that gives the reader {\color{blue}information about
      significance}. 
  \item Here, {\color{blue} confidence intervals} are a better choice.
  \end{itemize}
\end{frame}

% ----------------------------------------------------------
\subsection{confidence intervals \& bootstrapping}
%------------------------------------------------------------------------------
\begin{frame} 
\frametitle{confidence intervals}
\begin{center}
  \only<1>{
    \vspace{.1cm}
    \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-55-39_181.jpg}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}

  }\pause
  \only<2>{
    \vspace{.1cm}
    \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-56-59_866.jpg}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
  }\pause
  \only<3>{
    \vspace{.1cm}
    \includegraphics[width=.4\linewidth]{figs/2012-10-29_14-58-18_054.jpg}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
  }\pause
  \only<4>{
    \vspace{.1cm}
    \includegraphics[width=.6\linewidth]{figs/2012-10-29_14-59-05_984.jpg}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
  }\pause
  \only<5>{
    \vspace{.1cm}
    \includegraphics[width=.6\linewidth]{figs/2012-10-29_15-04-38_517.jpg}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
  }\pause
  \only<6>{
    \vspace{.1cm}
    \includegraphics[width=.6\linewidth]{figs/2012-10-29_15-09-25_388.jpg}
  \mycite{Larry Gonick, The Cartoon Guide to Statistics}
  }
\end{center}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
  \frametitle{confidence intervals for the median}
  \begin{definition}{Confidence interval}
    A $(1-\alpha)\cdot 100\%$ confidence interval for a statistic
    $\hat\theta$ is an interval $\hat\theta \pm a$ such that the
    population parameter $\theta$ is contained in that interval in
    $(1-\alpha)\cdot 100\%$ of the experiments.

    An alternative way to put it is that $(\hat\theta - \theta) \in
    [-a,a]$ in $(1-\alpha)\cdot 100\%$ of the cases.
  \end{definition}


\begin{columns}
  \begin{column}[l]{.5\linewidth}
  If we knew the sampling distribution of the median $\hat m$, could
  we generate, e.g., a $95\%$ confidence interval?\pause
  \vspace{.5cm}

  Yes, we could choose the interval such that $\hat m - m$ is in that
  interval in $95\%$ of the cases.
  \end{column}
  \begin{column}[r]{.5\linewidth}
    \only<1>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian00.pdf}}
    \only<2>{\includegraphics[width=\linewidth]{figs/samplingDistributionMedian01.pdf}}
  \end{column}
\end{columns}



\end{frame}

% ----------------------------------------------------------
\begin{frame}
  \frametitle{confidence intervals for the mean via bootstrapping}
  \framesubtitle{how to get the sampling distribution}

  \begin{task}{bootstrapping a confidence interval for the mean}
    \begin{itemize}
    \item Use the same dataset as before.
    \item Bootstrap $500$ means.
    \item Plot their distribution.
    \item Compute the $2.5\%$ and the $97.5\%$ percentile of the
      $500$ means.
    \item Mark them in the plot. 
    \end{itemize}
    These two numbers give you $\hat m -a$ and $\hat m + a$ for
      the $95\%$ confidence interval.
  \end{task}
\end{frame}

\begin{frame}[fragile]
  \frametitle{confidence intervals for the mean}
\scriptsize
\begin{lstlisting}
load thymusglandweights.dat
n = 80;   % number of data points used
x = thymusglandweights(1:n);

% bootstrap m means: each from n points drawn from x with replacement
m = 500;
me = zeros(m,1);
for i = 1:m
    me(i) = mean(x(randi(n,n,1)));
end

% 2.5% and 97.5% quantiles of the bootstrapped means
disp(['bootstrap quantiles: ' , num2str(quantile(me,0.025)), ' ' ,num2str(quantile(me,1-0.025))]);

\end{lstlisting}
\end{frame}
% ----------------------------------------------------------
\begin{frame}
  \frametitle{confidence intervals}
  \framesubtitle{Notice the theme!}
  \begin{enumerate}
  \item choose a statistic
  \item get the sampling distribution of the statistic (by theory or
    simulation)
  \item use that distribution to reason about the relation between the
    true population parameter (e.g. $m$) and the sampled statistic
    $\hat m$
  \end{enumerate}
  
  \begin{center}
    \color{blue}
    This is the scaffold of most statistical techniques. Try to find
    it and it can help you understand them.
  \end{center}

\end{frame}



% ----------------------------------------------------------
\begin{frame} 
\frametitle{confidence interval for the mean}
\framesubtitle{Let's search the pattern in the normal way of computing
a confidence interval for the mean}  
\begin{itemize}
\item If the $x_1,...,x_n\sim \mathcal N(\mu,\sigma)$ are Gaussian, then $\hat\mu$ is Gaussian as
  well
\item What is the mean of $\hat\mu$? What is its standard deviation?\pause
\item[]{\color{gray} $\langle\hat\mu\rangle_{X_1,...,X_n} = \mu$ and
    $\mbox{std}(\hat\mu) = \frac{\sigma}{\sqrt{n}}$}\pause
\item The problem is, that $\hat\mu \sim \mathcal N\left(\mu,
    \frac{\sigma}{\sqrt{n}}\right)$ depends on unknown population
  parameters.\pause
\item However, $$\frac{\hat\mu-\mu}{\hat\sigma/\sqrt{n}} \sim
  \mbox{t-distribution with }n-1\mbox{ degrees of freedom}$$
\item Therefore,
\begin{align*}
  P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
\end{align*}
\end{itemize}
\end{frame}

% ----------------------------------------------------------
\begin{frame} 
\frametitle{confidence interval for the mean}
\begin{task}{Analytical confidence interval for the mean}
 Extend your script to contain the analytical confidence
 interval using
\begin{align*}
  P\left(t_{2.5\%}\le\frac{\hat{\mu}-\mu}{\hat{\sigma}/\sqrt{n}}\le t_{97.5\%}\right)&=P\left(t_{2.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\le\hat{\mu}-\mu\le t_{97.5\%}\frac{\hat{\sigma}}{\sqrt{n}}\right)
\end{align*}
Hint: Use the function {\tt tinv(0.025, n-1)} to get the value of
$t_{2.5\%}$ and similar for $t_{97.5\%}$.
\end{task}


\end{frame}


\begin{frame}[fragile]
\frametitle{solution}
\scriptsize
\begin{lstlisting}
load thymusglandweights.dat
n = 80;   % number of data points used
x = thymusglandweights(1:n);

% bootstrap m means: each from n points drawn from x with replacement
m = 500;
me = zeros(m,1);
for i = 1:m
    me(i) = mean(x(randi(n,n,1)));
end

% 2.5% and 97.5% quantiles of the t-distribution with n-1 degrees of freedom
t025 = tinv(0.025, n-1);
t975 = tinv(0.975, n-1);

% standard error of the mean
se = std(x)/sqrt(n);

disp(['bootstrap quantiles: ' , num2str(quantile(me,0.025)), ' ' ,num2str(quantile(me,1-0.025))]);
disp(['analytical CI: ' , num2str(mean(x)+t025*se), ' ' , num2str(mean(x)+t975*se)]);

\end{lstlisting}
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{statistical tests}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
  \frametitle{ingredients into a test}
  
  \begin{itemize}
  \item {\bf What is the goal of a test?}\pause
  \item[] Check whether a measured
  statistic looks different from what you would expect if there was no
  effect.\pause
  \item {\bf What are the ingredients into a test?}\pause
  \item[] a test statistic (e.g. the mean, the median, ...) and a null
    distribution\pause
  \item {\bf What is a null distribution?}\pause
  \item[] The sampling distribution of the statistic in case there is
    no effect (i.e. the Null hypothesis is true).
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{how tests work}
  \begin{enumerate}
  \item Choose a statistic.
  \item Get a null distribution.
  \item Compare your actually measured value with the Null
    distribution.
  \end{enumerate}
\end{frame}

\begin{frame}
  \frametitle{Example: one sample test}
  \framesubtitle{step 2: get a Null distribution}
  \scriptsize
  Assume that the expected weight of a thymus gland from the
  literature is 34.3g. We want to test whether the mean of our
  thymus gland dataset is different from the expectation in the
  literature. Comparing a statistic of a dataset against a fixed value
  is called {\em one sample test}. 
  \pause

  \begin{itemize}
  \item {\bf How could we simulate the distribution of the data if the
      mean was really 34.3g?}\pause
  \item[] Bootstrapping.
  \end{itemize}

  \begin{task}{generating a null distribution}
    \begin{itemize}
    \item Write a matlab program that bootstraps 2000 means from the
      thymus gland dataset.
    \item How can we adjust the data so that it has mean 34.3g (remember,
      we want to simulate the null distribution)?
    \item Plot a histogram of these 2000 means.
    \item Also indicate the actual mean of the data. 
    \end{itemize}
  \end{task}
\end{frame}

\begin{frame}
  \frametitle{Example: one sample test}
  \framesubtitle{step 3: compare the actual value to the Null distribution}
  \begin{minipage}{1.0\linewidth}
    \begin{minipage}{0.5\linewidth}
      The question we want to answer in this step is:
      \begin{center}
        \color{blue} Does the actually measured value look like it came
        from the Null distribution?
      \end{center}
    \end{minipage}
    \begin{minipage}{0.5\linewidth}
      \includegraphics[width=\linewidth]{figs/bootstraptest.png}
    \end{minipage}
  \end{minipage}
  {\bf How could we do this in our bootstrapping example?}\pause
  \begin{itemize}
  \item Set a threshold. \pause How do we choose the threshold? \pause Via type I error.\pause
  \item Specify the type I error if we used the actual measured value
    as threshold (p-value). Why is that a reasonable strategy?
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Example: one sample test}
  \framesubtitle{step 3: compare the actual value to the Null distribution}
  \begin{task}{type I error and p-value}
    Extend the script such that it
    \begin{itemize}
    \item computes the $5\%$ significance boundaries from the
      distribution and plots them into the histogram.
    \item computes a p-value.
    \end{itemize}
  \end{task}
\end{frame}

\begin{frame}
  \frametitle{two sample test}
  \framesubtitle{permutation test}
  Brain weight: In 1888, P. Topinard published data on the brain
  weights of hundreds of French men and women. Brain weights are given
  in grams. The data can be downloaded from Ilias (example 002 from
  yesterday). 

  \vspace{.5cm}
  {\bf How could we determine (similar to bootstrapping) whether the
    mean brain weights of males and females are different?}
  \begin{itemize}
  \item What do we use as a statistic?
  \item[]<2-> The difference of the means of the two groups.
  \item How do we simulate the null distribution?
  \item[]<3-> Shuffle the labels ``male'' and ``female'', compute
    difference in means of two groups, and repeat. 
  \end{itemize}
  
\end{frame}


\begin{frame}
  \begin{center}
    \Huge That's it.
  \end{center}
\end{frame}

\end{document}