% scientificComputing/statistics/lecture_statistics.tex

\documentclass{beamer}
\usepackage{xcolor}
\usepackage{listings}
\usepackage{pgf}
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
%\usepackage{multimedia}
\usepackage[latin1]{inputenc}
\usepackage{amsmath}
\usepackage{bm}
\usepackage[T1]{fontenc}
\usepackage{hyperref}
\usepackage{ulem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Look-and-feel settings; the braced group below is applied only in
% beamer's presentation mode.
\mode<presentation>
{
  \usetheme{Singapore}
  % NOTE(review): "opaque" is passed as the covered-items setting; presumably
  % it is meant to keep not-yet-uncovered items fully visible -- confirm
  % against the beamer manual.
  \setbeamercovered{opaque}
  % NOTE(review): the "tuebingen" color theme is not defined in this file;
  % presumably a custom theme file shipped with the lecture -- confirm.
  \usecolortheme{tuebingen}
  % Empty template: no navigation symbols are drawn.
  \setbeamertemplate{navigation symbols}{}
  \usefonttheme{default}
  \useoutertheme{infolines}
  % \useoutertheme{miniframes}
}

% At the beginning of each section, insert a frame (restricted to the
% "beamer" mode by the <beamer> specification) that shows only the section
% heading, centered and set in \Huge.
% NOTE(review): the optional argument is empty; presumably this leaves
% starred sections without such a frame -- confirm against the beamer manual.
\AtBeginSection[]
{
  \begin{frame}<beamer>
    \begin{center}
      \Huge \insertsectionhead
    \end{center}
    % Disabled alternative: section frame with title and table of contents.
    % \frametitle{\insertsectionhead}
    % \tableofcontents[currentsection,hideothersubsections]
  \end{frame}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5

% Blocks use the "rounded" template variant with the shadow option enabled.
\setbeamertemplate{blocks}[rounded][shadow=true]

% Title-page data. The optional [...] arguments are the short forms.
% NOTE(review): the short title is empty, while the short author and short
% institute carry "Statistics" and "Scientific Computing" and the long
% institute is empty; presumably chosen so that the infolines footline shows
% the lecture name -- confirm.
\title[]{Scientific Computing -- Statistics}
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
  University T\"ubingen\\
Bernstein Center T\"ubingen}

\institute[Scientific Computing]{}
 \date{11/27/2013}
% Disabled: logo on every slide.
%\logo{\pgfuseimage{logo}}

\subject{Lectures}

%%%%%%%%%% configuration for code
% Global defaults for all lstlisting environments in this lecture.
\lstset{
 % typewriter font, line numbers on the left, no visible-space marks in strings
 basicstyle=\ttfamily,
 numbers=left,
 showstringspaces=false,
 % listings are highlighted as Matlab code
 language=Matlab,
 % colors: dark-gray italic comments, blue keywords, green strings,
 % light blue (blue!10) background
 commentstyle=\itshape\color{darkgray},
 keywordstyle=\color{blue},
 stringstyle=\color{green},
 backgroundcolor=\color{blue!10},
 % long lines are broken, continuation lines keep the indentation
 breaklines=true,
 breakautoindent=true,
 columns=flexible,
 % single-line frame, captions below, 1em side margins, 10pt space above
 frame=single,
 captionpos=b,
 xleftmargin=1em,
 xrightmargin=1em,
 aboveskip=10pt
 }
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% \mycite{<text>}: typesets <text> as a right-aligned source note in \tiny
% size and the color black!80. The trailing % signs keep the line ends of
% this definition from becoming space tokens.
\newcommand{\mycite}[1]{%
  \begin{flushright}%
    \tiny\color{black!80} #1%
  \end{flushright}%
}

% Load shared definitions from ../latex/environments.tex.
% NOTE(review): presumably this file provides the emphasize, task and
% solution environments and the \source command used in the frames below,
% none of which are defined in this file -- confirm.
\input{../latex/environments.tex}
% NOTE(review): no matching \makeatletter is visible in this file; presumably
% this restores the category code of @ after environments.tex -- confirm.
\makeatother

\begin{document}

\begin{frame}
  \titlepage

\end{frame}

\begin{frame}
  \frametitle{plan}
  \setcounter{tocdepth}{1}
  \tableofcontents

\end{frame}
\begin{frame}
  \frametitle{information}
  \begin{itemize}
  \item Samuels, M. L., Wittmer, J. A., \& Schaffner,
    A. A. (2010). Statistics for the Life Sciences (4th ed.,
    p. 668). Prentice Hall.
  \item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
    Ed.) Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
    Hall. doi:10.1037/0012764
  \item \url{http://stats.stackexchange.com}
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% errorbars (error bar paper)
% confidence intervals (sources of error)
% plotting (the right plot for the right data, Dan plotting paper)
% statistical test structure (bootstrapping, resampling, permutation)
% Don'ts: repeated testing, exclude data points
% study design
% PCA

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section[Pr\"aludium]{Prelude}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% ----------------------------------------------------------
\begin{frame}
\frametitle{my expectations for this course}
\begin{itemize}
\item interest and participation
\item motivation to understand and question concepts
\item high scientific standard
\item intellectual honesty
\item sincere cooperation
\end{itemize}
\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{this week will be ...}

\only<1>{
\framesubtitle{... no \sout{fun} piece of cake}
\begin{center}
  \includegraphics[height=0.7\textheight]{figs/feeding.jpg}
\end{center}
}

\only<2>{
\framesubtitle{... no \sout{fun} piece of cake}
\begin{center}
  \includegraphics[height=0.7\textheight]{figs/nacho-trainer.jpg}
\end{center}
}

\only<3>{
\framesubtitle{... no lecture (please!)}
\begin{center}
  \includegraphics[height=0.7\textheight]{figs/soccer.jpg}
\end{center}
}

\end{frame}

% ----------------------------------------------------------
\begin{frame}
\frametitle{What you should learn this week}
\begin{itemize}
\item What makes good plots?
\item What is descriptive/inferential statistics?
\item What is the general structure of a statistical test?
\item What does a p-value mean?
\item How can I build my own tests?
\item How large should my $n$ be?
\item What is {\em maximum likelihood} and why is it important?
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section[descriptive statistics, errorbars, and plots]{Day 1 -- descriptive statistics, errorbars, and plots}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{types of data}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
  \frametitle{data scales}
  \framesubtitle{What data types are distinguished in statistics?}
  \Large
  {\bf Why are data types important?}
  \pause
  \begin{itemize}
  \item selection of statistics
  \item selection of plots
  \item selection of correct tests
  \end{itemize}
\end{frame}
%-------------------------------------------------------------

\begin{frame}
  \frametitle{data scales}
  \framesubtitle{nominal/categorical scale}
  \begin{itemize}
  \item properties like cell type, experimental group (i.e. treatment
    1, treatment 2, control)
  \item each observation/sample is put into one category
  \item there is no reasonable order among the categories
  \item  example: [rods, cones] vs. [cones, rods]
  \end{itemize}
\end{frame}
%-------------------------------------------------------------

\begin{frame}
  \frametitle{data scales}
  \framesubtitle{ordinal scale}
  \begin{itemize}
  \item like nominal scale, but there is an order
  \item {\bf but:} there is no reasonable measure of {\em distance}
    between the classes
  \item examples: ranks, ratings
  \end{itemize}
\end{frame}
%-------------------------------------------------------------

\begin{frame}
  \frametitle{data scales}
  \framesubtitle{interval scale}
  \begin{itemize}
  \item quantitative/metric values
  \item reasonable measure of distance between values but no absolute zero
  \item examples: temperature in $^\circ$C
  \end{itemize}
\end{frame}
%-------------------------------------------------------------

\begin{frame}
  \frametitle{data scales}
  \framesubtitle{absolute/ratio scale}
  \begin{itemize}
  \item like interval scale but with absolute zero
  \item example: temperature in kelvin (K)
  \end{itemize}
  \pause
  \begin{emphasize}{relationships between scales}
    \begin{itemize}
    \item scales exhibit increasing information content from nominal
      to absolute
    \item conversion ``downwards'' is always possible
    \end{itemize}
  \end{emphasize}
\end{frame}

%-------------------------------------------------------------
\begin{frame}
  \frametitle{examples from neuroscience and psychology}
  \begin{itemize}
  \item {\bf nominal:}\pause
    \begin{itemize}
    \item treatment group
    \item stimulus class
    \item cell type
    \end{itemize}

  \item {\bf ordinal:} \pause
    \begin{itemize}
    \item ratings
    \item clinical stages of a disease
    \item states of an ion channel
    \end{itemize}
  \item {\bf absolute/ratio:}\pause
    \begin{itemize}
    \item firing rate
    \item membrane potential
    \item ion concentration
    \end{itemize}
  \end{itemize}
\end{frame}
%-------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{statistics}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%-------------------------------------------------------------
\begin{frame}
  \frametitle{What is ``a statistic''?}
  \begin{definition}{statistic}
    A statistic (singular) is a single measure of some attribute of a
    sample (e.g., its arithmetic mean value). It is calculated by
    applying a function (statistical algorithm) to the values of the
    items of the sample, which are known together as a set of data.

    \source{http://en.wikipedia.org/wiki/Statistic}
  \end{definition}
\end{frame}

%-------------------------------------------------------------
\begin{frame}
  \frametitle{examples of test statistics}
  \begin{itemize}
  \item {\bf nominal:}\pause
    \begin{itemize}
    \item count
    \item relative frequency/proportion
    \end{itemize}

  \item {\bf ordinal:} \pause
    \begin{itemize}
    \item median
    \item quantile/percentile
    \item rank correlation
    \end{itemize}
  \item {\bf absolute/ratio:}\pause
    \begin{itemize}
    \item mean
    \item variance/ standard deviation
    \item Pearson correlation
    \end{itemize}
  \end{itemize}
\end{frame}

%-------------------------------------------------------------
\begin{frame}
  \frametitle{exercise}
  \begin{task}{Spearman rank correlation}
    \begin{enumerate}
    \item Use {\tt randi} to generate two  100-dimensional vectors
      {\tt x,y} of random integers between $0$ and $10$.
    \item Find out how to compute the Spearman rank correlation
      \[
        \rho = 1 - \frac{6 \sum_{i} d_i^2}{n(n^2 - 1)}
      \]
      with Matlab. Here
      $d_i = \operatorname{rank}(x_i) - \operatorname{rank}(y_i)$ is
      the difference between the ranks of the $i$-th data points.
    \item Compute $\rho$ between $x$ and $y$, between $x$ and
      $y^2$, between $\log(x+1)$ and $y^2$.
    \item Compute the ``standard'' (Pearson) correlation coefficient
      between these values.
    \item What can you observe and why does it make sense?
    \end{enumerate}
  \end{task}
\end{frame}

%-------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{solution}
  \begin{solution}{Spearman rank correlation }
\scriptsize
\begin{lstlisting}
>> x = randi([0, 10], 100, 1); % random integers from 0 to 10, as in the task
>> y = randi([0, 10], 100, 1);
>> corr(x,y,'type','Spearman') % rank correlation
ans =
    0.1220
>> corr(x,y.^2,'type','Spearman') % unchanged by the monotone map y -> y.^2
ans =
    0.1220
>> corr(x,y,'type','Pearson')
ans =
    0.1074
>> corr(x,y.^2,'type','Pearson') % Pearson changes under the same map
ans =
    0.0551
\end{lstlisting}
The rank correlation does not change under a monotone transformation
of the data. Therefore, it can be used for ordinal data. The Pearson
correlation coefficient does not have that property.
  \end{solution}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{description of data and plotting}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{nominal scale}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%-------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{Darstellung nominaler Daten}
  \framesubtitle{Bar-Plot f\"ur Anzahl/ rel. H\"aufigkeit}
      \begin{center}
        \includegraphics[width=.8\linewidth]{figs/nominaldataplot}
      \end{center}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{Darstellung nominaler Daten}
  \framesubtitle{Bar-Plot f\"ur Anzahl/ rel. H\"aufigkeit}
      \scriptsize
\begin{lstlisting}
% the actual plot: two bars at x = 1 and x = 2
bar([1,2], [50, 90], 'facecolor', 'k')

% axis labels
ylabel('cell count')
xlabel('cell type')

% cosmetics: axis limits, no box, named ticks, font size
xlim([0.5,2.5])
ylim([0, 100])
box('off')
set(gca,'XTick',1:2,'XTickLabel',{'pyramidal','interneuron'},'FontSize',20)

% settings for saving the figure (paper units in centimeters)
set(gcf, 'PaperUnits', 'centimeters');
set(gcf, 'PaperSize', [11.7 9.0]);
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
\end{lstlisting}
\end{frame}

%----------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{Darstellung nominaler Daten}
  \framesubtitle{Pie-Chart f\"ur Anzahl/ rel. H\"aufigkeit}
      \begin{center}
        \includegraphics[width=.8\linewidth]{figs/nominaldataplot2}
      \end{center}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{Darstellung nominaler Daten}
  \framesubtitle{\"Ubung}
  \begin{task}{Pie-Chart}
    Plotte dieselben Daten ($n_{py}=50$, $n_{in}=90$) als Pie-Chart in Matlab.
  \end{task}
\end{frame}
%-------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{Darstellung nominaler Daten}
  \framesubtitle{Pie-Chart f\"ur Anzahl/ rel. H\"aufigkeit}
      \scriptsize
\begin{lstlisting}
data = [50, 90];
h = pie(data, [1,0], {'pyramidal (n=50)', 'interneuron (n=90)'})
hText = findobj(h,'Type','text') % text object handles (not used below)

set(h(1), 'FaceColor', [.2,.2,.2]); % presumably slice 1
set(h(2), 'Rotation', 45); % presumably label 1
set(h(3), 'FaceColor', [.8,.8,.8]); % presumably slice 2
set(h(4), 'Rotation', 45); % presumably label 2

title('cell count')
set(gca,'XTick',1:2,'XTickLabel',{'pyramidal', 'interneuron'}) % NOTE(review): same ticks as the bar plot; needed here?
box('off')
set(gcf, 'PaperUnits', 'centimeters'); % settings for saving the figure
set(gcf, 'PaperSize', [11.7 9.0]);
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
\end{lstlisting}
\end{frame}

%-------------------------------------------------------------
\begin{frame}[fragile]
  \frametitle{Darstellung von Interval-/Absolutskala Daten}

\end{frame}

\end{document}