448 lines
13 KiB
TeX
Executable File
448 lines
13 KiB
TeX
Executable File
\documentclass{beamer}
|
||
\usepackage{xcolor}
|
||
\usepackage{listings}
|
||
\usepackage{pgf}
|
||
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
|
||
%\usepackage{multimedia}
|
||
\usepackage[latin1]{inputenc}
|
||
\usepackage{amsmath}
|
||
\usepackage{bm}
|
||
\usepackage[T1]{fontenc}
|
||
\usepackage{hyperref}
|
||
\usepackage{ulem}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
\mode<presentation>
|
||
{
|
||
\usetheme{Singapore}
|
||
\setbeamercovered{opaque}
|
||
\usecolortheme{tuebingen}
|
||
\setbeamertemplate{navigation symbols}{}
|
||
\usefonttheme{default}
|
||
\useoutertheme{infolines}
|
||
% \useoutertheme{miniframes}
|
||
}
|
||
|
||
\AtBeginSection[]
|
||
{
|
||
\begin{frame}<beamer>
|
||
\begin{center}
|
||
\Huge \insertsectionhead
|
||
\end{center}
|
||
% \frametitle{\insertsectionhead}
|
||
% \tableofcontents[currentsection,hideothersubsections]
|
||
\end{frame}
|
||
}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
|
||
|
||
\setbeamertemplate{blocks}[rounded][shadow=true]
|
||
|
||
\title[]{Scientific Computing -- Statistics}
|
||
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
|
||
University T\"ubingen\\
|
||
Bernstein Center T\"ubingen}
|
||
|
||
\institute[Scientific Computing]{}
|
||
\date{11/27/2013}
|
||
%\logo{\pgfuseimage{logo}}
|
||
|
||
\subject{Lectures}
|
||
|
||
%%%%%%%%%% configuration for code
|
||
\lstset{
|
||
basicstyle=\ttfamily,
|
||
numbers=left,
|
||
showstringspaces=false,
|
||
language=Matlab,
|
||
commentstyle=\itshape\color{darkgray},
|
||
keywordstyle=\color{blue},
|
||
stringstyle=\color{green},
|
||
backgroundcolor=\color{blue!10},
|
||
breaklines=true,
|
||
breakautoindent=true,
|
||
columns=flexible,
|
||
frame=single,
|
||
captionpos=b,
|
||
xleftmargin=1em,
|
||
xrightmargin=1em,
|
||
aboveskip=10pt
|
||
}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
||
\newcommand{\mycite}[1]{
|
||
\begin{flushright}
|
||
\tiny \color{black!80} #1
|
||
\end{flushright}
|
||
}
|
||
|
||
\input{../latex/environments.tex}
|
||
\makeatother
|
||
|
||
\begin{document}
|
||
|
||
\begin{frame}
|
||
\titlepage
|
||
|
||
\end{frame}
|
||
|
||
\begin{frame}
|
||
\frametitle{plan}
|
||
\setcounter{tocdepth}{1}
|
||
\tableofcontents
|
||
|
||
\end{frame}
|
||
\begin{frame}
|
||
\frametitle{information}
|
||
\begin{itemize}
|
||
\item Samuels, M. L., Wittmer, J. A., \& Schaffner,
|
||
A. A. (2010). Statistics for the Life Sciences (4th ed.,
|
||
p. 668). Prentice Hall.
|
||
\item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
|
||
Ed.) Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
|
||
Hall. doi:10.1037/0012764
|
||
\item \url{http://stats.stackexchange.com}
|
||
\end{itemize}
|
||
\end{frame}
|
||
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
% errorbars (error bar paper)
|
||
% confidence intervals (sources of error)
|
||
% plotting (the right plot for the right data, Dan plotting paper)
|
||
% statistical test structure (bootstrapping, resampling, permutation)
|
||
% Don'ts: repeated testing, exclude data points
|
||
% study design
|
||
% PCA
|
||
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
\section[Pr\"aludium]{Prelude}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
||
% ----------------------------------------------------------
|
||
\begin{frame}
|
||
\frametitle{my expectations for this course}
|
||
\begin{itemize}
|
||
\item interest and participation
|
||
\item motivation to understand and question concepts
|
||
\item high scientific standard
|
||
\item intellectual honesty
|
||
\item sincere cooperation
|
||
\end{itemize}
|
||
\end{frame}
|
||
|
||
% ----------------------------------------------------------
|
||
\begin{frame}
|
||
\frametitle{this week will be ...}
|
||
|
||
\only<1>{
|
||
\framesubtitle{... no \sout{fun} piece of cake}
|
||
\begin{center}
|
||
\includegraphics[height=0.7\textheight]{figs/feeding.jpg}
|
||
\end{center}
|
||
}
|
||
|
||
\only<2>{
|
||
\framesubtitle{... no \sout{fun} piece of cake}
|
||
\begin{center}
|
||
\includegraphics[height=0.7\textheight]{figs/nacho-trainer.jpg}
|
||
\end{center}
|
||
}
|
||
|
||
\only<3>{
|
||
\framesubtitle{... no lecture (please!)}
|
||
\begin{center}
|
||
\includegraphics[height=0.7\textheight]{figs/soccer.jpg}
|
||
\end{center}
|
||
}
|
||
|
||
\end{frame}
|
||
|
||
% ----------------------------------------------------------
|
||
\begin{frame}
|
||
\frametitle{What you should learn this week}
|
||
\begin{itemize}
|
||
\item What makes good plots?
|
||
\item What is descriptive/inferential statistics?
|
||
\item What is the general structure of a statistical test?
|
||
\item What does a p-value mean?
|
||
\item How can I build my own tests?
|
||
\item How large should my $n$ be?
|
||
\item What is {\em maximum likelihood} and why is it important?
|
||
\end{itemize}
|
||
\end{frame}
|
||
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
\section[descriptive statistics, errorbars, and plots]{Day 1 -- descriptive statistics, errorbars, and plots}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
\subsection{types of data}
|
||
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
||
\begin{frame}
|
||
\frametitle{data scales}
|
||
\framesubtitle{What data types are distinguished in statistics?}
|
||
\Large
|
||
{\bf Why are data types important?}
|
||
\pause
|
||
\begin{itemize}
|
||
\item selection of statistics
|
||
\item selection of plots
|
||
\item selection of correct tests
|
||
\end{itemize}
|
||
\end{frame}
|
||
%-------------------------------------------------------------
|
||
|
||
\begin{frame}
|
||
\frametitle{data scales}
|
||
\framesubtitle{nominal/categorical scale}
|
||
\begin{itemize}
|
||
\item properties like cell type, experimental group (e.g.\ treatment
|
||
1, treatment 2, control)
|
||
\item each observation/sample is put into one category
|
||
\item there is no reasonable order among the categories
|
||
\item example: [rods, cones] vs. [cones, rods]
|
||
\end{itemize}
|
||
\end{frame}
|
||
%-------------------------------------------------------------
|
||
|
||
\begin{frame}
|
||
\frametitle{data scales}
|
||
\framesubtitle{ordinal scale}
|
||
\begin{itemize}
|
||
\item like nominal scale, but there is an order
|
||
\item {\bf but:} there is no reasonable measure of {\em distance}
|
||
between the classes
|
||
\item examples: ranks, ratings
|
||
\end{itemize}
|
||
\end{frame}
|
||
%-------------------------------------------------------------
|
||
|
||
\begin{frame}
|
||
\frametitle{data scales}
|
||
\framesubtitle{interval scale}
|
||
\begin{itemize}
|
||
\item quantitative/metric values
|
||
\item reasonable measure of distance between values but no absolute zero
|
||
\item examples: temperature in $^\circ$C
|
||
\end{itemize}
|
||
\end{frame}
|
||
%-------------------------------------------------------------
|
||
|
||
\begin{frame}
|
||
\frametitle{data scales}
|
||
\framesubtitle{absolute/ratio scale}
|
||
\begin{itemize}
|
||
\item like interval scale but with absolute zero
|
||
\item example: temperature in kelvin (K)
|
||
\end{itemize}
|
||
\pause
|
||
\begin{emphasize}{relationships between scales}
|
||
\begin{itemize}
|
||
\item scales exhibit increasing information content from nominal
|
||
to absolute
|
||
\item conversion ``downwards'' always possible
|
||
\end{itemize}
|
||
\end{emphasize}
|
||
\end{frame}
|
||
|
||
%-------------------------------------------------------------
|
||
\begin{frame}
|
||
\frametitle{examples from neuroscience and psychology}
|
||
\begin{itemize}
|
||
\item {\bf nominal:}\pause
|
||
\begin{itemize}
|
||
\item treatment group
|
||
\item stimulus class
|
||
\item cell type
|
||
\end{itemize}
|
||
|
||
\item {\bf ordinal:} \pause
|
||
\begin{itemize}
|
||
\item ratings
|
||
\item clinical stages of a disease
|
||
\item states of an ion channel
|
||
\end{itemize}
|
||
\item {\bf absolute/ratio:}\pause
|
||
\begin{itemize}
|
||
\item firing rate
|
||
\item membrane potential
|
||
\item ion concentration
|
||
\end{itemize}
|
||
\end{itemize}
|
||
\end{frame}
|
||
%-------------------------------------------------------------
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
\subsection{statistics}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
%-------------------------------------------------------------
|
||
\begin{frame}
|
||
\frametitle{What is ``a statistic''?}
|
||
\begin{definition}{statistic}
|
||
A statistic (singular) is a single measure of some attribute of a
|
||
sample (e.g., its arithmetic mean value). It is calculated by
|
||
applying a function (statistical algorithm) to the values of the
|
||
items of the sample, which are known together as a set of data.
|
||
|
||
\source{http://en.wikipedia.org/wiki/Statistic}
|
||
\end{definition}
|
||
\end{frame}
|
||
|
||
%-------------------------------------------------------------
|
||
\begin{frame}
|
||
\frametitle{examples of test statistics}
|
||
\begin{itemize}
|
||
\item {\bf nominal:}\pause
|
||
\begin{itemize}
|
||
\item count
|
||
\item relative frequency/proportion
|
||
\end{itemize}
|
||
|
||
\item {\bf ordinal:} \pause
|
||
\begin{itemize}
|
||
\item median
|
||
\item quantile/percentile
|
||
\item rank correlation
|
||
\end{itemize}
|
||
\item {\bf absolute/ratio:}\pause
|
||
\begin{itemize}
|
||
\item mean
|
||
\item variance/ standard deviation
|
||
\item Pearson correlation
|
||
\end{itemize}
|
||
\end{itemize}
|
||
\end{frame}
|
||
|
||
%-------------------------------------------------------------
|
||
\begin{frame}
|
||
\frametitle{exercise}
|
||
\begin{task}{Spearman rank correlation}
|
||
\begin{enumerate}
|
||
\item Use {\tt randi} to generate two 100-dimensional vectors
|
||
{\tt x,y} of random integers between $0$ and $10$.
|
||
\item Find out how to compute the Spearman
|
||
rank correlation \[\rho = 1 - \frac{6 \sum_i d_i^2}{n(n^2 - 1)}\] with Matlab. Here $d_i$ is the
difference between the rank of $x_i$ and the rank of $y_i$, and $n$ is the number of data points.
|
||
\item Compute $\rho$ between $x$ and $y$, between $x$ and
|
||
$y^2$, between $\log(x+1)$ and $y^2$.
|
||
\item Compute the ``standard'' (Pearson) correlation coefficient
|
||
between these values.
|
||
\item What can you observe and why does it make sense?
|
||
\end{enumerate}
|
||
\end{task}
|
||
\end{frame}
|
||
|
||
%-------------------------------------------------------------
|
||
\begin{frame}[fragile]
|
||
\frametitle{solution}
|
||
\begin{solution}{Spearman rank correlation }
|
||
\scriptsize
|
||
\begin{lstlisting}
|
||
>>> x = randi(10, 100, 1);
|
||
>>> y = randi(10, 100, 1);
|
||
>>> corr(x,y,'type','Spearman')
|
||
ans =
|
||
0.1220
|
||
>>> corr(x,y.^2,'type','Spearman')
|
||
ans =
|
||
0.1220
|
||
>>> corr(x,y,'type','Pearson')
|
||
ans =
|
||
0.1074
|
||
>>> corr(x,y.^2,'type','Pearson')
|
||
ans =
|
||
0.0551
|
||
\end{lstlisting}
|
||
The rank correlation does not change under a monotone transformation
|
||
of the data. Therefore, it can be used for ordinal data. The Pearson
|
||
correlation coefficient does not have that property.
|
||
\end{solution}
|
||
\end{frame}
|
||
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
\section{description of data and plotting}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
\subsection{nominal scale}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
%-------------------------------------------------------------
|
||
\begin{frame}[fragile]
|
||
\frametitle{Darstellung nominaler Daten}
|
||
\framesubtitle{Bar-Plot f\"ur Anzahl/ rel. H\"aufigkeit}
|
||
\begin{center}
|
||
\includegraphics[width=.8\linewidth]{figs/nominaldataplot}
|
||
\end{center}
|
||
\end{frame}
|
||
%-------------------------------------------------------------
|
||
\begin{frame}[fragile]
|
||
\frametitle{Darstellung nominaler Daten}
|
||
\framesubtitle{Bar-Plot f\"ur Anzahl/ rel. H\"aufigkeit}
|
||
\scriptsize
|
||
\begin{lstlisting}
|
||
% eigentlicher Plot
|
||
bar([1,2], [50, 90], 'facecolor', 'k')
|
||
|
||
% Achsenbeschriftung
|
||
ylabel('cell count')
|
||
xlabel('cell type')
|
||
|
||
% Kosmetik
|
||
xlim([0.5,2.5])
|
||
ylim([0, 100])
|
||
box('off')
|
||
set(gca,'XTick',1:2,'XTickLabel',{'pyramidal','interneuron'},'FontSize',20)
|
||
|
||
% Settings fuers Abspeichern
|
||
set(gcf, 'PaperUnits', 'centimeters');
|
||
set(gcf, 'PaperSize', [11.7 9.0]);
|
||
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
|
||
\end{lstlisting}
|
||
\end{frame}
|
||
|
||
%----------------------------------------------------------
|
||
\begin{frame}[fragile]
|
||
\frametitle{Darstellung nominaler Daten}
|
||
\framesubtitle{Pie-Chart f\"ur Anzahl/ rel. H\"aufigkeit}
|
||
\begin{center}
|
||
\includegraphics[width=.8\linewidth]{figs/nominaldataplot2}
|
||
\end{center}
|
||
\end{frame}
|
||
%-------------------------------------------------------------
|
||
\begin{frame}[fragile]
|
||
\frametitle{Darstellung nominaler Daten}
|
||
\framesubtitle{\"Ubung}
|
||
\begin{task}{Pie-Chart}
|
||
Plotte dieselben Daten ($n_{py}=50$, $n_{in}=90$) als Pie-Chart in Matlab.
|
||
\end{task}
|
||
\end{frame}
|
||
%-------------------------------------------------------------
|
||
\begin{frame}[fragile]
|
||
\frametitle{Darstellung nominaler Daten}
|
||
\framesubtitle{Pie-Chart f\"ur Anzahl/ rel. H\"aufigkeit}
|
||
\scriptsize
|
||
\begin{lstlisting}
|
||
data = [50, 90];
|
||
h = pie(data, [1,0], {'pyramidal (n=50)', 'interneuron (n=90)'})
|
||
hText = findobj(h,'Type','text') % text object handles
|
||
|
||
set(h(1), 'FaceColor', [.2,.2,.2]);
|
||
set(h(2), 'Rotation', 45);
|
||
set(h(3), 'FaceColor', [.8,.8,.8]);
|
||
set(h(4), 'Rotation', 45);
|
||
|
||
title('cell count')
|
||
set(gca,'XTick',1:2,'XTickLabel',{'pyramidal', 'interneuron'})
|
||
box('off')
|
||
set(gcf, 'PaperUnits', 'centimeters');
|
||
set(gcf, 'PaperSize', [11.7 9.0]);
|
||
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
|
||
\end{lstlisting}
|
||
\end{frame}
|
||
|
||
%-------------------------------------------------------------
|
||
\begin{frame}[fragile]
|
||
\frametitle{Darstellung von Interval-/Absolutskala Daten}
|
||
|
||
\end{frame}
|
||
|
||
\end{document}
|
||
|
||
|