410 lines
12 KiB
TeX
Executable File
410 lines
12 KiB
TeX
Executable File
\documentclass{beamer}
|
||
\usepackage{xcolor}
|
||
\usepackage{listings}
|
||
\usepackage{pgf}
|
||
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
|
||
%\usepackage{multimedia}
|
||
\usepackage[ngerman]{babel}
|
||
\usepackage[latin1]{inputenc}
|
||
\usepackage{amsmath}
|
||
\usepackage{bm}
|
||
\usepackage[T1]{fontenc}
|
||
\usepackage{hyperref}
|
||
\usepackage{ulem}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
\mode<presentation>
|
||
{
|
||
\usetheme{Singapore}
|
||
\setbeamercovered{opaque}
|
||
\usecolortheme{tuebingen}
|
||
\setbeamertemplate{navigation symbols}{}
|
||
\usefonttheme{default}
|
||
\useoutertheme{infolines}
|
||
% \useoutertheme{miniframes}
|
||
}
|
||
|
||
\AtBeginSection[]
|
||
{
|
||
\begin{frame}<beamer>
|
||
\begin{center}
|
||
\Huge \insertsectionhead
|
||
\end{center}
|
||
% \frametitle{\insertsectionhead}
|
||
% \tableofcontents[currentsection,hideothersubsections]
|
||
\end{frame}
|
||
}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
|
||
|
||
\setbeamertemplate{blocks}[rounded][shadow=true]
|
||
|
||
\title[]{Scientific Computing -- Statistics}
|
||
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
|
||
University T\"ubingen\\
|
||
Bernstein Center T\"ubingen}
|
||
|
||
\institute[Scientific Computing]{}
|
||
\date{11/27/2013}
|
||
%\logo{\pgfuseimage{logo}}
|
||
|
||
\subject{Lectures}
|
||
|
||
%%%%%%%%%% configuration for code
|
||
\lstset{
|
||
basicstyle=\ttfamily,
|
||
numbers=left,
|
||
showstringspaces=false,
|
||
language=Matlab,
|
||
commentstyle=\itshape\color{darkgray},
|
||
keywordstyle=\color{blue},
|
||
stringstyle=\color{green},
|
||
backgroundcolor=\color{blue!10},
|
||
breaklines=true,
|
||
breakautoindent=true,
|
||
columns=flexible,
|
||
frame=single,
|
||
captionpos=b,
|
||
xleftmargin=1em,
|
||
xrightmargin=1em,
|
||
aboveskip=10pt
|
||
}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
||
% \mycite{<text>}
%   Typesets <text> as a small source/credit note: right-aligned
%   (flushright), in \tiny font size and in 80% black.
%   #1 -- the text of the note.
% Line ends inside the definition are terminated with % so that no
% spurious space tokens (or other stray characters) end up in the
% output wherever the macro is used.
\newcommand{\mycite}[1]{%
  \begin{flushright}%
    \tiny \color{black!80} #1%
  \end{flushright}%
}
|
||
|
||
\input{../latex/environments.tex}
|
||
\makeatother
|
||
|
||
\begin{document}
|
||
|
||
\begin{frame}
|
||
\titlepage
|
||
|
||
\end{frame}
|
||
|
||
\begin{frame}
|
||
\frametitle{Plan}
|
||
\setcounter{tocdepth}{1}
|
||
\tableofcontents
|
||
|
||
\end{frame}
|
||
\begin{frame}
|
||
\frametitle{Information \"uber Statistik}
|
||
\begin{itemize}
|
||
\item Samuels, M. L., Wittmer, J. A., \& Schaffner,
|
||
A. A. (2010). Statistics for the Life Sciences (4th ed.,
|
||
p. 668). Prentice Hall.
|
||
\item Zar, J. H. (1999). Biostatistical Analysis. (D. Lynch,
|
||
Ed.) Prentice Hall New Jersey (4th ed., Vol. 4th, p. 663). Prentice
|
||
Hall. doi:10.1037/0012764
|
||
\item \url{http://stats.stackexchange.com}
|
||
\end{itemize}
|
||
\end{frame}
|
||
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
% errorbars (error bar paper)
|
||
% confidence intervals (sources of error)
|
||
% plotting (the right plot for the right data, Dan plotting paper)
|
||
% statistical test structure (bootstrapping, resampling, permutation)
|
||
% Don'ts: repeated testing, exclude data points
|
||
% study design
|
||
% PCA
|
||
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
\section[Pr\"aludium]{Pr\"aludium}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
||
% ----------------------------------------------------------
|
||
\begin{frame}
|
||
\frametitle{Meine Erwartungen an Masterstudenten}
|
||
\begin{itemize}
|
||
\item Interesse und Partizipation
|
||
\item Motivation Konzepte zu verstehen und zu hinterfragen
|
||
\item einen hohen wissenschaftlichen Qualit\"atsstandard
|
||
\item intellektuelle Redlichkeit
|
||
\item ehrliche Kooperation
|
||
\end{itemize}
|
||
\end{frame}
|
||
|
||
% ----------------------------------------------------------
|
||
\begin{frame}
|
||
\frametitle{diese Woche wird ...}
|
||
|
||
\only<1>{
|
||
\framesubtitle{... kein \sout{Spa{\ss}} Zuckerschlecken}
|
||
\begin{center}
|
||
\includegraphics[height=0.7\textheight]{figs/feeding.jpg}
|
||
\end{center}
|
||
}
|
||
|
||
\only<2>{
|
||
\framesubtitle{... kein \sout{Spa{\ss}} Zuckerschlecken}
|
||
\begin{center}
|
||
\includegraphics[height=0.7\textheight]{figs/nacho-trainer.jpg}
|
||
\end{center}
|
||
}
|
||
|
||
\only<3>{
|
||
\framesubtitle{... keine Vorlesung (Bitte!)}
|
||
\begin{center}
|
||
\includegraphics[height=0.7\textheight]{figs/soccer.jpg}
|
||
\end{center}
|
||
}
|
||
|
||
\end{frame}
|
||
|
||
% ----------------------------------------------------------
|
||
\begin{frame}
|
||
\frametitle{Was Ihr diese Woche lernen solltet}
|
||
\begin{itemize}
|
||
\item Eigenschaften guter Plots
|
||
\item Was ist deskriptive/ inferentielle Statistik?
|
||
\item die generelle Struktur statistischer Tests
|
||
\item Was ist/bedeutet ein p-Wert?
|
||
\item Wie bastele ich mir meinen eigenen Test?
|
||
\item Wie gro{\ss} mu{\ss} mein $n$ sein?
|
||
\item Principal Component Analysis (PCA)
|
||
\end{itemize}
|
||
\end{frame}
|
||
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
\section[deskriptive Statistik, Fehlerbalken \& Plots]{Day 1 -- Deskriptive
|
||
Statistik, Fehlerbalken und Plots}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
\subsection{Arten von Daten}
|
||
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
||
\begin{frame}
|
||
\frametitle{Datenskalen}
|
||
\framesubtitle{Welche Datentypen gibt es in der Statistik?}
|
||
\Large
|
||
{\bf Warum sind Datentypen wichtig?}
|
||
\pause
|
||
\begin{itemize}
|
||
\item Auswahl passender Statistiken
|
||
\item Auswahl angemessener Plots
|
||
\item Auswahl von korrekten Tests
|
||
\end{itemize}
|
||
\end{frame}
|
||
%-------------------------------------------------------------
|
||
|
||
\begin{frame}
|
||
\frametitle{Datenskalen}
|
||
\framesubtitle{Nominalskala (engl. ,,categorical'')}
|
||
\begin{itemize}
|
||
\item Eigenschaften wie Zelltyp, Versuchsgruppe
|
||
\item jede Beobachtung wird einer bestimmten Klasse (Kategorie)
|
||
zugeordnet
|
||
\item Die Klassen besitzen keine sinnvolle Ordnung
|
||
\item Beispiel: [Zapfen, St\"abchen] vs. [St\"abchen, Zapfen]
|
||
\end{itemize}
|
||
\end{frame}
|
||
%-------------------------------------------------------------
|
||
|
||
\begin{frame}
|
||
\frametitle{Datenskalen}
|
||
\framesubtitle{Ordinalskala (engl. ,,ordinal'')}
|
||
\begin{itemize}
|
||
\item Wie Nominalskala, nur ist sinnvolles Ordnen m\"oglich
|
||
\item {\bf aber:} Unterschiede zwischen den Werten bzw. Klassen
|
||
nicht vergleichbar (keine Abst\"ande)
|
||
\item Beispiel: Platzierungen, G\"uteklassen, Ratingskalen
|
||
\end{itemize}
|
||
\end{frame}
|
||
%-------------------------------------------------------------
|
||
|
||
\begin{frame}
|
||
\frametitle{Datenskalen}
|
||
\framesubtitle{Intervallskala (engl. ,,interval'')}
|
||
\begin{itemize}
|
||
\item quantitative/metrische Werte
|
||
\item Abst\"ande zwischen Werten sind sinnvoll, aber es gibt keinen
|
||
absoluten Nullpunkt
|
||
\item Beispiel: physikalische Gr\"o{\ss}en wie Temperatur in Grad Celsius
|
||
\end{itemize}
|
||
\end{frame}
|
||
%-------------------------------------------------------------
|
||
|
||
\begin{frame}
|
||
\frametitle{Datenskalen}
|
||
\framesubtitle{Absolut- oder Verh\"altnisskala (engl. ,,ratio'')}
|
||
\begin{itemize}
|
||
\item wie Intervallskala, aber mit absolutem Nullpunkt
|
||
\item Beispiel: Temperatur in Kelvin, Einkommen
|
||
\end{itemize}
|
||
\pause
|
||
\begin{emphasize}{Verh\"altnis der Skalen}
|
||
\begin{itemize}
|
||
\item Skalen besitzen aufsteigenden Informationsgehalt von
|
||
Nominal- zu Absolutskala
|
||
\item Konvertierung ,,abw\"arts'' immer m\"oglich
|
||
\end{itemize}
|
||
\end{emphasize}
|
||
\end{frame}
|
||
|
||
%-------------------------------------------------------------
|
||
\begin{frame}
|
||
\frametitle{Beispiele aus Neuro-/Kognitionswissenschaften}
|
||
\begin{itemize}
|
||
\item {\bf Nominalskala:}\pause
|
||
\begin{itemize}
|
||
\item Versuchsgruppenzugeh\"origkeit
|
||
\item Stimulusklasse
|
||
\item Zelltyp
|
||
\end{itemize}
|
||
|
||
\item {\bf Ordinalskala:} \pause
|
||
\begin{itemize}
|
||
\item Ratings
|
||
\item Krankheitsstadien
|
||
\item Zust\"ande eines Ionenkanals
|
||
\end{itemize}
|
||
\item {\bf Absolut-/Ratioskala:}\pause
|
||
\begin{itemize}
|
||
\item Feuerrate
|
||
\item Membranpotential
|
||
\item Ionenkonzentration
|
||
\end{itemize}
|
||
\end{itemize}
|
||
\end{frame}
|
||
%-------------------------------------------------------------
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
\subsection{Statistiken}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
%-------------------------------------------------------------
|
||
\begin{frame}
|
||
\frametitle{Was ist eine (Test-)Statistik?}
|
||
\begin{definition}{Teststatistik, Pr\"ufwert}
|
||
\begin{itemize}
|
||
\item Als {\em Teststatistik} bezeichnet man in der mathematischen
|
||
Statistik eine Stichprobenfunktion, die einer Stichprobe einen
|
||
Wert zuordnet (z.B. Mittelwert, Median, Standardabweichung,
|
||
...).
|
||
|
||
\item Als {\em Pr\"ufwert} wird die Realisation einer Teststatistik
|
||
anhand einer Stichprobe bezeichnet.
|
||
|
||
\source{http://de.wikipedia.org/wiki/Teststatistik}
|
||
\end{itemize}
|
||
\end{definition}
|
||
\end{frame}
|
||
|
||
%-------------------------------------------------------------
|
||
\begin{frame}
|
||
\frametitle{Beispiele f\"ur Teststatistiken}
|
||
\begin{itemize}
|
||
\item {\bf Nominalskala:}\pause
|
||
\begin{itemize}
|
||
\item Anzahl
|
||
\item relative H\"aufigkeit
|
||
\end{itemize}
|
||
|
||
\item {\bf Ordinalskala:} \pause
|
||
\begin{itemize}
|
||
\item Median
|
||
\item Perzentilen
|
||
\item Rangkorrelationskoeffizient
|
||
\end{itemize}
|
||
\item {\bf Absolut-/Ratioskala:}\pause
|
||
\begin{itemize}
|
||
\item Mittelwert
|
||
\item Varianz/ Standardabweichung
|
||
\item Pearson Korrelationskoeffizient
|
||
\end{itemize}
|
||
\end{itemize}
|
||
\end{frame}
|
||
|
||
%-------------------------------------------------------------
|
||
\begin{frame}
|
||
\frametitle{\"Ubung}
|
||
\begin{task}{Spearman Rangkorrelationskoeffizient}
|
||
\begin{enumerate}
|
||
\item Benutze {\tt randi} um zwei 100-dimensionale Vektoren {\tt x,y} mit
|
||
Zufalls-Integern zwischen $1$ und $10$ zu erzeugen.
|
||
\item Finde heraus wie man in Matlab den Spearman
|
||
Rangkorrelationskoeffizient $$\rho = 1- {\frac {6 \sum
|
||
d_i^2}{n(n^2 - 1)}}$$ berechnet. $d_i = x_i - y_i$ ist die
|
||
Differenz im Rang der Datenpunkte.
|
||
\item Berechne $\rho$ zwischen $x$ und $y$, zwischen $x$ und
|
||
$y^2$, zwischen $\log(x+1)$ und $y^2$. Berechne auch den
|
||
\"ublichen (Pearson) Korrelationskoeffizient zwischen diesen
|
||
Werten. Was kann man beobachten und warum macht das Sinn?
|
||
\end{enumerate}
|
||
\end{task}
|
||
\end{frame}
|
||
|
||
%-------------------------------------------------------------
|
||
\begin{frame}[fragile]
|
||
\frametitle{\"Ubung - L\"osung}
|
||
\begin{solution}{Spearman Rangkorrelationskoeffizient}
|
||
\scriptsize
|
||
\begin{lstlisting}
|
||
>>> x = randi(10, 100, 1);
|
||
>>> y = randi(10, 100, 1);
|
||
>>> corr(x,y,'type','Spearman')
|
||
ans =
|
||
0.1220
|
||
>>> corr(x,y.^2,'type','Spearman')
|
||
ans =
|
||
0.1220
|
||
>>> corr(x,y,'type','Pearson')
|
||
ans =
|
||
0.1074
|
||
>>> corr(x,y.^2,'type','Pearson')
|
||
ans =
|
||
0.0551
|
||
\end{lstlisting}
|
||
Der Rangkorrelationskoeffizient \"andert sich nicht bei monotoner
|
||
Transformation der Daten. Daher ist er f\"ur ordinale Daten
|
||
geeignet. Der Pearson Korrelationskoeffizient ist es nicht.
|
||
\end{solution}
|
||
\end{frame}
|
||
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
\section{Beschreibung und Darstellung von Daten}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
\subsection{Nominalskala}
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
%-------------------------------------------------------------
|
||
\begin{frame}[fragile]
|
||
\frametitle{Darstellung nominaler Daten}
|
||
\framesubtitle{Bar-Plot f\"ur Anzahl/ rel. H\"aufigkeit}
|
||
\begin{center}
|
||
\includegraphics[width=.6\linewidth]{figs/nominaldataplot}
|
||
\end{center}
|
||
\end{frame}
|
||
%-------------------------------------------------------------
|
||
\begin{frame}[fragile]
|
||
\frametitle{Darstellung nominaler Daten}
|
||
\framesubtitle{Bar-Plot f\"ur Anzahl/ rel. H\"aufigkeit}
|
||
\scriptsize
|
||
\begin{lstlisting}
|
||
% eigentlicher Plot
|
||
bar([1,2], [50, 90], 'facecolor', 'k')
|
||
|
||
% Achsenbeschriftung
|
||
ylabel('cell count')
|
||
xlabel('cell type')
|
||
|
||
% Kosmetik
|
||
xlim([0.5,2.5])
|
||
ylim([0, 100])
|
||
box('off')
|
||
set(gca,'XTick',1:2,'XTickLabel',{'pyramidal','interneuron'},'FontSize',20)
|
||
|
||
% Settings fuers Abspeichern
|
||
set(gcf, 'PaperUnits', 'centimeters');
|
||
set(gcf, 'PaperSize', [11.7 9.0]);
|
||
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
|
||
\end{lstlisting}
|
||
\end{frame}
|
||
|
||
|
||
\end{document}
|
||
|
||
|