diff --git a/statistics/lecture_statistics.tex b/statistics/lecture_statistics.tex index 46c390d..36c7c31 100755 --- a/statistics/lecture_statistics.tex +++ b/statistics/lecture_statistics.tex @@ -4,7 +4,6 @@ \usepackage{pgf} %\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade} %\usepackage{multimedia} -\usepackage[ngerman]{babel} \usepackage[latin1]{inputenc} \usepackage{amsmath} \usepackage{bm} @@ -86,13 +85,13 @@ Bernstein Center T\"ubingen} \end{frame} \begin{frame} - \frametitle{Plan} + \frametitle{plan} \setcounter{tocdepth}{1} \tableofcontents \end{frame} \begin{frame} - \frametitle{Information \"uber Statistik} + \frametitle{information} \begin{itemize} \item Samuels, M. L., Wittmer, J. A., \& Schaffner, A. A. (2010). Statistics for the Life Sciences (4th ed., @@ -114,41 +113,41 @@ Bernstein Center T\"ubingen} % PCA %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section[Präludium]{Präludium} +\section[Präludium]{Prelude} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % ---------------------------------------------------------- \begin{frame} -\frametitle{Meine Erwartungen an Masterstudenten} +\frametitle{my expectations to this course} \begin{itemize} -\item Interesse und Partizipation -\item Motivation Konzepte zu verstehen und zu hinterfragen -\item einen hohen wissenschaftlichen Qualitätsstandard -\item intellektuelle Redlichkeit -\item ehrliche Kooperation +\item interest and participation +\item motivation to understand and question concepts +\item high scientific standard +\item intellectual honesty +\item sincere cooperation \end{itemize} \end{frame} % ---------------------------------------------------------- \begin{frame} -\frametitle{diese Woche wird ...} +\frametitle{this week will be ...} \only<1>{ -\framesubtitle{... kein \sout{Spaß} Zuckerschlecken} +\framesubtitle{... 
no \sout{fun} piece of cake} \begin{center} \includegraphics[height=0.7\textheight]{figs/feeding.jpg} \end{center} } \only<2>{ -\framesubtitle{... kein \sout{Spaß} Zuckerschlecken} +\framesubtitle{... no \sout{fun} piece of cake} \begin{center} \includegraphics[height=0.7\textheight]{figs/nacho-trainer.jpg} \end{center} } \only<3>{ -\framesubtitle{... keine Vorlesung (Bitte!)} +\framesubtitle{... no lecture (please!)} \begin{center} \includegraphics[height=0.7\textheight]{figs/soccer.jpg} \end{center} @@ -158,138 +157,131 @@ Bernstein Center T\"ubingen} % ---------------------------------------------------------- \begin{frame} -\frametitle{Was Ihr diese Woche lernen solltet} +\frametitle{What you should learn this week} \begin{itemize} -\item Eigenschaften guter Plots -\item Was ist deskriptive/ inferentielle Statistik? -\item die generelle Struktur statistischer Tests -\item Was ist/bedeutet ein p-Wert? -\item Wie bastele ich mir meinen eigenen Test? -\item Wie groß muß mein $n$ sein? -\item Principal Component Analysis (PCA) +\item What makes good plots? +\item What is descriptive/inferential statistics? +\item What is the general structure of a statistical test? +\item What does a p-value mean? +\item How can I build my own tests? +\item How large should my $n$ be? +\item What is {\em maximum likelihood} and why is it important? 
\end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section[deskriptive Statistik, Fehlerbalken \& Plots]{Day 1 -- Deskriptive - Statistik, Fehlerbalken und Plots} +\section[descriptive statistics, errorbars, and plots]{Day 1 -- descriptive statistics, errorbars, and plots} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Arten von Daten} +\subsection{types of data} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} - \frametitle{Datenskalen} - \framesubtitle{Welche Datentypen gibt es in der Statistik?} + \frametitle{data scales} + \framesubtitle{What data types are distinguished in statistics?} \Large - {\bf Warum sind Datentypen wichtig?} + {\bf Why are data types important?} \pause \begin{itemize} - \item Auswahl passender Statistiken - \item Auswahl angemessener Plots - \item Auswahl von korrekten Tests + \item selection of suitable statistics + \item selection of appropriate plots + \item selection of correct tests \end{itemize} \end{frame} %------------------------------------------------------------- \begin{frame} - \frametitle{Datenskalen} - \framesubtitle{Nominalskala (engl. ,,categorial'')} + \frametitle{data scales} + \framesubtitle{nominal/categorical scale} \begin{itemize} - \item Eigenschaften wie Zelltyp, Versuchsgruppe - \item jede Beobachtung wird eine bestimmten Klasse (Kategorie) - zugeordnet - \item Die Klassen besitzen keine sinnvolle Ordnung - \item Beispiel: [Zapfen, Stäbchen] vs. [Stäbchen, Zapfen] + \item properties like cell type, experimental group (e.g., treatment + 1, treatment 2, control) + \item each observation/sample is put into one category + \item there is no reasonable order among the categories + \item example: [rods, cones] vs. [cones, rods] \end{itemize} \end{frame} %------------------------------------------------------------- \begin{frame} - \frametitle{Datenskalen} - \framesubtitle{Ordinalskala (engl. 
,,ordinal'')} + \frametitle{data scales} + \framesubtitle{ordinal scale} \begin{itemize} - \item Wie Nominalskala, nur ist sinnvolles Ordnen m\"oglich - \item {\bf aber:} Unterschiede zwischen den Werten bzw. Klassen - nicht vergleichbar (keine Abst\"ande) - \item Beispiel: Platzierungen, G\"uteklassen, Ratingskalen + \item like nominal scale, but there is an order + \item {\bf but:} there is no reasonable measure of {\em distance} + between the classes + \item examples: ranks, ratings \end{itemize} \end{frame} %------------------------------------------------------------- \begin{frame} - \frametitle{Datenskalen} - \framesubtitle{Intervallskala (engl. ,,interval'')} + \frametitle{data scales} + \framesubtitle{interval scale} \begin{itemize} - \item quantitative/metrische Werte - \item Abst\"ande zwischen Werten sind sinnvoll, aber es gibt keine - absoluten Nullpunkt - \item Beispiel: physikalische Gr\"o{\ss}en wie Temperatur in Grad Celsius + \item quantitative/metric values + \item reasonable measure of distance between values but no absolute zero + \item examples: temperature in $^\circ$C \end{itemize} \end{frame} %------------------------------------------------------------- \begin{frame} - \frametitle{Datenskalen} - \framesubtitle{Absolut- oder Verh\"altnisskala (engl. 
,,ratio)} + \frametitle{data scales} + \framesubtitle{absolute/ratio scale} \begin{itemize} - \item wie Intervallskala, aber mit absolutem Nullpunkt - \item Beispiel: Temperatur in Kelvin, Einkommen + \item like interval scale but with absolute zero + \item example: temperature in kelvin \end{itemize} \pause - \begin{emphasize}{Verh\"altnis der Skalen} + \begin{emphasize}{relationships between scales} \begin{itemize} - \item Skalen besitzen aufsteigenden Informationsgehalt von - Nominal- zu Absolutskala - \item Konvertierung ,,abw\"arts'' immer m\"oglich + \item scales exhibit increasing information content from nominal + to absolute + \item conversion ``downwards'' always possible \end{itemize} \end{emphasize} \end{frame} %------------------------------------------------------------- \begin{frame} - \frametitle{Beispiele aus Neuro-/Kognitionswissenschaften} + \frametitle{examples from neuroscience and psychology} \begin{itemize} - \item {\bf Nominalskala:}\pause + \item {\bf nominal:}\pause \begin{itemize} - \item Versuchsgruppenzugeh\"origkeit - \item Stimulusklasse - \item Zelltyp + \item treatment group + \item stimulus class + \item cell type \end{itemize} - \item {\bf Ordinalskala:} \pause + \item {\bf ordinal:} \pause \begin{itemize} - \item Ratings - \item Krankheitsstadien - \item Zust\"ande eines Ionenkanals + \item ratings + \item clinical stages of a disease + \item states of an ion channel \end{itemize} \item {\bf Absolut-/Ratioskala:}\pause \begin{itemize} - \item Feuerrate - \item Membranpotential - \item Ionenkonzentration + \item firing rate + \item membrane potential + \item ion concentration \end{itemize} \end{itemize} \end{frame} %------------------------------------------------------------- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Statistiken} +\subsection{statistics} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %------------------------------------------------------------- \begin{frame} - \frametitle{Was ist eine (Test)-Statistik} - 
\begin{definition}{Teststatistik, Pr\"ufwert} - \begin{itemize} - \item Als {\em Teststatistik} bezeichnet man in der mathematischen - Statistik eine Stichprobenfunktion die einer Stichprobe einen - Wert zuordnet (z.B. Mittelwert, Median, Standardabweichung, - ...). - - \item Als {\em Pr\"ufwert} wird die Realisation einer Teststatistik - anhand einer Stichprobe bezeichnet. - - \source{http://de.wikipedia.org/wiki/Teststatistik} - \end{itemize} + \frametitle{What is ``a statistic''?} + \begin{definition}{statistic} + A statistic (singular) is a single measure of some attribute of a + sample (e.g., its arithmetic mean value). It is calculated by + applying a function (statistical algorithm) to the values of the + items of the sample, which are known together as a set of data. + + \source{http://en.wikipedia.org/wiki/Statistic} \end{definition} \end{frame} @@ -297,50 +289,51 @@ Bernstein Center T\"ubingen} \begin{frame} \frametitle{Beispiele f\"ur Teststatistiken} \begin{itemize} - \item {\bf Nominalskala:}\pause + \item {\bf nominal:}\pause \begin{itemize} - \item Anzahl - \item relative H\"aufigkeit + \item count + \item relative frequency/proportion \end{itemize} - \item {\bf Ordinalskala:} \pause + \item {\bf ordinal:} \pause \begin{itemize} - \item Median - \item Perzentilen - \item Rangkorrelationskoeffizient + \item median + \item quantile/percentile + \item rank correlation \end{itemize} - \item {\bf Absolut-/Ratioskala:}\pause + \item {\bf absolute/ratio:}\pause \begin{itemize} - \item Mittelwert - \item Varianz/ Standardabweichung - \item Pearson Korrelationskoeffizient + \item mean + \item variance/standard deviation + \item Pearson correlation \end{itemize} \end{itemize} \end{frame} %------------------------------------------------------------- \begin{frame} - \frametitle{\"Ubung} - \begin{task}{Spearman Rangkorrelationskoeffizient} + \frametitle{exercise} + \begin{task}{Spearman rank correlation} \begin{enumerate} - \item Benutze {\tt randi} um zwei 
100-dimensionale Vektoren {\tt x,y} mit - Zufalls-Integern zwischen $0$ und $10$ zu berechnen. - \item Finde heraus wie man in Matlab den Spearman - Rangkorrelationskoeffizient $$\rho = 1- {\frac {6 \sum - d_i^2}{n(n^2 - 1)}}$$ berechnet. $d_i = x_i - y_i$ ist die - Differenz im Rang der Datenpunkte. - \item Berechne $\rho$ zwischen $x$ und $y$, zwischen $x$ und - $y^2$, zwischen $\log(x+1)$ und $y^2$. Berechne auch den - \"ublichen (Pearson) Korrelationskoeffizient zwischen diesen - Werten. Was kann man beobachten und warum macht das Sinn? + \item Use {\tt randi} to generate two 100-dimensional vectors + {\tt x,y} of random integers between $0$ and $10$. + \item Find out how to compute the Spearman + rank correlation $$\rho = 1- {\frac {6 \sum + d_i^2}{n(n^2 - 1)}}$$ with Matlab. $d_i = x_i - y_i$ is the + difference in the rank between the single data points. + \item Compute $\rho$ between $x$ and $y$, between $x$ and + $y^2$, between $\log(x+1)$ and $y^2$. + \item Compute the ``standard'' (Pearson) correlation coefficient + between these values. + \item What can you observe and why does it make sense? \end{enumerate} \end{task} \end{frame} %------------------------------------------------------------- \begin{frame}[fragile] - \frametitle{\"Ubung - L\"osung} - \begin{solution}{Spearman Rangkorrelationskoeffizient} + \frametitle{solution} + \begin{solution}{Spearman rank correlation} \scriptsize \begin{lstlisting} >>> x = randi(10, 100, 1); @@ -358,16 +351,16 @@ ans = ans = 0.0551 \end{lstlisting} -Der Rangkorrelationskoeffizient \"andert sich nicht bei monotoner -Transformation der Daten. Daher ist er f\"ur ordinale Daten -geeignet. Der Pearson Korrelationskoeffizient ist es nicht. +The rank correlation does not change under a monotone transformation +of the data. Therefore, it can be used for ordinal data. The Pearson +correlation coefficient does not have that property. 
\end{solution} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Beschreibung und Darstellung von Daten} +\section{description of data and plotting} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Nominalskala} +\subsection{nominal scale} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %------------------------------------------------------------- \begin{frame}[fragile] @@ -403,7 +396,7 @@ set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]); \end{lstlisting} \end{frame} -%------------------------------------------------------------- +%---------------------------------------------------------- \begin{frame}[fragile] \frametitle{Darstellung nominaler Daten} \framesubtitle{Pie-Chart f\"ur Anzahl/ rel. H\"aufigkeit} @@ -443,6 +436,11 @@ set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]); \end{lstlisting} \end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{Darstellung von Interval-/Absolutskala Daten} + +\end{frame} \end{document}