810 lines
25 KiB
TeX
810 lines
25 KiB
TeX
\documentclass{beamer}
|
|
\usepackage{xcolor}
|
|
\usepackage{listings}
|
|
\usepackage{pgf}
|
|
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
|
|
%\usepackage{multimedia}
|
|
\usepackage[latin1]{inputenc}
|
|
\usepackage{amsmath}
|
|
\usepackage{bm}
|
|
\usepackage[T1]{fontenc}
|
|
\usepackage{hyperref}
|
|
\usepackage{ulem}
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\mode<presentation>
|
|
{
|
|
\usetheme{Singapore}
|
|
\setbeamercovered{opaque}
|
|
\usecolortheme{tuebingen}
|
|
\setbeamertemplate{navigation symbols}{}
|
|
\usefonttheme{default}
|
|
\useoutertheme{infolines}
|
|
% \useoutertheme{miniframes}
|
|
}
|
|
|
|
\AtBeginSubsection[]
|
|
{
|
|
\begin{frame}<beamer>
|
|
\begin{center}
|
|
\Huge \insertsectionhead
|
|
\end{center}
|
|
\tableofcontents[
|
|
currentsubsection,
|
|
hideothersubsections,
|
|
sectionstyle=show/hide,
|
|
subsectionstyle=show/shaded,
|
|
]
|
|
% \frametitle{\insertsectionhead}
|
|
\end{frame}
|
|
}
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
|
|
|
|
\setbeamertemplate{blocks}[rounded][shadow=true]
|
|
|
|
\title[]{Scientific Computing -- Statistics}
|
|
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
|
|
University T\"ubingen\\
|
|
Bernstein Center T\"ubingen}
|
|
|
|
\institute[Scientific Computing]{}
|
|
\date{10/20/2014}
|
|
%\logo{\pgfuseimage{logo}}
|
|
|
|
\subject{Lectures}
|
|
|
|
%%%%%%%%%% configuration for code
|
|
\lstset{
|
|
basicstyle=\ttfamily,
|
|
numbers=left,
|
|
showstringspaces=false,
|
|
language=Matlab,
|
|
commentstyle=\itshape\color{darkgray},
|
|
keywordstyle=\color{blue},
|
|
stringstyle=\color{green},
|
|
backgroundcolor=\color{blue!10},
|
|
breaklines=true,
|
|
breakautoindent=true,
|
|
columns=flexible,
|
|
frame=single,
|
|
captionpos=b,
|
|
xleftmargin=1em,
|
|
xrightmargin=1em,
|
|
aboveskip=10pt
|
|
}
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
% \mycite{<text>}
%   Typesets <text> as a short source attribution: flushed right, in \tiny
%   size and 80% black. Used at the bottom of frames that quote or adapt
%   external material, e.g. \mycite{Allen et al. 2012, Neuron}.
%   The size and colour changes stay local because they are issued inside
%   the flushright environment's group.
\newcommand{\mycite}[1]{%  <- trailing % keeps the line end from becoming a space token
  \begin{flushright}%
    \tiny \color{black!80} #1%
  \end{flushright}%
}
|
|
|
|
\input{../latex/environments.tex}
|
|
\makeatother
|
|
|
|
\begin{document}
|
|
|
|
\begin{frame}
|
|
\titlepage
|
|
|
|
\end{frame}
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% errorbars (error bar paper)
|
|
% confidence intervals (sources of error)
|
|
% plotting (the right plot for the right data, Dan plotting paper)
|
|
% statistical test structure (bootstrapping, resampling, permutation)
|
|
% Don'ts: repeated testing, exclude data points
|
|
% study design
|
|
% PCA
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\section[Prelude]{Prelude}
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
% ----------------------------------------------------------
|
|
\begin{frame}
|
|
\frametitle{my expectations for this course}
|
|
\begin{itemize}
|
|
\item interest and participation
|
|
\item motivation to understand and question concepts
|
|
\item high scientific standard
|
|
\item intellectual honesty
|
|
\item sincere cooperation
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
% ----------------------------------------------------------
|
|
\begin{frame}
|
|
\frametitle{this week will be ...}
|
|
|
|
\only<1>{
|
|
\framesubtitle{... no \sout{fun} piece of cake}
|
|
\begin{center}
|
|
\includegraphics[height=0.7\textheight]{figs/feeding.jpg}
|
|
\end{center}
|
|
}
|
|
|
|
\only<2>{
|
|
\framesubtitle{... no \sout{fun} piece of cake}
|
|
\begin{center}
|
|
\includegraphics[height=0.7\textheight]{figs/nacho-trainer.jpg}
|
|
\end{center}
|
|
}
|
|
|
|
\only<3>{
|
|
\framesubtitle{... no lecture (please!)}
|
|
\begin{center}
|
|
\includegraphics[height=0.7\textheight]{figs/soccer.jpg}
|
|
\end{center}
|
|
}
|
|
|
|
\end{frame}
|
|
|
|
% ----------------------------------------------------------
|
|
\begin{frame}
|
|
\frametitle{What you should learn this week}
|
|
\begin{itemize}
|
|
\item What makes good plots?
|
|
\item What is descriptive/inferential statistics?
|
|
\item What is the general structure of a statistical test?
|
|
\item What does a p-value mean?
|
|
\item How can I build my own tests?
|
|
\item How large should my $n$ be?
|
|
\item What is {\em maximum likelihood} and why is it important?
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\section{Day 1 -- descriptive statistics and plots}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\subsection{types of data}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
\begin{frame}
|
|
\frametitle{data scales}
|
|
\framesubtitle{What data types are distinguished in statistics?}
|
|
\Large
|
|
{\bf Why are data types important?}
|
|
\pause
|
|
\begin{itemize}
|
|
\item selection of statistics
|
|
\item selection of plots
|
|
\item selection of correct tests
|
|
\end{itemize}
|
|
\end{frame}
|
|
%-------------------------------------------------------------
|
|
|
|
\begin{frame}
|
|
\frametitle{data scales}
|
|
\framesubtitle{nominal/categorical scale}
|
|
\begin{itemize}
|
|
\item properties like cell type, experimental group (e.g.\ treatment
|
|
1, treatment 2, control)
|
|
\item each observation/sample is put into one category
|
|
\item there is no reasonable order among the categories
|
|
\item example: [rods, cones] vs. [cones, rods]
|
|
\end{itemize}
|
|
\end{frame}
|
|
%-------------------------------------------------------------
|
|
|
|
\begin{frame}
|
|
\frametitle{data scales}
|
|
\framesubtitle{ordinal scale}
|
|
\begin{itemize}
|
|
\item like nominal scale, but there is an order
|
|
\item {\bf but:} there is no reasonable measure of {\em distance}
|
|
between the classes
|
|
\item examples: ranks, ratings
|
|
\end{itemize}
|
|
\end{frame}
|
|
%-------------------------------------------------------------
|
|
|
|
\begin{frame}
|
|
\frametitle{data scales}
|
|
\framesubtitle{interval scale}
|
|
\begin{itemize}
|
|
\item quantitative/metric values
|
|
\item reasonable measure of distance between values but no absolute zero
|
|
\item examples: temperature in $^\circ$C
|
|
\end{itemize}
|
|
\end{frame}
|
|
%-------------------------------------------------------------
|
|
|
|
\begin{frame}
|
|
\frametitle{data scales}
|
|
\framesubtitle{absolute/ratio scale}
|
|
\begin{itemize}
|
|
\item like interval scale but with absolute zero
|
|
\item example: temperature in K (kelvin)
|
|
\end{itemize}
|
|
\pause
|
|
\begin{emphasize}{relationships between scales}
|
|
\begin{itemize}
|
|
\item scales exhibit increasing information content from nominal
|
|
to absolute
|
|
\item conversion ``downwards'' always possible
|
|
\end{itemize}
|
|
\end{emphasize}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}
|
|
\frametitle{examples from neuroscience and psychology}
|
|
\begin{itemize}
|
|
\item {\bf nominal:}\pause
|
|
\begin{itemize}
|
|
\item treatment group
|
|
\item stimulus class
|
|
\item cell type
|
|
\end{itemize}
|
|
|
|
\item {\bf ordinal:} \pause
|
|
\begin{itemize}
|
|
\item ratings
|
|
\item clinical stages of a disease
|
|
\item states of an ion channel
|
|
\end{itemize}
|
|
\item {\bf absolute/ratio:}\pause
|
|
\begin{itemize}
|
|
\item firing rate
|
|
\item membrane potential
|
|
\item ion concentration
|
|
\end{itemize}
|
|
\end{itemize}
|
|
\end{frame}
|
|
%-------------------------------------------------------------
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\subsection{statistics}
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
%-------------------------------------------------------------
|
|
\begin{frame}
|
|
\frametitle{What is ``a statistic''?}
|
|
\begin{definition}{statistic}
|
|
A statistic (singular) is a single measure of some attribute of a
|
|
sample (e.g., its arithmetic mean value). It is calculated by
|
|
applying a function (statistical algorithm) to the values of the
|
|
items of the sample, which are known together as a set of data.
|
|
|
|
\source{http://en.wikipedia.org/wiki/Statistic}
|
|
\end{definition}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}
|
|
\frametitle{examples of test statistics}
|
|
\begin{itemize}
|
|
\item {\bf nominal:}\pause
|
|
\begin{itemize}
|
|
\item count
|
|
\item relative frequency/proportion
|
|
\end{itemize}
|
|
|
|
\item {\bf ordinal:} \pause
|
|
\begin{itemize}
|
|
\item median
|
|
\item quantile/percentile
|
|
\item rank correlation
|
|
\end{itemize}
|
|
\item {\bf absolute/ratio:}\pause
|
|
\begin{itemize}
|
|
\item mean
|
|
\item variance/ standard deviation
|
|
\item Pearson correlation
|
|
\end{itemize}
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}
|
|
\frametitle{exercise}
|
|
\begin{task}{Spearman rank correlation}
|
|
\begin{enumerate}
|
|
\item Use {\tt randi} to generate two vectors
|
|
{\tt x,y} with $100$ random integers between $1$ and $10$ each.
|
|
\item Find out how to compute the Spearman
|
|
rank correlation \[\rho = 1 - \frac{6 \sum_i d_i^2}{n(n^2 - 1)}\] with Matlab. Here
$d_i = \operatorname{rank}(x_i) - \operatorname{rank}(y_i)$ is the
difference between the ranks of the $i$-th data points.
|
|
\item Compute $\rho$ between $x$ and $y$, between $x$ and
|
|
$y^2$, between $\log(x+1)$ and $y^2$.
|
|
\item Compute the ``standard'' (Pearson) correlation coefficient
|
|
between these values.
|
|
\item What can you observe and why does it make sense?
|
|
\end{enumerate}
|
|
\end{task}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{solution}
|
|
\begin{solution}{Spearman rank correlation }
|
|
\scriptsize
|
|
\begin{lstlisting}
|
|
>>> x = randi(10, 100, 1);
|
|
>>> y = randi(10, 100, 1);
|
|
>>> corr(x,y,'type','Spearman')
|
|
ans =
|
|
0.1220
|
|
>>> corr(x,y.^2,'type','Spearman')
|
|
ans =
|
|
0.1220
|
|
>>> corr(x,y,'type','Pearson')
|
|
ans =
|
|
0.1074
|
|
>>> corr(x,y.^2,'type','Pearson')
|
|
ans =
|
|
0.0551
|
|
\end{lstlisting}
|
|
The rank correlation does not change under a monotone transformation
|
|
of the data. Therefore, it can be used for ordinal data. The Pearson
|
|
correlation coefficient does not have that property.
|
|
\end{solution}
|
|
\end{frame}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\subsection{what makes a good plot}
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{}
|
|
\begin{center}
|
|
\Huge What makes a good plot?
|
|
\end{center}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{features of a good plot}
|
|
A good plot
|
|
\begin{itemize}
|
|
\item helps the reader to clearly understand your point.\pause
|
|
\item is not misleading and lets the reader judge the information
|
|
on her own (different y-axis/length scales in two related plots,
|
|
"squeezing" via log-plots). \pause
|
|
\item contains information about the data (a comic might be
|
|
illustrative, but does not contain information about the
|
|
data).\pause
|
|
\item adheres to the principle of {\em ink minimization}.
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{features of a good plot}
|
|
\framesubtitle{design/organization}
|
|
\begin{itemize}
|
|
\item Is the display consistent with the model or hypothesis
|
|
being tested?\pause
|
|
\item Are there ``empty dimensions'' in the display that could be
|
|
removed (A 3D pie chart for 2D categorical data, extraneous colors
|
|
that do not encode meaningful information)?\pause
|
|
\item Does the display provide an honest and transparent portrayal
|
|
of the data (hiding, smoothing, modifying data points should be
|
|
avoided or explicitly mentioned)?
|
|
\end{itemize}
|
|
\mycite{Allen et al. 2012, Neuron}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{features of a good plot}
|
|
\framesubtitle{axes}
|
|
\begin{itemize}
|
|
\item Are axes scales defined as linear, log, or radial?\pause
|
|
\item Does each axis label describe the variable and its units (use
|
|
"a.u." for arbitrary units)?\pause
|
|
\item Are axes limits appropriate for the data (The graphic should
|
|
not be bounded at zero if the data can take on both positive and
|
|
negative values.)?\pause
|
|
\item Is the aspect ratio appropriate for the data (When x and y
|
|
axes contrast the same variable under different conditions the
|
|
graphic should be square.)?
|
|
\end{itemize}
|
|
\mycite{Allen et al. 2012, Neuron}
|
|
\end{frame}
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{features of a good plot}
|
|
\framesubtitle{color mapping}
|
|
\begin{itemize}
|
|
\item Is a color bar provided?\pause
|
|
\item Is the color map sensible for the data type (does the data
|
|
extend to both $\pm$, does it live in an interval, is it
|
|
circular)?\pause
|
|
\item Are contrasting colors consistent with a natural interpretation?
|
|
\item Can features be discriminated when printed in grayscale?
|
|
\item Has red/green contrast been avoided to accommodate common
|
|
forms of colorblindness?
|
|
\end{itemize}
|
|
\mycite{Allen et al. 2012, Neuron}
|
|
\end{frame}
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{features of a good plot}
|
|
\framesubtitle{uncertainty}
|
|
\begin{itemize}
|
|
\item Does the display indicate the uncertainty of estimated parameters?\pause
|
|
\item Is the type of error surface appropriate for the data?
|
|
\begin{itemize}
|
|
\item Use standard deviations to describe variability in the population.\pause
|
|
\item Use standard errors or confidence intervals to make inferences
|
|
about parameters estimated from a sample.\pause
|
|
\item Parametric confidence intervals should only be used if data
|
|
meet the assumptions of the underlying model.\pause
|
|
\end{itemize}
|
|
\item Are the units of uncertainty defined (is it standard error, is
|
|
it $95\%$ confidence interval)?
|
|
\end{itemize}
|
|
\mycite{Allen et al. 2012, Neuron}
|
|
\end{frame}
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{features of a good plot}
|
|
\framesubtitle{annotation}
|
|
\begin{itemize}
|
|
\item Are all symbols defined, preferably by directly labeling objects?\pause
|
|
\item Is the directionality of a contrast between conditions obvious?\pause
|
|
\item Is the number of samples or independent experiments indicated?\pause
|
|
\item Are statistical procedures and criteria for significance described?\pause
|
|
\item Are uncommon abbreviations avoided or clearly defined?\pause
|
|
\item Are abbreviations consistent with those used in the text?
|
|
\end{itemize}
|
|
\mycite{Allen et al. 2012, Neuron}
|
|
\end{frame}
|
|
|
|
\subsection{bad examples}
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{suboptimal example}
|
|
\begin{center}
|
|
\includegraphics[width=.5\linewidth]{figs/nobelbad}
|
|
\end{center}
|
|
\mycite{Hafting et al. 2005, Nature}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{suboptimal example}
|
|
\begin{center}
|
|
\includegraphics[width=.5\linewidth]{figs/badbarright.png}
|
|
\end{center}
|
|
\source{http://en.wikipedia.org/wiki/Misleading\_graph}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{suboptimal example}
|
|
\begin{center}
|
|
\includegraphics[width=.4\linewidth]{figs/yaxisscalingleft.png}
|
|
\hspace{.5cm}
|
|
\includegraphics[width=.4\linewidth]{figs/yaxisscalingright.png}
|
|
\end{center}
|
|
\source{http://en.wikipedia.org/wiki/Misleading\_graph}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{suboptimal example}
|
|
\begin{center}
|
|
\includegraphics[width=.4\linewidth]{figs/badscatterleft.png}
|
|
\hspace{.5cm}
|
|
\includegraphics[width=.4\linewidth]{figs/badscatterright.png}
|
|
\end{center}
|
|
\source{http://en.wikipedia.org/wiki/Misleading\_graph}
|
|
\end{frame}
|
|
|
|
|
|
%-------------------------------------------------------------
|
|
|
|
\begin{frame}
|
|
\frametitle{suboptimal example}
|
|
\begin{center}
|
|
\includegraphics[width=.8\linewidth]{figs/badbarplot}
|
|
\end{center}
|
|
\source{www.enfovis.com}
|
|
\end{frame}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\subsection{plotting data}
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{plotting nominal data}
|
|
\framesubtitle{bar plot for count and relative frequency}
|
|
\begin{center}
|
|
\includegraphics[width=.8\linewidth]{figs/nominaldataplot}
|
|
\end{center}
|
|
\end{frame}
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{plotting nominal data}
|
|
\framesubtitle{bar plot for count and relative frequency}
|
|
\scriptsize
|
|
\begin{lstlisting}
|
|
% plot
|
|
bar([1,2], [50, 90], 'facecolor', 'k')
|
|
|
|
% labels axes
|
|
ylabel('cell count')
|
|
xlabel('cell type')
|
|
|
|
% cosmetics
|
|
xlim([0.5,2.5])
|
|
ylim([0, 100])
|
|
box('off')
|
|
set(gca,'XTick',1:2,'XTickLabel',{'pyramidal','interneuron'},'FontSize',20)
|
|
|
|
% settings for saving the figure
|
|
set(gcf, 'PaperUnits', 'centimeters');
|
|
set(gcf, 'PaperSize', [11.7 9.0]);
|
|
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
|
|
\end{lstlisting}
|
|
\end{frame}
|
|
|
|
%----------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{plotting nominal data}
|
|
\framesubtitle{pie chart for count and relative frequency}
|
|
\begin{center}
|
|
\includegraphics[width=.8\linewidth]{figs/nominaldataplot2}
|
|
\end{center}
|
|
\end{frame}
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{plotting nominal data}
|
|
\framesubtitle{exercise}
|
|
\begin{task}{pie chart}
|
|
Plot the same data ($n_{py}=50$, $n_{in}=90$) as a pie chart in Matlab.
|
|
\end{task}
|
|
\end{frame}
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{plotting nominal data}
|
|
\framesubtitle{pie chart for relative frequency}
|
|
\scriptsize
|
|
\begin{lstlisting}
|
|
data = [50, 90];
|
|
h = pie(data, [1,0], {'pyramidal (n=50)', 'interneuron (n=90)'})
|
|
hText = findobj(h,'Type','text') % text object handles
|
|
|
|
set(h(1), 'FaceColor', [.2,.2,.2]);
|
|
set(h(2), 'Rotation', 45);
|
|
set(h(3), 'FaceColor', [.8,.8,.8]);
|
|
set(h(4), 'Rotation', 45);
|
|
|
|
title('cell count')
|
|
set(gca,'XTick',1:2,'XTickLabel',{'pyramidal', 'interneuron'})
|
|
box('off')
|
|
set(gcf, 'PaperUnits', 'centimeters');
|
|
set(gcf, 'PaperSize', [11.7 9.0]);
|
|
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
|
|
\end{lstlisting}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{plotting interval/ratio/absolute data}
|
|
\framesubtitle{histogram}
|
|
\begin{center}
|
|
\includegraphics[width=.8\linewidth]{figs/histogram}
|
|
\end{center}
|
|
\end{frame}
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{plotting interval/ratio/absolute data}
|
|
\framesubtitle{bad choice of bins}
|
|
\begin{center}
|
|
\includegraphics[width=.4\linewidth]{figs/histogrambad}
|
|
\includegraphics[width=.4\linewidth]{figs/histogrambad2}
|
|
\end{center}
|
|
\begin{summary}{Rule of thumb}
|
|
Choose the number of bins $b \approx n/20$.
|
|
\end{summary}
|
|
\end{frame}
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{plotting interval/ratio/absolute data}
|
|
\framesubtitle{how to do it in Matlab}
|
|
\scriptsize
|
|
\begin{lstlisting}
|
|
x = randn(2000,1); % generate Gaussian data
|
|
|
|
hist(x, 50); % generate histogram
|
|
|
|
% set facecolor to gray
|
|
h = findobj(gca, 'Type','patch');
|
|
set(h(1), 'FaceColor',[.2,.2,.2], 'EdgeColor','w', 'linewidth',2)
|
|
|
|
% plot a white grid over it
|
|
h = gridxy([],get(gca,'ytick'),'color','w','linewidth',2)
|
|
uistack(h, 'top')
|
|
|
|
% cosmetics
|
|
box('off');
|
|
xlabel('Data')
|
|
ylabel('Count')
|
|
\end{lstlisting}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{plotting interval/ratio/absolute data}
|
|
\framesubtitle{bar plot}
|
|
There are several ways to plot a sample $x_1, \dots, x_n$ of interval/ratio/absolute
|
|
scale with a bar plot
|
|
\begin{center}
|
|
\includegraphics[width=.6\linewidth]{figs/barplots.png}
|
|
\end{center}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile,fragile]
|
|
\frametitle{plotting interval/ratio/absolute data}
|
|
\framesubtitle{bar plot}
|
|
\scriptsize
|
|
\begin{lstlisting}
|
|
% bar plot
|
|
x = rand(10,1);
|
|
gray = [.5,.5,.5];
|
|
|
|
bar(1, mean(x), 'EdgeColor','w','FaceColor', gray);
|
|
hold on
|
|
|
|
bar(2, mean(x), 'EdgeColor','w','FaceColor', gray);
|
|
plot(0*x + 2, x, 'ok');
|
|
|
|
bar(3, mean(x), 'EdgeColor','w','FaceColor', gray);
|
|
errorbar(3, mean(x), std(x), 'ok');
|
|
|
|
bar(4, mean(x), 'EdgeColor','w','FaceColor', gray);
|
|
errorbar(4, mean(x), std(x)/sqrt(length(x)), 'ok');
|
|
set(gca, 'xtick',[])
|
|
ylabel('uniformly distributed random data in [0,1]')
|
|
box('off')
|
|
title('different forms of bar plots')
|
|
hold off
|
|
\end{lstlisting}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile,fragile]
|
|
\frametitle{plotting interval/ratio/absolute data}
|
|
\framesubtitle{bar plot and measure of central tendency and spread}
|
|
|
|
\begin{itemize}
|
|
\item A bar plot collapses real data onto a single number and some
|
|
measure of spread. This number is usually a {\em measure of central
|
|
tendency}, i.e. a typical/central value for the probability
|
|
distribution of the data.\pause
|
|
\item What measures of central tendency can you think of?\pause
|
|
\begin{itemize}
|
|
\item mean
|
|
\item median
|
|
\item geometric mean (the nth root of the product of the data values)
|
|
\item weighted mean
|
|
\item midrange (mean of the maximum and minimum values of a data set)
|
|
\end{itemize}\pause
|
|
\item Additionally, the bar plot is equipped with a measure of {\em
|
|
spread} or {\em dispersion}. What measure of spread can you think of?\pause
|
|
\begin{itemize}
|
|
\item standard deviation
|
|
\item range (maximum minus minimum of a dataset)
|
|
\item inter-quartile range
|
|
\end{itemize}
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile,fragile]
|
|
\frametitle{plotting interval/ratio/absolute data}
|
|
\framesubtitle{measure of central tendency and spread}
|
|
\Large
|
|
\begin{center}
|
|
\bf The part of statistics that summarizes data in a small number
|
|
of values is called {\em descriptive statistics}.
|
|
\end{center}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile,fragile]
|
|
\frametitle{robust statistics}
|
|
\begin{task}{When is a statistic called robust (leave-one-out)?}
|
|
\begin{itemize}
|
|
\item Generate an array with $20$ random numbers using {\tt
|
|
randn}.
|
|
\item Compute $20$ means: the $i$-th mean is computed from the
data set {\em without} the $i$-th example.
|
|
\item Repeat this with the median.
|
|
\item Make a bar plot that depicts the means of the computed means
|
|
and medians along with an appropriate measure of dispersion.
|
|
\item What can you observe? Do you understand why?
|
|
\end{itemize}
|
|
\end{task}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{plotting interval/ratio/absolute data}
|
|
\framesubtitle{boxplot}
|
|
\begin{minipage}{1.0\linewidth}
|
|
\begin{minipage}{0.5\linewidth}
|
|
\begin{center}
|
|
\includegraphics[width=\linewidth]{figs/boxplot.png}
|
|
\end{center}
|
|
\end{minipage}
|
|
\begin{minipage}{0.5\linewidth}
|
|
Who knows what the elements mean?\pause
|
|
\begin{itemize}
|
|
\item the box depicts the inter-quartile range
|
|
\item the line denotes the median
|
|
\item the whiskers denote the extreme values of the data not
|
|
considered outliers
|
|
\item outliers are plotted separately
|
|
\end{itemize}
|
|
\begin{task}{Outliers}
|
|
\begin{itemize}
|
|
\item Find out how an outlier is defined in a Matlab boxplot.
|
|
\item Can you remove an outlier from the dataset?
|
|
\end{itemize}
|
|
\end{task}
|
|
\end{minipage}
|
|
\end{minipage}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{plotting interval/ratio/absolute data}
|
|
\framesubtitle{violinplot}
|
|
\begin{center}
|
|
\includegraphics[width=.8\linewidth]{figs/violinplots.png}
|
|
\end{center}
|
|
\begin{itemize}
|
|
\item Violinplots depict the distribution of the data by a
|
|
smoothed histogram.
|
|
\item Additional information (data points, median,
|
|
inter-quartile range) is plotted inside.
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{plotting combinations of scales}
|
|
What could we use for a combination of categorical/nominal and
|
|
interval/ratio/absolute?
|
|
\pause
|
|
\begin{center}
|
|
\includegraphics[width=.5\linewidth]{figs/factorplot.png}
|
|
\end{center}
|
|
Each category is a single bar.
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\frametitle{plotting combinations of scales}
|
|
What could we use for a combination of interval/ratio/absolute and
|
|
interval/ratio/absolute, e.g.\ $(x_1, y_1), \dots, (x_n, y_n)$? \pause
|
|
\begin{center}
|
|
\includegraphics[width=.8\linewidth]{figs/paireddata.png}
|
|
\end{center}
|
|
Scatter plot or paired bar chart. Scatter plot can also be used for
|
|
ordinal vs. ordinal data (why not the bar chart?).
|
|
\end{frame}
|
|
|
|
%-------------------------------------------------------------
|
|
\begin{frame}[fragile]
|
|
\begin{center}
|
|
\Huge
|
|
That's it.
|
|
\end{center}
|
|
\end{frame}
|
|
|
|
|
|
\end{document}
|
|
|
|
|