diff --git a/statistics/figs/badbarplot.jpg b/statistics/figs/badbarplot.jpg new file mode 100644 index 0000000..f9327de Binary files /dev/null and b/statistics/figs/badbarplot.jpg differ diff --git a/statistics/figs/feeding.jpg b/statistics/figs/feeding.jpg old mode 100644 new mode 100755 diff --git a/statistics/figs/histogram.png b/statistics/figs/histogram.png new file mode 100755 index 0000000..8eaaa88 Binary files /dev/null and b/statistics/figs/histogram.png differ diff --git a/statistics/figs/histogrambad.png b/statistics/figs/histogrambad.png new file mode 100755 index 0000000..3ce2c65 Binary files /dev/null and b/statistics/figs/histogrambad.png differ diff --git a/statistics/figs/histogrambad2.png b/statistics/figs/histogrambad2.png new file mode 100755 index 0000000..a11dc47 Binary files /dev/null and b/statistics/figs/histogrambad2.png differ diff --git a/statistics/figs/nacho-trainer.jpg b/statistics/figs/nacho-trainer.jpg old mode 100644 new mode 100755 diff --git a/statistics/figs/nobelbad.jpg b/statistics/figs/nobelbad.jpg new file mode 100755 index 0000000..9ff4a06 Binary files /dev/null and b/statistics/figs/nobelbad.jpg differ diff --git a/statistics/figs/nominaldataplot.png b/statistics/figs/nominaldataplot.png old mode 100644 new mode 100755 diff --git a/statistics/figs/nominaldataplot2.png b/statistics/figs/nominaldataplot2.png old mode 100644 new mode 100755 diff --git a/statistics/figs/soccer.jpg b/statistics/figs/soccer.jpg old mode 100644 new mode 100755 diff --git a/statistics/lecture_statistics.tex b/statistics/lecture_statistics.tex index 36c7c31..3ef58ff 100755 --- a/statistics/lecture_statistics.tex +++ b/statistics/lecture_statistics.tex @@ -113,7 +113,7 @@ Bernstein Center T\"ubingen} % PCA %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section[Pr�ludium]{Prelude} +\section[Prelude]{Prelude} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % ---------------------------------------------------------- @@ 
-364,32 +364,162 @@ correlation coefficient does not have that property. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %------------------------------------------------------------- \begin{frame}[fragile] - \frametitle{Darstellung nominaler Daten} - \framesubtitle{Bar-Plot f\"ur Anzahl/ rel. H\"aufigkeit} + \frametitle{} + \begin{center} + \Huge What makes a good plot? + \end{center} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{features of a good plot} + A good plot + \begin{itemize} + \item helps the reader to clearly understand your point.\pause + \item is not misleading and lets the reader judge the information + on her own (different y-axis/length scales in two related plots, + ``squeezing'' via log-plots). \pause + \item contains information about the data (a comic might be + illustrative, but does not contain information about the + data).\pause + \item adheres to the principle of {\em ink minimization}. + \end{itemize} +\end{frame} + + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{features of a good plot} + \framesubtitle{design/organization} + \begin{itemize} + \item Is the display consistent with the model or hypothesis + being tested?\pause + \item Are there ``empty dimensions'' in the display that could be + removed (A 3D pie chart for 2D categorical data, extraneous colors + that do not encode meaningful information)?\pause + \item Does the display provide an honest and transparent portrayal + of the data (hiding, smoothing, modifying data points should be + avoided or explicitly mentioned)? + \end{itemize} + \mycite{Allen et al. 
2012, Neuron} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{features of a good plot} + \framesubtitle{axes} + \begin{itemize} + \item Are axes scales defined as linear, log, or radial?\pause + \item Does each axis label describe the variable and its units (use + ``a.u.'' for arbitrary units)?\pause + \item Are axes limits appropriate for the data (The graphic should + not be bounded at zero if the data can take on both positive and + negative values.)?\pause + \item Is the aspect ratio appropriate for the data (When x and y + axes contrast the same variable under different conditions the + graphic should be square.)? + \end{itemize} + \mycite{Allen et al. 2012, Neuron} +\end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{features of a good plot} + \framesubtitle{color mapping} + \begin{itemize} + \item Is a color bar provided?\pause + \item Is the color map sensible for the data type (does the data + extend to both $\pm$, does it live in an interval, is it + circular)?\pause + \item Are contrasting colors consistent with a natural interpretation? + \item Can features be discriminated when printed in grayscale? + \item Has red/green contrast been avoided to accommodate common + forms of colorblindness? + \end{itemize} + \mycite{Allen et al. 2012, Neuron} +\end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{features of a good plot} + \framesubtitle{uncertainty} + \begin{itemize} + \item Does the display indicate the uncertainty of estimated parameters?\pause + \item Is the type of error surface appropriate for the data? 
+ \begin{itemize} + \item Use standard deviations to describe variability in the population.\pause + \item Use standard errors or confidence intervals to make inferences + about parameters estimated from a sample.\pause + \item Parametric confidence intervals should only be used if data + meet the assumptions of the underlying model.\pause + \end{itemize} + \item Are the units of uncertainty defined (is it standard error, is + it $95\%$ confidence interval)? + \end{itemize} + \mycite{Allen et al. 2012, Neuron} +\end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{features of a good plot} + \framesubtitle{annotation} + \begin{itemize} + \item Are all symbols defined, preferably by directly labeling objects?\pause + \item Is the directionality of a contrast between conditions obvious?\pause + \item Is the number of samples or independent experiments indicated?\pause + \item Are statistical procedures and criteria for significance described?\pause + \item Are uncommon abbreviations avoided or clearly defined?\pause + \item Are abbreviations consistent with those used in the text? + \end{itemize} + \mycite{Allen et al. 2012, Neuron} +\end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{suboptimal example} + \begin{center} + \includegraphics[width=.5\linewidth]{figs/nobelbad} + \end{center} + \mycite{Hafting et al. 
2005, Nature} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{different axes} +\end{frame} +%------------------------------------------------------------- + +\begin{frame} + \frametitle{Bad bar plot} + \begin{center} + \includegraphics[width=.8\linewidth]{figs/badbarplot} + \end{center} + \source{www.enfovis.com} +\end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting nominal data} + \framesubtitle{bar plot for count and relative frequency} \begin{center} \includegraphics[width=.8\linewidth]{figs/nominaldataplot} \end{center} \end{frame} %------------------------------------------------------------- \begin{frame}[fragile] - \frametitle{Darstellung nominaler Daten} - \framesubtitle{Bar-Plot f\"ur Anzahl/ rel. H\"aufigkeit} + \frametitle{plotting nominal data} + \framesubtitle{bar plot for count and relative frequency} \scriptsize \begin{lstlisting} -% eigentlicher Plot +% plot bar([1,2], [50, 90], 'facecolor', 'k') -% Achsenbeschriftung +% labels axes ylabel('cell count') xlabel('cell type') -% Kosmetik +% cosmetics xlim([0.5,2.5]) ylim([0, 100]) box('off') set(gca,'XTick',1:2,'XTickLabel',{'pyramidal','interneuron'},'FontSize',20) -% Settings fuers Abspeichern +% settings for saving the figure set(gcf, 'PaperUnits', 'centimeters'); set(gcf, 'PaperSize', [11.7 9.0]); set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]); @@ -398,8 +528,8 @@ set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]); %---------------------------------------------------------- \begin{frame}[fragile] - \frametitle{Darstellung nominaler Daten} - \framesubtitle{Pie-Chart f\"ur Anzahl/ rel. 
H\"aufigkeit} + \frametitle{plotting nominal data} + \framesubtitle{pie chart for count and relative frequency} \begin{center} \includegraphics[width=.8\linewidth]{figs/nominaldataplot2} \end{center} @@ -407,15 +537,15 @@ set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]); %------------------------------------------------------------- \begin{frame}[fragile] \frametitle{Darstellung nominaler Daten} - \framesubtitle{\"Ubung} - \begin{task}{Pie-Chart} - Plotte dieselben Daten ($n_{py}=50$, $n_{in}=90$) als Pie-Chart in Matlab. + \framesubtitle{exercise} + \begin{task}{pie chart} + Plot the same data ($n_{py}=50$, $n_{in}=90$) as a pie chart in Matlab. \end{task} \end{frame} %------------------------------------------------------------- \begin{frame}[fragile] \frametitle{Darstellung nominaler Daten} - \framesubtitle{Pie-Chart f\"ur Anzahl/ rel. H\"aufigkeit} + \framesubtitle{pie chart for relative frequency} \scriptsize \begin{lstlisting} data = [50, 90]; @@ -438,9 +568,64 @@ set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]); %------------------------------------------------------------- \begin{frame}[fragile] - \frametitle{Darstellung von Interval-/Absolutskala Daten} - + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{histogram} + \begin{center} + \includegraphics[width=.8\linewidth]{figs/histogram} + \end{center} \end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{bad choice of bins} + \begin{center} + \includegraphics[width=.4\linewidth]{figs/histogrambad} + \includegraphics[width=.4\linewidth]{figs/histogrambad2} + \end{center} + \begin{summary}{Rule of thumb} + Choose the number of bins as $b\approx n/20$, where $n$ is the sample size. 
+ \end{summary} +\end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{how to do it in Matlab} + \scriptsize +\begin{lstlisting} +x = randn(2000,1); % generate Gaussian data + +hist(x, 50); % generate histogram + +% set facecolor to gray +h = findobj(gca, 'Type','patch'); +set(h(1), 'FaceColor',[.2,.2,.2], 'EdgeColor','w', 'linewidth',2) + +% plot a white grid over it +h = gridxy([],get(gca,'ytick'),'color','w','linewidth',2); +uistack(h, 'top') + +% cosmetics +box('off'); +xlabel('Data') +ylabel('Count') +\end{lstlisting} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{other ways} + There are other ways to plot a sample $x_1, ..., x_n$ of + interval/ratio/absolute scale data. E.g. + \begin{itemize} + \item box plot + \item bar plot + \item smoothed histogram + \item ... + \end{itemize} + We will look at them while plotting mixed data in the following. +\end{frame} + \end{document} diff --git a/statistics/matlab/gridxy.m b/statistics/matlab/gridxy.m new file mode 100644 index 0000000..1fbe64a --- /dev/null +++ b/statistics/matlab/gridxy.m @@ -0,0 +1,127 @@ +function hh = gridxy(x,varargin) +% GRIDXY - Plot grid lines +% GRIDXY(X) plots vertical grid lines at the positions specified +% by X. GRIDXY(X,Y) also plots horizontal grid lines at the positions +% specified by Y. GRIDXY uses the current axes, if any. Lines outside +% the plot area are plotted but not shown. When X or Y is empty no vertical +% or horizontal lines are plotted. +% +% The lines are plotted as a single graphics object. H = GRIDXY(..) returns +% a graphics handle to that line object. +% +% GRIDXY(..., 'Prop1','Val1','Prop2','Val2', ...) uses the properties +% and values specified for color, linestyle, etc. 
Execute GET(H), where H is +% a line handle, to see a list of line object properties and their current values. +% Execute SET(H) to see a list of line object properties and legal property values. +% +% Examples +% % some random plot +% plot(10*rand(100,1), 10*rand(100,1),'bo') ; +% % vertical red dotted grid +% gridxy([1.1 3.2 4.5],'Color','r','Linestyle',':') ; +% % horizontal solid thicker yellowish grid, and store the handle +% h = gridxy([],[2.1:0.7:5 8],'Color',[0.9 1.0 0.2],'linewidth',3) ; +% +% GRIDXY can be used to plot an irregular grid on the axes. +% +% See also PLOT, REFLINE, GRID, AXES, REFLINEXY + +% NOTE: This function was previously known as XYREFLINE + +% for Matlab R13 +% version 2.2 (feb 2008) +% (c) Jos van der Geest +% email: jos@jasen.nl + +% History +% Created (1.0) feb 2006 +% 2.0 apr 2007 - renamed from reflinexy to gridxy, reflinexy is now used +% for plotting intersection between X and Y axes +% 2.1 apr 2007 - add error check for line properties +% 2.2 feb 2008 - added set(gca,'layer','top') to put gridlines behind the +% axis tick marks + +error(nargchk(1,Inf,nargin)) ; + +% check the arguments +if ~isnumeric(x), + error('Numeric argument expected') ; +end + +if nargin==1, + y = [] ; + va = [] ; +else + va = varargin ; + if ischar(va{1}), + % no Y given: all optional arguments are property/value pairs + y = [] ; + elseif isnumeric(va{1}) + y = va{1} ; + va = va(2:end) ; + else + error('Invalid second argument') ; + end + if mod(numel(va),2) == 1, + error('Property-Value have to be pairs') ; + end +end + +% get the axes to plot in +hca=get(get(0,'currentfigure'),'currentaxes'); +if isempty(hca), + warning('No current axes found') ; + if nargout==1, hh = [] ; end, return ; % define the requested output before bailing out +end + +% get the current limits of the axis +% used for limit restoration later on +xlim = get(hca,'xlim') ; +ylim = get(hca,'ylim') ; + +% setup data for the vertical lines +xx1 = repmat(x(:).',3,1) ; +yy1 = repmat([ylim(:) ; 
nan],1,numel(x)) ; + +% setup data for the horizontal lines +xx2 = repmat([xlim(:) ; nan],1,numel(y)) ; +yy2 = 
repmat(y(:).',3,1) ; + + +% create data for a single line object +xx1 = [xx1 xx2] ; +if ~isempty(xx1), + yy1 = [yy1 yy2] ; + % add the line to the current axes + np = get(hca,'nextplot') ; + set(hca,'nextplot','add') ; + h = line('xdata',xx1(:),'ydata',yy1(:)) ; + set(hca,'ylim',ylim,'xlim',xlim) ; % reset the limits + + uistack(h,'bottom') ; % push lines to the bottom of the graph + set(hca,'nextplot',np,'Layer','top') ; % reset the nextplot state + + if ~isempty(va), + try + set(h,va{:}) ; % set line properties + catch + msg = lasterror ; + error(msg.message(21:end)) ; + end + end + +else + h = [] ; +end + +if nargout==1, % if requested return handle + hh = h ; +end + + + + + + + + diff --git a/statistics/matlab/intervalplots.m b/statistics/matlab/intervalplots.m index b069918..7b04e92 100644 --- a/statistics/matlab/intervalplots.m +++ b/statistics/matlab/intervalplots.m @@ -2,5 +2,15 @@ close all % histogram figure() x = randn(2000,1); -hist(x, 50, 'gray') -box('off'); \ No newline at end of file +hist(x, 50); +h = findobj(gca, 'Type','patch'); +set(h(1), 'FaceColor',[.2,.2,.2], 'EdgeColor','w', 'linewidth',2) +grid('off') +h = gridxy([],get(gca,'ytick'),'color','w','linewidth',2); +box('off'); +uistack(h, 'top') +xlabel('Data') +ylabel('Count') +set(gcf, 'PaperUnits', 'centimeters'); +set(gcf, 'PaperSize', [11.7 9.0]); +set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);