diff --git a/statistics/figs/badbarleft.png b/statistics/figs/badbarleft.png new file mode 100644 index 0000000..667e029 Binary files /dev/null and b/statistics/figs/badbarleft.png differ diff --git a/statistics/figs/badbarright.png b/statistics/figs/badbarright.png new file mode 100644 index 0000000..22f32dc Binary files /dev/null and b/statistics/figs/badbarright.png differ diff --git a/statistics/figs/badscatterleft.png b/statistics/figs/badscatterleft.png new file mode 100644 index 0000000..de3e864 Binary files /dev/null and b/statistics/figs/badscatterleft.png differ diff --git a/statistics/figs/badscatterright.png b/statistics/figs/badscatterright.png new file mode 100644 index 0000000..44918a6 Binary files /dev/null and b/statistics/figs/badscatterright.png differ diff --git a/statistics/figs/barplots.png b/statistics/figs/barplots.png new file mode 100644 index 0000000..b0aa5d7 Binary files /dev/null and b/statistics/figs/barplots.png differ diff --git a/statistics/figs/boxplot.png b/statistics/figs/boxplot.png new file mode 100644 index 0000000..7a0cf69 Binary files /dev/null and b/statistics/figs/boxplot.png differ diff --git a/statistics/figs/factorplot.png b/statistics/figs/factorplot.png new file mode 100644 index 0000000..4a8aafc Binary files /dev/null and b/statistics/figs/factorplot.png differ diff --git a/statistics/figs/paireddata.png b/statistics/figs/paireddata.png new file mode 100644 index 0000000..231d620 Binary files /dev/null and b/statistics/figs/paireddata.png differ diff --git a/statistics/figs/violinplots.png b/statistics/figs/violinplots.png new file mode 100644 index 0000000..f530083 Binary files /dev/null and b/statistics/figs/violinplots.png differ diff --git a/statistics/figs/yaxisscalingleft.png b/statistics/figs/yaxisscalingleft.png new file mode 100644 index 0000000..645aabb Binary files /dev/null and b/statistics/figs/yaxisscalingleft.png differ diff --git a/statistics/figs/yaxisscalingright.png 
b/statistics/figs/yaxisscalingright.png new file mode 100644 index 0000000..2db0eac Binary files /dev/null and b/statistics/figs/yaxisscalingright.png differ diff --git a/statistics/lecture_statistics.tex b/statistics/lecture_statistics.tex index 3ef58ff..a54dcef 100755 --- a/statistics/lecture_statistics.tex +++ b/statistics/lecture_statistics.tex @@ -42,7 +42,7 @@ Bernstein Center T\"ubingen} \institute[Scientific Computing]{} - \date{11/27/2013} + \date{10/20/2014} %\logo{\pgfuseimage{logo}} \subject{Lectures} @@ -359,9 +359,7 @@ correlation coefficient does not have that property. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{description of data and plotting} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{nominal scale} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{what makes a good plot} %------------------------------------------------------------- \begin{frame}[fragile] \frametitle{} @@ -470,6 +468,8 @@ correlation coefficient does not have that property. \end{itemize} \mycite{Allen et al. 2012, Neuron} \end{frame} + +\subsection{bad examples} %------------------------------------------------------------- \begin{frame}[fragile] \frametitle{suboptimal example} @@ -481,17 +481,50 @@ correlation coefficient does not have that property. 
%------------------------------------------------------------- \begin{frame}[fragile] - \frametitle{different axes} + \frametitle{suboptimal example} + \begin{center} + \includegraphics[width=.5\linewidth]{figs/badbarright.png} + \end{center} + \source{http://en.wikipedia.org/wiki/Misleading\_graph} \end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{suboptimal example} + \begin{center} + \includegraphics[width=.4\linewidth]{figs/yaxisscalingleft.png} + \hspace{.5cm} + \includegraphics[width=.4\linewidth]{figs/yaxisscalingright.png} + \end{center} + \source{http://en.wikipedia.org/wiki/Misleading\_graph} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{suboptimal example} + \begin{center} + \includegraphics[width=.4\linewidth]{figs/badscatterleft.png} + \hspace{.5cm} + \includegraphics[width=.4\linewidth]{figs/badscatterright.png} + \end{center} + \source{http://en.wikipedia.org/wiki/Misleading\_graph} +\end{frame} + + %------------------------------------------------------------- \begin{frame} - \frametitle{Bad bar plot} + \frametitle{suboptimal example} \begin{center} \includegraphics[width=.8\linewidth]{figs/badbarplot} \end{center} \source{www.enfovis.com} \end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{nominal scale} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + %------------------------------------------------------------- \begin{frame}[fragile] \frametitle{plotting nominal data} @@ -536,7 +569,7 @@ set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]); \end{frame} %------------------------------------------------------------- \begin{frame}[fragile] - \frametitle{Darstellung nominaler Daten} + \frametitle{plotting nominal data} \framesubtitle{exercise} \begin{task}{pie chart} Plot the same data ($n_{py}=50$, $n_{in}=90$) as a pie chart in Matlab. 
@@ -544,7 +577,7 @@ set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]); \end{frame} %------------------------------------------------------------- \begin{frame}[fragile] - \frametitle{Darstellung nominaler Daten} + \frametitle{plotting nominal data} \framesubtitle{pie chart for relative frequency} \scriptsize \begin{lstlisting} @@ -614,18 +647,152 @@ ylabel('Count') %------------------------------------------------------------- \begin{frame}[fragile] \frametitle{plotting interval/ratio/absolute data} - \framesubtitle{other ways} - There are other ways to plot a sample $x_1, ..., x_n$ of - interval/ratio/absolute scale data. E.g. + \framesubtitle{bar plot} + There are several ways to plot a sample $x_1, ..., x_n$ of interval/ratio/absolute + scale data with a bar plot: + \begin{center} + \includegraphics[width=.6\linewidth]{figs/barplots.png} + \end{center} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{bar plot} +\scriptsize +\begin{lstlisting} +% bar plot +x = rand(10,1); +gray = [.5,.5,.5]; + +bar(1, mean(x), 'EdgeColor','w','FaceColor', gray); +hold on + +bar(2, mean(x), 'EdgeColor','w','FaceColor', gray); +plot(0*x + 2, x, 'ok'); + +bar(3, mean(x), 'EdgeColor','w','FaceColor', gray); +errorbar(3, mean(x), std(x), 'ok'); + +bar(4, mean(x), 'EdgeColor','w','FaceColor', gray); +errorbar(4, mean(x), std(x)/sqrt(length(x)), 'ok'); +set(gca, 'xtick',[]) +ylabel('uniformly distributed random data in [0,1]') +box('off') +title('different forms of bar plots') +hold off +\end{lstlisting} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{bar plot and measure of central tendency and spread} + \begin{itemize} - \item box plot - \item bar plot - \item smoothed histogram - \item ... 
+ \item A bar plot collapses real data onto a single number and some + measure of spread. This number is usually a {\em measure of central + tendency}, i.e. a typical/central value for the probability + distribution of the data.\pause + \item What measures of central tendency can you think of?\pause + \begin{itemize} + \item mean + \item median + \item geometric mean (the nth root of the product of the data values) + \item weighted mean + \item midrange (mean of the maximum and minimum values of a data set) + \end{itemize}\pause + \item Additionally, the bar plot is equipped with a measure of {\em + spread} or {\em dispersion}. What measures of spread can you think of?\pause + \begin{itemize} + \item standard deviation + \item range (maximum minus minimum of a data set) + \item inter-quartile range + \end{itemize} \end{itemize} - We will look at them while plotting mixed data in the following. \end{frame} +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{measure of central tendency and spread} + \Large + \begin{center} + \bf The part of statistics that summarizes data in a small number + of values is called {\em descriptive statistics}. 
+ \end{center} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{boxplot} + \begin{minipage}{1.0\linewidth} + \begin{minipage}{0.5\linewidth} + \begin{center} + \includegraphics[width=\linewidth]{figs/boxplot.png} + \end{center} + \end{minipage} + \begin{minipage}{0.5\linewidth} + Who knows what the elements mean?\pause + \begin{itemize} + \item the box depicts the inter-quartile range + \item the line denotes the median + \item the whiskers denote the most extreme values of the data not + considered outliers + \item outliers are plotted separately + \end{itemize} + \begin{task}{Outliers} + \begin{itemize} + \item Find out how an outlier is defined in a Matlab boxplot. + \item Can you remove an outlier from the dataset? + \end{itemize} + \end{task} + \end{minipage} + \end{minipage} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting interval/ratio/absolute data} + \framesubtitle{violinplot} + \begin{center} + \includegraphics[width=.8\linewidth]{figs/violinplots.png} + \end{center} + \begin{itemize} + \item Violinplots depict the distribution of the data by a + smoothed histogram. + \item Additional information (data points, median, + inter-quartile range) is plotted inside. + \end{itemize} +\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting combinations of scales} + What could we use for a combination of categorical/nominal and + interval/ratio/absolute data? + \pause + \begin{center} + \includegraphics[width=.5\linewidth]{figs/factorplot.png} + \end{center} + Each category is a single bar. 
+\end{frame} + +%------------------------------------------------------------- +\begin{frame}[fragile] + \frametitle{plotting combinations of scales} + What could we use for a combination of interval/ratio/absolute and + interval/ratio/absolute, e.g. $(x_1, y_1), ..., (x_n,y_n)$? \pause + \begin{center} + \includegraphics[width=.8\linewidth]{figs/paireddata.png} + \end{center} + Scatter plot or paired bar chart. Scatter plot can also be used for + ordinal vs. ordinal data (why not the bar chart?). +\end{frame} + + + \end{document} diff --git a/statistics/matlab/intervalplots.m b/statistics/matlab/intervalplots.m index 7b04e92..79e5a9d 100644 --- a/statistics/matlab/intervalplots.m +++ b/statistics/matlab/intervalplots.m @@ -14,3 +14,42 @@ ylabel('Count') set(gcf, 'PaperUnits', 'centimeters'); set(gcf, 'PaperSize', [11.7 9.0]); set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]); + +% bar plot +figure +x = rand(10,1); +gray = [.5,.5,.5]; + +bar(1, mean(x), 'EdgeColor','w','FaceColor', gray); +hold on + +bar(2, mean(x), 'EdgeColor','w','FaceColor', gray); +plot(0*x + 2, x, 'ok'); + +bar(3, mean(x), 'EdgeColor','w','FaceColor', gray); +errorbar(3, mean(x), std(x), 'ok'); + +bar(4, mean(x), 'EdgeColor','w','FaceColor', gray); +errorbar(4, mean(x), std(x)/sqrt(length(x)), 'ok'); +set(gca, 'xtick',[]) +ylabel('uniformly distributed random data in [0,1]') +box('off') +title('different forms of bar plots') +set(gcf, 'PaperUnits', 'centimeters'); +set(gcf, 'PaperSize', [11.7 9.0]); +set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]); +hold off + +% box plot +figure +x = rand(10,1); +x(10) = 3; +boxplot(x) +set(gca, 'xtick',[]) +ylabel('data') +box('off') +title('box plot') +set(gcf, 'PaperUnits', 'centimeters'); +set(gcf, 'PaperSize', [11.7 9.0]); +set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]); +hold off