From 92ed8184191212c3542b0747791c320c2ae7670b Mon Sep 17 00:00:00 2001 From: Jan Benda Date: Fri, 16 Oct 2015 00:17:00 +0200 Subject: [PATCH] First matlab codes for statistics --- statistics/code/quartiles.m | 25 +++ statistics/code/randomwalk.m | 6 + statistics/code/randomwalkstatistics.m | 25 +++ statistics/lecture/descriptivestatistics.tex | 194 +++++++++++++------ 4 files changed, 194 insertions(+), 56 deletions(-) create mode 100644 statistics/code/quartiles.m create mode 100644 statistics/code/randomwalk.m create mode 100644 statistics/code/randomwalkstatistics.m diff --git a/statistics/code/quartiles.m b/statistics/code/quartiles.m new file mode 100644 index 0000000..9b38af1 --- /dev/null +++ b/statistics/code/quartiles.m @@ -0,0 +1,25 @@ +% generate data: +x = randn( 1, 100000 ); + +% histogram: +[h,b] = hist( x, 100 ); +% normalize to a probability density (unit area): +bs = b(2)-b(1); +h = h/sum(h)/bs; + +% plot: +bar( b, h ); +xlabel( 'x' ); + +% median, quartile (indices are integers only because numel(x) is divisible by 4): +xs = sort( x ); +q = [ xs(length(xs)/4), xs(length(xs)/2), xs(3*length(xs)/4) ]; +%q = quantile( x, [0.25, 0.5, 0.75 ] ); + +% plot each quartile range separately (NOTE(review): colors of the first three ranges are reconstructed, confirm): +bar( b(b<q(1)), h(b<q(1)), 'FaceColor', [0.5 0 0.5] ); +hold on; +bar( b((b>=q(1)) & (b<q(2))), h((b>=q(1)) & (b<q(2))), 'FaceColor', [0.9 0 0] ); +bar( b((b>=q(2)) & (b<q(3))), h((b>=q(2)) & (b<q(3))), 'FaceColor', [0.9 0 0] ); +bar( b(b>=q(3)), h(b>=q(3)), 'FaceColor', [0.5 0 0.5] ); +hold off; diff --git a/statistics/code/randomwalk.m b/statistics/code/randomwalk.m new file mode 100644 index 0000000..a442159 --- /dev/null +++ b/statistics/code/randomwalk.m @@ -0,0 +1,6 @@ +function x = randomwalk(n,p) % n-by-1 positions of a walk of n steps: -1 with probability p, +1 otherwise + r = rand(n,1); + r(r<p) = -1.0; % must come first: the -1 entries stay below p and are not touched by the next line + r(r>=p) = +1.0; + x = cumsum(r); +end diff --git a/statistics/code/randomwalkstatistics.m b/statistics/code/randomwalkstatistics.m new file mode 100644 index 0000000..43632cd --- /dev/null +++ b/statistics/code/randomwalkstatistics.m @@ -0,0 +1,25 @@ +p = 0.5; +nsteps = 100; +nwalks = 1000; + +y = zeros( nwalks, nsteps/10 ); % one row per walk, one column per 10th step +for k = 1:nwalks + x = randomwalk( nsteps, p ); + for j = 1:nsteps/10 + y(k,j) = x((j-1)*10+1); + end + %plot( x ) + %pause( 1 ) + if rem(k,100) == 0 + %[h1,b1] = hist( y(1:k,1), [-50:2:50] ); + %[h2,b2] = hist( y(1:k,2), [-50:2:50] ); + 
%bar( b1, h1, 1.0, 'b' ); + %hold on; + %bar( b2, h2, 'FaceColor', 'r' ); + %hold off; + sdev = var( y(1:k,:), 1 ); % despite the name: variance across the first k walks, normalized by N + plot( sdev ) + pause( 1.0 ); + end +end + diff --git a/statistics/lecture/descriptivestatistics.tex b/statistics/lecture/descriptivestatistics.tex index a2ae859..c6e49f5 100644 --- a/statistics/lecture/descriptivestatistics.tex +++ b/statistics/lecture/descriptivestatistics.tex @@ -89,108 +89,190 @@ \tableofcontents \end{frame} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Descriptive statistics} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{types of data} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Statistics of ratio data} +%------------------------------------------------------------- \begin{frame} - \frametitle{data scales} - \framesubtitle{What data types are distinguished in statistics?} - \Large - {\bf Why are data types important?} - \pause + \frametitle{Statistics of ratio data} \begin{itemize} - \item selection of statistics - \item selection of plots - \item selection of correct tests + \item Location, central tendency + \begin{itemize} + \item arithmetic mean + \item median + \item mode + \end{itemize} + + \item Spread, dispersion + \begin{itemize} + \item variance + \item standard deviation + \item interquartile range + \item coefficient of variation + \item minimum, maximum + \end{itemize} + + \item Shape + \begin{itemize} + \item skewness + \item kurtosis + \end{itemize} + + \item Dependence + \begin{itemize} + \item Pearson correlation coefficient + \item Spearman's rank correlation coefficient + \end{itemize} + \end{itemize} \end{frame} -%------------------------------------------------------------- +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Data types} + +%------------------------------------------------------------- 
\begin{frame} - \frametitle{data scales} - \framesubtitle{nominal/categorial scale} + \frametitle{Data types: nominal scale} \begin{itemize} - \item properties like cell type, experimental group (i.e. treatment - 1, treatment 2, control) - \item each observation/sample is put into one category - \item there is no reasonable order among the categories - \item example: [rods, cones] vs. [cones, rods] + \item Binary + \begin{itemize} + \item ``yes/no'', + \item ``true/false'', + \item ``success/failure'', etc. + \end{itemize} + \item Categorical + \begin{itemize} + \item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''), + \item blood type (``A/B/AB/0''), + \item parts of speech (``noun/verb/preposition/article/...''), + \item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc. + \end{itemize} + \item Each observation/measurement/sample is put into one category + \item There is no reasonable order among the categories.\\ + example: [rods, cones] vs. [cones, rods] + \pause + \item Statistics: mode, i.e. the most common item \end{itemize} \end{frame} -%------------------------------------------------------------- +%------------------------------------------------------------- \begin{frame} - \frametitle{data scales} - \framesubtitle{ordinal scale} + \frametitle{Data types: ordinal scale} \begin{itemize} - \item like nominal scale, but there is an order - \item {\bf but:} there is no reasonable measure of {\em distance} + \item Like nominal scale, but with an order + \item Examples: ranks, ratings + \begin{itemize} + \item ``bad/ok/good'', + \item ``cold/warm/hot'', + \item ``young/old'', etc. 
+ \end{itemize} + \item {\bf But:} there is no reasonable measure of {\em distance} between the classes - \item examples: ranks, ratings + \pause + \item Statistics: mode, median \end{itemize} \end{frame} -%------------------------------------------------------------- +%------------------------------------------------------------- \begin{frame} - \frametitle{data scales} - \framesubtitle{interval scale} + \frametitle{Data types: interval scale} \begin{itemize} - \item quantitative/metric values - \item reasonable measure of distance between values but no absolute zero - \item examples: temperature in $^\circ$C + \item Quantitative/metric values + \item Reasonable measure of distance between values, but no absolute zero + \item Examples: + \begin{itemize} + \item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C) + \item Direction measured in degrees from magnetic or true north + \end{itemize} + \pause + \item Statistics: + \begin{itemize} + \item Central tendency: mode, median, arithmetic mean + \item Dispersion: range, standard deviation + \end{itemize} \end{itemize} \end{frame} -%------------------------------------------------------------- +%------------------------------------------------------------- \begin{frame} - \frametitle{data scales} - \framesubtitle{absolut/ratio scale} + \frametitle{Data types: absolute/ratio scale} \begin{itemize} - \item like interval scale but with absolute zero - \item example: temperature in $^\circ$K - \end{itemize} + \item Like interval scale, but with absolute origin/zero + \item Examples: + \begin{itemize} + \item Temperature in kelvin (K) + \item Length, mass, duration, electric charge, ... + \item Plane angle, etc. + \item Count (e.g. 
number of spikes in response to a stimulus) + \end{itemize} \pause - %\begin{emphasize}{relationsships between scales} + \item Statistics: \begin{itemize} - \item scales exhibit increasing information content from nominal - to absolute - \item conversion ,,downwards'' always possible + \item Central tendency: mode, median, arithmetic, geometric, harmonic mean + \item Dispersion: range, standard deviation + \item Coefficient of variation (ratio standard deviation/mean) + \item All other statistical measures \end{itemize} - %\end{emphasize} + \end{itemize} \end{frame} %------------------------------------------------------------- \begin{frame} - \frametitle{examples from neuroscience and psychology} + \frametitle{Data types} \begin{itemize} - \item {\bf nominal:}\pause + \item Data type selects \begin{itemize} - \item treatment group - \item stimulus class - \item cell type + \item statistics + \item type of plots (bar graph versus x-y plot) + \item correct tests + \end{itemize} + \item Scales exhibit increasing information content from nominal + to absolute.\\ + Conversion ``downwards'' is always possible + \item For example: size measured in meters (ratio scale) $\rightarrow$ + categories ``small/medium/large'' (ordinal scale) + \end{itemize} +\end{frame} + +%------------------------------------------------------------- +\begin{frame} + \frametitle{Examples from neuroscience} + \begin{itemize} + + \item {\bf absolute:}\pause + \begin{itemize} + \item size of neuron/brain + \item length of axon + \item ion concentration + \item membrane potential + \item firing rate + \end{itemize} + + \item {\bf interval:}\pause + \begin{itemize} + \item edge orientation \end{itemize} \item {\bf ordinal:} \pause \begin{itemize} + \item stages of a disease \item ratings - \item clinical stages of a disease - \item states of an ion channel \end{itemize} - \item {\bf Absolut-/Ratioskala:}\pause + \item {\bf nominal:}\pause \begin{itemize} - \item firing rate - \item membrane 
potential - \item ion concentration + \item cell type + \item odor + \item states of an ion channel \end{itemize} + \end{itemize} \end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Real-valued data} +\end{document} \ No newline at end of file