First lecture on descriptive statistics
This commit is contained in:
parent
1264b4749a
commit
fb9008f571
20
programming/lectures/Makefile
Normal file
20
programming/lectures/Makefile
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# Build the lecture PDFs from their LaTeX sources.
#
# Targets:
#   pdf      (default) build one PDF per entry in TEXFILES
#   clean    remove editor backups and LaTeX/beamer auxiliary files
#   cleanall clean, and additionally remove the generated PDFs
#   watch    rebuild in an endless loop whenever a PDF is out of date

# Explicit list of lecture sources to build.
# Alternative: build every .tex file in this directory:
#   TEXFILES := $(wildcard *.tex)
TEXFILES := boolean_logical_indexing.tex control_structures.tex data_structures.tex plotting_spike_trains.tex programming_basics.tex scripts_functions.tex sta_stc.tex variables_datatypes.tex vectors_matrices.tex

PDFFILES := $(TEXFILES:.tex=.pdf)

# These targets are commands, not files; declaring them phony keeps a stray
# file named e.g. "clean" from disabling the target.
.PHONY : pdf clean cleanall watch

pdf : $(PDFFILES)

# Run pdflatex once; if its output asks for a rerun to resolve
# cross-references, run it a second time.  The trailing "|| true" keeps make
# going when grep finds no rerun request (exit status 1); note that it also
# masks a failing pdflatex run.
$(PDFFILES) : %.pdf : %.tex
	pdflatex -interaction=scrollmode $< | tee /dev/stderr | grep -F -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true

clean :
	$(RM) *~ $(foreach ext,aux log out nav snm toc vrb,$(patsubst %.tex,%.$(ext),$(TEXFILES)))

cleanall : clean
	$(RM) $(PDFFILES)

# "$(MAKE) -q pdf" exits non-zero when something needs rebuilding; only then
# is the real build started.  Because the line references $(MAKE), GNU make
# executes it even under "make -n".
watch :
	while true; do $(MAKE) -q pdf || $(MAKE) pdf; sleep 0.5; done
|
||||||
|
|
||||||
|
|
12
statistics/code/checkmymedian.m
Normal file
12
statistics/code/checkmymedian.m
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
% Check whether the median returned by mymedian
% really separates a vector into two halves.
% For each vector length from 1 to 140, ten random vectors are drawn and
% the number of elements above the median is compared with the number of
% elements below it. 'error!' is displayed for every mismatch.
for i = 1:140                          % loop over different lengths
    for k = 1:10                       % try several times
        a = randn( i, 1 );             % generate some data
        m = mymedian( a );             % compute median (semicolon: do not echo every value)
        if sum( a > m ) ~= sum( a < m )   % check: as many values above as below?
            disp( 'error!' )
        end
    end
end
|
||||||
|
|
24
statistics/code/diehistograms.m
Normal file
24
statistics/code/diehistograms.m
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
% Dependence of the histogram on the number of rolls:
% for each entry of nrolls, roll the die that many times, plot the
% normalized histogram together with the expected value 1/6, and wait
% for a key press before continuing.
nrolls = [ 20, 100, 1000 ];
for i = 1:length(nrolls)
    d = rollthedie( nrolls(i) );

    % Exploration steps of the exercise, kept for reference:
    % plain hist:
    % hist( d )

    % check bin counts of plain hist:
    % h = hist( d )

    % force 6 bins:
    % hist( d, 6 )

    % set the right bin centers:
    bins = 1:6;
    %hist( d, bins )

    % normalize histogram and compare to expectation:
    % Draw the expectation line first while hold is still off, so that the
    % plots of the previous iteration are cleared instead of accumulating.
    plot( [0 7], [1/6 1/6], '-r', 'linewidth', 10 )
    hold on
    % NOTE(review): the third argument (1.0) relies on the hist(y, x, norm)
    % form as provided by Octave -- confirm before running in MATLAB.
    hist( d, bins, 1.0, 'facecolor', 'b' )
    hold off
    pause
end
|
17
statistics/code/gaussianbins.m
Normal file
17
statistics/code/gaussianbins.m
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
% Histograms of one sample of 100 normally distributed numbers, computed
% with two different bin widths.
% Left panel: plain counts. Right panel: same histograms called with
% 1/(bin width) as third argument of hist.
x = randn( 100, 1 );
bins1 = -4:2:4;      % bin centers, width 2
bins2 = -4:0.5:4;    % bin centers, width 0.5

% plain counts:
subplot( 1, 2, 1 );
hold on;
hist( x, bins1 );
hist( x, bins2 );
xlabel('x')
ylabel('Frequency')
hold off;

% normalized:
% NOTE(review): relies on the hist(y, x, norm) form as provided by
% Octave -- confirm before running in MATLAB.
subplot( 1, 2, 2 );
hold on;
hist( x, bins1, 1.0/(bins1(2)-bins1(1)) );
hist( x, bins2, 1.0/(bins2(2)-bins2(1)) );
xlabel('x')
ylabel('Probability density')
hold off;
|
22
statistics/code/gaussianpdf.m
Normal file
22
statistics/code/gaussianpdf.m
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
% Standard Gaussian probability density:
% plot it, sum it numerically over [x1, x2), and compare the result with
% the fraction of normally distributed random numbers in that range.
% Statements without a trailing semicolon echo their value on purpose.

% the density on a grid with spacing dx:
dx=0.1
x = -4.0:dx:4.0;
p = exp(-0.5*x.^2)/sqrt(2.0*pi);
hold on
plot( x, p, 'linewidth', 10 )

% probability of x1 <= x < x2 as a Riemann sum over the grid:
x1=1.0
x2=2.0
P = sum( p( x>=x1 & x<x2 ) )*dx

% histogram of random numbers on the same grid:
r = randn( 10000, 1 );
hist( r, x, 1.0/dx )

% the same probability estimated from the random numbers:
Pr = sum( r>=x1 & r<x2 )/length(r)

hold off
|
||||||
|
|
||||||
|
|
24
statistics/code/histogramquartiles.m
Normal file
24
statistics/code/histogramquartiles.m
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
% Normalized histogram of normally distributed data in which the four
% ranges delimited by the quartiles are drawn in different colors.

% data:
x = randn( 1, 100000 );

% histogram with 100 bins, normalized by its sum and the bin width bs:
[h,b] = hist( x, 100 );
bs = b(2)-b(1);
h = h/sum(h)/bs;

% plot the complete histogram:
bar( b, h );
xlabel( 'x' );

% first quartile, median, third quartile:
q = quartiles( x );
%q = quantile( x, [0.25, 0.5, 0.75 ] );

% overlay the bins of each quartile range in its own color:
hold on;
bar( b(b<q(1)), ...
     h(b<q(1)), 'FaceColor', [0.5 0 0.5] );
bar( b((b>=q(1)) & (b<q(2))), ...
     h((b>=q(1)) & (b<q(2))), 'FaceColor', [0.9 0 0] );
bar( b((b>=q(2)) & (b<q(3))), ...
     h((b>=q(2)) & (b<q(3))), 'FaceColor', [0 0 0.9] );
bar( b(b>=q(3)), ...
     h(b>=q(3)), 'FaceColor', [0.5 0 0.5] );
hold off;
|
13
statistics/code/mymedian.m
Normal file
13
statistics/code/mymedian.m
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
function m = mymedian( x )
% MYMEDIAN  median of the vector x.
%   m = mymedian( x ) sorts x and returns the middle element for an odd
%   number of elements, the mean of the two middle elements for an even
%   number of elements, and NaN for empty x.
    xs = sort( x );
    n = length( xs );
    if n == 0
        m = NaN;
        return
    end
    half = floor( n/2 );
    if 2*half == n
        % even count: average the two elements around the center
        m = (xs( half ) + xs( half+1 ))/2;
    else
        % odd count: the center element itself
        m = xs( half+1 );
    end
end
|
@ -1,25 +1,15 @@
|
|||||||
% generate data:
|
function q = quartiles( x )
% QUARTILES  first, second, and third quartile of the vector x.
%   q = quartiles( x ) returns the row vector [q1, q2, q3]. q2 is the
%   median (middle element, or mean of the two middle elements for an
%   even number of elements). q1 and q3 are the elements of the sorted
%   data at the rounded positions n/4 and 3*n/4.
%   An empty x returns an empty q.
    xs = sort( x );
    n = length( xs );
    if n == 0
        q = [];
        return
    end
    % median:
    if rem( n, 2 ) == 0
        index = n/2;
        m = (xs( index ) + xs( index+1 ))/2;
    else
        index = (n + 1)/2;
        m = xs( index );
    end
    % Round the positions, not the data values, and keep them >= 1 so
    % that very short vectors do not produce the invalid index 0.
    i1 = max( 1, round( n/4 ) );
    i3 = max( 1, round( 3*n/4 ) );
    q = [ xs( i1 ), m, xs( i3 ) ];
end
|
||||||
q = [ xs(length(xs)/4), xs(length(xs)/2), xs(3*length(xs)/4) ];
|
|
||||||
%q = quantile( x, [0.25, 0.5, 0.75 ] );
|
|
||||||
|
|
||||||
% plot:
|
|
||||||
bar( b(b<q(1)), h(b<q(1)), 'FaceColor', [0.5 0 0.5] );
|
|
||||||
hold on;
|
|
||||||
bar( b((b>=q(1)) & (b<q(2))), h((b>=q(1)) & (b<q(2))), 'FaceColor', [0.9 0 0] );
|
|
||||||
bar( b((b>=q(2)) & (b<q(3))), h((b>=q(2)) & (b<q(3))), 'FaceColor', [0 0 0.9] );
|
|
||||||
bar( b(b>=q(3)), h(b>=q(3)), 'FaceColor', [0.5 0 0.5] );
|
|
||||||
hold off;
|
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
function x = randomwalk(n,p)
|
function x = randomwalk(n,p)
|
||||||
|
% returns a random walk with n steps and
|
||||||
|
% probability p for positive steps.
|
||||||
r = rand(n,1);
|
r = rand(n,1);
|
||||||
r(r<p) = -1.0;
|
r(r<p) = -1.0;
|
||||||
r(r>=p) = +1.0;
|
r(r>=p) = +1.0;
|
||||||
|
4
statistics/code/rollthedie.m
Normal file
4
statistics/code/rollthedie.m
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
function x = rollthedie( n )
% ROLLTHEDIE  simulate rolling a die n times.
%   x = rollthedie( n ) returns an n-by-1 vector of random integers
%   between 1 and 6.
    sides = 6;
    x = randi( sides, n, 1 );
end
|
18
statistics/lecture/Makefile
Normal file
18
statistics/lecture/Makefile
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# Build the lecture PDFs from their LaTeX sources.
#
# Targets:
#   pdf      (default) build one PDF per .tex file in this directory
#   clean    remove editor backups and LaTeX/beamer auxiliary files
#   cleanall clean, and additionally remove the generated PDFs
#   watch    rebuild in an endless loop whenever a PDF is out of date

# Simple (:=) assignment: the directory is globbed once, at parse time.
TEXFILES := $(wildcard *.tex)
PDFFILES := $(TEXFILES:.tex=.pdf)

# These targets are commands, not files; declaring them phony keeps a stray
# file named e.g. "clean" from disabling the target.
.PHONY : pdf clean cleanall watch

pdf : $(PDFFILES)

# Run pdflatex once; if its output asks for a rerun to resolve
# cross-references, run it a second time.  The trailing "|| true" keeps make
# going when grep finds no rerun request (exit status 1); note that it also
# masks a failing pdflatex run.
$(PDFFILES) : %.pdf : %.tex
	pdflatex -interaction=scrollmode $< | tee /dev/stderr | grep -F -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true

clean :
	$(RM) *~ $(foreach ext,aux log out nav snm toc vrb,$(patsubst %.tex,%.$(ext),$(TEXFILES)))

cleanall : clean
	$(RM) $(PDFFILES)

# "$(MAKE) -q pdf" exits non-zero when something needs rebuilding; only then
# is the real build started.  Because the line references $(MAKE), GNU make
# executes it even under "make -n".
watch :
	while true; do $(MAKE) -q pdf || $(MAKE) pdf; sleep 0.5; done
|
||||||
|
|
||||||
|
|
@ -1,43 +1,55 @@
|
|||||||
\documentclass{beamer}
|
\documentclass[12pt]{report}
|
||||||
|
|
||||||
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\title[]{Scientific Computing --- Descriptive Statistics}
|
\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
|
||||||
\author[]{Jan Benda}
|
\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
|
||||||
\institute[]{Neuroethology}
|
\date{WS 15/16}
|
||||||
\date[]{WS 15/16}
|
|
||||||
\titlegraphic{\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
|
%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% \newcommand{\tr}[2]{#1} % en
|
||||||
%%%%% beamer %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
% \usepackage[english]{babel}
|
||||||
\mode<presentation>
|
\newcommand{\tr}[2]{#2} % de
|
||||||
{
|
\usepackage[german]{babel}
|
||||||
\usetheme{Singapore}
|
|
||||||
\setbeamercovered{opaque}
|
|
||||||
\usecolortheme{tuebingen}
|
|
||||||
\setbeamertemplate{navigation symbols}{}
|
|
||||||
\usefonttheme{default}
|
|
||||||
\useoutertheme{infolines}
|
|
||||||
% \useoutertheme{miniframes}
|
|
||||||
}
|
|
||||||
|
|
||||||
%\AtBeginSection[]
|
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
%{
|
\usepackage{pslatex} % nice font for pdf file
|
||||||
% \begin{frame}<beamer>
|
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
||||||
% \begin{center}
|
|
||||||
% \Huge \insertsectionhead
|
|
||||||
% \end{center}
|
|
||||||
% \end{frame}
|
|
||||||
%}
|
|
||||||
|
|
||||||
\setbeamertemplate{blocks}[rounded][shadow=true]
|
%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
|
||||||
\setcounter{tocdepth}{1}
|
\setcounter{tocdepth}{1}
|
||||||
|
|
||||||
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%% graphics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\usepackage[english]{babel}
|
\usepackage{graphicx}
|
||||||
|
\usepackage{xcolor}
|
||||||
|
\newcommand{\texpicture}[1]{{\sffamily\small\input{#1.tex}}}
|
||||||
|
|
||||||
|
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\usepackage{listings}
|
||||||
|
\lstset{
|
||||||
|
inputpath=../code,
|
||||||
|
basicstyle=\ttfamily\footnotesize,
|
||||||
|
numbers=left,
|
||||||
|
showstringspaces=false,
|
||||||
|
language=Matlab,
|
||||||
|
commentstyle=\itshape\color{darkgray},
|
||||||
|
keywordstyle=\color{blue},
|
||||||
|
stringstyle=\color{green},
|
||||||
|
backgroundcolor=\color{blue!10},
|
||||||
|
breaklines=true,
|
||||||
|
breakautoindent=true,
|
||||||
|
columns=flexible,
|
||||||
|
frame=single,
|
||||||
|
caption={\protect\filename@parse{\lstname}\protect\filename@base},
|
||||||
|
captionpos=t,
|
||||||
|
xleftmargin=1em,
|
||||||
|
xrightmargin=1em,
|
||||||
|
aboveskip=10pt
|
||||||
|
}
|
||||||
|
|
||||||
|
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\usepackage{amsmath}
|
\usepackage{amsmath}
|
||||||
\usepackage{bm}
|
\usepackage{bm}
|
||||||
\usepackage{pslatex} % nice font for pdf file
|
|
||||||
%\usepackage{multimedia}
|
|
||||||
|
|
||||||
\usepackage{dsfont}
|
\usepackage{dsfont}
|
||||||
\newcommand{\naZ}{\mathds{N}}
|
\newcommand{\naZ}{\mathds{N}}
|
||||||
\newcommand{\gaZ}{\mathds{Z}}
|
\newcommand{\gaZ}{\mathds{Z}}
|
||||||
@ -47,59 +59,45 @@
|
|||||||
\newcommand{\reZpN}{\mathds{R^+_0}}
|
\newcommand{\reZpN}{\mathds{R^+_0}}
|
||||||
\newcommand{\koZ}{\mathds{C}}
|
\newcommand{\koZ}{\mathds{C}}
|
||||||
|
|
||||||
%%%% graphics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\usepackage{graphicx}
|
|
||||||
\newcommand{\texpicture}[1]{{\sffamily\small\input{#1.tex}}}
|
|
||||||
|
|
||||||
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\usepackage{listings}
|
\usepackage{ifthen}
|
||||||
\lstset{
|
|
||||||
basicstyle=\ttfamily,
|
\newcommand{\code}[1]{\texttt{#1}}
|
||||||
numbers=left,
|
|
||||||
showstringspaces=false,
|
\newcommand{\source}[1]{
|
||||||
language=Matlab,
|
\begin{flushright}
|
||||||
commentstyle=\itshape\color{darkgray},
|
\color{gray}\scriptsize \url{#1}
|
||||||
keywordstyle=\color{blue},
|
\end{flushright}
|
||||||
stringstyle=\color{green},
|
}
|
||||||
backgroundcolor=\color{blue!10},
|
|
||||||
breaklines=true,
|
\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
|
||||||
breakautoindent=true,
|
{\medskip}
|
||||||
columns=flexible,
|
|
||||||
frame=single,
|
%\newcommand{\showlisting}{yes}
|
||||||
captionpos=b,
|
\newcommand{\showlisting}{no}
|
||||||
xleftmargin=1em,
|
\newcounter{theexercise}
|
||||||
xrightmargin=1em,
|
\setcounter{theexercise}{1}
|
||||||
aboveskip=10pt
|
\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
|
||||||
}
|
\arabic{theexercise}:} \stepcounter{theexercise}\newline \newcommand{\exercisesource}{#1}}%
|
||||||
|
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\equal{\showlisting}{yes}}{\medskip\lstinputlisting{\exercisesource}}{}}\medskip}
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\begin{document}
|
\begin{document}
|
||||||
|
|
||||||
\begin{frame}[plain]
|
\maketitle
|
||||||
\frametitle{}
|
|
||||||
\vspace{-1cm}
|
|
||||||
\titlepage % erzeugt Titelseite
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\begin{frame}
|
|
||||||
\frametitle{Content}
|
|
||||||
\tableofcontents
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
|
%\tableofcontents
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\section{Descriptive statistics}
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\chapter{\tr{Descriptive statistics}{Deskriptive Statistik}}
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\subsection{Statistics of ratio data}
|
\section{Statistics of real-valued data}
|
||||||
|
|
||||||
%-------------------------------------------------------------
|
|
||||||
\begin{frame}
|
|
||||||
\frametitle{Statistics of ratio data}
|
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Location, central tendency
|
\item Location, central tendency
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
@ -107,7 +105,6 @@
|
|||||||
\item median
|
\item median
|
||||||
\item mode
|
\item mode
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
\item Spread, dispersion
|
\item Spread, dispersion
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item variance
|
\item variance
|
||||||
@ -116,163 +113,294 @@
|
|||||||
\item coefficient of variation
|
\item coefficient of variation
|
||||||
\item minimum, maximum
|
\item minimum, maximum
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
\item Shape
|
\item Shape
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item skewness
|
\item skewness
|
||||||
\item kurtosis
|
\item kurtosis
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
\item Dependence
|
\item Dependence
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Pearson correlation coefficient
|
\item Pearson correlation coefficient
|
||||||
\item Spearman's rank correlation coefficient
|
\item Spearman's rank correlation coefficient
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{frame}
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\subsection{Median, Quartile, Percentile}
|
||||||
|
|
||||||
|
\begin{figure}[t]
|
||||||
|
\includegraphics[width=1\textwidth]{median}
|
||||||
|
\caption{\label{medianfig} Median.}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{definition}[\tr{median}{Median}]
|
||||||
|
\tr{Half of the observations $X=(x_1, x_2, \ldots, x_n)$ are
|
||||||
|
larger than the median and half of them are smaller than the
|
||||||
|
median.} {Der Median teilt eine Liste von Messwerten so in zwei
|
||||||
|
H\"alften, dass die eine H\"alfte der Daten nicht gr\"o{\ss}er
|
||||||
|
und die andere H\"alfte nicht kleiner als der Median ist.}
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{exercise}[mymedian.m]
|
||||||
|
\tr{Write a function that computes the median of a vector.}
|
||||||
|
{Schreibe eine Funktion, die den Median eines Vektors zur\"uckgibt.}
|
||||||
|
\end{exercise}
|
||||||
|
|
||||||
|
\code{matlab} stellt die Funktion \code{median()} zur Berechnung des Medians bereit.
|
||||||
|
|
||||||
|
\begin{exercise}[checkmymedian.m]
|
||||||
|
\tr{Write a script that tests whether your median function really
|
||||||
|
returns a median above which there are as many data values as
|
||||||
|
below. In particular the script should test data vectors of
|
||||||
|
different length.} {Schreibe ein Skript, das testet ob die
|
||||||
|
\code{mymedian} Funktion wirklich die Zahl zur\"uckgibt, \"uber
|
||||||
|
der genausoviele Datenwerte liegen wie darunter. Das Skript sollte
|
||||||
|
insbesondere verschieden lange Datenvektoren testen.}
|
||||||
|
\end{exercise}
|
||||||
|
|
||||||
|
\begin{figure}[t]
|
||||||
|
\includegraphics[width=1\textwidth]{quartile}
|
||||||
|
\caption{\label{quartilefig} Median und Quartile.}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{definition}[\tr{quartile}{Quartile}]
|
||||||
|
Die Quartile Q1, Q2 und Q3 unterteilen die Daten in vier gleich
|
||||||
|
gro{\ss}e Gruppen, die jeweils ein Viertel der Daten enthalten.
|
||||||
|
Das mittlere Quartil entspricht dem Median.
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{exercise}[quartiles.m]
|
||||||
|
\tr{Write a function that computes the first, second, and third quartile of a vector.}
|
||||||
|
{Schreibe eine Funktion, die das erste, zweite und dritte Quartil als Vektor zur\"uckgibt.}
|
||||||
|
\end{exercise}
|
||||||
|
|
||||||
|
\subsection{Histogram}
|
||||||
|
|
||||||
|
Histogramme z\"ahlen die H\"aufigkeit $n_i$ des Auftretens von
|
||||||
|
$N=\sum_{i=1}^M n_i$ Messwerten in $M$ Messbereichsklassen $i$ (Bins).
|
||||||
|
Die Klassen unterteilen den Wertebereich meist in angrenzende und
|
||||||
|
gleich gro{\ss}e Intervalle. Histogramme sch\"atzen die
|
||||||
|
Wahrscheinlichkeitsverteilung der Messwerte ab.
|
||||||
|
|
||||||
|
\begin{exercise}[rollthedie.m]
|
||||||
|
\tr{Write a function that simulates rolling a die $n$ times.}
|
||||||
|
{Schreibe eine Funktion, die das $n$-malige W\"urfeln mit einem W\"urfel simuliert.}
|
||||||
|
\end{exercise}
|
||||||
|
|
||||||
|
\begin{exercise}[diehistograms.m]
|
||||||
|
\tr{Plot histograms from rolling the die 20, 100, 1000 times. Use
|
||||||
|
the plain hist(x) function, force 6 bins via hist( x, 6 ), and set
|
||||||
|
meaningful bin positions.} {Plotte Histogramme von 20, 100, und
|
||||||
|
1000-mal w\"urfeln. Benutze \code{hist(x)}, erzwinge sechs Bins
|
||||||
|
mit \code{hist(x,6)}, und setze selbst sinnvolle Bins. Normiere
|
||||||
|
anschlie{\ss}end das Histogramm auf geeignete Weise.}
|
||||||
|
\end{exercise}
|
||||||
|
|
||||||
|
\begin{figure}[t]
|
||||||
|
\includegraphics[width=1\textwidth]{diehistograms}
|
||||||
|
\caption{\label{diehistogramsfig} \tr{Histograms of rolling a die
|
||||||
|
100 or 500 times. Left: plain histograms counting the frequency
|
||||||
|
of the six possible outcomes. Right: the same data normalized
|
||||||
|
to their sum.}{Histogramme des Ergebnisses von 100 oder 500 mal
|
||||||
|
W\"urfeln. Links: das absolute Histogramm z\"ahlt die Anzahl des
|
||||||
|
Auftretens jeder Augenzahl. Rechts: Normiert auf die Summe des
|
||||||
|
Histogramms werden die beiden Messungen vergleichbar.}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Bei ganzzahligen Messdaten (z.B. die Augenzahl eines W\"urfels)
|
||||||
|
kann f\"ur jede auftretende Zahl eine Klasse definiert werden.
|
||||||
|
Damit die H\"ohe der Histogrammbalken unabh\"angig von der Anzahl der Messwerte wird,
|
||||||
|
normiert man das Histogramm auf die Anzahl der Messwerte.
|
||||||
|
Die H\"ohe der Histogrammbalken gibt dann die Wahrscheinlichkeit $P(x_i)$
|
||||||
|
des Auftretens der Gr\"o{\ss}e $x_i$ in der $i$-ten Klasse an
|
||||||
|
\[ P_i = \frac{n_i}{N} = \frac{n_i}{\sum_{i=1}^M n_i} \; . \]
|
||||||
|
|
||||||
|
|
||||||
|
\subsection{Probability density function}
|
||||||
|
|
||||||
|
Meistens haben wir es jedoch mit reellen Messgr\"o{\ss}en zu tun.
|
||||||
|
|
||||||
|
\begin{exercise}[gaussianbins.m]
|
||||||
|
\tr{Draw 100 random data from a Gaussian distribution and plot
|
||||||
|
histograms with different bin sizes of the data.} {Ziehe 100
|
||||||
|
normalverteilte Zufallszahlen und erzeuge Histogramme mit
|
||||||
|
unterschiedlichen Klassenbreiten. Was f\"allt auf?}
|
||||||
|
\end{exercise}
|
||||||
|
|
||||||
|
\begin{figure}[t]
|
||||||
|
\includegraphics[width=1\textwidth]{pdfhistogram}
|
||||||
|
\caption{\label{pdfhistogramfig} \tr{Histograms of normally
|
||||||
|
distributed data with different bin sizes.}{Histogramme mit
|
||||||
|
verschiedenen Klassenbreiten eines Datensatzes von
|
||||||
|
normalverteilten Messwerten. Links: Die H\"ohe des absoluten
|
||||||
|
Histogramms h\"angt von der Klassenbreite ab. Rechts: Bei auf
|
||||||
|
das Integral normierten Histogrammen werden auch
|
||||||
|
unterschiedliche Klassenbreiten vergleichbar.}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Histogramme von reellen Messwerten m\"ussen auf das Integral 1 normiert werden, so dass
|
||||||
|
das Integral (nicht die Summe) \"uber das Histogramm eins ergibt. Das Integral
|
||||||
|
ist die Fl\"ache des Histogramms. Diese setzt sich zusammen aus der Fl\"ache der einzelnen
|
||||||
|
Histogrammbalken. Diese haben die H\"ohe $n_i$ und die Breite $\Delta x$. Die Gesamtfl\"ache
|
||||||
|
$A$ des Histogramms ist also
|
||||||
|
\[ A = \sum_{i=1}^M ( n_i \cdot \Delta x ) = \Delta x \sum_{i=1}^M n_i \]
|
||||||
|
und das normierte Histogramm hat die H\"ohe
|
||||||
|
\[ p(x_i) = \frac{n_i}{\Delta x \sum_{i=1}^M n_i} \]
|
||||||
|
Es muss also nicht nur durch die Summe, sondern auch durch die Breite der Klassen $\Delta x$
|
||||||
|
geteilt werden.
|
||||||
|
|
||||||
|
$p(x_i)$ kann keine Wahrscheinlichkeit sein, da $p(x_i)$ nun eine
|
||||||
|
Einheit hat --- das Inverse der Einheit der Messgr\"o{\ss}e $x$. Man
|
||||||
|
spricht von einer Wahrscheinlichkeitsdichte.
|
||||||
|
|
||||||
|
\begin{figure}[t]
|
||||||
|
\includegraphics[width=1\textwidth]{pdfprobabilities}
|
||||||
|
\caption{\label{pdfprobabilitiesfig} Wahrscheinlichkeiten bei
|
||||||
|
einer Wahrscheinlichkeitsdichtefunktion.}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{exercise}
|
||||||
|
\tr{Plot the Gaussian probability density}{Plotte die Gauss'sche Wahrscheinlichkeitsdichte }
|
||||||
|
\[ p_g(x) = 1/\sqrt{2\pi\sigma^2}e^{-\frac{(x-\mu)^2}{2\sigma^2}}\]
|
||||||
|
\tr{What does it mean?}{Was bedeutet die folgende Wahrscheinlichkeit?}
|
||||||
|
\[ P(x_1 < x < x_2) = \int_{x_1}^{x_2} p(x) \, dx \]
|
||||||
|
\tr{How large is}{Wie gro{\ss} ist}
|
||||||
|
\[ \int_{-\infty}^{+\infty} p(x) \, dx \; ?\]
|
||||||
|
\tr{Why?}{Warum?}
|
||||||
|
\end{exercise}
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\subsection{Data types}
|
\subsection{Data types}
|
||||||
|
|
||||||
%-------------------------------------------------------------
|
\subsubsection{Nominal scale}
|
||||||
\begin{frame}
|
\begin{itemize}
|
||||||
\frametitle{Data types: nominal scale}
|
\item Binary
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Binary
|
\item ``yes/no'',
|
||||||
\begin{itemize}
|
\item ``true/false'',
|
||||||
\item ``yes/no'',
|
\item ``success/failure'', etc.
|
||||||
\item ``true/false'',
|
|
||||||
\item ``success/failure'', etc.
|
|
||||||
\end{itemize}
|
|
||||||
\item Categorical
|
|
||||||
\begin{itemize}
|
|
||||||
\item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
|
|
||||||
\item blood type (``A/B/AB/0''),
|
|
||||||
\item parts of speech (``noun/verb/preposition/article/...''),
|
|
||||||
\item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
|
|
||||||
\end{itemize}
|
|
||||||
\item Each observation/measurement/sample is put into one category
|
|
||||||
\item There is no reasonable order among the categories.\\
|
|
||||||
example: [rods, cones] vs. [cones, rods]
|
|
||||||
\pause
|
|
||||||
\item Statistics: mode, i.e. the most common item
|
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{frame}
|
\item Categorical
|
||||||
|
|
||||||
%-------------------------------------------------------------
|
|
||||||
\begin{frame}
|
|
||||||
\frametitle{Data types: ordinal scale}
|
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Like nominal scale, but with an order
|
\item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
|
||||||
\item Examples: ranks, ratings
|
\item blood type (``A/B/AB/0''),
|
||||||
\begin{itemize}
|
\item parts of speech (``noun/verb/preposition/article/...''),
|
||||||
\item ``bad/ok/good'',
|
\item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
|
||||||
\item ``cold/warm/hot'',
|
|
||||||
\item ``young/old'', etc.
|
|
||||||
\end{itemize}
|
|
||||||
\item {\bf But:} there is no reasonable measure of {\em distance}
|
|
||||||
between the classes
|
|
||||||
\pause
|
|
||||||
\item Statistics: mode, median
|
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{frame}
|
\item Each observation/measurement/sample is put into one category
|
||||||
|
\item There is no reasonable order among the categories.\\
|
||||||
%-------------------------------------------------------------
|
example: [rods, cones] vs. [cones, rods]
|
||||||
\begin{frame}
|
\item Statistics: mode, i.e. the most common item
|
||||||
\frametitle{Data types: interval scale}
|
\end{itemize}
|
||||||
|
|
||||||
|
\subsubsection{Ordinal scale}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Like nominal scale, but with an order
|
||||||
|
\item Examples: ranks, ratings
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Quantitative/metric values
|
\item ``bad/ok/good'',
|
||||||
\item Reasonable measure of distance between values, but no absolute zero
|
\item ``cold/warm/hot'',
|
||||||
\item Examples:
|
\item ``young/old'', etc.
|
||||||
\begin{itemize}
|
\end{itemize}
|
||||||
\item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
|
\item {\bf But:} there is no reasonable measure of {\em distance}
|
||||||
\item Direction measured in degrees from magnetic or true north
|
between the classes
|
||||||
\end{itemize}
|
\item Statistics: mode, median
|
||||||
\pause
|
\end{itemize}
|
||||||
\item Statistics:
|
|
||||||
\begin{itemize}
|
\subsubsection{Interval scale}
|
||||||
\item Central tendency: mode, median, arithmetic mean
|
\begin{itemize}
|
||||||
\item Dispersion: range, standard deviation
|
\item Quantitative/metric values
|
||||||
\end{itemize}
|
\item Reasonable measure of distance between values, but no absolute zero
|
||||||
|
\item Examples:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
|
||||||
|
\item Direction measured in degrees from magnetic or true north
|
||||||
|
\end{itemize}
|
||||||
|
\item Statistics:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Central tendency: mode, median, arithmetic mean
|
||||||
|
\item Dispersion: range, standard deviation
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{frame}
|
\end{itemize}
|
||||||
|
|
||||||
%-------------------------------------------------------------
|
\subsubsection{Absolute/ratio scale}
|
||||||
\begin{frame}
|
\begin{itemize}
|
||||||
\frametitle{Data types: absolute/ratio scale}
|
\item Like interval scale, but with absolute origin/zero
|
||||||
|
\item Examples:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Like interval scale, but with absolute origin/zero
|
\item Temperature in $^\circ$K
|
||||||
\item Examples:
|
\item Length, mass, duration, electric charge, ...
|
||||||
\begin{itemize}
|
\item Plane angle, etc.
|
||||||
\item Temperature in $^\circ$K
|
\item Count (e.g. number of spikes in response to a stimulus)
|
||||||
\item Length, mass, duration, electric charge, ...
|
|
||||||
\item Plane angle, etc.
|
|
||||||
\item Count (e.g. number of spikes in response to a stimulus)
|
|
||||||
\end{itemize}
|
|
||||||
\pause
|
|
||||||
\item Statistics:
|
|
||||||
\begin{itemize}
|
|
||||||
\item Central tendency: mode, median, arithmetic, geometric, harmonic mean
|
|
||||||
\item Dispersion: range, standard deviation
|
|
||||||
\item Coefficient of variation (ratio standard deviation/mean)
|
|
||||||
\item All other statistical measures
|
|
||||||
\end{itemize}
|
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{frame}
|
\item Statistics:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Central tendency: mode, median, arithmetic, geometric, harmonic mean
|
||||||
|
\item Dispersion: range, standard deviation
|
||||||
|
\item Coefficient of variation (ratio standard deviation/mean)
|
||||||
|
\item All other statistical measures
|
||||||
|
\end{itemize}
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
%-------------------------------------------------------------
|
\subsubsection{Data types}
|
||||||
\begin{frame}
|
\begin{itemize}
|
||||||
\frametitle{Data types}
|
\item Data type selects
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Data type selects
|
\item statistics
|
||||||
\begin{itemize}
|
\item type of plots (bar graph versus x-y plot)
|
||||||
\item statistics
|
\item correct tests
|
||||||
\item type of plots (bar graph versus x-y plot)
|
\end{itemize}
|
||||||
\item correct tests
|
\item Scales exhibit increasing information content from nominal
|
||||||
\end{itemize}
|
to absolute.\\
|
||||||
\item Scales exhibit increasing information content from nominal
|
Conversion ,,downwards'' is always possible
|
||||||
to absolute.\\
|
\item For example: size measured in meter (ratio scale) $\rightarrow$
|
||||||
Conversion ,,downwards'' is always possible
|
categories ``small/medium/large'' (ordinal scale)
|
||||||
\item For example: size measured in meter (ratio scale) $\rightarrow$
|
\end{itemize}
|
||||||
categories ``small/medium/large'' (ordinal scale)
|
|
||||||
|
\subsubsection{Examples from neuroscience}
|
||||||
|
\begin{itemize}
|
||||||
|
\item {\bf absolute:}
|
||||||
|
\begin{itemize}
|
||||||
|
\item size of neuron/brain
|
||||||
|
\item length of axon
|
||||||
|
\item ion concentration
|
||||||
|
\item membrane potential
|
||||||
|
\item firing rate
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
%-------------------------------------------------------------
|
\item {\bf interval:}
|
||||||
\begin{frame}
|
|
||||||
\frametitle{Examples from neuroscience}
|
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
|
\item edge orientation
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
\item {\bf absolute:}\pause
|
\item {\bf ordinal:}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item size of neuron/brain
|
\item stages of a disease
|
||||||
\item length of axon
|
\item ratings
|
||||||
\item ion concentration
|
\end{itemize}
|
||||||
\item membrane potential
|
|
||||||
\item firing rate
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\item {\bf interval:}\pause
|
\item {\bf nominal:}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item edge orientation
|
\item cell type
|
||||||
\end{itemize}
|
\item odor
|
||||||
|
\item states of an ion channel
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
\item {\bf ordinal:} \pause
|
\end{itemize}
|
||||||
\begin{itemize}
|
|
||||||
\item stages of a disease
|
|
||||||
\item ratings
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\item {\bf nominal:}\pause
|
|
||||||
\begin{itemize}
|
|
||||||
\item cell type
|
|
||||||
\item odor
|
|
||||||
\item states of an ion channel
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\end{itemize}
|
\end{document}
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
|
|
||||||
\end{document}
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
\subsection{Statistics}
|
||||||
|
What is ``a statistic''? % dt. Sch\"atzfunktion
|
||||||
|
\begin{definition}[statistic]
|
||||||
|
A statistic (singular) is a single measure of some attribute of a
|
||||||
|
sample (e.g., its arithmetic mean value). It is calculated by
|
||||||
|
applying a function (statistical algorithm) to the values of the
|
||||||
|
items of the sample, which are known together as a set of data.
|
||||||
|
|
||||||
|
\source{http://en.wikipedia.org/wiki/Statistic}
|
||||||
|
\end{definition}
|
||||||
|
32
statistics/lecture/diehistograms.py
Normal file
32
statistics/lecture/diehistograms.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# Roll a six-sided die: one sample of 100 and one of 500 rolls, faces 1..6.
# np.random.random_integers() is deprecated; np.random.randint() excludes
# the upper bound, so 7 is passed to keep the face 6 in the range.
x1 = np.random.randint( 1, 7, 100 )
x2 = np.random.randint( 1, 7, 500 )
# bin edges 0.5, 1.5, ..., 6.5: one unit-width bin around each face value
bins = np.arange(0.5, 7, 1.0)

plt.xkcd()

fig = plt.figure( figsize=(6,4) )

# left panel: absolute counts for both samples
ax = fig.add_subplot( 1, 2, 1 )
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel( 'x' )
ax.set_ylabel( 'Frequency' )
ax.hist([x2, x1], bins, color=['#FFCC00', '#FFFF66' ])

# right panel: the same two samples as normalized histograms.
# The 'normed' keyword was removed from hist(); 'density' is its replacement.
ax = fig.add_subplot( 1, 2, 2 )
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel( 'x' )
ax.set_ylabel( 'Probability' )
ax.hist([x2, x1], bins, density=True, color=['#FFCC00', '#FFFF66' ])

plt.tight_layout()
fig.savefig( 'diehistograms.pdf' )
plt.show()
|
||||||
|
|
33
statistics/lecture/median.py
Normal file
33
statistics/lecture/median.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# Standard normal probability density, sampled on [-4, 4) in steps of 0.01.
x = np.arange( -4.0, 4.0, 0.01 )
g = np.exp(-0.5*x*x)/np.sqrt(2.0*np.pi)

plt.xkcd()
fig = plt.figure( figsize=(6,4) )
ax = fig.add_subplot( 1, 1, 1 )

# keep only the left and bottom axis lines
for side in ( 'right', 'top' ):
    ax.spines[side].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')

ax.set_xlabel( 'x' )
ax.set_ylabel( 'Probability density p(x)' )
ax.set_ylim( 0.0, 0.46 )
ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )

# label the two halves on either side of the median
for xpos in ( -1.0, +1.0 ):
    ax.text(xpos, 0.1, '50%', ha='center' )

# arrow from the 'Median' label to the vertical line at x = 0
median_arrow = dict(arrowstyle="->", relpos=(0.0,0.5),
                    connectionstyle="angle3,angleA=10,angleB=40")
ax.annotate('Median',
            xy=(0.1, 0.3), xycoords='data',
            xytext=(1.6, 0.35), textcoords='data', ha='left',
            arrowprops=median_arrow )

# shade the area under the curve left and right of zero in different colors
below = x < 0
above = x > 0
ax.fill_between( x[below], 0.0, g[below], color='#ffcc00' )
ax.fill_between( x[above], 0.0, g[above], color='#99ff00' )

# density curve on top, then the vertical median line
ax.plot(x,g, 'b', lw=4)
ax.plot([0.0, 0.0], [0.0, 0.45], 'k', lw=2 )

plt.tight_layout()
fig.savefig( 'median.pdf' )
plt.show()
|
||||||
|
|
39
statistics/lecture/pdfhistogram.py
Normal file
39
statistics/lecture/pdfhistogram.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# Standard normal density on [-4, 4); x and g are not drawn by this script.
x = np.arange( -4.0, 4.0, 0.01 )
g = np.exp(-0.5*x*x)/np.sqrt(2.0*np.pi)
# 100 samples drawn from the standard normal distribution
r = np.random.randn( 100 )

plt.xkcd()

fig = plt.figure( figsize=(6,4) )

# left panel: raw counts of the same data with 5 and with 20 bins, overlaid
ax = fig.add_subplot( 1, 2, 1 )
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel( 'x' )
ax.set_ylabel( 'Frequency' )
#ax.set_ylim( 0.0, 0.46 )
#ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )
ax.hist(r, 5, color='#CC0000')
ax.hist(r, 20, color='#FFCC00')

# right panel: the same two histograms normalized to a probability density.
# The 'normed' keyword was removed from hist(); 'density' is its replacement.
ax = fig.add_subplot( 1, 2, 2 )
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel( 'x' )
ax.set_ylabel( 'Probability density p(x)' )
#ax.set_ylim( 0.0, 0.46 )
#ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )
ax.hist(r, 5, density=True, color='#CC0000')
ax.hist(r, 20, density=True, color='#FFCC00')

plt.tight_layout()
fig.savefig( 'pdfhistogram.pdf' )
plt.show()
|
||||||
|
|
36
statistics/lecture/pdfprobabilities.py
Normal file
36
statistics/lecture/pdfprobabilities.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# Standard normal probability density, sampled on [-3, 5) in steps of 0.01.
x = np.arange( -3.0, 5.0, 0.01 )
g = np.exp(-0.5*x*x)/np.sqrt(2.0*np.pi)
# limits of the interval whose area under the curve gets shaded
x1=0.0
x2=1.0

plt.xkcd()
fig = plt.figure( figsize=(6,4) )
ax = fig.add_subplot( 1, 1, 1 )
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel( 'x' )
ax.set_ylabel( 'Probability density p(x)' )
ax.set_ylim( 0.0, 0.46 )
ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )

# label pointing at the density curve
ax.annotate('Gaussian',
            xy=(-1.0, 0.28), xycoords='data',
            xytext=(-2.5, 0.35), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.5,0.0),
                            connectionstyle="angle3,angleA=10,angleB=110") )

# label pointing at the shaded area; raw string so that the LaTeX
# backslashes (\int, \,) are not parsed as Python escape sequences
ax.annotate(r'$P(0<x<1) = \int_0^1 p(x) \, dx$',
            xy=(0.6, 0.28), xycoords='data',
            xytext=(1.2, 0.4), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
                            connectionstyle="angle3,angleA=10,angleB=80") )

# shade the area under the curve strictly between x1 and x2
inside = (x>x1)&(x<x2)
ax.fill_between( x[inside], 0.0, g[inside], color='#cc0000' )
ax.plot(x,g, 'b', lw=4)

plt.tight_layout()
fig.savefig( 'pdfprobabilities.pdf' )
plt.show()
|
||||||
|
|
50
statistics/lecture/quartile.py
Normal file
50
statistics/lecture/quartile.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# Standard normal probability density, sampled on [-4, 4) in steps of 0.01.
x = np.arange( -4.0, 4.0, 0.01 )
g = np.exp(-0.5*x*x)/np.sqrt(2.0*np.pi)
# positions of the three vertical dividers: 1. quartile, median, 3. quartile
q = [ -0.67488, 0.0, 0.67488 ]

plt.xkcd()
fig = plt.figure( figsize=(6,4) )
ax = fig.add_subplot( 1, 1, 1 )

# keep only the left and bottom axis lines
for side in ( 'right', 'top' ):
    ax.spines[side].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')

ax.set_xlabel( 'x' )
ax.set_ylabel( 'Probability density p(x)' )
ax.set_ylim( 0.0, 0.46 )
ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )

# one '25%' label inside each of the four segments
for xpos in ( -1.2, -0.35, +0.35, +1.2 ):
    ax.text(xpos, 0.1, '25%', ha='center' )

# arrows from the text labels to the three divider lines
first_arrow = dict(arrowstyle="->", relpos=(1.0,0.5),
                   connectionstyle="angle3,angleA=170,angleB=120")
ax.annotate('1. quartile',
            xy=(-0.75, 0.2), xycoords='data',
            xytext=(-1.7, 0.25), textcoords='data', ha='right',
            arrowprops=first_arrow )
third_arrow = dict(arrowstyle="->", relpos=(0.0,0.5),
                   connectionstyle="angle3,angleA=10,angleB=70")
ax.annotate('3. quartile',
            xy=(0.75, 0.17), xycoords='data',
            xytext=(1.7, 0.22), textcoords='data', ha='left',
            arrowprops=third_arrow )
median_arrow = dict(arrowstyle="->", relpos=(0.0,0.5),
                    connectionstyle="angle3,angleA=10,angleB=40")
ax.annotate('Median',
            xy=(0.1, 0.3), xycoords='data',
            xytext=(1.6, 0.35), textcoords='data', ha='left',
            arrowprops=median_arrow )

# shade the four segments under the curve, left to right, each in its own color
seg1 = x<q[0]
seg2 = (x>q[0])&(x<q[1])
seg3 = (x>q[1])&(x<q[2])
seg4 = x>q[2]
ax.fill_between( x[seg1], 0.0, g[seg1], color='#ffcc00' )
ax.fill_between( x[seg2], 0.0, g[seg2], color='#ff0000' )
ax.fill_between( x[seg3], 0.0, g[seg3], color='#ff9900' )
ax.fill_between( x[seg4], 0.0, g[seg4], color='#ffff66' )

# density curve, then the median line and the two shorter quartile lines
ax.plot(x,g, 'b', lw=4)
ax.plot([0.0, 0.0], [0.0, 0.45], 'k', lw=2 )
for qx in ( q[0], q[2] ):
    ax.plot([qx, qx], [0.0, 0.4], 'k', lw=2 )

plt.tight_layout()
fig.savefig( 'quartile.pdf' )
plt.show()
|
||||||
|
|
Reference in New Issue
Block a user