First lecture on descriptive statistics
This commit is contained in:
parent
1264b4749a
commit
fb9008f571
20
programming/lectures/Makefile
Normal file
20
programming/lectures/Makefile
Normal file
@ -0,0 +1,20 @@
|
||||
# Lecture sources to build.  The list is explicit: an earlier
# `TEXFILES=$(wildcard *.tex)` assignment was immediately overridden by this
# one and therefore never had any effect.
TEXFILES := boolean_logical_indexing.tex control_structures.tex data_structures.tex plotting_spike_trains.tex programming_basics.tex scripts_functions.tex sta_stc.tex variables_datatypes.tex vectors_matrices.tex

PDFFILES := $(TEXFILES:.tex=.pdf)

# None of these targets names a real file; declaring them phony keeps a stray
# file called `pdf`, `clean`, ... from silently disabling the target.
.PHONY: pdf clean cleanall watch

# Default target: build the PDF of every lecture.
pdf : $(PDFFILES)

# Run pdflatex once and echo its output to stderr; run it a second time when
# the output asks for a rerun to resolve cross-references.  The trailing
# `|| true` keeps the recipe from failing (best effort, as before).
$(PDFFILES) : %.pdf : %.tex
	pdflatex -interaction=scrollmode $< | tee /dev/stderr | grep -F -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true

# Remove editor backups and the auxiliary files written by LaTeX/beamer.
clean :
	rm -f *~ $(TEXFILES:.tex=.aux) $(TEXFILES:.tex=.log) $(TEXFILES:.tex=.out) $(TEXFILES:.tex=.nav) $(TEXFILES:.tex=.snm) $(TEXFILES:.tex=.toc) $(TEXFILES:.tex=.vrb)

# Additionally remove the generated PDFs.
cleanall : clean
	rm -f $(PDFFILES)

# Poll twice per second and rebuild as soon as a PDF is out of date.
# $(MAKE) (instead of a literal `make`) passes flags on to the sub-make.
watch :
	while true; do ! $(MAKE) -q pdf && $(MAKE) pdf; sleep 0.5; done
|
||||
|
||||
|
12
statistics/code/checkmymedian.m
Normal file
12
statistics/code/checkmymedian.m
Normal file
@ -0,0 +1,12 @@
|
||||
% check whether the median returned by mymedian
% really separates a vector into two halves:
% for every tested vector the number of values above the median
% must equal the number of values below it.
for i = 1:140                    % loop over different vector lengths
    for k = 1:10                 % try several random vectors per length
        a = randn( i, 1 );       % generate some data
        m = mymedian( a );       % compute median (output suppressed so that
                                 % only failed checks show up on the console)
        if length( a(a>m) ) ~= length( a(a<m) )   % check
            disp( 'error!' )
        end
    end
end
|
||||
|
24
statistics/code/diehistograms.m
Normal file
24
statistics/code/diehistograms.m
Normal file
@ -0,0 +1,24 @@
|
||||
% How does the histogram of die rolls depend on the number of rolls?
% For each sample size the normalized histogram is drawn together with
% the expected probability 1/6; press a key to advance to the next one.
% NOTE(review): the three-argument-plus-properties form of hist() used
% below looks like the Octave signature hist(data, centers, norm, ...) -
% confirm before running it in MATLAB.
nrolls = [ 20, 100, 1000 ];
bins = 1:6;     % one bin centered on each possible outcome
for n = nrolls
    d = rollthedie( n );

    % earlier steps of the exercise, kept for reference:
    %   hist( d )        plain histogram
    %   h = hist( d )    check the bin counts of the plain histogram
    %   hist( d, 6 )     force 6 bins
    %   hist( d, bins )  set the right bin centers

    % normalized histogram compared to the expectation:
    hold on
    plot( [0 7], [1/6 1/6], '-r', 'linewidth', 10 )
    hist( d, bins, 1.0, 'facecolor', 'b' )
    hold off
    pause
end
|
17
statistics/code/gaussianbins.m
Normal file
17
statistics/code/gaussianbins.m
Normal file
@ -0,0 +1,17 @@
|
||||
% Histograms of 100 normally distributed numbers for two different bin
% widths: absolute counts (left) versus normalized histograms (right).
x = randn( 100, 1 );
bins1 = -4:2:4;       % bin centers, bin width 2
bins2 = -4:0.5:4;     % bin centers, bin width 0.5

% left panel: plain counts
subplot( 1, 2, 1 );
hold on;
hist( x, bins1 );
hist( x, bins2 );
xlabel('x')
ylabel('Frequency')
hold off;

% right panel: the third argument is 1/binwidth for each set of bins.
% NOTE(review): this relies on hist(data, centers, norm) normalizing the
% sum of the bars to norm (Octave signature) - confirm for MATLAB.
subplot( 1, 2, 2 );
hold on;
hist( x, bins1, 1.0/(bins1(2)-bins1(1)) );
hist( x, bins2, 1.0/(bins2(2)-bins2(1)) );
xlabel('x')
ylabel('Probability density')
hold off;
|
22
statistics/code/gaussianpdf.m
Normal file
22
statistics/code/gaussianpdf.m
Normal file
@ -0,0 +1,22 @@
|
||||
% Standard normal probability density on a regular grid.
% dx, x1, x2, P and Pr are deliberately left without a semicolon so that
% their values are printed.
dx=0.1
grid = -4.0:dx:4.0;
pdf = exp(-0.5*grid.^2)/sqrt(2.0*pi);
hold on
plot(grid,pdf, 'linewidth', 10 )

% probability of x1 <= x < x2 as a Riemann sum over the density:
x1=1.0
x2=2.0
ingrid = (grid>=x1)&(grid<x2);
P = sum(pdf(ingrid))*dx

% histogram of random numbers drawn from the same distribution:
samples = randn( 10000, 1 );
hist(samples,grid,1.0/dx)

% the fraction of samples within [x1, x2) should be close to P:
insamples = (samples>=x1)&(samples<x2);
Pr = sum(insamples)/length(samples)

hold off
|
||||
|
||||
|
24
statistics/code/histogramquartiles.m
Normal file
24
statistics/code/histogramquartiles.m
Normal file
@ -0,0 +1,24 @@
|
||||
% Histogram of normally distributed data in which each quarter of the
% data (as delimited by the quartiles) is drawn in its own color.

% draw the data:
x = randn( 1, 100000 );

% histogram with 100 bins, normalized to unit area:
[counts, centers] = hist( x, 100 );
binwidth = centers(2)-centers(1);
density = counts/sum(counts)/binwidth;

% plot the complete histogram:
bar( centers, density );
xlabel( 'x' );

% first quartile, median, third quartile:
q = quartiles( x );
%q = quantile( x, [0.25, 0.5, 0.75 ] );

% bins belonging to each of the four quarters and their colors:
quarters = { centers<q(1), ...
             (centers>=q(1)) & (centers<q(2)), ...
             (centers>=q(2)) & (centers<q(3)), ...
             centers>=q(3) };
colors = { [0.5 0 0.5], [0.9 0 0], [0 0 0.9], [0.5 0 0.5] };

% overlay the quarters on top of the histogram:
hold on;
for k = 1:length( quarters )
    sel = quarters{k};
    bar( centers(sel), density(sel), 'FaceColor', colors{k} );
end
hold off;
|
13
statistics/code/mymedian.m
Normal file
13
statistics/code/mymedian.m
Normal file
@ -0,0 +1,13 @@
|
||||
function m = mymedian( x )
% MYMEDIAN  median of the vector x.
%   m = mymedian( x ) sorts x and returns the middle element for an odd
%   number of elements, or the mean of the two middle elements for an
%   even number of elements.  For empty x the result is NaN.
    sorted = sort( x );
    n = length( sorted );
    if n == 0
        % nothing to take a median of
        m = NaN;
        return
    end
    if mod( n, 2 ) == 1
        % odd count: a single middle element exists
        m = sorted( (n + 1)/2 );
    else
        % even count: average the two elements around the middle
        half = n/2;
        m = (sorted( half ) + sorted( half+1 ))/2;
    end
end
|
@ -1,25 +1,15 @@
|
||||
% Histogram of normally distributed data with the four quarters of the
% data (delimited by the quartiles) drawn in different colors.

% generate data:
x = randn( 1, 100000 );

% histogram:
[h,b] = hist( x, 100 );
% normalize to unit area:
bs = b(2)-b(1);
h = h/sum(h)/bs;

% plot:
bar( b, h );
xlabel( 'x' );

% median, quartile:
xs = sort( x );      % semicolon: do not dump 100000 values to the console
n = length( xs );
% round() keeps the indices integer even if n is not a multiple of four
q = [ xs(round(n/4)), xs(round(n/2)), xs(round(3*n/4)) ];
%q = quantile( x, [0.25, 0.5, 0.75 ] );

% plot:
% hold must be switched on before the first overlay, otherwise that bar()
% call replaces the histogram and the x label drawn above
hold on;
bar( b(b<q(1)), h(b<q(1)), 'FaceColor', [0.5 0 0.5] );
bar( b((b>=q(1)) & (b<q(2))), h((b>=q(1)) & (b<q(2))), 'FaceColor', [0.9 0 0] );
bar( b((b>=q(2)) & (b<q(3))), h((b>=q(2)) & (b<q(3))), 'FaceColor', [0 0 0.9] );
bar( b(b>=q(3)), h(b>=q(3)), 'FaceColor', [0.5 0 0.5] );
hold off;
|
||||
function q = quartiles( x )
% QUARTILES  first, second, and third quartile of the vector x.
%   q = quartiles( x ) returns [Q1, Q2, Q3].  Q2 is the median: the middle
%   element of the sorted data for an odd number of elements, the mean of
%   the two middle elements for an even number.  Q1 and Q3 are the sorted
%   elements at the rounded positions n/4 and 3*n/4.
%   For empty x an empty vector is returned.
    xs = sort( x );
    n = length( xs );
    if n == 0
        q = [];
        return
    end

    % second quartile (median):
    if rem( n, 2 ) == 0
        index = n/2;
        m = (xs( index ) + xs( index+1 ))/2;
    else
        m = xs( (n + 1)/2 );
    end

    % The rounding has to be applied to the index, not to the data value;
    % max() keeps the index valid for very short vectors, where
    % round( n/4 ) would be zero.
    i1 = max( 1, round( n/4 ) );
    i3 = max( 1, round( 3*n/4 ) );
    q = [ xs( i1 ), m, xs( i3 ) ];
end
|
||||
|
@ -1,4 +1,6 @@
|
||||
function x = randomwalk(n,p)
|
||||
% returns a random walk with n steps and
|
||||
% probability p for positive steps.
|
||||
r = rand(n,1);
|
||||
r(r<p) = -1.0;
|
||||
r(r>=p) = +1.0;
|
||||
|
4
statistics/code/rollthedie.m
Normal file
4
statistics/code/rollthedie.m
Normal file
@ -0,0 +1,4 @@
|
||||
function x = rollthedie( n )
% ROLLTHEDIE  simulate rolling a die n times.
%   x = rollthedie( n ) returns an n-by-1 column vector of random
%   integers between 1 and 6.
    nfaces = 6;
    x = randi( nfaces, n, 1 );
end
|
18
statistics/lecture/Makefile
Normal file
18
statistics/lecture/Makefile
Normal file
@ -0,0 +1,18 @@
|
||||
# All LaTeX sources in this directory; `:=` expands the wildcard only once.
TEXFILES := $(wildcard *.tex)
PDFFILES := $(TEXFILES:.tex=.pdf)

# None of these targets names a real file; declaring them phony keeps a stray
# file called `pdf`, `clean`, ... from silently disabling the target.
.PHONY: pdf clean cleanall watch

# Default target: build a PDF from every .tex file.
pdf : $(PDFFILES)

# Run pdflatex once and echo its output to stderr; run it a second time when
# the output asks for a rerun to resolve cross-references.  The trailing
# `|| true` keeps the recipe from failing (best effort, as before).
$(PDFFILES) : %.pdf : %.tex
	pdflatex -interaction=scrollmode $< | tee /dev/stderr | grep -F -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true

# Remove editor backups and the auxiliary files written by LaTeX/beamer.
clean :
	rm -f *~ $(TEXFILES:.tex=.aux) $(TEXFILES:.tex=.log) $(TEXFILES:.tex=.out) $(TEXFILES:.tex=.nav) $(TEXFILES:.tex=.snm) $(TEXFILES:.tex=.toc) $(TEXFILES:.tex=.vrb)

# Additionally remove the generated PDFs.
cleanall : clean
	rm -f $(PDFFILES)

# Poll twice per second and rebuild as soon as a PDF is out of date.
# $(MAKE) (instead of a literal `make`) passes flags on to the sub-make.
watch :
	while true; do ! $(MAKE) -q pdf && $(MAKE) pdf; sleep 0.5; done
|
||||
|
||||
|
@ -1,43 +1,55 @@
|
||||
\documentclass{beamer}
|
||||
\documentclass[12pt]{report}
|
||||
|
||||
%%%%% title %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\title[]{Scientific Computing --- Descriptive Statistics}
|
||||
\author[]{Jan Benda}
|
||||
\institute[]{Neuroethology}
|
||||
\date[]{WS 15/16}
|
||||
\titlegraphic{\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
|
||||
|
||||
%%%%% beamer %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\mode<presentation>
|
||||
{
|
||||
\usetheme{Singapore}
|
||||
\setbeamercovered{opaque}
|
||||
\usecolortheme{tuebingen}
|
||||
\setbeamertemplate{navigation symbols}{}
|
||||
\usefonttheme{default}
|
||||
\useoutertheme{infolines}
|
||||
% \useoutertheme{miniframes}
|
||||
}
|
||||
\title{\tr{Introduction to Scientific Computing}{Einf\"uhrung in die wissenschaftliche Datenverarbeitung}}
|
||||
\author{Jan Benda\\Abteilung Neuroethologie\\[2ex]\includegraphics[width=0.3\textwidth]{UT_WBMW_Rot_RGB}}
|
||||
\date{WS 15/16}
|
||||
|
||||
%%%% language %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% \newcommand{\tr}[2]{#1} % en
|
||||
% \usepackage[english]{babel}
|
||||
\newcommand{\tr}[2]{#2} % de
|
||||
\usepackage[german]{babel}
|
||||
|
||||
%\AtBeginSection[]
|
||||
%{
|
||||
% \begin{frame}<beamer>
|
||||
% \begin{center}
|
||||
% \Huge \insertsectionhead
|
||||
% \end{center}
|
||||
% \end{frame}
|
||||
%}
|
||||
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{pslatex} % nice font for pdf file
|
||||
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
||||
|
||||
\setbeamertemplate{blocks}[rounded][shadow=true]
|
||||
%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry}
|
||||
\setcounter{tocdepth}{1}
|
||||
|
||||
%%%%% packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage[english]{babel}
|
||||
%%%% graphics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{graphicx}
|
||||
\usepackage{xcolor}
|
||||
\newcommand{\texpicture}[1]{{\sffamily\small\input{#1.tex}}}
|
||||
|
||||
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{listings}
|
||||
% Default settings for all code listings.  Files given to \lstinputlisting
% are looked up in ../code; the caption shows the file name without its
% extension.
% \filename@parse and \filename@base are internal LaTeX macros with an '@'
% in their name.  '@' must be a letter while \lstset reads the caption key,
% otherwise the names are tokenized as the undefined macro \filename
% followed by the plain text "@parse" / "@base".
\makeatletter
\lstset{
  inputpath=../code,
  basicstyle=\ttfamily\footnotesize,
  numbers=left,
  showstringspaces=false,
  language=Matlab,
  commentstyle=\itshape\color{darkgray},
  keywordstyle=\color{blue},
  stringstyle=\color{green},
  backgroundcolor=\color{blue!10},
  breaklines=true,
  breakautoindent=true,
  columns=flexible,
  frame=single,
  caption={\protect\filename@parse{\lstname}\protect\filename@base},
  captionpos=t,
  xleftmargin=1em,
  xrightmargin=1em,
  aboveskip=10pt
}
\makeatother
|
||||
|
||||
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{amsmath}
|
||||
\usepackage{bm}
|
||||
\usepackage{pslatex} % nice font for pdf file
|
||||
%\usepackage{multimedia}
|
||||
|
||||
\usepackage{dsfont}
|
||||
\newcommand{\naZ}{\mathds{N}}
|
||||
\newcommand{\gaZ}{\mathds{Z}}
|
||||
@ -47,59 +59,45 @@
|
||||
\newcommand{\reZpN}{\mathds{R^+_0}}
|
||||
\newcommand{\koZ}{\mathds{C}}
|
||||
|
||||
%%%% graphics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{graphicx}
|
||||
\newcommand{\texpicture}[1]{{\sffamily\small\input{#1.tex}}}
|
||||
|
||||
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{listings}
|
||||
\lstset{
|
||||
basicstyle=\ttfamily,
|
||||
numbers=left,
|
||||
showstringspaces=false,
|
||||
language=Matlab,
|
||||
commentstyle=\itshape\color{darkgray},
|
||||
keywordstyle=\color{blue},
|
||||
stringstyle=\color{green},
|
||||
backgroundcolor=\color{blue!10},
|
||||
breaklines=true,
|
||||
breakautoindent=true,
|
||||
columns=flexible,
|
||||
frame=single,
|
||||
captionpos=b,
|
||||
xleftmargin=1em,
|
||||
xrightmargin=1em,
|
||||
aboveskip=10pt
|
||||
}
|
||||
%%%%% structure: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\usepackage{ifthen}
|
||||
|
||||
\newcommand{\code}[1]{\texttt{#1}}
|
||||
|
||||
\newcommand{\source}[1]{
|
||||
\begin{flushright}
|
||||
\color{gray}\scriptsize \url{#1}
|
||||
\end{flushright}
|
||||
}
|
||||
|
||||
\newenvironment{definition}[1][]{\medskip\noindent\textbf{Definition}\ifthenelse{\equal{#1}{}}{}{ #1}:\newline}%
|
||||
{\medskip}
|
||||
|
||||
%\newcommand{\showlisting}{yes}
|
||||
\newcommand{\showlisting}{no}
|
||||
\newcounter{theexercise}
|
||||
\setcounter{theexercise}{1}
|
||||
\newenvironment{exercise}[1][]{\medskip\noindent\textbf{\tr{Exercise}{\"Ubung}
|
||||
\arabic{theexercise}:} \stepcounter{theexercise}\newline \newcommand{\exercisesource}{#1}}%
|
||||
{\ifthenelse{\equal{\exercisesource}{}}{}{\ifthenelse{\equal{\showlisting}{yes}}{\medskip\lstinputlisting{\exercisesource}}{}}\medskip}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{document}
|
||||
|
||||
\begin{frame}[plain]
|
||||
\frametitle{}
|
||||
\vspace{-1cm}
|
||||
\titlepage % erzeugt Titelseite
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{frame}
|
||||
\frametitle{Content}
|
||||
\tableofcontents
|
||||
\end{frame}
|
||||
\maketitle
|
||||
|
||||
%\tableofcontents
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Descriptive statistics}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\chapter{\tr{Descriptive statistics}{Deskriptive Statistik}}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Statistics of ratio data}
|
||||
\section{Statistics of real-valued data}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{Statistics of ratio data}
|
||||
\begin{itemize}
|
||||
\item Location, central tendency
|
||||
\begin{itemize}
|
||||
@ -107,7 +105,6 @@
|
||||
\item median
|
||||
\item mode
|
||||
\end{itemize}
|
||||
|
||||
\item Spread, dispersion
|
||||
\begin{itemize}
|
||||
\item variance
|
||||
@ -116,163 +113,294 @@
|
||||
\item coefficient of variation
|
||||
\item minimum, maximum
|
||||
\end{itemize}
|
||||
|
||||
\item Shape
|
||||
\begin{itemize}
|
||||
\item skewness
|
||||
\item kurtosis
|
||||
\end{itemize}
|
||||
|
||||
\item Dependence
|
||||
\begin{itemize}
|
||||
\item Pearson correlation coefficient
|
||||
\item Spearman's rank correlation coefficient
|
||||
\end{itemize}
|
||||
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Median, Quartile, Percentile}
|
||||
|
||||
\begin{figure}[t]
|
||||
\includegraphics[width=1\textwidth]{median}
|
||||
\caption{\label{medianfig} Median.}
|
||||
\end{figure}
|
||||
|
||||
\begin{definition}[\tr{median}{Median}]
|
||||
\tr{Half of the observations $X=(x_1, x_2, \ldots, x_n)$ are
|
||||
larger than the median and half of them are smaller than the
|
||||
median.} {Der Median teilt eine Liste von Messwerten so in zwei
|
||||
H\"alften, dass die eine H\"alfte der Daten nicht gr\"o{\ss}er
|
||||
und die andere H\"alfte nicht kleiner als der Median ist.}
|
||||
\end{definition}
|
||||
|
||||
\begin{exercise}[mymedian.m]
|
||||
\tr{Write a function that computes the median of a vector.}
|
||||
{Schreibe eine Funktion, die den Median eines Vektors zur\"uckgibt.}
|
||||
\end{exercise}
|
||||
|
||||
\code{matlab} stellt die Funktion \code{median()} zur Berechnung des Medians bereit.
|
||||
|
||||
\begin{exercise}[checkmymedian.m]
|
||||
\tr{Write a script that tests whether your median function really
|
||||
returns a value with as many data points above it as
|
||||
below. In particular the script should test data vectors of
|
||||
different length.} {Schreibe ein Skript, das testet ob die
|
||||
\code{mymedian} Funktion wirklich die Zahl zur\"uckgibt, \"uber
|
||||
der genausoviele Datenwerte liegen wie darunter. Das Skript sollte
|
||||
insbesondere verschieden lange Datenvektoren testen.}
|
||||
\end{exercise}
|
||||
|
||||
\begin{figure}[t]
|
||||
\includegraphics[width=1\textwidth]{quartile}
|
||||
\caption{\label{quartilefig} Median und Quartile.}
|
||||
\end{figure}
|
||||
|
||||
\begin{definition}[\tr{quartile}{Quartile}]
|
||||
Die Quartile Q1, Q2 und Q3 unterteilen die Daten in vier gleich
|
||||
gro{\ss}e Gruppen, die jeweils ein Viertel der Daten enthalten.
|
||||
Das mittlere Quartil entspricht dem Median.
|
||||
\end{definition}
|
||||
|
||||
\begin{exercise}[quartiles.m]
|
||||
\tr{Write a function that computes the first, second, and third quartile of a vector.}
|
||||
{Schreibe eine Funktion, die das erste, zweite und dritte Quartil als Vektor zur\"uckgibt.}
|
||||
\end{exercise}
|
||||
|
||||
\subsection{Histogram}
|
||||
|
||||
Histogramme z\"ahlen die H\"aufigkeit $n_i$ des Auftretens von
|
||||
$N=\sum_{i=1}^M n_i$ Messwerten in $M$ Messbereichsklassen $i$ (Bins).
|
||||
Die Klassen unterteilen den Wertebereich meist in angrenzende und
|
||||
gleich gro{\ss}e Intervalle. Histogramme sch\"atzen die
|
||||
Wahrscheinlichkeitsverteilung der Messwerte ab.
|
||||
|
||||
\begin{exercise}[rollthedie.m]
|
||||
\tr{Write a function that simulates rolling a die $n$ times.}
|
||||
{Schreibe eine Funktion, die das $n$-malige W\"urfeln mit einem W\"urfel simuliert.}
|
||||
\end{exercise}
|
||||
|
||||
\begin{exercise}[diehistograms.m]
|
||||
\tr{Plot histograms from rolling the die 20, 100, 1000 times. Use
|
||||
the plain hist(x) function, force 6 bins via hist( x, 6 ), and set
|
||||
meaningful bin positions.} {Plotte Histogramme von 20, 100, und
|
||||
1000-mal w\"urfeln. Benutze \code{hist(x)}, erzwinge sechs Bins
|
||||
mit \code{hist(x,6)}, und setze selbst sinnvolle Bins. Normiere
|
||||
anschlie{\ss}end das Histogramm auf geeignete Weise.}
|
||||
\end{exercise}
|
||||
|
||||
\begin{figure}[t]
|
||||
\includegraphics[width=1\textwidth]{diehistograms}
|
||||
\caption{\label{diehistogramsfig} \tr{Histograms of rolling a die
|
||||
100 or 500 times. Left: plain histograms counting the frequency
|
||||
of the six possible outcomes. Right: the same data normalized
|
||||
to their sum.}{Histogramme des Ergebnisses von 100 oder 500 mal
|
||||
W\"urfeln. Links: das absolute Histogramm z\"ahlt die Anzahl des
|
||||
Auftretens jeder Augenzahl. Rechts: Normiert auf die Summe des
|
||||
Histogramms werden die beiden Messungen vergleichbar.}}
|
||||
\end{figure}
|
||||
|
||||
Bei ganzzahligen Messdaten (z.B. die Augenzahl eines W\"urfels)
|
||||
kann f\"ur jede auftretende Zahl eine Klasse definiert werden.
|
||||
Damit die H\"ohe der Histogrammbalken unabh\"angig von der Anzahl der Messwerte wird,
|
||||
normiert man das Histogramm auf die Anzahl der Messwerte.
|
||||
Die H\"ohe der Histogrammbalken gibt dann die Wahrscheinlichkeit $P(x_i)$
|
||||
des Auftretens der Gr\"o{\ss}e $x_i$ in der $i$-ten Klasse an
|
||||
\[ P_i = \frac{n_i}{N} = \frac{n_i}{\sum_{i=1}^M n_i} \; . \]
|
||||
|
||||
|
||||
\subsection{Probability density function}
|
||||
|
||||
Meistens haben wir es jedoch mit reellen Messgr\"o{\ss}en zu tun.
|
||||
|
||||
\begin{exercise}[gaussianbins.m]
|
||||
\tr{Draw 100 random data from a Gaussian distribution and plot
|
||||
histograms with different bin sizes of the data.} {Ziehe 100
|
||||
normalverteilte Zufallszahlen und erzeuge Histogramme mit
|
||||
unterschiedlichen Klassenbreiten. Was f\"allt auf?}
|
||||
\end{exercise}
|
||||
|
||||
\begin{figure}[t]
|
||||
\includegraphics[width=1\textwidth]{pdfhistogram}
|
||||
\caption{\label{pdfhistogramfig} \tr{Histograms of normally
|
||||
distributed data with different bin sizes.}{Histogramme mit
|
||||
verschiedenen Klassenbreiten eines Datensatzes von
|
||||
normalverteilten Messwerten. Links: Die H\"ohe des absoluten
|
||||
Histogramms h\"angt von der Klassenbreite ab. Rechts: Bei auf
|
||||
das Integral normierten Histogrammen werden auch
|
||||
unterschiedliche Klassenbreiten vergleichbar.}}
|
||||
\end{figure}
|
||||
|
||||
Histogramme von reellen Messwerten m\"ussen auf das Integral 1 normiert werden, so dass
|
||||
das Integral (nicht die Summe) \"uber das Histogramm eins ergibt. Das Integral
|
||||
ist die Fl\"ache des Histogramms. Diese setzt sich zusammen aus der Fl\"ache der einzelnen
|
||||
Histogrammbalken. Diese haben die H\"ohe $n_i$ und die Breite $\Delta x$. Die Gesamtfl\"ache
|
||||
$A$ des Histogramms ist also
|
||||
\[ A = \sum_{i=1}^M ( n_i \cdot \Delta x ) = \Delta x \sum_{i=1}^M n_i \]
|
||||
und das normierte Histogramm hat die H\"ohe
|
||||
\[ p(x_i) = \frac{n_i}{\Delta x \sum_{i=1}^M n_i} \]
|
||||
Es muss also nicht nur durch die Summe, sondern auch durch die Breite der Klassen $\Delta x$
|
||||
geteilt werden.
|
||||
|
||||
$p(x_i)$ kann keine Wahrscheinlichkeit sein, da $p(x_i)$ nun eine
|
||||
Einheit hat --- das Inverse der Einheit der Messgr\"o{\ss}e $x$. Man
|
||||
spricht von einer Wahrscheinlichkeitsdichte.
|
||||
|
||||
\begin{figure}[t]
|
||||
\includegraphics[width=1\textwidth]{pdfprobabilities}
|
||||
\caption{\label{pdfprobabilitiesfig} Wahrscheinlichkeiten bei
|
||||
einer Wahrscheinlichkeitsdichtefunktion.}
|
||||
\end{figure}
|
||||
|
||||
\begin{exercise}
|
||||
\tr{Plot the Gaussian probability density}{Plotte die Gauss'sche Wahrscheinlichkeitsdichte }
|
||||
\[ p_g(x) = 1/\sqrt{2\pi\sigma^2}e^{-\frac{(x-\mu)^2}{2\sigma^2}}\]
|
||||
\tr{What does it mean?}{Was bedeutet die folgende Wahrscheinlichkeit?}
|
||||
\[ P(x_1 < x < x_2) = \int_{x_1}^{x_2} p(x) \, dx \]
|
||||
\tr{How large is}{Wie gro{\ss} ist}
|
||||
\[ \int_{-\infty}^{+\infty} p(x) \, dx \; ?\]
|
||||
\tr{Why?}{Warum?}
|
||||
\end{exercise}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Data types}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{Data types: nominal scale}
|
||||
\subsubsection{Nominal scale}
|
||||
\begin{itemize}
|
||||
\item Binary
|
||||
\begin{itemize}
|
||||
\item Binary
|
||||
\begin{itemize}
|
||||
\item ``yes/no'',
|
||||
\item ``true/false'',
|
||||
\item ``success/failure'', etc.
|
||||
\end{itemize}
|
||||
\item Categorial
|
||||
\begin{itemize}
|
||||
\item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
|
||||
\item blood type (``A/B/AB/0''),
|
||||
\item parts of speech (``noun/verb/preposition/article/...''),
|
||||
\item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
|
||||
\end{itemize}
|
||||
\item Each observation/measurement/sample is put into one category
|
||||
\item There is no reasonable order among the categories.\\
|
||||
example: [rods, cones] vs. [cones, rods]
|
||||
\pause
|
||||
\item Statistics: mode, i.e. the most common item
|
||||
\item ``yes/no'',
|
||||
\item ``true/false'',
|
||||
\item ``success/failure'', etc.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{Data types: ordinal scale}
|
||||
\item Categorial
|
||||
\begin{itemize}
|
||||
\item Like nominal scale, but with an order
|
||||
\item Examples: ranks, ratings
|
||||
\begin{itemize}
|
||||
\item ``bad/ok/good'',
|
||||
\item ``cold/warm/hot'',
|
||||
\item ``young/old'', etc.
|
||||
\end{itemize}
|
||||
\item {\bf But:} there is no reasonable measure of {\em distance}
|
||||
between the classes
|
||||
\pause
|
||||
\item Statistics: mode, median
|
||||
\item cell type (``rod/cone/horizontal cell/bipolar cell/ganglion cell''),
|
||||
\item blood type (``A/B/AB/0''),
|
||||
\item parts of speech (``noun/verb/preposition/article/...''),
|
||||
\item taxonomic groups (``Coleoptera/Lepidoptera/Diptera/Hymenoptera''), etc.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{Data types: interval scale}
|
||||
\item Each observation/measurement/sample is put into one category
|
||||
\item There is no reasonable order among the categories.\\
|
||||
example: [rods, cones] vs. [cones, rods]
|
||||
\item Statistics: mode, i.e. the most common item
|
||||
\end{itemize}
|
||||
|
||||
\subsubsection{Ordinal scale}
|
||||
\begin{itemize}
|
||||
\item Like nominal scale, but with an order
|
||||
\item Examples: ranks, ratings
|
||||
\begin{itemize}
|
||||
\item Quantitative/metric values
|
||||
\item Reasonable measure of distance between values, but no absolute zero
|
||||
\item Examples:
|
||||
\begin{itemize}
|
||||
\item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
|
||||
\item Direction measured in degrees from magnetic or true north
|
||||
\end{itemize}
|
||||
\pause
|
||||
\item Statistics:
|
||||
\begin{itemize}
|
||||
\item Central tendency: mode, median, arithmetic mean
|
||||
\item Dispersion: range, standard deviation
|
||||
\end{itemize}
|
||||
\item ``bad/ok/good'',
|
||||
\item ``cold/warm/hot'',
|
||||
\item ``young/old'', etc.
|
||||
\end{itemize}
|
||||
\item {\bf But:} there is no reasonable measure of {\em distance}
|
||||
between the classes
|
||||
\item Statistics: mode, median
|
||||
\end{itemize}
|
||||
|
||||
\subsubsection{Interval scale}
|
||||
\begin{itemize}
|
||||
\item Quantitative/metric values
|
||||
\item Reasonable measure of distance between values, but no absolute zero
|
||||
\item Examples:
|
||||
\begin{itemize}
|
||||
\item Temperature in $^\circ$C ($20^\circ$C is not twice as hot as $10^\circ$C)
|
||||
\item Direction measured in degrees from magnetic or true north
|
||||
\end{itemize}
|
||||
\item Statistics:
|
||||
\begin{itemize}
|
||||
\item Central tendency: mode, median, arithmetic mean
|
||||
\item Dispersion: range, standard deviation
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
\end{itemize}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{Data types: absolute/ratio scale}
|
||||
\subsubsection{Absolute/ratio scale}
|
||||
\begin{itemize}
|
||||
\item Like interval scale, but with absolute origin/zero
|
||||
\item Examples:
|
||||
\begin{itemize}
|
||||
\item Like interval scale, but with absolute origin/zero
|
||||
\item Examples:
|
||||
\begin{itemize}
|
||||
\item Temperature in $^\circ$K
|
||||
\item Length, mass, duration, electric charge, ...
|
||||
\item Plane angle, etc.
|
||||
\item Count (e.g. number of spikes in response to a stimulus)
|
||||
\end{itemize}
|
||||
\pause
|
||||
\item Statistics:
|
||||
\begin{itemize}
|
||||
\item Central tendency: mode, median, arithmetic, geometric, harmonic mean
|
||||
\item Dispersion: range, standard deviation
|
||||
\item Coefficient of variation (ratio standard deviation/mean)
|
||||
\item All other statistical measures
|
||||
\end{itemize}
|
||||
\item Temperature in $^\circ$K
|
||||
\item Length, mass, duration, electric charge, ...
|
||||
\item Plane angle, etc.
|
||||
\item Count (e.g. number of spikes in response to a stimulus)
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
\item Statistics:
|
||||
\begin{itemize}
|
||||
\item Central tendency: mode, median, arithmetic, geometric, harmonic mean
|
||||
\item Dispersion: range, standard deviation
|
||||
\item Coefficient of variation (ratio standard deviation/mean)
|
||||
\item All other statistical measures
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{Data types}
|
||||
\subsubsection{Data types}
|
||||
\begin{itemize}
|
||||
\item Data type selects
|
||||
\begin{itemize}
|
||||
\item Data type selects
|
||||
\begin{itemize}
|
||||
\item statistics
|
||||
\item type of plots (bar graph versus x-y plot)
|
||||
\item correct tests
|
||||
\end{itemize}
|
||||
\item Scales exhibit increasing information content from nominal
|
||||
to absolute.\\
|
||||
Conversion ``downwards'' is always possible
|
||||
\item For example: size measured in meter (ratio scale) $\rightarrow$
|
||||
categories ``small/medium/large'' (ordinal scale)
|
||||
\item statistics
|
||||
\item type of plots (bar graph versus x-y plot)
|
||||
\item correct tests
|
||||
\end{itemize}
|
||||
\item Scales exhibit increasing information content from nominal
|
||||
to absolute.\\
|
||||
Conversion ``downwards'' is always possible
|
||||
\item For example: size measured in meter (ratio scale) $\rightarrow$
|
||||
categories ``small/medium/large'' (ordinal scale)
|
||||
\end{itemize}
|
||||
|
||||
\subsubsection{Examples from neuroscience}
|
||||
\begin{itemize}
|
||||
\item {\bf absolute:}
|
||||
\begin{itemize}
|
||||
\item size of neuron/brain
|
||||
\item length of axon
|
||||
\item ion concentration
|
||||
\item membrane potential
|
||||
\item firing rate
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
%-------------------------------------------------------------
|
||||
\begin{frame}
|
||||
\frametitle{Examples from neuroscience}
|
||||
\item {\bf interval:}
|
||||
\begin{itemize}
|
||||
\item edge orientation
|
||||
\end{itemize}
|
||||
|
||||
\item {\bf absolute:}\pause
|
||||
\begin{itemize}
|
||||
\item size of neuron/brain
|
||||
\item length of axon
|
||||
\item ion concentration
|
||||
\item membrane potential
|
||||
\item firing rate
|
||||
\end{itemize}
|
||||
\item {\bf ordinal:}
|
||||
\begin{itemize}
|
||||
\item stages of a disease
|
||||
\item ratings
|
||||
\end{itemize}
|
||||
|
||||
\item {\bf interval:}\pause
|
||||
\begin{itemize}
|
||||
\item edge orientation
|
||||
\end{itemize}
|
||||
\item {\bf nominal:}
|
||||
\begin{itemize}
|
||||
\item cell type
|
||||
\item odor
|
||||
\item states of an ion channel
|
||||
\end{itemize}
|
||||
|
||||
\item {\bf ordinal:} \pause
|
||||
\begin{itemize}
|
||||
\item stages of a disease
|
||||
\item ratings
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\item {\bf nominal:}\pause
|
||||
\begin{itemize}
|
||||
\item cell type
|
||||
\item odor
|
||||
\item states of an ion channel
|
||||
\end{itemize}
|
||||
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
\end{document}
|
||||
|
||||
|
||||
\end{document}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Statistics}
|
||||
What is "a statistic"? % dt. Sch\"atzfunktion
|
||||
\begin{definition}[statistic]
|
||||
A statistic (singular) is a single measure of some attribute of a
|
||||
sample (e.g., its arithmetic mean value). It is calculated by
|
||||
applying a function (statistical algorithm) to the values of the
|
||||
items of the sample, which are known together as a set of data.
|
||||
|
||||
\source{http://en.wikipedia.org/wiki/Statistic}
|
||||
\end{definition}
|
||||
|
32
statistics/lecture/diehistograms.py
Normal file
32
statistics/lecture/diehistograms.py
Normal file
@ -0,0 +1,32 @@
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Figure: histograms of 100 and 500 die rolls, as absolute counts (left)
# and normalized (right).  Writes diehistograms.pdf and shows the figure.

# roll the die:
# np.random.randint excludes the upper bound, so 7 yields the outcomes 1..6
# (np.random.random_integers is deprecated/removed in current NumPy).
x1 = np.random.randint( 1, 7, 100 )
x2 = np.random.randint( 1, 7, 500 )
# bin edges at 0.5, 1.5, ..., 6.5: one bin of width 1 around each outcome
bins = np.arange(0.5, 7, 1.0)

plt.xkcd()

fig = plt.figure( figsize=(6,4) )

# left panel: absolute counts
ax = fig.add_subplot( 1, 2, 1 )
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel( 'x' )
ax.set_ylabel( 'Frequency' )
ax.hist([x2, x1], bins, color=['#FFCC00', '#FFFF66' ])

# right panel: normalized histograms
ax = fig.add_subplot( 1, 2, 2 )
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel( 'x' )
ax.set_ylabel( 'Probability' )
# density=True replaces the removed normed=True; with bins of width 1 the
# bar heights of each data set sum to one.
ax.hist([x2, x1], bins, density=True, color=['#FFCC00', '#FFFF66' ])
plt.tight_layout()
fig.savefig( 'diehistograms.pdf' )
plt.show()
|
||||
|
33
statistics/lecture/median.py
Normal file
33
statistics/lecture/median.py
Normal file
@ -0,0 +1,33 @@
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Density of the standard normal distribution on a grid from -4 to 4.
x = np.arange( -4.0, 4.0, 0.01 )
g = np.exp( -0.5*x*x ) / np.sqrt( 2.0*np.pi )

plt.xkcd()
fig = plt.figure( figsize=(6,4) )
ax = fig.add_subplot( 1, 1, 1 )

# Keep only the left and bottom axes.
for side in ( 'right', 'top' ):
    ax.spines[side].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')

ax.set_xlabel( 'x' )
ax.set_ylabel( 'Probability density p(x)' )
ax.set_ylim( 0.0, 0.46 )
ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )

# Label each half of the area under the curve.
for xpos in ( -1.0, +1.0 ):
    ax.text( xpos, 0.1, '50%', ha='center' )

# Arrow from the 'Median' label to the vertical line at x = 0.
arrow = dict( arrowstyle="->", relpos=(0.0,0.5),
              connectionstyle="angle3,angleA=10,angleB=40" )
ax.annotate( 'Median',
             xy=(0.1, 0.3), xycoords='data',
             xytext=(1.6, 0.35), textcoords='data', ha='left',
             arrowprops=arrow )

# Shade the areas below and above the median in different colors.
below = x < 0
above = x > 0
ax.fill_between( x[below], 0.0, g[below], color='#ffcc00' )
ax.fill_between( x[above], 0.0, g[above], color='#99ff00' )

# The density itself and the vertical median line on top of the shading.
ax.plot( x, g, 'b', lw=4 )
ax.plot( [0.0, 0.0], [0.0, 0.45], 'k', lw=2 )

plt.tight_layout()
fig.savefig( 'median.pdf' )
plt.show()
|
||||
|
39
statistics/lecture/pdfhistogram.py
Normal file
39
statistics/lecture/pdfhistogram.py
Normal file
@ -0,0 +1,39 @@
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# 100 samples drawn from the standard normal distribution.
# (The density arrays x and g of the original script were never plotted
# and have been removed.)
r = np.random.randn( 100 )

plt.xkcd()

fig = plt.figure( figsize=(6,4) )

# Left panel: raw counts, which depend on the bin width.
# Right panel: normalized histograms of the same data.
# Each panel overlays a coarse (5 bins) and a fine (20 bins) histogram.
panels = [ ( 1, 'Frequency', False ),
           ( 2, 'Probability density p(x)', True ) ]
for index, ylabel, normalize in panels:
    ax = fig.add_subplot( 1, 2, index )
    # show only the left and bottom axes
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_xlabel( 'x' )
    ax.set_ylabel( ylabel )
    # 'density' is the current name of the former 'normed' keyword
    ax.hist(r, 5, density=normalize, color='#CC0000')
    ax.hist(r, 20, density=normalize, color='#FFCC00')

plt.tight_layout()
fig.savefig( 'pdfhistogram.pdf' )
plt.show()
|
||||
|
36
statistics/lecture/pdfprobabilities.py
Normal file
36
statistics/lecture/pdfprobabilities.py
Normal file
@ -0,0 +1,36 @@
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Density of the standard normal distribution on a grid from -3 to 5.
x = np.arange( -3.0, 5.0, 0.01 )
g = np.exp(-0.5*x*x)/np.sqrt(2.0*np.pi)
# limits of the interval whose probability is shaded
x1=0.0
x2=1.0

plt.xkcd()
fig = plt.figure( figsize=(6,4) )
ax = fig.add_subplot( 1, 1, 1 )
# show only the left and bottom axes
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel( 'x' )
ax.set_ylabel( 'Probability density p(x)' )
ax.set_ylim( 0.0, 0.46 )
ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )
ax.annotate('Gaussian',
            xy=(-1.0, 0.28), xycoords='data',
            xytext=(-2.5, 0.35), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.5,0.0),
                            connectionstyle="angle3,angleA=10,angleB=110") )
# Raw string: '\i' and '\,' are not valid Python escape sequences, so the
# backslashes must reach the mathtext renderer verbatim.
ax.annotate(r'$P(0<x<1) = \int_0^1 p(x) \, dx$',
            xy=(0.6, 0.28), xycoords='data',
            xytext=(1.2, 0.4), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
                            connectionstyle="angle3,angleA=10,angleB=80") )
# shade the area under the curve strictly between x1 and x2
inside = (x>x1)&(x<x2)
ax.fill_between( x[inside], 0.0, g[inside], color='#cc0000' )
ax.plot(x,g, 'b', lw=4)
plt.tight_layout()
fig.savefig( 'pdfprobabilities.pdf' )
plt.show()
|
||||
|
50
statistics/lecture/quartile.py
Normal file
50
statistics/lecture/quartile.py
Normal file
@ -0,0 +1,50 @@
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Density of the standard normal distribution on a grid from -4 to 4.
x = np.arange( -4.0, 4.0, 0.01 )
g = np.exp(-0.5*x*x)/np.sqrt(2.0*np.pi)
# 1. quartile, median and 3. quartile of the standard normal distribution
# (the 75% point is approximately 0.67449; the previous value 0.67488 was
# slightly off).
q = [ -0.67449, 0.0, 0.67449 ]

plt.xkcd()
fig = plt.figure( figsize=(6,4) )
ax = fig.add_subplot( 1, 1, 1 )
# show only the left and bottom axes
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel( 'x' )
ax.set_ylabel( 'Probability density p(x)' )
ax.set_ylim( 0.0, 0.46 )
ax.set_yticks( np.arange( 0.0, 0.45, 0.1 ) )

# label each of the four areas separated by the quartiles
for xpos in ( -1.2, -0.35, +0.35, +1.2 ):
    ax.text( xpos, 0.1, '25%', ha='center' )

# arrows from the labels to the three vertical lines:
# ( label, arrow tip, label position, alignment, relpos, connectionstyle )
labels = [
    ( '1. quartile', (-0.75, 0.2), (-1.7, 0.25), 'right', (1.0,0.5),
      "angle3,angleA=170,angleB=120" ),
    ( '3. quartile', (0.75, 0.17), (1.7, 0.22), 'left', (0.0,0.5),
      "angle3,angleA=10,angleB=70" ),
    ( 'Median', (0.1, 0.3), (1.6, 0.35), 'left', (0.0,0.5),
      "angle3,angleA=10,angleB=40" ),
]
for label, tip, textpos, align, relpos, connection in labels:
    ax.annotate( label,
                 xy=tip, xycoords='data',
                 xytext=textpos, textcoords='data', ha=align,
                 arrowprops=dict(arrowstyle="->", relpos=relpos,
                                 connectionstyle=connection) )

# shade the four areas between the quartiles in different colors
areas = [
    ( x<q[0], '#ffcc00' ),
    ( (x>q[0])&(x<q[1]), '#ff0000' ),
    ( (x>q[1])&(x<q[2]), '#ff9900' ),
    ( x>q[2], '#ffff66' ),
]
for mask, color in areas:
    ax.fill_between( x[mask], 0.0, g[mask], color=color )

# the density and the vertical lines at the median and the two quartiles
ax.plot(x,g, 'b', lw=4)
ax.plot([0.0, 0.0], [0.0, 0.45], 'k', lw=2 )
ax.plot([q[0], q[0]], [0.0, 0.4], 'k', lw=2 )
ax.plot([q[2], q[2]], [0.0, 0.4], 'k', lw=2 )
plt.tight_layout()
fig.savefig( 'quartile.pdf' )
plt.show()
|
||||
|
Reference in New Issue
Block a user