From 69b9afbc80a757e2363382b62608b9c24b1340c6 Mon Sep 17 00:00:00 2001 From: Jan Benda Date: Tue, 20 Oct 2015 18:26:43 +0200 Subject: [PATCH] Added correlation plots --- statistics/code/diehistograms.m | 6 +- statistics/lecture/correlation.py | 34 ++++ statistics/lecture/descriptivestatistics.tex | 165 ++++++++++++++++++- statistics/lecture/nonlincorrelation.py | 42 +++++ 4 files changed, 242 insertions(+), 5 deletions(-) create mode 100644 statistics/lecture/correlation.py create mode 100644 statistics/lecture/nonlincorrelation.py diff --git a/statistics/code/diehistograms.m b/statistics/code/diehistograms.m index 8af7d68..d04ca98 100644 --- a/statistics/code/diehistograms.m +++ b/statistics/code/diehistograms.m @@ -3,16 +3,16 @@ nrolls = [ 20, 100, 1000 ]; for i = [1:length(nrolls)] d = rollthedie( nrolls(i) ); % plain hist: - % hist( d ) + %hist( d ) % check bin counts of plain hist: % h = hist( d ) % force 6 bins: - % hist( d, 6 ) + %hist( d, 6 ) % set the right bin centers: - bins = 1:6; + %bins = 1:6; %hist( d, bins ) % normalize histogram and compare to expectation: diff --git a/statistics/lecture/correlation.py b/statistics/lecture/correlation.py new file mode 100644 index 0000000..db317c7 --- /dev/null +++ b/statistics/lecture/correlation.py @@ -0,0 +1,34 @@ +import numpy as np +import matplotlib.pyplot as plt + +plt.xkcd() +fig = plt.figure( figsize=(6,5) ) +n = 200 +for k, r in enumerate( [ 1.0, 0.6, 0.0, -0.9 ] ) : + print r + x = np.random.randn( n ) + y = r*x + np.sqrt(1.0-r*r)*np.random.randn( n ) + ax = fig.add_subplot( 2, 2, k+1 ) + ax.spines['right'].set_visible(False) + ax.spines['top'].set_visible(False) + ax.yaxis.set_ticks_position('left') + ax.xaxis.set_ticks_position('bottom') + ax.text( -2, 2.5, 'r=%.1f' % r ) + if k == 0 : + ax.text( 2.8, -2, 'positively\ncorrelated', ha='right' ) + elif k == 1 : + ax.text( 2.8, -2.5, 'weakly\ncorrelated', ha='right' ) + elif k == 2 : + ax.text( 2.8, -2.5, 'not\ncorrelated', ha='right' ) + elif k == 3 : + 
ax.text( -2.5, -2, 'negatively\ncorrelated', ha='left' ) + ax.set_xlabel('x') + ax.set_ylabel('y') + ax.set_xlim( -3.0, 3.0) + ax.set_ylim( -3.0, 3.0) + ax.scatter( x, y ) + +plt.tight_layout() +plt.savefig('correlation.pdf') +plt.show() + diff --git a/statistics/lecture/descriptivestatistics.tex b/statistics/lecture/descriptivestatistics.tex index 21e7493..ac99345 100644 --- a/statistics/lecture/descriptivestatistics.tex +++ b/statistics/lecture/descriptivestatistics.tex @@ -19,10 +19,141 @@ \usepackage[left=25mm,right=25mm,top=20mm,bottom=30mm]{geometry} \setcounter{tocdepth}{1} -%%%% graphics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%% section style %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[sf,bf,it,big,clearempty]{titlesec} +\setcounter{secnumdepth}{-1} + + +%%%%% units %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro + + +%%%%% figures %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \usepackage{graphicx} \usepackage{xcolor} -\newcommand{\texpicture}[1]{{\sffamily\small\input{#1.tex}}} +\pagecolor{white} + +\newcommand{\ruler}{\par\noindent\setlength{\unitlength}{1mm}\begin{picture}(0,6)% + \put(0,4){\line(1,0){170}}% + \multiput(0,2)(10,0){18}{\line(0,1){4}}% + \multiput(0,3)(1,0){170}{\line(0,1){2}}% + \put(0,0){\makebox(0,0){{\tiny 0}}}% + \put(10,0){\makebox(0,0){{\tiny 1}}}% + \put(20,0){\makebox(0,0){{\tiny 2}}}% + \put(30,0){\makebox(0,0){{\tiny 3}}}% + \put(40,0){\makebox(0,0){{\tiny 4}}}% + \put(50,0){\makebox(0,0){{\tiny 5}}}% + \put(60,0){\makebox(0,0){{\tiny 6}}}% + \put(70,0){\makebox(0,0){{\tiny 7}}}% + \put(80,0){\makebox(0,0){{\tiny 8}}}% + \put(90,0){\makebox(0,0){{\tiny 9}}}% + \put(100,0){\makebox(0,0){{\tiny 10}}}% + \put(110,0){\makebox(0,0){{\tiny 11}}}% + \put(120,0){\makebox(0,0){{\tiny 12}}}% + \put(130,0){\makebox(0,0){{\tiny 13}}}% + \put(140,0){\makebox(0,0){{\tiny 14}}}% + 
\put(150,0){\makebox(0,0){{\tiny 15}}}% + \put(160,0){\makebox(0,0){{\tiny 16}}}% + \put(170,0){\makebox(0,0){{\tiny 17}}}% + \end{picture}\par} + +% figures: +\setlength{\fboxsep}{0pt} +\newcommand{\texpicture}[1]{{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\fbox{\sffamily\footnotesize\input{#1.tex}}} +%\newcommand{\texpicture}[1]{\setlength{\fboxsep}{2mm}\fbox{#1}} +%\newcommand{\texpicture}[1]{} +\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}} + +% maximum number of floats: +\setcounter{topnumber}{2} +\setcounter{bottomnumber}{0} +\setcounter{totalnumber}{2} + +% float placement fractions: +\renewcommand{\textfraction}{0.2} +\renewcommand{\topfraction}{0.8} +\renewcommand{\bottomfraction}{0.0} +\renewcommand{\floatpagefraction}{0.5} + +% spacing for floats: +\setlength{\floatsep}{12pt plus 2pt minus 2pt} +\setlength{\textfloatsep}{20pt plus 4pt minus 2pt} +\setlength{\intextsep}{12pt plus 2pt minus 2pt} + +% spacing for a floating page: +\makeatletter + \setlength{\@fptop}{0pt} + \setlength{\@fpsep}{8pt plus 2.0fil} + \setlength{\@fpbot}{0pt plus 1.0fil} +\makeatother + +% rules for floats: +\newcommand{\topfigrule}{\vspace*{10pt}{\hrule height0.4pt}\vspace*{-10.4pt}} +\newcommand{\bottomfigrule}{\vspace*{-10.4pt}{\hrule height0.4pt}\vspace*{10pt}} + +% captions: +\usepackage[format=plain,singlelinecheck=off,labelfont=bf,font={small,sf}]{caption} + +% put caption on separate float: +\newcommand{\breakfloat}{\end{figure}\begin{figure}[t]} + +% references to panels of a figure within the caption: +\newcommand{\figitem}[1]{\textsf{\bfseries\uppercase{#1}}} +% references to figures: +\newcommand{\panel}[1]{\textsf{\uppercase{#1}}} +\newcommand{\fref}[1]{\textup{\ref{#1}}} +\newcommand{\subfref}[2]{\textup{\ref{#1}}\,\panel{#2}} +% references to figures in normal text: +\newcommand{\fig}{Fig.} +\newcommand{\Fig}{Figure} +\newcommand{\figs}{Figs.} +\newcommand{\Figs}{Figures} +\newcommand{\figref}[1]{\fig~\fref{#1}} 
+\newcommand{\Figref}[1]{\Fig~\fref{#1}} +\newcommand{\figsref}[1]{\figs~\fref{#1}} +\newcommand{\Figsref}[1]{\Figs~\fref{#1}} +\newcommand{\subfigref}[2]{\fig~\subfref{#1}{#2}} +\newcommand{\Subfigref}[2]{\Fig~\subfref{#1}{#2}} +\newcommand{\subfigsref}[2]{\figs~\subfref{#1}{#2}} +\newcommand{\Subfigsref}[2]{\Figs~\subfref{#1}{#2}} +% references to figures within bracketed text: +\newcommand{\figb}{Fig.} +\newcommand{\figsb}{Figs.} +\newcommand{\figrefb}[1]{\figb~\fref{#1}} +\newcommand{\figsrefb}[1]{\figsb~\fref{#1}} +\newcommand{\subfigrefb}[2]{\figb~\subfref{#1}{#2}} +\newcommand{\subfigsrefb}[2]{\figsb~\subfref{#1}{#2}} + +% references to tables: +\newcommand{\tref}[1]{\textup{\ref{#1}}} +% references to tables in normal text: +\newcommand{\tab}{Tab.} +\newcommand{\Tab}{Table} +\newcommand{\tabs}{Tabs.} +\newcommand{\Tabs}{Tables} +\newcommand{\tabref}[1]{\tab~\tref{#1}} +\newcommand{\Tabref}[1]{\Tab~\tref{#1}} +\newcommand{\tabsref}[1]{\tabs~\tref{#1}} +\newcommand{\Tabsref}[1]{\Tabs~\tref{#1}} +% references to tables within bracketed text: +\newcommand{\tabb}{Tab.} +\newcommand{\tabsb}{Tab.} +\newcommand{\tabrefb}[1]{\tabb~\tref{#1}} +\newcommand{\tabsrefb}[1]{\tabsb~\tref{#1}} + + +%%%%% equation references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\newcommand{\eqref}[1]{(\ref{#1})} +\newcommand{\eqn}{Eq.} +\newcommand{\Eqn}{Eq.} +\newcommand{\eqns}{Eqs.} +\newcommand{\Eqns}{Eqs.} +\newcommand{\eqnref}[1]{\eqn~\eqref{#1}} +\newcommand{\Eqnref}[1]{\Eqn~\eqref{#1}} +\newcommand{\eqnsref}[1]{\eqns~\eqref{#1}} +\newcommand{\Eqnsref}[1]{\Eqns~\eqref{#1}} + %%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \usepackage{listings} @@ -290,6 +421,36 @@ spricht von einer Wahrscheinlichkeitsdichte. 
\end{figure} +\subsection{Korrelation} + +\begin{figure}[t] + \includegraphics[width=1\textwidth]{correlation} + \caption{\label{correlationfig} Korrelationen zwischen zwei Datens\"atzen $x$ und $y$.} +\end{figure} + +Bisher haben wir Eigenschaften einer einzelnen Me{\ss}gr\"o{\ss}e angeschaut. +Bei mehreren Me{\ss}gr\"o{\ss}en kann nach Abh\"angigkeiten gefragt werden. +Der Korrelationskoeffizient +\[ r_{x,y} = \frac{\mathrm{Cov}(x,y)}{\sigma_x \sigma_y} = \frac{\langle + (x-\langle x \rangle)(y-\langle y \rangle) \rangle}{\sqrt{\langle + (x-\langle x \rangle)^2 \rangle} \sqrt{\langle (y-\langle y + \rangle)^2 \rangle}} \] quantifiziert einfache lineare +Zusammenh\"ange. Perfekt korrelierte Variablen ergeben einen +Korrelationskoeffizienten von $+1$, antikorrelierte Daten einen +Korrelationskoeffizienten von $-1$ und nicht korrelierte Daten einen +Korrelationskoeffizienten nahe 0 (\figrefb{correlationfig}). + +\begin{figure}[t] + \includegraphics[width=1\textwidth]{nonlincorrelation} + \caption{\label{nonlincorrelationfig} Nichtlineare Zusammenh\"ange + werden durch den Korrelationskoeffizienten nicht erfasst! Sowohl + die quadratische Abh\"angigkeit (links) als auch eine + Rauschkorrelation (rechts), bei der die Streuung der $y$-Werte von + $x$ abh\"angt, ergeben Korrelationskoeffizienten nahe Null. 
+ $\xi$ sind normalverteilte Zufallszahlen.} +\end{figure} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Data types} diff --git a/statistics/lecture/nonlincorrelation.py b/statistics/lecture/nonlincorrelation.py new file mode 100644 index 0000000..c0ca723 --- /dev/null +++ b/statistics/lecture/nonlincorrelation.py @@ -0,0 +1,42 @@ +import numpy as np +import matplotlib.pyplot as plt + +plt.xkcd() +fig = plt.figure( figsize=(6,3) ) +n = 200 +x = np.random.randn( n ) +y = np.random.randn( n ) + +z = x*x+0.2*y +r =np.corrcoef(x,z)[0,1] +ax = fig.add_subplot( 1, 2, 1 ) +ax.spines['right'].set_visible(False) +ax.spines['top'].set_visible(False) +ax.yaxis.set_ticks_position('left') +ax.xaxis.set_ticks_position('bottom') +ax.text( 0, 4.0, 'r=%.1f' % r, ha='center' ) +ax.text( 0, 5.5, r'$y = x^2+\xi/5$', ha='center' ) +ax.set_xlabel('x') +ax.set_ylabel('y') +ax.set_xlim( -3.0, 3.0) +ax.set_ylim( -0.5, 6.0) +ax.scatter( x, z ) + +z = 0.5*x*y +r =np.corrcoef(x,z)[0,1] +ax = fig.add_subplot( 1, 2, 2 ) +ax.spines['right'].set_visible(False) +ax.spines['top'].set_visible(False) +ax.yaxis.set_ticks_position('left') +ax.xaxis.set_ticks_position('bottom') +ax.text( 0, 1.5, 'r=%.1f' % r, ha='center' ) +ax.text( 0, 2.5, r'$y = x \cdot \xi/2$', ha='center' ) +ax.set_xlabel('x') +ax.set_ylabel('y') +ax.set_xlim( -3.0, 3.0) +ax.set_ylim( -3.0, 3.0) +ax.scatter( x, z ) + +plt.tight_layout() +plt.savefig('nonlincorrelation.pdf') +plt.show()