diff --git a/likelihood/lecture/likelihood.tex b/likelihood/lecture/likelihood.tex index 12d1e3a..6bf1bfc 100644 --- a/likelihood/lecture/likelihood.tex +++ b/likelihood/lecture/likelihood.tex @@ -228,7 +228,12 @@ maximized respectively. \begin{figure}[t] \includegraphics[width=1\textwidth]{mlepropline} \titlecaption{\label{mleproplinefig} Maximum likelihood estimation - of the slope of line through the origin.}{} + of the slope of line through the origin.}{The data (blue and + left histogram) originate from a straight line $y=mx$ trough the origin + (red). The maximum-likelihood estimation of the slope $m$ of the + regression line (orange), \eqnref{mleslope}, is close to the true + one. The residuals, the data minus the estimated line (right), reveal + the normal distribution of the data around the line (right histogram).} \end{figure} @@ -282,10 +287,11 @@ To see what this expression is, we need to standardize the data. We make the data mean free and normalize them to their standard deviation, i.e. $x \mapsto (x - \bar x)/\sigma_x$. The resulting numbers are also called \enterm{$z$-values} or $z$-scores and they -have the property $\bar x = 0$ and $\sigma_x = 1$. If this is the -case, the variance +have the property $\bar x = 0$ and $\sigma_x = 1$. $z$-scores are +often used in Biology to make quantities that differ in their units +comparable. For standardized data the variance \[ \sigma_x^2 = \frac{1}{n} \sum_{i=1}^n (x_i - \bar x)^2 = \frac{1}{n} \sum_{i=1}^n x_i^2 = 1 \] -is the mean squared data and equals one. +is given by the mean squared data and equals one. The covariance between $x$ and $y$ also simplifies to \[ \text{cov}(x, y) = \frac{1}{n} \sum_{i=1}^n (x_i - \bar x)(y_i - \bar y) =\frac{1}{n} \sum_{i=1}^n x_i y_i \] diff --git a/likelihood/lecture/mlepropline.py b/likelihood/lecture/mlepropline.py index fcaa97a..c89310c 100644 --- a/likelihood/lecture/mlepropline.py +++ b/likelihood/lecture/mlepropline.py @@ -1,16 +1,17 @@ import numpy as np +import scipy.stats as st import matplotlib.pyplot as plt plt.xkcd() -fig = plt.figure( figsize=(6,3.5) ) +fig = plt.figure(figsize=(6, 3)) # the line: slope = 2.0 xx = np.arange(0.0, 4.1, 0.1) yy = slope*xx # the data: -n = 80 -rng = np.random.RandomState(218) +n = 40 +rng = np.random.RandomState(5218) sigma = 1.5 x = 4.0*rng.rand(n) y = slope*x+rng.randn(n)*sigma @@ -19,7 +20,7 @@ slopef = np.sum(x*y)/np.sum(x*x) yf = slopef*xx # plot it: -ax = fig.add_subplot( 1, 1, 1 ) +ax = fig.add_axes([0.09, 0.02, 0.33, 0.9]) ax.spines['left'].set_position('zero') ax.spines['bottom'].set_position('zero') ax.spines['right'].set_visible(False) @@ -30,20 +31,60 @@ ax.yaxis.set_ticks_position('left') ax.xaxis.set_ticks_position('bottom') ax.set_xticks(np.arange(0.0, 4.1)) ax.set_xlim(0.0, 4.2) -#ax.set_ylim(-1, 5) -#ax.set_xticks( np.arange(0, 5)) -#ax.set_yticks( np.arange(0, 0.9, 0.2)) +ax.set_ylim(-4.0, 12.0) ax.set_xlabel('x') ax.set_ylabel('y') -#ax.annotate('', xy=(mu, 0.02), xycoords='data', -# xytext=(mu, 0.75), textcoords='data', -# arrowprops=dict(arrowstyle="->", relpos=(0.5,0.5), -# connectionstyle=cs), zorder=1 ) -ax.scatter(x, y, label='data', s=50, zorder=10) -ax.plot(xx, yy, 'r', lw=6.0, color='#ff0000', label='original', zorder=5) -ax.plot(xx, yf, '--', lw=2.0, color='#ffcc00', label='fit', zorder=7) -ax.legend(loc='upper left', frameon=False) +ax.scatter(x, y, label='data', s=40, zorder=10) +ax.plot(xx, yy, 'r', lw=5.0, color='#ff0000', label='original', zorder=5) +ax.plot(xx, yf, '--', lw=1.0, color='#ffcc00', label='fit', zorder=7) +ax.legend(loc='upper left', bbox_to_anchor=(0.0, 1.15), frameon=False) + +ax = fig.add_axes([0.42, 0.02, 0.07, 0.9]) +ax.spines['left'].set_position('zero') +ax.spines['right'].set_visible(False) +ax.spines['top'].set_visible(False) +ax.spines['bottom'].set_visible(False) +ax.get_yaxis().set_tick_params(direction='inout', length=10, width=2) +ax.yaxis.set_ticks_position('left') +ax.set_xticks([]) +ax.set_ylim(-4.0, 12.0) +ax.set_yticks([]) +bins = np.arange(-4.0, 12.1, 0.75) +ax.hist(y, bins, orientation='horizontal', zorder=10) + +ax = fig.add_axes([0.6, 0.02, 0.33, 0.9]) +ax.spines['left'].set_position('zero') +ax.spines['bottom'].set_position('zero') +ax.spines['right'].set_visible(False) +ax.spines['top'].set_visible(False) +ax.get_xaxis().set_tick_params(direction='inout', length=10, width=2) +ax.get_yaxis().set_tick_params(direction='inout', length=10, width=2) +ax.yaxis.set_ticks_position('left') +ax.xaxis.set_ticks_position('bottom') +ax.set_xticks(np.arange(0.0, 4.1)) +ax.set_xlim(0.0, 4.2) +ax.set_ylim(-4.0, 12.0) +ax.set_xlabel('x') +ax.set_ylabel('y - mx') +ax.scatter(x, y - slopef*x, label='residuals', s=40, zorder=10) +#ax.legend(loc='upper left', bbox_to_anchor=(0.0, 1.0), frameon=False) + +ax = fig.add_axes([0.93, 0.02, 0.07, 0.9]) +ax.spines['left'].set_position('zero') +ax.spines['right'].set_visible(False) +ax.spines['top'].set_visible(False) +ax.spines['bottom'].set_visible(False) +ax.get_yaxis().set_tick_params(direction='inout', length=10, width=2) +ax.yaxis.set_ticks_position('left') +ax.set_xlim(0.0, 11.0) +ax.set_xticks([]) +ax.set_ylim(-4.0, 12.0) +ax.set_yticks([]) +r = y - slopef*x +ax.hist(r, bins, orientation='horizontal', zorder=10) +gx = np.arange(-4.0, 12.1, 0.1) +gy = st.norm.pdf(gx, np.mean(r), np.std(r)) +ax.plot(1.0+gy*29.0, gx, 'r', lw=2, zorder=5) -plt.tight_layout(); plt.savefig('mlepropline.pdf') #plt.show();