[likelihood] improved figure for line fit

This commit is contained in:
Jan Benda 2019-11-30 21:36:56 +01:00
parent 934a1e976c
commit 5e3d665cd3
2 changed files with 67 additions and 20 deletions

View File

@ -228,7 +228,12 @@ maximized respectively.
\begin{figure}[t] \begin{figure}[t]
\includegraphics[width=1\textwidth]{mlepropline} \includegraphics[width=1\textwidth]{mlepropline}
\titlecaption{\label{mleproplinefig} Maximum likelihood estimation \titlecaption{\label{mleproplinefig} Maximum likelihood estimation
of the slope of line through the origin.}{} of the slope of line through the origin.}{The data (blue and
left histogram) originate from a straight line $y=mx$ through the origin
(red). The maximum-likelihood estimation of the slope $m$ of the
regression line (orange), \eqnref{mleslope}, is close to the true
one. The residuals, the data minus the estimated line (right), reveal
the normal distribution of the data around the line (right histogram).}
\end{figure} \end{figure}
@ -282,10 +287,11 @@ To see what this expression is, we need to standardize the data. We
make the data mean free and normalize them to their standard make the data mean free and normalize them to their standard
deviation, i.e. $x \mapsto (x - \bar x)/\sigma_x$. The resulting deviation, i.e. $x \mapsto (x - \bar x)/\sigma_x$. The resulting
numbers are also called \enterm{$z$-values} or $z$-scores and they numbers are also called \enterm{$z$-values} or $z$-scores and they
have the property $\bar x = 0$ and $\sigma_x = 1$. If this is the have the property $\bar x = 0$ and $\sigma_x = 1$. $z$-scores are
case, the variance often used in biology to make quantities that differ in their units
comparable. For standardized data the variance
\[ \sigma_x^2 = \frac{1}{n} \sum_{i=1}^n (x_i - \bar x)^2 = \frac{1}{n} \sum_{i=1}^n x_i^2 = 1 \] \[ \sigma_x^2 = \frac{1}{n} \sum_{i=1}^n (x_i - \bar x)^2 = \frac{1}{n} \sum_{i=1}^n x_i^2 = 1 \]
is the mean squared data and equals one. is given by the mean squared data and equals one.
The covariance between $x$ and $y$ also simplifies to The covariance between $x$ and $y$ also simplifies to
\[ \text{cov}(x, y) = \frac{1}{n} \sum_{i=1}^n (x_i - \bar x)(y_i - \[ \text{cov}(x, y) = \frac{1}{n} \sum_{i=1}^n (x_i - \bar x)(y_i -
\bar y) =\frac{1}{n} \sum_{i=1}^n x_i y_i \] \bar y) =\frac{1}{n} \sum_{i=1}^n x_i y_i \]

View File

@ -1,16 +1,17 @@
import numpy as np import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
plt.xkcd() plt.xkcd()
fig = plt.figure( figsize=(6,3.5) ) fig = plt.figure(figsize=(6, 3))
# the line: # the line:
slope = 2.0 slope = 2.0
xx = np.arange(0.0, 4.1, 0.1) xx = np.arange(0.0, 4.1, 0.1)
yy = slope*xx yy = slope*xx
# the data: # the data:
n = 80 n = 40
rng = np.random.RandomState(218) rng = np.random.RandomState(5218)
sigma = 1.5 sigma = 1.5
x = 4.0*rng.rand(n) x = 4.0*rng.rand(n)
y = slope*x+rng.randn(n)*sigma y = slope*x+rng.randn(n)*sigma
@ -19,7 +20,7 @@ slopef = np.sum(x*y)/np.sum(x*x)
yf = slopef*xx yf = slopef*xx
# plot it: # plot it:
ax = fig.add_subplot( 1, 1, 1 ) ax = fig.add_axes([0.09, 0.02, 0.33, 0.9])
ax.spines['left'].set_position('zero') ax.spines['left'].set_position('zero')
ax.spines['bottom'].set_position('zero') ax.spines['bottom'].set_position('zero')
ax.spines['right'].set_visible(False) ax.spines['right'].set_visible(False)
@ -30,20 +31,60 @@ ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom') ax.xaxis.set_ticks_position('bottom')
ax.set_xticks(np.arange(0.0, 4.1)) ax.set_xticks(np.arange(0.0, 4.1))
ax.set_xlim(0.0, 4.2) ax.set_xlim(0.0, 4.2)
#ax.set_ylim(-1, 5) ax.set_ylim(-4.0, 12.0)
#ax.set_xticks( np.arange(0, 5))
#ax.set_yticks( np.arange(0, 0.9, 0.2))
ax.set_xlabel('x') ax.set_xlabel('x')
ax.set_ylabel('y') ax.set_ylabel('y')
#ax.annotate('', xy=(mu, 0.02), xycoords='data', ax.scatter(x, y, label='data', s=40, zorder=10)
# xytext=(mu, 0.75), textcoords='data', ax.plot(xx, yy, 'r', lw=5.0, color='#ff0000', label='original', zorder=5)
# arrowprops=dict(arrowstyle="->", relpos=(0.5,0.5), ax.plot(xx, yf, '--', lw=1.0, color='#ffcc00', label='fit', zorder=7)
# connectionstyle=cs), zorder=1 ) ax.legend(loc='upper left', bbox_to_anchor=(0.0, 1.15), frameon=False)
ax.scatter(x, y, label='data', s=50, zorder=10)
ax.plot(xx, yy, 'r', lw=6.0, color='#ff0000', label='original', zorder=5) ax = fig.add_axes([0.42, 0.02, 0.07, 0.9])
ax.plot(xx, yf, '--', lw=2.0, color='#ffcc00', label='fit', zorder=7) ax.spines['left'].set_position('zero')
ax.legend(loc='upper left', frameon=False) ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.get_yaxis().set_tick_params(direction='inout', length=10, width=2)
ax.yaxis.set_ticks_position('left')
ax.set_xticks([])
ax.set_ylim(-4.0, 12.0)
ax.set_yticks([])
bins = np.arange(-4.0, 12.1, 0.75)
ax.hist(y, bins, orientation='horizontal', zorder=10)
ax = fig.add_axes([0.6, 0.02, 0.33, 0.9])
ax.spines['left'].set_position('zero')
ax.spines['bottom'].set_position('zero')
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.get_xaxis().set_tick_params(direction='inout', length=10, width=2)
ax.get_yaxis().set_tick_params(direction='inout', length=10, width=2)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xticks(np.arange(0.0, 4.1))
ax.set_xlim(0.0, 4.2)
ax.set_ylim(-4.0, 12.0)
ax.set_xlabel('x')
ax.set_ylabel('y - mx')
ax.scatter(x, y - slopef*x, label='residuals', s=40, zorder=10)
#ax.legend(loc='upper left', bbox_to_anchor=(0.0, 1.0), frameon=False)
ax = fig.add_axes([0.93, 0.02, 0.07, 0.9])
ax.spines['left'].set_position('zero')
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.get_yaxis().set_tick_params(direction='inout', length=10, width=2)
ax.yaxis.set_ticks_position('left')
ax.set_xlim(0.0, 11.0)
ax.set_xticks([])
ax.set_ylim(-4.0, 12.0)
ax.set_yticks([])
r = y - slopef*x
ax.hist(r, bins, orientation='horizontal', zorder=10)
gx = np.arange(-4.0, 12.1, 0.1)
gy = st.norm.pdf(gx, np.mean(r), np.std(r))
ax.plot(1.0+gy*29.0, gx, 'r', lw=2, zorder=5)
plt.tight_layout();
plt.savefig('mlepropline.pdf') plt.savefig('mlepropline.pdf')
#plt.show(); #plt.show();