[likelihood] improved figure for line fit

This commit is contained in:
Jan Benda 2019-11-30 21:36:56 +01:00
parent 934a1e976c
commit 5e3d665cd3
2 changed files with 67 additions and 20 deletions

View File

@ -228,7 +228,12 @@ maximized respectively.
\begin{figure}[t]
\includegraphics[width=1\textwidth]{mlepropline}
\titlecaption{\label{mleproplinefig} Maximum likelihood estimation
of the slope of line through the origin.}{}
of the slope of line through the origin.}{The data (blue and
left histogram) originate from a straight line $y=mx$ trough the origin
(red). The maximum-likelihood estimation of the slope $m$ of the
regression line (orange), \eqnref{mleslope}, is close to the true
one. The residuals, the data minus the estimated line (right), reveal
the normal distribution of the data around the line (right histogram).}
\end{figure}
@ -282,10 +287,11 @@ To see what this expression is, we need to standardize the data. We
make the data mean free and normalize them to their standard
deviation, i.e. $x \mapsto (x - \bar x)/\sigma_x$. The resulting
numbers are also called \enterm{$z$-values} or $z$-scores and they
have the property $\bar x = 0$ and $\sigma_x = 1$. If this is the
case, the variance
have the property $\bar x = 0$ and $\sigma_x = 1$. $z$-scores are
often used in Biology to make quantities that differ in their units
comparable. For standardized data the variance
\[ \sigma_x^2 = \frac{1}{n} \sum_{i=1}^n (x_i - \bar x)^2 = \frac{1}{n} \sum_{i=1}^n x_i^2 = 1 \]
is the mean squared data and equals one.
is given by the mean squared data and equals one.
The covariance between $x$ and $y$ also simplifies to
\[ \text{cov}(x, y) = \frac{1}{n} \sum_{i=1}^n (x_i - \bar x)(y_i -
\bar y) =\frac{1}{n} \sum_{i=1}^n x_i y_i \]

View File

@ -1,16 +1,17 @@
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
plt.xkcd()
fig = plt.figure( figsize=(6,3.5) )
fig = plt.figure(figsize=(6, 3))
# the line:
slope = 2.0
xx = np.arange(0.0, 4.1, 0.1)
yy = slope*xx
# the data:
n = 80
rng = np.random.RandomState(218)
n = 40
rng = np.random.RandomState(5218)
sigma = 1.5
x = 4.0*rng.rand(n)
y = slope*x+rng.randn(n)*sigma
@ -19,7 +20,7 @@ slopef = np.sum(x*y)/np.sum(x*x)
yf = slopef*xx
# plot it:
ax = fig.add_subplot( 1, 1, 1 )
ax = fig.add_axes([0.09, 0.02, 0.33, 0.9])
ax.spines['left'].set_position('zero')
ax.spines['bottom'].set_position('zero')
ax.spines['right'].set_visible(False)
@ -30,20 +31,60 @@ ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xticks(np.arange(0.0, 4.1))
ax.set_xlim(0.0, 4.2)
#ax.set_ylim(-1, 5)
#ax.set_xticks( np.arange(0, 5))
#ax.set_yticks( np.arange(0, 0.9, 0.2))
ax.set_ylim(-4.0, 12.0)
ax.set_xlabel('x')
ax.set_ylabel('y')
#ax.annotate('', xy=(mu, 0.02), xycoords='data',
# xytext=(mu, 0.75), textcoords='data',
# arrowprops=dict(arrowstyle="->", relpos=(0.5,0.5),
# connectionstyle=cs), zorder=1 )
ax.scatter(x, y, label='data', s=50, zorder=10)
ax.plot(xx, yy, 'r', lw=6.0, color='#ff0000', label='original', zorder=5)
ax.plot(xx, yf, '--', lw=2.0, color='#ffcc00', label='fit', zorder=7)
ax.legend(loc='upper left', frameon=False)
ax.scatter(x, y, label='data', s=40, zorder=10)
ax.plot(xx, yy, 'r', lw=5.0, color='#ff0000', label='original', zorder=5)
ax.plot(xx, yf, '--', lw=1.0, color='#ffcc00', label='fit', zorder=7)
ax.legend(loc='upper left', bbox_to_anchor=(0.0, 1.15), frameon=False)
ax = fig.add_axes([0.42, 0.02, 0.07, 0.9])
ax.spines['left'].set_position('zero')
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.get_yaxis().set_tick_params(direction='inout', length=10, width=2)
ax.yaxis.set_ticks_position('left')
ax.set_xticks([])
ax.set_ylim(-4.0, 12.0)
ax.set_yticks([])
bins = np.arange(-4.0, 12.1, 0.75)
ax.hist(y, bins, orientation='horizontal', zorder=10)
ax = fig.add_axes([0.6, 0.02, 0.33, 0.9])
ax.spines['left'].set_position('zero')
ax.spines['bottom'].set_position('zero')
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.get_xaxis().set_tick_params(direction='inout', length=10, width=2)
ax.get_yaxis().set_tick_params(direction='inout', length=10, width=2)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xticks(np.arange(0.0, 4.1))
ax.set_xlim(0.0, 4.2)
ax.set_ylim(-4.0, 12.0)
ax.set_xlabel('x')
ax.set_ylabel('y - mx')
ax.scatter(x, y - slopef*x, label='residuals', s=40, zorder=10)
#ax.legend(loc='upper left', bbox_to_anchor=(0.0, 1.0), frameon=False)
ax = fig.add_axes([0.93, 0.02, 0.07, 0.9])
ax.spines['left'].set_position('zero')
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.get_yaxis().set_tick_params(direction='inout', length=10, width=2)
ax.yaxis.set_ticks_position('left')
ax.set_xlim(0.0, 11.0)
ax.set_xticks([])
ax.set_ylim(-4.0, 12.0)
ax.set_yticks([])
r = y - slopef*x
ax.hist(r, bins, orientation='horizontal', zorder=10)
gx = np.arange(-4.0, 12.1, 0.1)
gy = st.norm.pdf(gx, np.mean(r), np.std(r))
ax.plot(1.0+gy*29.0, gx, 'r', lw=2, zorder=5)
plt.tight_layout();
plt.savefig('mlepropline.pdf')
#plt.show();