[regression] new figure for minimum of a cost function
parent 1b14bca164
commit 3541f30798
@@ -1,4 +1,4 @@
-function mse = meanSquaredError(x, y, c)
+function mse = meanSquaredErrorCubic(x, y, c)
 % Mean squared error between data pairs and a cubic relation.
 %
 % Arguments: x, vector of the input values
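The body of the function is not shown in this hunk. Judging from the
docstring and from the figure script below, it presumably averages the
squared residuals against the cubic relation y = c*x^3. A minimal
Python transcription of that assumed body (the snake_case name is ours):

    import numpy as np

    def mean_squared_error_cubic(x, y, c):
        # mean squared error between data pairs (x_i, y_i) and y = c*x^3
        return np.mean((y - c * x**3.0)**2.0)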
regression/lecture/cubiccost.py (new file, 80 lines)
@@ -0,0 +1,80 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mt
from plotstyle import *

def create_data():
    # wikipedia:
    # Generally, males vary in total length from 250 to 390 cm and
    # weigh between 90 and 306 kg
    c = 6
    x = np.arange(2.2, 3.9, 0.05)
    y = c * x**3.0
    rng = np.random.RandomState(32281)
    noise = rng.randn(len(x))*50
    y += noise
    return x, y, c


def plot_mse(ax, x, y, c):
    # compute the mean squared error for a dense grid of c values:
    ccs = np.linspace(0.5, 10.0, 200)
    mses = np.zeros(len(ccs))
    for i, cc in enumerate(ccs):
        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
    imin = np.argmin(mses)

    ax.plot(ccs, mses, **lsAm)
    ax.plot(c, 500.0, **psB)               # mark the true parameter value
    ax.plot(ccs[imin], mses[imin], **psC)  # mark the minimum of the cost function
    ax.annotate('Minimum of\ncost\nfunction',
                xy=(ccs[imin], mses[imin]*1.2), xycoords='data',
                xytext=(4, 7000), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(0.2,0.0),
                connectionstyle="angle3,angleA=10,angleB=90") )
    ax.text(2.2, 500, 'True\nparameter\nvalue')
    ax.annotate('', xy=(c-0.2, 500), xycoords='data',
                xytext=(4.1, 700), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(1.0,0.0),
                connectionstyle="angle3,angleA=-10,angleB=0") )
    ax.set_xlabel('c')
    ax.set_ylabel('Mean squared error')
    ax.set_xlim(2, 8.2)
    ax.set_ylim(0, 10000)
    ax.set_xticks(np.arange(2.0, 8.1, 2.0))
    ax.set_yticks(np.arange(0, 10001, 5000))


def plot_mse_min(ax, x, y, c):
    # compute the mean squared error for a grid of c values:
    ccs = np.arange(0.5, 10.0, 0.05)
    mses = np.zeros(len(ccs))
    for i, cc in enumerate(ccs):
        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
    # take the minimum over a coarse subsample of the grid
    # (every di-th point, starting at index i0):
    di = 20
    i0 = 14
    imin = np.argmin(mses[i0::di])*di + i0

    ax.plot(c, 500.0, **psB)
    ax.plot(ccs, mses, **lsAm)
    ax.plot(ccs[i0::di], mses[i0::di], **psAm)
    ax.plot(ccs[imin], mses[imin], **psC)
    ax.annotate('Estimated\nminimum of\ncost\nfunction',
                xy=(ccs[imin], mses[imin]*1.2), xycoords='data',
                xytext=(4, 6700), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(0.8,0.0),
                connectionstyle="angle3,angleA=0,angleB=85") )
    ax.set_xlabel('c')
    ax.set_xlim(2, 8.2)
    ax.set_ylim(0, 10000)
    ax.set_xticks(np.arange(2.0, 8.1, 2.0))
    ax.set_yticks(np.arange(0, 10001, 5000))
    ax.yaxis.set_major_formatter(mt.NullFormatter())


if __name__ == "__main__":
    x, y, c = create_data()
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=cm_size(figure_width, 1.1*figure_height))
    fig.subplots_adjust(**adjust_fs(left=8.0, right=1))
    plot_mse(ax1, x, y, c)
    plot_mse_min(ax2, x, y, c)
    fig.savefig("cubiccost.pdf")
    plt.close()
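Note that the script draws its styling (lsAm, psAm, psB, psC, cm_size,
figure_width, figure_height, adjust_fs) from the course's plotstyle
module via the star import. A minimal sketch of stand-in definitions
for running the script outside the lecture repository; all values here
are assumptions, not the repository's actual style:

    import matplotlib.pyplot as plt

    figure_width, figure_height = 15.0, 6.0  # assumed figure size in cm

    def cm_size(width, height):
        # convert centimeters to the inches expected by matplotlib
        return width / 2.54, height / 2.54

    def adjust_fs(left=6.0, right=1.5, bottom=3.0, top=1.0):
        # margins given in multiples of the font size,
        # converted to the figure fractions used by subplots_adjust()
        fs = plt.rcParams['font.size'] / 72.0 * 2.54  # font size in cm
        return dict(left=left*fs/figure_width,
                    right=1.0 - right*fs/figure_width,
                    bottom=bottom*fs/figure_height,
                    top=1.0 - top*fs/figure_height)

    lsAm = dict(color='tab:blue', lw=1.5)                       # line style
    psAm = dict(ls='none', marker='o', color='tab:blue', ms=4)  # point style
    psB = dict(ls='none', marker='o', color='tab:red', ms=7)
    psC = dict(ls='none', marker='o', color='tab:orange', ms=7)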
@@ -103,7 +103,7 @@ distributed around the model prediction.
 \end{exercise}


-\section{Objective function}
+\section{Cost function}

 The mean squared error is a so-called \enterm{objective function} or
 \enterm{cost function} (\determ{Kostenfunktion}). A cost function
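For orientation, the cost function referred to here, presumably the
equation labeled msecube in the full text, is the mean squared error
between the measured data and the cubic relation. A sketch
reconstructed from the surrounding text, not quoted from the source:

    \begin{equation}
      \label{msecube}
      f_{cost}(c|\{(x_i, y_i)\}) = \frac{1}{N} \sum_{i=1}^{N} \left(y_i - c x_i^3\right)^2
    \end{equation}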
@@ -146,42 +146,39 @@ Fehler!kleinster]{Methode der kleinsten Quadrate}).
 \end{exercise}


-\section{Error surface}
-For each combination of the two parameters $m$ and $b$ of the model we
-can use \eqnref{mseline} to calculate the corresponding value of the
-cost function. The cost function $f_{cost}(m,b|\{(x_i, y_i)\}|)$ is a
-function $f_{cost}(m,b)$, that maps the parameter values $m$ and $b$
-to a scalar error value. The error values describe a landscape over the
-$m$-$b$ plane, the error surface, that can be illustrated graphically
-using a 3-d surface-plot. $m$ and $b$ are plotted on the $x$- and $y$-
-axis while the third dimension indicates the error value
-(\figref{errorsurfacefig}).
+\section{Graph of the cost function}
+For each value of the parameter $c$ of the model we can use
+\eqnref{msecube} to calculate the corresponding value of the cost
+function. The cost function $f_{cost}(c|\{(x_i, y_i)\})$ is a
+function $f_{cost}(c)$ that maps the parameter value $c$ to a scalar
+error value. For a given data set we can thus simply plot the cost
+function as a function of $c$ (\figref{cubiccostfig}).

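The mapping from parameter value to error value described here is a
one-line computation per candidate $c$; a standalone sketch, assuming
x and y hold the data set as NumPy arrays:

    import numpy as np

    ccs = np.linspace(0.5, 10.0, 200)  # candidate values for the parameter c
    mses = np.array([np.mean((y - cc*x**3.0)**2.0) for cc in ccs])
    c_min = ccs[np.argmin(mses)]       # position of the minimum of the cost function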
 \begin{figure}[t]
-  \includegraphics[width=0.75\textwidth]{error_surface}
-  \titlecaption{Error surface.}{The two model parameters $m$ and $b$
-    define the base area of the surface plot. For each parameter
-    combination of slope and intercept the error is calculated. The
-    resulting surface has a minimum which indicates the parameter
-    combination that best fits the data.}\label{errorsurfacefig}
+  \includegraphics{cubiccost}
+  \titlecaption{Minimum of the cost function.}{For a given data set
+    the cost function, the mean squared error \eqnref{msecube}, as a
+    function of the unknown parameter $c$ has a minimum close to the
+    true value of $c$ that was used to generate the data (left).
+    Simply taking the minimum of the cost function computed for a
+    pre-set range of values of the parameter $c$ has the disadvantage
+    of limited precision (right), and the global minimum is missed
+    entirely if it lies outside the computed range.}\label{cubiccostfig}
 \end{figure}

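The precision limit mentioned in the caption follows directly from the
sampling of the parameter axis: the estimated position of the minimum
cannot be closer to the true one than about half the grid spacing. A
small sketch of this idea (function name and ranges are ours):

    import numpy as np

    def estimate_c(x, y, dc):
        # brute-force minimum of the mean squared error
        # on a grid of c values with spacing dc
        ccs = np.arange(0.5, 10.0, dc)
        mses = [np.mean((y - cc*x**3.0)**2.0) for cc in ccs]
        return ccs[np.argmin(mses)]

    # estimate_c(x, y, 1.0) can be off by up to 0.5,
    # estimate_c(x, y, 0.05) by up to 0.025; and any minimum outside
    # the interval [0.5, 10.0) is missed entirely.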
 \begin{exercise}{errorSurface.m}{}\label{errorsurfaceexercise}
   Generate 20 data pairs $(x_i|y_i)$ that are linearly related with
   slope $m=0.75$ and intercept $b=-40$, using \varcode{rand()} for
   drawing $x$ values between 0 and 120 and \varcode{randn()} for
-  jittering the $y$ values with a standard deviation of 15. Then
-  calculate the mean squared error between the data and straight lines
-  for a range of slopes and intercepts using the
-  \varcode{meanSquaredError()} function from the previous exercise.
-  Illustrates the error surface using the \code{surface()} function.
-  Consult the documentation to find out how to use \code{surface()}.
+  jittering the $y$ values with a standard deviation of 15.
+  Then calculate the mean squared error between the data and straight
+  lines for a range of slopes and intercepts using the
+  \varcode{meanSquaredErrorCubic()} function from the previous
+  exercise. Illustrate the error surface using the \code{surface()}
+  function.
 \end{exercise}

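For comparison with the figure script above, a compact Python
transcription of the data generation the exercise asks for (the
exercise itself is to be solved in MATLAB):

    import numpy as np

    n = 20
    x = 120.0*np.random.rand(n)    # x values drawn between 0 and 120
    y = 0.75*x - 40.0              # line with slope m=0.75 and intercept b=-40
    y += 15.0*np.random.randn(n)   # jitter with a standard deviation of 15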
-By looking at the error surface we can directly see the position of
-the minimum and thus estimate the optimal parameter combination. How
-can we use the error surface to guide an automatic optimization
-process?
+By looking at the plot of the cost function we can visually identify
+the position of the minimum and thus estimate the optimal value for
+the parameter $c$. How can we use the cost function to guide an
+automatic optimization process?

 The obvious approach would be to calculate the error surface for any
 combination of slope and intercept values and then find the position