[regression] new figure for minimum of a cost function
parent 1b14bca164 · commit 3541f30798
@@ -1,4 +1,4 @@
-function mse = meanSquaredError(x, y, c)
+function mse = meanSquaredErrorCubic(x, y, c)
 % Mean squared error between data pairs and a cubic relation.
 %
 % Arguments: x, vector of the input values
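For reference, the renamed function computes the mean squared error between the measured values y and the cubic prediction c*x^3. A minimal Python equivalent of the same computation (an illustrative sketch, not part of the commit):

import numpy as np

def mean_squared_error_cubic(x, y, c):
    # mean squared deviation between the data y and the cubic prediction c*x^3
    return np.mean((y - c * x**3)**2)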
regression/lecture/cubiccost.py (new file, 80 lines)
@@ -0,0 +1,80 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mt
from plotstyle import *  # plot styles (lsAm, psAm, psB, psC) and figure helpers from the local plotstyle module


def create_data():
    # wikipedia:
    # Generally, males vary in total length from 250 to 390 cm and
    # weigh between 90 and 306 kg
    c = 6
    x = np.arange(2.2, 3.9, 0.05)
    y = c * x**3.0
    rng = np.random.RandomState(32281)
    noise = rng.randn(len(x))*50
    y += noise
    return x, y, c


def plot_mse(ax, x, y, c):
    # evaluate the cost function on a fine grid of candidate c values:
    ccs = np.linspace(0.5, 10.0, 200)
    mses = np.zeros(len(ccs))
    for i, cc in enumerate(ccs):
        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
    imin = np.argmin(mses)

    ax.plot(ccs, mses, **lsAm)
    ax.plot(c, 500.0, **psB)
    ax.plot(ccs[imin], mses[imin], **psC)
    ax.annotate('Minimum of\ncost\nfunction',
                xy=(ccs[imin], mses[imin]*1.2), xycoords='data',
                xytext=(4, 7000), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(0.2,0.0),
                connectionstyle="angle3,angleA=10,angleB=90"))
    ax.text(2.2, 500, 'True\nparameter\nvalue')
    ax.annotate('', xy=(c-0.2, 500), xycoords='data',
                xytext=(4.1, 700), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(1.0,0.0),
                connectionstyle="angle3,angleA=-10,angleB=0"))
    ax.set_xlabel('c')
    ax.set_ylabel('Mean squared error')
    ax.set_xlim(2, 8.2)
    ax.set_ylim(0, 10000)
    ax.set_xticks(np.arange(2.0, 8.1, 2.0))
    ax.set_yticks(np.arange(0, 10001, 5000))


def plot_mse_min(ax, x, y, c):
    ccs = np.arange(0.5, 10.0, 0.05)
    mses = np.zeros(len(ccs))
    for i, cc in enumerate(ccs):
        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
    # subsample the fine grid to mimic a coarse brute-force search:
    di = 20
    i0 = 14
    imin = np.argmin(mses[i0::di])*di + i0

    ax.plot(c, 500.0, **psB)
    ax.plot(ccs, mses, **lsAm)
    ax.plot(ccs[i0::di], mses[i0::di], **psAm)
    ax.plot(ccs[imin], mses[imin], **psC)
    ax.annotate('Estimated\nminimum of\ncost\nfunction',
                xy=(ccs[imin], mses[imin]*1.2), xycoords='data',
                xytext=(4, 6700), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(0.8,0.0),
                connectionstyle="angle3,angleA=0,angleB=85"))
    ax.set_xlabel('c')
    ax.set_xlim(2, 8.2)
    ax.set_ylim(0, 10000)
    ax.set_xticks(np.arange(2.0, 8.1, 2.0))
    ax.set_yticks(np.arange(0, 10001, 5000))
    # y-axis is shared with the left panel, so hide its tick labels:
    ax.yaxis.set_major_formatter(mt.NullFormatter())


if __name__ == "__main__":
    x, y, c = create_data()
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=cm_size(figure_width, 1.1*figure_height))
    fig.subplots_adjust(**adjust_fs(left=8.0, right=1))
    plot_mse(ax1, x, y, c)
    plot_mse_min(ax2, x, y, c)
    fig.savefig("cubiccost.pdf")
    plt.close()
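Stripped of the plotting, both panels perform the same plain grid scan of the cost function; a minimal sketch (the function name gridsearch_c is illustrative, not from the file):

import numpy as np

def gridsearch_c(x, y, ccs):
    # evaluate the cost at every candidate c and return the best pair
    mses = np.array([np.mean((y - cc * x**3)**2) for cc in ccs])
    imin = np.argmin(mses)
    return ccs[imin], mses[imin]

# e.g.: c_est, mse_min = gridsearch_c(x, y, np.linspace(0.5, 10.0, 200))

The right panel deliberately subsamples the fine grid (di = 20, i0 = 14) so that the coarse scan's estimated minimum only approximately matches the true one, illustrating the precision limit discussed in the figure caption.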
@@ -103,7 +103,7 @@ distributed around the model prediction.
 \end{exercise}


-\section{Objective function}
+\section{Cost function}

 The mean squared error is a so-called \enterm{objective function} or
 \enterm{cost function} (\determ{Kostenfunktion}). A cost function
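The equation \eqnref{msecube} itself is not part of this diff; judging from the Python code above (np.mean((y - c*x**3)**2)), it presumably reads:

\[ f_{cost}(c|\{(x_i, y_i)\}) = \frac{1}{N} \sum_{i=1}^{N} \left( y_i - c x_i^3 \right)^2 \]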
@@ -146,42 +146,39 @@ Fehler!kleinster]{Methode der kleinsten Quadrate}).
 \end{exercise}


-\section{Error surface}
-For each combination of the two parameters $m$ and $b$ of the model we
-can use \eqnref{mseline} to calculate the corresponding value of the
-cost function. The cost function $f_{cost}(m,b|\{(x_i, y_i)\})$ is a
-function $f_{cost}(m,b)$, that maps the parameter values $m$ and $b$
-to a scalar error value. The error values describe a landscape over the
-$m$-$b$ plane, the error surface, that can be illustrated graphically
-using a 3-d surface-plot. $m$ and $b$ are plotted on the $x$- and $y$-
-axis while the third dimension indicates the error value
-(\figref{errorsurfacefig}).
+\section{Graph of the cost function}
+For each value of the parameter $c$ of the model we can use
+\eqnref{msecube} to calculate the corresponding value of the cost
+function. The cost function $f_{cost}(c|\{(x_i, y_i)\})$ is a
+function $f_{cost}(c)$ that maps the parameter value $c$ to a scalar
+error value. For a given data set we thus can simply plot the cost
+function as a function of $c$ (\figref{cubiccostfig}).

 \begin{figure}[t]
-  \includegraphics[width=0.75\textwidth]{error_surface}
-  \titlecaption{Error surface.}{The two model parameters $m$ and $b$
-    define the base area of the surface plot. For each parameter
-    combination of slope and intercept the error is calculated. The
-    resulting surface has a minimum which indicates the parameter
-    combination that best fits the data.}\label{errorsurfacefig}
+  \includegraphics{cubiccost}
+  \titlecaption{Minimum of the cost function.}{For a given data set
+    the cost function, the mean squared error \eqnref{msecube}, as a
+    function of the unknown parameter $c$ has a minimum close to the
+    true value of $c$ that was used to generate the data
+    (left). Simply taking the absolute minimum of the cost function
+    computed for a pre-set range of values for the parameter $c$ has
+    the disadvantage of being limited in precision (right) and of
+    possibly missing the global minimum entirely if it lies outside
+    the computed range.}\label{cubiccostfig}
 \end{figure}

 \begin{exercise}{errorSurface.m}{}\label{errorsurfaceexercise}
-  Generate 20 data pairs $(x_i|y_i)$ that are linearly related with
-  slope $m=0.75$ and intercept $b=-40$, using \varcode{rand()} for
-  drawing $x$ values between 0 and 120 and \varcode{randn()} for
-  jittering the $y$ values with a standard deviation of 15. Then
-  calculate the mean squared error between the data and straight lines
-  for a range of slopes and intercepts using the
-  \varcode{meanSquaredError()} function from the previous exercise.
-  Illustrates the error surface using the \code{surface()} function.
-  Consult the documentation to find out how to use \code{surface()}.
+  Calculate the mean squared error between the data and straight
+  lines for a range of slopes and intercepts using the
+  \varcode{meanSquaredErrorCubic()} function from the previous
+  exercise. Illustrate the error surface using the \code{surface()}
+  function.
 \end{exercise}

-By looking at the error surface we can directly see the position of
-the minimum and thus estimate the optimal parameter combination. How
-can we use the error surface to guide an automatic optimization
-process?
+By looking at the plot of the cost function we can visually identify
+the position of the minimum and thus estimate the optimal value for
+the parameter $c$. How can we use the error surface to guide an
+automatic optimization process?

 The obvious approach would be to calculate the error surface for any
 combination of slope and intercept values and then find the position
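The precision limit mentioned in the caption is easy to reproduce numerically; a self-contained sketch, reusing the data generation from cubiccost.py above (best_c is an illustrative helper, not from the commit):

import numpy as np

rng = np.random.RandomState(32281)
x = np.arange(2.2, 3.9, 0.05)
y = 6 * x**3 + rng.randn(len(x)) * 50

def best_c(ccs):
    # brute-force minimum of the mean squared error over candidate c values
    mses = [np.mean((y - cc * x**3)**2) for cc in ccs]
    return ccs[np.argmin(mses)]

print(best_c(np.arange(0.5, 10.0, 1.0)))   # coarse grid: off by up to half a step
print(best_c(np.arange(0.5, 10.0, 0.01)))  # fine grid: close to the true c = 6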