diff --git a/regression/code/meanSquaredErrorCubic.m b/regression/code/meanSquaredErrorCubic.m
index bb17137..44870a1 100644
--- a/regression/code/meanSquaredErrorCubic.m
+++ b/regression/code/meanSquaredErrorCubic.m
@@ -1,4 +1,4 @@
-function mse = meanSquaredError(x, y, c)
+function mse = meanSquaredErrorCubic(x, y, c)
 % Mean squared error between data pairs and a cubic relation.
 %
 % Arguments: x, vector of the input values
diff --git a/regression/lecture/cubiccost.py b/regression/lecture/cubiccost.py
new file mode 100644
index 0000000..0b8d2dc
--- /dev/null
+++ b/regression/lecture/cubiccost.py
@@ -0,0 +1,80 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.ticker as mt
+from plotstyle import *
+
+def create_data():
+    # wikipedia:
+    # Generally, males vary in total length from 250 to 390 cm and
+    # weigh between 90 and 306 kg
+    c = 6
+    x = np.arange(2.2, 3.9, 0.05)
+    y = c * x**3.0
+    rng = np.random.RandomState(32281)
+    noise = rng.randn(len(x))*50
+    y += noise
+    return x, y, c
+
+
+def plot_mse(ax, x, y, c):
+    ccs = np.linspace(0.5, 10.0, 200)
+    mses = np.zeros(len(ccs))
+    for i, cc in enumerate(ccs):
+        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
+    imin = np.argmin(mses)
+
+    ax.plot(ccs, mses, **lsAm)
+    ax.plot(c, 500.0, **psB)
+    ax.plot(ccs[imin], mses[imin], **psC)
+    ax.annotate('Minimum of\ncost\nfunction',
+                xy=(ccs[imin], mses[imin]*1.2), xycoords='data',
+                xytext=(4, 7000), textcoords='data', ha='left',
+                arrowprops=dict(arrowstyle="->", relpos=(0.2,0.0),
+                connectionstyle="angle3,angleA=10,angleB=90") )
+    ax.text(2.2, 500, 'True\nparameter\nvalue')
+    ax.annotate('', xy=(c-0.2, 500), xycoords='data',
+                xytext=(4.1, 700), textcoords='data', ha='left',
+                arrowprops=dict(arrowstyle="->", relpos=(1.0,0.0),
+                connectionstyle="angle3,angleA=-10,angleB=0") )
+    ax.set_xlabel('c')
+    ax.set_ylabel('Mean squared error')
+    ax.set_xlim(2, 8.2)
+    ax.set_ylim(0, 10000)
+    ax.set_xticks(np.arange(2.0, 8.1, 2.0))
+    ax.set_yticks(np.arange(0, 10001, 5000))
+
+
+def plot_mse_min(ax, x, y, c):
+    ccs = np.arange(0.5, 10.0, 0.05)
+    mses = np.zeros(len(ccs))
+    for i, cc in enumerate(ccs):
+        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
+    di = 20
+    i0 = 14
+    imin = np.argmin(mses[i0::di])*di + i0
+
+    ax.plot(c, 500.0, **psB)
+    ax.plot(ccs, mses, **lsAm)
+    ax.plot(ccs[i0::di], mses[i0::di], **psAm)
+    ax.plot(ccs[imin], mses[imin], **psC)
+    ax.annotate('Estimated\nminimum of\ncost\nfunction',
+                xy=(ccs[imin], mses[imin]*1.2), xycoords='data',
+                xytext=(4, 6700), textcoords='data', ha='left',
+                arrowprops=dict(arrowstyle="->", relpos=(0.8,0.0),
+                connectionstyle="angle3,angleA=0,angleB=85") )
+    ax.set_xlabel('c')
+    ax.set_xlim(2, 8.2)
+    ax.set_ylim(0, 10000)
+    ax.set_xticks(np.arange(2.0, 8.1, 2.0))
+    ax.set_yticks(np.arange(0, 10001, 5000))
+    ax.yaxis.set_major_formatter(mt.NullFormatter())
+
+
+if __name__ == "__main__":
+    x, y, c = create_data()
+    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=cm_size(figure_width, 1.1*figure_height))
+    fig.subplots_adjust(**adjust_fs(left=8.0, right=1))
+    plot_mse(ax1, x, y, c)
+    plot_mse_min(ax2, x, y, c)
+    fig.savefig("cubiccost.pdf")
+    plt.close()
diff --git a/regression/lecture/regression.tex b/regression/lecture/regression.tex
index 7fbffbf..8867744 100644
--- a/regression/lecture/regression.tex
+++ b/regression/lecture/regression.tex
@@ -103,7 +103,7 @@ distributed around the model prediction.
 
 \end{exercise}
 
-\section{Objective function}
+\section{Cost function}
 
 The mean squared error is a so called \enterm{objective function} or
 \enterm{cost function} (\determ{Kostenfunktion}). A cost function
@@ -146,42 +146,39 @@ Fehler!kleinster]{Methode der kleinsten Quadrate}).
 \end{exercise}
 
-\section{Error surface}
-For each combination of the two parameters $m$ and $b$ of the model we
-can use \eqnref{mseline} to calculate the corresponding value of the
-cost function. The cost function $f_{cost}(m,b|\{(x_i, y_i)\}|)$ is a
-function $f_{cost}(m,b)$, that maps the parameter values $m$ and $b$
-to a scalar error value. The error values describe a landscape over the
-$m$-$b$ plane, the error surface, that can be illustrated graphically
-using a 3-d surface-plot. $m$ and $b$ are plotted on the $x$- and $y$-
-axis while the third dimension indicates the error value
-(\figref{errorsurfacefig}).
+\section{Graph of the cost function}
+For each value of the parameter $c$ of the model we can use
+\eqnref{msecube} to calculate the corresponding value of the cost
+function. The cost function $f_{cost}(c|\{(x_i, y_i)\})$ is a
+function $f_{cost}(c)$ that maps the parameter value $c$ to a scalar
+error value. For a given data set we can thus simply plot the cost
+function as a function of $c$ (\figref{cubiccostfig}).
 
 \begin{figure}[t]
-  \includegraphics[width=0.75\textwidth]{error_surface}
-  \titlecaption{Error surface.}{The two model parameters $m$ and $b$
-    define the base area of the surface plot. For each parameter
-    combination of slope and intercept the error is calculated. The
-    resulting surface has a minimum which indicates the parameter
-    combination that best fits the data.}\label{errorsurfacefig}
+  \includegraphics{cubiccost}
+  \titlecaption{Minimum of the cost function.}{For a given data set,
+    the cost function, i.e. the mean squared error \eqnref{msecube}
+    as a function of the unknown parameter $c$, has a minimum close
+    to the true value of $c$ that was used to generate the data
+    (left). Simply taking the absolute minimum of the cost function
+    computed for a pre-set range of values of the parameter $c$ has
+    the disadvantage of limited precision (right) and bears the risk
+    of entirely missing the global minimum if it lies outside the
+    computed range.}\label{cubiccostfig}
 \end{figure}
 
 \begin{exercise}{errorSurface.m}{}\label{errorsurfaceexercise}
-  Generate 20 data pairs $(x_i|y_i)$ that are linearly related with
-  slope $m=0.75$ and intercept $b=-40$, using \varcode{rand()} for
-  drawing $x$ values between 0 and 120 and \varcode{randn()} for
-  jittering the $y$ values with a standard deviation of 15. Then
-  calculate the mean squared error between the data and straight lines
-  for a range of slopes and intercepts using the
-  \varcode{meanSquaredError()} function from the previous exercise.
-  Illustrates the error surface using the \code{surface()} function.
-  Consult the documentation to find out how to use \code{surface()}.
+  Calculate the mean squared error between the data and the cubic
+  relation for a range of values of the parameter $c$, using the
+  \varcode{meanSquaredErrorCubic()} function from the previous
+  exercise. Plot the resulting error values as a function of $c$.
 \end{exercise}
 
-By looking at the error surface we can directly see the position of
-the minimum and thus estimate the optimal parameter combination. How
-can we use the error surface to guide an automatic optimization
-process?
+By looking at the plot of the cost function we can visually identify
+the position of the minimum and thus estimate the optimal value for
+the parameter $c$. How can we use the graph of the cost function to
+guide an automatic optimization process?
 
 The obvious approach would be to calculate the error surface for any
 combination of slope and intercept values and then find the position
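
A possible MATLAB solution for the updated exercise, as a sketch only: the data values mirror cubiccost.py, and the variable names and plotting details are illustrative rather than part of the patch.

    % simulate data as in cubiccost.py (values are assumptions for illustration)
    c = 6.0;                               % true parameter used for the simulated data
    x = 2.2:0.05:3.9;                      % input values (body length)
    y = c * x.^3 + 50.0 * randn(size(x));  % cubic relation plus noise

    % evaluate the cost function for a pre-set range of parameter values
    cs = 0.5:0.05:10.0;                    % candidate values for the parameter c
    mses = zeros(size(cs));
    for i = 1:length(cs)
        mses(i) = meanSquaredErrorCubic(x, y, cs(i));
    end
    [msemin, imin] = min(mses);            % smallest error and its index

    % plot the cost function and mark the estimated minimum
    plot(cs, mses);
    hold on;
    plot(cs(imin), msemin, 'or');
    hold off;
    xlabel('c');
    ylabel('mean squared error');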