[regression] new figure for minimum of a cost function
parent 1b14bca164 · commit 3541f30798
@@ -1,4 +1,4 @@
-function mse = meanSquaredError(x, y, c)
+function mse = meanSquaredErrorCubic(x, y, c)
 % Mean squared error between data pairs and a cubic relation.
 %
 % Arguments: x, vector of the input values
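For reference, the renamed function computes the mean squared error between the measured values y and the cubic prediction c*x^3. A minimal Python equivalent of the same computation (an illustrative sketch, not part of the commit):

import numpy as np

def mean_squared_error_cubic(x, y, c):
    # mean squared deviation between the data y and the cubic prediction c*x^3
    return np.mean((y - c * x**3)**2)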
regression/lecture/cubiccost.py (new file, 80 lines)
@@ -0,0 +1,80 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mt
from plotstyle import *  # plot styles (lsAm, psAm, psB, psC) and figure helpers from the local plotstyle module


def create_data():
    # wikipedia:
    # Generally, males vary in total length from 250 to 390 cm and
    # weigh between 90 and 306 kg
    c = 6
    x = np.arange(2.2, 3.9, 0.05)
    y = c * x**3.0
    rng = np.random.RandomState(32281)
    noise = rng.randn(len(x))*50
    y += noise
    return x, y, c


def plot_mse(ax, x, y, c):
    # evaluate the cost function on a fine grid of candidate c values:
    ccs = np.linspace(0.5, 10.0, 200)
    mses = np.zeros(len(ccs))
    for i, cc in enumerate(ccs):
        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
    imin = np.argmin(mses)

    ax.plot(ccs, mses, **lsAm)
    ax.plot(c, 500.0, **psB)
    ax.plot(ccs[imin], mses[imin], **psC)
    ax.annotate('Minimum of\ncost\nfunction',
                xy=(ccs[imin], mses[imin]*1.2), xycoords='data',
                xytext=(4, 7000), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(0.2,0.0),
                connectionstyle="angle3,angleA=10,angleB=90"))
    ax.text(2.2, 500, 'True\nparameter\nvalue')
    ax.annotate('', xy=(c-0.2, 500), xycoords='data',
                xytext=(4.1, 700), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(1.0,0.0),
                connectionstyle="angle3,angleA=-10,angleB=0"))
    ax.set_xlabel('c')
    ax.set_ylabel('Mean squared error')
    ax.set_xlim(2, 8.2)
    ax.set_ylim(0, 10000)
    ax.set_xticks(np.arange(2.0, 8.1, 2.0))
    ax.set_yticks(np.arange(0, 10001, 5000))


def plot_mse_min(ax, x, y, c):
    ccs = np.arange(0.5, 10.0, 0.05)
    mses = np.zeros(len(ccs))
    for i, cc in enumerate(ccs):
        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
    # subsample the fine grid to mimic a coarse brute-force search:
    di = 20
    i0 = 14
    imin = np.argmin(mses[i0::di])*di + i0

    ax.plot(c, 500.0, **psB)
    ax.plot(ccs, mses, **lsAm)
    ax.plot(ccs[i0::di], mses[i0::di], **psAm)
    ax.plot(ccs[imin], mses[imin], **psC)
    ax.annotate('Estimated\nminimum of\ncost\nfunction',
                xy=(ccs[imin], mses[imin]*1.2), xycoords='data',
                xytext=(4, 6700), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(0.8,0.0),
                connectionstyle="angle3,angleA=0,angleB=85"))
    ax.set_xlabel('c')
    ax.set_xlim(2, 8.2)
    ax.set_ylim(0, 10000)
    ax.set_xticks(np.arange(2.0, 8.1, 2.0))
    ax.set_yticks(np.arange(0, 10001, 5000))
    # y-axis is shared with the left panel, so hide its tick labels:
    ax.yaxis.set_major_formatter(mt.NullFormatter())


if __name__ == "__main__":
    x, y, c = create_data()
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=cm_size(figure_width, 1.1*figure_height))
    fig.subplots_adjust(**adjust_fs(left=8.0, right=1))
    plot_mse(ax1, x, y, c)
    plot_mse_min(ax2, x, y, c)
    fig.savefig("cubiccost.pdf")
    plt.close()
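Stripped of the plotting, both panels perform the same plain grid scan of the cost function; a minimal sketch (the function name gridsearch_c is illustrative, not from the file):

import numpy as np

def gridsearch_c(x, y, ccs):
    # evaluate the cost at every candidate c and return the best pair
    mses = np.array([np.mean((y - cc * x**3)**2) for cc in ccs])
    imin = np.argmin(mses)
    return ccs[imin], mses[imin]

# e.g.: c_est, mse_min = gridsearch_c(x, y, np.linspace(0.5, 10.0, 200))

The right panel deliberately subsamples the fine grid (di = 20, i0 = 14) so that the coarse scan's estimated minimum only approximately matches the true one, illustrating the precision limit discussed in the figure caption.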
@@ -103,7 +103,7 @@ distributed around the model prediction.
 \end{exercise}


-\section{Objective function}
+\section{Cost function}

 The mean squared error is a so-called \enterm{objective function} or
 \enterm{cost function} (\determ{Kostenfunktion}). A cost function
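The equation \eqnref{msecube} itself is not part of this diff; judging from the Python code above (np.mean((y - c*x**3)**2)), it presumably reads:

\[ f_{cost}(c|\{(x_i, y_i)\}) = \frac{1}{N} \sum_{i=1}^{N} \left( y_i - c x_i^3 \right)^2 \]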
@@ -146,42 +146,39 @@ Fehler!kleinster]{Methode der kleinsten Quadrate}).
 \end{exercise}


-\section{Error surface}
-For each combination of the two parameters $m$ and $b$ of the model we
-can use \eqnref{mseline} to calculate the corresponding value of the
-cost function. The cost function $f_{cost}(m,b|\{(x_i, y_i)\})$ is a
-function $f_{cost}(m,b)$, that maps the parameter values $m$ and $b$
-to a scalar error value. The error values describe a landscape over the
-$m$-$b$ plane, the error surface, that can be illustrated graphically
-using a 3-d surface-plot. $m$ and $b$ are plotted on the $x$- and $y$-
-axis while the third dimension indicates the error value
-(\figref{errorsurfacefig}).
+\section{Graph of the cost function}
+For each value of the parameter $c$ of the model we can use
+\eqnref{msecube} to calculate the corresponding value of the cost
+function. The cost function $f_{cost}(c|\{(x_i, y_i)\})$ is a
+function $f_{cost}(c)$ that maps the parameter value $c$ to a scalar
+error value. For a given data set we thus can simply plot the cost
+function as a function of $c$ (\figref{cubiccostfig}).

 \begin{figure}[t]
-  \includegraphics[width=0.75\textwidth]{error_surface}
-  \titlecaption{Error surface.}{The two model parameters $m$ and $b$
-    define the base area of the surface plot. For each parameter
-    combination of slope and intercept the error is calculated. The
-    resulting surface has a minimum which indicates the parameter
-    combination that best fits the data.}\label{errorsurfacefig}
+  \includegraphics{cubiccost}
+  \titlecaption{Minimum of the cost function.}{For a given data set
+    the cost function, the mean squared error \eqnref{msecube}, as a
+    function of the unknown parameter $c$ has a minimum close to the
+    true value of $c$ that was used to generate the data
+    (left). Simply taking the absolute minimum of the cost function
+    computed for a pre-set range of values for the parameter $c$ has
+    the disadvantage of being limited in precision (right) and of
+    possibly missing the global minimum entirely if it lies outside
+    the computed range.}\label{cubiccostfig}
 \end{figure}

 \begin{exercise}{errorSurface.m}{}\label{errorsurfaceexercise}
-  Generate 20 data pairs $(x_i|y_i)$ that are linearly related with
-  slope $m=0.75$ and intercept $b=-40$, using \varcode{rand()} for
-  drawing $x$ values between 0 and 120 and \varcode{randn()} for
-  jittering the $y$ values with a standard deviation of 15. Then
-  calculate the mean squared error between the data and straight lines
-  for a range of slopes and intercepts using the
-  \varcode{meanSquaredError()} function from the previous exercise.
-  Illustrates the error surface using the \code{surface()} function.
-  Consult the documentation to find out how to use \code{surface()}.
+  Calculate the mean squared error between the data and straight
+  lines for a range of slopes and intercepts using the
+  \varcode{meanSquaredErrorCubic()} function from the previous
+  exercise. Illustrate the error surface using the \code{surface()}
+  function.
 \end{exercise}

-By looking at the error surface we can directly see the position of
-the minimum and thus estimate the optimal parameter combination. How
-can we use the error surface to guide an automatic optimization
-process?
+By looking at the plot of the cost function we can visually identify
+the position of the minimum and thus estimate the optimal value for
+the parameter $c$. How can we use the error surface to guide an
+automatic optimization process?

 The obvious approach would be to calculate the error surface for any
 combination of slope and intercept values and then find the position
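The precision limit mentioned in the caption is easy to reproduce numerically; a self-contained sketch, reusing the data generation from cubiccost.py above (best_c is an illustrative helper, not from the commit):

import numpy as np

rng = np.random.RandomState(32281)
x = np.arange(2.2, 3.9, 0.05)
y = 6 * x**3 + rng.randn(len(x)) * 50

def best_c(ccs):
    # brute-force minimum of the mean squared error over candidate c values
    mses = [np.mean((y - cc * x**3)**2) for cc in ccs]
    return ccs[np.argmin(mses)]

print(best_c(np.arange(0.5, 10.0, 1.0)))   # coarse grid: off by up to half a step
print(best_c(np.arange(0.5, 10.0, 0.01)))  # fine grid: close to the true c = 6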