[regression] new figure for minimum of a cost function
parent 1b14bca164
commit 3541f30798
@@ -1,4 +1,4 @@
-function mse = meanSquaredError(x, y, c)
+function mse = meanSquaredErrorCubic(x, y, c)
 % Mean squared error between data pairs and a cubic relation.
 %
 % Arguments: x, vector of the input values
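The body of the function is not shown in this hunk. Judging from the
docstring and from the figure script below, it presumably averages the
squared residuals against the cubic relation y = c*x^3. A minimal
Python transcription of that assumed body (the snake_case name is ours):

    import numpy as np

    def mean_squared_error_cubic(x, y, c):
        # mean squared error between data pairs (x_i, y_i) and y = c*x^3
        return np.mean((y - c * x**3.0)**2.0)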
regression/lecture/cubiccost.py (new file, 80 lines)
@@ -0,0 +1,80 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mt
from plotstyle import *

def create_data():
    # wikipedia:
    # Generally, males vary in total length from 250 to 390 cm and
    # weigh between 90 and 306 kg
    c = 6
    x = np.arange(2.2, 3.9, 0.05)
    y = c * x**3.0
    rng = np.random.RandomState(32281)
    noise = rng.randn(len(x))*50
    y += noise
    return x, y, c


def plot_mse(ax, x, y, c):
    # compute the mean squared error for a dense grid of c values:
    ccs = np.linspace(0.5, 10.0, 200)
    mses = np.zeros(len(ccs))
    for i, cc in enumerate(ccs):
        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
    imin = np.argmin(mses)

    ax.plot(ccs, mses, **lsAm)
    ax.plot(c, 500.0, **psB)               # mark the true parameter value
    ax.plot(ccs[imin], mses[imin], **psC)  # mark the minimum of the cost function
    ax.annotate('Minimum of\ncost\nfunction',
                xy=(ccs[imin], mses[imin]*1.2), xycoords='data',
                xytext=(4, 7000), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(0.2,0.0),
                connectionstyle="angle3,angleA=10,angleB=90") )
    ax.text(2.2, 500, 'True\nparameter\nvalue')
    ax.annotate('', xy=(c-0.2, 500), xycoords='data',
                xytext=(4.1, 700), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(1.0,0.0),
                connectionstyle="angle3,angleA=-10,angleB=0") )
    ax.set_xlabel('c')
    ax.set_ylabel('Mean squared error')
    ax.set_xlim(2, 8.2)
    ax.set_ylim(0, 10000)
    ax.set_xticks(np.arange(2.0, 8.1, 2.0))
    ax.set_yticks(np.arange(0, 10001, 5000))


def plot_mse_min(ax, x, y, c):
    # compute the mean squared error for a grid of c values:
    ccs = np.arange(0.5, 10.0, 0.05)
    mses = np.zeros(len(ccs))
    for i, cc in enumerate(ccs):
        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
    # take the minimum over a coarse subsample of the grid
    # (every di-th point, starting at index i0):
    di = 20
    i0 = 14
    imin = np.argmin(mses[i0::di])*di + i0

    ax.plot(c, 500.0, **psB)
    ax.plot(ccs, mses, **lsAm)
    ax.plot(ccs[i0::di], mses[i0::di], **psAm)
    ax.plot(ccs[imin], mses[imin], **psC)
    ax.annotate('Estimated\nminimum of\ncost\nfunction',
                xy=(ccs[imin], mses[imin]*1.2), xycoords='data',
                xytext=(4, 6700), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(0.8,0.0),
                connectionstyle="angle3,angleA=0,angleB=85") )
    ax.set_xlabel('c')
    ax.set_xlim(2, 8.2)
    ax.set_ylim(0, 10000)
    ax.set_xticks(np.arange(2.0, 8.1, 2.0))
    ax.set_yticks(np.arange(0, 10001, 5000))
    ax.yaxis.set_major_formatter(mt.NullFormatter())


if __name__ == "__main__":
    x, y, c = create_data()
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=cm_size(figure_width, 1.1*figure_height))
    fig.subplots_adjust(**adjust_fs(left=8.0, right=1))
    plot_mse(ax1, x, y, c)
    plot_mse_min(ax2, x, y, c)
    fig.savefig("cubiccost.pdf")
    plt.close()
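Note that the script draws its styling (lsAm, psAm, psB, psC, cm_size,
figure_width, figure_height, adjust_fs) from the course's plotstyle
module via the star import. A minimal sketch of stand-in definitions
for running the script outside the lecture repository; all values here
are assumptions, not the repository's actual style:

    import matplotlib.pyplot as plt

    figure_width, figure_height = 15.0, 6.0  # assumed figure size in cm

    def cm_size(width, height):
        # convert centimeters to the inches expected by matplotlib
        return width / 2.54, height / 2.54

    def adjust_fs(left=6.0, right=1.5, bottom=3.0, top=1.0):
        # margins given in multiples of the font size,
        # converted to the figure fractions used by subplots_adjust()
        fs = plt.rcParams['font.size'] / 72.0 * 2.54  # font size in cm
        return dict(left=left*fs/figure_width,
                    right=1.0 - right*fs/figure_width,
                    bottom=bottom*fs/figure_height,
                    top=1.0 - top*fs/figure_height)

    lsAm = dict(color='tab:blue', lw=1.5)                       # line style
    psAm = dict(ls='none', marker='o', color='tab:blue', ms=4)  # point style
    psB = dict(ls='none', marker='o', color='tab:red', ms=7)
    psC = dict(ls='none', marker='o', color='tab:orange', ms=7)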
@@ -103,7 +103,7 @@ distributed around the model prediction.
 \end{exercise}


-\section{Objective function}
+\section{Cost function}

 The mean squared error is a so-called \enterm{objective function} or
 \enterm{cost function} (\determ{Kostenfunktion}). A cost function
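For orientation, the cost function referred to here, presumably the
equation labeled msecube in the full text, is the mean squared error
between the measured data and the cubic relation. A sketch
reconstructed from the surrounding text, not quoted from the source:

    \begin{equation}
      \label{msecube}
      f_{cost}(c|\{(x_i, y_i)\}) = \frac{1}{N} \sum_{i=1}^{N} \left(y_i - c x_i^3\right)^2
    \end{equation}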
@@ -146,42 +146,39 @@ Fehler!kleinster]{Methode der kleinsten Quadrate}).
 \end{exercise}


-\section{Error surface}
-For each combination of the two parameters $m$ and $b$ of the model we
-can use \eqnref{mseline} to calculate the corresponding value of the
-cost function. The cost function $f_{cost}(m,b|\{(x_i, y_i)\}|)$ is a
-function $f_{cost}(m,b)$, that maps the parameter values $m$ and $b$
-to a scalar error value. The error values describe a landscape over the
-$m$-$b$ plane, the error surface, that can be illustrated graphically
-using a 3-d surface-plot. $m$ and $b$ are plotted on the $x$- and $y$-
-axis while the third dimension indicates the error value
-(\figref{errorsurfacefig}).
+\section{Graph of the cost function}
+For each value of the parameter $c$ of the model we can use
+\eqnref{msecube} to calculate the corresponding value of the cost
+function. The cost function $f_{cost}(c|\{(x_i, y_i)\})$ is a
+function $f_{cost}(c)$ that maps the parameter value $c$ to a scalar
+error value. For a given data set we can thus simply plot the cost
+function as a function of $c$ (\figref{cubiccostfig}).

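The mapping from parameter value to error value described here is a
one-line computation per candidate $c$; a standalone sketch, assuming
x and y hold the data set as NumPy arrays:

    import numpy as np

    ccs = np.linspace(0.5, 10.0, 200)  # candidate values for the parameter c
    mses = np.array([np.mean((y - cc*x**3.0)**2.0) for cc in ccs])
    c_min = ccs[np.argmin(mses)]       # position of the minimum of the cost function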
 \begin{figure}[t]
-  \includegraphics[width=0.75\textwidth]{error_surface}
-  \titlecaption{Error surface.}{The two model parameters $m$ and $b$
-    define the base area of the surface plot. For each parameter
-    combination of slope and intercept the error is calculated. The
-    resulting surface has a minimum which indicates the parameter
-    combination that best fits the data.}\label{errorsurfacefig}
+  \includegraphics{cubiccost}
+  \titlecaption{Minimum of the cost function.}{For a given data set
+    the cost function, the mean squared error \eqnref{msecube}, as a
+    function of the unknown parameter $c$ has a minimum close to the
+    true value of $c$ that was used to generate the data (left).
+    Simply taking the minimum of the cost function computed for a
+    pre-set range of values of the parameter $c$ has the disadvantage
+    of limited precision (right), and the global minimum is missed
+    entirely if it lies outside the computed range.}\label{cubiccostfig}
 \end{figure}

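The precision limit mentioned in the caption follows directly from the
sampling of the parameter axis: the estimated position of the minimum
cannot be closer to the true one than about half the grid spacing. A
small sketch of this idea (function name and ranges are ours):

    import numpy as np

    def estimate_c(x, y, dc):
        # brute-force minimum of the mean squared error
        # on a grid of c values with spacing dc
        ccs = np.arange(0.5, 10.0, dc)
        mses = [np.mean((y - cc*x**3.0)**2.0) for cc in ccs]
        return ccs[np.argmin(mses)]

    # estimate_c(x, y, 1.0) can be off by up to 0.5,
    # estimate_c(x, y, 0.05) by up to 0.025; and any minimum outside
    # the interval [0.5, 10.0) is missed entirely.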
 \begin{exercise}{errorSurface.m}{}\label{errorsurfaceexercise}
   Generate 20 data pairs $(x_i|y_i)$ that are linearly related with
   slope $m=0.75$ and intercept $b=-40$, using \varcode{rand()} for
   drawing $x$ values between 0 and 120 and \varcode{randn()} for
-  jittering the $y$ values with a standard deviation of 15. Then
-  calculate the mean squared error between the data and straight lines
-  for a range of slopes and intercepts using the
-  \varcode{meanSquaredError()} function from the previous exercise.
-  Illustrates the error surface using the \code{surface()} function.
-  Consult the documentation to find out how to use \code{surface()}.
+  jittering the $y$ values with a standard deviation of 15.
+  Then calculate the mean squared error between the data and straight
+  lines for a range of slopes and intercepts using the
+  \varcode{meanSquaredErrorCubic()} function from the previous
+  exercise. Illustrate the error surface using the \code{surface()}
+  function.
 \end{exercise}

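For comparison with the figure script above, a compact Python
transcription of the data generation the exercise asks for (the
exercise itself is to be solved in MATLAB):

    import numpy as np

    n = 20
    x = 120.0*np.random.rand(n)    # x values drawn between 0 and 120
    y = 0.75*x - 40.0              # line with slope m=0.75 and intercept b=-40
    y += 15.0*np.random.randn(n)   # jitter with a standard deviation of 15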
-By looking at the error surface we can directly see the position of
-the minimum and thus estimate the optimal parameter combination. How
-can we use the error surface to guide an automatic optimization
-process?
+By looking at the plot of the cost function we can visually identify
+the position of the minimum and thus estimate the optimal value for
+the parameter $c$. How can we use the cost function to guide an
+automatic optimization process?

 The obvious approach would be to calculate the error surface for any
 combination of slope and intercept values and then find the position