From 1b14bca16419b8782b1af6e388b66168f0506676 Mon Sep 17 00:00:00 2001
From: Jan Benda
Date: Wed, 16 Dec 2020 09:25:01 +0100
Subject: [PATCH] [regression] objective function section done

---
 chapter.mk                                |  4 +--
 regression/code/meanSquaredError.m        | 12 ---------
 regression/code/meanSquaredErrorCubic.m   | 11 ++++++++
 regression/code/meansquarederrorline.m    |  4 +--
 regression/lecture/cubicfunc.py           |  2 +-
 regression/lecture/regression-chapter.tex | 11 --------
 regression/lecture/regression.tex         | 32 +++++++++++------------
 7 files changed, 31 insertions(+), 45 deletions(-)
 delete mode 100644 regression/code/meanSquaredError.m
 create mode 100644 regression/code/meanSquaredErrorCubic.m

diff --git a/chapter.mk b/chapter.mk
index 0482aed..f642e2e 100644
--- a/chapter.mk
+++ b/chapter.mk
@@ -10,8 +10,8 @@ pythonplots : $(PYPDFFILES)
 
 $(PYPDFFILES) : %.pdf: %.py ../../plotstyle.py
 	PYTHONPATH="../../" python3 $<
-	#ps2pdf $@ $(@:.pdf=-temp.pdf) # strip fonts, saves only about 1.5MB
-	#mv $(@:.pdf=-temp.pdf) $@
+#ps2pdf $@ $(@:.pdf=-temp.pdf) # strip fonts, saves only about 1.5MB
+#mv $(@:.pdf=-temp.pdf) $@
 
 cleanpythonplots :
 	rm -f $(PYPDFFILES)
diff --git a/regression/code/meanSquaredError.m b/regression/code/meanSquaredError.m
deleted file mode 100644
index 6eeea7b..0000000
--- a/regression/code/meanSquaredError.m
+++ /dev/null
@@ -1,12 +0,0 @@
-function mse = meanSquaredError(x, y, parameter)
-% Mean squared error between a straight line and data pairs.
-%
-% Arguments: x, vector of the input values
-%            y, vector of the corresponding measured output values
-%            parameter, vector containing slope and intercept
-%            as the 1st and 2nd element, respectively.
-%
-% Returns: mse, the mean-squared-error.
-
-    mse = mean((y - x * parameter(1) - parameter(2)).^2);
-end
diff --git a/regression/code/meanSquaredErrorCubic.m b/regression/code/meanSquaredErrorCubic.m
new file mode 100644
index 0000000..bb17137
--- /dev/null
+++ b/regression/code/meanSquaredErrorCubic.m
@@ -0,0 +1,11 @@
+function mse = meanSquaredErrorCubic(x, y, c)
+% Mean squared error between data pairs and a cubic relation.
+%
+% Arguments: x, vector of the input values
+%            y, vector of the corresponding measured output values
+%            c, the factor for the cubic relation.
+%
+% Returns: mse, the mean-squared-error.
+
+    mse = mean((y - c*x.^3).^2);
+end
diff --git a/regression/code/meansquarederrorline.m b/regression/code/meansquarederrorline.m
index 17f9d2f..4eca4a3 100644
--- a/regression/code/meansquarederrorline.m
+++ b/regression/code/meansquarederrorline.m
@@ -10,6 +10,6 @@ yest = c * x.^3;
 y = yest + noise*randn(n, 1);
 
 % compute mean squared error:
-mse = mean((y - y_est).^2);
+mse = mean((y - yest).^2);
 
-fprintf('the mean squared error is %g kg^2\n', mse))
+fprintf('the mean squared error is %.0f kg^2\n', mse)
diff --git a/regression/lecture/cubicfunc.py b/regression/lecture/cubicfunc.py
index 45dd67b..8b5b44a 100644
--- a/regression/lecture/cubicfunc.py
+++ b/regression/lecture/cubicfunc.py
@@ -41,7 +41,7 @@ def plot_data_fac(ax, x, y, c):
 
 if __name__ == "__main__":
     x, y, c = create_data()
-    print(len(x))
+    print('n=%d' % len(x))
     fig, (ax1, ax2) = plt.subplots(1, 2)
     fig.subplots_adjust(wspace=0.5, **adjust_fs(fig, left=6.0, right=1.5))
     plot_data(ax1, x, y)
diff --git a/regression/lecture/regression-chapter.tex b/regression/lecture/regression-chapter.tex
index 2169854..8afed44 100644
--- a/regression/lecture/regression-chapter.tex
+++ b/regression/lecture/regression-chapter.tex
@@ -42,17 +42,6 @@
 
 \subsection{2D fit}
 
-\begin{exercise}{meanSquaredError.m}{}
-  Implement the objective function \eqref{mseline} as a function
-  \varcode{meanSquaredError()}. The function takes three
-  arguments. The first is a vector of $x$-values and the second
-  contains the measurements $y$ for each value of $x$. The third
-  argument is a 2-element vector that contains the values of
-  parameters \varcode{m} and \varcode{b}. The function returns the
-  mean square error.
-\end{exercise}
-
-
 \begin{exercise}{errorSurface.m}{}\label{errorsurfaceexercise}
   Generate 20 data pairs $(x_i|y_i)$ that are linearly related with
   slope $m=0.75$ and intercept $b=-40$, using \varcode{rand()} for
diff --git a/regression/lecture/regression.tex b/regression/lecture/regression.tex
index 779309b..7fbffbf 100644
--- a/regression/lecture/regression.tex
+++ b/regression/lecture/regression.tex
@@ -109,10 +109,10 @@ The mean squared error is a so called \enterm{objective function} or
 \enterm{cost function} (\determ{Kostenfunktion}). A cost function
 assigns to a model prediction $\{y^{est}(x_i)\}$ for a given data set
 $\{(x_i, y_i)\}$ a single scalar value that we want to minimize. Here
-we aim to adapt the model parameters to minimize the mean squared
-error \eqref{meansquarederror}. In general, the \enterm{cost function}
-can be any function that describes the quality of the fit by mapping
-the data and the predictions to a single scalar value.
+we aim to adapt the model parameter to minimize the mean squared error
+\eqref{meansquarederror}. In general, the \enterm{cost function} can
+be any function that describes the quality of a fit by mapping the
+data and the predictions to a single scalar value.
 
 \begin{figure}[t]
   \includegraphics{cubicerrors}
@@ -125,25 +125,23 @@ the data and the predictions to a single scalar value.
 \end{figure}
 
 Replacing $y^{est}$ in the mean squared error \eqref{meansquarederror}
-with our model, the straight line \eqref{straightline}, the cost
-function reads
+with our cubic model \eqref{cubicfunc}, the cost function reads
 \begin{eqnarray}
-  f_{cost}(m,b|\{(x_i, y_i)\}) & = & \frac{1}{N} \sum_{i=1}^N (y_i - f(x_i;m,b))^2 \label{msefunc} \\
-  & = & \frac{1}{N} \sum_{i=1}^N (y_i - m x_i - b)^2 \label{mseline}
+  f_{cost}(c|\{(x_i, y_i)\}) & = & \frac{1}{N} \sum_{i=1}^N (y_i - f(x_i;c))^2 \label{msefunc} \\
+  & = & \frac{1}{N} \sum_{i=1}^N (y_i - c x_i^3)^2 \label{msecube}
 \end{eqnarray}
-The optimization process tries to find the slope $m$ and the intercept
-$b$ such that the cost function is minimized. With the mean squared
-error as the cost function this optimization process is also called
-method of the \enterm{least square error} (\determ[quadratischer
+The optimization process tries to find a value for the factor $c$ such
+that the cost function is minimized. With the mean squared error as
+the cost function, this optimization process is also called the method
+of \enterm{least squares} (\determ[quadratischer
 Fehler!kleinster]{Methode der kleinsten Quadrate}).
 
-\begin{exercise}{meanSquaredError.m}{}
-  Implement the objective function \eqref{mseline} as a function
-  \varcode{meanSquaredError()}. The function takes three
+\begin{exercise}{meanSquaredErrorCubic.m}{}
+  Implement the objective function \eqref{msecube} as a function
+  \varcode{meanSquaredErrorCubic()}. The function takes three
   arguments. The first is a vector of $x$-values and the second
   contains the measurements $y$ for each value of $x$. The third
-  argument is a 2-element vector that contains the values of
-  parameters \varcode{m} and \varcode{b}. The function returns the
+  argument is the value of the factor $c$. The function returns the
   mean squared error.
 \end{exercise}
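
Not part of the patch itself: a minimal sketch of how the new objective
function could be exercised, mirroring the data generation in
meansquarederrorline.m. The values chosen for n, c, and the noise level
are assumptions for illustration, not taken from the repository.

% usage sketch for meanSquaredErrorCubic() -- all values assumed
n = 40;                              % number of data pairs, assumed
x = 5.0 * rand(n, 1);                % random input values
c = 6.0;                             % assumed factor of the cubic relation
noise = 50.0;                        % assumed noise standard deviation
y = c * x.^3 + noise * randn(n, 1);  % simulated measurements
mse = meanSquaredErrorCubic(x, y, c);
fprintf('the mean squared error is %.0f kg^2\n', mse)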