[regression] finished chapter

commit dea6319e75
parent 46affef86d
@@ -1,8 +1,8 @@
function mse = meanSquaredErrorCubic(x, y, c)
% Mean squared error between data pairs and a cubic relation.
%
% Arguments: x, vector of the input values
% y, vector of the corresponding measured output values
% Arguments: x, vector of the x-data values
% y, vector of the corresponding y-data values
% c, the factor for the cubic relation.
%
% Returns: mse, the mean-squared-error.
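The hunk above only touches the help text of meanSquaredErrorCubic(); the function body itself is not part of this diff. Purely as a sketch of the quantity that help text describes, assuming the cubic model y = c*x^3 (the Python name and NumPy formulation are not taken from the repository):

import numpy as np

def mean_squared_error_cubic(x, y, c):
    # mean squared deviation between measured y-values and the cubic prediction c*x**3
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    return np.mean((y - c * x**3)**2)

# usage sketch with made-up numbers:
# mse = mean_squared_error_cubic([1.0, 2.0, 3.0], [2.1, 15.8, 54.3], 2.0)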
@@ -75,7 +75,7 @@ def plot_mse_min(ax, x, y, c):
if __name__ == "__main__":
    x, y, c = create_data()
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=cm_size(figure_width, 1.1*figure_height))
    fig.subplots_adjust(**adjust_fs(left=8.0, right=1))
    fig.subplots_adjust(**adjust_fs(left=8.0, right=1.2))
    plot_mse(ax1, x, y, c)
    plot_mse_min(ax2, x, y, c)
    fig.savefig("cubiccost.pdf")
@@ -58,8 +58,8 @@ def plot_error_hist(ax, x, y, c):

if __name__ == "__main__":
    x, y, c = create_data()
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.subplots_adjust(wspace=0.4, **adjust_fs(left=6.0, right=1.2))
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=cm_size(figure_width, 0.9*figure_height))
    fig.subplots_adjust(wspace=0.5, **adjust_fs(left=6.0, right=1.2))
    plot_data_errors(ax1, x, y, c)
    plot_error_hist(ax2, x, y, c)
    fig.savefig("cubicerrors.pdf")
@@ -42,8 +42,8 @@ def plot_data_fac(ax, x, y, c):
if __name__ == "__main__":
    x, y, c = create_data()
    print('n=%d' % len(x))
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.subplots_adjust(wspace=0.5, **adjust_fs(fig, left=6.0, right=1.5))
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=cm_size(figure_width, 0.9*figure_height))
    fig.subplots_adjust(wspace=0.5, **adjust_fs(fig, left=6.0, right=1.2))
    plot_data(ax1, x, y)
    plot_data_fac(ax2, x, y, c)
    fig.savefig("cubicfunc.pdf")
@@ -73,7 +73,7 @@ if __name__ == "__main__":
    x, y, c = create_data()
    cs, mses = gradient_descent(x, y)
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=cm_size(figure_width, 1.1*figure_height))
    fig.subplots_adjust(wspace=0.2, **adjust_fs(left=8.0, right=0.5))
    fig.subplots_adjust(**adjust_fs(left=8.0, right=1.2))
    plot_mse(ax1, x, y, c, cs)
    plot_descent(ax2, cs, mses)
    fig.savefig("cubicmse.pdf")
@@ -1,56 +0,0 @@
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from plotstyle import *

def create_data():
    m = 0.75
    n= -40
    x = np.arange(10.,110., 2.5)
    y = m * x + n;
    rng = np.random.RandomState(37281)
    noise = rng.randn(len(x))*15
    y += noise
    return x, y, m, n


def plot_error_plane(ax, x, y, m, n):
    ax.set_xlabel('Slope m')
    ax.set_ylabel('Intercept b')
    ax.set_zlabel('Mean squared error')
    ax.set_xlim(-4.5, 5.0)
    ax.set_ylim(-60.0, -20.0)
    ax.set_zlim(0.0, 700.0)
    ax.set_xticks(np.arange(-4, 5, 2))
    ax.set_yticks(np.arange(-60, -19, 10))
    ax.set_zticks(np.arange(0, 700, 200))
    ax.grid(True)
    ax.w_xaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))
    ax.w_yaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))
    ax.w_zaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))
    ax.invert_xaxis()
    ax.view_init(25, 40)
    slopes = np.linspace(-4.5, 5, 40)
    intercepts = np.linspace(-60, -20, 40)
    x, y = np.meshgrid(slopes, intercepts)
    error_surf = np.zeros(x.shape)
    for i, s in enumerate(slopes) :
        for j, b in enumerate(intercepts) :
            error_surf[j,i] = np.mean((y-s*x-b)**2.0)
    ax.plot_surface(x, y, error_surf, rstride=1, cstride=1, cmap=cm.coolwarm,
                    linewidth=0, shade=True)
    # Minimum:
    mini = np.unravel_index(np.argmin(error_surf), error_surf.shape)
    ax.scatter(slopes[mini[1]], intercepts[mini[0]], [0.0], color='#cc0000', s=60)


if __name__ == "__main__":
    x, y, m, n = create_data()
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1, projection='3d')
    plot_error_plane(ax, x, y, m, n)
    fig.set_size_inches(7., 5.)
    fig.subplots_adjust(**adjust_fs(fig, 1.0, 0.0, 0.0, 0.0))
    fig.savefig("error_surface.pdf")
    plt.close()
@@ -1,60 +0,0 @@
import numpy as np
import matplotlib.pyplot as plt
from plotstyle import *

def create_data():
    m = 0.75
    n= -40
    x = np.arange(10.,110., 2.5)
    y = m * x + n;
    rng = np.random.RandomState(37281)
    noise = rng.randn(len(x))*15
    y += noise
    return x, y, m, n


def plot_data(ax, x, y):
    ax.plot(x, y, **psA)
    ax.set_xlabel('Input x')
    ax.set_ylabel('Output y')
    ax.set_xlim(0, 120)
    ax.set_ylim(-80, 80)
    ax.set_xticks(np.arange(0,121, 40))
    ax.set_yticks(np.arange(-80,81, 40))


def plot_data_slopes(ax, x, y, m, n):
    ax.plot(x, y, **psA)
    xx = np.asarray([2, 118])
    for i in np.linspace(0.3*m, 2.0*m, 5):
        ax.plot(xx, i*xx+n, **lsBm)
    ax.set_xlabel('Input x')
    #ax.set_ylabel('Output y')
    ax.set_xlim(0, 120)
    ax.set_ylim(-80, 80)
    ax.set_xticks(np.arange(0,121, 40))
    ax.set_yticks(np.arange(-80,81, 40))


def plot_data_intercepts(ax, x, y, m, n):
    ax.plot(x, y, **psA)
    xx = np.asarray([2, 118])
    for i in np.linspace(n-1*n, n+1*n, 5):
        ax.plot(xx, m*xx + i, **lsBm)
    ax.set_xlabel('Input x')
    #ax.set_ylabel('Output y')
    ax.set_xlim(0, 120)
    ax.set_ylim(-80, 80)
    ax.set_xticks(np.arange(0,121, 40))
    ax.set_yticks(np.arange(-80,81, 40))


if __name__ == "__main__":
    x, y, m, n = create_data()
    fig, axs = plt.subplots(1, 3)
    fig.subplots_adjust(wspace=0.5, **adjust_fs(fig, left=6.0, right=1.5))
    plot_data(axs[0], x, y)
    plot_data_slopes(axs[1], x, y, m, n)
    plot_data_intercepts(axs[2], x, y, m, n)
    fig.savefig("lin_regress.pdf")
    plt.close()
@@ -1,65 +0,0 @@
import numpy as np
import matplotlib.pyplot as plt
from plotstyle import *

def create_data():
    m = 0.75
    n= -40
    x = np.concatenate( (np.arange(10.,110., 2.5), np.arange(0.,120., 2.0)) )
    y = m * x + n;
    rng = np.random.RandomState(37281)
    noise = rng.randn(len(x))*15
    y += noise
    return x, y, m, n


def plot_data(ax, x, y, m, n):
    ax.set_xlabel('Input x')
    ax.set_ylabel('Output y')
    ax.set_xlim(0, 120)
    ax.set_ylim(-80, 80)
    ax.set_xticks(np.arange(0,121, 40))
    ax.set_yticks(np.arange(-80,81, 40))
    ax.annotate('Error',
                xy=(x[34]+1, y[34]+15), xycoords='data',
                xytext=(80, -50), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(0.9,1.0),
                connectionstyle="angle3,angleA=50,angleB=-30") )
    ax.plot(x[:40], y[:40], zorder=0, **psAm)
    inxs = [3, 13, 16, 19, 25, 34, 36]
    ax.plot(x[inxs], y[inxs], zorder=10, **psA)
    xx = np.asarray([2, 118])
    ax.plot(xx, m*xx+n, **lsBm)
    for i in inxs :
        xx = [x[i], x[i]]
        yy = [m*x[i]+n, y[i]]
        ax.plot(xx, yy, zorder=5, **lsDm)


def plot_error_hist(ax, x, y, m, n):
    ax.set_xlabel('Squared error')
    ax.set_ylabel('Frequency')
    bins = np.arange(0.0, 602.0, 50.0)
    ax.set_xlim(bins[0], bins[-1])
    ax.set_ylim(0, 35)
    ax.set_xticks(np.arange(bins[0], bins[-1], 100))
    ax.set_yticks(np.arange(0, 36, 10))
    errors = (y-(m*x+n))**2.0
    mls = np.mean(errors)
    ax.annotate('Mean\nsquared\nerror',
                xy=(mls, 0.5), xycoords='data',
                xytext=(350, 20), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(0.0,0.2),
                connectionstyle="angle3,angleA=10,angleB=90") )
    ax.hist(errors, bins, **fsD)



if __name__ == "__main__":
    x, y, m, n = create_data()
    fig, axs = plt.subplots(1, 2)
    fig.subplots_adjust(**adjust_fs(fig, left=6.0))
    plot_data(axs[0], x, y, m, n)
    plot_error_hist(axs[1], x, y, m, n)
    fig.savefig("linear_least_squares.pdf")
    plt.close()
@@ -23,47 +23,12 @@
\item Fig 8.2 right: this should be a chi-squared distribution with one degree of freedom!
\end{itemize}

\begin{figure}[t]
  \includegraphics[width=0.75\textwidth]{error_surface}
  \titlecaption{Error surface.}{The two model parameters $m$ and $b$
    define the base area of the surface plot. For each parameter
    combination of slope and intercept the error is calculated. The
    resulting surface has a minimum which indicates the parameter
    combination that best fits the data.}\label{errorsurfacefig}
\end{figure}


\begin{figure}[t]
  \includegraphics[width=0.75\textwidth]{error_gradient}
  \titlecaption{Gradient of the error surface.} {Each arrow points
    into the direction of the greatest ascend at different positions
    of the error surface shown in \figref{errorsurfacefig}. The
    contour lines in the background illustrate the error surface. Warm
    colors indicate high errors, colder colors low error values. Each
    contour line connects points of equal
    error.}\label{gradientquiverfig}
\end{figure}

\begin{figure}[t]
  \includegraphics[width=0.45\textwidth]{gradient_descent}
  \titlecaption{Gradient descent.}{The algorithm starts at an
    arbitrary position. At each point the gradient is estimated and
    the position is updated as long as the length of the gradient is
    sufficiently large.The dots show the positions after each
    iteration of the algorithm.} \label{gradientdescentfig}
\end{figure}


\subsection{Linear fits}
\begin{itemize}
\item Polyfit is easy: unique solution! $c x^2$ is also a linear fit.
\item Example for overfitting with polyfit of a high order (=number of data points)
\end{itemize}

\section{Fitting in practice}

Fit with matlab functions lsqcurvefit, polyfit


\subsection{Non-linear fits}
\begin{itemize}
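The draft notes in the hunk above remark that fits with polyfit are linear problems with a unique solution. As a sketch of that point in Python (NumPy's np.polyfit standing in for MATLAB's polyfit; the data values are invented and only mimic the create_data() scripts above):

import numpy as np

rng = np.random.RandomState(37281)
x = np.arange(10.0, 110.0, 2.5)
y = 0.75 * x - 40.0 + rng.randn(len(x)) * 15.0

# degree-1 polynomial fit: slope and intercept follow directly from a
# linear least-squares problem, no iterative search is needed
slope, intercept = np.polyfit(x, y, 1)
print(slope, intercept)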
@@ -227,6 +227,7 @@ to arbitrary precision.
\end{ibox}

\section{Gradient}

Imagine to place a ball at some point on the cost function
\figref{cubiccostfig}. Naturally, it would roll down the slope and
eventually stop at the minimum of the error surface (if it had no
@@ -235,10 +236,21 @@ way to the minimum of the cost function. The ball always follows the
steepest slope. Thus we need to figure out the direction of the slope
at the position of the ball.

\begin{figure}[t]
  \includegraphics{cubicgradient}
  \titlecaption{Derivative of the cost function.}{The gradient, the
    derivative \eqref{costderivative} of the cost function, is
    negative to the left of the minimum of the cost function, zero at,
    and positive to the right of the minimum (left). For each value of
    the parameter $c$ the negative gradient (arrows) points towards
    the minimum of the cost function
    (right).} \label{gradientcubicfig}
\end{figure}

In our one-dimensional example of a single free parameter the slope is
simply the derivative of the cost function with respect to the
parameter $c$. This derivative is called the
\entermde{Gradient}{gradient} of the cost function:
parameter $c$ (\figref{gradientcubicfig}, left). This derivative is called
the \entermde{Gradient}{gradient} of the cost function:
\begin{equation}
  \label{costderivative}
  \nabla f_{cost}(c) = \frac{{\rm d} f_{cost}(c)}{{\rm d} c}
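A hedged worked step that is not shown in this diff: assuming the cost function is the mean squared error of the cubic model, $f_{cost}(c) = \frac{1}{N}\sum_{i=1}^N (y_i - c x_i^3)^2$, the derivative in \eqref{costderivative} can also be written out analytically via the chain rule,
\begin{equation*}
  \nabla f_{cost}(c) = -\frac{2}{N} \sum_{i=1}^N x_i^3 \left( y_i - c x_i^3 \right) ,
\end{equation*}
which is what the difference quotient in the following hunk approximates numerically.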
@@ -252,7 +264,8 @@ can be approximated numerically by the difference quotient
  \approx \frac{f_{cost}(c + \Delta c) - f_{cost}(c)}{\Delta c}
\end{equation}
The derivative is positive for positive slopes. Since want to go down
the hill, we choose the opposite direction.
the hill, we choose the opposite direction (\figref{gradientcubicfig},
right).

\begin{exercise}{meanSquaredGradientCubic.m}{}\label{gradientcubic}
  Implement a function \varcode{meanSquaredGradientCubic()}, that
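The exercise above asks for meanSquaredGradientCubic(); its MATLAB solution is not part of this diff. A sketch of the difference-quotient idea in Python, reusing the mean_squared_error_cubic() sketch from above (the step size dc is an assumption):

def mean_squared_gradient_cubic(x, y, c, dc=1e-7):
    # approximate the derivative of the cost function by a difference quotient in c
    return (mean_squared_error_cubic(x, y, c + dc)
            - mean_squared_error_cubic(x, y, c)) / dc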
@@ -275,7 +288,7 @@ the hill, we choose the opposite direction.
  \titlecaption{Gradient descent.}{The algorithm starts at an
    arbitrary position. At each point the gradient is estimated and
    the position is updated as long as the length of the gradient is
    sufficiently large.The dots show the positions after each
    sufficiently large. The dots show the positions after each
    iteration of the algorithm.} \label{gradientdescentcubicfig}
\end{figure}

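The caption in the hunk above describes the gradient descent loop for the single parameter c. A sketch of such a loop in Python, building on the gradient sketch above (eps, threshold and the iteration cap are invented values, not the exercise's):

import numpy as np

def gradient_descent_cubic(x, y, c0=1.0, eps=1e-5, threshold=0.1, max_iter=10000):
    c = c0
    cs = [c]                                 # position after each iteration (the dots)
    for _ in range(max_iter):
        g = mean_squared_gradient_cubic(x, y, c)
        if np.abs(g) < threshold:            # stop once the slope is flat enough
            break
        c -= eps * g                         # step against the gradient, i.e. downhill
        cs.append(c)
    return c, cs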
@@ -378,18 +391,18 @@ functions that have more than a single parameter, in general $n$
parameters. We then need to find the minimum in an $n$ dimensional
parameter space.

For our tiger problem, we could have also fitted the exponent $\alpha$
of the power-law relation between size and weight, instead of assuming
a cubic relation with $\alpha=3$:
For our tiger problem, we could have also fitted the exponent $a$ of
the power-law relation between size and weight, instead of assuming a
cubic relation with $a=3$:
\begin{equation}
  \label{powerfunc}
  y = f(x; c, \alpha) = f(x; \vec p) = c\cdot x^\alpha
  y = f(x; c, a) = f(x; \vec p) = c\cdot x^a
\end{equation}
We then could check whether the resulting estimate of the exponent
$\alpha$ indeed is close to the expected power of three. The
power-law \eqref{powerfunc} has two free parameters $c$ and $\alpha$.
$a$ indeed is close to the expected power of three. The
power-law \eqref{powerfunc} has two free parameters $c$ and $a$.
Instead of a single parameter we are now dealing with a vector $\vec
p$ containing $n$ parameter values. Here, $\vec p = (c, \alpha)$. All
p$ containing $n$ parameter values. Here, $\vec p = (c, a)$. All
the concepts we introduced on the example of the one dimensional
problem of tiger weights generalize to $n$-dimensional problems. We
only need to adapt a few things. The cost function for the mean
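The power law \eqref{powerfunc} introduced in the hunk above is implemented as powerLaw() in the exercises further down; that MATLAB file is not shown here. A short Python sketch with the parameter vector $\vec p = (c, a)$ (names assumed):

import numpy as np

def power_law(x, p):
    # power-law model y = c * x**a with parameter vector p = (c, a)
    c, a = p
    return c * np.asarray(x, dtype=float)**a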
@@ -399,12 +412,27 @@ squared error reads
  f_{cost}(\vec p|\{(x_i, y_i)\}) = \frac{1}{N} \sum_{i=1}^N (y_i - f(x_i;\vec p))^2
\end{equation}

\begin{figure}[t]
  \includegraphics{powergradientdescent}
  \titlecaption{Gradient descent on an error surface.}{Contour plot of
    the cost function \eqref{ndimcostfunc} for the fit of a power law
    \eqref{powerfunc} to some data. Here the cost function is a long
    and narrow valley on the plane spanned by the two parameters $c$
    and $a$ of the power law. The red line marks the path of the
    gradient descent algorithm. The gradient is always perpendicular
    to the contour lines. The algorithm quickly descends into the
    valley and then slowly creeps on the shallow bottom of the valley
    towards the global minimum where it terminates (yellow circle).
  } \label{powergradientdescentfig}
\end{figure}

For two-dimensional problems the graph of the cost function is an
\enterm{error surface} (\determ{{Fehlerfl\"ache}}). The two parameters
span a two-dimensional plane. The cost function assigns to each
parameter combination on this plane a single value. This results in a
landscape over the parameter plane with mountains and valleys and we
are searching for the position of the bottom of the deepest valley.
are searching for the position of the bottom of the deepest valley
(\figref{powergradientdescentfig}).

\begin{ibox}[tp]{\label{partialderivativebox}Partial derivatives and gradient}
  Some functions depend on more than a single variable. For example, the function
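A sketch of the $n$-dimensional cost function \eqref{ndimcostfunc} in Python, written so that the model function is passed in as an argument (anticipating the function-handle section further down; the names are assumptions, not the course code):

import numpy as np

def mean_squared_error(p, func, x, y):
    # mean squared deviation between the data y and the model func(x, p)
    return np.mean((np.asarray(y, dtype=float) - func(x, p))**2)

# usage sketch: mse = mean_squared_error((2.0, 3.0), power_law, x, y)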
@@ -446,13 +474,13 @@ space of our example, the \entermde{Gradient}{gradient}
(box~\ref{partialderivativebox}) of the cost function is a vector
\begin{equation}
  \label{gradientpowerlaw}
  \nabla f_{cost}(c, \alpha) = \left( \frac{\partial f_{cost}(c, \alpha)}{\partial c},
  \frac{\partial f_{cost}(c, \alpha)}{\partial \alpha} \right)
  \nabla f_{cost}(c, a) = \left( \frac{\partial f_{cost}(c, a)}{\partial c},
  \frac{\partial f_{cost}(c, a)}{\partial a} \right)
\end{equation}
that points into the direction of the strongest ascend of the cost
function. The gradient is given by the partial derivatives
(box~\ref{partialderivativebox}) of the mean squared error with
respect to the parameters $c$ and $\alpha$ of the power law
respect to the parameters $c$ and $a$ of the power law
relation. In general, for $n$-dimensional problems, the gradient is an
$n-$ dimensional vector containing for each of the $n$ parameters
$p_j$ the respective partial derivatives as coordinates:
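A sketch of this gradient computed numerically, one difference quotient per parameter, reusing the mean_squared_error() sketch from above (the step size dp is an assumption):

import numpy as np

def gradient(p, func, x, y, dp=1e-7):
    p = np.asarray(p, dtype=float)
    f0 = mean_squared_error(p, func, x, y)
    grad = np.zeros(len(p))
    for k in range(len(p)):
        pk = p.copy()
        pk[k] += dp                          # nudge only the k-th parameter
        grad[k] = (mean_squared_error(pk, func, x, y) - f0) / dp
    return grad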
@@ -468,6 +496,9 @@ parameter value $p_i$ becomes a vector $\vec p_i$ of parameter values:
  \label{ndimgradientdescent}
  \vec p_{i+1} = \vec p_i - \epsilon \cdot \nabla f_{cost}(\vec p_i)
\end{equation}
The algorithm proceeds along the negative gradient
(\figref{powergradientdescentfig}).

For the termination condition we need the length of the gradient. In
one dimension it was just the absolute value of the derivative. For
$n$ dimensions this is according to the \enterm{Pythagorean theorem}
@@ -479,8 +510,7 @@ the sum of the squared partial derivatives:
\end{equation}
The \code{norm()} function implements this.


\section{Passing a function as an argument to another function}
\subsection{Passing a function as an argument to another function}

So far, all our code for the gradient descent algorithm was tailored
to a specific function, the cubic relation \eqref{cubicfunc}. It would
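Combining the update rule \eqref{ndimgradientdescent} with this norm-based termination gives the $n$-dimensional descent loop. A sketch that reuses the mean_squared_error() and gradient() sketches from above (eps, threshold and the iteration cap are invented values):

import numpy as np

def gradient_descent(p0, func, x, y, eps=1e-5, threshold=0.1, max_iter=10000):
    p = np.asarray(p0, dtype=float)
    ps = [p.copy()]                          # parameter vector after each iteration
    for _ in range(max_iter):
        g = gradient(p, func, x, y)
        if np.linalg.norm(g) < threshold:    # length of the gradient (Pythagoras)
            break
        p -= eps * g                         # step against the gradient
        ps.append(p.copy())
    return p, ps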
@@ -500,7 +530,7 @@ argument to our function:
\pageinputlisting[caption={Passing a function handle as an argument to a function.}]{funcplotterexamples.m}


\section{Gradient descent algorithm for arbitrary functions}
\subsection{Gradient descent algorithm for arbitrary functions}

Now we are ready to adapt the gradient descent algorithm from
exercise~\ref{gradientdescentexercise} to arbitrary functions with $n$
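The listing funcplotterexamples.m referenced above demonstrates MATLAB function handles and is not reproduced in this diff. In Python the same idea is simply passing the function object itself as an argument; a sketch (the plotting helper and the cubic example are made up):

import numpy as np
import matplotlib.pyplot as plt

def func_plotter(func, x):
    # 'func' is an ordinary argument; calling it here works for any model function
    plt.plot(x, func(x))
    plt.show()

func_plotter(lambda x: 2.0 * x**3, np.arange(0.0, 10.0, 0.1))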
@@ -524,21 +554,21 @@ For testing our new function we need to implement the power law
  Write a function that implements \eqref{powerfunc}. The function
  gets as arguments a vector $x$ containing the $x$-data values and
  another vector containing as elements the parameters for the power
  law, i.e. the factor $c$ and the power $\alpha$. It returns a vector
  law, i.e. the factor $c$ and the power $a$. It returns a vector
  with the computed $y$ values for each $x$ value.
\end{exercise}

Now let's use the new gradient descent function to fit a power law to
our tiger data-set:
our tiger data-set (\figref{powergradientdescentfig}):

\begin{exercise}{plotgradientdescentpower.m}{}
  Use the function \varcode{gradientDescent()} to fit the
  \varcode{powerLaw()} function to the simulated data from
  exercise~\ref{mseexercise}. Plot the returned values of the two
  parameters against each other. Compare the result of the gradient
  descent method with the true values of $c$ and $\alpha$ used to
  simulate the data. Observe the norm of the gradient and inspect the
  plots to adapt $\epsilon$ (smaller than in
  descent method with the true values of $c$ and $a$ used to simulate
  the data. Observe the norm of the gradient and inspect the plots to
  adapt $\epsilon$ (smaller than in
  exercise~\ref{plotgradientdescentexercise}) and the threshold (much
  larger) appropriately. Finally plot the data together with the best
  fitting power-law \eqref{powerfunc}.
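The exercise relies on the course's MATLAB functions gradientDescent() and powerLaw(), which are not part of this diff. Purely as a sketch of the workflow it describes, reusing the Python sketches from above (data values, eps and threshold are invented and, as the exercise itself notes, usually need tuning while watching the gradient norm):

import numpy as np
import matplotlib.pyplot as plt

# made-up data following y = c*x**a with c=6 and a=3 plus noise
rng = np.random.RandomState(42)
x = np.linspace(1.0, 3.0, 50)
y = 6.0 * x**3.0 + rng.randn(len(x)) * 5.0

# fit both parameters of the power law with the generic descent sketched above
p_best, ps = gradient_descent((1.0, 2.0), power_law, x, y, eps=1e-4, threshold=1.0)
print('estimated (c, a):', p_best)

# descent path in parameter space and the data together with the fitted power law
c_path, a_path = zip(*ps)
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.plot(c_path, a_path, '-o')
ax2.plot(x, y, 'o')
ax2.plot(x, power_law(x, p_best), '-')
plt.show()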