[regression] first exercise

parent c2e4d4e40c
commit 4b18c855b9

@@ -72,14 +72,14 @@
\newcommand{\figlabel}[1]{\textsf{\textbf{\large \uppercase{#1}}}}

% maximum number of floats:
-\setcounter{topnumber}{2}
-\setcounter{bottomnumber}{0}
+\setcounter{topnumber}{1}
+\setcounter{bottomnumber}{1}
\setcounter{totalnumber}{2}

% float placement fractions:
\renewcommand{\textfraction}{0.1}
\renewcommand{\topfraction}{0.9}
-\renewcommand{\bottomfraction}{0.0}
+\renewcommand{\bottomfraction}{0.9}
\renewcommand{\floatpagefraction}{0.7}

% spacing for floats:
4 regression/code/funcPlotter.m Normal file
@@ -0,0 +1,4 @@
function funcPlotter(func)
  x = 0:0.1:10.0;
  plot(x, func(x));
end
2 regression/code/funcplotterexamples.m Normal file
@@ -0,0 +1,2 @@
funcPlotter(@sin);
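The example script passes a handle to a built-in function; an anonymous function handle works the same way. A minimal sketch (these extra calls are illustrations, not part of the commit):

% any vectorized function of x can be passed as a handle:
funcPlotter(@(x) x.^2);       % plots x squared on the grid defined inside funcPlotter
funcPlotter(@(x) exp(-x/2));  % plots a decaying exponential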
13 regression/code/plotlsqcurvefitpower.m Normal file
@@ -0,0 +1,13 @@
meansquarederrorline; % generate data
p0 = [2.0, 1.0];
pest = lsqcurvefit(@(p, xd) powerLaw(xd, p), p0, x, y) % lsqcurvefit passes the parameter vector first, so wrap powerLaw(x, p) accordingly

hold on;
% generate x-values for plotting the fit:
xx = min(x):0.01:max(x);
yy = powerLaw(xx, pest);
plot(xx, yy); % plot fit
plot(x, y, 'o'); % plot original data
xlabel('Size [m]');
ylabel('Weight [kg]');
legend('fit', 'data', 'location', 'northwest');
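Since the exercise sheet also mentions polyfit(), note that a power law can equivalently be estimated as a straight line in log-log space. A hedged sketch (assumes the x and y produced by meansquarederrorline are all positive):

% fit log(y) = log(c) + alpha*log(x) with a first-order polynomial:
q = polyfit(log(x), log(y), 1);
alphaest = q(1);    % estimated exponent alpha
cest = exp(q(2));   % estimated factor c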
9 regression/code/powerLaw.m Normal file
@@ -0,0 +1,9 @@
function y = powerLaw(x, p)
% Power law function y = c*x^alpha.
%
% Arguments: x, vector of the x-data values.
% p, vector with parameter values c and alpha.
%
% Returns: y, vector of the computed y-data values.
  y = p(1)*x.^p(2);
end
4 regression/exercises/expdecay.m Normal file
@@ -0,0 +1,4 @@
function x = expdecay(t, tau)
% return the exponential function x = e^{-t/tau}
  x = exp(-t./tau);
end
10 regression/exercises/expdecaydata.m Normal file
@@ -0,0 +1,10 @@
tau = 10.0; % membrane time constant in ms
dt = 0.05; % sampling interval in ms
noisesd = 0.05; % measurement noise in mV

time = 0.0:dt:5*tau; % time vector
voltage = expdecay(time, tau); % exponential decay
voltage = voltage + noisesd*randn(1, length(voltage)); % plus noise

plot(time, voltage);
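For reproducible exercise data one might fix the random seed before adding noise, mirroring the fixed RandomState used in the commit's Python scripts. An optional one-liner (assumes a MATLAB/Octave version that provides rng()):

rng(32281); % fix the seed so every run produces the same noisy trace (optional)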
34 regression/exercises/expdecaydescent.m Normal file
@@ -0,0 +1,34 @@
function [tau, taus, mses] = expdecaydescent(t, x, tau0, epsilon, threshold)
% Gradient descent for fitting a decaying exponential.
%
% Arguments: t, vector of time points.
% x, vector of the corresponding measured data values.
% tau0, initial value for the time constant.
% epsilon: factor multiplying the gradient.
% threshold: minimum value for gradient
%
% Returns: tau, the final value of the time constant.
% taus: vector with all the tau-values traversed.
% mses: vector with the corresponding mean squared errors
  tau = tau0;
  gradient = 1000.0;
  taus = [];
  mses = [];
  count = 1;
  while abs(gradient) > threshold
    taus(count) = tau;
    mses(count) = expdecaymse(t, x, tau);
    gradient = expdecaygradient(t, x, tau);
    tau = tau - epsilon * gradient;
    count = count + 1;
  end
end

function mse = expdecaymse(t, x, tau)
  mse = mean((x - expdecay(t, tau)).^2);
end

function gradient = expdecaygradient(t, x, tau)
  h = 1e-7; % stepsize for derivative
  gradient = (expdecaymse(t, x, tau+h) - expdecaymse(t, x, tau))/h;
end
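The gradient in expdecaydescent.m is a one-sided difference quotient. A quick way to convince yourself that it is correct is to compare it with the analytic derivative of the mean squared error, d/dtau mean((x - e^{-t/tau})^2) = -2 mean((x - e^{-t/tau}) e^{-t/tau} t/tau^2). A check sketch (my addition, assuming time and voltage from expdecaydata.m are in the workspace):

expdecaydata;                           % provides time and voltage (and sets tau = 10)
tautest = 7.0;                          % arbitrary test value for the time constant
h = 1e-7;                               % same step size as in expdecaydescent.m
mse = @(tau) mean((voltage - expdecay(time, tau)).^2);
numgrad = (mse(tautest + h) - mse(tautest))/h;           % one-sided difference quotient
f = expdecay(time, tautest);
anagrad = -2.0*mean((voltage - f).*f.*time/tautest^2);   % analytic derivative of the MSE
fprintf('numeric: %g, analytic: %g\n', numgrad, anagrad);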
29 regression/exercises/expdecayplot.m Normal file
@@ -0,0 +1,29 @@
expdecaydata; % generate data

tau0 = 2.0;
eps = 1.0;
thresh = 0.00001;
[tauest, taus, mses] = expdecaydescent(time, voltage, tau0, eps, thresh);

subplot(2, 2, 1); % top left panel
hold on;
plot(taus, '-o');
plot([1, length(taus)], [tau, tau], 'k'); % line indicating true tau value
hold off;
xlabel('Iteration');
ylabel('tau');
subplot(2, 2, 3); % bottom left panel
plot(mses, '-o');
xlabel('Iteration steps');
ylabel('MSE');
subplot(1, 2, 2); % right panel
hold on;
% generate x-values for plotting the fit:
tt = 0.0:0.01:max(time);
xx = expdecay(tt, tauest);
plot(time, voltage, '.'); % plot original data
plot(tt, xx, '-r'); % plot fit
xlabel('Time [ms]');
ylabel('Voltage [mV]');
legend('data', 'fit', 'location', 'northeast');
pause
@@ -1,6 +1,6 @@
\documentclass[12pt,a4paper,pdftex]{exam}

-\newcommand{\exercisetopic}{Resampling}
+\newcommand{\exercisetopic}{Gradient descent}
\newcommand{\exercisenum}{9}
\newcommand{\exercisedate}{December 22nd, 2020}

@@ -15,67 +15,83 @@

\begin{questions}

\question We want to fit the straight line \[ y = mx+b \] to the
data in the file \emph{lin\_regression.mat}.

In the lecture we already prepared the cost function
(\code{meanSquaredError()}), and the gradient
(\code{meanSquaredGradient()}) (read chapter 8 ``Optimization and
gradient descent'' in the script, in particular section 8.4 and
exercise 8.4!). With these functions in place we here want to
implement a gradient descent algorithm that finds the minimum of the
cost function and thus the slope and intercept of the straight line
that minimizes the squared distance to the data values.

The algorithm for the descent towards the minimum of the cost
function is as follows:
\begin{enumerate}
\item Start with some arbitrary parameter values (intercept $b_0$
and slope $m_0$), $\vec p_0 = (b_0, m_0)$, for the slope and the
intercept of the straight line.
\item \label{computegradient} Compute the gradient of the cost function
at the current values of the parameters $\vec p_i$.
\item If the magnitude (length) of the gradient is smaller than some
small number, the algorithm has converged close to the minimum of the
cost function and we abort the descent. Right at the minimum the
magnitude of the gradient is zero. However, since we determine
the gradient numerically, it will never be exactly zero. This is
why we just require the gradient to be sufficiently small
(e.g. \code{norm(gradient) < 0.1}).
\item \label{gradientstep} Move against the gradient by a small step
$\epsilon = 0.01$:
\[\vec p_{i+1} = \vec p_i - \epsilon \cdot \nabla f_{cost}(m_i, b_i)\]
\item Repeat steps \ref{computegradient} -- \ref{gradientstep}.
\end{enumerate}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Read sections 8.1 -- 8.5 of chapter 8 on ``optimization
and gradient descent''!}\vspace{-3ex}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Fitting the time constant of an exponential function}
Let's assume we record the membrane potential from a photoreceptor
neuron. We define the resting potential of the neuron to be at
0\,mV. By means of a brief current injection we increase the
membrane potential by exactly 1\,mV. We then record how the membrane
potential decays exponentially down to the resting potential. We are
interested in the membrane time constant and therefore want to fit
an exponential function to the recorded time course of the membrane
potential.

\begin{parts}
\part Implement the gradient descent in a function that returns
the parameter values at the minimum of the cost function and a vector
with the value of the cost function at each step of the algorithm.
\part Implement (and document!) the exponential function
\begin{equation}
\label{expfunc}
x(t) = e^{-t/\tau}
\end{equation}
with the membrane time constant $\tau$ as a matlab function
\code{expdecay(t, tau)} that takes as arguments a vector of
time points and the membrane time constant. The function returns
\eqnref{expfunc} computed for each time point as a vector.
\begin{solution}
\lstinputlisting{descent.m}
\lstinputlisting{expdecay.m}
\end{solution}

\part Plot the data and the straight line with the parameter
values that you found with the gradient descent method.
\part Let's first generate the data. Set the membrane time
constant to 10\,ms. Generate a time vector with sample times
between zero and five times the membrane time constant and a
sampling interval of 0.05\,ms. Then compute a vector containing
the corresponding measurements of the membrane potential using the
\code{expdecay()} function and adding some measurement noise with
a standard deviation of 0.05\,mV (\code{randn()} function). Also
plot the data.
\begin{solution}
\lstinputlisting{expdecaydata.m}
\end{solution}

\part Plot the development of the costs as a function of the
iteration step.
\part Implement the gradient descent algorithm for finding the
least squares for the exponential function \eqref{expfunc}. The
function takes as arguments the measured data, an initial value
for the estimation of the membrane time constant, the $\epsilon$
factor, and the threshold for the length of the gradient at which to
terminate the algorithm. The function should return the estimated
membrane time constant at the minimum of the mean squared error, a
vector with the time constants, and a vector with the mean squared
errors for each step of the algorithm.
\begin{solution}
\lstinputlisting{descentfit.m}
\lstinputlisting{expdecaydescent.m}
\end{solution}

\part For checking the gradient descent method from (a), compare
its result for slope and intercept with the position of the
minimum of the cost function that you get when computing the cost
function for many values of the slope and intercept and then using
the \code{min()} function. Vary the value of $\epsilon$ and the
minimum gradient. What are good values such that the gradient
descent gets closest to the true minimum of the cost function?
\part Call the gradient descent function with the generated data.
Watch the value of the gradient and of tau and adapt $\epsilon$
and the threshold accordingly (they differ quite dramatically from
the ones in the script for the cubic fit).

\part Generate three plots: (i) the values of the time constant
for each iteration step, (ii) the mean squared error for each
iteration step, and (iii) the measured data and the fitted
exponential function.
\begin{solution}
\lstinputlisting{checkdescent.m}
\lstinputlisting{expdecayplot.m}
\end{solution}

\end{parts}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Read sections 8.6 -- 8.8 of chapter 8 on ``optimization
and gradient descent''!}\vspace{-3ex}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Fitting the full exponential function}
\begin{parts}

\part Use the functions \code{polyfit()} and \code{lsqcurvefit()}
provided by matlab to find the slope and intercept of a straight
line that fits the data. Compare the resulting fit parameters of
92 regression/exercises/gradientdescent-2.tex Normal file
@@ -0,0 +1,92 @@
\documentclass[12pt,a4paper,pdftex]{exam}

\newcommand{\exercisetopic}{Gradient descent}
\newcommand{\exercisenum}{9}
\newcommand{\exercisedate}{December 22nd, 2020}

\input{../../exercisesheader}

\firstpagefooter{Prof. Dr. Jan Benda}{}{jan.benda@uni-tuebingen.de}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}

\input{../../exercisestitle}

\begin{questions}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question We want to fit the straight line \[ y = mx+b \] to the
data in the file \emph{lin\_regression.mat}.

In the lecture we already prepared the cost function
(\code{meanSquaredError()}), and the gradient
(\code{meanSquaredGradient()}) (read chapter 8 ``Optimization and
gradient descent'' in the script, in particular section 8.4 and
exercise 8.5!). With these functions in place we here want to
implement a gradient descent algorithm that finds the minimum of the
cost function and thus the slope and intercept of the straight line
that minimizes the squared distance to the data values.

The algorithm for the descent towards the minimum of the cost
function is as follows:
\begin{enumerate}
\item Start with some arbitrary parameter values (intercept $b_0$
and slope $m_0$), $\vec p_0 = (b_0, m_0)$, for the slope and the
intercept of the straight line.
\item \label{computegradient} Compute the gradient of the cost function
at the current values of the parameters $\vec p_i$.
\item If the magnitude (length) of the gradient is smaller than some
small number, the algorithm has converged close to the minimum of the
cost function and we abort the descent. Right at the minimum the
magnitude of the gradient is zero. However, since we determine
the gradient numerically, it will never be exactly zero. This is
why we just require the gradient to be sufficiently small
(e.g. \code{norm(gradient) < 0.1}).
\item \label{gradientstep} Move against the gradient by a small step
$\epsilon = 0.01$:
\[\vec p_{i+1} = \vec p_i - \epsilon \cdot \nabla f_{cost}(m_i, b_i)\]
\item Repeat steps \ref{computegradient} -- \ref{gradientstep}.
\end{enumerate}

\begin{parts}
\part Implement the gradient descent in a function that returns
the parameter values at the minimum of the cost function and a vector
with the value of the cost function at each step of the algorithm.
\begin{solution}
\lstinputlisting{descent.m}
\end{solution}

\part Plot the data and the straight line with the parameter
values that you found with the gradient descent method.

\part Plot the development of the costs as a function of the
iteration step.
\begin{solution}
\lstinputlisting{descentfit.m}
\end{solution}

\part For checking the gradient descent method from (a), compare
its result for slope and intercept with the position of the
minimum of the cost function that you get when computing the cost
function for many values of the slope and intercept and then using
the \code{min()} function. Vary the value of $\epsilon$ and the
minimum gradient. What are good values such that the gradient
descent gets closest to the true minimum of the cost function?
\begin{solution}
\lstinputlisting{checkdescent.m}
\end{solution}

\part Use the functions \code{polyfit()} and \code{lsqcurvefit()}
provided by matlab to find the slope and intercept of a straight
line that fits the data. Compare the resulting fit parameters of
those functions with the ones of your gradient descent algorithm.
\begin{solution}
\lstinputlisting{linefit.m}
\end{solution}

\end{parts}

\end{questions}

\end{document}
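For reference, the descent loop that the first question describes could look roughly like the following sketch. It is an illustration only, not part of the commit; the signatures of meanSquaredError() and meanSquaredGradient() and the parameter layout p = [b, m] are assumptions based on how the exercise text uses them:

function [params, costs] = descentsketch(x, y, p0, epsilon, threshold)
% Illustrative sketch of the descent loop described in the exercise.
% Assumes meanSquaredError(x, y, p) and meanSquaredGradient(x, y, p)
% from the lecture, with p = [b, m] (assumed signatures).
  params = p0;
  costs = [];
  gradient = [1000.0, 1000.0];        % start with a large dummy gradient
  while norm(gradient) > threshold
    costs(end+1) = meanSquaredError(x, y, params);
    gradient = meanSquaredGradient(x, y, params);
    params = params - epsilon * gradient;   % step against the gradient
  end
end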
89 regression/lecture/cubicgradient.py Normal file
@@ -0,0 +1,89 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mt
from plotstyle import *


def create_data():
    # wikipedia:
    # Generally, males vary in total length from 250 to 390 cm and
    # weigh between 90 and 306 kg
    c = 6
    x = np.arange(2.2, 3.9, 0.05)
    y = c * x**3.0
    rng = np.random.RandomState(32281)
    noise = rng.randn(len(x))*50
    y += noise
    return x, y, c


def gradient_descent(x, y):
    n = 20
    dc = 0.01
    eps = 0.0001
    cc = 1.1
    cs = []
    mses = []
    for k in range(n):
        m0 = np.mean((y-(cc*x**3.0))**2.0)
        m1 = np.mean((y-((cc+dc)*x**3.0))**2.0)
        dmdc = (m1 - m0)/dc
        cs.append(cc)
        mses.append(m0)
        cc -= eps*dmdc
    return cs, mses


def plot_gradient(ax, x, y, c):
    ccs = np.linspace(0.5, 10.0, 200)
    mses = np.zeros(len(ccs))
    for i, cc in enumerate(ccs):
        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
    cmin = ccs[np.argmin(mses)]
    gradient = np.diff(mses)/(ccs[1]-ccs[0])

    ax.plot([cmin, cmin], [-10000, 10000], **lsSpine)
    ax.plot([ccs[0], ccs[-1]], [0, 0], **lsSpine)
    ax.plot(ccs[:-1], gradient, **lsBm)
    ax.set_xlabel('c')
    ax.set_ylabel('Derivative')
    ax.set_xlim(0, 10)
    ax.set_ylim(-10000, 10000)
    ax.set_xticks(np.arange(0.0, 10.1, 2.0))
    ax.set_yticks(np.arange(-10000, 10001, 10000))
    ax.set_yticklabels(['', '0', ''])


def plot_mse(ax, x, y, c):
    ccs = np.linspace(0.5, 10.0, 200)
    mses = np.zeros(len(ccs))
    for i, cc in enumerate(ccs):
        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
    cmin = ccs[np.argmin(mses)]
    gradient = np.diff(mses)/(ccs[1]-ccs[0])

    ay = 1500.0
    asB = dict(arrowprops=dict(arrowstyle="->", shrinkA=0, shrinkB=0,
                               color=lsB['color'], lw=2))
    ax.annotate('', xy=(3.0, ay), xytext=(1.0, ay), **asB)
    ax.annotate('', xy=(5.0, ay), xytext=(3.8, ay), **asB)
    ax.annotate('', xy=(6.2, ay), xytext=(7.4, ay), **asB)
    ax.annotate('', xy=(8.0, ay), xytext=(10.0, ay), **asB)
    ax.plot([cmin, cmin], [0, 30000], **lsSpine)
    ax.plot(ccs, mses, zorder=10, **lsAm)
    ax.set_xlabel('c')
    ax.set_ylabel('Mean squared error')
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 25000)
    ax.set_xticks(np.arange(0.0, 10.1, 2.0))
    ax.set_yticks(np.arange(0, 30001, 10000))
    ax.set_yticklabels(['0', '', '', ''])


if __name__ == "__main__":
    x, y, c = create_data()
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=cm_size(figure_width, 1.1*figure_height))
    fig.subplots_adjust(wspace=0.5, **adjust_fs(left=5.0, right=1.2))
    plot_gradient(ax1, x, y, c)
    plot_mse(ax2, x, y, c)
    fig.savefig("cubicgradient.pdf")
    plt.close()
67 regression/lecture/powergradientdescent.py Normal file
@@ -0,0 +1,67 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mt
from plotstyle import *


def power_law(x, c, a):
    return c*x**a


def create_data():
    # wikipedia:
    # Generally, males vary in total length from 250 to 390 cm and
    # weigh between 90 and 306 kg
    c = 6.0
    x = np.arange(2.2, 3.9, 0.05)
    y = power_law(x, c, 3.0)
    rng = np.random.RandomState(32281)
    noise = rng.randn(len(x))*50
    y += noise
    return x, y, c


def gradient_descent(x, y, func, p0):
    n = 20000
    h = 1e-7
    ph = np.identity(len(p0))*h
    eps = 0.00001
    p = p0
    ps = np.zeros((n, len(p0)))
    mses = np.zeros(n)
    for k in range(n):
        m0 = np.mean((y-func(x, *p))**2.0)
        gradient = np.array([(np.mean((y-func(x, *(p+ph[:,i])))**2.0) - m0)/h
                             for i in range(len(p))])
        ps[k,:] = p
        mses[k] = m0
        p -= eps*gradient
    return ps, mses


def plot_gradient_descent(ax, x, y, c, ps, mses):
    cs = np.linspace(0.0, 10.0, 300)
    bs = np.linspace(1.0, 5.5, 180)
    mse = np.zeros((len(bs), len(cs)))
    for i in range(len(bs)):
        for k in range(len(cs)):
            mse[i, k] = np.mean((y-power_law(x, cs[k], bs[i]))**2.0)
    z = np.log10(mse)
    ax.contourf(cs, bs, z, levels=(3.3, 3.36, 3.5, 4.0, 4.5, 5.5, 6.5, 7.5, 8.5),
                cmap='Blues_r')
    ax.plot(ps[::5,0], ps[::5,1], **lsBm)
    ax.plot(ps[-1,0], ps[-1,1], **psC)
    ax.set_xlabel('c')
    ax.set_ylabel('a')
    ax.yaxis.set_major_locator(mt.MultipleLocator(1.0))
    ax.set_aspect('equal')


if __name__ == "__main__":
    x, y, c = create_data()
    ps, mses = gradient_descent(x, y, power_law, [1.0, 1.0])
    fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.3*figure_height))
    fig.subplots_adjust(**adjust_fs(left=4.5, right=1.0))
    plot_gradient_descent(ax, x, y, c, ps, mses)
    fig.savefig("powergradientdescent.pdf")
    plt.close()
@@ -198,7 +198,20 @@ So we need a different approach. We want a procedure that finds the
minimum of the cost function with a minimal number of computations and
to arbitrary precision.

-\begin{ibox}[t]{\label{differentialquotientbox}Difference quotient and derivative}
+\section{Gradient}
+
+\begin{figure}[t]
+\includegraphics{cubicgradient}
+\titlecaption{Derivative of the cost function.}{The gradient, the
+derivative \eqref{costderivative} of the cost function, is
+negative to the left of the minimum (vertical line) of the cost
+function, zero (horizontal line) at, and positive to the right of
+the minimum (left). For each value of the parameter $c$ the
+negative gradient (arrows) points towards the minimum of the cost
+function (right).} \label{gradientcubicfig}
+\end{figure}
+
+\begin{ibox}[b]{\label{differentialquotientbox}Difference quotient and derivative}
\includegraphics[width=0.33\textwidth]{derivative}
\hfill
\begin{minipage}[b]{0.63\textwidth}
@@ -226,8 +239,6 @@
sufficiently small $\Delta x$.
\end{ibox}
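The difference quotient from the box above can be checked numerically in a few lines. A small illustration (my example, not from the script), comparing the estimate with the known derivative of sin:

dx = 1e-6;                         % sufficiently small step
x = 0.8;
dfdx = (sin(x + dx) - sin(x))/dx;  % difference quotient
fprintf('estimate: %.6f, true cos(x): %.6f\n', dfdx, cos(x));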

-\section{Gradient}

Imagine placing a ball at some point on the cost function
\figref{cubiccostfig}. Naturally, it would roll down the slope and
eventually stop at the minimum of the error surface (if it had no
@@ -236,17 +247,6 @@ way to the minimum of the cost function. The ball always follows the
steepest slope. Thus we need to figure out the direction of the slope
at the position of the ball.

-\begin{figure}[t]
-\includegraphics{cubicgradient}
-\titlecaption{Derivative of the cost function.}{The gradient, the
-derivative \eqref{costderivative} of the cost function, is
-negative to the left of the minimum (vertical line) of the cost
-function, zero (horizontal line) at, and positive to the right of
-the minimum (left). For each value of the parameter $c$ the
-negative gradient (arrows) points towards the minimum of the cost
-function (right).} \label{gradientcubicfig}
-\end{figure}
-
In our one-dimensional example of a single free parameter the slope is
simply the derivative of the cost function with respect to the
parameter $c$ (\figref{gradientcubicfig}, left). This derivative is called
@@ -434,7 +434,7 @@ landscape over the parameter plane with mountains and valleys and we
are searching for the position of the bottom of the deepest valley
(\figref{powergradientdescentfig}).

-\begin{ibox}[tp]{\label{partialderivativebox}Partial derivatives and gradient}
+\begin{ibox}[t]{\label{partialderivativebox}Partial derivatives and gradient}
Some functions depend on more than a single variable. For example, the function
\[ z = f(x,y) \]
depends on both $x$ and $y$. Using the partial derivatives
@@ -642,13 +642,13 @@ generations. In this way the algorithm is not directed towards higher
fitness, as the gradient descent method would be. Rather, some
neighborhood of the parameter space is randomly probed. That way it is
even possible to escape a local maximum and find a potentially better
-maximum. For this reason, \enterm{genetic algorithms} try to mimic
-evolution in the context of high-dimensional optimization problems, in
-particular with discrete parameter values. In biological evolution,
-the objective function, however, is not a fixed function. It may
-change in time by changing abiotic and biotic environmental
-conditions, making this a very complex but also interesting
-optimization problem.
+maximum. For this reason, \enterm[genetic algorithm]{genetic
+algorithms} try to mimic evolution in the context of
+high-dimensional optimization problems, in particular with discrete
+parameter values. In biological evolution, the objective function,
+however, is not a fixed function. It may change in time by changing
+abiotic and biotic environmental conditions, making this a very
+complex but also interesting optimization problem.

How should a neuron or neural network be designed? As a particular
aspect of the general evolution of a species, this is a fundamental