[regression] first exercise
regression/lecture/cubicgradient.py (new file, 89 lines added)
@@ -0,0 +1,89 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mt
from plotstyle import *

def create_data():
    # wikipedia:
    # Generally, males vary in total length from 250 to 390 cm and
    # weigh between 90 and 306 kg
    c = 6
    x = np.arange(2.2, 3.9, 0.05)
    y = c * x**3.0
    rng = np.random.RandomState(32281)
    noise = rng.randn(len(x))*50
    y += noise
    return x, y, c


def gradient_descent(x, y):
    n = 20
    dc = 0.01
    eps = 0.0001
    cc = 1.1
    cs = []
    mses = []
    for k in range(n):
        m0 = np.mean((y-(cc*x**3.0))**2.0)
        m1 = np.mean((y-((cc+dc)*x**3.0))**2.0)
        dmdc = (m1 - m0)/dc
        cs.append(cc)
        mses.append(m0)
        cc -= eps*dmdc
    return cs, mses


def plot_gradient(ax, x, y, c):
    ccs = np.linspace(0.5, 10.0, 200)
    mses = np.zeros(len(ccs))
    for i, cc in enumerate(ccs):
        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
    cmin = ccs[np.argmin(mses)]
    gradient = np.diff(mses)/(ccs[1]-ccs[0])

    ax.plot([cmin, cmin], [-10000, 10000], **lsSpine)
    ax.plot([ccs[0], ccs[-1]], [0, 0], **lsSpine)
    ax.plot(ccs[:-1], gradient, **lsBm)
    ax.set_xlabel('c')
    ax.set_ylabel('Derivative')
    ax.set_xlim(0, 10)
    ax.set_ylim(-10000, 10000)
    ax.set_xticks(np.arange(0.0, 10.1, 2.0))
    ax.set_yticks(np.arange(-10000, 10001, 10000))
    ax.set_yticklabels(['', '0', ''])


def plot_mse(ax, x, y, c):
    ccs = np.linspace(0.5, 10.0, 200)
    mses = np.zeros(len(ccs))
    for i, cc in enumerate(ccs):
        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
    cmin = ccs[np.argmin(mses)]
    gradient = np.diff(mses)/(ccs[1]-ccs[0])

    ay = 1500.0
    asB = dict(arrowprops=dict(arrowstyle="->", shrinkA=0, shrinkB=0,
                               color=lsB['color'], lw=2))
    ax.annotate('', xy=(3.0, ay), xytext=(1.0, ay), **asB)
    ax.annotate('', xy=(5.0, ay), xytext=(3.8, ay), **asB)
    ax.annotate('', xy=(6.2, ay), xytext=(7.4, ay), **asB)
    ax.annotate('', xy=(8.0, ay), xytext=(10.0, ay), **asB)
    ax.plot([cmin, cmin], [0, 30000], **lsSpine)
    ax.plot(ccs, mses, zorder=10, **lsAm)
    ax.set_xlabel('c')
    ax.set_ylabel('Mean squared error')
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 25000)
    ax.set_xticks(np.arange(0.0, 10.1, 2.0))
    ax.set_yticks(np.arange(0, 30001, 10000))
    ax.set_yticklabels(['0', '', '', ''])


if __name__ == "__main__":
    x, y, c = create_data()
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=cm_size(figure_width, 1.1*figure_height))
    fig.subplots_adjust(wspace=0.5, **adjust_fs(left=5.0, right=1.2))
    plot_gradient(ax1, x, y, c)
    plot_mse(ax2, x, y, c)
    fig.savefig("cubicgradient.pdf")
    plt.close()
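The descent loop in gradient_descent() depends only on numpy; plotstyle is needed just for the figure styling. A minimal standalone sketch of the same update rule (values copied from the script above; the final print is added for illustration and is not part of the commit):

import numpy as np

# synthetic data as in create_data()
x = np.arange(2.2, 3.9, 0.05)
y = 6.0 * x**3.0 + np.random.RandomState(32281).randn(len(x)) * 50

cc = 1.1      # initial guess for the cubic coefficient
dc = 0.01     # step size of the difference quotient
eps = 0.0001  # learning rate
for k in range(20):
    m0 = np.mean((y - cc * x**3.0)**2.0)         # cost at cc
    m1 = np.mean((y - (cc + dc) * x**3.0)**2.0)  # cost at cc + dc
    cc -= eps * (m1 - m0) / dc                   # step against the estimated slope
print(cc)  # should settle close to the true coefficient c = 6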
regression/lecture/powergradientdescent.py (new file, 67 lines added)
@@ -0,0 +1,67 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mt
from plotstyle import *


def power_law(x, c, a):
    return c*x**a


def create_data():
    # wikipedia:
    # Generally, males vary in total length from 250 to 390 cm and
    # weigh between 90 and 306 kg
    c = 6.0
    x = np.arange(2.2, 3.9, 0.05)
    y = power_law(x, c, 3.0)
    rng = np.random.RandomState(32281)
    noise = rng.randn(len(x))*50
    y += noise
    return x, y, c


def gradient_descent(x, y, func, p0):
    n = 20000
    h = 1e-7
    ph = np.identity(len(p0))*h
    eps = 0.00001
    p = p0
    ps = np.zeros((n, len(p0)))
    mses = np.zeros(n)
    for k in range(n):
        m0 = np.mean((y-func(x, *p))**2.0)
        gradient = np.array([(np.mean((y-func(x, *(p+ph[:,i])))**2.0) - m0)/h
                             for i in range(len(p))])
        ps[k,:] = p
        mses[k] = m0
        p -= eps*gradient
    return ps, mses


def plot_gradient_descent(ax, x, y, c, ps, mses):
    cs = np.linspace(0.0, 10.0, 300)
    bs = np.linspace(1.0, 5.5, 180)
    mse = np.zeros((len(bs), len(cs)))
    for i in range(len(bs)):
        for k in range(len(cs)):
            mse[i, k] = np.mean((y-power_law(x, cs[k], bs[i]))**2.0)
    z = np.log10(mse)
    ax.contourf(cs, bs, z, levels=(3.3, 3.36, 3.5, 4.0, 4.5, 5.5, 6.5, 7.5, 8.5),
                cmap='Blues_r')
    ax.plot(ps[::5,0], ps[::5,1], **lsBm)
    ax.plot(ps[-1,0], ps[-1,1], **psC)
    ax.set_xlabel('c')
    ax.set_ylabel('a')
    ax.yaxis.set_major_locator(mt.MultipleLocator(1.0))
    ax.set_aspect('equal')


if __name__ == "__main__":
    x, y, c = create_data()
    ps, mses = gradient_descent(x, y, power_law, [1.0, 1.0])
    fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.3*figure_height))
    fig.subplots_adjust(**adjust_fs(left=4.5, right=1.0))
    plot_gradient_descent(ax, x, y, c, ps, mses)
    fig.savefig("powergradientdescent.pdf")
    plt.close()
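The core of gradient_descent() above is the finite-difference gradient: each parameter is perturbed by h in turn, using one column of np.identity(len(p0))*h. A condensed sketch of that step for an arbitrary func(x, *p), restating the loop body rather than adding functionality from the commit:

import numpy as np

def numerical_gradient(x, y, func, p, h=1e-7):
    # mean squared error at the current parameter vector p
    m0 = np.mean((y - func(x, *p))**2.0)
    # forward difference: perturb one parameter at a time by h
    ph = np.identity(len(p)) * h
    return np.array([(np.mean((y - func(x, *(p + ph[:, i])))**2.0) - m0) / h
                     for i in range(len(p))])

In the script this gradient drives the update p -= eps*gradient for n = 20000 iterations; plot_gradient_descent() then draws every fifth point of the resulting path on top of the log10 mean-squared-error contours.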
@@ -198,7 +198,20 @@ So we need a different approach. We want a procedure that finds the
minimum of the cost function with a minimal number of computations and
to arbitrary precision.

-\begin{ibox}[t]{\label{differentialquotientbox}Difference quotient and derivative}
+\section{Gradient}
+
+\begin{figure}[t]
+\includegraphics{cubicgradient}
+\titlecaption{Derivative of the cost function.}{The gradient, the
+derivative \eqref{costderivative} of the cost function, is
+negative to the left of the minimum (vertical line) of the cost
+function, zero (horizontal line) at, and positive to the right of
+the minimum (left). For each value of the parameter $c$ the
+negative gradient (arrows) points towards the minimum of the cost
+function (right).} \label{gradientcubicfig}
+\end{figure}
+
+\begin{ibox}[b]{\label{differentialquotientbox}Difference quotient and derivative}
\includegraphics[width=0.33\textwidth]{derivative}
\hfill
\begin{minipage}[b]{0.63\textwidth}
@@ -226,8 +239,6 @@ to arbitrary precision.
sufficiently small $\Delta x$.
\end{ibox}

-\section{Gradient}
-
Imagine placing a ball at some point on the cost function
\figref{cubiccostfig}. Naturally, it would roll down the slope and
eventually stop at the minimum of the error surface (if it had no
@@ -236,17 +247,6 @@ way to the minimum of the cost function. The ball always follows the
steepest slope. Thus we need to figure out the direction of the slope
at the position of the ball.

-\begin{figure}[t]
-\includegraphics{cubicgradient}
-\titlecaption{Derivative of the cost function.}{The gradient, the
-derivative \eqref{costderivative} of the cost function, is
-negative to the left of the minimum (vertical line) of the cost
-function, zero (horizontal line) at, and positive to the right of
-the minimum (left). For each value of the parameter $c$ the
-negative gradient (arrows) points towards the minimum of the cost
-function (right).} \label{gradientcubicfig}
-\end{figure}
-
In our one-dimensional example of a single free parameter the slope is
simply the derivative of the cost function with respect to the
parameter $c$ (\figref{gradientcubicfig}, left). This derivative is called
@@ -434,7 +434,7 @@ landscape over the parameter plane with mountains and valleys and we
are searching for the position of the bottom of the deepest valley
(\figref{powergradientdescentfig}).

-\begin{ibox}[tp]{\label{partialderivativebox}Partial derivatives and gradient}
+\begin{ibox}[t]{\label{partialderivativebox}Partial derivatives and gradient}
Some functions depend on more than a single variable. For example, the function
\[ z = f(x,y) \]
depends on both $x$ and $y$. Using the partial derivatives
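For the two-parameter power-law fit in powergradientdescent.py, the gradient that this box introduces is the vector of partial derivatives of the cost function with respect to $c$ and $a$; the symbol $f_{\rm cost}$ is chosen here for illustration and need not match the chapter's notation:

\[ \nabla f_{\rm cost}(c, a) = \left( \frac{\partial f_{\rm cost}}{\partial c}, \; \frac{\partial f_{\rm cost}}{\partial a} \right) \]

gradient_descent() in powergradientdescent.py approximates each of these components with a forward difference quotient of step size $h$.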
@@ -642,13 +642,13 @@ generations. In this way the algorithm is not directed towards higher
fitness, as the gradient descent method would be. Rather, some
neighborhood of the parameter space is randomly probed. That way it is
even possible to escape a local maximum and find a potentially better
-maximum. For this reason, \enterm{genetic algorithms} try to mimic
-evolution in the context of high-dimensional optimization problems, in
-particular with discrete parameter values. In biological evolution,
-the objective function, however, is not a fixed function. It may
-change in time by changing abiotic and biotic environmental
-conditions, making this a very complex but also interesting
-optimization problem.
+maximum. For this reason, \enterm[genetic algorithm]{genetic
+algorithms} try to mimic evolution in the context of
+high-dimensional optimization problems, in particular with discrete
+parameter values. In biological evolution, the objective function,
+however, is not a fixed function. It may change in time by changing
+abiotic and biotic environmental conditions, making this a very
+complex but also interesting optimization problem.

How should a neuron or neural network be designed? As a particular
aspect of the general evolution of a species, this is a fundamental