[regression] first exercise

2020-12-20 23:16:56 +01:00
parent c2e4d4e40c
commit 4b18c855b9
15 changed files with 444 additions and 75 deletions

View File

@@ -0,0 +1,89 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mt
from plotstyle import *


def create_data():
    # wikipedia:
    # Generally, males vary in total length from 250 to 390 cm and
    # weigh between 90 and 306 kg
    c = 6
    x = np.arange(2.2, 3.9, 0.05)
    y = c * x**3.0
    rng = np.random.RandomState(32281)
    noise = rng.randn(len(x))*50
    y += noise
    return x, y, c


def gradient_descent(x, y):
    # one-dimensional gradient descent on the parameter c of y = c*x^3,
    # with the derivative of the mean squared error estimated by a
    # difference quotient:
    n = 20        # number of iterations
    dc = 0.01     # step size of the difference quotient
    eps = 0.0001  # learning rate
    cc = 1.1      # initial value of the parameter c
    cs = []
    mses = []
    for k in range(n):
        m0 = np.mean((y-(cc*x**3.0))**2.0)
        m1 = np.mean((y-((cc+dc)*x**3.0))**2.0)
        dmdc = (m1 - m0)/dc
        cs.append(cc)
        mses.append(m0)
        cc -= eps*dmdc
    return cs, mses


def plot_gradient(ax, x, y, c):
    # derivative of the mean squared error as a function of the parameter c:
    ccs = np.linspace(0.5, 10.0, 200)
    mses = np.zeros(len(ccs))
    for i, cc in enumerate(ccs):
        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
    cmin = ccs[np.argmin(mses)]
    gradient = np.diff(mses)/(ccs[1]-ccs[0])
    ax.plot([cmin, cmin], [-10000, 10000], **lsSpine)
    ax.plot([ccs[0], ccs[-1]], [0, 0], **lsSpine)
    ax.plot(ccs[:-1], gradient, **lsBm)
    ax.set_xlabel('c')
    ax.set_ylabel('Derivative')
    ax.set_xlim(0, 10)
    ax.set_ylim(-10000, 10000)
    ax.set_xticks(np.arange(0.0, 10.1, 2.0))
    ax.set_yticks(np.arange(-10000, 10001, 10000))
    ax.set_yticklabels(['', '0', ''])


def plot_mse(ax, x, y, c):
    # mean squared error as a function of the parameter c, with arrows
    # pointing towards its minimum:
    ccs = np.linspace(0.5, 10.0, 200)
    mses = np.zeros(len(ccs))
    for i, cc in enumerate(ccs):
        mses[i] = np.mean((y-(cc*x**3.0))**2.0)
    cmin = ccs[np.argmin(mses)]
    gradient = np.diff(mses)/(ccs[1]-ccs[0])
    ay = 1500.0
    asB = dict(arrowprops=dict(arrowstyle="->", shrinkA=0, shrinkB=0,
                               color=lsB['color'], lw=2))
    ax.annotate('', xy=(3.0, ay), xytext=(1.0, ay), **asB)
    ax.annotate('', xy=(5.0, ay), xytext=(3.8, ay), **asB)
    ax.annotate('', xy=(6.2, ay), xytext=(7.4, ay), **asB)
    ax.annotate('', xy=(8.0, ay), xytext=(10.0, ay), **asB)
    ax.plot([cmin, cmin], [0, 30000], **lsSpine)
    ax.plot(ccs, mses, zorder=10, **lsAm)
    ax.set_xlabel('c')
    ax.set_ylabel('Mean squared error')
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 25000)
    ax.set_xticks(np.arange(0.0, 10.1, 2.0))
    ax.set_yticks(np.arange(0, 30001, 10000))
    ax.set_yticklabels(['0', '', '', ''])


if __name__ == "__main__":
    x, y, c = create_data()
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=cm_size(figure_width, 1.1*figure_height))
    fig.subplots_adjust(wspace=0.5, **adjust_fs(left=5.0, right=1.2))
    plot_gradient(ax1, x, y, c)
    plot_mse(ax2, x, y, c)
    fig.savefig("cubicgradient.pdf")
    plt.close()
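
The `gradient_descent` function in this script is defined but never called from the `__main__` block. As an editorial sketch (assumed usage, not part of the committed file), it could be exercised like this, appended at the end of the script; the descent should settle near the true parameter value c = 6:

# Editorial sketch (assumed usage, not part of the commit): run the
# one-dimensional gradient descent defined above and plot the mean
# squared error at each visited value of the parameter c.
x, y, c = create_data()
cs, mses = gradient_descent(x, y)
fig, ax = plt.subplots()
ax.plot(cs, mses, 'o-')
ax.axvline(c, linestyle='--')  # true parameter value, c = 6
ax.set_xlabel('c')
ax.set_ylabel('Mean squared error')
fig.savefig("cubicgradientdescent.pdf")  # hypothetical file name
plt.close()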

View File

@@ -0,0 +1,67 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mt
from plotstyle import *


def power_law(x, c, a):
    return c*x**a


def create_data():
    # wikipedia:
    # Generally, males vary in total length from 250 to 390 cm and
    # weigh between 90 and 306 kg
    c = 6.0
    x = np.arange(2.2, 3.9, 0.05)
    y = power_law(x, c, 3.0)
    rng = np.random.RandomState(32281)
    noise = rng.randn(len(x))*50
    y += noise
    return x, y, c


def gradient_descent(x, y, func, p0):
    # gradient descent on all parameters of func, with the gradient of the
    # mean squared error estimated by difference quotients:
    n = 20000     # number of iterations
    h = 1e-7      # step size of the difference quotients
    ph = np.identity(len(p0))*h   # perturbation of each single parameter
    eps = 0.00001                 # learning rate
    p = np.array(p0, dtype=float)  # working copy of the parameters
    ps = np.zeros((n, len(p0)))
    mses = np.zeros(n)
    for k in range(n):
        m0 = np.mean((y-func(x, *p))**2.0)
        gradient = np.array([(np.mean((y-func(x, *(p+ph[:,i])))**2.0) - m0)/h
                             for i in range(len(p))])
        ps[k,:] = p
        mses[k] = m0
        p -= eps*gradient
    return ps, mses


def plot_gradient_descent(ax, x, y, c, ps, mses):
    # error surface (log10 of the mean squared error) over the c-a parameter
    # plane and the path taken by the gradient descent:
    cs = np.linspace(0.0, 10.0, 300)
    bs = np.linspace(1.0, 5.5, 180)
    mse = np.zeros((len(bs), len(cs)))
    for i in range(len(bs)):
        for k in range(len(cs)):
            mse[i, k] = np.mean((y-power_law(x, cs[k], bs[i]))**2.0)
    z = np.log10(mse)
    ax.contourf(cs, bs, z, levels=(3.3, 3.36, 3.5, 4.0, 4.5, 5.5, 6.5, 7.5, 8.5),
                cmap='Blues_r')
    ax.plot(ps[::5,0], ps[::5,1], **lsBm)
    ax.plot(ps[-1,0], ps[-1,1], **psC)
    ax.set_xlabel('c')
    ax.set_ylabel('a')
    ax.yaxis.set_major_locator(mt.MultipleLocator(1.0))
    ax.set_aspect('equal')


if __name__ == "__main__":
    x, y, c = create_data()
    ps, mses = gradient_descent(x, y, power_law, [1.0, 1.0])
    fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.3*figure_height))
    fig.subplots_adjust(**adjust_fs(left=4.5, right=1.0))
    plot_gradient_descent(ax, x, y, c, ps, mses)
    fig.savefig("powergradientdescent.pdf")
    plt.close()
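
As an editorial sanity check (not part of the commit; assumes scipy is available), the end point of the descent can be compared with a standard least-squares fit of the same power law; both should end up close to the true values c = 6 and a = 3:

# Editorial sketch: compare the gradient-descent estimate with
# scipy's least-squares fit of the same power law.
from scipy.optimize import curve_fit

x, y, c = create_data()
ps, mses = gradient_descent(x, y, power_law, [1.0, 1.0])
popt, _ = curve_fit(power_law, x, y, p0=[1.0, 1.0])
print("gradient descent: c=%.2f, a=%.2f" % (ps[-1, 0], ps[-1, 1]))
print("curve_fit:        c=%.2f, a=%.2f" % (popt[0], popt[1]))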

View File

@@ -198,7 +198,20 @@ So we need a different approach. We want a procedure that finds the
minimum of the cost function with a minimal number of computations and
to arbitrary precision.
\begin{ibox}[t]{\label{differentialquotientbox}Difference quotient and derivative}
\section{Gradient}
\begin{figure}[t]
\includegraphics{cubicgradient}
\titlecaption{Derivative of the cost function.}{The gradient, the
derivative \eqref{costderivative} of the cost function, is
negative to the left of the minimum (vertical line) of the cost
function, zero (horizontal line) at, and positive to the right of
the minimum (left). For each value of the parameter $c$ the
negative gradient (arrows) points towards the minimum of the cost
function (right).} \label{gradientcubicfig}
\end{figure}
\begin{ibox}[b]{\label{differentialquotientbox}Difference quotient and derivative}
\includegraphics[width=0.33\textwidth]{derivative}
\hfill
\begin{minipage}[b]{0.63\textwidth}
@@ -226,8 +239,6 @@ to arbitrary precision.
sufficiently small $\Delta x$.
\end{ibox}
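
As a quick editorial illustration of the difference quotient (in Python, the language of the scripts added in this commit; not part of the LaTeX source), a derivative can be estimated by evaluating the function at two nearby points:

# Editorial sketch: approximate f'(x0) by the difference quotient
# (f(x0 + h) - f(x0)) / h for a sufficiently small step h.
def difference_quotient(f, x0, h=1e-6):
    return (f(x0 + h) - f(x0)) / h

print(difference_quotient(lambda x: x**3, 2.0))  # close to the true derivative 12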
\section{Gradient}
Imagine placing a ball at some point on the cost function
\figref{cubiccostfig}. Naturally, it would roll down the slope and
eventually stop at the minimum of the error surface (if it had no
@@ -236,17 +247,6 @@ way to the minimum of the cost function. The ball always follows the
steepest slope. Thus we need to figure out the direction of the slope
at the position of the ball.
\begin{figure}[t]
\includegraphics{cubicgradient}
\titlecaption{Derivative of the cost function.}{The gradient, the
derivative \eqref{costderivative} of the cost function, is
negative to the left of the minimum (vertical line) of the cost
function, zero (horizontal line) at, and positive to the right of
the minimum (left). For each value of the parameter $c$ the
negative gradient (arrows) points towards the minimum of the cost
function (right).} \label{gradientcubicfig}
\end{figure}
In our one-dimensional example of a single free parameter the slope is
simply the derivative of the cost function with respect to the
parameter $c$ (\figref{gradientcubicfig}, left). This derivative is called
@@ -434,7 +434,7 @@ landscape over the parameter plane with mountains and valleys and we
are searching for the position of the bottom of the deepest valley
(\figref{powergradientdescentfig}).
\begin{ibox}[tp]{\label{partialderivativebox}Partial derivatives and gradient}
\begin{ibox}[t]{\label{partialderivativebox}Partial derivatives and gradient}
Some functions depend on more than a single variable. For example, the function
\[ z = f(x,y) \]
depends on both $x$ and $y$. Using the partial derivatives
@@ -642,13 +642,13 @@ generations. In this way the algorithm is not directed towards higher
fitness, as the gradient descent method would be. Rather, some
neighborhood of the parameter space is randomly probed. That way it is
even possible to escape a local maximum and find a potentially better
maximum. For this reason, \enterm{genetic algorithms} try to mimic
evolution in the context of high-dimensional optimization problems, in
particular with discrete parameter values. In biological evolution,
the objective function, however, is not a fixed function. It may
change in time by changing abiotic and biotic environmental
conditions, making this a very complex but also interesting
optimization problem.
maximum. For this reason, \enterm[genetic algorithm]{genetic
algorithms} try to mimic evolution in the context of
high-dimensional optimization problems, in particular with discrete
parameter values. In biological evolution, the objective function,
however, is not a fixed function. It may change in time by changing
abiotic and biotic environmental conditions, making this a very
complex but also interesting optimization problem.
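
A minimal editorial sketch of this random probing (not from the text, and a drastic simplification of a genetic algorithm): in each generation the current parameters are mutated at random and the fittest mutant, which may well be worse than its parent, carries on:

# Editorial sketch: undirected random probing of the parameter
# neighborhood.  Because the parent itself is discarded, the search can
# also move to lower fitness and thereby escape a local maximum.
import numpy as np

def random_probing(fitness, p0, n_pop=20, n_gen=200, sigma=0.2, seed=1):
    rng = np.random.RandomState(seed)
    p = np.array(p0, dtype=float)
    for _ in range(n_gen):
        mutants = p + sigma * rng.randn(n_pop, len(p))  # random neighborhood
        p = mutants[np.argmax([fitness(q) for q in mutants])]
    return p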
How should a neuron or neural network be designed? As a particular
aspect of the general evolution of a species, this is a fundamental