[regression] new figures for fit of cubic functions
This commit is contained in:
parent
60a8250590
commit
ebff6cf5ad
102
regression/lecture/cubicerrors.py
Normal file
102
regression/lecture/cubicerrors.py
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def create_data(c=6, noise_sd=50.0, seed=32281):
    """Generate an artificial noisy data set following ``y = c * x**3``.

    The numbers are chosen to resemble tigers. According to Wikipedia,
    males generally vary in total length from 250 to 390 cm and weigh
    between 90 and 306 kg.

    Parameters
    ----------
    c : float
        Factor of the cubic function used to generate the data.
    noise_sd : float
        Standard deviation of the normally distributed noise added to y.
    seed : int
        Seed of the random number generator, fixed by default so that
        every call returns the same data.

    Returns
    -------
    x : ndarray
        Sizes, from 2.2 up to (but excluding) 3.9 in steps of 0.05.
    y : ndarray
        Noisy weights, ``c * x**3`` plus noise.
    c : float
        The factor that was used, handed back for plotting the true curve.
    """
    x = np.arange(2.2, 3.9, 0.05)
    rng = np.random.RandomState(seed)
    y = c * x**3.0 + noise_sd * rng.randn(len(x))
    return x, y, c
|
||||||
|
|
||||||
|
|
||||||
|
def plot_data(ax, x, y, c):
    """Plot the data, the generating cubic and four wrongly scaled cubics.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw into.
    x, y : ndarray
        Data points, shown as blue dots.
    c : float
        Factor of the cubic ``c * x**3`` drawn in red. The same cubic
        scaled by 0.25, 0.5, 2 and 4 is drawn in orange.
    """
    ax.scatter(x, y, marker='o', color='b', s=40, zorder=10)
    # The curves get their own dense x grid, independent of the data.
    xx = np.linspace(2.1, 3.9, 100)
    ax.plot(xx, c*xx**3.0, color='#CC0000', lw=2, zorder=5)
    for factor in (0.25, 0.5, 2.0, 4.0):
        ax.plot(xx, factor*c*xx**3.0, color='#FF9900', lw=1.5, zorder=5)

    # Open frame: only left and bottom spines with outward ticks.
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    # Calling tick_params once is enough, it acts on both axes by default.
    ax.tick_params(direction="out", width=1.25)

    ax.set_xlabel('Size x / m')
    ax.set_ylabel('Weight y / kg')
    ax.set_xlim(2, 4)
    ax.set_ylim(0, 400)
    ax.set_xticks(np.arange(2.0, 4.1, 0.5))
    ax.set_yticks(np.arange(0, 401, 100))
|
||||||
|
|
||||||
|
|
||||||
|
def plot_data_errors(ax, x, y, c):
    """Plot the data and mark the errors of a few selected points.

    All data points are drawn as small dots. Nine hand-picked points
    are emphasized and connected to the cubic ``c * x**3`` by vertical
    orange lines that visualize their deviation from the curve. One of
    these lines is labeled 'Error'.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw into.
    x, y : ndarray
        Data points. They must be arrays with at least 34 elements,
        because fixed indices up to 33 are selected.
    c : float
        Factor of the cubic function drawn in red.
    """
    # Open frame: only left and bottom spines with outward ticks.
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    # Calling tick_params once is enough, it acts on both axes by default.
    ax.tick_params(direction="out", width=1.25)

    # No y label and no y tick labels are drawn in this panel.
    ax.set_xlabel('Size x / m')
    ax.set_xlim(2, 4)
    ax.set_ylim(0, 400)
    ax.set_xticks(np.arange(2.0, 4.1, 0.5))
    ax.set_yticks(np.arange(0, 401, 100))
    ax.set_yticklabels([])

    # The arrow tip is placed relative to data point 28.
    ax.annotate('Error',
                xy=(x[28]+0.05, y[28]+60), xycoords='data',
                xytext=(3.4, 70), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(0.9, 1.0),
                                connectionstyle="angle3,angleA=50,angleB=-30"))

    ax.scatter(x[:40], y[:40], color='b', s=10, zorder=0)
    inxs = [3, 10, 11, 17, 18, 21, 28, 30, 33]
    ax.scatter(x[inxs], y[inxs], color='b', s=40, zorder=10)

    xx = np.linspace(2.1, 3.9, 100)
    ax.plot(xx, c*xx**3.0, color='#CC0000', lw=2)

    # Vertical line from the curve to each selected data point.
    for i in inxs:
        ax.plot([x[i], x[i]], [c*x[i]**3.0, y[i]],
                color='#FF9900', lw=2, zorder=5)
|
||||||
|
|
||||||
|
def plot_error_hist(ax, x, y, c):
    """Plot a histogram of the squared errors and mark their mean.

    The squared error of each data point is its squared deviation from
    the cubic ``c * x**3``.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw into.
    x, y : ndarray
        Data points.
    c : float
        Factor of the cubic function the data are compared to.
    """
    # Open frame: only left and bottom spines with outward ticks.
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    # Calling tick_params once is enough, it acts on both axes by default.
    ax.tick_params(direction="out", width=1.25)

    ax.set_xlabel('Squared error')
    ax.set_ylabel('Frequency')
    # Bin edges from 0 to 1200 in steps of 100. The y axis is left to
    # matplotlib's automatic limits and ticks.
    bins = np.arange(0.0, 1250.0, 100)
    ax.set_xlim(bins[0], bins[-1])
    ax.set_xticks(np.arange(bins[0], bins[-1], 200))

    errors = (y - c*x**3.0)**2.0
    mse = np.mean(errors)
    ax.annotate('Mean\nsquared\nerror',
                xy=(mse, 0.5), xycoords='data',
                xytext=(800, 3), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(0.0, 0.2),
                                connectionstyle="angle3,angleA=10,angleB=90"))
    ax.hist(errors, bins, color='#FF9900')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Two panels side by side: the data with candidate cubics on the
    # left, the errors of selected data points on the right.
    # plot_error_hist() is available but not used for this figure.
    x, y, c = create_data()
    plt.xkcd()
    fig, axes = plt.subplots(1, 2, figsize=(7., 2.6))
    left_ax, right_ax = axes
    plot_data(left_ax, x, y, c)
    plot_data_errors(right_ax, x, y, c)
    fig.set_facecolor("white")
    fig.tight_layout()
    fig.savefig("cubicerrors.pdf")
    plt.close()
|
40
regression/lecture/cubicfunc.py
Normal file
40
regression/lecture/cubicfunc.py
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Artificial data set resembling tigers. According to Wikipedia,
    # males generally vary in total length from 250 to 390 cm and
    # weigh between 90 and 306 kg.
    c = 6
    x = np.arange(2.2, 3.9, 0.05)
    y = c * x**3.0
    # Fixed seed, so the figure shows the same data on every run.
    rng = np.random.RandomState(32281)
    noise = rng.randn(len(x))*50
    y += noise

    plt.xkcd()
    fig, ax = plt.subplots(figsize=(7., 3.6))

    # Data as blue dots, the generating cubic in red, and the same
    # cubic scaled by 0.25, 0.5, 2 and 4 in orange.
    ax.scatter(x, y, marker='o', color='b', s=40, zorder=10)
    xx = np.linspace(2.1, 3.9, 100)
    ax.plot(xx, c*xx**3.0, color='#CC0000', lw=3, zorder=5)
    for cc in [0.25*c, 0.5*c, 2.0*c, 4.0*c]:
        ax.plot(xx, cc*xx**3.0, color='#FF9900', lw=2, zorder=5)

    # Open frame: only left and bottom spines with outward ticks.
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    # Calling tick_params once is enough, it acts on both axes by default.
    ax.tick_params(direction="out", width=1.25)

    ax.set_xlabel('Size x / m')
    ax.set_ylabel('Weight y / kg')
    ax.set_xlim(2, 4)
    ax.set_ylim(0, 400)
    ax.set_xticks(np.arange(2.0, 4.1, 0.5))
    ax.set_yticks(np.arange(0, 401, 100))

    fig.set_facecolor("white")
    # Positional arguments are left, bottom, right, top.
    fig.subplots_adjust(0.11, 0.16, 0.98, 0.97)
    fig.savefig("cubicfunc.pdf")
    plt.close()
|
93
regression/lecture/cubicmse.py
Normal file
93
regression/lecture/cubicmse.py
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def create_data(c=6, noise_sd=50.0, seed=32281):
    """Generate an artificial noisy data set following ``y = c * x**3``.

    The numbers are chosen to resemble tigers. According to Wikipedia,
    males generally vary in total length from 250 to 390 cm and weigh
    between 90 and 306 kg.

    Parameters
    ----------
    c : float
        Factor of the cubic function used to generate the data.
    noise_sd : float
        Standard deviation of the normally distributed noise added to y.
    seed : int
        Seed of the random number generator, fixed by default so that
        every call returns the same data.

    Returns
    -------
    x : ndarray
        Sizes, from 2.2 up to (but excluding) 3.9 in steps of 0.05.
    y : ndarray
        Noisy weights, ``c * x**3`` plus noise.
    c : float
        The factor that was used.
    """
    x = np.arange(2.2, 3.9, 0.05)
    rng = np.random.RandomState(seed)
    y = c * x**3.0 + noise_sd * rng.randn(len(x))
    return x, y, c
|
||||||
|
|
||||||
|
def gradient_descent(x, y, n=20, dc=0.01, eps=0.0001, c0=1.1):
    """Minimize the mean squared error of ``y = c * x**3`` by gradient descent.

    The derivative of the mean squared error with respect to the factor
    c is approximated numerically by a forward difference quotient.

    Parameters
    ----------
    x, y : ndarray
        Data points.
    n : int
        Number of iterations.
    dc : float
        Step in c used for the difference quotient.
    eps : float
        Factor scaling the gradient into the update of c.
    c0 : float
        Initial value of c.

    Returns
    -------
    cs : list of float
        Value of c at the beginning of each iteration.
    mses : list of float
        Mean squared error belonging to each value in `cs`.
    """
    def mse(c):
        return np.mean((y - c*x**3.0)**2.0)

    cc = c0
    cs = []
    mses = []
    for _ in range(n):
        m0 = mse(cc)
        dmdc = (mse(cc + dc) - m0)/dc
        # Store the state before the update, so cs[0] is the start value.
        cs.append(cc)
        mses.append(m0)
        cc -= eps*dmdc
    return cs, mses
|
||||||
|
|
||||||
|
def plot_mse(ax, x, y, c, cs):
    """Plot the mean squared error as a function of c with descent steps.

    The cost function is drawn as a blue line for 200 values of c
    between 0.5 and 10. The values in `cs` are marked on it as red
    dots, the last one as a larger orange dot. Arrows indicate up to
    the first four steps.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw into.
    x, y : ndarray
        Data points.
    c : float
        Not used by this function.
    cs : sequence of float
        Successive values of c, e.g. from a gradient descent. May be
        shorter than five elements or empty.
    """
    cube = x**3.0
    ms = np.array([np.mean((y - cc*cube)**2.0) for cc in cs])
    ccs = np.linspace(0.5, 10.0, 200)
    mses = np.array([np.mean((y - cc*cube)**2.0) for cc in ccs])

    ax.plot(ccs, mses, 'b', lw=2, zorder=10)
    ax.scatter(cs, ms, color='#cc0000', s=40, zorder=20)
    if len(cs) > 0:
        ax.scatter(cs[-1], ms[-1], color='#FF9900', s=60, zorder=30)
    # At most four arrows, and never beyond the available steps.
    for i in range(min(4, len(cs) - 1)):
        ax.annotate('',
                    xy=(cs[i+1]+0.2, ms[i+1]), xycoords='data',
                    xytext=(cs[i]+0.3, ms[i]+200), textcoords='data',
                    ha='left',
                    arrowprops=dict(arrowstyle="->", relpos=(0.0, 0.0),
                                    connectionstyle="angle3,angleA=10,angleB=70"))

    # Open frame: only left and bottom spines with outward ticks.
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    # Calling tick_params once is enough, it acts on both axes by default.
    ax.tick_params(direction="out", width=1.25)

    ax.set_xlabel('c')
    ax.set_ylabel('mean squared error')
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 25000)
    ax.set_xticks(np.arange(0.0, 10.1, 2.0))
    ax.set_yticks(np.arange(0, 30001, 10000))
|
||||||
|
|
||||||
|
def plot_descent(ax, cs, mses):
    """Plot the mean squared error against the iteration number.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw into.
    cs : sequence of float
        Not used by this function.
    mses : sequence of float
        Mean squared error of each iteration. Iterations are counted
        from one, and the x axis is limited to the range 0 to 10.5.
    """
    ax.plot(np.arange(len(mses))+1, mses, '-o', c='#cc0000', mew=0, ms=8)

    # Open frame: only left and bottom spines with outward ticks.
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    # Calling tick_params once is enough, it acts on both axes by default.
    ax.tick_params(direction="out", width=1.25)

    # No y label and no y tick labels are drawn in this panel.
    ax.set_xlabel('iteration')
    ax.set_xlim(0, 10.5)
    ax.set_ylim(0, 25000)
    ax.set_xticks(np.arange(0.0, 10.1, 2.0))
    ax.set_yticks(np.arange(0, 30001, 10000))
    ax.set_yticklabels([])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Two panels side by side: the cost function with the descent
    # steps on the left, the error per iteration on the right.
    x, y, c = create_data()
    cs, mses = gradient_descent(x, y)
    plt.xkcd()
    fig, axes = plt.subplots(1, 2, figsize=(7., 2.6))
    cost_ax, descent_ax = axes
    plot_mse(cost_ax, x, y, c, cs)
    plot_descent(descent_ax, cs, mses)
    fig.set_facecolor("white")
    fig.tight_layout()
    fig.savefig("cubicmse.pdf")
    plt.close()
|
@ -25,13 +25,16 @@
|
|||||||
|
|
||||||
\subsection{Start with one-dimensional problem!}
|
\subsection{Start with one-dimensional problem!}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Let's fit a cubic function $y=cx^3$ (weight versus length of a tiger)
|
\item Let's fit a cubic function $y=cx^3$ (weight versus length of a tiger)\\
|
||||||
|
\includegraphics[width=0.8\textwidth]{cubicfunc}
|
||||||
\item Introduce the problem, $c$ is density and form factor
|
\item Introduce the problem, $c$ is density and form factor
|
||||||
\item How to generate an artificial data set (refer to simulation chapter)
|
\item How to generate an artificial data set (refer to simulation chapter)
|
||||||
\item How to plot a function (do not use the data x values!)
|
\item How to plot a function (do not use the data x values!)
|
||||||
\item Just the mean square error as a function of the factor c
|
\item Just the mean square error as a function of the factor c\\
|
||||||
|
\includegraphics[width=0.8\textwidth]{cubicerrors}
|
||||||
\item Also mention the cost function for a straight line
|
\item Also mention the cost function for a straight line
|
||||||
\item 1-d gradient, NO quiver plot (it is a nightmare to get this right)
|
\item 1-d gradient, NO quiver plot (it is a nightmare to get this right)\\
|
||||||
|
\includegraphics[width=0.8\textwidth]{cubicmse}
|
||||||
\item 1-d gradient descent
|
\item 1-d gradient descent
|
||||||
\item Describe in words the n-d problem.
|
\item Describe in words the n-d problem.
|
||||||
\item Homework is to do the 2d problem with the straight line!
|
\item Homework is to do the 2d problem with the straight line!
|
||||||
|
Reference in New Issue
Block a user