[regression] new figures for fit of cubic functions

This commit is contained in:
Jan Benda 2019-12-19 19:25:01 +01:00
parent 60a8250590
commit ebff6cf5ad
4 changed files with 241 additions and 3 deletions

View File

@ -0,0 +1,102 @@
import matplotlib.pyplot as plt
import numpy as np
def create_data():
    """Generate a noisy cubic data set ``y = c*x**3 + noise``.

    The value ranges are motivated by a note taken from Wikipedia:
    generally, males vary in total length from 250 to 390 cm and
    weigh between 90 and 306 kg.

    Returns
    -------
    x : ndarray
        Sizes generated by ``np.arange(2.2, 3.9, 0.05)``.
    y : ndarray
        ``c * x**3`` plus standard normal noise multiplied by 50.
    c : int
        The factor used for generating the data (6).
    """
    factor = 6
    sizes = np.arange(2.2, 3.9, 0.05)
    # fixed seed, so that each call returns exactly the same data:
    generator = np.random.RandomState(32281)
    weights = factor * sizes**3.0 + generator.randn(len(sizes)) * 50
    return sizes, weights, factor
def plot_data(ax, x, y, c):
    """Plot the data together with cubic functions for several factors.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw into.
    x, y : array_like
        Data points, drawn as blue scatter markers.
    c : float
        Factor of the cubic function ``c*x**3`` that is drawn in red.
        Cubic functions with 0.25, 0.5, 2 and 4 times this factor are
        drawn in orange.
    """
    ax.scatter(x, y, marker='o', color='b', s=40, zorder=10)
    # the functions are evaluated on their own grid, not on the data x values:
    xx = np.linspace(2.1, 3.9, 100)
    ax.plot(xx, c*xx**3.0, color='#CC0000', lw=2, zorder=5)
    for scale in (0.25, 0.5, 2.0, 4.0):
        ax.plot(xx, scale*c*xx**3.0, color='#FF9900', lw=1.5, zorder=5)
    # show left and bottom spines only:
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    # a single call is sufficient (it was issued twice before):
    ax.tick_params(direction="out", width=1.25)
    ax.set_xlabel('Size x / m')
    ax.set_ylabel('Weight y / kg')
    ax.set_xlim(2, 4)
    ax.set_ylim(0, 400)
    ax.set_xticks(np.arange(2.0, 4.1, 0.5))
    ax.set_yticks(np.arange(0, 401, 100))
def plot_data_errors(ax, x, y, c):
    """Plot the data, the cubic function, and the errors of selected points.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw into.
    x, y : ndarray
        Data points. They are indexed with a list of indices (up to
        index 33), so they need to be numpy arrays of sufficient length.
    c : float
        Factor of the cubic function ``c*x**3`` that is drawn in red.
    """
    # show left and bottom spines only:
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    # a single call is sufficient (it was issued twice before):
    ax.tick_params(direction="out", width=1.25)
    ax.set_xlabel('Size x / m')
    # no y-label and no y-tick labels are set on this axes; the y-limits
    # and y-ticks are the same as the ones set in plot_data()
    ax.set_xlim(2, 4)
    ax.set_ylim(0, 400)
    ax.set_xticks(np.arange(2.0, 4.1, 0.5))
    ax.set_yticks(np.arange(0, 401, 100))
    ax.set_yticklabels([])
    # arrow pointing close to data point 28:
    ax.annotate('Error',
                xy=(x[28]+0.05, y[28]+60), xycoords='data',
                xytext=(3.4, 70), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(0.9,1.0),
                                connectionstyle="angle3,angleA=50,angleB=-30") )
    # all data as small markers in the background:
    ax.scatter(x[:40], y[:40], color='b', s=10, zorder=0)
    # selected data points that are highlighted by larger markers:
    inxs = [3, 10, 11, 17, 18, 21, 28, 30, 33]
    ax.scatter(x[inxs], y[inxs], color='b', s=40, zorder=10)
    xx = np.linspace(2.1, 3.9, 100)
    ax.plot(xx, c*xx**3.0, color='#CC0000', lw=2)
    # vertical line from the function value to each highlighted data point:
    for i in inxs:
        ax.plot([x[i], x[i]], [c*x[i]**3.0, y[i]],
                color='#FF9900', lw=2, zorder=5)
def plot_error_hist(ax, x, y, c):
    """Plot a histogram of the squared errors and mark their mean.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw into.
    x, y : ndarray
        Data points.
    c : float
        Factor of the cubic function ``c*x**3`` the data are compared with.
    """
    # show left and bottom spines only:
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    # a single call is sufficient (it was issued twice before):
    ax.tick_params(direction="out", width=1.25)
    ax.set_xlabel('Squared error')
    ax.set_ylabel('Frequency')
    bins = np.arange(0.0, 1250.0, 100)
    ax.set_xlim(bins[0], bins[-1])
    ax.set_xticks(np.arange(bins[0], bins[-1], 200))
    # y-limits and y-ticks are not set here.
    # squared deviation of each data point from the cubic function:
    errors = (y-(c*x**3.0))**2.0
    mean_squared_error = np.mean(errors)
    # NOTE(review): squared errors beyond the last bin edge do not show
    # up in the histogram - confirm that the bin range fits the data.
    ax.annotate('Mean\nsquared\nerror',
                xy=(mean_squared_error, 0.5), xycoords='data',
                xytext=(800, 3), textcoords='data', ha='left',
                arrowprops=dict(arrowstyle="->", relpos=(0.0,0.2),
                                connectionstyle="angle3,angleA=10,angleB=90") )
    ax.hist(errors, bins, color='#FF9900')
if __name__ == "__main__":
    # data and the factor they were generated with:
    x, y, c = create_data()
    # switch matplotlib to xkcd sketch style:
    plt.xkcd()
    fig, axs = plt.subplots(1, 2, figsize=(7., 2.6))
    left_ax, right_ax = axs[0], axs[1]
    plot_data(left_ax, x, y, c)
    # plot_error_hist() is an alternative for the right panel (not used):
    plot_data_errors(right_ax, x, y, c)
    fig.set_facecolor("white")
    fig.tight_layout()
    fig.savefig("cubicerrors.pdf")
    plt.close()

View File

@ -0,0 +1,40 @@
import matplotlib.pyplot as plt
import numpy as np
if __name__ == "__main__":
    # wikipedia:
    # Generally, males vary in total length from 250 to 390 cm and
    # weigh between 90 and 306 kg
    c = 6
    x = np.arange(2.2, 3.9, 0.05)
    y = c * x**3.0
    # fixed seed, so that each run produces exactly the same data:
    rng = np.random.RandomState(32281)
    noise = rng.randn(len(x))*50
    y += noise

    # switch matplotlib to xkcd sketch style:
    plt.xkcd()
    fig, ax = plt.subplots(figsize=(7., 3.6))
    ax.scatter(x, y, marker='o', color='b', s=40, zorder=10)
    # the functions are evaluated on their own grid, not on the data x values:
    xx = np.linspace(2.1, 3.9, 100)
    # cubic function with the factor used for generating the data:
    ax.plot(xx, c*xx**3.0, color='#CC0000', lw=3, zorder=5)
    # cubic functions with other factors:
    for cc in [0.25*c, 0.5*c, 2.0*c, 4.0*c]:
        ax.plot(xx, cc*xx**3.0, color='#FF9900', lw=2, zorder=5)
    # show left and bottom spines only:
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    # a single call is sufficient (it was issued twice before):
    ax.tick_params(direction="out", width=1.25)
    ax.set_xlabel('Size x / m')
    ax.set_ylabel('Weight y / kg')
    ax.set_xlim(2, 4)
    ax.set_ylim(0, 400)
    ax.set_xticks(np.arange(2.0, 4.1, 0.5))
    ax.set_yticks(np.arange(0, 401, 100))
    fig.set_facecolor("white")
    fig.subplots_adjust(0.11, 0.16, 0.98, 0.97)
    fig.savefig("cubicfunc.pdf")
    plt.close()

View File

@ -0,0 +1,93 @@
import matplotlib.pyplot as plt
import numpy as np
def create_data():
    """Return noisy samples of the cubic function ``y = c*x**3``.

    Background taken from Wikipedia: generally, males vary in total
    length from 250 to 390 cm and weigh between 90 and 306 kg.

    Returns
    -------
    x : ndarray
        Sizes generated by ``np.arange(2.2, 3.9, 0.05)``.
    y : ndarray
        ``c * x**3`` plus standard normal noise multiplied by 50.
    c : int
        The factor the data were generated with (6).
    """
    true_c = 6
    size = np.arange(2.2, 3.9, 0.05)
    # the seed is fixed, therefore the data are the same for every call:
    random_state = np.random.RandomState(32281)
    scatter = random_state.randn(len(size)) * 50
    weight = true_c * size**3.0 + scatter
    return size, weight, true_c
def gradient_descent(x, y, n=20, dc=0.01, eps=0.0001, c0=1.1):
    """Minimize the mean squared error of ``y = c*x**3`` by gradient descent.

    The derivative of the mean squared error with respect to the factor
    c is approximated by a forward difference quotient.

    Parameters
    ----------
    x, y : ndarray
        Data points.
    n : int, optional
        Number of iterations.
    dc : float, optional
        Step in c used for the difference quotient.
    eps : float, optional
        Factor by which the gradient is scaled for updating c.
    c0 : float, optional
        Start value for c.

    Returns
    -------
    cs : list of float
        The value of c at the beginning of each iteration.
    mses : list of float
        The mean squared error for each of the values in `cs`.
    """
    cc = c0
    cs = []
    mses = []
    # x**3 does not change between iterations:
    x3 = x**3.0
    for _ in range(n):
        m0 = np.mean((y-(cc*x3))**2.0)
        m1 = np.mean((y-((cc+dc)*x3))**2.0)
        dmdc = (m1 - m0)/dc
        # store the current position before taking the step:
        cs.append(cc)
        mses.append(m0)
        # step against the gradient:
        cc -= eps*dmdc
    return cs, mses
def plot_mse(ax, x, y, c, cs):
    """Plot the mean squared error as a function of c with the descent path.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw into.
    x, y : ndarray
        Data points.
    c : float
        Not used by this function.
    cs : sequence of float
        Values of the factor c visited by the gradient descent. At least
        five values are needed for drawing the four arrows.
    """
    # mean squared errors at the visited values of c:
    ms = np.array([np.mean((y-(cc*x**3.0))**2.0) for cc in cs])
    # the cost function on a fine grid of c values:
    ccs = np.linspace(0.5, 10.0, 200)
    mses = np.array([np.mean((y-(cc*x**3.0))**2.0) for cc in ccs])

    ax.plot(ccs, mses, 'b', lw=2, zorder=10)
    ax.scatter(cs, ms, color='#cc0000', s=40, zorder=20)
    # highlight the last point:
    ax.scatter(cs[-1], ms[-1], color='#FF9900', s=60, zorder=30)
    # arrows from each of the first four points to the following one:
    for i in range(4):
        ax.annotate('',
                    xy=(cs[i+1]+0.2, ms[i+1]), xycoords='data',
                    xytext=(cs[i]+0.3, ms[i]+200), textcoords='data', ha='left',
                    arrowprops=dict(arrowstyle="->", relpos=(0.0,0.0),
                                    connectionstyle="angle3,angleA=10,angleB=70") )
    # show left and bottom spines only:
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    # a single call is sufficient (it was issued twice before):
    ax.tick_params(direction="out", width=1.25)
    ax.set_xlabel('c')
    ax.set_ylabel('mean squared error')
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 25000)
    ax.set_xticks(np.arange(0.0, 10.1, 2.0))
    # NOTE(review): the tick at 30000 is beyond the y-limit set above;
    # set_yticks may extend the view limits to include it - confirm the
    # intended y-range.
    ax.set_yticks(np.arange(0, 30001, 10000))
def plot_descent(ax, cs, mses):
    """Plot the mean squared error against the iteration number.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw into.
    cs : sequence of float
        Not used by this function.
    mses : sequence of float
        Mean squared error of each iteration. Iterations are counted
        starting at one.
    """
    ax.plot(np.arange(len(mses))+1, mses, '-o', c='#cc0000', mew=0, ms=8)
    # show left and bottom spines only:
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    # a single call is sufficient (it was issued twice before):
    ax.tick_params(direction="out", width=1.25)
    ax.set_xlabel('iteration')
    # no y-label and no y-tick labels are set on this axes; the y-limits
    # and y-ticks are the same as the ones set in plot_mse()
    ax.set_xlim(0, 10.5)
    ax.set_ylim(0, 25000)
    ax.set_xticks(np.arange(0.0, 10.1, 2.0))
    # NOTE(review): the tick at 30000 is beyond the y-limit set above;
    # set_yticks may extend the view limits to include it - confirm the
    # intended y-range.
    ax.set_yticks(np.arange(0, 30001, 10000))
    ax.set_yticklabels([])
if __name__ == "__main__":
    # data and the factor they were generated with:
    x, y, c = create_data()
    # path of the gradient descent:
    descent = gradient_descent(x, y)
    cs = descent[0]
    mses = descent[1]
    # switch matplotlib to xkcd sketch style:
    plt.xkcd()
    fig, axs = plt.subplots(1, 2, figsize=(7., 2.6))
    # left: cost function with descent path, right: error per iteration
    plot_mse(axs[0], x, y, c, cs)
    plot_descent(axs[1], cs, mses)
    fig.set_facecolor("white")
    fig.tight_layout()
    fig.savefig("cubicmse.pdf")
    plt.close()

View File

@ -25,13 +25,16 @@
\subsection{Start with one-dimensional problem!} \subsection{Start with one-dimensional problem!}
\begin{itemize} \begin{itemize}
\item Let's fit a cubic function $y=cx^3$ (weight versus length of a tiger) \item Let's fit a cubic function $y=cx^3$ (weight versus length of a tiger)\\
\includegraphics[width=0.8\textwidth]{cubicfunc}
\item Introduce the problem, $c$ is density and form factor \item Introduce the problem, $c$ is density and form factor
\item How to generate an artificial data set (refer to simulation chapter) \item How to generate an artificial data set (refer to simulation chapter)
\item How to plot a function (do not use the data x values!) \item How to plot a function (do not use the data x values!)
\item Just the mean square error as a function of the factor c \item Just the mean square error as a function of the factor c\\
\includegraphics[width=0.8\textwidth]{cubicerrors}
\item Also mention the cost function for a straight line \item Also mention the cost function for a straight line
\item 1-d gradient, NO quiver plot (it is a nightmare to get this right) \item 1-d gradient, NO quiver plot (it is a nightmare to get this right)\\
\includegraphics[width=0.8\textwidth]{cubicmse}
\item 1-d gradient descent \item 1-d gradient descent
\item Describe in words the n-d problem. \item Describe in words the n-d problem.
\item Homework is to do the 2d problem with the straight line! \item Homework is to do the 2d problem with the straight line!