solution for regression exercises

parent 9265f75d12
commit 9c0559ab03

regression/code/checkdescent.m (new file, 25 lines)
@@ -0,0 +1,25 @@
% data:
load('lin_regression.mat')

% compute mean squared error for a range of slopes and intercepts:
slopes = -5:0.25:5;
intercepts = -30:1:30;
errors = zeros(length(slopes), length(intercepts));
for i = 1:length(slopes)
    for j = 1:length(intercepts)
        errors(i,j) = lsqError([slopes(i), intercepts(j)], x, y);
    end
end

% minimum of error surface:
[me, mi] = min(errors(:));
[ia, ib] = ind2sub(size(errors), mi);
eparams = [slopes(ia), intercepts(ib)];

% gradient descent:
pstart = [-2. 10.];
[params, errors] = descent(x, y, pstart);

% comparison:
fprintf('descent: %6.3f %6.3f\n', params(1), params(2));
fprintf('min()  : %6.3f %6.3f\n', eparams(1), eparams(2));
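A possible follow-up, not part of the commit: a quick visual check of the error surface computed in checkdescent.m. It assumes the variables defined above and has to run before the call to descent(), which reuses the variable errors.

% sketch: contour plot of the error surface with the grid minimum marked
figure();
contourf(intercepts, slopes, errors);  % rows of errors vary with slope, columns with intercept
hold on;
plot(eparams(2), eparams(1), 'rx');    % minimum found with min()/ind2sub()
xlabel('intercept');
ylabel('slope');
hold off;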
regression/code/descent.m (new file, 15 lines)
@@ -0,0 +1,15 @@
function [params, errors] = descent(xdata, ydata, pstart)
    mingradient = 0.1;   % stop once the gradient gets shorter than this
    eps = 0.01;          % step size epsilon (note: shadows the built-in eps within this function)

    errors = [];
    params = pstart;
    count = 1;
    gradient = [100.0, 100.0];
    while norm(gradient) > mingradient
        gradient = lsqGradient(params, xdata, ydata);
        errors(count) = lsqError(params, xdata, ydata);
        params = params - eps .* gradient;
        count = count + 1;
    end
end
regression/code/descentfit.m (new file, 22 lines)
@@ -0,0 +1,22 @@
clear
close all
load('lin_regression.mat')

pstart = [-2. 10.];
[params, errors] = descent(x, y, pstart);

figure()
subplot(2,1,1)
hold on
scatter(x, y, 'displayname', 'data')
xx = min(x):0.01:max(x);
fx = params(1)*xx + params(2);
plot(xx, fx, 'displayname', 'fit')
xlabel('Input')
ylabel('Output')
grid on
legend show
subplot(2,1,2)
plot(errors)
xlabel('optimization steps')
ylabel('error')
@@ -1,6 +1,6 @@
 load('lin_regression.mat');
 
-% compute mean squared error for a range of sloopes and intercepts:
+% compute mean squared error for a range of slopes and intercepts:
 slopes = -5:0.25:5;
 intercepts = -30:1:30;
 error_surf = zeros(length(slopes), length(intercepts));
regression/code/linefit.m (new file, 18 lines)
@@ -0,0 +1,18 @@
% data:
load('lin_regression.mat')

% gradient descent:
pstart = [-2. 10.];
[params, errors] = descent(x, y, pstart);

% lsqcurvefit (named linefunc to avoid shadowing the built-in line() function):
linefunc = @(p, x) x .* p(1) + p(2);
cparams = lsqcurvefit(linefunc, pstart, x, y);

% polyfit:
pparams = polyfit(x, y, 1);

% comparison:
fprintf('descent: %6.3f %6.3f\n', params(1), params(2));
fprintf('lsqcurvefit: %6.3f %6.3f\n', cparams(1), cparams(2));
fprintf('polyfit: %6.3f %6.3f\n', pparams(1), pparams(2));
regression/exercises/exercises01-de.tex (new file, 82 lines)
@@ -0,0 +1,82 @@
\documentclass[12pt,a4paper,pdftex]{exam}

\usepackage[german]{babel}
\usepackage{natbib}
\usepackage{graphicx}
\usepackage[small]{caption}
\usepackage{sidecap}
\usepackage{pslatex}
\usepackage{amsmath}
\usepackage{amssymb}
\setlength{\marginparwidth}{2cm}
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}

%%%%% text size %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
\pagestyle{headandfoot}
\ifprintanswers
\newcommand{\stitle}{: Solutions}
\else
\newcommand{\stitle}{}
\fi
\header{{\bfseries\large Exercise 11\stitle}}{{\bfseries\large Gradient descent}}{{\bfseries\large January 9th, 2018}}
\firstpagefooter{Dr. Jan Grewe}{Phone: 29 74588}{Email:
jan.grewe@uni-tuebingen.de}
\runningfooter{}{\thepage}{}

\setlength{\baselineskip}{15pt}
\setlength{\parindent}{0.0cm}
\setlength{\parskip}{0.3cm}
\renewcommand{\baselinestretch}{1.15}

\newcommand{\code}[1]{\texttt{#1}}
\renewcommand{\solutiontitle}{\noindent\textbf{Solution:}\par\noindent}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}

\input{instructions}

\begin{questions}

\question Implementiere den Gradientenabstieg f\"ur das Problem der
Parameteranpassung der linearen Geradengleichung an die Messdaten in
der Datei \emph{lin\_regression.mat}.

Die daf\"ur ben\"otigten Zutaten haben wir aus den vorangegangenen
\"Ubungen bereits vorbereitet. Wir brauchen: 1. Die Fehlerfunktion
(\code{meanSquareError()}), 2. die Zielfunktion (\code{lsqError()})
und 3. den Gradienten (\code{lsqGradient()}). Der Algorithmus f\"ur
den Abstieg lautet:

\begin{enumerate}
\item Starte mit einer beliebigen Parameterkombination $p_0 = (m_0,
  b_0)$.
\item \label{computegradient} Berechne den Gradienten an der
  aktuellen Position $p_i$.
\item Wenn die L\"ange des Gradienten einen bestimmten Wert
  unterschreitet, haben wir das Minimum gefunden und k\"onnen die
  Suche abbrechen. Wir suchen ja das Minimum, bei dem der Gradient
  gleich Null ist. Da aus numerischen Gr\"unden der Gradient nie
  exakt Null werden wird, k\"onnen wir nur fordern, dass er
  hinreichend klein wird (z.B. \code{norm(gradient) < 0.1}).
\item \label{gradientstep} Gehe einen kleinen Schritt ($\epsilon =
  0.01$) in die entgegengesetzte Richtung des Gradienten:
  \[p_{i+1} = p_i - \epsilon \cdot \nabla f_{cost}(m_i, b_i)\]
\item Wiederhole die Schritte \ref{computegradient} --
  \ref{gradientstep}.
\end{enumerate}

\begin{parts}
\part Implementiere den Gradientenabstieg und merke Dir f\"ur jeden Schritt
die Parameterkombination und den zugeh\"origen Fehler.
\part Erstelle einen Plot, der die Originaldaten sowie die Vorhersage mit der
besten Parameterkombination darstellt.
\part Stelle in einem weiteren Plot die Entwicklung des Fehlers als Funktion der
Optimierungsschritte dar.
\end{parts}

\end{questions}

\end{document}
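Part (a) of this sheet asks to record the parameter combination as well as the error for every step; descent.m above only keeps the errors. A minimal sketch of such a variant, assuming lsqGradient() and lsqError() from the previous exercises (the function name descenttrack is made up here):

function [paramhist, errors] = descenttrack(xdata, ydata, pstart)
    mingradient = 0.1;                     % stop criterion for the gradient magnitude
    stepsize = 0.01;                       % epsilon of the algorithm
    params = pstart;
    paramhist = pstart;                    % one row per optimization step
    errors = [];
    gradient = [100.0, 100.0];
    count = 1;
    while norm(gradient) > mingradient
        gradient = lsqGradient(params, xdata, ydata);
        errors(count) = lsqError(params, xdata, ydata);
        params = params - stepsize .* gradient;
        paramhist(count + 1, :) = params;  % remember the parameter combination
        count = count + 1;
    end
end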
@@ -19,7 +19,7 @@
 \else
 \newcommand{\stitle}{}
 \fi
-\header{{\bfseries\large Exercise 11\stitle}}{{\bfseries\large Gradient descend}}{{\bfseries\large January 9th, 2018}}
+\header{{\bfseries\large Exercise 11\stitle}}{{\bfseries\large Gradient descent}}{{\bfseries\large January 9th, 2018}}
 \firstpagefooter{Dr. Jan Grewe}{Phone: 29 74588}{Email:
 jan.grewe@uni-tuebingen.de}
 \runningfooter{}{\thepage}{}
@@ -31,6 +31,24 @@
 
 \newcommand{\code}[1]{\texttt{#1}}
 \renewcommand{\solutiontitle}{\noindent\textbf{Solution:}\par\noindent}
+%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\usepackage{listings}
+\lstset{
+language=Matlab,
+basicstyle=\ttfamily\footnotesize,
+numbers=left,
+numberstyle=\tiny,
+title=\lstname,
+showstringspaces=false,
+commentstyle=\itshape\color{darkgray},
+breaklines=true,
+breakautoindent=true,
+columns=flexible,
+frame=single,
+xleftmargin=1em,
+xrightmargin=1em,
+aboveskip=10pt
+}
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{document}
@@ -39,42 +57,68 @@
 
 \begin{questions}
 
-\question Implementiere den Gradientenabstieg f\"ur das Problem der
-Parameteranpassung der linearen Geradengleichung an die Messdaten in
-der Datei \emph{lin\_regression.mat}.
+\question Implement the gradient descent for finding the parameters
+of a straight line that we want to fit to the data in the file
+\emph{lin\_regression.mat}.
 
-Die daf\"ur ben\"otigten Zutaten haben wir aus den vorangegangenen
-\"Ubungen bereits vorbereitet. Wir brauchen: 1. Die Fehlerfunktion
-(\code{meanSquareError()}), 2. die Zielfunktion (\code{lsqError()})
-und 3. den Gradienten (\code{lsqGradient()}). Der Algorithmus f\"ur
-den Abstieg lautet:
+In the lecture we already prepared the necessary functions: 1. the
+error function (\code{meanSquareError()}), 2. the cost function
+(\code{lsqError()}), and 3. the gradient (\code{lsqGradient()}).
+
+The algorithm for the descent towards the minimum of the cost
+function is as follows:
 
 \begin{enumerate}
-\item Starte mit einer beliebigen Parameterkombination $p_0 = (m_0,
-  b_0)$.
-\item \label{computegradient} Berechne den Gradienten an der
-  akutellen Position $p_i$.
-\item Wenn die L\"ange des Gradienten einen bestimmten Wert
-  unterschreitet, haben wir das Minum gefunden und k\"onnen die
-  Suche abbrechen. Wir suchen ja das Minimum, bei dem der Gradient
-  gleich Null ist. Da aus numerischen Gr\"unden der Gradient nie
-  exakt Null werden wird, k\"onnen wir nur fordern, dass er
-  hinreichend klein wird (z.B. \code{norm(gradient) < 0.1}).
-\item \label{gradientstep} Gehe einen kleinen Schritt ($\epsilon =
-  0.01$) in die entgegensetzte Richtung des Gradienten:
+\item Start with some arbitrary parameter values $p_0 = (m_0, b_0)$
+  for the slope and the intercept of the straight line.
+\item \label{computegradient} Compute the gradient of the cost function
+  at the current values of the parameters $p_i$.
+\item If the magnitude (length) of the gradient is smaller than some
+  small number, the algorithm converged close to the minimum of the
+  cost function and we abort the descent. Right at the minimum the
+  magnitude of the gradient is zero. However, since we determine
+  the gradient numerically, it will never be exactly zero. This is
+  why we require the gradient to be sufficiently small
+  (e.g. \code{norm(gradient) < 0.1}).
+\item \label{gradientstep} Move against the gradient by a small step
+  ($\epsilon = 0.01$):
 \[p_{i+1} = p_i - \epsilon \cdot \nabla f_{cost}(m_i, b_i)\]
-\item Wiederhole die Schritte \ref{computegradient} --
-  \ref{gradientstep}.
+\item Repeat steps \ref{computegradient} -- \ref{gradientstep}.
 \end{enumerate}
 
 
 \begin{parts}
-\part Implementiere den Gradientenabstieg und merke Dir f\"ur jeden Schritt
-die Parameterkombination und den zugehörigen Fehler.
-\part Erstelle einen Plot der die Originaldaten sowie die Vorhersage mit der
-besten Parameterkombination darstellt.
-\part Stelle in einem weiteren Plot die Entwicklung des Fehlers als Funktion der
-Optimierungsschritte dar.
+\part Implement the gradient descent in a function that returns
+the parameter values at the minimum of the cost function and a vector
+with the value of the cost function at each step of the algorithm.
+\begin{solution}
+  \lstinputlisting{../code/descent.m}
+\end{solution}
+
+\part Plot the data and the straight line with the parameter
+values that you found with the gradient descent method.
+
+\part Plot the development of the costs as a function of the
+iteration step.
+\begin{solution}
+  \lstinputlisting{../code/descentfit.m}
+\end{solution}
+
+\part Find the position of the minimum of the cost function by
+means of the \code{min()} function. Compare with the result of the
+gradient descent method. Vary the value of $\epsilon$ and the
+minimum gradient. What are good values such that the gradient
+descent gets closest to the true minimum of the cost function?
+\begin{solution}
+  \lstinputlisting{../code/checkdescent.m}
+\end{solution}
+
+\part Use the functions \code{polyfit()} and \code{lsqcurvefit()}
+provided by MATLAB to find the slope and intercept of a straight
+line that fits the data.
+\begin{solution}
+  \lstinputlisting{../code/linefit.m}
+\end{solution}
+
 \end{parts}
 
 \end{questions}
@@ -368,6 +368,7 @@ Punkte in Abbildung \ref{gradientdescentfig} gro{\ss}.
 Optimierungsschritt an.} \label{gradientdescentfig}
 \end{figure}
 
+\setboolean{showexercisesolutions}{false}
 \begin{exercise}{gradientDescent.m}{}
 Implementiere den Gradientenabstieg f\"ur das Problem der
 Parameteranpassung der linearen Geradengleichung an die Messdaten in
@@ -409,6 +410,7 @@ Kostenfunktionen gemacht \matlabfun{fminsearch()}, w\"ahrend spezielle
 Funktionen z.B. f\"ur die Minimierung des quadratischen Abstands bei
 einem Kurvenfit angeboten werden \matlabfun{lsqcurvefit()}.
 
+\newpage
 \begin{important}[Achtung Nebenminima!]
 Das Finden des globalen Minimums ist leider nur selten so leicht wie
 bei einem Geradenfit. Oft hat die Kostenfunktion viele Nebenminima,
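The lecture text above points to fminsearch() for general cost functions, but none of the committed scripts use it. A small sketch, not part of the commit, of how it could be applied to the same line fit, assuming the lsqError() cost function from the exercises:

% sketch: minimize the same cost function with fminsearch()
load('lin_regression.mat');
fcost = @(p) lsqError(p, x, y);             % cost as a function of the parameters only
fparams = fminsearch(fcost, [-2.0, 10.0]);  % same starting point as descent()
fprintf('fminsearch: %6.3f %6.3f\n', fparams(1), fparams(2));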
@@ -63,7 +63,6 @@
 \lstset{inputpath=bootstrap/code}
 \include{bootstrap/lecture/bootstrap}
 
-\setboolean{showexercisesolutions}{false}
 \graphicspath{{regression/lecture/}{regression/lecture/figures/}}
 \lstset{inputpath=regression/code}
 \include{regression/lecture/regression}
@@ -348,14 +348,14 @@ probability density functions like the one of the normal distribution
 
 \subsection{Kernel densities}
 
 A problem of using histograms for estimating probability densities is
-that the have hard bin edges. Depending on where the bin edges are placed
+that they have hard bin edges. Depending on where the bin edges are placed
 a data value falls in one or the other bin.
 
 \begin{figure}[t]
 \includegraphics[width=1\textwidth]{kerneldensity}
 \titlecaption{\label{kerneldensityfig} Kernel densities.}{Left: The
 histogram-based estimation of the probability density is dependent
-also on the position of the bins. In the bottom plot the bins have
+on the position of the bins. In the bottom plot the bins have
 been shifted by half a bin width (here $\Delta x=0.4$) and as a
 result details of the probability density look different. Look,
 for example at the height of the largest bin. Right: In contrast,
@@ -366,7 +366,7 @@ a data value falls in one or the other bin.
 To avoid this problem one can use so called \enterm{kernel densities}
 for estimating probability densities from data. Here every data point
 is replaced by a kernel (a function with integral one, like for
-example the Gaussian function) that is moved exactly to the position
+example the Gaussian) that is moved exactly to the position
 indicated by the data value. Then all the kernels of all the data
 values are summed up, the sum is divided by the number of data values,
 and we get an estimate of the probability density.
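A minimal sketch of the kernel density estimate described here, using a Gaussian kernel; the kernel width and the variable name data are assumptions, not taken from the lecture code:

% sketch: kernel density estimate with a Gaussian kernel
sigma = 0.2;                                % kernel width (assumed)
xg = -4.0:0.05:4.0;                         % positions at which the density is estimated
kd = zeros(size(xg));
for k = 1:length(data)                      % 'data' stands for the measured values
    kd = kd + exp(-0.5*((xg - data(k))/sigma).^2) / (sqrt(2.0*pi) * sigma);
end
kd = kd / length(data);                     % normalize: each kernel has integral one
plot(xg, kd);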
@@ -417,7 +417,7 @@ and percentiles can be determined from the inverse cumulative function.
 100 data values drawn from a normal distribution (red) in
 comparison to the true cumulative distribution function computed
 by numerically integrating the normal distribution function
-(blue). From the cumulative distribution function one can read of
+(blue). From the cumulative distribution function one can read off
 the probabilities of getting values smaller than a given value
 (here: $P(x \le -1) \approx 0.15$). From the inverse cumulative
 distribution the position of percentiles can be computed (here:
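A small sketch matching this caption: the empirical cumulative distribution of 100 normally distributed values, from which a probability and a percentile can be read off (all variable names are assumptions):

% sketch: empirical cumulative distribution, probability and percentile
vals = sort(randn(100, 1));
cdf = (1:length(vals))' / length(vals);     % fraction of data values <= vals
plot(vals, cdf);
pm1 = interp1(vals, cdf, -1.0);             % roughly P(x <= -1), about 0.15
median50 = interp1(cdf, vals, 0.5);         % percentile from the inverse cumulative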