\documentclass[12pt,a4paper,pdftex]{exam}
\newcommand{\exercisetopic}{Gradient descent}
\newcommand{\exercisenum}{9}
\newcommand{\exercisedate}{December 22nd, 2020}
\input{../../exercisesheader}
\firstpagefooter{Prof. Dr. Jan Benda}{}{jan.benda@uni-tuebingen.de}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
\input{../../exercisestitle}
\begin{questions}
\question We want to fit the straight line \[ y = mx+b \] to the
data in the file \emph{lin\_regression.mat}.
In the lecture we already prepared the cost function
(\code{meanSquaredError()}) and the gradient
(\code{meanSquaredGradient()}) (read chapter 8 ``Optimization and
gradient descent'' in the script, in particular section 8.4 and
exercise 8.4!). With these functions in place we here want to
implement a gradient descent algorithm that finds the minimum of the
cost function and thus the slope and intercept of the straight line
that minimizes the squared distance to the data values.
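
For reference, minimal sketches of these two functions could look as
follows (a sketch only, assuming the parameter vector $\vec p = (b, m)$
and a central-difference approximation of the gradient; the actual
implementations are developed in the script):
\begin{lstlisting}
function mse = meanSquaredError(x, y, p)
% mean squared error between the data pairs (x, y) and the
% line y = p(2)*x + p(1), with p = [b, m]:
  mse = mean((y - (p(1) + p(2)*x)).^2);
end

function gradmse = meanSquaredGradient(x, y, p)
% numerical gradient of the mean squared error at p,
% estimated with central differences (step size h is an
% arbitrary small choice):
  h = 1e-5;
  gradmse = zeros(size(p));
  for k = 1:length(p)
      ph = p;           % parameters shifted up in dimension k
      ph(k) = p(k) + h;
      pl = p;           % parameters shifted down in dimension k
      pl(k) = p(k) - h;
      gradmse(k) = (meanSquaredError(x, y, ph) ...
                    - meanSquaredError(x, y, pl)) / (2*h);
  end
end
\end{lstlisting}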
The algorithm for the descent towards the minimum of the cost
function is as follows (a code sketch follows the list):
\begin{enumerate}
\item Start with some arbitrary parameter values $\vec p_0 = (b_0,
  m_0)$ for the intercept $b_0$ and the slope $m_0$ of the straight
  line.
\item \label{computegradient} Compute the gradient of the cost function
at the current values of the parameters $\vec p_i$.
\item If the magnitude (length) of the gradient is smaller than some
  small number, the algorithm has converged close to the minimum of
  the cost function and we abort the descent. Right at the minimum
  the magnitude of the gradient is zero. However, since we determine
  the gradient numerically, it will never be exactly zero. This is
  why we only require the gradient to be sufficiently small
  (e.g. \code{norm(gradient) < 0.1}).
\item \label{gradientstep} Move against the gradient by a small step
  $\epsilon = 0.01$:
  \[\vec p_{i+1} = \vec p_i - \epsilon \cdot \nabla f_{\rm cost}(\vec p_i)\]
\item Repeat steps \ref{computegradient} -- \ref{gradientstep}.
\end{enumerate}
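This loop could be sketched in code like this (a minimal sketch; the
function name \code{descentSketch()} and its interface are
illustrative, not a prescribed solution):
\begin{lstlisting}
function [p, costs] = descentSketch(x, y, p0)
% gradient descent on the mean squared error, starting at p0;
% returns the final parameters and the cost at each step:
  epsilon = 0.01;   % step size
  mingrad = 0.1;    % threshold for the magnitude of the gradient
  p = p0;
  costs = [];
  gradf = meanSquaredGradient(x, y, p);
  while norm(gradf) >= mingrad
      costs(end+1) = meanSquaredError(x, y, p); % cost at current step
      p = p - epsilon*gradf;                    % step against the gradient
      gradf = meanSquaredGradient(x, y, p);     % gradient at new position
  end
  costs(end+1) = meanSquaredError(x, y, p);     % cost at final position
end
\end{lstlisting}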
\begin{parts}
\part Implement the gradient descent in a function that returns
the parameter values at the minimum of the cost function and a vector
with the value of the cost function at each step of the algorithm.
\begin{solution}
\lstinputlisting{descent.m}
\end{solution}
\part Plot the data and the straight line with the parameter
values that you found with the gradient descent method.
\part Plot the development of the costs as a function of the
iteration step.
\begin{solution}
\lstinputlisting{descentfit.m}
\end{solution}
\part To check the gradient descent method from (a), compare
its result for slope and intercept with the position of the
minimum of the cost function that you get when computing the cost
function for many values of the slope and intercept and then using
the \code{min()} function. Vary the value of $\epsilon$ and the
minimum gradient. What are good values such that the gradient
descent gets closest to the true minimum of the cost function?
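A possible structure for this check is sketched below (assuming
\code{x} and \code{y} are loaded from \emph{lin\_regression.mat};
the grid ranges and resolution are arbitrary choices):
\begin{lstlisting}
% evaluate the cost function on a grid of intercepts and slopes:
bs = linspace(-10.0, 10.0, 200);   % candidate intercepts
ms = linspace(-10.0, 10.0, 200);   % candidate slopes
costs = zeros(length(bs), length(ms));
for i = 1:length(bs)
    for j = 1:length(ms)
        costs(i, j) = meanSquaredError(x, y, [bs(i), ms(j)]);
    end
end
[mincost, idx] = min(costs(:));      % minimum over the whole grid
[i, j] = ind2sub(size(costs), idx);  % linear index to subscripts
fprintf('grid minimum: b=%g, m=%g\n', bs(i), ms(j));
\end{lstlisting}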
\begin{solution}
\lstinputlisting{checkdescent.m}
\end{solution}
\part Use the functions \code{polyfit()} and \code{lsqcurvefit()}
provided by MATLAB to find the slope and intercept of a straight
line that fits the data. Compare the resulting fit parameters of
those functions with the ones of your gradient descent algorithm.
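For orientation, the two functions could be called like this (the
anonymous model function \code{sline} and the initial guess
\code{p0} are illustrative choices; \code{lsqcurvefit()} requires
the Optimization Toolbox):
\begin{lstlisting}
% polyfit() returns polynomial coefficients in descending order,
% i.e. [m, b] for a first-order polynomial:
pp = polyfit(x, y, 1);
fprintf('polyfit: m=%g, b=%g\n', pp(1), pp(2));

% lsqcurvefit() minimizes the squared distance between model and
% data, starting from an arbitrary initial guess p0:
sline = @(p, x) p(1) + p(2)*x;   % straight line with p = [b, m]
p0 = [0.0, 1.0];
pl = lsqcurvefit(sline, p0, x, y);
fprintf('lsqcurvefit: m=%g, b=%g\n', pl(2), pl(1));
\end{lstlisting}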
\begin{solution}
\lstinputlisting{linefit.m}
\end{solution}
\end{parts}
\end{questions}
\end{document}