This repository has been archived on 2021-05-17. You can view files and clone it, but cannot push or open issues or pull requests.
scientificComputing/regression/lecture/regression-chapter.tex

161 lines
6.2 KiB
TeX

\documentclass[12pt]{book}
\input{../../header}
\renewcommand{\exercisesolutions}{here} % 0: here, 1: chapter, 2: end
\lstset{inputpath=../code}
\graphicspath{{figures/}}
\typein[\pagenumber]{Number of first page}
\typein[\chapternumber]{Chapter number}
\setcounter{page}{\pagenumber}
\setcounter{chapter}{\chapternumber}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
\include{regression}
\subsection{Notes}
\begin{itemize}
\item Fig 8.2 right: this should be a chi-squared distribution with one degree of freedom!
\end{itemize}
\subsection{Start with one-dimensional problem!}
\begin{itemize}
\item Let's fit a cubic function $y=cx^3$ (weight versus length of a tiger)\\
\includegraphics[width=0.8\textwidth]{cubicfunc}
\item Introduce the problem, $c$ is density and form factor
\item How to generate an artificial data set (refer to simulation chapter)
\item How to plot a function (do not use the data x values!)
\item Just the mean squared error as a function of the factor $c$\\
\includegraphics[width=0.8\textwidth]{cubicerrors}
\item Also mention the cost function for a straight line
\item 1-d gradient, NO quiver plot (it is a nightmare to get this right)\\
\includegraphics[width=0.8\textwidth]{cubicmse}
\item 1-d gradient descent
\item Describe in words the n-d problem.
\item Homework is to do the 2d problem with the straight line!
\end{itemize}
\subsection{2D fit}
\begin{exercise}{errorSurface.m}{}\label{errorsurfaceexercise}
Generate 20 data pairs $(x_i|y_i)$ that are linearly related with
slope $m=0.75$ and intercept $b=-40$, using \varcode{rand()} for
drawing $x$ values between 0 and 120 and \varcode{randn()} for
jittering the $y$ values with a standard deviation of 15. Then
calculate the mean squared error between the data and straight lines
for a range of slopes and intercepts using the
\varcode{meanSquaredError()} function from the previous exercise.
Illustrate the error surface using the \code{surface()} function.
Consult the documentation to find out how to use \code{surface()}.
\end{exercise}
\begin{exercise}{meanSquaredGradient.m}{}\label{gradientexercise}%
Implement a function \varcode{meanSquaredGradient()}, that takes the
$x$- and $y$-data and the set of parameters $(m, b)$ of a straight
line as a two-element vector as input arguments. The function should
return the gradient at the position $(m, b)$ as a vector with two
elements.
\end{exercise}
\begin{exercise}{errorGradient.m}{}
Extend the script of exercise~\ref{errorsurfaceexercise} to plot
both the error surface and gradients using the
\varcode{meanSquaredGradient()} function from
exercise~\ref{gradientexercise}. Vectors in space can be easily
plotted using the function \code{quiver()}. Use \code{contour()}
instead of \code{surface()} to plot the error surface.
\end{exercise}
\begin{exercise}{gradientDescent.m}{}
Implement the gradient descent for the problem of fitting a straight
line to some measured data. Reuse the data generated in
exercise~\ref{errorsurfaceexercise}.
\begin{enumerate}
\item Store for each iteration the error value.
\item Plot the error values as a function of the iterations, the
number of optimization steps.
\item Plot the measured data together with the best fitting straight line.
\end{enumerate}\vspace{-4.5ex}
\end{exercise}
\begin{figure}[t]
\includegraphics[width=1\textwidth]{lin_regress}\hfill
\titlecaption{Example data suggesting a linear relation.}{A set of
input signals $x$, e.g.\ stimulus intensities, were used to probe a
system. The system's outputs $y$ to the inputs are noted
(left). Assuming a linear relation between $x$ and $y$ leaves us
with 2 parameters, the slope (center) and the intercept with the
y-axis (right panel).}\label{linregressiondatafig}
\end{figure}
\begin{figure}[t]
\includegraphics[width=1\textwidth]{linear_least_squares}
\titlecaption{Estimating the \emph{mean square error}.} {The
deviation error (orange) between the prediction (red line) and the
observations (blue dots) is calculated for each data point
(left). Then the deviations are squared and the average is
calculated (right).}
\label{leastsquareerrorfig}
\end{figure}
\begin{figure}[t]
\includegraphics[width=0.75\textwidth]{error_surface}
\titlecaption{Error surface.}{The two model parameters $m$ and $b$
define the base area of the surface plot. For each parameter
combination of slope and intercept the error is calculated. The
resulting surface has a minimum which indicates the parameter
combination that best fits the data.}\label{errorsurfacefig}
\end{figure}
\begin{figure}[t]
\includegraphics[width=0.75\textwidth]{error_gradient}
\titlecaption{Gradient of the error surface.} {Each arrow points
in the direction of the steepest ascent at different positions
of the error surface shown in \figref{errorsurfacefig}. The
contour lines in the background illustrate the error surface. Warm
colors indicate high errors, colder colors low error values. Each
contour line connects points of equal
error.}\label{gradientquiverfig}
\end{figure}
\begin{figure}[t]
\includegraphics[width=0.45\textwidth]{gradient_descent}
\titlecaption{Gradient descent.}{The algorithm starts at an
arbitrary position. At each point the gradient is estimated and
the position is updated as long as the length of the gradient is
sufficiently large. The dots show the positions after each
iteration of the algorithm.} \label{gradientdescentfig}
\end{figure}
\subsection{Linear fits}
\begin{itemize}
\item Polyfit is easy: unique solution! $c x^2$ is also a linear fit.
\item Example for overfitting with polyfit of a high order (=number of data points)
\end{itemize}
\section{Fitting in practice}
Fit with the MATLAB functions \code{lsqcurvefit()} and \code{polyfit()}.
\subsection{Non-linear fits}
\begin{itemize}
\item Example that illustrates the problem of local minima (with error surface)
\item You need initial values for the parameters!
\item Example showing that fitting gets harder the more parameters you have.
\item Try to fix as many parameters as possible before doing the fit.
\item How to test the quality of a fit? Residuals. $\chi^2$ test. Run-test.
\end{itemize}
\end{document}