% scientificComputing/regression/lecture/regression-chapter.tex

\documentclass[12pt]{book}

\input{../../header}

\renewcommand{\exercisesolutions}{here}  % 0: here, 1: chapter, 2: end

\lstset{inputpath=../code}
\graphicspath{{figures/}}

\typein[\pagenumber]{Number of first page}
\typein[\chapternumber]{Chapter number}
\setcounter{page}{\pagenumber}
\setcounter{chapter}{\chapternumber}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}

\include{regression}

\subsection{Notes}
\begin{itemize}
\item Fig 8.2 right: this should be a chi-squared distribution with one degree of freedom!
\end{itemize}

\subsection{Start with one-dimensional problem!}
\begin{itemize}
\item Let's fit a cubic function $y=cx^3$ (weight versus length of a tiger)\\
\includegraphics[width=0.8\textwidth]{cubicfunc}
\item Introduce the problem, $c$ is density and form factor
\item How to generate an artificial data set (refer to simulation chapter)
\item How to plot a function (do not use the data x values!)
\item Just the mean square error as a function of the factor $c$\\
\includegraphics[width=0.8\textwidth]{cubicerrors}
\item Also mention the cost function for a straight line
\item 1-d gradient, NO quiver plot (it is a nightmare to get this right)\\
\includegraphics[width=0.8\textwidth]{cubicmse}
\item 1-d gradient descent
\item Describe in words the n-d problem.
\item Homework is to do the 2d problem with the straight line!
\end{itemize}

\subsection{2D fit}

\begin{exercise}{errorSurface.m}{}\label{errorsurfaceexercise}
  Generate 20 data pairs $(x_i|y_i)$ that are linearly related with
  slope $m=0.75$ and intercept $b=-40$, using \varcode{rand()} for
  drawing $x$ values between 0 and 120 and \varcode{randn()} for
  jittering the $y$ values with a standard deviation of 15.  Then
  calculate the mean squared error between the data and straight lines
  for a range of slopes and intercepts using the
  \varcode{meanSquaredError()} function from the previous exercise.
  Illustrate the error surface using the \code{surface()} function.
  Consult the documentation to find out how to use \code{surface()}.
\end{exercise}

\begin{exercise}{meanSquaredGradient.m}{}\label{gradientexercise}%
  Implement a function \varcode{meanSquaredGradient()} that takes the
  $x$- and $y$-data and the set of parameters $(m, b)$ of a straight
  line as a two-element vector as input arguments. The function should
  return the gradient at the position $(m, b)$ as a vector with two
  elements.
\end{exercise}

\begin{exercise}{errorGradient.m}{}
  Extend the script of exercise~\ref{errorsurfaceexercise} to plot
  both the error surface and gradients using the
  \varcode{meanSquaredGradient()} function from
  exercise~\ref{gradientexercise}. Vectors in space can be easily
  plotted using the function \code{quiver()}. Use \code{contour()}
  instead of \code{surface()} to plot the error surface.
\end{exercise}


\begin{exercise}{gradientDescent.m}{}
  Implement the gradient descent for the problem of fitting a straight
  line to some measured data. Reuse the data generated in
  exercise~\ref{errorsurfaceexercise}.
  \begin{enumerate}
  \item Store for each iteration the error value.
  \item Plot the error values as a function of the iterations, the
    number of optimization steps.
  \item Plot the measured data together with the best fitting straight line.
  \end{enumerate}\vspace{-4.5ex}
\end{exercise}


\begin{figure}[t]
  \includegraphics[width=1\textwidth]{lin_regress}\hfill
  \titlecaption{Example data suggesting a linear relation.}{A set of
    input signals $x$, e.g.\ stimulus intensities, were used to probe a
    system. The system's outputs $y$ to the inputs are noted
    (left). Assuming a linear relation between $x$ and $y$ leaves us
    with 2 parameters, the slope (center) and the intercept with the
    y-axis (right panel).}\label{linregressiondatafig}
\end{figure}

\begin{figure}[t]
  \includegraphics[width=1\textwidth]{linear_least_squares}
  \titlecaption{Estimating the \emph{mean square error}.}  {The
    deviation error (orange) between the prediction (red line) and the
    observations (blue dots) is calculated for each data point
    (left). Then the deviations are squared and the average is
    calculated (right).}
  \label{leastsquareerrorfig}
\end{figure}

\begin{figure}[t]
  \includegraphics[width=0.75\textwidth]{error_surface}
  \titlecaption{Error surface.}{The two model parameters $m$ and $b$
    define the base area of the surface plot. For each parameter
    combination of slope and intercept the error is calculated. The
    resulting surface has a minimum which indicates the parameter
    combination that best fits the data.}\label{errorsurfacefig}
\end{figure}


\begin{figure}[t]
  \includegraphics[width=0.75\textwidth]{error_gradient}
  \titlecaption{Gradient of the error surface.}  {Each arrow points
    in the direction of the steepest ascent at different positions
    of the error surface shown in \figref{errorsurfacefig}. The
    contour lines in the background illustrate the error surface. Warm
    colors indicate high errors, colder colors low error values. Each
    contour line connects points of equal
    error.}\label{gradientquiverfig}
\end{figure}

\begin{figure}[t]
  \includegraphics[width=0.45\textwidth]{gradient_descent}
  \titlecaption{Gradient descent.}{The algorithm starts at an
    arbitrary position. At each point the gradient is estimated and
    the position is updated as long as the length of the gradient is
    sufficiently large. The dots show the positions after each
    iteration of the algorithm.} \label{gradientdescentfig}
\end{figure}


\subsection{Linear fits}
\begin{itemize}
\item Polyfit is easy: unique solution! $c x^2$ is also a linear fit.
\item Example for overfitting with polyfit of a high order (=number of data points)
\end{itemize}

\section{Fitting in practice}

Fit with the MATLAB functions \code{lsqcurvefit()} and \code{polyfit()}.


\subsection{Non-linear fits}
\begin{itemize}
\item Example that illustrates the problem of local minima (with error surface)
\item You need initial values for the parameters!
\item Example that fitting gets harder the more parameters you have.
\item Try to fix as many parameters as possible before doing the fit.
\item How to test the quality of a fit? Residuals. $\chi^2$ test. Run-test.
\end{itemize}


\end{document}