\documentclass[12pt]{book}
\input{../../header}

\renewcommand{\exercisesolutions}{here} % 0: here, 1: chapter, 2: end

\lstset{inputpath=../code}
\graphicspath{{figures/}}

\typein[\pagenumber]{Number of first page}
\typein[\chapternumber]{Chapter number}
\setcounter{page}{\pagenumber}
\setcounter{chapter}{\chapternumber}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
\include{regression}

\subsection{Notes}
\begin{itemize}
\item Fig 8.2 right: this should be a chi-squared distribution with one degree of freedom!
\end{itemize}

\subsection{Start with a one-dimensional problem!}
\begin{itemize}
\item Let's fit a cubic function $y=cx^3$ (weight versus length of a tiger)\\
\includegraphics[width=0.8\textwidth]{cubicfunc}
\item Introduce the problem: $c$ combines density and form factor
\item How to generate an artificial data set (refer to the simulation chapter); see the sketch after this list
\item How to plot a function (do not use the data $x$ values!)
\item Just the mean squared error as a function of the factor $c$\\
\includegraphics[width=0.8\textwidth]{cubicerrors}
\item Also mention the cost function for a straight line
\item 1-d gradient, NO quiver plot (it is a nightmare to get this right)\\
\includegraphics[width=0.8\textwidth]{cubicmse}
\item 1-d gradient descent
\item Describe the n-d problem in words.
\item Homework is to do the 2-d problem with the straight line!
\end{itemize}
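
A minimal sketch of this one-dimensional example (the data values and
the noise level are made up for illustration):
\begin{lstlisting}
% artificial tiger data: weight y = c * length^3 plus noise
c = 6.0;                          % assumed density/form factor
x = 1.0 + 2.0*rand(40, 1);        % lengths between 1 and 3 meters
y = c*x.^3 + 20.0*randn(40, 1);   % weights, jittered

% mean squared error as a function of the factor c:
cs = 2.0:0.1:10.0;
mses = zeros(size(cs));
for i = 1:length(cs)
    mses(i) = mean((y - cs(i)*x.^3).^2);
end

subplot(1, 2, 1);                 % data and cubic function
plot(x, y, 'o');
hold on;
xx = 0.0:0.05:3.0;                % do not use the data x values!
plot(xx, c*xx.^3);
hold off;
subplot(1, 2, 2);                 % cost function, minimum near true c
plot(cs, mses);
\end{lstlisting}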

\subsection{2D fit}

\begin{exercise}{errorSurface.m}{}\label{errorsurfaceexercise}
  Generate 20 data pairs $(x_i|y_i)$ that are linearly related with
  slope $m=0.75$ and intercept $b=-40$, using \varcode{rand()} for
  drawing $x$ values between 0 and 120 and \varcode{randn()} for
  jittering the $y$ values with a standard deviation of 15. Then
  calculate the mean squared error between the data and straight lines
  for a range of slopes and intercepts using the
  \varcode{meanSquaredError()} function from the previous exercise.
  Illustrate the error surface using the \code{surface()} function.
  Consult the documentation to find out how to use \code{surface()}.
\end{exercise}
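
A possible sketch of the solution (the grid ranges are arbitrary
choices; \varcode{meanSquaredError()} is assumed to take the $x$- and
$y$-data and a two-element parameter vector):
\begin{lstlisting}
% generate the artificial data:
n = 20;
x = 120.0*rand(n, 1);                  % x values between 0 and 120
y = 0.75*x - 40.0 + 15.0*randn(n, 1);  % linear relation plus noise

% mean squared error for a grid of slopes and intercepts:
slopes = 0.0:0.05:1.5;
intercepts = -80.0:2.0:0.0;
errors = zeros(length(intercepts), length(slopes));
for i = 1:length(slopes)
    for j = 1:length(intercepts)
        errors(j, i) = meanSquaredError(x, y, [slopes(i), intercepts(j)]);
    end
end

% illustrate the error surface:
surface(slopes, intercepts, errors);
xlabel('slope');
ylabel('intercept');
zlabel('mean squared error');
\end{lstlisting}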

\begin{exercise}{meanSquaredGradient.m}{}\label{gradientexercise}%
  Implement a function \varcode{meanSquaredGradient()} that takes the
  $x$- and $y$-data and the set of parameters $(m, b)$ of a straight
  line as a two-element vector as input arguments. The function should
  return the gradient at the position $(m, b)$ as a vector with two
  elements.
\end{exercise}
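
One way to sketch this is with finite differences (the step size
\varcode{h} is an ad-hoc choice):
\begin{lstlisting}
function gradient = meanSquaredGradient(x, y, parameter)
% gradient of the mean squared error at the position (m, b),
% estimated with difference quotients:
    h = 1e-5;                     % small step for the finite differences
    gradient = zeros(1, 2);
    mse = meanSquaredError(x, y, parameter);
    gradient(1) = (meanSquaredError(x, y, parameter + [h, 0]) - mse)/h;
    gradient(2) = (meanSquaredError(x, y, parameter + [0, h]) - mse)/h;
end
\end{lstlisting}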

\begin{exercise}{errorGradient.m}{}
  Extend the script of exercise~\ref{errorsurfaceexercise} to plot
  both the error surface and gradients using the
  \varcode{meanSquaredGradient()} function from
  exercise~\ref{gradientexercise}. Vectors in space can be easily
  plotted using the function \code{quiver()}. Use \code{contour()}
  instead of \code{surface()} to plot the error surface.
\end{exercise}
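
A sketch of the plotting part, reusing the data and the
\varcode{slopes}, \varcode{intercepts}, and \varcode{errors} variables
from the error-surface script:
\begin{lstlisting}
% gradients on a coarser grid, so that the arrows stay readable:
ms = slopes(1:5:end);
bs = intercepts(1:5:end);
gm = zeros(length(bs), length(ms));
gb = zeros(length(bs), length(ms));
for i = 1:length(ms)
    for j = 1:length(bs)
        g = meanSquaredGradient(x, y, [ms(i), bs(j)]);
        gm(j, i) = g(1);
        gb(j, i) = g(2);
    end
end

contour(slopes, intercepts, errors);  % error surface as contour lines
hold on;
quiver(ms, bs, gm, gb);               % gradient vectors on top
hold off;
xlabel('slope');
ylabel('intercept');
\end{lstlisting}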

\begin{exercise}{gradientDescent.m}{}
  Implement the gradient descent for the problem of fitting a straight
  line to some measured data. Reuse the data generated in
  exercise~\ref{errorsurfaceexercise}.
  \begin{enumerate}
  \item Store the error value for each iteration.
  \item Plot the error values as a function of the iterations, the
    number of optimization steps.
  \item Plot the measured data together with the best fitting straight line.
  \end{enumerate}\vspace{-4.5ex}
\end{exercise}
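
A minimal descent loop could look like this (the start position, the
learning rate \varcode{eps}, and the termination threshold are
assumptions that need to be adapted to the data):
\begin{lstlisting}
position = [-2.0, 10.0];          % arbitrary start values for m and b
eps = 0.0001;                     % learning rate
errorvalues = [];                 % error value of each iteration
gradient = meanSquaredGradient(x, y, position);
while norm(gradient) > 0.1        % stop once the gradient is small
    errorvalues(end+1) = meanSquaredError(x, y, position);
    position = position - eps*gradient;  % step down the slope
    gradient = meanSquaredGradient(x, y, position);
end

subplot(1, 2, 1);                 % error versus optimization steps
plot(errorvalues);
subplot(1, 2, 2);                 % data with the best fitting line
plot(x, y, 'o');
hold on;
xx = 0.0:1.0:120.0;
plot(xx, position(1)*xx + position(2));
hold off;
\end{lstlisting}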

\begin{figure}[t]
  \includegraphics[width=1\textwidth]{lin_regress}\hfill
  \titlecaption{Example data suggesting a linear relation.}{A set of
    input signals $x$, e.g. stimulus intensities, were used to probe a
    system. The system's outputs $y$ to the inputs are noted
    (left). Assuming a linear relation between $x$ and $y$ leaves us
    with two parameters, the slope (center) and the intercept with the
    y-axis (right panel).}\label{linregressiondatafig}
\end{figure}

\begin{figure}[t]
  \includegraphics[width=1\textwidth]{linear_least_squares}
  \titlecaption{Estimating the \emph{mean squared error}.}{The
    deviation (orange) between the prediction (red line) and the
    observations (blue dots) is calculated for each data point
    (left). Then the deviations are squared and the average is
    calculated (right).}
  \label{leastsquareerrorfig}
\end{figure}

\begin{figure}[t]
  \includegraphics[width=0.75\textwidth]{error_surface}
  \titlecaption{Error surface.}{The two model parameters $m$ and $b$
    define the base area of the surface plot. For each parameter
    combination of slope and intercept the error is calculated. The
    resulting surface has a minimum which indicates the parameter
    combination that best fits the data.}\label{errorsurfacefig}
\end{figure}

\begin{figure}[t]
  \includegraphics[width=0.75\textwidth]{error_gradient}
  \titlecaption{Gradient of the error surface.}{Each arrow points
    in the direction of the steepest ascent at different positions
    of the error surface shown in \figref{errorsurfacefig}. The
    contour lines in the background illustrate the error surface. Warm
    colors indicate high errors, colder colors low error values. Each
    contour line connects points of equal
    error.}\label{gradientquiverfig}
\end{figure}

\begin{figure}[t]
  \includegraphics[width=0.45\textwidth]{gradient_descent}
  \titlecaption{Gradient descent.}{The algorithm starts at an
    arbitrary position. At each point the gradient is estimated and
    the position is updated as long as the length of the gradient is
    sufficiently large. The dots show the positions after each
    iteration of the algorithm.}\label{gradientdescentfig}
\end{figure}

\subsection{Linear fits}
\begin{itemize}
\item \code{polyfit()} is easy: there is a unique solution! Fitting $y = c x^2$ is also a linear fit (linear in the parameter $c$).
\item Example of overfitting with a \code{polyfit()} of high order (order close to the number of data points); see the sketch after this list
\end{itemize}
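
A sketch of the overfitting example (the data values are made up):
\begin{lstlisting}
n = 10;
x = sort(10.0*rand(n, 1));
y = 2.0*x + 5.0 + 2.0*randn(n, 1);  % noisy straight line

p1 = polyfit(x, y, 1);              % appropriate linear fit
p9 = polyfit(x, y, n-1);            % order n-1 interpolates all n points

xx = min(x):0.01:max(x);
plot(x, y, 'o');
hold on;
plot(xx, polyval(p1, xx));          % smooth, generalizes
plot(xx, polyval(p9, xx));          % wiggles through every data point
hold off;
\end{lstlisting}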

\section{Fitting in practice}

Fit with the MATLAB functions \code{lsqcurvefit()} and \code{polyfit()}.
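
For example, the cubic fit from above (\code{lsqcurvefit()} requires
the Optimization Toolbox; \varcode{x} and \varcode{y} are the tiger
data from the one-dimensional example):
\begin{lstlisting}
% non-linear fit: needs a model function and an initial parameter value
cubicfunc = @(p, x) p(1)*x.^3;
cest = lsqcurvefit(cubicfunc, 1.0, x, y);

% linear problems have a unique solution, no initial values needed:
p = polyfit(x, y, 1);             % slope and intercept of a straight line
\end{lstlisting}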

\subsection{Non-linear fits}
\begin{itemize}
\item Example that illustrates the local-minima problem (with error surface)
\item You need initial values for the parameters!
\item Example showing that fitting gets harder the more parameters you have.
\item Try to fix as many parameters as possible before doing the fit.
\item How to test the quality of a fit? Residuals. $\chi^2$ test. Runs test. See the sketch after this list.
\end{itemize}
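
A sketch of a simple residual check (assuming fitted straight-line
parameters \varcode{p}):
\begin{lstlisting}
residuals = y - (p(1)*x + p(2));  % deviations between data and fit

subplot(1, 2, 1);                 % a good fit scatters randomly around zero
plot(x, residuals, 'o');
hold on;
plot([min(x), max(x)], [0, 0]);   % zero line for orientation
hold off;
subplot(1, 2, 2);
hist(residuals);                  % should look roughly Gaussian
\end{lstlisting}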

\end{document}