\documentclass{beamer}
\usepackage{xcolor}
\usepackage{listings}
\usepackage{pgf}
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade} 
%\usepackage{multimedia}
\usepackage[latin1]{inputenc}
\usepackage{amsmath, amssymb}
\usepackage{bm} 
\usepackage[T1]{fontenc}
\usepackage{hyperref}
\usepackage{ulem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>
{
  \usetheme{Singapore}
  \setbeamercovered{opaque}
  \usecolortheme{tuebingen}
  \setbeamertemplate{navigation symbols}{}
  \usefonttheme{default}
  \useoutertheme{infolines}
  % \useoutertheme{miniframes}
}

\AtBeginSubsection[]
{
  \begin{frame}<beamer>
    \begin{center}
      \Huge \insertsectionhead
    \end{center}
    \tableofcontents[ 
    currentsubsection, 
    hideothersubsections, 
    sectionstyle=show/hide, 
    subsectionstyle=show/shaded, 
] 
    % \frametitle{\insertsectionhead}
  \end{frame}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\setbeamertemplate{blocks}[rounded][shadow=true]

\title[]{Scientific Computing -- Statistics}
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
  University T\"ubingen\\
Bernstein Center T\"ubingen}

\institute[Scientific Computing]{}
 \date{10/23/2014}
%\logo{\pgfuseimage{logo}}

\subject{Lectures}

%%%%%%%%%% configuration for code
\lstset{
 basicstyle=\ttfamily,
 numbers=left,
 showstringspaces=false,
 language=Matlab,
 commentstyle=\itshape\color{darkgray},
 keywordstyle=\color{blue},
 stringstyle=\color{green},
 backgroundcolor=\color{blue!10},
 breaklines=true,
 breakautoindent=true,
 columns=flexible,
 frame=single,
 captionpos=b,
 xleftmargin=1em,
 xrightmargin=1em,
 aboveskip=10pt
 }
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\mycite}[1]{
\begin{flushright}
\tiny \color{black!80} #1
\end{flushright}
}

\input{../latex/environments.tex}
\makeatother
 
\begin{document} 
 
\begin{frame} 
  \titlepage 

\end{frame} 



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Day 4-5 -- curve fitting and maximum likelihood}
\subsection{curve fitting and optimization}
\begin{frame}
  \frametitle{Overview}
  \begin{itemize}
  \item minimizing/maximizing a function numerically (optimization) is
    ubiquitous in science (curve fitting, maximum likelihood, ...)
  \item today we will look at the basic elements of optimization and
    apply it to curve fitting
  \item tomorrow, we will apply it to maximum likelihood
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{plotting surfaces}
\begin{lstlisting}
% grid of x- and y-coordinates covering [-1,1] x [-1,1]
range = linspace(-1,1,20);
[X,Y] = meshgrid(range, range);

% plot the paraboloid z = x^2 + y^2 as a surface
surf(X, Y, X.^2 + Y.^2);
colormap('winter');
\end{lstlisting}
\end{frame}

\begin{frame}
  \frametitle{linear least squares}
  \begin{minipage}{1.0\linewidth}
    \begin{minipage}{0.3\linewidth}
      \includegraphics[width=\linewidth]{figs/leastsquares.png}
      \source{http://en.wikipedia.org/wiki/Linear\_least\_squares\_\%28mathematics\%29}
    \end{minipage}
    \begin{minipage}{0.7\linewidth}
      \begin{itemize}
      \item The most common curve fitting problem is {\em linear least
        squares}.
      \item Its goal is to predict a set of output values $y_1, ...,
        y_n$ from their corresponding input values $x_1,...,x_n$ with
        a line $f_{a,b}(x) = a x+b$. 
      \item How is the line chosen?\pause
      \item[] By minimizing the summed squared error
        $$g(a,b) = \sum_{i=1}^n (y_i - f_{a,b}(x_i))^2$$
      \end{itemize}
    \end{minipage}
  \end{minipage}
\end{frame}
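
\begin{frame}[fragile]
  \frametitle{linear least squares}
  For a line, Matlab already solves this problem for us: {\tt polyfit}
  with degree 1 performs a linear least squares fit. A quick sketch,
  useful later as a sanity check for our own implementation:
\begin{lstlisting}
x = linspace(-5,5,20);
y = .5*x + 1 + randn(size(x));  % noisy example data

p = polyfit(x, y, 1);           % p(1) = slope a, p(2) = offset b
plot(x, y, 'ob', x, polyval(p, x), '-r');
\end{lstlisting}
\end{frame}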

\begin{frame}
  \frametitle{error surface}
  \begin{task}{plotting the error surface}
    \begin{itemize}
    \item Write a function {\tt lserr} that takes a 2-dimensional
      parameter vector (slope $a$ and offset $b$), an array of inputs
      {\tt x}, and an array of corresponding outputs {\tt y}, and
      computes the least squares error
       $$g(a,b) = \sum_{i=1}^n (y_i - f_{a,b}(x_i))^2$$
       with $$f_{a,b}(x_i) = a x_i + b.$$
     \item Generate an example dataset with {\tt x=linspace(-5,5,20)}
       and {\tt y = .5*x + 1 + randn(size(x))}.
     \item Write a script that plots the error surface as a function
       of $a$ and $b$. 
     \end{itemize}
  \end{task}
\end{frame}
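
\begin{frame}[fragile]
  \frametitle{error surface}
  \scriptsize
  One possible solution (a sketch; the grid ranges are ad hoc choices,
  and {\tt x} and {\tt y} are the example data from the task):
\begin{lstlisting}
function err = lserr(param, x, y)
    % sum of squared errors of the line f(x) = a*x + b
    % with a = param(1) and b = param(2)
    err = sum( (param(1)*x + param(2) - y).^2 );
\end{lstlisting}
\begin{lstlisting}
[A, B] = meshgrid(linspace(-1, 2, 30), linspace(-2, 4, 30));
E = zeros(size(A));
for i = 1:numel(A)               % evaluate the error on the grid
    E(i) = lserr([A(i), B(i)], x, y);
end
surf(A, B, E);
\end{lstlisting}
\end{frame}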

\begin{frame}
  \frametitle{optima and derivatives}
  \begin{itemize}
  \item How did you find maxima/minima of functions at school?\pause
  \item[] Compute the derivative, set it to zero, and solve for $x$.\pause
  \item Can anybody remember how a derivative is defined (hint:
    difference quotient)?\pause
  \item[]$$f'(x) = \lim_{h\rightarrow 0} \frac{f(x+h) - f(x)}{h}$$
  \item Could you write down this expression for the partial
    derivative $\frac{\partial g(a,b)}{\partial a}$ of $g(a,b)$ w.r.t. $a$?\pause
  \item[]$$\frac{\partial g(a,b)}{\partial a} = \lim_{h\rightarrow 0}
    \frac{g(a+h,b) - g(a,b)}{h}$$\pause
  \item What about $\frac{\partial g(a,b)}{\partial b}$?
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{gradient and numerical derivative}
  \begin{definition}{gradient}
    The {\em gradient} $$\nabla g(a,b) = \left(\frac{\partial
        g(a,b)}{\partial a}, \frac{\partial g(a,b)}{\partial b}
    \right)$$ is the vector with partial derivatives of $g$ w.r.t. $a$
    and $b$. 
  \end{definition}
  We can approximate it numerically by using the definition of the
  derivative
  $$\frac{\partial g(a,b)}{\partial a} = \lim_{h\rightarrow 0}
    \frac{g(a+h,b) - g(a,b)}{h} \approx \frac{g(a+h,b) - g(a,b)}{h},$$
    for very small $h$ (e.g. {\tt h=1e-6}).
\end{frame}
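
\begin{frame}[fragile]
  \frametitle{gradient and numerical derivative}
  A minimal sketch of the numerical gradient, written so that it works
  for any number of parameters:
\begin{lstlisting}
function grad = lserr_gradient(param, x, y)
    h = 1e-6;                  % step size of the difference quotient
    grad = zeros(size(param));
    for i = 1:length(param)
        dparam = param;
        dparam(i) = dparam(i) + h;   % perturb the i-th parameter
        grad(i) = (lserr(dparam, x, y) - lserr(param, x, y)) / h;
    end
\end{lstlisting}
\end{frame}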


\begin{frame}
  \frametitle{gradient field}
  \begin{task}{plotting the gradient field}
    \begin{itemize}
    \item Write a function {\tt lserr\_gradient} that takes the same
      arguments as {\tt lserr}, but numerically computes the gradient 
  $$\nabla g(a,b) = \left(\frac{\partial g(a,b)}{\partial a}, \frac{\partial g(a,b)}{\partial b} \right)$$
    \item Add the gradient field as a vector field to your plot (use
      {\tt quiver}).
      \item Add a contour plot of the error surface as well (use {\tt contour}).
      \item What can you observe about the directions of the gradients
        with respect to the contour lines?
     \end{itemize}
  \end{task}
\end{frame}
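
\begin{frame}[fragile]
  \frametitle{gradient field}
  A sketch of the plotting part (reusing the grids {\tt A}, {\tt B} and
  the error surface {\tt E} from before):
\begin{lstlisting}
dA = zeros(size(A)); dB = zeros(size(B));
for i = 1:numel(A)               % gradient at every grid point
    g = lserr_gradient([A(i), B(i)], x, y);
    dA(i) = g(1); dB(i) = g(2);
end

contour(A, B, E, 20); hold on;   % 20 contour lines
quiver(A, B, dA, dB);            % gradient field on top
hold off;
\end{lstlisting}
\end{frame}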


\begin{frame}
  \frametitle{gradient descent}
  \begin{itemize}
    \item The gradient $\nabla g(a,b)$ always points in the direction
      of steepest ascent. \pause
    \item How do we get the direction of steepest descent? \pause
    \item[] We take minus the gradient $-\nabla g(a,b)$. \pause
  \end{itemize}
  {\bf gradient descent algorithm}
  \begin{enumerate}
  \item Start at some starting point $\mathbf p_0 = (a_0,b_0)$.
  \item Repeat while the gradient is large enough:
    \begin{itemize}
    \item Compute the gradient at the current position $\mathbf p_t=(a_t,b_t)$.
    \item Take a small step in the negative gradient direction via $$\mathbf p_{t+1} =
      \mathbf p_{t} - \varepsilon \nabla g(a_t,b_t)$$ where
      $\varepsilon$ is a small step size.
    \end{itemize}
  \end{enumerate}
\end{frame}
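
\begin{frame}[fragile]
  \frametitle{gradient descent}
  The algorithm in code (a sketch; the step size and the stopping
  threshold are ad hoc choices):
\begin{lstlisting}
param = [0; 0];                       % starting point p_0
stepsize = 0.01;                      % epsilon
grad = lserr_gradient(param, x, y);
while norm(grad) > 0.1                % stop when the gradient is small
    param = param - stepsize * grad;  % step towards steepest descent
    grad = lserr_gradient(param, x, y);
end
\end{lstlisting}
\end{frame}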

\begin{frame}
  \frametitle{gradient descent}
  \begin{task}{gradient descent}
    \begin{itemize}
    \item Implement a gradient descent for our linear regression
      problem.
    \item At each step in the algorithm, plot the error surface and
      the current parameter point (hint: use {\tt plot3} to plot a
      point in 3D).
    \item At each step also plot the linear regression line along with
      the data points in a separate plot.
    \item It is a good idea to call {\tt pause(.1)} after each plot, so
      that Matlab has time to update the figures and you have time to
      watch the gradient descent at work.
    \end{itemize}
    
  \end{task}
\end{frame}
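
\begin{frame}[fragile]
  \frametitle{gradient descent}
  The visualization could be hooked into the descent loop like this
  (sketch, reusing {\tt A}, {\tt B}, and {\tt E} from before):
\begin{lstlisting}
surf(A, B, E); hold on;          % error surface ...
plot3(param(1), param(2), lserr(param, x, y), 'or'); % ... current point
hold off;
pause(.1);                       % give Matlab time to draw
\end{lstlisting}
\end{frame}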


\begin{frame}[fragile]
  \frametitle{optimization with matlab}
\scriptsize
A little adaptation for the objective function
\begin{lstlisting}
function [err, grad] = lserr(param, x, y)
    % mean squared error of the line a*x + b (same minimizer as the sum)
    err = mean( (param(1)*x + param(2) - y).^2 );

    % fminunc requests the gradient as a second output
    if nargout == 2
        grad = lserr_gradient(param, x, y);
    end
\end{lstlisting}
The actual optimization
\begin{lstlisting}
function param = estimate_regression(x, y, param0)
    % tell fminunc that our objective also returns the gradient
    options = optimset('GradObj', 'on');
    myfunc = @(p)(lserr(p, x, y));
    param = fminunc(myfunc, param0, options);
\end{lstlisting}
\end{frame}
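
\begin{frame}[fragile]
  \frametitle{optimization with matlab}
  Putting it together (a sketch; the starting point {\tt [0; 0]} is an
  arbitrary choice):
\begin{lstlisting}
x = linspace(-5,5,20);
y = .5*x + 1 + randn(size(x));

param = estimate_regression(x, y, [0; 0]);
plot(x, y, 'ob', x, param(1)*x + param(2), '-r');
\end{lstlisting}
\end{frame}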

\begin{frame}
  \frametitle{nonlinear regression}
  \scriptsize
  \begin{task}{fit a charging curve}
    The following problem arises when estimating the time constant of
    a membrane from data. 

    \vspace{.5cm}

    \begin{minipage}{1.0\linewidth}
      \begin{minipage}{0.5\linewidth}
        \begin{center}
          \includegraphics[width=\linewidth]{figs/charging.png}
        \end{center}
      \end{minipage}
      \begin{minipage}{0.5\linewidth}
        \begin{itemize}
        \item Download the data {\tt membraneVoltage.mat}. It contains
          the points plotted on the right hand side. 
        \item Write a nonlinear least squares fit to fit the function 
          $$ f_{A,\tau}(t) = A\cdot \left(1 -
            e^{-\frac{t}{\tau}}\right)$$
          to the data.
        \item This looks scary, but it is not: if you programmed
          everything correctly beforehand, you only need to adapt the
          function {\tt lserr} and reuse the optimization from the
          previous slide.
        \item Plot the final result along with the data points. 
        \end{itemize}
      \end{minipage}
    \end{minipage}
  \end{task}
\end{frame}
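
\begin{frame}[fragile]
  \frametitle{nonlinear regression}
  \scriptsize
  A sketch of the adapted objective (the name {\tt expfiterr} and the
  starting point are our choices; {\tt t} and {\tt y} are assumed to be
  loaded from {\tt membraneVoltage.mat}):
\begin{lstlisting}
function err = expfiterr(param, t, y)
    % mean squared error of the charging curve
    % f(t) = A*(1 - exp(-t/tau)) with A = param(1), tau = param(2)
    err = mean( (param(1)*(1 - exp(-t/param(2))) - y).^2 );
\end{lstlisting}
\begin{lstlisting}
param = fminunc(@(p)(expfiterr(p, t, y)), [10; 1]);
plot(t, y, 'ob', t, param(1)*(1 - exp(-t/param(2))), '-r');
\end{lstlisting}
\end{frame}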

\begin{frame}
  \begin{center}
    \Huge That's it.
  \end{center}
\end{frame}

\end{document}