stubs for plotting exercise

This commit is contained in:
2019-11-19 19:58:54 +01:00
parent ed561f2f19
commit 2fbbf04e98
9 changed files with 574 additions and 121 deletions

Binary file not shown.

View File

@@ -0,0 +1,213 @@
\documentclass[12pt,a4paper,pdftex]{exam}
\usepackage[german]{babel}
\usepackage{pslatex}
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
\usepackage{xcolor}
\usepackage{graphicx}
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
%%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
\pagestyle{headandfoot}
\ifprintanswers
\newcommand{\stitle}{: Solutions}
\else
\newcommand{\stitle}{}
\fi
\header{{\bfseries\large Exercise 12\stitle}}{{\bfseries\large Maximum likelihood}}{{\bfseries\large January 7th, 2019}}
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
jan.benda@uni-tuebingen.de}
\runningfooter{}{\thepage}{}
\setlength{\baselineskip}{15pt}
\setlength{\parindent}{0.0cm}
\setlength{\parskip}{0.3cm}
\renewcommand{\baselinestretch}{1.15}
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{listings}
\lstset{
language=Matlab,
basicstyle=\ttfamily\footnotesize,
numbers=left,
numberstyle=\tiny,
title=\lstname,
showstringspaces=false,
commentstyle=\itshape\color{darkgray},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{bm}
\usepackage{dsfont}
\newcommand{\naZ}{\mathds{N}}
\newcommand{\gaZ}{\mathds{Z}}
\newcommand{\raZ}{\mathds{Q}}
\newcommand{\reZ}{\mathds{R}}
\newcommand{\reZp}{\mathds{R^+}}
\newcommand{\reZpN}{\mathds{R^+_0}}
\newcommand{\koZ}{\mathds{C}}
%%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\continue}{\ifprintanswers%
\else
\vfill\hspace*{\fill}$\rightarrow$\newpage%
\fi}
\newcommand{\continuepage}{\ifprintanswers%
\newpage
\else
\vfill\hspace*{\fill}$\rightarrow$\newpage%
\fi}
\newcommand{\newsolutionpage}{\ifprintanswers%
\newpage%
\else
\fi}
%%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\qt}[1]{\textbf{#1}\\}
\newcommand{\pref}[1]{(\ref{#1})}
\newcommand{\extra}{--- Zusatzaufgabe ---\ \mbox{}}
\newcommand{\code}[1]{\texttt{#1}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
\input{instructions}
\begin{questions}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Maximum likelihood of the standard deviation}
Let's compute the likelihood and the log-likelihood for the estimation
of the standard deviation.
\begin{parts}
\part Draw $n=50$ random numbers from a normal distribution with
mean $\mu=3$ and standard deviation $\sigma=2$.
\part Plot the likelihood (computed as the product of probabilities)
and the log-likelihood (sum of the logarithms of the probabilities)
as a function of the standard deviation. Compare the position of the
maxima with the standard deviation that you compute directly from
the data.
\part Increase $n$ to 1000. What happens to the likelihood, what
happens to the log-likelihood? Why?
\end{parts}
\begin{solution}
\lstinputlisting{mlestd.m}
\includegraphics[width=1\textwidth]{mlestd}\\
The more data the smaller the product of the probabilities ($\approx
p^n$ with $0 \le p < 1$) and the smaller the sum of the logarithms
of the probabilities ($\approx n\log p$, note that $\log p < 0$).
The product eventually gets smaller than the precision that the
floating point numbers support. Therefore for $n=1000$ the product
becomes zero. Using the logarithm avoids this numerical problem.
\end{solution}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Maximum-likelihood estimator of a line through the origin}
In the lecture we derived the following equation for a
maximum-likelihood estimate of the slope $\theta$ of a straight line
through the origin fitted to $n$ pairs of data values $(x_i|y_i)$ with
standard deviation $\sigma_i$:
\[\theta = \frac{\sum_{i=1}^n \frac{x_i y_i}{\sigma_i^2}}{ \sum_{i=1}^n
\frac{x_i^2}{\sigma_i^2}} \]
\begin{parts}
\part \label{mleslopefunc} Write a function that takes two vectors
$x$ and $y$ containing the data pairs and returns the slope,
computed according to this equation. For simplicity we assume
$\sigma_i=\sigma$ for all $1 \le i \le n$. How does this simplify
the equation for the slope?
\begin{solution}
\lstinputlisting{mleslope.m}
\end{solution}
\part Write a script that generates data pairs that scatter around a
line through the origin with a given slope. Use the function from
\pref{mleslopefunc} to compute the slope from the generated data.
Compare the computed slope with the true slope that has been used to
generate the data. Plot the data together with the line from which
the data were generated and the maximum-likelihood fit.
\begin{solution}
\lstinputlisting{mlepropfit.m}
\includegraphics[width=1\textwidth]{mlepropfit}
\end{solution}
\part \label{mleslopecomp} Vary the number of data pairs, the slope,
as well as the variance of the data points around the true
line. Under which conditions is the maximum-likelihood estimation of
the slope closer to the true slope?
\part To answer \pref{mleslopecomp} more precisely, generate for
each condition let's say 1000 data sets and plot a histogram of the
estimated slopes. How does the histogram, its mean and standard
deviation relate to the true slope?
\end{parts}
\begin{solution}
\lstinputlisting{mlepropest.m}
\includegraphics[width=1\textwidth]{mlepropest}\\
The estimated slopes are centered around the true slope. The
standard deviation of the estimated slopes gets smaller for larger
$n$ and less noise in the data.
\end{solution}
\continue
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Maximum-likelihood-estimation of a probability-density function}
Many probability-density functions have parameters that cannot be
computed directly from the data, like, for example, the mean of
normally-distributed data. Such parameters need to be estimated by
means of the maximum-likelihood method from the data.
Let us demonstrate this approach by means of data that are drawn from a
gamma distribution.
\begin{parts}
\part Find out which \code{matlab} function computes the
probability-density function of the gamma distribution.
\part \label{gammaplot} Use this function to plot the
probability-density function of the gamma distribution for various
values of the (positive) ``shape'' parameter. We set the ``scale''
parameter to one.
\part Find out which \code{matlab} function generates random numbers
that are distributed according to a gamma distribution. Generate
with this function 50 random numbers using one of the values of the
``shape'' parameter used in \pref{gammaplot}.
\part Compute and plot a properly normalized histogram of these
random numbers.
\part Find out which \code{matlab} function fits a distribution to a
vector of random numbers according to the maximum-likelihood method.
How do you need to use this function in order to fit a gamma
distribution to the data?
\part Estimate with this function the parameter of the gamma
distribution used to generate the data.
\part Finally, plot the fitted gamma distribution on top of the
normalized histogram of the data.
\end{parts}
\begin{solution}
\lstinputlisting{mlepdffit.m}
\includegraphics[width=1\textwidth]{mlepdffit}
\end{solution}
\end{questions}
\end{document}

View File

@@ -0,0 +1,41 @@
\vspace*{-8ex}
\begin{center}
\textbf{\Large Introduction to scientific computing}\\[1ex]
{\large Jan Grewe, Jan Benda}\\[-3ex]
Neuroethology lab \hfill --- \hfill Institute for Neurobiology \hfill --- \hfill \includegraphics[width=0.28\textwidth]{UT_WBMW_Black_RGB} \\
\end{center}
% \ifprintanswers%
% \else
% % Die folgenden Aufgaben dienen der Wiederholung, \"Ubung und
% % Selbstkontrolle und sollten eigenst\"andig bearbeitet und gel\"ost
% % werden. Die L\"osung soll in Form eines einzelnen Skriptes (m-files)
% % im ILIAS hochgeladen werden. Jede Aufgabe sollte in einer eigenen
% % ``Zelle'' gel\"ost sein. Die Zellen \textbf{m\"ussen} unabh\"angig
% % voneinander ausf\"uhrbar sein. Das Skript sollte nach dem Muster:
% % ``variablen\_datentypen\_\{nachname\}.m'' benannt werden
% % (z.B. variablen\_datentypen\_mueller.m).
% \begin{itemize}
% \item \"Uberzeuge dich von jeder einzelnen Zeile deines Codes, dass
% sie auch wirklich das macht, was sie machen soll! Teste dies mit
% kleinen Beispielen direkt in der Kommandozeile.
% \item Versuche die L\"osungen der Aufgaben m\"oglichst in
% sinnvolle kleine Funktionen herunterzubrechen.
% Sobald etwas \"ahnliches mehr als einmal berechnet werden soll,
% lohnt es sich eine Funktion daraus zu schreiben!
% \item Teste rechenintensive \code{for} Schleifen, Vektoren, Matrizen
% zuerst mit einer kleinen Anzahl von Wiederholungen oder kleiner
% Gr\"o{\ss}e, und benutze erst am Ende, wenn alles \"uberpr\"uft
% ist, eine gro{\ss}e Anzahl von Wiederholungen oder Elementen, um eine gute
% Statistik zu bekommen.
% \item Benutze die Hilfsfunktion von \code{matlab} (\code{help
% commando} oder \code{doc commando}) und das Internet, um
% herauszufinden, wie bestimmte \code{matlab} Funktionen zu verwenden
% sind und was f\"ur M\"oglichkeiten sie bieten.
% Auch zu inhaltlichen Konzepten bietet das Internet oft viele
% Antworten!
% \end{itemize}
% \fi

View File

@@ -0,0 +1,74 @@
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as scio
from IPython import embed
def boltzmann(x, y_max, slope, inflection):
    r"""
    The underlying Boltzmann (sigmoid) function.

    .. math::
        f(x) = \frac{y_{max}}{1 + e^{-slope \cdot (x - inflection)}}

    :param x: The x values.
    :param y_max: The maximum value, i.e. the saturation level.
    :param slope: The slope parameter k; larger values give a steeper rise.
    :param inflection: The position of the inflection point, where
        f(x) = y_max / 2.
    :return: The y values.
    """
    y = y_max / (1 + np.exp(-slope * (x - inflection)))
    return y
class Animal(object):
    """Simulates an animal learning one or more tasks over several sessions.

    Learning follows a Boltzmann (sigmoid) curve over the session index,
    with per-trial Gaussian noise and a probability of skipping trials.
    """

    def __init__(self, delay, learning_rate, volatility, responsiveness):
        """
        :param delay: session index of the inflection point of the
            learning curve (learning "kicks in" around this session)
        :param learning_rate: delta percent_correct per session
        :param volatility: 0 -> 1 the noise in the decision
        :param responsiveness: 0 -> 1 probability of actually conducting a trial
        """
        self.__delay = delay
        self.__learning_rate = learning_rate
        self.__volatility = volatility
        self.__responsiveness = responsiveness

    def simulate(self, session_count=10, trials=20, task_difficulties=None):
        """Simulate the animal's performance across sessions.

        :param session_count: number of sessions to simulate
        :param trials: number of trials offered per session
        :param task_difficulties: list of task difficulties (0 -> 1); an
            empty or missing list is treated as one task of difficulty 0
        :return: tuple (avg_perf, err_perf, trials_performed) with the
            (session_count, tasks) mean and standard deviation of the
            performance of the conducted trials, and a (session_count,)
            array with the number of trials actually conducted per session
            (counted for the last task of each session).
        """
        # No mutable default argument; None/[] both mean "single easy task".
        if not task_difficulties:
            task_difficulties = [0]
        tasks = len(task_difficulties)
        avg_perf = np.zeros((session_count, tasks))
        err_perf = np.zeros((session_count, tasks))
        trials_performed = np.zeros(session_count)
        for i in range(session_count):
            for j in range(tasks):
                base_performance = boltzmann(i, 1.0, self.__learning_rate / 20, self.__delay)
                # harder tasks reduce performance proportionally
                penalty = base_performance * task_difficulties[j] * 0.5
                base_perf = 50 + 50 * (base_performance - penalty)
                # boolean mask of trials the animal actually conducted
                trials_completed = np.random.rand(trials) < self.__responsiveness
                performances = np.random.randn(trials) * self.__volatility * 100 + base_perf
                # NOTE(review): if no trial is completed these are NaN — confirm intended
                avg_perf[i, j] = np.mean(performances[trials_completed])
                err_perf[i, j] = np.std(performances[trials_completed])
            # BUG FIX: original rebound trials_performed to a scalar each
            # session; store the per-session count in the array instead.
            trials_performed[i] = np.sum(trials_completed)
        return avg_perf, err_perf, trials_performed
if __name__ == "__main__":
    session_count = 30
    # NOTE(review): defined but unused — simulate() below is called with
    # [0, 0.3, 0.6]; confirm which difficulty set is intended.
    task_difficulties = [0, 0.3, 1.]
    delays = [5, 10, 12, 20]
    learning_rates = np.array([5, 10, 2, 20])
    volatilities = np.random.rand(4) * 0.5
    responsivness = np.random.rand(4) * 0.5 + 0.5
    # Iterate the per-animal parameters in lockstep instead of indexing.
    # BUG FIX: the original had "v = volatilities[i]," — the trailing comma
    # made v a 1-tuple, so Animal received a tuple as its volatility.
    for d, lr, v, r in zip(delays, learning_rates, volatilities, responsivness):
        a = Animal(d, lr, v, r)
        ap, ep, tp = a.simulate(session_count=session_count, task_difficulties=[0, 0.3, 0.6])
        plt.plot(ap)
    embed()