stubs for plotting exercise

This commit is contained in:
2019-11-19 19:58:54 +01:00
parent ed561f2f19
commit 2fbbf04e98
9 changed files with 574 additions and 121 deletions

Binary file not shown.

View File

@@ -0,0 +1,213 @@
\documentclass[12pt,a4paper,pdftex]{exam}
\usepackage[german]{babel}
\usepackage{pslatex}
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
\usepackage{xcolor}
\usepackage{graphicx}
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
%%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
\pagestyle{headandfoot}
\ifprintanswers
\newcommand{\stitle}{: Solutions}
\else
\newcommand{\stitle}{}
\fi
\header{{\bfseries\large Exercise 12\stitle}}{{\bfseries\large Maximum likelihood}}{{\bfseries\large January 7th, 2019}}
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
jan.benda@uni-tuebingen.de}
\runningfooter{}{\thepage}{}
\setlength{\baselineskip}{15pt}
\setlength{\parindent}{0.0cm}
\setlength{\parskip}{0.3cm}
\renewcommand{\baselinestretch}{1.15}
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{listings}
\lstset{
language=Matlab,
basicstyle=\ttfamily\footnotesize,
numbers=left,
numberstyle=\tiny,
title=\lstname,
showstringspaces=false,
commentstyle=\itshape\color{darkgray},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{bm}
\usepackage{dsfont}
\newcommand{\naZ}{\mathds{N}}
\newcommand{\gaZ}{\mathds{Z}}
\newcommand{\raZ}{\mathds{Q}}
\newcommand{\reZ}{\mathds{R}}
\newcommand{\reZp}{\mathds{R^+}}
\newcommand{\reZpN}{\mathds{R^+_0}}
\newcommand{\koZ}{\mathds{C}}
%%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\continue}{\ifprintanswers%
\else
\vfill\hspace*{\fill}$\rightarrow$\newpage%
\fi}
\newcommand{\continuepage}{\ifprintanswers%
\newpage
\else
\vfill\hspace*{\fill}$\rightarrow$\newpage%
\fi}
\newcommand{\newsolutionpage}{\ifprintanswers%
\newpage%
\else
\fi}
%%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\qt}[1]{\textbf{#1}\\}
\newcommand{\pref}[1]{(\ref{#1})}
\newcommand{\extra}{--- Zusatzaufgabe ---\ \mbox{}}
\newcommand{\code}[1]{\texttt{#1}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
\input{instructions}
\begin{questions}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Maximum likelihood of the standard deviation}
Let's compute the likelihood and the log-likelihood for the estimation
of the standard deviation.
\begin{parts}
\part Draw $n=50$ random numbers from a normal distribution with
mean $\mu=3$ and standard deviation $\sigma=2$.
\part Plot the likelihood (computed as the product of probabilities)
and the log-likelihood (sum of the logarithms of the probabilities)
as a function of the standard deviation. Compare the position of the
maxima with the standard deviation that you compute directly from
the data.
\part Increase $n$ to 1000. What happens to the likelihood, what
happens to the log-likelihood? Why?
\end{parts}
\begin{solution}
\lstinputlisting{mlestd.m}
\includegraphics[width=1\textwidth]{mlestd}\\
The more data the smaller the product of the probabilities ($\approx
p^n$ with $0 \le p < 1$) and the smaller the sum of the logarithms
of the probabilities ($\approx n\log p$, note that $\log p < 0$).
The product eventually gets smaller than the precision that the
floating point numbers support. Therefore for $n=1000$ the product
becomes zero. Using the logarithm avoids this numerical problem.
\end{solution}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Maximum-likelihood estimator of a line through the origin}
In the lecture we derived the following equation for a
maximum-likelihood estimate of the slope $\theta$ of a straight line
through the origin fitted to $n$ pairs of data values $(x_i|y_i)$ with
standard deviation $\sigma_i$:
\[\theta = \frac{\sum_{i=1}^n \frac{x_i y_i}{\sigma_i^2}}{ \sum_{i=1}^n
\frac{x_i^2}{\sigma_i^2}} \]
\begin{parts}
\part \label{mleslopefunc} Write a function that takes two vectors
$x$ and $y$ containing the data pairs and returns the slope,
computed according to this equation. For simplicity we assume
$\sigma_i=\sigma$ for all $1 \le i \le n$. How does this simplify
the equation for the slope?
\begin{solution}
\lstinputlisting{mleslope.m}
\end{solution}
\part Write a script that generates data pairs that scatter around a
line through the origin with a given slope. Use the function from
\pref{mleslopefunc} to compute the slope from the generated data.
Compare the computed slope with the true slope that has been used to
generate the data. Plot the data together with the line from which
the data were generated and the maximum-likelihood fit.
\begin{solution}
\lstinputlisting{mlepropfit.m}
\includegraphics[width=1\textwidth]{mlepropfit}
\end{solution}
\part \label{mleslopecomp} Vary the number of data pairs, the slope,
as well as the variance of the data points around the true
line. Under which conditions is the maximum-likelihood estimation of
the slope closer to the true slope?
\part To answer \pref{mleslopecomp} more precisely, generate for
each condition let's say 1000 data sets and plot a histogram of the
estimated slopes. How does the histogram, its mean and standard
deviation relate to the true slope?
\end{parts}
\begin{solution}
\lstinputlisting{mlepropest.m}
\includegraphics[width=1\textwidth]{mlepropest}\\
The estimated slopes are centered around the true slope. The
standard deviation of the estimated slopes gets smaller for larger
$n$ and less noise in the data.
\end{solution}
\continue
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question \qt{Maximum-likelihood-estimation of a probability-density function}
Many probability-density functions have parameters that cannot be
computed directly from the data, like, for example, the mean of
normally-distributed data. Such parameters need to be estimated by
means of the maximum-likelihood method from the data.
Let us demonstrate this approach by means of data that are drawn from a
gamma distribution.
\begin{parts}
\part Find out which \code{matlab} function computes the
probability-density function of the gamma distribution.
\part \label{gammaplot} Use this function to plot the
probability-density function of the gamma distribution for various
values of the (positive) ``shape'' parameter. We set the ``scale''
parameter to one.
\part Find out which \code{matlab} function generates random numbers
that are distributed according to a gamma distribution. Generate
with this function 50 random numbers using one of the values of the
``shape'' parameter used in \pref{gammaplot}.
\part Compute and plot a properly normalized histogram of these
random numbers.
\part Find out which \code{matlab} function fits a distribution to a
vector of random numbers according to the maximum-likelihood method.
How do you need to use this function in order to fit a gamma
distribution to the data?
\part Estimate with this function the parameter of the gamma
distribution used to generate the data.
\part Finally, plot the fitted gamma distribution on top of the
normalized histogram of the data.
\end{parts}
\begin{solution}
\lstinputlisting{mlepdffit.m}
\includegraphics[width=1\textwidth]{mlepdffit}
\end{solution}
\end{questions}
\end{document}

View File

@@ -0,0 +1,41 @@
\vspace*{-8ex}
\begin{center}
\textbf{\Large Introduction to scientific computing}\\[1ex]
{\large Jan Grewe, Jan Benda}\\[-3ex]
Neuroethology lab \hfill --- \hfill Institute for Neurobiology \hfill --- \hfill \includegraphics[width=0.28\textwidth]{UT_WBMW_Black_RGB} \\
\end{center}
% \ifprintanswers%
% \else
% % Die folgenden Aufgaben dienen der Wiederholung, \"Ubung und
% % Selbstkontrolle und sollten eigenst\"andig bearbeitet und gel\"ost
% % werden. Die L\"osung soll in Form eines einzelnen Skriptes (m-files)
% % im ILIAS hochgeladen werden. Jede Aufgabe sollte in einer eigenen
% % ``Zelle'' gel\"ost sein. Die Zellen \textbf{m\"ussen} unabh\"angig
% % voneinander ausf\"uhrbar sein. Das Skript sollte nach dem Muster:
% % ``variablen\_datentypen\_\{nachname\}.m'' benannt werden
% % (z.B. variablen\_datentypen\_mueller.m).
% \begin{itemize}
% \item \"Uberzeuge dich von jeder einzelnen Zeile deines Codes, dass
% sie auch wirklich das macht, was sie machen soll! Teste dies mit
% kleinen Beispielen direkt in der Kommandozeile.
% \item Versuche die L\"osungen der Aufgaben m\"oglichst in
% sinnvolle kleine Funktionen herunterzubrechen.
% Sobald etwas \"ahnliches mehr als einmal berechnet werden soll,
% lohnt es sich eine Funktion daraus zu schreiben!
% \item Teste rechenintensive \code{for} Schleifen, Vektoren, Matrizen
% zuerst mit einer kleinen Anzahl von Wiederholungen oder kleiner
% Gr\"o{\ss}e, und benutze erst am Ende, wenn alles \"uberpr\"uft
% ist, eine gro{\ss}e Anzahl von Wiederholungen oder Elementen, um eine gute
% Statistik zu bekommen.
% \item Benutze die Hilfsfunktion von \code{matlab} (\code{help
% commando} oder \code{doc commando}) und das Internet, um
% herauszufinden, wie bestimmte \code{matlab} Funktionen zu verwenden
% sind und was f\"ur M\"oglichkeiten sie bieten.
% Auch zu inhaltlichen Konzepten bietet das Internet oft viele
% Antworten!
% \end{itemize}
% \fi

View File

@@ -0,0 +1,74 @@
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as scio
from IPython import embed
def boltzmann(x, y_max, slope, inflection):
    r"""
    The underlying Boltzmann (sigmoid) function.

    .. math::
        f(x) = \frac{y_{max}}{1 + e^{-slope \cdot (x - inflection)}}

    :param x: The x values.
    :param y_max: The maximum value, i.e. the saturation level.
    :param slope: The slope parameter k; larger values give a steeper rise.
    :param inflection: The position of the inflection point, where
        f(x) = y_max / 2.
    :return: The y values.
    """
    y = y_max / (1 + np.exp(-slope * (x - inflection)))
    return y
class Animal(object):
    """Simulates an animal learning one or more tasks over several sessions.

    Learning follows a Boltzmann (sigmoid) curve over the session index,
    with per-trial Gaussian noise and a probability of skipping trials.
    """

    def __init__(self, delay, learning_rate, volatility, responsiveness):
        """
        :param delay: session index of the inflection point of the
            learning curve (learning "kicks in" around this session)
        :param learning_rate: delta percent_correct per session
        :param volatility: 0 -> 1 the noise in the decision
        :param responsiveness: 0 -> 1 probability of actually conducting a trial
        """
        self.__delay = delay
        self.__learning_rate = learning_rate
        self.__volatility = volatility
        self.__responsiveness = responsiveness

    def simulate(self, session_count=10, trials=20, task_difficulties=None):
        """Simulate the animal's performance across sessions.

        :param session_count: number of sessions to simulate
        :param trials: number of trials offered per session
        :param task_difficulties: list of task difficulties (0 -> 1); an
            empty or missing list is treated as one task of difficulty 0
        :return: tuple (avg_perf, err_perf, trials_performed) with the
            (session_count, tasks) mean and standard deviation of the
            performance of the conducted trials, and a (session_count,)
            array with the number of trials actually conducted per session
            (counted for the last task of each session).
        """
        # No mutable default argument; None/[] both mean "single easy task".
        if not task_difficulties:
            task_difficulties = [0]
        tasks = len(task_difficulties)
        avg_perf = np.zeros((session_count, tasks))
        err_perf = np.zeros((session_count, tasks))
        trials_performed = np.zeros(session_count)
        for i in range(session_count):
            for j in range(tasks):
                base_performance = boltzmann(i, 1.0, self.__learning_rate / 20, self.__delay)
                # harder tasks reduce performance proportionally
                penalty = base_performance * task_difficulties[j] * 0.5
                base_perf = 50 + 50 * (base_performance - penalty)
                # boolean mask of trials the animal actually conducted
                trials_completed = np.random.rand(trials) < self.__responsiveness
                performances = np.random.randn(trials) * self.__volatility * 100 + base_perf
                # NOTE(review): if no trial is completed these are NaN — confirm intended
                avg_perf[i, j] = np.mean(performances[trials_completed])
                err_perf[i, j] = np.std(performances[trials_completed])
            # BUG FIX: original rebound trials_performed to a scalar each
            # session; store the per-session count in the array instead.
            trials_performed[i] = np.sum(trials_completed)
        return avg_perf, err_perf, trials_performed
if __name__ == "__main__":
    session_count = 30
    # NOTE(review): defined but unused — simulate() below is called with
    # [0, 0.3, 0.6]; confirm which difficulty set is intended.
    task_difficulties = [0, 0.3, 1.]
    delays = [5, 10, 12, 20]
    learning_rates = np.array([5, 10, 2, 20])
    volatilities = np.random.rand(4) * 0.5
    responsivness = np.random.rand(4) * 0.5 + 0.5
    # Iterate the per-animal parameters in lockstep instead of indexing.
    # BUG FIX: the original had "v = volatilities[i]," — the trailing comma
    # made v a 1-tuple, so Animal received a tuple as its volatility.
    for d, lr, v, r in zip(delays, learning_rates, volatilities, responsivness):
        a = Animal(d, lr, v, r)
        ap, ep, tp = a.simulate(session_count=session_count, task_difficulties=[0, 0.3, 0.6])
        plt.plot(ap)
    embed()