diff --git a/bootstrap/exercises/correlationbootstrap.m b/bootstrap/exercises/correlationbootstrap.m new file mode 100644 index 0000000..5abb951 --- /dev/null +++ b/bootstrap/exercises/correlationbootstrap.m @@ -0,0 +1,37 @@ +%% (a) bootstrap: +nperm = 1000; +rb = zeros(nperm,1); +for i=1:nperm + % indices for resampling the data: + inx = randi(length(x), length(x), 1); + % resampled data pairs: + xb=x(inx); + yb=y(inx); + rb(i) = corr(xb, yb); +end + +%% (b) pdf of the correlation coefficients: +[hb,bb] = hist(rb, 20 ); +hb = hb/sum(hb)/(bb(2)-bb(1)); % normalization + +%% (c) significance: +rbq = quantile(rb, 0.05); +fprintf('correlation coefficient at 5%% significance = %.2f\n', rbq ); +if rbq > 0.0 + fprintf('--> correlation r=%.2f is significant\n', rd); +else + fprintf('--> r=%.2f is not a significant correlation\n', rd); +end + +%% plot: +hold on; +bar(b, h, 'facecolor', [0.5 0.5 0.5]); +bar(bb, hb, 'facecolor', 'b'); +bar(bb(bb<=rbq), hb(bb<=rbq), 'facecolor', 'r'); +plot( [rd rd], [0 4], 'r', 'linewidth', 2 ); +xlim([-0.25 0.75]) +xlabel('Correlation coefficient'); +ylabel('Probability density'); +hold off; + +savefigpdf( gcf, 'correlationbootstrap.pdf', 12, 6 ); diff --git a/bootstrap/exercises/correlationbootstrap.pdf b/bootstrap/exercises/correlationbootstrap.pdf new file mode 100644 index 0000000..68f35d1 Binary files /dev/null and b/bootstrap/exercises/correlationbootstrap.pdf differ diff --git a/bootstrap/exercises/correlationsignificance.pdf b/bootstrap/exercises/correlationsignificance.pdf index 9240e4f..1094f94 100644 Binary files a/bootstrap/exercises/correlationsignificance.pdf and b/bootstrap/exercises/correlationsignificance.pdf differ diff --git a/bootstrap/exercises/exercises01.tex b/bootstrap/exercises/exercises01.tex index 6366da8..b5065de 100644 --- a/bootstrap/exercises/exercises01.tex +++ b/bootstrap/exercises/exercises01.tex @@ -148,32 +148,56 @@ distributed? \continue -\question \qt{Permutation test} +\question \qt{Permutation test} \label{permutationtest} We want to compute the significance of a correlation by means of a permutation test. \begin{parts} -\part Generate 1000 correlated pairs $x$, $y$ of random numbers according to: + \part \label{permutationtestdata} Generate 1000 correlated pairs + $x$, $y$ of random numbers according to: \begin{verbatim} n = 1000 a = 0.2; x = randn(n, 1); y = randn(n, 1) + a*x; \end{verbatim} -\part Generate a scatter plot of the two variables. -\part Why is $y$ correlated with $x$? -\part Compute the correlation coefficient between $x$ and $y$. -\part What do you need to do in order to destroy the correlations between the $x$-$y$ pairs? -\part Do exactly this 1000 times and compute each time the correlation coefficient. -\part Compute the probability density of these correlation coefficients. -\part Is the correlation of the original data set significant? -\part What does significance of the correlation mean? -\part Vary the sample size \code{n} and compute in the same way the -significance of the correlation. + \part Generate a scatter plot of the two variables. + \part Why is $y$ correlated with $x$? + \part Compute the correlation coefficient between $x$ and $y$. + \part What do you need to do in order to destroy the correlations between the $x$-$y$ pairs? + \part Do exactly this 1000 times and compute each time the correlation coefficient. + \part Compute and plot the probability density of these correlation + coefficients. + \part Is the correlation of the original data set significant? 
+ \part What does significance of the correlation mean? + \part Vary the sample size \code{n} and compute in the same way the + significance of the correlation. \end{parts} \begin{solution} \lstinputlisting{correlationsignificance.m} \includegraphics[width=1\textwidth]{correlationsignificance} \end{solution} +\question \qt{Bootstrap of the correlation coefficient} +The permutation test generates the distribution of the null hypothesis +of uncorrelated data and we check whether the correlation coefficient +of the data differs significantly from this +distribution. Alternatively, we can bootstrap the data while keeping +the pairs and determine the confidence interval of the correlation +coefficient of the data. If this interval differs significantly from +a correlation coefficient of zero, we can conclude that the data are +indeed correlated. + +We take the same data set that we generated in exercise +\ref{permutationtest} (\ref{permutationtestdata}). +\begin{parts} + \part Bootstrap the correlation coefficient from the data 1000 times. + \part Compute and plot the probability density of these correlation + coefficients. + \part Is the correlation of the original data set significant? +\end{parts} +\begin{solution} + \lstinputlisting{correlationbootstrap.m} + \includegraphics[width=1\textwidth]{correlationbootstrap} +\end{solution} \end{questions} diff --git a/header.tex b/header.tex index f233f22..e100d4b 100644 --- a/header.tex +++ b/header.tex @@ -279,6 +279,9 @@ % content of someoutput.out % % Within the exercise environment enumerate is redefined to generate (a), (b), (c), ... +% +% The boolean showexercisesolutions controls whether solutions for the exercises +% are actually included. \usepackage{mdframed} \usepackage{xstring} \newlistof{exercisef}{loe}{\tr{Exercises}{\"Ubungen}} diff --git a/likelihood/lecture/likelihood.tex b/likelihood/lecture/likelihood.tex index f4c12c4..91ba87f 100644 --- a/likelihood/lecture/likelihood.tex +++ b/likelihood/lecture/likelihood.tex @@ -28,7 +28,7 @@ den Parametern $\theta$. Wenn nun den $n$ unabh\"angigen Beobachtungen $x_1, x_2, \ldots x_n$ die gleiche Wahrscheinlichkeitsverteilung $p(x|\theta)$ zugrundeliegt -(\enterm{i.i.d.} idependent and identically distributed), dann ist die +(\enterm{i.i.d.} independent and identically distributed), dann ist die Verbundwahrscheinlichkeit $p(x_1,x_2, \ldots x_n|\theta)$ des Auftretens der Werte $x_1, x_2, \ldots x_n$, gegeben ein bestimmtes $\theta$, @@ -71,14 +71,14 @@ Gr\"unden wird meistens das Maximum der logarithmierten Likelihood Wenn die Me{\ss}daten $x_1, x_2, \ldots x_n$ der Normalverteilung \eqnref{normpdfmean} entstammen, und wir den Mittelwert $\mu=\theta$ als einzigen Parameter der Verteilung betrachten, welcher Wert von -$\theta$ maximiert dessen Likelhood? +$\theta$ maximiert dessen Likelihood? \begin{figure}[t] \includegraphics[width=1\textwidth]{mlemean} \titlecaption{\label{mlemeanfig} Maximum Likelihood Sch\"atzung des Mittelwerts.}{Oben: Die Daten zusammen mit drei m\"oglichen Normalverteilungen mit unterschiedlichen Mittelwerten (Pfeile) aus - denen die Daten stammen k\"onnten. Unteln links: Die Likelihood + denen die Daten stammen k\"onnten. Unten links: Die Likelihood in Abh\"angigkeit des Mittelwerts als Parameter der Normalverteilungen. Unten rechts: die entsprechende Log-Likelihood.
An der Position des Maximums bei $\theta=2$ @@ -91,15 +91,15 @@ Die Log-Likelihood \eqnref{loglikelihood} ist & = & \sum_{i=1}^n \log \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x_i-\theta)^2}{2\sigma^2}} \\ & = & \sum_{i=1}^n - \log \sqrt{2\pi \sigma^2} -\frac{(x_i-\theta)^2}{2\sigma^2} \; . \end{eqnarray*} -Der Logarithmus hat die sch\"one Eigenschaft die Exponentialfunktion +Der Logarithmus hat die sch\"one Eigenschaft, die Exponentialfunktion der Normalverteilung auszul\"oschen, da der Logarithmus die Umkehrfunktion der Exponentialfunktion ist ($\log(e^x)=x$). Zur Bestimmung des Maximums der Log-Likelihood berechnen wir deren Ableitung nach dem Parameter $\theta$ und setzen diese gleich Null: \begin{eqnarray*} - \frac{\text{d}}{\text{d}\theta} \log {\cal L}(\theta|x_1,x_2, \ldots x_n) & = & \sum_{i=1}^n \frac{2(x_i-\theta)}{2\sigma^2} \;\; = \;\; 0 \\ - \Leftrightarrow \quad \sum_{i=1}^n x_i - \sum_{i=1}^n x_i \theta & = & 0 \\ + \frac{\text{d}}{\text{d}\theta} \log {\cal L}(\theta|x_1,x_2, \ldots x_n) & = & \sum_{i=1}^n - \frac{2(x_i-\theta)}{2\sigma^2} \;\; = \;\; 0 \\ + \Leftrightarrow \quad \sum_{i=1}^n x_i - \sum_{i=1}^n \theta & = & 0 \\ \Leftrightarrow \quad n \theta & = & \sum_{i=1}^n x_i \\ \Leftrightarrow \quad \theta & = & \frac{1}{n} \sum_{i=1}^n x_i \;\; = \;\; \bar x \end{eqnarray*} @@ -188,12 +188,12 @@ und setzen diese gleich Null: \Leftrightarrow \quad \theta & = & \frac{\sum_{i=1}^n \frac{x_iy_i}{\sigma_i^2}}{ \sum_{i=1}^n \frac{x_i^2}{\sigma_i^2}} \label{mleslope} \end{eqnarray} Damit haben wir nun einen anlytischen Ausdruck f\"ur die Bestimmung -der Steigung $\theta$ des Regressionsgeraden gewonnen +der Steigung $\theta$ der Regressionsgeraden gewonnen (\figref{mleproplinefig}). Ein Gradientenabstieg ist f\"ur das Fitten der Geradensteigung also gar nicht n\"otig. Das gilt allgemein f\"ur das Fitten von -Koeffizienten von linear kombinierten Basisfunktionen. Wie z.B. die +Koeffizienten von linear kombinierten Basisfunktionen. Wie z.B. die Steigung $m$ und der y-Achsenabschnitt $b$ einer Geradengleichung \[ y = m \cdot x +b \] oder allgemeiner die Koeffizienten $a_k$ eines Polynoms @@ -279,8 +279,8 @@ als Funktion des Orientierungswinkels). bevorzugte Orientierung des Stimulus (farbige Linien). Ein Stimulus einer bestimmten Orientierung aktiviert die Neurone in spezifischer Weise (Punkte). 
Unten: Die Log-Likelihood dieser - Aktivit\"aten wir maximal in der N\"ahe der wahren Orientierung - des Stimulus.} + Aktivit\"aten wird in der N\"ahe der wahren Orientierung + des Stimulus maximiert.} \end{figure} Das Gehirn ist aber mit dem umgekehrten Problem konfrontiert: gegeben diff --git a/projects/header.tex b/projects/header.tex index 6b19320..4d4fe9b 100644 --- a/projects/header.tex +++ b/projects/header.tex @@ -7,7 +7,7 @@ %%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry} \pagestyle{headandfoot} -\header{{\bfseries\large Scientific Computing}}{{\bfseries\large Project: \ptitle}}{{\bfseries\large Januar 24th, 2017}} +\header{{\bfseries\large Scientific Computing}}{{\bfseries\large Project: \ptitle}}{{\bfseries\large January 18th, 2018}} \runningfooter{}{\thepage}{} \setlength{\baselineskip}{15pt} diff --git a/projects/instructions.tex b/projects/instructions.tex index 4e0b1cf..63ddbec 100644 --- a/projects/instructions.tex +++ b/projects/instructions.tex @@ -10,10 +10,10 @@ {\bf Dates:} The {\bf code} and the {\bf presentation} should be uploaded to - ILIAS at latest on Thursday, February 9th, 12:59h. We will + ILIAS at latest on Sunday, February 4th, 23:59h. We will store all presentations on one computer to allow fast - transitions between talks. The presentations start on Thursday, - February 9th at 1:00h c.t.. + transitions between talks. The presentations start on Monday, + February 5th at 9:15h. \vspace{1ex} {\bf Files:} diff --git a/regression/code/checkdescent.m b/regression/code/checkdescent.m new file mode 100644 index 0000000..cc56466 --- /dev/null +++ b/regression/code/checkdescent.m @@ -0,0 +1,25 @@ +% data: +load('lin_regression.mat') + +% compute mean squared error for a range of slopes and intercepts: +slopes = -5:0.25:5; +intercepts = -30:1:30; +errors = zeros(length(slopes), length(intercepts)); +for i = 1:length(slopes) + for j = 1:length(intercepts) + errors(i,j) = lsqError([slopes(i), intercepts(j)], x, y); + end +end + +% position of the minimum of the error surface: +[me, mi] = min(errors(:)); +[ia, ib] = ind2sub(size(errors), mi); +eparams = [slopes(ia), intercepts(ib)]; + +% gradient descent: +pstart = [-2. 10.]; +[params, errors] = descent(x, y, pstart); + +% comparison: +fprintf('descent: %6.3f %6.3f\n', params(1), params(2)); +fprintf('minimum: %6.3f %6.3f\n', eparams(1), eparams(2)); diff --git a/regression/code/descent.m b/regression/code/descent.m new file mode 100644 index 0000000..1888414 --- /dev/null +++ b/regression/code/descent.m @@ -0,0 +1,15 @@ +function [params, errors] = descent(xdata, ydata, pstart) + mingradient = 0.1; + eps = 0.01; + + errors = []; + params = pstart; + count = 1; + gradient = [100.0, 100.0]; + while norm(gradient) > mingradient + gradient = lsqGradient(params, xdata, ydata); + errors(count) = lsqError(params, xdata, ydata); + params = params - eps .* gradient; + count = count + 1; + end +end diff --git a/regression/code/descentfit.m b/regression/code/descentfit.m new file mode 100644 index 0000000..ecabb68 --- /dev/null +++ b/regression/code/descentfit.m @@ -0,0 +1,22 @@ +clear +close all +load('lin_regression.mat') + +pstart = [-2. 
10.]; +[params, errors] = descent(x, y, pstart); + +figure() +subplot(2,1,1) +hold on +scatter(x, y, 'displayname', 'data') +xx = min(x):0.01:max(x); +fx = params(1)*xx + params(2); +plot(xx, fx, 'displayname', 'fit') +xlabel('Input') +ylabel('Output') +grid on +legend show +subplot(2,1,2) +plot(errors) +xlabel('optimization steps') +ylabel('error') \ No newline at end of file diff --git a/regression/code/errorSurface.m b/regression/code/errorSurface.m index 4bd3fd2..0e88c1e 100644 --- a/regression/code/errorSurface.m +++ b/regression/code/errorSurface.m @@ -1,6 +1,6 @@ load('lin_regression.mat'); -% compute mean squared error for a range of sloopes and intercepts: +% compute mean squared error for a range of slopes and intercepts: slopes = -5:0.25:5; intercepts = -30:1:30; error_surf = zeros(length(slopes), length(intercepts)); diff --git a/regression/code/linefit.m b/regression/code/linefit.m new file mode 100644 index 0000000..df600e2 --- /dev/null +++ b/regression/code/linefit.m @@ -0,0 +1,18 @@ +% data: +load('lin_regression.mat') + +% gradient descent: +pstart = [-2. 10.]; +[params, errors] = descent(x, y, pstart); + +% lsqcurvefit: +line = @(p, x) x.* p(1) + p(2); +cparams = lsqcurvefit(line, pstart, x, y); + +% polyfit: +pparams = polyfit(x, y, 1); + +% comparison: +fprintf('descent: %6.3f %6.3f\n', params(1), params(2)); +fprintf('lsqcurvefit: %6.3f %6.3f\n', cparams(1), cparams(2)); +fprintf('polyfit: %6.3f %6.3f\n', pparams(1), pparams(2)); diff --git a/regression/exercises/Makefile b/regression/exercises/Makefile new file mode 100644 index 0000000..27691d9 --- /dev/null +++ b/regression/exercises/Makefile @@ -0,0 +1,34 @@ +TEXFILES=$(wildcard exercises??.tex) +EXERCISES=$(TEXFILES:.tex=.pdf) +SOLUTIONS=$(EXERCISES:exercises%=solutions%) + +.PHONY: pdf exercises solutions watch watchexercises watchsolutions clean + +pdf : $(SOLUTIONS) $(EXERCISES) + +exercises : $(EXERCISES) + +solutions : $(SOLUTIONS) + +$(SOLUTIONS) : solutions%.pdf : exercises%.tex instructions.tex + { echo "\\documentclass[answers,12pt,a4paper,pdftex]{exam}"; sed -e '1d' $<; } > $(patsubst %.pdf,%.tex,$@) + pdflatex -interaction=scrollmode $(patsubst %.pdf,%.tex,$@) | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $(patsubst %.pdf,%.tex,$@) || true + rm $(patsubst %.pdf,%,$@).[!p]* + +$(EXERCISES) : %.pdf : %.tex instructions.tex + pdflatex -interaction=scrollmode $< | tee /dev/stderr | fgrep -q "Rerun to get cross-references right" && pdflatex -interaction=scrollmode $< || true + +watch : + while true; do ! make -q pdf && make pdf; sleep 0.5; done + +watchexercises : + while true; do ! make -q exercises && make exercises; sleep 0.5; done + +watchsolutions : + while true; do ! make -q solutions && make solutions; sleep 0.5; done + +clean : + rm -f *~ *.aux *.log *.out + +cleanup : clean + rm -f $(SOLUTIONS) $(EXERCISES) diff --git a/regression/exercises/gradient_descent.tex b/regression/exercises/exercises01-de.tex similarity index 81% rename from regression/exercises/gradient_descent.tex rename to regression/exercises/exercises01-de.tex index bd103ec..b7a835f 100644 --- a/regression/exercises/gradient_descent.tex +++ b/regression/exercises/exercises01-de.tex @@ -13,10 +13,16 @@ %%%%% text size %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry} -\pagestyle{headandfoot} \header{{\bfseries\large \"Ubung -}}{{\bfseries\large Gradientenabstiegsverfahren}}{{\bfseries\large 10. 
Januar, 2017}} +\pagestyle{headandfoot} +\ifprintanswers +\newcommand{\stitle}{: Solutions} +\else +\newcommand{\stitle}{} +\fi +\header{{\bfseries\large Exercise 11\stitle}}{{\bfseries\large Gradient descent}}{{\bfseries\large January 9th, 2018}} \firstpagefooter{Dr. Jan Grewe}{Phone: 29 74588}{Email: - jan.grewe@uni-tuebingen.de} \runningfooter{}{\thepage}{} + jan.grewe@uni-tuebingen.de} +\runningfooter{}{\thepage}{} \setlength{\baselineskip}{15pt} \setlength{\parindent}{0.0cm} @@ -24,21 +30,15 @@ \renewcommand{\baselinestretch}{1.15} \newcommand{\code}[1]{\texttt{#1}} -\renewcommand{\solutiontitle}{\noindent\textbf{L\"osung:}\par\noindent} +\renewcommand{\solutiontitle}{\noindent\textbf{Solution:}\par\noindent} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{document} -\vspace*{-6.5ex} -\begin{center} - \textbf{\Large Einf\"uhrung in die wissenschaftliche - Datenverarbeitung}\\[1ex] {\large Jan Grewe, Jan Benda}\\[-3ex] - Abteilung Neuroethologie \hfill --- \hfill Institut f\"ur - Neurobiologie \hfill --- \hfill - \includegraphics[width=0.28\textwidth]{UT_WBMW_Black_RGB} \\ -\end{center} +\input{instructions} \begin{questions} + \question Implementiere den Gradientenabstieg f\"ur das Problem der Parameteranpassung der linearen Geradengleichung an die Messdaten in der Datei \emph{lin\_regression.mat}. diff --git a/regression/exercises/exercises01.tex b/regression/exercises/exercises01.tex new file mode 100644 index 0000000..c6eadc1 --- /dev/null +++ b/regression/exercises/exercises01.tex @@ -0,0 +1,129 @@ +\documentclass[12pt,a4paper,pdftex]{exam} + +\usepackage[german]{babel} +\usepackage{natbib} +\usepackage{xcolor} +\usepackage{graphicx} +\usepackage[small]{caption} +\usepackage{sidecap} +\usepackage{pslatex} +\usepackage{amsmath} +\usepackage{amssymb} +\setlength{\marginparwidth}{2cm} +\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref} + +%%%%% text size %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry} +\pagestyle{headandfoot} +\ifprintanswers +\newcommand{\stitle}{: Solutions} +\else +\newcommand{\stitle}{} +\fi +\header{{\bfseries\large Exercise 11\stitle}}{{\bfseries\large Gradient descent}}{{\bfseries\large January 9th, 2018}} +\firstpagefooter{Dr. Jan Grewe}{Phone: 29 74588}{Email: + jan.grewe@uni-tuebingen.de} +\runningfooter{}{\thepage}{} + +\setlength{\baselineskip}{15pt} +\setlength{\parindent}{0.0cm} +\setlength{\parskip}{0.3cm} +\renewcommand{\baselinestretch}{1.15} + +\newcommand{\code}[1]{\texttt{#1}} +\renewcommand{\solutiontitle}{\noindent\textbf{Solution:}\par\noindent} +%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{listings} +\lstset{ + language=Matlab, + basicstyle=\ttfamily\footnotesize, + numbers=left, + numberstyle=\tiny, + title=\lstname, + showstringspaces=false, + commentstyle=\itshape\color{darkgray}, + breaklines=true, + breakautoindent=true, + columns=flexible, + frame=single, + xleftmargin=1em, + xrightmargin=1em, + aboveskip=10pt +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{document} + +\input{instructions} + +\begin{questions} + + \question Implement the gradient descent for finding the parameters + of a straight line that we want to fit to the data in the file + \emph{lin\_regression.mat}. + + In the lecture we already prepared most of the necessary functions: + 1. the error function (\code{meanSquareError()}), 2. 
the cost + function (\code{lsqError()}), and 3. the gradient + (\code{lsqGradient()}). Read chapter 8 ``Optimization and gradient + descent'' in the script, in particular section 8.4 and exercise 8.4! + + The algorithm for the descent towards the minimum of the cost + function is as follows: + + \begin{enumerate} + \item Start with some arbitrary parameter values $\vec p_0 = (m_0, b_0)$ + for the slope and the intercept of the straight line. + \item \label{computegradient} Compute the gradient of the cost function + at the current values of the parameters $\vec p_i$. + \item If the magnitude (length) of the gradient is smaller than some + small number, the algorithm converged close to the minimum of the + cost function and we abort the descent. Right at the minimum the + magnitude of the gradient is zero. However, since we determine + the gradient numerically, it will never be exactly zero. This is + why we just require the gradient to be sufficiently small + (e.g. \code{norm(gradient) < 0.1}). + \item \label{gradientstep} Move against the gradient by a small step + ($\epsilon = 0.01$): + \[\vec p_{i+1} = \vec p_i - \epsilon \cdot \nabla f_{cost}(m_i, b_i)\] + \item Repeat steps \ref{computegradient} -- \ref{gradientstep}. + \end{enumerate} + + \begin{parts} + \part Implement the gradient descent in a function that returns + the parameter values at the minimum of the cost function and a vector + with the value of the cost function at each step of the algorithm. + \begin{solution} + \lstinputlisting{../code/descent.m} + \end{solution} + + \part Plot the data and the straight line with the parameter + values that you found with the gradient descent method. + + \part Plot the development of the costs as a function of the + iteration step. + \begin{solution} + \lstinputlisting{../code/descentfit.m} + \end{solution} + + \part Find the position of the minimum of the cost function by + means of the \code{min()} function. Compare with the result of the + gradient descent method. Vary the value of $\epsilon$ and the + minimum gradient. What are good values such that the gradient + descent gets closest to the true minimum of the cost function? + \begin{solution} + \lstinputlisting{../code/checkdescent.m} + \end{solution} + + \part Use the functions \code{polyfit()} and \code{lsqcurvefit()} + provided by matlab to find the slope and intercept of a straight + line that fits the data. 
+ \begin{solution} + \lstinputlisting{../code/linefit.m} + \end{solution} + + \end{parts} + +\end{questions} + +\end{document} diff --git a/regression/exercises/instructions.tex b/regression/exercises/instructions.tex new file mode 100644 index 0000000..3041d3e --- /dev/null +++ b/regression/exercises/instructions.tex @@ -0,0 +1,6 @@ +\vspace*{-7.8ex} +\begin{center} +\textbf{\Large Introduction to Scientific Computing}\\[2.3ex] +{\large Jan Grewe, Jan Benda}\\[-3ex] +Neuroethology Lab \hfill --- \hfill Institute for Neurobiology \hfill --- \hfill \includegraphics[width=0.28\textwidth]{UT_WBMW_Black_RGB} \\ +\end{center} diff --git a/regression/lecture/regression-chapter.tex b/regression/lecture/regression-chapter.tex index 0c37634..fcb8f16 100644 --- a/regression/lecture/regression-chapter.tex +++ b/regression/lecture/regression-chapter.tex @@ -16,4 +16,8 @@ \input{regression} +Example for fit with matlab functions lsqcurvefit, polyfit + +Example for overfitting with polyfit of a high order (=number of data points) + \end{document} diff --git a/regression/lecture/regression.tex b/regression/lecture/regression.tex index f7e3b47..f72a406 100644 --- a/regression/lecture/regression.tex +++ b/regression/lecture/regression.tex @@ -264,7 +264,7 @@ Kostenfunktion mit m\"oglichst wenigen Berechnungen findet. und zeigt in Richtung des st\"arksten Anstiegs der Funktion $f(x,y)$. \end{minipage} - \vspace{1ex} Die Abbildung zeigt die Kontourlinien einer bivariaten + \vspace{1ex} Die Abbildung zeigt die Konturlinien einer bivariaten Gau{\ss}glocke $f(x,y) = \exp(-(x^2+y^2)/2)$ und den Gradienten mit seinen partiellen Ableitungen an drei verschiedenen Stellen. \end{ibox} @@ -283,7 +283,7 @@ Gef\"alles rollt, ben\"otigen wir Information \"uber die Richtung des Gef\"alles an der jeweils aktuellen Position. Der \determ{Gradient} (Box~\ref{partialderivativebox}) der Kostenfunktion -\[ \nabla f_{cost}(m,b) = \left( \frac{\partial e(m,b)}{\partial m}, +\[ \nabla f_{cost}(m,b) = \left( \frac{\partial f(m,b)}{\partial m}, \frac{\partial f(m,b)}{\partial b} \right) \] bzgl. der beiden Parameter $m$ und $b$ der Geradengleichung ist ein Vektor, der in Richtung des steilsten Anstiegs der Kostenfunktion $f_{cost}(m,b)$ zeigt. @@ -306,10 +306,10 @@ partielle Ableitung nach $m$ durch \titlecaption{Gradient der Fehlerfl\"ache.} {Jeder Pfeil zeigt die Richtung und die Steigung f\"ur verschiedene Parameterkombination aus Steigung und - $y$-Achsenabschnitt an. Die Kontourlinien im Hintergrund + $y$-Achsenabschnitt an. Die Konturlinien im Hintergrund illustrieren die Fehlerfl\"ache. Warme Farben stehen f\"ur gro{\ss}e Fehlerwerte, kalte Farben f\"ur kleine. Jede - Kontourlinie steht f\"ur eine Linie gleichen + Konturlinie steht f\"ur eine Linie gleichen Fehlers.}\label{gradientquiverfig} \end{figure} @@ -368,6 +368,7 @@ Punkte in Abbildung \ref{gradientdescentfig} gro{\ss}. Optimierungsschritt an.} \label{gradientdescentfig} \end{figure} +\setboolean{showexercisesolutions}{false} \begin{exercise}{gradientDescent.m}{} Implementiere den Gradientenabstieg f\"ur das Problem der Parameteranpassung der linearen Geradengleichung an die Messdaten in @@ -409,6 +410,7 @@ Kostenfunktionen gemacht \matlabfun{fminsearch()}, w\"ahrend spezielle Funktionen z.B. f\"ur die Minimierung des quadratischen Abstands bei einem Kurvenfit angeboten werden \matlabfun{lsqcurvefit()}. +\newpage \begin{important}[Achtung Nebenminima!] Das Finden des globalen Minimums ist leider nur selten so leicht wie bei einem Geradenfit. 
Oft hat die Kostenfunktion viele Nebenminima, diff --git a/scientificcomputing-script.tex b/scientificcomputing-script.tex index 1e5fb9d..22bbed8 100644 --- a/scientificcomputing-script.tex +++ b/scientificcomputing-script.tex @@ -67,6 +67,7 @@ \lstset{inputpath=regression/code} \include{regression/lecture/regression} +\setboolean{showexercisesolutions}{true} \graphicspath{{likelihood/lecture/}{likelihood/lecture/figures/}} \lstset{inputpath=likelihood/code} \include{likelihood/lecture/likelihood} diff --git a/statistics/lecture/statistics.tex b/statistics/lecture/statistics.tex index 1cdfeff..c6c217f 100644 --- a/statistics/lecture/statistics.tex +++ b/statistics/lecture/statistics.tex @@ -348,14 +348,14 @@ probability density functions like the one of the normal distribution \subsection{Kernel densities} A problem of using histograms for estimating probability densities is -that the have hard bin edges. Depending on where the bin edges are placed +that they have hard bin edges. Depending on where the bin edges are placed a data value falls in one or the other bin. \begin{figure}[t] \includegraphics[width=1\textwidth]{kerneldensity} \titlecaption{\label{kerneldensityfig} Kernel densities.}{Left: The histogram-based estimation of the probability density is dependent - also on the position of the bins. In the bottom plot the bins have + on the position of the bins. In the bottom plot the bins have bin shifted by half a bin width (here $\Delta x=0.4$) and as a result details of the probability density look different. Look, for example at the height of the largest bin. Right: In contrast, @@ -366,7 +366,7 @@ a data value falls in one or the other bin. To avoid this problem one can use so called \enterm {kernel densities} for estimating probability densities from data. Here every data point is replaced by a kernel (a function with integral one, like for -example the Gaussian function) that is moved exactly to the position +example the Gaussian) that is moved exactly to the position indicated by the data value. Then all the kernels of all the data values are summed up, the sum is divided by the number of data values, and we get an estimate of the probability density. @@ -417,7 +417,7 @@ and percentiles can be determined from the inverse cumulative function. 100 data values drawn from a normal distribution (red) in comparison to the true cumulative distribution function computed by numerically integrating the normal distribution function - (blue). From the cumulative distribution function one can read of + (blue). From the cumulative distribution function one can read off the probabilities of getting values smaller than a given value (here: $P(x \ge -1) \approx 0.15$). From the inverse cumulative distribution the position of percentiles can be computed (here:
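The kernel density passage in statistics.tex above fully describes the estimation procedure: center a kernel on every data value, sum up all kernels, and divide by the number of data values. A minimal MATLAB sketch of that idea follows for reference; the example data, the kernel width sigma, and the evaluation grid xg are made up for illustration and are not part of any of the patched files:

% kernel density estimate obtained by summing a Gaussian kernel
% centered on each data value (illustrative sketch only):
data = randn(100, 1);           % some example data
sigma = 0.2;                    % width (standard deviation) of the Gaussian kernel
xg = -4.0:0.05:4.0;             % positions at which the density is evaluated
kd = zeros(size(xg));
for k = 1:length(data)
    kd = kd + exp(-0.5*((xg - data(k))/sigma).^2)/sqrt(2.0*pi)/sigma;
end
kd = kd/length(data);           % normalize: each kernel integrates to one
plot(xg, kd);
xlabel('x');
ylabel('Probability density');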