% (file-viewer residue: 244 lines, 8.9 KiB, TeX)
\documentclass[12pt,a4paper,pdftex]{exam}
|
|
|
|
\usepackage[english]{babel}
|
|
\usepackage{pslatex}
|
|
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
|
|
\usepackage{xcolor}
|
|
\usepackage{graphicx}
|
|
\usepackage[breaklinks=true,bookmarks=true,bookmarksopen=true,pdfpagemode=UseNone,pdfstartview=FitH,colorlinks=true,citecolor=blue]{hyperref}
|
|
|
|
%%%%% layout %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\usepackage[left=20mm,right=20mm,top=25mm,bottom=25mm]{geometry}
|
|
\pagestyle{headandfoot}
|
|
% Title suffix used in the page header: ": Solutions" when the exam
% class is printing answers, empty otherwise.  The switch is evaluated
% once, here in the preamble, so \stitle itself is a plain string.
\newcommand{\stitle}{}
\ifprintanswers
  \renewcommand{\stitle}{: Solutions}
\fi
|
|
% Page header (left / centre / right): sheet title with the optional
% \stitle suffix, topic, and date.
\header{{\bfseries\large Exercise\stitle}}{{\bfseries\large PCA}}{{\bfseries\large January 7th, 2019}}
% Footer of the first page: contact details (left / centre / right).
\firstpagefooter{Prof. Dr. Jan Benda}{Phone: 29 74573}{Email:
  jan.benda@uni-tuebingen.de}
% Footer of the remaining pages: only the page number, centred.
\runningfooter{}{\thepage}{}
|
|
|
|
% Paragraph layout: no first-line indentation, paragraphs separated by
% vertical space, slightly stretched line spacing.
% NOTE(review): a \baselineskip set in the preamble is presumably reset
% as soon as a font size is selected at \begin{document}; the spacing
% that actually takes effect then comes from \baselinestretch -- confirm.
\setlength{\baselineskip}{15pt}
\setlength{\parindent}{0.0cm}
\setlength{\parskip}{0.3cm}
\renewcommand{\baselinestretch}{1.15}
|
|
|
|
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\usepackage{listings}
|
|
% Global style for all code listings (the Matlab solution scripts that
% are pulled in with \lstinputlisting).
\lstset{
  language=Matlab,                        % Matlab syntax highlighting
  basicstyle=\ttfamily\footnotesize,      % small typewriter font
  numbers=left,                           % line numbers in the left margin
  numberstyle=\tiny,
  title=\lstname,                         % use the listing's name as title
  showstringspaces=false,                 % no visible-space glyphs in strings
  commentstyle=\itshape\color{darkgray},  % comments in italic dark gray
  breaklines=true,                        % wrap long lines ...
  breakautoindent=true,                   % ... keeping their indentation
  columns=flexible,
  frame=single,                           % box around each listing
  xleftmargin=1em,
  xrightmargin=1em,
  aboveskip=10pt
}
|
|
|
|
%%%%% math stuff: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\usepackage{amsmath}
|
|
\usepackage{amssymb}
|
|
\usepackage{bm}
|
|
\usepackage{dsfont}
|
|
% Double-struck symbols for the standard number sets.
% The super-/subscripts of \reZp and \reZpN are placed outside \mathds:
% inside the math alphabet the digit 0 would be taken from the dsfont
% font, which does not provide it.  The outer braces keep each symbol a
% single atom, so a following script (e.g. \reZp^n) still works.
\newcommand{\naZ}{\mathds{N}}              % natural numbers
\newcommand{\gaZ}{\mathds{Z}}              % integers
\newcommand{\raZ}{\mathds{Q}}              % rational numbers
\newcommand{\reZ}{\mathds{R}}              % real numbers
\newcommand{\reZp}{{\mathds{R}^{+}}}       % R with superscript +
\newcommand{\reZpN}{{\mathds{R}^{+}_{0}}}  % R with superscript + and subscript 0
\newcommand{\koZ}{\mathds{C}}              % complex numbers
|
|
|
|
%%%%% page breaks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% \continue: in the question sheet (answers not printed) fill the rest
% of the page, put a right-aligned arrow at the bottom as a "continued
% on next page" hint, and start a new page.  Does nothing when answers
% are printed.
\newcommand{\continue}{%
  \ifprintanswers
  \else
    \vfill\hspace*{\fill}$\rightarrow$\newpage
  \fi
}
|
|
% \continuepage: always starts a new page.  When answers are printed it
% is a plain page break; in the question sheet the rest of the page is
% filled and a right-aligned arrow is placed at the bottom first.
\newcommand{\continuepage}{%
  \ifprintanswers
    \newpage
  \else
    \vfill\hspace*{\fill}$\rightarrow$\newpage
  \fi
}
|
|
% \newsolutionpage: page break that is only active when answers are
% printed; expands to nothing in the question sheet.
\newcommand{\newsolutionpage}{\ifprintanswers\newpage\fi}
|
|
|
|
%%%%% new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% \qt{title}: question title, set in bold and followed by a line break.
\newcommand{\qt}[1]{\textbf{#1}\\}
% \pref{label}: cross-reference wrapped in parentheses.
\newcommand{\pref}[1]{(\ref{#1})}
% \extra: marker text for an additional exercise ("Zusatzaufgabe" is
% German for "extra exercise"), followed by an explicit space.
\newcommand{\extra}{--- Zusatzaufgabe ---\ \mbox{}}
% \code{text}: inline code / identifiers in typewriter font.
\newcommand{\code}[1]{\texttt{#1}}
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\begin{document}
|
|
|
|
\input{instructions}
|
|
|
|
\begin{questions}
|
|
|
|
\question \qt{Covariance and correlation coefficient\vspace{-3ex}}
|
|
\begin{parts}
|
|
\part Generate two vectors $x$ and $z$ with $n=1000$ Gaussian distributed random numbers.
|
|
\part Compute $y$ as a linear combination of $x$ and $z$ according to
|
|
\[ y = r \cdot x + \sqrt{1-r^2}\cdot z \]
|
|
where $r$ is a parameter $-1 \le r \le 1$.
|
|
What does $r$ do?
|
|
\part Plot a scatter plot of $y$ versus $x$ for about 10 different values of $r$.
|
|
What do you observe?
|
|
\part Also compute the covariance matrix and the correlation
|
|
coefficient matrix between $x$ and $y$ (functions \texttt{cov} and
|
|
\texttt{corrcoef}). What do these matrices look like for different
|
|
values of $r$? How do the values of the matrices change if you generate
|
|
$x$ and $z$ with larger variances?
|
|
\begin{solution}
|
|
\lstinputlisting{covariance.m}
|
|
\includegraphics[width=0.8\textwidth]{covariance}
|
|
\end{solution}
|
|
\part Do the same analysis (scatter plot, covariance, and correlation coefficient)
|
|
for \[ y = x^2 + 0.5 \cdot z \]
|
|
Are $x$ and $y$ really independent?
|
|
\begin{solution}
|
|
\lstinputlisting{nonlinearcorrelation.m}
|
|
\includegraphics[width=0.8\textwidth]{nonlinearcorrelation}
|
|
\end{solution}
|
|
\end{parts}
|
|
|
|
\question \qt{Principal component analysis in 2D\vspace{-3ex}}
|
|
\begin{parts}
|
|
\part Generate $n=1000$ pairs $(x,y)$ of Gaussian distributed random numbers such
|
|
that all $x$ values have zero mean, half of the $y$ values have mean $+d$
|
|
and the other half mean $-d$, with $d \ge0$.
|
|
\part Plot scatter plots of the pairs $(x,y)$ for $d=0$, 1, 2, 3, 4 and 5.
|
|
Also plot a histogram of the $x$ values.
|
|
\part Apply PCA on the data and plot a histogram of the data projected onto
|
|
the PCA axis with the largest eigenvalue.
|
|
What do you observe?
|
|
\begin{solution}
|
|
\lstinputlisting{pca2d.m}
|
|
\includegraphics[width=0.8\textwidth]{pca2d-2}
|
|
\end{solution}
|
|
\end{parts}
|
|
|
|
\newsolutionpage
|
|
\question \qt{Principal component analysis in 3D\vspace{-3ex}}
|
|
\begin{parts}
|
|
\part Generate $n=1000$ triplets $(x,y,z)$ of Gaussian distributed random numbers such
|
|
that all $x$ values have zero mean, half of the $y$ and $z$ values have mean $+d$
|
|
and the other half mean $-d$, with $d \ge0$.
|
|
\part Plot 3D scatter plots of the triplets $(x,y,z)$ for $d=0$, 1, 2, 3, 4 and 5.
|
|
Also plot a histogram of the $x$ values.
|
|
\part Apply PCA on the data and plot a histogram of the data projected onto
|
|
the PCA axis with the largest eigenvalue.
|
|
What do you observe?
|
|
\begin{solution}
|
|
\lstinputlisting{pca3d.m}
|
|
\includegraphics[width=0.8\textwidth]{pca3d-2}
|
|
\end{solution}
|
|
\end{parts}
|
|
|
|
\continuepage
|
|
\question \qt{Spike sorting}
|
|
Extracellular recordings often pick up action potentials originating
|
|
from more than a single neuron. In case the waveforms of the action
|
|
potentials differ between the neurons one could assign each action
|
|
potential to the neuron it originated from. This process is called
|
|
``spike sorting''. Here we explore this method on a simulated
|
|
recording that contains action potentials from two different
|
|
neurons.
|
|
\begin{parts}
|
|
\part Load the data from the file \texttt{extdata.mat}. This file
|
|
contains the voltage trace of the recording (\texttt{voltage}),
|
|
the corresponding time vector in seconds (\texttt{time}), and a
|
|
vector containing the times of the peaks of detected action
|
|
potentials (\texttt{spiketimes}). Further, and in contrast to real
|
|
data, the waveforms of the action potentials of the two neurons
|
|
(\texttt{waveform1} and \texttt{waveform2}) and the corresponding
|
|
time vector (\texttt{waveformt}) are also contained in the file.
|
|
|
|
\part Plot the voltage trace and mark the peaks of the detected
|
|
action potentials (using \texttt{spiketimes}). Zoom into the plot
|
|
and look whether you can differentiate between two different
|
|
waveforms of action potentials. How do they differ?
|
|
|
|
\part Cut out the waveform of each action potential (5\,ms before
|
|
and after the peak). Plot all these snippets in a single
|
|
plot. Can you differentiate the two action potential waveforms?
|
|
\begin{solution}
|
|
\mbox{}\\[-3ex]\hspace*{5em}
|
|
\includegraphics[width=0.8\textwidth]{spikesorting1}
|
|
\end{solution}
|
|
|
|
\newsolutionpage
|
|
\part Apply PCA on the waveform snippets. That is, compute the
|
|
eigenvalues and eigenvectors of their covariance matrix, which is
|
|
an $n \times n$ matrix, with $n$ being the number of data points
|
|
contained in a single waveform snippet. Plot the sorted
|
|
eigenvalues (the ``eigenvalue spectrum''). How many eigenvalues
|
|
are clearly larger than zero?
|
|
\begin{solution}
|
|
\mbox{}\\[-3ex]\hspace*{5em}
|
|
\includegraphics[width=0.8\textwidth]{spikesorting2}
|
|
\end{solution}
|
|
|
|
\part Plot the two eigenvectors (``features'') with the two
|
|
largest eigenvalues as a function of time.
|
|
\begin{solution}
|
|
\mbox{}\\[-3ex]\hspace*{5em}
|
|
\includegraphics[width=0.8\textwidth]{spikesorting3}
|
|
\end{solution}
|
|
|
|
\part Project the waveform snippets onto these two eigenvectors
|
|
and display them with a scatter plot. What do you observe? Can you
|
|
imagine how to separate two ``clouds'' of data points
|
|
(``clusters'')?
|
|
|
|
\newsolutionpage
|
|
\part Think about a very simple way to separate the two
|
|
clusters. Generate a vector whose elements label the action
|
|
potentials, e.g.\ one that contains `1' for all snippets belonging to
one cluster and `2' for the waveforms of the other
|
|
cluster. Use this vector to mark the two clusters in the previous
|
|
plot with two different colors.
|
|
\begin{solution}
|
|
\mbox{}\\[-3ex]\hspace*{5em}
|
|
\includegraphics[width=0.8\textwidth]{spikesorting4}
|
|
\end{solution}
|
|
|
|
\part Plot the waveform snippets of each cluster together with the
|
|
true waveform obtained from the data file. Do they match?
|
|
\begin{solution}
|
|
\mbox{}\\[-3ex]\hspace*{5em}
|
|
\includegraphics[width=0.8\textwidth]{spikesorting5}
|
|
\end{solution}
|
|
|
|
\newsolutionpage
|
|
\part Mark the action potentials in the recording according to
|
|
their cluster identity.
|
|
\begin{solution}
|
|
\mbox{}\\[-3ex]\hspace*{5em}
|
|
\includegraphics[width=0.8\textwidth]{spikesorting6}
|
|
\end{solution}
|
|
|
|
\part Compute interspike-interval histograms of all the (unsorted)
|
|
action potentials, and of each of the two neurons. What do they
|
|
tell you?
|
|
\begin{solution}
|
|
\mbox{}\\[-3ex]\hspace*{5em}
|
|
\includegraphics[width=0.8\textwidth]{spikesorting7}
|
|
\lstinputlisting{spikesorting.m}
|
|
\end{solution}
|
|
\end{parts}
|
|
|
|
\end{questions}
|
|
|
|
\end{document}