diff --git a/linearalgebra/exercises/covariance.m b/linearalgebra/exercises/covariance.m new file mode 100644 index 0000000..2f60060 --- /dev/null +++ b/linearalgebra/exercises/covariance.m @@ -0,0 +1,19 @@ +n = 1000; +x = 2.0*randn(n, 1); +z = 3.0*randn(n, 1); + +rs = [-1.0:0.25:1.0]; +for k = 1:length(rs) + r = rs(k); + y = r*x + sqrt(1.0-r^2)*z; + r + cv = cov([x y]) + corrcoef([x y]) + subplot(3, 3, k) + scatter(x, y, 'filled', 'MarkerEdgeColor', 'white') + title(sprintf('r=%g', r)) + xlim([-10, 10]) + ylim([-20, 20]) + text(-8, -15, sprintf('cov(x,y)=%.2f', cv(1, 2))) +end +savefigpdf(gcf, 'covariance.pdf', 20, 20); diff --git a/linearalgebra/exercises/covariance.pdf b/linearalgebra/exercises/covariance.pdf new file mode 100644 index 0000000..6eb41e8 Binary files /dev/null and b/linearalgebra/exercises/covariance.pdf differ diff --git a/linearalgebra/exercises/exercises01.tex b/linearalgebra/exercises/exercises01.tex index 80433df..4264a75 100644 --- a/linearalgebra/exercises/exercises01.tex +++ b/linearalgebra/exercises/exercises01.tex @@ -100,14 +100,22 @@ jan.benda@uni-tuebingen.de} \texttt{corrcoef}). How do these matrices look like for different values of $r$? How do the values of the matrices change if you generate $x$ and $z$ with larger variances? + \begin{solution} + \lstinputlisting{covariance.m} + \includegraphics[width=0.8\textwidth]{covariance} + \end{solution} \part Do the same analysis (scatter plot, covariance, and correlation coefficient) for \[ y = x^2 + 0.5 \cdot z \] Are $x$ and $y$ really independent? + \begin{solution} + \lstinputlisting{nonlinearcorrelation.m} + \includegraphics[width=0.8\textwidth]{nonlinearcorrelation} + \end{solution} \end{parts} \question \qt{Principal component analysis in 2D\vspace{-3ex}} \begin{parts} - \part Generate pairs $(x,y)$ of Gaussian distributed random numbers such + \part Generate $n=1000$ pairs $(x,y)$ of Gaussian distributed random numbers such that all $x$ values have zero mean, half of the $y$ values have mean $+d$ and the other half mean $-d$, with $d \ge0$. \part Plot scatter plots of the pairs $(x,y)$ for $d=0$, 1, 2, 3, 4 and 5. @@ -115,11 +123,16 @@ jan.benda@uni-tuebingen.de} \part Apply PCA on the data and plot a histogram of the data projected onto the PCA axis with the largest eigenvalue. What do you observe? + \begin{solution} + \lstinputlisting{pca2d.m} + \includegraphics[width=0.8\textwidth]{pca2d-2} + \end{solution} \end{parts} + \newsolutionpage \question \qt{Principal component analysis in 3D\vspace{-3ex}} \begin{parts} - \part Generate triplets $(x,y,z)$ of Gaussian distributed random numbers such + \part Generate $n=1000$ triplets $(x,y,z)$ of Gaussian distributed random numbers such that all $x$ values have zero mean, half of the $y$ and $z$ values have mean $+d$ and the other half mean $-d$, with $d \ge0$. \part Plot 3D scatter plots of the pairs $(x,y)$ for $d=0$, 1, 2, 3, 4 and 5. @@ -127,15 +140,19 @@ jan.benda@uni-tuebingen.de} \part Apply PCA on the data and plot a histogram of the data projected onto the PCA axis with the largest eigenvalue. What do you observe? + \begin{solution} + \lstinputlisting{pca3d.m} + \includegraphics[width=0.8\textwidth]{pca3d-2} + \end{solution} \end{parts} - \continue + \continuepage \question \qt{Spike sorting} Extracellular recordings often pick up action potentials originating from more than a single neuron. In case the waveforms of the action potentials differ between the neurons one could assign each action potential to the neuron it originated from. This process is called - ``spike sorting''. Here we explore this methods on a simulated + ``spike sorting''. Here we explore this method on a simulated recording that contains action potentials from two different neurons. \begin{parts} @@ -149,42 +166,77 @@ jan.benda@uni-tuebingen.de} time vector (\texttt{waveformt}) are also contained in the file. \part Plot the voltage trace and mark the peaks of the detected - action potentials. Zoom into the plot and look whether you can - differentiate between two different waveforms of action - potentials. How do they differ? + action potentials (using \texttt{spiketimes}). Zoom into the plot + and look whether you can differentiate between two different + waveforms of action potentials. How do they differ? \part Cut out the waveform of each action potential (5\,ms before and after the peak). Plot all these snippets in a single plot. Can you differentiate the two actionpotential waveforms? - - \part Apply PCA on the waveform snippets, that is compute the - eigenvalues and eigenvectors of their covariance matrix, and plot - the sorted eigenvalues (the ``eigenvalue spectrum''). How many - eigenvalues are clearly larger than zero? + \begin{solution} + \mbox{}\\[-3ex]\hspace*{5em} + \includegraphics[width=0.8\textwidth]{spikesorting1} + \end{solution} + + \newsolutionpage + \part Apply PCA on the waveform snippets. That is compute the + eigenvalues and eigenvectors of their covariance matrix, which is + a $n \times n$ matrix, with $n$ being the number of data points + contained in a single waveform snippet. Plot the sorted + eigenvalues (the ``eigenvalue spectrum''). How many eigenvalues + are clearly larger than zero? + \begin{solution} + \mbox{}\\[-3ex]\hspace*{5em} + \includegraphics[width=0.8\textwidth]{spikesorting2} + \end{solution} \part Plot the two eigenvectors (``features'') with the two - largest eigenvalues. + largest eigenvalues as a function of time. + \begin{solution} + \mbox{}\\[-3ex]\hspace*{5em} + \includegraphics[width=0.8\textwidth]{spikesorting3} + \end{solution} \part Project the waveform snippets onto these two eigenvectors and display them with a scatter plot. What do you observe? Can you - separate two ``clouds'' of data points (``clusters'')? + imagine how to separate two ``clouds'' of data points + (``clusters'')? + \newsolutionpage \part Think about a very simply way how to separate the two clusters. Generate a vector whose elements label the action potentials, e.g. that contains '1' for all snippets belonging to the one cluster and '2' for the waveforms of the other cluster. Use this vector to mark the two clusters in the previous plot with two different colors. + \begin{solution} + \mbox{}\\[-3ex]\hspace*{5em} + \includegraphics[width=0.8\textwidth]{spikesorting4} + \end{solution} \part Plot the waveform snippets of each cluster together with the true waveform obtained from the data file. Do they match? + \begin{solution} + \mbox{}\\[-3ex]\hspace*{5em} + \includegraphics[width=0.8\textwidth]{spikesorting5} + \end{solution} + \newsolutionpage \part Mark the action potentials in the recording according to their cluster identity. + \begin{solution} + \mbox{}\\[-3ex]\hspace*{5em} + \includegraphics[width=0.8\textwidth]{spikesorting6} + \end{solution} \part Compute interspike-interval histograms of all the (unsorted) action potentials, and of each of the two neurons. What do they tell you? + \begin{solution} + \mbox{}\\[-3ex]\hspace*{5em} + \includegraphics[width=0.8\textwidth]{spikesorting7} + \lstinputlisting{spikesorting.m} + \end{solution} \end{parts} \end{questions} diff --git a/linearalgebra/exercises/extdata.mat b/linearalgebra/exercises/extdata.mat new file mode 100644 index 0000000..619c0c1 Binary files /dev/null and b/linearalgebra/exercises/extdata.mat differ diff --git a/linearalgebra/exercises/nonlinearcorrelation.m b/linearalgebra/exercises/nonlinearcorrelation.m new file mode 100644 index 0000000..bcf9a59 --- /dev/null +++ b/linearalgebra/exercises/nonlinearcorrelation.m @@ -0,0 +1,9 @@ +n = 1000; +x = randn(n, 1); +z = randn(n, 1); +y = x.^2 + 0.5*z; +scatter(x, y) +cov([x y]) +r = corrcoef([x y]) +text(-2, 8, sprintf('r=%.2f', r(1, 2))) +savefigpdf(gcf, 'nonlinearcorrelation.pdf', 15, 10); diff --git a/linearalgebra/exercises/nonlinearcorrelation.pdf b/linearalgebra/exercises/nonlinearcorrelation.pdf new file mode 100644 index 0000000..b65389b Binary files /dev/null and b/linearalgebra/exercises/nonlinearcorrelation.pdf differ diff --git a/linearalgebra/exercises/pca2d-2.pdf b/linearalgebra/exercises/pca2d-2.pdf new file mode 100644 index 0000000..f413a21 Binary files /dev/null and b/linearalgebra/exercises/pca2d-2.pdf differ diff --git a/linearalgebra/exercises/pca2d.m b/linearalgebra/exercises/pca2d.m new file mode 100644 index 0000000..972590b --- /dev/null +++ b/linearalgebra/exercises/pca2d.m @@ -0,0 +1,39 @@ +n = 1000; +ds = [0:5]; +for k = 1:length(ds) + d = ds(k); + % generate data: + x = randn(n, 1); + y = randn(n, 1); + y(1:n/2) = y(1:n/2) - d; + y(1+n/2:end) = y(1+n/2:end) + d; + % scatter plot of data: + subplot(2, 2, 1) + scatter(x, y, 'filled', 'MarkerEdgeColor', 'white') + title(sprintf('d=%g', d)) + xlabel('x') + ylabel('y') + % histogram of data: + subplot(2, 2, 3) + hist(x, 20) + xlabel('x') + % pca: + cv = cov([x y]); + [ev en] = eig(cv); + [en inx] = sort(diag(en), 'descend'); + nc = [x y]*ev(:,inx); + % scatter in new coordinates: + subplot(2, 2, 2) + scatter(nc(:, 1), nc(:, 2), 'filled', 'MarkerEdgeColor', 'white') + title(sprintf('d=%g', d)) + xlabel('pca 1') + ylabel('pca 2') + % histogram of data: + subplot(2, 2, 4) + hist(nc(:, 1), 20) + xlabel('pca 1') + if d == 2 + savefigpdf(gcf, 'pca2d-2.pdf', 15, 15); + end + pause(1.0) +end diff --git a/linearalgebra/exercises/pca3d-2.pdf b/linearalgebra/exercises/pca3d-2.pdf new file mode 100644 index 0000000..57b6225 Binary files /dev/null and b/linearalgebra/exercises/pca3d-2.pdf differ diff --git a/linearalgebra/exercises/pca3d.m b/linearalgebra/exercises/pca3d.m new file mode 100644 index 0000000..4bd05dc --- /dev/null +++ b/linearalgebra/exercises/pca3d.m @@ -0,0 +1,44 @@ +n = 1000; +ds = [0:5]; +for k = 1:length(ds) + d = ds(k); + % generate data: + x = randn(n, 1); + y = randn(n, 1); + z = randn(n, 1); + y(1:n/2) = y(1:n/2) - d; + y(1+n/2:end) = y(1+n/2:end) + d; + z(1:n/2) = z(1:n/2) - d; + z(1+n/2:end) = z(1+n/2:end) + d; + % scatter plot of data: + subplot(2, 2, 1) + scatter3(x, y, z, 'filled', 'MarkerEdgeColor', 'white') + title(sprintf('d=%g', d)) + xlabel('x') + ylabel('y') + zlabel('z') + % histogram of data: + subplot(2, 2, 3) + hist(x, 20) + xlabel('x') + % pca: + cv = cov([x y z]); + [ev en] = eig(cv); + [en inx] = sort(diag(en), 'descend'); + nc = [x y z]*ev(:,inx); + % scatter in new coordinates: + subplot(2, 2, 2) + scatter3(nc(:, 1), nc(:, 2), nc(:, 3), 'filled', 'MarkerEdgeColor', 'white') + title(sprintf('d=%g', d)) + xlabel('pca 1') + ylabel('pca 2') + zlabel('pca 3') + % histogram of data: + subplot(2, 2, 4) + hist(nc(:, 1), 20) + xlabel('pca 1') + if d == 2 + savefigpdf(gcf, 'pca3d-2.pdf', 15, 15); + end + pause(1.0) +end diff --git a/linearalgebra/exercises/savefigpdf.m b/linearalgebra/exercises/savefigpdf.m new file mode 100644 index 0000000..2a11c11 --- /dev/null +++ b/linearalgebra/exercises/savefigpdf.m @@ -0,0 +1,28 @@ +function savefigpdf(fig, name, width, height) +% Saves figure fig in pdf file name.pdf with appropriately set page size +% and fonts + +% default width: +if nargin < 3 + width = 11.7; +end +% default height: +if nargin < 4 + height = 9.0; +end + +% paper: +set(fig, 'PaperUnits', 'centimeters'); +set(fig, 'PaperSize', [width height]); +set(fig, 'PaperPosition', [0.0 0.0 width height]); +set(fig, 'Color', 'white') + +% font: +set(findall(fig, 'type', 'axes'), 'FontSize', 12) +set(findall(fig, 'type', 'text'), 'FontSize', 12) + +% save: +saveas(fig, name, 'pdf') + +end + diff --git a/linearalgebra/exercises/spikesorting.m b/linearalgebra/exercises/spikesorting.m new file mode 100644 index 0000000..af958ca --- /dev/null +++ b/linearalgebra/exercises/spikesorting.m @@ -0,0 +1,161 @@ +% load data into time, voltage and spiketimes +x = load('extdata'); +time = x.time; +voltage = x.voltage; +spiketimes = x.spiketimes; +waveformt = x.waveformt; +waveform1 = x.waveform1; +waveform2 = x.waveform2; + +% indices into voltage trace of spike times: +dt = time(2) - time(1); +tinx = round(spiketimes/dt) + 1; + +% plot voltage trace with detected spikes: +figure(1); +plot(time, voltage, '-b') +hold on +scatter(time(tinx), voltage(tinx), 'r', 'filled'); +xlabel('time [s]'); +ylabel('voltage'); +xlim([0.1, 0.4]) % zoom in +hold off + +% spike waveform snippets: +snippetwindow = 0.005; % milliseconds +w = ceil(snippetwindow/dt); +vs = zeros(length(tinx), 2*w); +for k=1:length(tinx) + vs(k,:) = voltage(tinx(k)-w:tinx(k)+w-1); +end +% time axis for snippets: +ts = time(1:size(vs,2)); +ts = ts - ts(floor(length(ts)/2)); + +% plot all snippets: +figure(2); +hold on +plot(1000.0*ts, vs, '-b'); +title('spike snippets') +xlabel('time [ms]'); +ylabel('voltage'); +hold off +savefigpdf(gcf, 'spikesorting1.pdf', 12, 6); + +% pca: +cv = cov(vs); +[ev , ed] = eig(cv); +[d, dinx] = sort(diag(ed), 'descend'); + +% features: +figure(2) +subplot(1, 2, 1); +plot(1000.0*ts, ev(:,dinx(1)), 'r', 'LineWidth', 2); +xlabel('time [ms]'); +ylabel('eigenvector 1'); +subplot(1, 2, 2); +plot(1000.0*ts, ev(:,dinx(2)), 'g', 'LineWidth', 2); +xlabel('time [ms]'); +ylabel('eigenvector 2'); +savefigpdf(gcf, 'spikesorting2.pdf', 12, 6); + +% plot covariance matrix: +figure(3); +subplot(1, 2, 1); +imagesc(cv); +xlabel('time bin'); +ylabel('time bin'); +title('covariance matrix'); +caxis([-0.1 0.1]) + +% spectrum of eigenvalues: +subplot(1, 2, 2); +scatter(1:length(d), d, 'b', 'filled'); +title('spectrum'); +xlabel('index'); +ylabel('eigenvalue'); + +savefigpdf(gcf, 'spikesorting3.pdf', 12, 6); + +% project onto eigenvectors: +nx = vs * ev(:,dinx(1)); +ny = vs * ev(:,dinx(2)); + +% clustering (two clusters): +%kx = kmeans([nx, ny], 2); +% nx smaller or greater a threshold: +kthresh = 1.6; +kx = ones(size(nx)); +kx(nx