This repository has been archived on 2021-05-17. You can view files and clone it, but cannot push or open issues or pull requests.
scientificComputing/linearalgebra/exercises/correlations.tex
2014-11-12 18:39:02 +01:00

96 lines
3.2 KiB
TeX

% Exercise sheet built on the exam class; addpoints turns on point counting.
\documentclass[addpoints,10pt]{exam}
\usepackage{url}
\usepackage{color}
\usepackage{hyperref}
\usepackage{graphicx}
\usepackage{amsmath}
% Page layout: header and footer on every page, with a rule below the header.
\pagestyle{headandfoot}
\runningheadrule
\firstpageheadrule
% First-page header fields: left / center / right.
\firstpageheader{Scientific Computing}{Principal Component Analysis}{Oct 29, 2014}
%\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014}
% Footers are left empty on the first and on all following pages.
\firstpagefooter{}{}{}
\runningfooter{}{}{}
% Point values are placed in the margin and enclosed in brackets.
\pointsinmargin
\bracketedpoints
% Uncomment \printanswers to typeset solutions; they get a shaded background.
%\printanswers
\shadedsolutions
\usepackage[mediumspace,mediumqspace,Gray]{SIunits} % \ohm, \micro
%%%%% listings %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Shared appearance of all MATLAB code listings.
\usepackage{listings}
% The title below uses the kernel macros \filename@parse, \filename@base and
% \filename@ext.  Their names contain `@', so `@' must be a letter while the
% \lstset argument is read; otherwise TeX sees the undefined \filename
% followed by the literal text `@parse' and fails once a title is typeset.
\makeatletter
\lstset{
basicstyle=\ttfamily,
numbers=left,
showstringspaces=false,
language=Matlab,
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
captionpos=t,
xleftmargin=2em,
xrightmargin=1em,
aboveskip=10pt,
%title=\lstname,
% Show only base name and extension of the listed file, without its directory.
title={\protect\filename@parse{\lstname}\protect\filename@base.\protect\filename@ext}
}
\makeatother
\begin{document}
\sffamily
%%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%%
\begin{questions}
\question \textbf{Gaussian distribution}
\begin{parts}
\part Use \texttt{randn} to generate 1000000 normally (zero mean, unit variance) distributed random numbers.
\part Plot a properly normalized histogram of these random numbers.
\part Compare the histogram with the probability density of the Gaussian distribution
\[ p(x) = \frac{1}{\sqrt{2\pi\sigma^2}}e^{-\frac{(x-\mu)^2}{2\sigma^2}} \]
where $\mu$ is the mean and $\sigma^2$ is the variance of the Gaussian distribution.
\part Generate Gaussian distributed random numbers with mean $\mu=2$ and
standard deviation $\sigma=\frac{1}{2}$.
\end{parts}
\question \textbf{Covariance and correlation coefficient}
\begin{parts}
\part Generate two vectors $x$ and $z$ with Gaussian distributed random numbers.
\part Compute $y$ as a linear combination of $x$ and $z$ according to
\[ y = r \cdot x + \sqrt{1-r^2}\cdot z \]
where $r$ is a parameter with $-1 \le r \le 1$.
What does $r$ do?
\part Plot a scatter plot of $y$ versus $x$ for about 10 different values of $r$.
What do you observe?
\part Also compute the covariance matrix and the correlation
coefficient matrix between $x$ and $y$ (functions \texttt{cov} and
\texttt{corrcoef}). What do these matrices look like for different
values of $r$? How do the values of the matrices change if you generate
$x$ and $z$ with larger variances?
\part Do the same analysis (scatter plot, covariance, and correlation coefficient)
for \[ y = x^2 + 0.5 \cdot z \]
Are $x$ and $y$ really independent?
\end{parts}
\question \textbf{Principal component analysis}
\begin{parts}
\part Generate pairs $(x,y)$ of Gaussian distributed random numbers such
that all $x$ values have zero mean, half of the $y$ values have mean $+d$
and the other half mean $-d$, with $d \ge0$.
\part Plot scatter plots of the pairs $(x,y)$ for $d=0$, 1, 2, 3, 4 and 5.
Also plot a histogram of the $x$ values.
\part Apply PCA to the data and plot a histogram of the data projected onto
the PCA axis with the largest eigenvalue.
What do you observe?
\end{parts}
\end{questions}
\end{document}