From 4e5361f6d2ec9ebfb2ebff5dc6682072da218061 Mon Sep 17 00:00:00 2001 From: Fabian Sinz Date: Thu, 30 Oct 2014 11:37:05 +0100 Subject: [PATCH] Makefiles for projects done --- projects/Makefile | 16 ++ projects/disclaimer.tex | 15 ++ projects/project_PCA_natural_images/Makefile | 10 + .../pca_natural_images.tex | 217 ++++++++++++++++++ 4 files changed, 258 insertions(+) create mode 100644 projects/Makefile create mode 100644 projects/disclaimer.tex create mode 100644 projects/project_PCA_natural_images/Makefile create mode 100755 projects/project_PCA_natural_images/pca_natural_images.tex diff --git a/projects/Makefile b/projects/Makefile new file mode 100644 index 0000000..986268d --- /dev/null +++ b/projects/Makefile @@ -0,0 +1,16 @@ +all: # run "make zip" in every project_*/ directory, then move the resulting zip files here + for d in `ls -d project_*/`; do \ + echo "Processing $$d" ; \ + $(MAKE) -C $$d zip ; \ + done + + mv project_*/*zip . + +clean: # run "make clean" in every project_*/ directory, then remove the collected zips and auto/ + for d in `ls -d project_*/`; do \ + echo "Cleaning up $$d" ; \ + $(MAKE) -C $$d clean ; \ + done + + rm -f *.zip + rm -rf auto diff --git a/projects/disclaimer.tex b/projects/disclaimer.tex new file mode 100644 index 0000000..11e7bab --- /dev/null +++ b/projects/disclaimer.tex @@ -0,0 +1,15 @@ + \fbox{\parbox{0.985\linewidth}{ \small Please answer all questions + in an electronic file (.txt, .doc are ok, but we prefer .pdf) and + submit in ILIAS. If the assignments include programming + exercises, hand in a pdf for the questions, the .py files for + the programs, and the data in one zip file. + + Use complete and correct sentences unless otherwise + noted. Please be succinct. Use your own words. Write down a + concise reasoning, not just the result. We expect you to do + exercises on your own, but you are encouraged to discuss the + exercises with your fellow students. If you blindly copy your + results from others, you miss out on a chance to learn something + new. Use all resources available to you, but always make sure + that you truly understand why you give the answer you give.
+ }} \ No newline at end of file diff --git a/projects/project_PCA_natural_images/Makefile b/projects/project_PCA_natural_images/Makefile new file mode 100644 index 0000000..b7abffb --- /dev/null +++ b/projects/project_PCA_natural_images/Makefile @@ -0,0 +1,10 @@ +latex: + pdflatex *.tex + pdflatex *.tex + +clean: + rm -f *.log *.aux *.zip *.out + rm -f `basename *.tex .tex`.pdf + +zip: latex + zip `basename *.tex .tex`.zip *.pdf *.dat *.mat diff --git a/projects/project_PCA_natural_images/pca_natural_images.tex b/projects/project_PCA_natural_images/pca_natural_images.tex new file mode 100755 index 0000000..25f903c --- /dev/null +++ b/projects/project_PCA_natural_images/pca_natural_images.tex @@ -0,0 +1,217 @@ +\documentclass[addpoints,10pt]{exam} +\usepackage{url} +\usepackage{color} +\usepackage{hyperref} + +\pagestyle{headandfoot} +\runningheadrule +\firstpageheadrule + +\firstpageheader{Essential Statistics}{Homework 01 due 10/29/2014 23:59}{23. October 2014} +\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014} +\firstpagefooter{}{}{} +\runningfooter{}{}{} +\pointsinmargin +\bracketedpoints + +%\printanswers +\shadedsolutions + + +\begin{document} +%%%%%%%%%%%%%%%%%%%%% Submission instructions %%%%%%%%%%%%%%%%%%%%%%%%% +\sffamily +\begin{flushright} +\gradetable[h][questions] +\end{flushright} + +\begin{center} + \fbox{\parbox{0.985\linewidth}{ \small Please answer all questions + in an electronic file (.txt, .doc are ok, but we prefer .pdf) and + submit in ILIAS. + + Use complete and correct sentences unless otherwise + noted. Please be succinct. Use your own words. Write down a + concise reasoning, not just the result. We expect you to do + exercises on your own, but you are encouraged to discuss the + exercises with your fellow students. If you blindly copy your + results from others, you miss out on a chance to learn something + new.
Use all resources available to you, but always make sure + that you truly understand why you give the answer you give. + }} +\end{center} + +%%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{questions} + \question {\bf Reading assignment: Do not submit answers to this + question! } + + Read chapters 1 up to and including 2.4 of Samuels/Wittmer/Schaffner. + + Pay special attention to the following questions. + \begin{enumerate} + \item What types of scientific evidence do the authors list? How + strong is each type of evidence? + \item What are the different types of data encountered in + statistical analysis? + \item What is a population? What is a random sample? What are + sampling errors? What are nonsampling errors? + \item What is a descriptive statistic? + \item What property do robust statistics have? + \end{enumerate} + + \question Install python and a suitable editor on your computer. + \begin{parts} + \part For installing python, I recommend the anaconda + distribution: \url{http://continuum.io/downloads}. It does not + matter whether you install python 2.7 or 3.4. I will use python + 3.4 syntax. + + \part As editor I recommend either sublime text (for people new to + programming) or pycharm (for people with programming + experience). I do not recommend using a text editor that comes + with your operating system (like word pad). Text processing + programs like Microsoft Word or Libre-Office {\bf won't work at + all}. Programming needs a little more than just typing text and + you will make your life unnecessarily hard by using an editor not + suited for it. + \part Find out how to run a python program on your operating + system and how to install new python packages. Install the + packages {\tt pandas} and {\tt seaborn}. + \end{parts} + + \question To publish scientific results, you will usually need + to use statistical methods. Some journals provide you with a brief + description of how they expect you to apply statistical methods.
One + example can be found in the author guidelines of the journal + Nature: + + \begin{center} + \url{http://www.nature.com/neuro/pdf/sm_checklist.pdf} + \end{center} + + Please read the `checklist' and answer the following questions: + + \begin{parts} + \part[2] Why is it important that statistical methods are applied + correctly? + + \begin{solution} + When not applied correctly, the results of statistical methods + might not support your hypothesis and can lead to false + conclusions. + \end{solution} + + \part[2] Name two common descriptive statistics and what you have + to specify for them in Nature. + + \begin{solution} + \begin{itemize} + \item A clearly defined number $n$ of data points should be + specified. If the sample is small, plot points instead of + using descriptive statistics. Errorbars should be clearly + defined. + \item measure of center: mean, median + \item measure of variability: standard deviation, range + \end{itemize} + \end{solution} + + \part[3] Name one statistical test that you have heard of or + used. If you were to apply it, what would you have to + specify to follow the Nature guidelines? + + \begin{solution} + {\bf Student's T-Test} for testing whether the means of two + populations are the same + \begin{itemize} + \item a clearly defined $n$ for the test + \item a justification for the sample size used + \item a clear description of the statistical method: since the + t-test is very common, stating that a two independent sample + t-test was used should be sufficient. + \item Justify that the data meet the assumptions of the test: the two + populations should be normally distributed with the same + variance; the data was sampled independently from the two + populations being compared. + \item Is the variance in the different groups different? + \item Was the test one-sided or two-sided? + \end{itemize} + \end{solution} + + \part[3] Why are you asked to justify each instance in which + you exclude some of the data that you collected?
What could be a + valid reason to exclude a data point? + + \begin{solution} + Excluded data points might make a sample from a population not + representative anymore, and can therefore alter the outcome and + conclusions of a study. They might be excluded if there is a + good reason to believe that they are not part of the population + under investigation. + \end{solution} + + \end{parts} + + \question {\bf Robust statistics} In 1888, P. Topinard published + data on the brain weights of hundreds of French men and women. Here + are ten brain weights (in grams) of female brains from the dataset: + \begin{center} [1125, 1027, 1112, 983, 1090, 1247, 1045, 983, 972, 1045] + \end{center} + + Open a new file ``brain\_weight.py'' with your text editor to write + the following python program (please hand in the plots and the program). + \begin{parts} + \part[2] Create a list that contains the above brain weights. + \part[2] Create a new list that contains the following ten means: + Each mean is computed from the original list after removing one + element (hint: use slicing and adding lists for that; we did this + in the lecture already). {\bf Warning:} I {\em do not} expect you + to use {\tt for}-loops. Only use them if you know them already. If + you do use them, be prepared to explain your code to me to get + credit for this task. + \part[2] Create yet another list that does the same, only for the + median. + \part[2] Make a boxplot with the different means and medians (like + in the lecture). To show the plot at the end of the program + you need to put a {\tt plt.show()} at the end of the program. If + you want to save the plot, put the command {\tt + plt.gcf().savefig('YOUR\_NAME\_homework01.pdf')} before that. Label + the y-axis by using the function {\tt plt.ylabel('FILL IN YOUR LABEL')}. + \part[2] What can you observe and what does that tell you about + the robustness of the statistic?
+ \end{parts} + \begin{solution} + \begin{verbatim} +import matplotlib.pyplot as plt +import seaborn as sns +import numpy as np + +sns.set_context("paper", font_scale=1.5, rc={"lines.linewidth": 2.5}) + +w = [1125, 1027, 1112, 983, 1090, 1247, 1045, 983, 972, 1045]  # the ten brain weights from the task + +brain_means = [ np.mean(w[1:]), np.mean(w[:1] + w[2:]), np.mean(w[:2] + w[3:]), \ + np.mean(w[:3] + w[4:]), np.mean(w[:4] + w[5:]), np.mean(w[:5] + w[6:]), \ + np.mean(w[:6] + w[7:]), np.mean(w[:7] + w[8:]), np.mean(w[:8] + w[9:]),\ + np.mean(w[:9]) ]  # leave-one-out means: entry i is the mean of w without w[i] +brain_medians = [ np.median(w[1:]), np.median(w[:1] + w[2:]), np.median(w[:2] + w[3:]), \ + np.median(w[:3] + w[4:]), np.median(w[:4] + w[5:]), np.median(w[:5] + w[6:]), \ + np.median(w[:6] + w[7:]), np.median(w[:7] + w[8:]), np.median(w[:8] + w[9:]),\ + np.median(w[:9]) ]  # same leave-one-out scheme, using the median + +sns.boxplot([brain_means, brain_medians], names=['means', 'medians'])  # both lists in one boxplot +plt.ylabel('brain weight [g]') +plt.gcf().savefig('fabian_sinz_homework01.pdf')  # save the current figure before showing it +plt.show() + \end{verbatim} + \end{solution} + + +\end{questions} + + + + + +\end{document}