This commit is contained in:
Fabian Sinz 2014-10-16 17:53:22 +02:00
parent d9921cc9ae
commit 763b332eea
22 changed files with 929 additions and 3 deletions

View File

@@ -0,0 +1,65 @@
\documentclass[addpoints,10pt]{exam}
\usepackage{url}
\usepackage{color}
\usepackage{hyperref}
\pagestyle{headandfoot}
\runningheadrule
\firstpageheadrule
\firstpageheader{Scientific Computing}{afternoon assignment day 02}{10/22/2014}
%\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014}
\firstpagefooter{}{}{}
\runningfooter{}{}{}
\pointsinmargin
\bracketedpoints
%\printanswers
\shadedsolutions
\begin{document}
%%%%%%%%%%%%%%%%%%%%% Submission instructions %%%%%%%%%%%%%%%%%%%%%%%%%
\sffamily
%%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%%
\begin{questions}
\question When the p-value is small, we reject the null
hypothesis. For example, if you want to test whether two means are
not equal, the null hypothesis is ``the means are equal''. If,
e.g., $p\le 0.05$, we take this as sufficient evidence that the
null hypothesis is not true and conclude that the means are not
equal (which is what you wanted to show).

In this exercise we will look at what kind of p-values to expect
if the null hypothesis is true. In our example, this is the case
if the true means of the two datasets are actually equal.
\begin{parts}
\part Think about how you expect the p-values to behave in that
situation.
\part Simulate the situation in which the means are equal by
repeating the following at least $1000$ times:
\begin{enumerate}
\item Generate two arrays {\tt x} and {\tt y} of $10$ normally
(Gaussian) distributed random numbers each, using {\tt randn}. By
construction, the true means behind these random numbers are zero.
\item Perform a two sample t-test ({\tt ttest2}) on {\tt x} and
{\tt y}. Store the p-value.
\end{enumerate}
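A minimal sketch of how this simulation could look (assuming the
statistics toolbox for {\tt ttest2}; all variable names are only
suggestions):
\begin{verbatim}
% simulate p-values under the null hypothesis
m = 1000;
pvals = zeros(m, 1);
for i = 1:m
    x = randn(10, 1);   % true mean is zero
    y = randn(10, 1);   % true mean is zero, too
    [h, pvals(i)] = ttest2(x, y);
end
\end{verbatim}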
\part Plot a histogram of the $1000$ p-values. What do you think
the distribution of the p-values is (i.e. if you repeated this
experiment many more times, what would the histogram look like)?
\part Given what you find, think about whether the following
strategy is statistically valid: You collect $10$ data points from
each group and perform a test. If the test is not significant, you
collect $10$ more and repeat the test. If the test tells you that
there is a significant difference, you stop. Otherwise, you repeat
the procedure until the test is significant.
\end{parts}
\end{questions}
\end{document}

Binary file not shown.

statistics/environments.tex Symbolic link
View File

@@ -0,0 +1 @@
../latex/environments.tex

Binary file not shown.


Binary file not shown.

Binary file not shown.

Binary file not shown.


Binary file not shown.

Binary file not shown.

View File

@@ -47,7 +47,7 @@
Bernstein Center T\"ubingen}
\institute[Scientific Computing]{}
-\date{10/20/2014}
+\date{10/21/2014}
%\logo{\pgfuseimage{logo}}
\subject{Lectures}

View File

@@ -0,0 +1,329 @@
\documentclass{beamer}
\usepackage{xcolor}
\usepackage{listings}
\usepackage{pgf}
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
%\usepackage{multimedia}
\usepackage[latin1]{inputenc}
\usepackage{amsmath}
\usepackage{bm}
\usepackage[T1]{fontenc}
\usepackage{hyperref}
\usepackage{ulem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>
{
\usetheme{Singapore}
\setbeamercovered{opaque}
\usecolortheme{tuebingen}
\setbeamertemplate{navigation symbols}{}
\usefonttheme{default}
\useoutertheme{infolines}
% \useoutertheme{miniframes}
}
\AtBeginSubsection[]
{
\begin{frame}<beamer>
\begin{center}
\Huge \insertsectionhead
\end{center}
\tableofcontents[
currentsubsection,
hideothersubsections,
sectionstyle=show/hide,
subsectionstyle=show/shaded,
]
% \frametitle{\insertsectionhead}
\end{frame}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
\setbeamertemplate{blocks}[rounded][shadow=true]
\title[]{Scientific Computing -- Statistics}
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
University T\"ubingen\\
Bernstein Center T\"ubingen}
\institute[Scientific Computing]{}
\date{10/22/2014}
%\logo{\pgfuseimage{logo}}
\subject{Lectures}
%%%%%%%%%% configuration for code
\lstset{
basicstyle=\ttfamily,
numbers=left,
showstringspaces=false,
language=Matlab,
commentstyle=\itshape\color{darkgray},
keywordstyle=\color{blue},
stringstyle=\color{green},
backgroundcolor=\color{blue!10},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
captionpos=b,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\mycite}[1]{
\begin{flushright}
\tiny \color{black!80} #1
\end{flushright}
}
\input{../latex/environments.tex}
\makeatother
\begin{document}
\begin{frame}
\titlepage
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Day 3 -- study design: choosing n}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{choosing n for confidence intervals}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{general theme}
\begin{enumerate}
\item make an educated guess about the true parameters
\item state how accurate/powerful you want to be
\item select $n$ based on that
\end{enumerate}
\end{frame}
\begin{frame}
\frametitle{estimating a single mean}
\framesubtitle{standard error and $\alpha$}
\begin{itemize}
\item Assume you have an estimate $s$ of the standard deviation from
the literature.
\item The $95$\% confidence interval is given by
$$\underbrace{|\tilde\mu - \mu_0|}_{=:\delta} \ge t_{97.5\%,
\nu}\frac{s}{\sqrt{n}}$$\pause
\item How should we choose $n$ to get a confidence interval of a
particular size $\pm \delta$?\pause
\item[] We should set $n$ to be
$$n \ge \left(\frac{t_{97.5\%, \nu}\cdot s}{\delta}\right)^2 $$
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{exercise}
\begin{task}{choosing $n$}
Example from last lecture: Literature value of thymus gland
weights is $34.3$g. The estimate of the standard deviation from
the literature is $s=10$g.
The equation for $n$ is
$$n \ge \left(\frac{t_{97.5\%, \nu}\cdot s}{\delta}\right)^2 $$
\begin{itemize}
\item Assume we want to sacrifice as few animals as possible. We
say we are fine with a confidence interval of size $\pm\delta=5$, how
should we choose $n$?
\item What $n$ should we choose if we want $\pm\delta=2$?
\end{itemize}
Extend your bootstrapping script from yesterday to check that the
equation is correct.
\end{task}
\end{frame}
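\begin{frame}[fragile]
\frametitle{choosing $n$: a sketch}
A minimal sketch of how $n$ could be computed in matlab (assuming
the statistics toolbox for {\tt tinv}). Since $\nu = n - 1$ itself
depends on $n$, we simply increase $n$ until the inequality holds:
\begin{lstlisting}
s = 10;      % guess of the standard deviation
delta = 5;   % desired size of the confidence interval
n = 2;       % smallest sample size that makes sense
while n < (tinv(0.975, n - 1)*s/delta)^2
    n = n + 1;
end
disp(['choose n = ', num2str(n)]);
\end{lstlisting}
\end{frame}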
\begin{frame}[fragile]
\frametitle{How to interrupt for/while loops}
\begin{itemize}
\item Sometimes you want to stop a for/while loop early.
\item The command for that is {\tt break}
\end{itemize}
{\bf Example}
\begin{lstlisting}
% silly way to find a random number larger than .8
for i = 1:2000
u = rand();
if u >= .8
disp('Found it!');
break
end
end
\end{lstlisting}
\end{frame}
\begin{frame}
\frametitle{winner's curse}
\begin{task}{Why it is important to estimate $n$ beforehand}
Use the thymus gland dataset to repeat the following procedure
\begin{enumerate}
\item Randomly select $n=10$ numbers from the whole dataset.
\item Perform a one-sample ttest ({\tt ttest}) to test against the
mean of $34.3$g.
\item If the p-value is smaller than $0.05$, stop the loop and
print the mean of the $10$ datapoints. Also print the mean of
the entire thymus gland dataset.
\item Why is it better to use a {\tt for} instead of a {\tt while} loop?
\item What can you observe? Why does that tell you that choosing
$n$ is important?
\end{enumerate}
\end{task}
\end{frame}
\begin{frame}[fragile]
\frametitle{solution}
\scriptsize
\begin{lstlisting}
load thymusglandweights.dat
n = 10;
x = thymusglandweights;
for i = 1:5000
idx = randi(length(x), n,1);
y = x(idx);
[h,p] = ttest(y, 34.3);
if h == 1
disp(['p-value: ', num2str(p)]);
disp(['mu: ', num2str(mean(y))]);
disp(['mu total: ', num2str(mean(x))]);
break
end
end
\end{lstlisting}
\end{frame}
\subsection{power}
\begin{frame}
\frametitle{test nomenclature}
\begin{center}
\only<1>{\includegraphics[width=\linewidth]{figs/testframework00.pdf}}
\only<2>{\includegraphics[width=\linewidth]{figs/testframework01.pdf}}
\end{center}
\small
\begin{columns}
\begin{column}[l]{.5\linewidth}
{\bf You want:}
\begin{itemize}
\item large power
\item small type I \& II error probability ($\alpha$ and $\beta$)
\end{itemize}
\end{column}
\begin{column}[r]{.5\linewidth}
\end{column}
\end{columns}
\end{frame}
\begin{frame}
\frametitle{power}
\begin{task}{estimating power with bootstrapping}
\begin{itemize}
\item Take the script from yesterday in which we simulated the
null distribution of the means.
\item Extend it such that it plots the bootstrapped distribution
of the means as well (use the same bins for both histograms by
using {\tt hist} for computing the histogram and {\tt bar} for
plotting).
\item Use logical indexing to find all means that correspond to
true positives (using the 95\% decision boundaries computed
yesterday). Estimate the power by computing the fraction of true
positive bootstrapped means.
\item What is the probability that you get a false negative?
\item If you have time, plot the histogram of true positives in a
different color.
\end{itemize}
\end{task}
\end{frame}
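\begin{frame}[fragile]
\frametitle{power: sketch of the key step}
A minimal sketch of the power estimate itself, assuming the
bootstrapped means {\tt me\_h1} and the $95$\% decision boundaries
{\tt low} and {\tt high} from yesterday's script:
\begin{lstlisting}
% true positives are bootstrapped means outside
% the 95% acceptance region of the null distribution
idx = me_h1 < low | me_h1 > high;
pow = mean(idx);   % fraction of true positives
% the false negative probability is 1 - pow
disp(['estimated power: ', num2str(pow)]);
\end{lstlisting}
\end{frame}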
\begin{frame}
\frametitle{summary}
\begin{itemize}
\item Proper study design is important to avoid statistical problems
like the winner's curse.
\item You should choose a test with high power.
\item There are also equations to select $n$ for type I error {\em
and} power (see book by Zar).
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Day 4-5 -- curve fitting and maximum likelihood}
\begin{frame}
\frametitle{Overview}
\begin{itemize}
\item minimizing/maximizing a function numerically (optimization) is
ubiquitous in science (curve fitting, maximum likelihood, ...)
\item today we will look at the basic elements of optimization and
apply it to curve fitting
\item tomorrow, we will apply it to maximum likelihood
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{plotting surfaces}
\begin{lstlisting}
range = linspace(-1,1,20);
[X,Y] = meshgrid(range, range);
surf(X,Y, (X.^2 + Y.^2));
colormap('winter');
\end{lstlisting}
\end{frame}
\begin{frame}
\frametitle{linear least squares}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.3\linewidth}
\includegraphics[width=\linewidth]{figs/leastsquares.png}
\source{http://en.wikipedia.org/wiki/Linear\_least_squares\_\%28mathematics\%29}
\end{minipage}
\begin{minipage}{0.7\linewidth}
\begin{itemize}
\item The most common curve fitting problem is {\em linear least
squares}.
\item Its goal is to predict a set of output values $y_1, ...,
y_n$ from their corresponding input values $x_1,...,x_n$ with
a line $f_{a,b}(x) = a x+b$.
\item How is the line chosen?\pause
\item[] By minimizing the summed squared error
$$g(a,b) = \sum_{i=1}^n (y_i - f_{a,b}(x_i))^2$$
\end{itemize}
\end{minipage}
\end{minipage}
\end{frame}
\begin{frame}
\frametitle{error surface}
\begin{task}{plotting the error surface}
Write a function {\tt lserr} that takes a 2-dimensional parameter
vector (slope and offset), an array of inputs {\tt x}, and an
array of corresponding outputs {\tt y}, and computes the least
squares error $g(a,b)$.
\end{task}
\end{frame}
\begin{frame}
\begin{center}
\Huge That's it.
\end{center}
\end{frame}
\end{document}

View File

@@ -0,0 +1,310 @@
\documentclass{beamer}
\usepackage{xcolor}
\usepackage{listings}
\usepackage{pgf}
%\usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
%\usepackage{multimedia}
\usepackage[latin1]{inputenc}
\usepackage{amsmath, amssymb}
\usepackage{bm}
\usepackage[T1]{fontenc}
\usepackage{hyperref}
\usepackage{ulem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>
{
\usetheme{Singapore}
\setbeamercovered{opaque}
\usecolortheme{tuebingen}
\setbeamertemplate{navigation symbols}{}
\usefonttheme{default}
\useoutertheme{infolines}
% \useoutertheme{miniframes}
}
\AtBeginSubsection[]
{
\begin{frame}<beamer>
\begin{center}
\Huge \insertsectionhead
\end{center}
\tableofcontents[
currentsubsection,
hideothersubsections,
sectionstyle=show/hide,
subsectionstyle=show/shaded,
]
% \frametitle{\insertsectionhead}
\end{frame}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
\setbeamertemplate{blocks}[rounded][shadow=true]
\title[]{Scientific Computing -- Statistics}
\author[Statistics]{Fabian Sinz\\Dept. Neuroethology,
University T\"ubingen\\
Bernstein Center T\"ubingen}
\institute[Scientific Computing]{}
\date{10/23/2014}
%\logo{\pgfuseimage{logo}}
\subject{Lectures}
%%%%%%%%%% configuration for code
\lstset{
basicstyle=\ttfamily,
numbers=left,
showstringspaces=false,
language=Matlab,
commentstyle=\itshape\color{darkgray},
keywordstyle=\color{blue},
stringstyle=\color{green},
backgroundcolor=\color{blue!10},
breaklines=true,
breakautoindent=true,
columns=flexible,
frame=single,
captionpos=b,
xleftmargin=1em,
xrightmargin=1em,
aboveskip=10pt
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\mycite}[1]{
\begin{flushright}
\tiny \color{black!80} #1
\end{flushright}
}
\input{../latex/environments.tex}
\makeatother
\begin{document}
\begin{frame}
\titlepage
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Day 4-5 -- curve fitting and maximum likelihood}
\subsection{curve fitting and optimization}
\begin{frame}
\frametitle{Overview}
\begin{itemize}
\item minimizing/maximizing a function numerically (optimization) is
ubiquitous in science (curve fitting, maximum likelihood, ...)
\item today we will look at the basic elements of optimization and
apply it to curve fitting
\item tomorrow, we will apply it to maximum likelihood
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{plotting surfaces}
\begin{lstlisting}
range = linspace(-1,1,20);
[X,Y] = meshgrid(range, range);
surf(X,Y, (X.^2 + Y.^2));
colormap('winter');
\end{lstlisting}
\end{frame}
\begin{frame}
\frametitle{linear least squares}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.3\linewidth}
\includegraphics[width=\linewidth]{figs/leastsquares.png}
\source{http://en.wikipedia.org/wiki/Linear\_least_squares\_\%28mathematics\%29}
\end{minipage}
\begin{minipage}{0.7\linewidth}
\begin{itemize}
\item The most common curve fitting problem is {\em linear least
squares}.
\item Its goal is to predict a set of output values $y_1, ...,
y_n$ from their corresponding input values $x_1,...,x_n$ with
a line $f_{a,b}(x) = a x+b$.
\item How is the line chosen?\pause
\item[] By minimizing the summed squared error
$$g(a,b) = \sum_{i=1}^n (y_i - f_{a,b}(x_i))^2$$
\end{itemize}
\end{minipage}
\end{minipage}
\end{frame}
\begin{frame}
\frametitle{error surface}
\begin{task}{plotting the error surface}
\begin{itemize}
\item Write a function {\tt lserr} that takes a 2-dimensional
parameter vector (slope $a$ and offset $b$), an array of inputs
{\tt x}, and an array of corresponding outputs {\tt y}, and
computes the least squares error
$$g(a,b) = \sum_{i=1}^n (y_i - f_{a,b}(x_i))^2$$
with $$f_{a,b}(x_i) = a x_i + b.$$
\item Generate an example dataset with {\tt x=linspace(-5,5,20)}
and {\tt y = .5*x + 1 + randn(length(x),1)}.
\item Write a script that plots the error surface as a function
of $a$ and $b$.
\end{itemize}
\end{task}
\end{frame}
\begin{frame}
\frametitle{optima and derivatives}
\begin{itemize}
\item How did you find maxima/minima of functions at school?\pause
\item[] Compute the derivative, set it to zero, and solve for $x$.\pause
\item Can anybody remember how a derivative is defined (hint:
differential quotient)?\pause
\item[]$$f'(x) = \lim_{h\rightarrow 0} \frac{f(x+h) - f(x)}{h}$$
\item Could you write down this expression for the partial
derivative $\frac{\partial g(a,b)}{\partial a}$ of $g(a,b)$ w.r.t. $a$?\pause
\item[]$$\frac{\partial g(a,b)}{\partial a} = \lim_{h\rightarrow 0}
\frac{g(a+h,b) - g(a,b)}{h}$$\pause
\item What about $\frac{\partial g(a,b)}{\partial b}$?
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{gradient and numerical derivative}
\begin{definition}{gradient}
The {\em gradient} $$\nabla g(a,b) = \left(\frac{\partial
g(a,b)}{\partial a}, \frac{\partial g(a,b)}{\partial b}
\right)$$ is the vector with partial derivatives of $g$ w.r.t. $a$
and $b$.
\end{definition}
We can numerically approximate it by using the definition of the
derivative,
$$\frac{\partial g(a,b)}{\partial a} = \lim_{h\rightarrow 0}
\frac{g(a+h,b) - g(a,b)}{h} \approx \frac{g(a+h,b) - g(a,b)}{h},$$
with a fixed, very small $h$ (e.g. {\tt h=1e-6}).
\end{frame}
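\begin{frame}[fragile]
\frametitle{numerical derivative: a sketch}
A minimal sketch for a function of a single variable (the function
{\tt f} is just an arbitrary example):
\begin{lstlisting}
f = @(x) x.^2;   % example function
h = 1e-6;        % small step size
x0 = 1.5;
dfdx = (f(x0 + h) - f(x0))/h;
disp(dfdx);      % approximately 2*x0 = 3
\end{lstlisting}
\end{frame}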
\begin{frame}
\frametitle{error surface}
\begin{task}{plotting the gradient field}
\begin{itemize}
\item Write a function {\tt lserr\_gradient} that takes the same
arguments as {\tt lserr}, but numerically computes the gradient
$$\nabla g(a,b) = \left(\frac{\partial g(a,b)}{\partial a}, \frac{\partial g(a,b)}{\partial b} \right)$$
\item Add the gradient field as a vector field to your plot (use
{\tt quiver}).
\item Add a contour plot of the error surface as well (use {\tt contour}).
\item What can you observe about the directions of the gradient
with respect to the contour lines?
\end{itemize}
\end{task}
\end{frame}
\begin{frame}
\frametitle{gradient descent}
\begin{itemize}
\item The gradient $\nabla g(a,b)$ always points in the direction
of steepest ascent. \pause
\item How do we get the direction of steepest descent? \pause
\item[] We take minus the gradient $-\nabla g(a,b)$. \pause
\end{itemize}
{\bf gradient descent algorithm}
\begin{enumerate}
\item Start at some starting point $\mathbf p_0 = (a_0,b_0)$.
\item Repeat while gradient is large enough
\begin{itemize}
\item Compute the gradient at the current position $\mathbf p_t=(a_t,b_t)$.
\item Walk a small step into the gradient direction via $$\mathbf p_{t+1} =
\mathbf p_{t} - \varepsilon \nabla g(a_t,b_t)$$ where
$\varepsilon$ is a small number.
\end{itemize}
\end{enumerate}
\end{frame}
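\begin{frame}[fragile]
\frametitle{gradient descent: a sketch}
A minimal sketch of the update loop, assuming the function {\tt
lserr\_gradient} from the task before and data {\tt x} and {\tt y}:
\begin{lstlisting}
param = [0, 0];   % starting point (a_0, b_0)
step = 0.01;      % step size epsilon
grad = lserr_gradient(param, x, y);
while norm(grad) > 1e-3
    % walk a small step against the gradient
    param = param - step*grad;
    grad = lserr_gradient(param, x, y);
end
\end{lstlisting}
\end{frame}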
\begin{frame}
\frametitle{gradient descent}
\begin{task}{gradient descent}
\begin{itemize}
\item Implement a gradient descent for our linear regression
problem.
\item At each step in the algorithm, plot the error surface and
the current parameter point (hint use {\tt plot3} to plot a
point in 3D).
\item At each step also plot the linear regression line along with
the data points in a separate plot.
\item It is a good idea to use {\tt pause(.1)} after each plot, so
that matlab has time to update the plots and you have time to
watch the gradient descent at work.
\end{itemize}
\end{task}
\end{frame}
\begin{frame}[fragile]
\frametitle{optimization with matlab}
\scriptsize
A small adaptation of the objective function so that it can also return the gradient:
\begin{lstlisting}
function [err, grad] = lserr(param, x, y)
err = mean( (param(1)*x + param(2) - y).^2 );
if nargout == 2
grad = lserr_gradient(param, x,y);
end
\end{lstlisting}
The actual optimization
\begin{lstlisting}
function param = estimate_regression(x,y, param0)
options = optimoptions(@fminunc,'GradObj','on');
myfunc = @(p)(lserr(p,x,y));
param = fminunc(myfunc,param0, options);
\end{lstlisting}
\end{frame}
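\begin{frame}[fragile]
\frametitle{usage example}
A short usage sketch with the example data from before:
\begin{lstlisting}
x = linspace(-5,5,20);
y = .5*x + 1 + randn(1, length(x));
param = estimate_regression(x, y, [0, 0]);
% param(1) should be close to .5 and
% param(2) should be close to 1
\end{lstlisting}
\end{frame}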
\begin{frame}
\frametitle{nonlinear regression}
\scriptsize
\begin{task}{fit a charging curve}
The following problem arises when estimating the time constant of
a membrane from data.
\vspace{.5cm}
\begin{minipage}{1.0\linewidth}
\begin{minipage}{0.5\linewidth}
\begin{center}
\includegraphics[width=\linewidth]{figs/charging.png}
\end{center}
\end{minipage}
\begin{minipage}{0.5\linewidth}
\begin{itemize}
\item Download the data {\tt membraneVoltage.mat}. It contains
the points plotted on the right hand side.
\item Write a nonlinear least squares fit to fit the function
$$ f_{A,\tau}(t) = A\cdot \left(1 -
e^{-\frac{t}{\tau}}\right)$$
to the data.
\item This looks scary, but it is not: if you programmed
everything correctly beforehand, you only need to adapt the
function {\tt lserr} and use the optimization from the
previous slide.
\item Plot the final result along with the data points.
\end{itemize}
\end{minipage}
\end{minipage}
\end{task}
\end{frame}
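\begin{frame}[fragile]
\frametitle{adapting {\tt lserr}: a sketch}
A minimal sketch of how the objective function could look for the
charging curve (the name {\tt chargingerr} and the parameter order
{\tt param = [A, tau]} are only suggestions):
\begin{lstlisting}
function err = chargingerr(param, t, v)
% mean squared error of the charging curve fit
f = param(1)*(1 - exp(-t/param(2)));
err = mean((f - v).^2);
\end{lstlisting}
\end{frame}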
\begin{frame}
\begin{center}
\Huge That's it.
\end{center}
\end{frame}
\end{document}

View File

@@ -1,6 +1,6 @@
load thymusglandweights.dat
-n = 80;
+n = 16;
x = thymusglandweights(1:n);
m = 5000;

View File

@@ -0,0 +1,5 @@
function param = estimate_regression(x,y, param0)
options = optimoptions(@fminunc,'GradObj','on');
myfunc = @(p)(lserr(p,x,y));
param = fminunc(myfunc,param0, options);

View File

@@ -0,0 +1,6 @@
function [err, grad] = lserr(param, x, y)
err = mean( (param(1)*x + param(2) - y).^2 );
if nargout == 2
grad = lserr_gradient(param, x,y);
end

View File

@@ -0,0 +1,10 @@
function grad = lserr_gradient(param, x, y)
h = 1e-6;          % step size for the finite difference
grad = 0*param;    % gradient has the same shape as param
for i = 1:length(param)
    paramh = param;
    paramh(i) = param(i) + h;  % perturb only the i-th parameter
    grad(i) = (lserr(paramh,x,y) - lserr(param,x,y))/h;
end

View File

@@ -0,0 +1,30 @@
param0 = [1,1];
step = 0.01;
m = 50;
arange = linspace(0,1,m);
brange = linspace(.5,1.5, m);
[A,B] = meshgrid(arange, brange);
E = 0*A;
x = linspace(-5,5,20);
y = .5*x + 1 + randn(1, length(x));
U = 0*A;
V = 0*A;
for i = 1:m
for j = 1:m
E(i,j) = lserr([A(i,j), B(i,j)], x, y);
grad = lserr_gradient([A(i,j), B(i,j)], x, y);
U(i,j) = grad(1);
V(i,j) = grad(2);
end
end
colormap('jet');
surf(A,B,E, 'FaceAlpha',.5);

View File

@@ -0,0 +1,62 @@
close all
clear
m = 50;
arange = linspace(0,1,m);
brange = linspace(.5,1.5, m);
[A,B] = meshgrid(arange, brange);
E = 0*A;
x = linspace(-5,5,20);
y = .5*x + 1 + randn(1, length(x));
U = 0*A;
V = 0*A;
for i = 1:m
for j = 1:m
E(i,j) = lserr([A(i,j), B(i,j)], x, y);
grad = lserr_gradient([A(i,j), B(i,j)], x, y);
U(i,j) = grad(1);
V(i,j) = grad(2);
end
end
colormap('gray');
subplot(1,2,1);
hold on
surf(A,B,E, 'FaceAlpha',.5);
shading interp
pause
subplot(1,2,2);
plot(x,y,'ok');
%%
t = linspace(-5,5,100);
param0 = [0,0];
step = 0.01;
param = param0;
for i = 1:100
err = lserr(param, x, y);
derr = lserr_gradient(param, x, y);
subplot(1,2,1);
plot3(param(1), param(2), err,'or');
subplot(1,2,2);
hold off
plot(x,y,'ok');
hold on
plot(t, param(1)*t + param(2), '--k', 'LineWidth',2);
pause(0.2);
param = param - step*derr;
end
hold off

View File

@@ -0,0 +1,49 @@
close all
clear all
load thymusglandweights.dat
literature_mean = 34.3;
x = thymusglandweights;
n = length(x);
y = x - mean(x) + literature_mean;
m = 2000;
me_null = zeros(m,1);
me_h1 = zeros(m,1);
for i = 1:m
me_null(i) = mean(y(randi(n,n,1)));
me_h1(i) = mean(x(randi(n,n,1)));
end
bins = linspace(34,35,100);
null = hist(me_null, bins);
h1 = hist(me_h1, bins);
bar(bins, null, 'FaceColor',[.3,.3,.3]);
hold on
bar(bins, h1, 'FaceColor',[.7,.7,.7]);
mu = mean(x);
plot([mu,mu],[0,200],'--r','LineWidth',3);
xlabel('thymus gland weights [g]');
ylabel('frequency');
title('bootstrapped null distribution');
hold off
% 5% significance boundaries
low = quantile(me_null,0.025);
high = quantile(me_null,0.975);
disp(['the 5% boundaries are: ', num2str(low), ' ', num2str(high)]);
hold on
plot([low,low],[0,200],'--g','LineWidth',3);
plot([high,high],[0,200],'--g','LineWidth',3);
hold off
idx = abs(me_h1-literature_mean) > abs(literature_mean - low);
pow = mean(idx);
h1positive = hist(me_h1(idx), bins);
hold on
bar(bins, h1positive, 'FaceColor','g');
hold off

View File

@@ -0,0 +1,38 @@
close all;
clear;
m = 50;
arange = linspace(0,1,m);
brange = linspace(.5,1.5, m);
[A,B] = meshgrid(arange, brange);
E = 0*A;
x = linspace(-5,5,20);
y = .5*x + 1 + randn(1, length(x));
U = 0*A;
V = 0*A;
for i = 1:m
for j = 1:m
E(i,j) = lserr([A(i,j), B(i,j)], x, y);
grad = lserr_gradient([A(i,j), B(i,j)], x, y);
U(i,j) = grad(1);
V(i,j) = grad(2);
end
end
colormap('jet');
surf(A,B,E, 'FaceAlpha',.5);
%shading interp;
hold on
contour(A,B,E, 50, 'LineColor', 'k')
quiver(A,B,U,V);
xlabel('a');
ylabel('b');
zlabel('mean square error')
axis([0,1,.5,1.5])

View File

@@ -11,7 +11,7 @@ y = x - mean(x) + literature_mean;
m = 2000;
me = zeros(m,1);
for i = 1:m
-me(i) = median(y(randi(n,n,1)));
+me(i) = mean(y(randi(n,n,1)));
end
hist(me, 50);

View File

@@ -0,0 +1,21 @@
load thymusglandweights.dat
n = 10;
x = thymusglandweights;
m = 5000;
for i = 1:m
idx = randi(length(x), n,1);
y = x(idx);
[h,p] = ttest(y, 34.3);
if h == 1
disp(['p-value: ', num2str(p)]);
disp(['mu: ', num2str(mean(y))]);
disp(['mu total: ', num2str(mean(x))]);
break
end
end