Started statistics lecture

This commit is contained in:
2015-10-15 18:18:56 +02:00
parent 78ce93cb37
commit 06016d3e2f
149 changed files with 1015 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
load thymusglandweights.dat
x = thymusglandweights(1:50);
m = 500;
n = length(x);
mu = zeros(m,1);
for i = 1:m
mu(i) = mean(x(randi(n,n,1)));
end
fprintf("bootstrap standard error: %.4f\n", std(mu));
fprintf("standard error: %.4f\n", std(x)/sqrt(n));

View File

@@ -0,0 +1,19 @@
load thymusglandweights.dat
n = 16;
x = thymusglandweights(1:n);
m = 5000;
me = zeros(m,1);
for i = 1:m
me(i) = median(x(randi(n,n,1)));
end
t025 = tinv(0.025, n-1);
t975 = tinv(0.975, n-1);
se = std(x)/sqrt(n);
fprintf('bootstrap quantiles: %.4f, %.4f \n', quantile(me,0.025), quantile(me,0.975));
fprintf('analytical quantile: %.4f, %.4f \n', mean(x)+t025*se, mean(x)+t975*se);

View File

@@ -0,0 +1,17 @@
load thymusglandweights.dat
x = thymusglandweights(1:50);
m = 500;
n = length(x);
x = sort(x);
me = zeros(m,1);
for i = 1:m
me(i) = median(x(randi(n,n,1)));
end
a1 = binoinv(0.025,n,.5)-1;
a2 = binoinv(1-0.025,n,.5);
fprintf('bootstrap quantiles: %.4f, %.4f \n', quantile(me,0.025), quantile(me,1-0.025));
fprintf('analytical quantile: %.4f, %.4f \n', x(a1),x(a2));

View File

@@ -0,0 +1,5 @@
function param = estimate_regression(x,y, param0)
options = optimoptions(@fminunc,'GradObj','on');
myfunc = @(p)(lserr(p,x,y));
param = fminunc(myfunc,param0, options);

View File

@@ -0,0 +1,127 @@
function hh = gridxy(x,varargin)
% GRIDXY - Plot grid lines
% GRIDXY(X) plots vertical grid lines at the positions specified
% by X. GRIDXY(X,Y) also plots horizontal grid lines at the positions
% specified by Y. GRIDXY uses the current axes, if any. Lines outside
% the plot area are plotted but not shown. When X or Y is empty no vertical
% or horizontal lines are plotted.
%
% The lines are plotted as a single graphics object. H = GRIDXY(..) returns
% a graphics handle to that line object.
%
% GRIDXY(..., 'Prop1','Val1','Prop2','Val2', ...) uses the properties
% and values specified for color, linestyle, etc. Execute GET(H), where H is
% a line handle, to see a list of line object properties and their current values.
% Execute SET(H) to see a list of line object properties and legal property values.
%
% Examples
% % some random plot
% plot(10*rand(100,1), 10*rand(100,1),'bo') ;
% % horizontal red dashed grid
% gridxy([1.1 3.2 4.5],'Color','r','Linestyle',':') ;
% % vertical solid thicker yellowish grid, and store the handle
% h = gridxy([],[2.1:0.7:5 8],'Color',[0.9 1.0 0.2],'linewidth',3) ;
%
% GRIDXY can be used to plot a irregular grid on the axes.
%
% See also PLOT, REFLINE, GRID, AXES, REFLINEXY
% NOTE: This function was previously known as XYREFLINE
% for Matlab R13
% version 2.2 (feb 2008)
% (c) Jos van der Geest
% email: jos@jasen.nl
% History
% Created (1.0) feb 2006
% 2.0 apr 2007 - renamed from reflinexy to gridxy, reflinexy is now used
% for plotting intersection between X and Y axes
% 2.1 apr 2007 - add error check for line properties
% 2.2 feb 2008 - added set(gca,'layer','top') to put gridlines behind the
% axis tick marks
error(nargchk(1,Inf,nargin)) ;
% check the arguments
if ~isnumeric(x),
error('Numeric argument expected') ;
end
if nargin==1,
y = [] ;
va = [] ;
else
va = varargin ;
if ischar(va{1}),
% optional arguments are
y = [] ;
elseif isnumeric(va{1})
y = va{1} ;
va = va(2:end) ;
else
error('Invalid second argument') ;
end
if mod(size(va),2) == 1,
error('Property-Value have to be pairs') ;
end
end
% get the axes to plot in
hca=get(get(0,'currentfigure'),'currentaxes');
if isempty(hca),
warning('No current axes found') ;
return ;
end
% get the current limits of the axis
% used for limit restoration later on
xlim = get(hca,'xlim') ;
ylim = get(hca,'ylim') ;
% setup data for the vertical lines
xx1 = repmat(x(:).',3,1) ;
yy1 = repmat([ylim(:) ; nan],1,numel(x)) ;
% setup data for the horizontal lines
xx2 = repmat([xlim(:) ; nan],1,numel(y)) ;
yy2 = repmat(y(:).',3,1) ;
% create data for a single line object
xx1 = [xx1 xx2] ;
if ~isempty(xx1),
yy1 = [yy1 yy2] ;
% add the line to the current axes
np = get(hca,'nextplot') ;
set(hca,'nextplot','add') ;
h = line('xdata',xx1(:),'ydata',yy1(:)) ;
set(hca,'ylim',ylim,'xlim',xlim) ; % reset the limits
uistack(h,'bottom') ; % push lines to the bottom of the graph
set(hca,'nextplot',np,'Layer','top') ; % reset the nextplot state
if ~isempty(va),
try
set(h,va{:}) ; % set line properties
catch
msg = lasterror ;
error(msg.message(21:end)) ;
end
end
else
h = [] ;
end
if nargout==1, % if requested return handle
hh = h ;
end

View File

@@ -0,0 +1,55 @@
close all
% histogram
figure()
x = randn(2000,1);
hist(x, 50);
h = findobj(gca, 'Type','patch');
set(h(1), 'FaceColor',[.2,.2,.2], 'EdgeColor','w', 'linewidth',2)
grid('off')
h = gridxy([],get(gca,'ytick'),'color','w','linewidth',2)
box('off');
uistack(h, 'top')
xlabel('Data')
ylabel('Count')
set(gcf, 'PaperUnits', 'centimeters');
set(gcf, 'PaperSize', [11.7 9.0]);
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
% bar plot
figure
x = rand(10,1);
gray = [.5,.5,.5];
bar(1, mean(x), 'EdgeColor','w','FaceColor', gray);
hold on
bar(2, mean(x), 'EdgeColor','w','FaceColor', gray);
plot(0*x + 2, x, 'ok');
bar(3, mean(x), 'EdgeColor','w','FaceColor', gray);
errorbar(3, mean(x), std(x), 'ok');
bar(4, mean(x), 'EdgeColor','w','FaceColor', gray);
errorbar(4, mean(x), std(x)/sqrt(length(x)), 'ok');
set(gca, 'xtick',[])
ylabel('uniformly distributed random data in [0,1]')
box('off')
title('different forms of bar plots')
set(gcf, 'PaperUnits', 'centimeters');
set(gcf, 'PaperSize', [11.7 9.0]);
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
hold off
% box plot
figure
x = rand(10,1);
x(10) = 3;
boxplot(x)
set(gca, 'xtick',[])
ylabel('data')
box('off')
title('box plot')
set(gcf, 'PaperUnits', 'centimeters');
set(gcf, 'PaperSize', [11.7 9.0]);
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
hold off

View File

@@ -0,0 +1,5 @@
function ll = invg_loglikelihood(x, p)
mu = p(1);
lambda = p(2);
ll = .5*(log(lambda) - log(2*pi) - 3*log(x)) - ...
lambda*(x-mu).^2./(2*mu^2*x);

View File

@@ -0,0 +1,6 @@
function prob = invgausspdf(x, p)
mu = p(1);
lambda = p(2);
prob = sqrt(lambda/2/pi./x.^3) .* exp(-lambda *(x-mu).^2/2/mu^2./x);

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,6 @@
function [err, grad] = lserr(x, y, param)
err = mean( (param(1)*x + param(2) - y).^2 );
if nargout == 2
grad = lserr_gradient(param, x,y);
end

View File

@@ -0,0 +1,10 @@
function grad = lserr_gradient(param, x, y)
h = 1e-6;
grad = 0*param;
for i = 1:length(param)
paramh = param;
paramh(i) = param(i) + h;
grad(i) = (lserr(paramh,x,y) - lserr(param,x,y))/h;
end

View File

@@ -0,0 +1,30 @@
param0 = [1,1];
step = 0.01;
m = 50;
arange = linspace(0,1,m);
brange = linspace(.5,1.5, m);
[A,B] = meshgrid(arange, brange);
E = 0*A;
x = linspace(-5,5,20);
y = .5*x + 1 + randn(1, length(x));
U = 0*A;
V = 0*A;
for i = 1:m
for j = 1:m
E(i,j) = lserr([A(i,j), B(i,j)], x, y);
grad = lserr_gradient([A(i,j), B(i,j)], x, y);
U(i,j) = grad(1);
V(i,j) = grad(2);
end
end
colormap('jet');
surf(A,B,E, 'FaceAlpha',.5);

View File

@@ -0,0 +1,62 @@
close all
clear
m = 50;
arange = linspace(0,1,m);
brange = linspace(.5,1.5, m);
[A,B] = meshgrid(arange, brange);
E = 0*A;
x = linspace(-5,5,20);
y = .5*x + 1 + randn(1, length(x));
U = 0*A;
V = 0*A;
for i = 1:m
for j = 1:m
E(i,j) = lserr([A(i,j), B(i,j)], x, y);
grad = lserr_gradient([A(i,j), B(i,j)], x, y);
U(i,j) = grad(1);
V(i,j) = grad(2);
end
end
colormap('gray');
subplot(1,2,1);
hold on
surf(A,B,E, 'FaceAlpha',.5);
shading interp
pause
subplot(1,2,2);
plot(x,y,'ok');
%%
t = linspace(-5,5,100);
param0 = [1,1];
step = 0.01;
param = param0;
for i = 1:100
err = lserr(param, x, y);
derr = lserr_gradient(param, x, y);
subplot(1,2,1);
plot3(param(1), param(2), err,'or');
subplot(1,2,2);
hold off
plot(x,y,'ok');
hold on
plot(t, param(1)*t + param(2), '--k', 'LineWidth',2);
pause(0.2);
param = param - step*derr;
end
hold off

View File

@@ -0,0 +1,49 @@
close all
clear all
load thymusglandweights.dat
literature_mean = 34.3;
x = thymusglandweights;
n = length(x);
y = x - mean(x) + literature_mean;
m = 2000;
me_null = zeros(m,1);
me_h1 = zeros(m,1);
for i = 1:m
me_null(i) = mean(y(randi(n,n,1)));
me_h1(i) = mean(x(randi(n,n,1)));
end
bins = linspace(34,35,100);
null = hist(me_null, bins);
h1 = hist(me_h1, bins);
bar(bins, null, 'FaceColor',[.3,.3,.3]);
hold on
bar(bins, h1, 'FaceColor',[.7,.7,.7]);
mu = mean(x);
plot([mu,mu],[0,200],'--r','LineWidth',3);
xlabel('thymus gland weights [g]');
ylabel('frequency');
title('bootstrapped null distribution');
hold off
% 5% significance boundaries
low = quantile(me_null,0.025);
high = quantile(me_null,0.975);
disp(['the 5% boundaries are: ', num2str(low), ' ', num2str(high)]);
hold on
plot([low,low],[0,200],'--g','LineWidth',3);
plot([high,high],[0,200],'--g','LineWidth',3);
hold off
idx = abs(me_h1-literature_mean) > abs(literature_mean - low);
pow = mean(idx);
h1positive = hist(me_h1(idx), bins);
hold on
bar(bins, h1positive, 'FaceColor','g');
hold off

View File

@@ -0,0 +1,31 @@
close all
% cell type bar
figure()
bar([1,2], [50, 90], 'facecolor', 'k')
ylabel('cell count')
xlabel('cell type')
xlim([0.5,2.5])
ylim([0, 100])
box('off')
set(gca,'XTick',1:2,'XTickLabel',{'pyramidal', 'interneuron'},'FontSize',20)
set(gcf, 'PaperUnits', 'centimeters');
set(gcf, 'PaperSize', [11.7 9.0]);
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);
% cell type pie
figure()
data = [50, 90];
h = pie(data, [1,0], {'pyramidal (n=50)', 'interneuron (n=90)'})
hText = findobj(h,'Type','text') % text object handles
set(h(1), 'FaceColor', [.2,.2,.2]);
set(h(2), 'Rotation', 45);
set(h(3), 'FaceColor', [.8,.8,.8]);
set(h(4), 'Rotation', 45);
title('cell count')
set(gca,'XTick',1:2,'XTickLabel',{'pyramidal', 'interneuron'})
box('off')
set(gcf, 'PaperUnits', 'centimeters');
set(gcf, 'PaperSize', [11.7 9.0]);
set(gcf, 'PaperPosition',[0.0 0.0 11.7 9.0]);

View File

@@ -0,0 +1,38 @@
close all;
clear;
m = 50;
arange = linspace(0,1,m);
brange = linspace(.5,1.5, m);
[A,B] = meshgrid(arange, brange);
E = 0*A;
x = linspace(-5,5,20);
y = .5*x + 1 + randn(1, length(x));
U = 0*A;
V = 0*A;
for i = 1:m
for j = 1:m
E(i,j) = lserr([A(i,j), B(i,j)], x, y);
grad = lserr_gradient([A(i,j), B(i,j)], x, y);
U(i,j) = grad(1);
V(i,j) = grad(2);
end
end
colormap('jet');
surf(A,B,E, 'FaceAlpha',.5);
%shading interp;
hold on
contour(A,B,E, 50, 'LineColor', 'k')
quiver(A,B,U,V);
xlabel('a');
ylabel('b');
zlabel('mean square error')
axis([0,1,.5,1.5])

View File

@@ -0,0 +1,38 @@
close all
clear all
load thymusglandweights.dat
literature_mean = 34.3;
x = thymusglandweights;
n = length(x);
y = x - mean(x) + literature_mean;
m = 2000;
me = zeros(m,1);
for i = 1:m
me(i) = mean(y(randi(n,n,1)));
end
hist(me, 50);
hold on
mu = mean(x);
plot([mu,mu],[0,200],'--r','LineWidth',3);
xlabel('thymus gland weights [g]');
ylabel('frequency');
title('bootstrapped null distribution');
hold off
% 5% significance boundaries
low = quantile(me,0.025);
high = quantile(me,0.975);
disp(['the 5% boundaries are: ', num2str(low), ' ', num2str(high)]);
hold on
plot([low,low],[0,200],'--g','LineWidth',3);
plot([high,high],[0,200],'--g','LineWidth',3);
hold off
pval = mean(abs(me-literature_mean) > abs(mu - literature_mean))
legend('Null distribution','measured mean','5% significance boundaries')

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,21 @@
load thymusglandweights.dat
n = 10;
x = thymusglandweights;
m = 5000;
for i = 1:m
idx = randi(length(x), n,1);
y = x(idx);
[h,p] = ttest(y, 34.3);
if h == 1
disp(['p-value: ', num2str(p)]);
disp(['mu: ', num2str(mean(y))]);
disp(['mu total: ', num2str(mean(x))]);
break
end
end