%% (a) generate correlated data
n = 1000;
a = 0.2;
x = randn(n, 1);
y = randn(n, 1) + a*x;

%% (b) scatter plot:
subplot(1, 2, 1);
plot(x, a*x, 'r', 'linewidth', 3);
hold on
%scatter(x, y );   % either scatter ...
plot(x, y, 'o', 'markersize', 2 );  % ... or plot - same plot.
xlim([-4 4])
ylim([-4 4])
xlabel('x')
ylabel('y')
hold off

%% (d) correlation coefficient:
%c = corrcoef(x, y);  % returns correlation matrix
%rd = c(1, 2);
rd = corr(x, y);
fprintf('correlation coefficient = %.2f\n', rd );

%% (e) permutation:
nperm = 1000;
rs = zeros(nperm,1);
for i=1:nperm
    xr=x(randperm(length(x)));  % shuffle x
    yr=y(randperm(length(y)));  % shuffle y
    rs(i) = corr(xr, yr);
end

%% (g) pdf of the correlation coefficients:
[h,b] = hist(rs, 20);
h = h/sum(h)/(b(2)-b(1));  % normalization

%% (h) significance:
rq = quantile(rs, 0.95);
fprintf('correlation coefficient at 5%% significance = %.2f\n', rq);
if rd >= rq
    fprintf('--> correlation r=%.2f is significant\n', rd);
else
    fprintf('--> r=%.2f is not a significant correlation\n', rd);
end

%% plot:
subplot(1, 2, 2)
hold on;
bar(b, h, 'facecolor', 'b');
bar(b(b>=rq), h(b>=rq), 'facecolor', 'r');
plot( [rd rd], [0 4], 'r', 'linewidth', 2);
xlim([-0.25 0.25])
xlabel('Correlation coefficient');
ylabel('Probability density of H0');
hold off;

savefigpdf(gcf, 'correlationsignificance.pdf', 12, 6);