% correlationsignificance.m
%
% Permutation test for the significance of a correlation coefficient.
%
% Two weakly correlated samples are generated and their correlation
% coefficient is computed. The same coefficient is then computed many
% times for shuffled copies of the data, in which the pairing of x and y
% values is destroyed. The 95% quantile of these shuffled coefficients is
% the threshold the measured coefficient has to reach in order to be
% called significant at the 5% level (one-sided test).

%% (a) generate correlated data
n = 1000;                   % number of data pairs
a = 0.2;                    % weight with which x enters y
x = randn(n, 1);
y = randn(n, 1) + a*x;

%% (b) scatter plot:
subplot(1, 2, 1);
%scatter(x, y);             % either scatter ...
plot(x, y, 'o');            % ... or plot - same plot.
xlabel('x');
ylabel('y');

%% (d) correlation coefficient:
rd = corr(x, y);
% Alternative: corrcoef returns the full 2x2 correlation matrix, whose
% off-diagonal element is the correlation between x and y:
%r = corrcoef(x, y);
%rd = r(1, 2);
fprintf('correlation coefficient = %.2f\n', rd);

%% (f) permutation:
nperm = 1000;               % number of shuffled data sets
rs = zeros(nperm, 1);       % correlation coefficients of the shuffled data
for k = 1:nperm             % k, not i, to keep the imaginary unit usable
    xr = x(randperm(n));    % shuffle x
    yr = y(randperm(n));    % shuffle y
    rs(k) = corr(xr, yr);
end

%% (g) pdf of the correlation coefficients:
[h, b] = hist(rs, 20);      % h: counts, b: bin centers
h = h/sum(h)/(b(2)-b(1));   % normalization: histogram integrates to one

%% (h) significance:
rq = quantile(rs, 0.95);
fprintf('correlation coefficient at 5%% significance = %.2f\n', rq);
if rd >= rq
    fprintf('--> correlation r=%.2f is significant\n', rd);
else
    fprintf('--> r=%.2f is not a significant correlation\n', rd);
end

%% plot:
subplot(1, 2, 2);
hold on;
bar(b, h, 'facecolor', 'b');
% Highlight the bins at or above the significance threshold. They are
% drawn on the full grid of bin centers with the remaining heights set
% to zero, so the red bars get exactly the width of the blue ones no
% matter how many bins (even one or none) lie above the threshold.
hsig = h;
hsig(b < rq) = 0;
bar(b, hsig, 'facecolor', 'r');
% Mark the measured correlation coefficient; the marker is as tall as
% the histogram so that it is visible on any density scale.
plot([rd rd], [0 max(h)], 'r', 'linewidth', 2);
xlabel('correlation coefficient');
ylabel('probability density');
hold off;