[projects] example code for mutual information

This commit is contained in:
Jan Benda 2020-01-21 18:01:16 +01:00
parent 1ddd3bb700
commit ca1b79c3cb
4 changed files with 122 additions and 10 deletions
projects


@@ -7,12 +7,18 @@ Put your solution into the `code/` subfolder.
Don't forget to add the project files to git (`git add FILENAMES`).

Upload projects to Ilias
------------------------
Simply upload ALL zip files into one folder or exercise unit (Uebungseinheit).
Provide an additional file that links project names to students.
Projects
--------
1) project_activation_curve
medium
Write questions
2) project_adaptation_fit
OK, medium
@@ -34,7 +40,6 @@ OK, medium-difficult
7) project_ficurves
OK, medium
Maybe add correlation test or fit statistics
8) project_lif
OK, difficult
@@ -42,7 +47,6 @@ no statistics
9) project_mutualinfo
OK, medium
Example code is missing
10) project_noiseficurves
OK, simple-medium


@@ -0,0 +1,8 @@
function I = mi(nxy)
% mi - mutual information (in bits) of a matrix of counts.
% nxy(x, y): number of trials in which object x was presented
% and object y was reported.
    pxy = nxy / sum(nxy(:));              % joint probability P(x,y)
    px = sum(nxy, 2) / sum(nxy(:));       % marginal P(x) as column vector
    py = sum(nxy, 1) / sum(nxy(:));       % marginal P(y) as row vector
    Ixy = pxy .* log2(pxy ./ (px * py));  % terms of the MI sum
    Ixy(nxy == 0) = 0.0;                  % 0*log(0) terms: their limit is zero
    I = sum(Ixy(:));
end
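
% Hypothetical usage sketch (an addition, not part of the committed file):
% for assumed counts in which the reported object always matches the
% presented one, mi() returns 1 bit; the zero off-diagonal counts are
% covered by the 0*log(0) rule above.
counts = [20, 0; 0, 20];   % assumed example: 20 correct trials per object
mi(counts)                 % returns 1 (bit)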


@@ -0,0 +1,90 @@
%% load data:
x = load('../data/decisions.mat');
presented = x.presented;   % presented object (1 or 2) for each trial
reported = x.reported;     % reported object (1 or 2) for each trial
%% plot data:
figure()
plot(presented, 'ob', 'markersize', 10, 'markerfacecolor', 'b');
hold on;
plot(reported, 'or', 'markersize', 5, 'markerfacecolor', 'r');
hold off
ylim([0.5, 2.5])
p1 = sum(presented == 1);
p2 = sum(presented == 2);
r1 = sum(reported == 1);
r2 = sum(reported == 2);
figure()
bar([p1, p2, r1, r2]);
set(gca, 'XTickLabel', {'p1', 'p2', 'r1', 'r2'});
%% histogram:
nxy = zeros(2, 2);
for x = [1, 2]      % presented object
    for y = [1, 2]  % reported object
        nxy(x, y) = sum((presented == x) & (reported == y));
    end
end
figure()
bar3(nxy)
set(gca, 'XTickLabel', {'p1', 'p2'});
set(gca, 'YTickLabel', {'r1', 'r2'});
%% normalized histogram:
pxy = nxy / sum(nxy(:));
figure()
imagesc(pxy)
px = sum(nxy, 2) / sum(nxy(:));
py = sum(nxy, 1) / sum(nxy(:));
%% mutual information:
miv = mi(nxy);
%% permutation:
np = 10000;
mis = zeros(np, 1);
for k = 1:np
    % shuffle the trials to destroy any relation between presented and reported:
    ppre = presented(randperm(length(presented)));
    prep = reported(randperm(length(reported)));
    pnxy = zeros(2, 2);
    for x = [1, 2]
        for y = [1, 2]
            pnxy(x, y) = sum((ppre == x) & (prep == y));
        end
    end
    mis(k) = mi(pnxy);
end
alpha = sum(mis > miv) / length(mis);   % fraction of shuffled MIs exceeding the measured MI
fprintf('significance: %g\n', alpha);
bins = 0.0:0.025:0.4;
figure()
hist(mis, bins)
hold on;
plot([miv, miv], [0, np/10], '-r')
hold off;
xlabel('MI')
ylabel('Count')
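
%% significance threshold:
% A possible extension (an addition, not part of the original script): the
% 95% level of the permutation (null) distribution can serve as the
% threshold asked for in the exercise; the measured MI is significant if
% it exceeds this value.
sms = sort(mis);
mi95 = sms(round(0.95 * np));   % 95th percentile of the shuffled MIs
fprintf('measured MI = %.3f bit, 95%% threshold = %.3f bit\n', miv, mi95);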
%% maximum MI:
n = 100000;
pxs = 0:0.01:1.0;
mis = zeros(length(pxs), 1);
for k = 1:length(pxs)
    % perfect observer: the reported object always equals the presented one,
    % with object 1 presented with probability pxs(k); the off-diagonal
    % counts stay zero:
    p = rand(n, 1);
    nxy = zeros(2, 2);
    nxy(1, 1) = sum(p < pxs(k));
    nxy(2, 2) = length(p) - nxy(1, 1);
    mis(k) = mi(nxy);
end
figure();
plot(pxs, mis);
hold on;
plot([px(1), px(1)], [0, 1], '-r')
hold off;
xlabel('p(x=1)')
ylabel('Max MI=Entropy')
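
%% limit of x*log2(x):
% Illustrative sketch (an addition to the original script) for the hint in
% the exercise: x*log2(x) approaches 0 as x approaches 0, which is why the
% 0*log(0) terms in mi() can safely be set to zero.
xs = 0.001:0.001:1.0;
figure();
plot(xs, xs .* log2(xs));
xlabel('x')
ylabel('x log_2(x)')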


@@ -31,21 +31,31 @@
\part Use that probability distribution to compute the mutual
information
\[ I[x:y] = \sum_{x\in\{1,2\}}\sum_{y\in\{1,2\}} P(x,y)
\log_2\frac{P(x,y)}{P(x)P(y)}\]
that the answers provide about the actually presented object.
The mutual information is a measure from information theory that is
used in neuroscience to quantify, for example, how much information
a spike train carries about a sensory stimulus.
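For example, if the reported object were statistically independent of
the presented one, then $P(x,y) = P(x)\,P(y)$ for all $x$ and $y$, and
the mutual information would be zero.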
\part What is the maximally achievable mutual information?
Show this numerically by generating your own datasets which
naturally should yield maximal information. Consider different
distributions of $P(x)$.
Here you may encounter a problem when computing the mutual
information whenever $P(x,y)$ equals zero. To treat this special
case, think about (and plot) what the limit of $x \log x$ is as
$x$ approaches zero. Use this insight to fix the computation of
the mutual information.
\part Use a permutation test to compute the $95\%$ confidence
interval for the mutual information estimate in the dataset from
{\tt decisions.mat}. Does the measured mutual information indicate
significant information transmission?
\end{parts}