104 lines
3.5 KiB
Python
104 lines
3.5 KiB
Python
import numpy as np
|
|
import scipy.stats as st
|
|
import matplotlib.pyplot as plt
|
|
import matplotlib.gridspec as gridspec
|
|
import matplotlib.ticker as ticker
|
|
from plotstyle import *
|
|
|
|
rng = np.random.RandomState(637281)
|
|
|
|
# generate data that differ in their mein by d:
|
|
n = 200
|
|
d = 0.7
|
|
x = rng.randn(n) + d;
|
|
y = rng.randn(n);
|
|
#x = rng.exponential(1.0, n);
|
|
#y = rng.exponential(1.0, n) + d;
|
|
|
|
# histogram of data:
|
|
db = 0.5
|
|
bins = np.arange(-2.5, 2.6, db)
|
|
hx, _ = np.histogram(x, bins)
|
|
hy, _ = np.histogram(y, bins)
|
|
|
|
# Diference of means, pooled standard deviation and Cohen's d:
|
|
ad = np.mean(x)-np.mean(y)
|
|
s = np.sqrt(((len(x)-1)*np.var(x)+(len(y)-1)*np.var(y))/(len(x)+len(y)-2))
|
|
cd = ad/s
|
|
|
|
# permutation:
|
|
nperm = 1000
|
|
ads = []
|
|
xy = np.hstack((x, y))
|
|
for i in range(nperm) :
|
|
xyp = rng.permutation(xy)
|
|
ads.append(np.mean(xyp[:len(x)])-np.mean(xyp[len(x):]))
|
|
|
|
# histogram of shuffled data:
|
|
hxp, _ = np.histogram(xyp[:len(x)], bins)
|
|
hyp, _ = np.histogram(xyp[len(x):], bins)
|
|
|
|
# pdf of the differences of means:
|
|
h, b = np.histogram(ads, 20, density=True)
|
|
|
|
# significance:
|
|
dq = np.percentile(ads, 95.0)
|
|
print('Measured difference of means = %.2f, difference at 95%% percentile of permutation = %.2f' % (ad, dq))
|
|
da = 1.0-0.01*st.percentileofscore(ads, ad)
|
|
print('Measured difference of means %.2f is at %.2f%% percentile of permutation' % (ad, 100.0*da))
|
|
|
|
ap, at = st.ttest_ind(x, y)
|
|
print('Measured difference of means %.2f is at %.2f%% percentile of test' % (ad, ap))
|
|
|
|
|
|
fig = plt.figure(figsize=cm_size(figure_width, 1.8*figure_height))
|
|
gs = gridspec.GridSpec(nrows=2, ncols=2, wspace=0.35, hspace=0.8,
|
|
**adjust_fs(fig, left=5.0, right=1.5, top=1.0, bottom=2.7))
|
|
|
|
ax = fig.add_subplot(gs[0,0])
|
|
ax.bar(bins[:-1]-0.25*db, hy, 0.5*db, **fsA)
|
|
ax.bar(bins[:-1]+0.25*db, hx, 0.5*db, **fsB)
|
|
ax.annotate('', xy=(0.0, 45.0), xytext=(d, 45.0), arrowprops=dict(arrowstyle='<->'))
|
|
ax.text(0.5*d, 50.0, 'd=%.1f' % d, ha='center')
|
|
ax.set_xlim(-2.5, 2.5)
|
|
ax.set_ylim(0.0, 50)
|
|
ax.yaxis.set_major_locator(ticker.MultipleLocator(20.0))
|
|
ax.set_xlabel('Original x and y values')
|
|
ax.set_ylabel('Counts')
|
|
|
|
ax = fig.add_subplot(gs[0,1])
|
|
ax.bar(bins[:-1]-0.25*db, hyp, 0.5*db, **fsA)
|
|
ax.bar(bins[:-1]+0.25*db, hxp, 0.5*db, **fsB)
|
|
ax.set_xlim(-2.5, 2.5)
|
|
ax.set_ylim(0.0, 50)
|
|
ax.yaxis.set_major_locator(ticker.MultipleLocator(20.0))
|
|
ax.set_xlabel('Shuffled x and y values')
|
|
ax.set_ylabel('Counts')
|
|
|
|
ax = fig.add_subplot(gs[1,:])
|
|
ax.annotate('Measured\ndifference\nis significant!',
|
|
xy=(ad, 1.2), xycoords='data',
|
|
xytext=(ad-0.1, 2.2), textcoords='data', ha='right',
|
|
arrowprops=dict(arrowstyle="->", relpos=(1.0,0.5),
|
|
connectionstyle="angle3,angleA=-20,angleB=100") )
|
|
ax.annotate('95% percentile',
|
|
xy=(0.19, 0.9), xycoords='data',
|
|
xytext=(0.3, 5.0), textcoords='data', ha='left',
|
|
arrowprops=dict(arrowstyle="->", relpos=(0.1,0.0),
|
|
connectionstyle="angle3,angleA=40,angleB=80") )
|
|
ax.annotate('Distribution of\nnullhypothesis',
|
|
xy=(-0.08, 3.0), xycoords='data',
|
|
xytext=(-0.22, 4.5), textcoords='data', ha='left',
|
|
arrowprops=dict(arrowstyle="->", relpos=(0.2,0.0),
|
|
connectionstyle="angle3,angleA=60,angleB=150") )
|
|
ax.bar(b[:-1], h, width=b[1]-b[0], **fsC)
|
|
ax.bar(b[:-1][b[:-1]>=dq], h[b[:-1]>=dq], width=b[1]-b[0], **fsB)
|
|
ax.plot( [ad, ad], [0, 1], **lsA)
|
|
ax.set_xlim(-0.25, 0.85)
|
|
ax.set_ylim(0.0, 5.0)
|
|
ax.yaxis.set_major_locator(ticker.MultipleLocator(2.0))
|
|
ax.set_xlabel('Difference of means')
|
|
ax.set_ylabel('PDF of H0')
|
|
|
|
plt.savefig('permuteaverage.pdf')
|