This repository has been archived on 2021-05-17. You can view files and clone it, but cannot push or open issues or pull requests.
scientificComputing/statistics/lecture/displayunivariatedata.py

103 lines
3.8 KiB
Python

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from scipy.stats import gaussian_kde
from plotstyle import *
# Sample data for the figure: 40 draws from a gamma distribution
# (shape 1.0, scale 1.5), shifted up by 1.0; afterwards only values
# below 7.5 are kept. The fixed seed makes every run use the same data.
# (An earlier variant, now disabled, used seed 981 and normally
# distributed data: rng.randn(40, 1) + 4.0.)
rng = np.random.RandomState(1981)
data = rng.gamma(shape=1.0, scale=1.5, size=40) + 1.0
data = data[data < 7.5]

# Horizontal layout of the left panel, in data coordinates of its x axis.
scatterpos = 1.0   # column showing the individual data points
barpos = 2.5       # column showing the bar plot
boxpos = 4.0       # column showing the box-whisker plot
barwidth = 0.8     # common width of the bar, the box and the point cloud

# One row with two panels; the left one is three times as wide as the
# right one. cm_size, figure_width, figure_height and adjust_fs come from
# the plotstyle star import; whatever adjust_fs returns is forwarded to
# GridSpec as keyword arguments.
fig = plt.figure(figsize=cm_size(figure_width, 1.1*figure_height))
layout = adjust_fs(fig, left=4.0)
spec = gridspec.GridSpec(nrows=1, ncols=2, width_ratios=[3, 1],
                         wspace=0.1, **layout)
# Lower layer of the left panel: an axes that holds only the box-whisker
# plot. Its x tick labels are blanked here; a second axes created later in
# the same grid cell supplies the visible ticks and labels.
ax = fig.add_subplot(spec[0, 0], label='1')

# whis=100.0 lets the whiskers reach up to 100 interquartile ranges beyond
# the box, so in effect they end at the smallest and largest data value.
# patch_artist=True makes the box a filled patch that can take a face color.
boxparts = ax.boxplot(data, positions=[boxpos], widths=[barwidth],
                      whis=100.0, patch_artist=True)

# Thick black median line.
median_color = 'k'
median_line = boxparts['medians'][0]
median_line.set_linewidth(4)
median_line.set_color(median_color)

# Solid black whiskers and caps (one pair each for the single data set).
whisker_color = 'k'
for whisker in boxparts['whiskers'][:2]:
    whisker.set_linewidth(2)
    whisker.set_linestyle('-')
    whisker.set_color(whisker_color)
for cap in boxparts['caps'][:2]:
    cap.set_color(whisker_color)

boxparts['boxes'][0].set_facecolor('#99ff00')

# Same axis limits as the axes that is overlaid on this one.
ax.set_xlim(0.0, 4.8)
ax.set_ylim(0.0, 8.0)
ax.set_xticklabels([])
# Upper layer of the left panel: a second axes in the same grid cell as the
# box-whisker axes. It carries the tick labels, the y label and all
# annotations, and later the scatter and bar plots.
ax = fig.add_subplot(spec[0, 0], label='2')
# This axes is drawn after (i.e. on top of) the box-whisker axes. Hide its
# background patch so that the box plot underneath is not covered,
# whatever axes face color is configured.
ax.patch.set_visible(False)
ax.set_xlim(0.0, 4.8)
ax.set_ylim(0.0, 8.0)
ax.set_xticks([scatterpos, barpos, boxpos])
ax.set_xticklabels(['(1) data', '(2) bar\n plot', '(3) box-\nwhisker'],
                   fontsize='medium')
ax.set_ylabel('x')

# Annotate the box-whisker plot. The arrow tips use fixed, hand-placed
# y values (6.5, 3.7, 2.2); they are not computed from the data.
ax.annotate('maximum',
            xy=(boxpos, 6.5), xycoords='data',
            xytext=(boxpos - 1*barwidth, 7.6), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(1.0, 0.5),
                            connectionstyle="angle3,angleA=0,angleB=120"))
ax.annotate('3. quartile',
            xy=(boxpos - 0.3*barwidth, 3.7), xycoords='data',
            xytext=(boxpos - 0.1*barwidth, 5.5), textcoords='data', ha='right',
            arrowprops=dict(arrowstyle="->", relpos=(0.4, 0.0),
                            connectionstyle="angle3,angleA=0,angleB=120"))
ax.annotate('median',
            xy=(boxpos + 0.6*barwidth, 2.2), xycoords='data',
            xytext=(boxpos + 0.1*barwidth, 4.2), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.8, 0.0),
                            connectionstyle="angle3,angleA=-60,angleB=20"))
# (1) Raw data: scatter the points horizontally according to their density.
# A Gaussian kernel density estimate is evaluated at every data point and
# normalized to a maximum of 1; it scales uniform jitter from [-0.5, 0.5),
# so points in dense regions spread over up to the full barwidth.
kernel = gaussian_kde(data)
density = kernel(data)
density /= np.max(density)
jitter = rng.rand(len(data)) - 0.5
ax.plot(scatterpos + barwidth*density*jitter, data, **psA)

# (2) Bar plot: bar height is the mean, the error bar spans
# mean +/- one standard deviation (np.std default, i.e. ddof=0).
barmean = np.mean(data)
barstd = np.std(data)
ew = 0.2  # width of the horizontal end caps of the error bar

# matplotlib >= 2 centers a bar on its x coordinate, older releases
# interpret x as the left edge. mpl_major comes from plotstyle and is
# presumably matplotlib's major version number.
barx = barpos if mpl_major > 1 else barpos - 0.5*barwidth
ax.bar([barx], [barmean], barwidth, **fsC)

# Error bar: vertical line plus lower and upper end cap.
ax.plot([barpos, barpos], [barmean - barstd, barmean + barstd], **lsMarker)
ax.plot([barpos - 0.5*ew, barpos + 0.5*ew],
        [barmean - barstd, barmean - barstd], **lsMarker)
ax.plot([barpos - 0.5*ew, barpos + 0.5*ew],
        [barmean + barstd, barmean + barstd], **lsMarker)

# Annotate the bar plot. The arrow tips use fixed, hand-placed y values
# (2.7 and 4.2); they are not computed from barmean and barstd.
ax.annotate('mean',
            xy=(barpos - 0.4*barwidth, 2.7), xycoords='data',
            xytext=(barpos - 1*barwidth, 5.5), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(1.0, 0.5),
                            connectionstyle="angle3,angleA=0,angleB=120"))
ax.annotate('mean plus\nstd. dev.',
            xy=(barpos + 0.05*barwidth, 4.2), xycoords='data',
            xytext=(barpos - 1*barwidth, 7.0), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.5, 0.0),
                            connectionstyle="angle3,angleA=-60,angleB=80"))
# (4) Right panel: normalized histogram of the data drawn sideways, so that
# its value axis lines up with the y axis of the left panel (same y limits,
# y tick labels blanked).
ax = fig.add_subplot(spec[0, 1])
ax.set_yticklabels([])
ax.set_ylim(0.0, 8.0)
ax.set_xticks(np.arange(0.0, 0.4, 0.1))
ax.set_xlabel('(4) pdf')

bw = 0.75                           # bin width
bins = np.arange(0, 8.0 + bw, bw)   # bin edges
h, b = np.histogram(data, bins)
# Counts divided by bin width and total count: the bar areas sum to one.
pdf = h/bw/np.sum(h)

# b[:-1] holds the lower edge of each bin. matplotlib >= 2 centers
# horizontal bars on the given y coordinate, whereas older releases treat
# it as the bottom edge, so shift by half a bin width on newer versions
# to keep each bar on its bin (same mpl_major convention as used for the
# vertical bar in the left panel).
if mpl_major > 1:
    ypos = b[:-1] + 0.5*bw
else:
    ypos = b[:-1]
ax.barh(ypos, pdf, bw, **fsB)

fig.subplots_adjust(top=0.9, bottom=0.2)
plt.savefig('displayunivariatedata.pdf')