This repository has been archived on 2021-05-17. You can view files and clone it, but cannot push or open issues or pull requests.
scientificComputing/statistics/lecture/displayunivariatedata.py

112 lines
3.9 KiB
Python

import numpy as np
import matplotlib.pyplot as plt
# Reproducible sample for all panels: 40 gamma-distributed values
# (shape 1.0, scale 1.5) shifted up by 1.0; values of 7.5 or more are dropped.
# An earlier variant used Gaussian data instead (seed 981, randn(40, 1) + 4.0).
rng = np.random.RandomState(seed=1981)
data = 1.0 + rng.gamma(shape=1.0, scale=1.5, size=40)
data = data[data < 7.5]
# Rectangle of the main panel as passed to add_axes: left, bottom, width, height.
xpos = 0.08
ypos = 0.15
width = 0.65
height = 0.8

# Horizontal layout of the three displays, in data coordinates of the main panel.
barwidth = 0.8
scatterpos = 1.0
barpos = 2.5
boxpos = 4.0

# Switch on matplotlib's hand-drawn "xkcd" look before the figure is created.
plt.xkcd()
fig = plt.figure(figsize=(6, 3.4))

# Main panel: only the left and bottom spines stay visible.
ax = fig.add_axes([xpos, ypos, width, height])
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xticklabels([])  # cleared here; the real labels are assigned below
ax.set_xlim(0.0, 4.8)
ax.set_ylabel('x')
ax.set_ylim(0.0, 8.0)

# One labelled tick per display type.
ax.set_xticks([scatterpos, barpos, boxpos])
ax.set_xticklabels(['(1) data', '(2) bar\n plot', '(3) box-\nwhisker'])
# (1) Raw data: every sample is drawn as a dot with a random horizontal offset.
# The offset is scaled by barwidth and centred on zero, so the point cloud is
# barwidth wide and centred on its tick at scatterpos (an unscaled rand()
# would make it 1.0 wide and shifted to the right).
ax.scatter(scatterpos + barwidth*(rng.rand(len(data)) - 0.5), data, s=50)

# (2) Bar plot: the bar height is the mean of the data, the error bar spans
# mean +/- one standard deviation (np.std with its default arguments).
barmean = np.mean(data)
barstd = np.std(data)
ew = 0.2  # horizontal extent of the error-bar caps
# The bar is centred on barpos with an explicit align='center'. The default
# alignment of bar() changed from 'edge' to 'center' in matplotlib 2.0, so
# relying on the default places the bar differently depending on the version.
ax.bar([barpos], [barmean], barwidth, align='center', color='#FFCC00')
eargs = {'color': 'k', 'lw': 2}
# Error bar: vertical stem plus a lower and an upper cap.
ax.plot([barpos, barpos], [barmean-barstd, barmean+barstd], **eargs)
ax.plot([barpos-0.5*ew, barpos+0.5*ew], [barmean-barstd, barmean-barstd], **eargs)
ax.plot([barpos-0.5*ew, barpos+0.5*ew], [barmean+barstd, barmean+barstd], **eargs)

# Annotation arrows point at hand-placed data coordinates
# (presumably chosen to match the bar top and the upper cap for this sample).
ax.annotate('mean',
            xy=(barpos-0.4*barwidth, 2.7), xycoords='data',
            xytext=(barpos-1*barwidth, 5.5), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(1.0,0.5),
                            connectionstyle="angle3,angleA=0,angleB=120") )
ax.annotate('mean plus\nstd. dev.',
            xy=(barpos+0.05*barwidth, 4.2), xycoords='data',
            xytext=(barpos-1*barwidth, 7.0), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.5,0.0),
                            connectionstyle="angle3,angleA=-60,angleB=80") )
# (3) Box-and-whisker plot, drawn on a separate axes that lies exactly on top
# of the main panel and shows nothing but the box plot and its annotations.
# The unique label guarantees that add_axes() creates a new axes: older
# matplotlib releases returned the already existing axes when add_axes() was
# called again with identical arguments.
ax = fig.add_axes([xpos, ypos, width, height], label='boxplot overlay')
# Hide the overlay's background patch; it would otherwise cover everything
# drawn on the main panel underneath.
ax.patch.set_visible(False)
for side in ('right', 'top', 'left', 'bottom'):
    ax.spines[side].set_visible(False)
ax.xaxis.set_ticks_position('none')
ax.yaxis.set_ticks_position('none')

# whis=100.0 makes the whiskers reach up to 100 interquartile ranges beyond the
# box, which for this data means they extend to the minimum and maximum.
wh = ax.boxplot( data, positions=[boxpos], widths=[barwidth], whis=100.0, patch_artist=True)
# boxplot() may install its own tick and tick label at the box position, so
# the overlay's ticks are removed only after the call.
ax.set_xticks([])
ax.set_yticks([])

# Styling: thick median line, solid black whiskers and caps, green box.
wh['medians'][0].set_linewidth(4)
whiskercolor = 'k'
for whisker in wh['whiskers'][:2]:
    whisker.set_linewidth(2)
    whisker.set_linestyle('-')
    whisker.set_color(whiskercolor)
for cap in wh['caps'][:2]:
    cap.set_color(whiskercolor)
wh['boxes'][0].set_facecolor('#99ff00')

# Same data limits as the main panel so both axes share one coordinate system.
ax.set_xlim(0.0, 4.8)
ax.set_ylim( 0.0, 8.0)

# Annotation arrows point at hand-placed data coordinates.
ax.annotate('maximum',
            xy=(boxpos, 6.5), xycoords='data',
            xytext=(boxpos-1*barwidth, 7.6), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(1.0,0.5),
                            connectionstyle="angle3,angleA=0,angleB=120") )
ax.annotate('3. quartile',
            xy=(boxpos-0.3*barwidth, 3.7), xycoords='data',
            xytext=(boxpos-1.3*barwidth, 5.5), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.4,0.0),
                            connectionstyle="angle3,angleA=0,angleB=120") )
ax.annotate('median',
            xy=(boxpos+0.6*barwidth, 2.2), xycoords='data',
            xytext=(boxpos+0.1*barwidth, 4.2), textcoords='data', ha='left',
            arrowprops=dict(arrowstyle="->", relpos=(0.8,0.0),
                            connectionstyle="angle3,angleA=-60,angleB=20") )
# (4) Histogram panel: a narrow axes to the right of the main panel, spanning
# the remaining figure width up to 0.98, with the same y-range as the main panel.
ax = fig.add_axes([xpos+width+0.03, ypos, 0.98-(xpos+width+0.03), height])
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
ax.set_yticklabels([])
ax.set_ylim( 0.0, 8.0)
ax.set_xticks(np.arange(0.0, 0.4, 0.1))
ax.set_xlabel('(4) p(x)')

bw = 0.75  # bin width
bins = np.arange(0, 8.0+bw, bw)
h, b = np.histogram(data, bins)
# Counts are divided by the bin width and the total count, giving a
# probability density. Each bar starts at its lower bin edge b[i] and is bw
# high, hence align='edge': the default alignment of barh() changed from
# 'edge' to 'center' in matplotlib 2.0, which would shift every bar down by
# half a bin.
ax.barh(b[:-1], h/bw/np.sum(h), bw, align='edge', color='#CC0000')
plt.savefig('displayunivariatedata.pdf')
#plt.show()