updated plots of the data analysis chapters

This commit is contained in:
Jan Benda 2020-01-14 23:38:16 +01:00
parent 75aa46d71c
commit 9769d5e94f
18 changed files with 226 additions and 272 deletions

View File

@ -1,8 +1,7 @@
import numpy as np
import matplotlib.pyplot as plt
from plotstyle import *
plt.xkcd()
fig = plt.figure( figsize=(6,3.5) )
rng = np.random.RandomState(637281)
nsamples = 100
@ -25,11 +24,8 @@ for i in range(nresamples) :
musrs.append(np.mean(rng.randn(nsamples)))
hmusrs, _ = np.histogram(musrs, bins, density=True)
ax = fig.add_subplot(1, 1, 1)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.2*figure_height))
fig.subplots_adjust(**adjust_fs(left=4.0, bottom=2.7, right=1.5))
ax.set_xlabel('Mean')
ax.set_xlim(-0.4, 0.4)
ax.set_ylabel('Probability density')
@ -45,9 +41,7 @@ ax.annotate('bootstrap\ndistribution',
xytext=(0.25, 4), textcoords='data',
arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
connectionstyle="angle3,angleA=20,angleB=60") )
ax.bar(bins[:-1]-0.25*db, hmusrs, 0.5*db, color='r')
ax.bar(bins[:-1]+0.25*db, hmus, 0.5*db, color='b')
ax.bar(bins[:-1]-0.25*db, hmusrs, 0.5*db, **fsB)
ax.bar(bins[:-1]+0.25*db, hmus, 0.5*db, **fsA)
plt.tight_layout()
plt.savefig('bootstrapsem.pdf')
#plt.show();

View File

@ -1,9 +1,8 @@
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
from plotstyle import *
plt.xkcd()
fig = plt.figure( figsize=(6,3.5) )
rng = np.random.RandomState(637281)
# generate correlated data:
@ -36,33 +35,28 @@ print('Measured correlation coefficient %.2f is at %.4f percentile of bootstrap'
rp, ra = st.pearsonr(x, y)
print('Measured correlation coefficient %.2f is at %.4f percentile of test' % (rp, ra))
ax = fig.add_subplot(1, 1, 1)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.2*figure_height))
fig.subplots_adjust(**adjust_fs(left=4.0, bottom=2.7, right=0.5, top=1.0))
ax.annotate('Measured\ncorrelation\nis significant!',
xy=(rd, 1.1), xycoords='data',
xytext=(rd, 2.2), textcoords='data', ha='left',
arrowprops=dict(arrowstyle="->", relpos=(0.2,0.0),
connectionstyle="angle3,angleA=10,angleB=80") )
connectionstyle="angle3,angleA=10,angleB=80") )
ax.annotate('95% percentile',
xy=(0.14, 0.9), xycoords='data',
xytext=(0.2, 4.0), textcoords='data', ha='left',
arrowprops=dict(arrowstyle="->", relpos=(0.1,0.0),
connectionstyle="angle3,angleA=30,angleB=70") )
connectionstyle="angle3,angleA=30,angleB=70") )
ax.annotate('Distribution of\nuncorrelated\nsamples',
xy=(-0.08, 3.6), xycoords='data',
xytext=(-0.22, 5.0), textcoords='data', ha='left',
arrowprops=dict(arrowstyle="->", relpos=(0.5,0.0),
connectionstyle="angle3,angleA=150,angleB=100") )
ax.bar(b[:-1], h, width=b[1]-b[0], color='#ffff66')
ax.bar(b[:-1][b[:-1]>=rq], h[b[:-1]>=rq], width=b[1]-b[0], color='#ff9900')
ax.plot( [rd, rd], [0, 1], 'b', linewidth=4 )
connectionstyle="angle3,angleA=150,angleB=100") )
ax.bar(b[:-1], h, width=b[1]-b[0], **fsC)
ax.bar(b[:-1][b[:-1]>=rq], h[b[:-1]>=rq], width=b[1]-b[0], **fsB)
ax.plot( [rd, rd], [0, 1], **lsA)
ax.set_xlim(-0.25, 0.35)
ax.set_xlabel('Correlation coefficient')
ax.set_ylabel('Probability density of H0')
plt.tight_layout()
plt.savefig('permutecorrelation.pdf')
#plt.show();

View File

@ -1,24 +1,22 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.gridspec as gridspec
from plotstyle import *
plt.xkcd()
fig = plt.figure( figsize=(6,6.8) )
rng = np.random.RandomState(4637281)
lmarg=0.1
rmarg=0.1
ax = fig.add_axes([lmarg, 0.75, 1.0-rmarg, 0.25])
ax.spines['bottom'].set_position('zero')
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.xaxis.set_ticks_position('bottom')
ax.get_yaxis().set_visible(False)
fig = plt.figure(figsize=cm_size(figure_width, 2.8*figure_height))
spec = gridspec.GridSpec(nrows=4, ncols=1, height_ratios=[4, 4, 1, 3], hspace=0.2,
**adjust_fs(fig, left=4.0))
ax = fig.add_subplot(spec[0, 0])
ax.set_xlim(0.0, np.pi)
ax.set_xticks(np.arange(0.125*np.pi, 1.*np.pi, 0.125*np.pi))
ax.set_xticklabels([])
ax.set_ylim(0.0, 3.5)
ax.yaxis.set_major_locator(plt.NullLocator())
ax.text(-0.2, 0.5*3.5, 'Activity', rotation='vertical', va='center')
ax.annotate('Tuning curve',
xy=(0.42*np.pi, 2.5), xycoords='data',
@ -31,55 +29,49 @@ ax.annotate('',
arrowprops=dict(arrowstyle="->", relpos=(0.5,0.5),
connectionstyle="angle3,angleA=80,angleB=90") )
ax.text(0.52*np.pi, 0.7, 'preferred\norientation')
ax.plot([0, 0], [0.0, 3.5], 'k', zorder=10, clip_on=False)
xx = np.arange(0.0, 2.0*np.pi, 0.01)
pp = 0.5*np.pi
yy = np.exp(np.cos(2.0*(xx+pp)))
ax.fill_between(xx, yy+0.25*yy, yy-0.25*yy, color=cm.autumn(0.3, 1), alpha=0.5)
ax.plot(xx, yy, color=cm.autumn(0.0, 1))
ax.fill_between(xx, yy+0.25*yy, yy-0.25*yy, **fsBa)
ax.plot(xx, yy, **lsB)
ax = fig.add_axes([lmarg, 0.34, 1.0-rmarg, 0.38])
ax.spines['bottom'].set_position('zero')
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.xaxis.set_ticks_position('bottom')
ax.get_yaxis().set_visible(False)
ax = fig.add_subplot(spec[1, 0])
ax.set_xlim(0.0, np.pi)
ax.set_xticks(np.arange(0.125*np.pi, 1.*np.pi, 0.125*np.pi))
ax.set_xticklabels([])
ax.set_ylim(-1.5, 3.0)
ax.text(0.5*np.pi, -1.8, 'Orientation', ha='center')
ax.set_ylim(0.0, 3.0)
ax.yaxis.set_major_locator(plt.NullLocator())
ax.text(-0.2, 0.5*3.5, 'Activity', rotation='vertical', va='center')
ax.plot([0, 0], [0.0, 3.0], 'k', zorder=10, clip_on=False)
xx = np.arange(0.0, 1.0*np.pi, 0.01)
prefphases = np.arange(0.125*np.pi, 1.*np.pi, 0.125*np.pi)
responses = []
xresponse = 0.475*np.pi
for pp in prefphases :
for pp, ls, ps in zip(prefphases, [lsE, lsC, lsD, lsB, lsD, lsC, lsE],
[psE, psC, psD, psB, psD, psC, psE]) :
yy = np.exp(np.cos(2.0*(xx+pp)))
ax.plot(xx, yy, color=cm.autumn(2.0*np.abs(pp/np.pi-0.5), 1))
#ax.plot(xx, yy, color=cm.autumn(2.0*np.abs(pp/np.pi-0.5), 1))
ax.plot(xx, yy, **ls)
y = np.exp(np.cos(2.0*(xresponse+pp)))
responses.append(y + rng.randn()*0.25*y)
ax.plot(xresponse, y, '.', markersize=20, color=cm.autumn(2.0*np.abs(pp/np.pi-0.5), 1))
r=0.3
y=-0.8
ax.plot([pp-0.5*r*np.cos(pp), pp+0.5*r*np.cos(pp)], [y-r*np.sin(pp), y+r*np.sin(pp)], 'k', lw=6)
ax.plot(xresponse, y, **ps)
responses = np.array(responses)
ax = fig.add_axes([lmarg, 0.05, 1.0-rmarg, 0.22])
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.xaxis.set_ticks_position('bottom')
ax.get_yaxis().set_visible(False)
ax = fig.add_subplot(spec[2, 0])
ax.show_spines('')
r = 0.3
ax.set_ylim(-1.1*r, 1.1*r)
for pp in prefphases:
ax.plot([pp-0.5*r*np.cos(pp), pp+0.5*r*np.cos(pp)], [-r*np.sin(pp), r*np.sin(pp)],
colors['black'], lw=6, clip_on=False)
ax = fig.add_subplot(spec[3, 0])
ax.set_xlim(0.0, np.pi)
ax.set_xticks(np.arange(0.125*np.pi, 1.*np.pi, 0.125*np.pi))
ax.set_xticklabels([])
ax.set_ylim(-1600, 0)
ax.yaxis.set_major_locator(plt.NullLocator())
ax.set_xlabel('Orientation')
ax.text(-0.2, -800, 'Log-Likelihood', rotation='vertical', va='center')
ax.plot([0, 0], [-1600, 0], 'k', zorder=10, clip_on=False)
phases = np.linspace(0.0, 1.1*np.pi, 100)
probs = np.zeros((len(responses), len(phases)))
for k, (pp, r) in enumerate(zip(prefphases, responses)) :
@ -95,7 +87,6 @@ ax.annotate('',
arrowprops=dict(arrowstyle="->", relpos=(0.5,0.5),
connectionstyle="angle3,angleA=80,angleB=90") )
ax.text(maxp+0.05, -1100, 'most likely\norientation\ngiven the responses')
ax.plot(phases, loglikelihood, '-b')
ax.plot(phases, loglikelihood, **lsA)
plt.savefig('mlecoding.pdf')
#plt.show();

View File

@ -1,8 +1,11 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from plotstyle import *
plt.xkcd()
fig = plt.figure( figsize=(6,5) )
fig = plt.figure(figsize=cm_size(figure_width, 1.8*figure_height))
spec = gridspec.GridSpec(nrows=2, ncols=2, hspace=0.6,
**adjust_fs(fig, left=5.5))
# the data:
n = 40
@ -11,21 +14,17 @@ sigma = 0.5
rmu = 2.0
xd = rng.randn(n)*sigma+rmu
# and possible pdfs:
x = np.arange( 0.0, 4.0, 0.01 )
x = np.arange(0.0, 4.0, 0.01)
mus = [1.5, 2.0, 2.5]
g=np.zeros((len(x), len(mus)))
for k, mu in enumerate(mus) :
g[:,k] = np.exp(-0.5*((x-mu)/sigma)**2.0)/np.sqrt(2.0*np.pi)/sigma
# plot it:
ax = fig.add_subplot( 2, 1, 1 )
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax = fig.add_subplot(spec[0, :])
ax.set_xlim(0.5, 3.5)
ax.set_ylim(-0.02, 0.85)
ax.set_xticks( np.arange(0, 5))
ax.set_yticks( np.arange(0, 0.9, 0.2))
ax.set_xticks(np.arange(0, 5))
ax.set_yticks(np.arange(0, 0.9, 0.2))
ax.set_xlabel('x')
ax.set_ylabel('Probability density')
s = 1
@ -36,14 +35,14 @@ for mu in mus :
ax.annotate('', xy=(mu, 0.02), xycoords='data',
xytext=(mu, 0.75), textcoords='data',
arrowprops=dict(arrowstyle="->", relpos=(0.5,0.5),
connectionstyle=cs), zorder=1 )
connectionstyle=cs), zorder=1)
if mu > rmu :
ax.text(mu-0.1, 0.04, '?', zorder=1, ha='right')
else :
ax.text(mu+0.1, 0.04, '?', zorder=1)
for k in range(len(mus)) :
ax.plot(x, g[:,k], zorder=5)
ax.scatter(xd, 0.05*rng.rand(len(xd))+0.2, s=30, zorder=10)
for k, ls in enumerate([lsCm, lsBm, lsDm]) :
ax.plot(x, g[:,k], zorder=5, **ls)
ax.plot(xd, 0.05*rng.rand(len(xd))+0.2, zorder=10, **psAm)
# likelihood:
thetas=np.arange(1.5, 2.6, 0.01)
@ -52,48 +51,38 @@ for i, theta in enumerate(thetas) :
ps[:,i]=np.exp(-0.5*((xd-theta)/sigma)**2.0)/np.sqrt(2.0*np.pi)/sigma
p=np.prod(ps,axis=0)
# plot it:
ax = fig.add_subplot( 2, 2, 3 )
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax = fig.add_subplot(spec[1, 0])
ax.set_xlabel(r'Parameter $\theta$')
ax.set_ylabel('Likelihood')
ax.set_xticks( np.arange(1.6, 2.5, 0.4))
ax.set_xticks(np.arange(1.6, 2.5, 0.4))
ax.annotate('Maximum',
xy=(2.0, 5.5e-11), xycoords='data',
xytext=(1.0, 1.1), textcoords='axes fraction', ha='right',
arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
connectionstyle="angle3,angleA=10,angleB=70") )
connectionstyle="angle3,angleA=10,angleB=70"))
ax.annotate('',
xy=(2.0, 0), xycoords='data',
xytext=(2.0, 5e-11), textcoords='data',
arrowprops=dict(arrowstyle="->", relpos=(0.5,0.5),
connectionstyle="angle3,angleA=90,angleB=80") )
ax.plot(thetas,p)
connectionstyle="angle3,angleA=90,angleB=80"))
ax.plot(thetas, p, **lsAm)
ax = fig.add_subplot( 2, 2, 4 )
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax = fig.add_subplot(spec[1, 1])
ax.set_xlabel(r'Parameter $\theta$')
ax.set_ylabel('Log-Likelihood')
ax.set_ylim(-50,-20)
ax.set_xticks( np.arange(1.6, 2.5, 0.4))
ax.set_yticks( np.arange(-50, -19, 10.0))
ax.set_xticks(np.arange(1.6, 2.5, 0.4))
ax.set_yticks(np.arange(-50, -19, 10.0))
ax.annotate('Maximum',
xy=(2.0, -23), xycoords='data',
xytext=(1.0, 1.1), textcoords='axes fraction', ha='right',
arrowprops=dict(arrowstyle="->", relpos=(0.0,0.5),
connectionstyle="angle3,angleA=10,angleB=70") )
connectionstyle="angle3,angleA=10,angleB=70"))
ax.annotate('',
xy=(2.0, -50), xycoords='data',
xytext=(2.0, -26), textcoords='data',
arrowprops=dict(arrowstyle="->", relpos=(0.5,0.5),
connectionstyle="angle3,angleA=80,angleB=100") )
ax.plot(thetas,np.log(p))
connectionstyle="angle3,angleA=80,angleB=100"))
ax.plot(thetas,np.log(p), **lsAm)
plt.tight_layout();
plt.savefig('mlemean.pdf')
#plt.show();

View File

@ -2,9 +2,10 @@ import numpy as np
import scipy.stats as st
import scipy.optimize as opt
import matplotlib.pyplot as plt
from plotstyle import *
plt.xkcd()
fig = plt.figure( figsize=(6,3) )
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.subplots_adjust(**adjust_fs(fig, right=1.0))
# the data:
n = 100
@ -23,27 +24,23 @@ a = st.gamma.fit(xd, 5.0)
yf = st.gamma.pdf(xx, *a)
# plot it:
ax = fig.add_subplot( 1, 2, 1 )
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlim(0, 10.0)
ax.set_ylim(0.0, 0.42)
ax.set_xticks( np.arange(0, 11, 2))
ax.set_yticks( np.arange(0, 0.42, 0.1))
ax.set_xlabel('x')
ax.set_ylabel('Probability density')
ax.plot(xx, yy, '-', lw=5, color='#ff0000', label='pdf')
ax.plot(xx, yf, '-', lw=2, color='#ffcc00', label='mle')
ax1.set_xlim(0, 10.0)
ax1.set_ylim(0.0, 0.42)
ax1.set_xticks(np.arange(0, 11, 2))
ax1.set_yticks(np.arange(0, 0.42, 0.1))
ax1.set_xlabel('x')
ax1.set_ylabel('Probability density')
ax1.plot(xx, yy, label='pdf', **lsB)
ax1.plot(xx, yf, label='mle', **lsCm)
kernel = st.gaussian_kde(xd)
x = kernel(xd)
x /= np.max(x)
ax.scatter(xd, 0.05*x*(rng.rand(len(xd))-0.5)+0.05, s=30, zorder=10)
ax.legend(loc='upper right', frameon=False)
sigma = 0.07
ax1.plot(xd, sigma*x*(rng.rand(len(xd))-0.5)+sigma, zorder=10, **psAm)
ax1.legend(loc='upper right')
# histogram:
h,b = np.histogram(xd, np.arange(0, 8.5, 1), density=True)
h,b = np.histogram(xd, np.arange(0, 8.4, 0.5), density=True)
# fit histogram:
def gammapdf(x, n, l, s) :
@ -52,22 +49,15 @@ popt, pcov = opt.curve_fit(gammapdf, b[:-1]+0.5*(b[1]-b[0]), h)
yc = st.gamma.pdf(xx, *popt)
# plot it:
ax = fig.add_subplot( 1, 2, 2 )
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlim(0, 10.0)
ax.set_xticks( np.arange(0, 11, 2))
ax.set_xlabel('x')
ax.set_ylim(0.0, 0.42)
ax.set_yticks( np.arange(0, 0.42, 0.1))
ax.set_ylabel('Probability density')
ax.plot(xx, yy, '-', lw=5, color='#ff0000', label='pdf')
ax.plot(xx, yc, '-', lw=2, color='#ffcc00', label='fit')
ax.bar(b[:-1], h, np.diff(b))
ax.legend(loc='upper right', frameon=False)
ax2.set_xlim(0, 10.0)
ax2.set_xticks(np.arange(0, 11, 2))
ax2.set_xlabel('x')
ax2.set_ylim(0.0, 0.42)
ax2.set_yticks(np.arange(0, 0.42, 0.1))
ax2.set_ylabel('Probability density')
ax2.plot(xx, yy, label='pdf', **lsB)
ax2.plot(xx, yc, label='fit', **lsCm)
ax2.bar(b[:-1], h, np.diff(b), **fsA)
ax2.legend(loc='upper right')
plt.tight_layout();
plt.savefig('mlepdf.pdf')
#plt.show();

View File

@ -1,9 +1,14 @@
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from plotstyle import *
plt.xkcd()
fig = plt.figure(figsize=(6, 3))
fig = plt.figure()
spec = gridspec.GridSpec(nrows=1, ncols=2, wspace=0.3,
**adjust_fs(fig, left=5.5))
spec1 = gridspec.GridSpecFromSubplotSpec(1, 2, spec[0, 0], width_ratios=[3, 1], wspace=0.0)
spec2 = gridspec.GridSpecFromSubplotSpec(1, 2, spec[0, 1], width_ratios=[3, 1], wspace=0.0)
# the line:
slope = 2.0
@ -20,71 +25,44 @@ slopef = np.sum(x*y)/np.sum(x*x)
yf = slopef*xx
# plot it:
ax = fig.add_axes([0.09, 0.02, 0.33, 0.9])
ax.spines['left'].set_position('zero')
ax.spines['bottom'].set_position('zero')
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.get_xaxis().set_tick_params(direction='inout', length=10, width=2)
ax.get_yaxis().set_tick_params(direction='inout', length=10, width=2)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax = fig.add_subplot(spec1[0, 0])
ax.set_xticks(np.arange(0.0, 4.1))
ax.set_xlim(0.0, 4.2)
ax.set_ylim(-4.0, 12.0)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.scatter(x, y, label='data', s=40, zorder=10)
ax.plot(xx, yy, 'r', lw=5.0, color='#ff0000', label='original', zorder=5)
ax.plot(xx, yf, '--', lw=1.0, color='#ffcc00', label='fit', zorder=7)
ax.legend(loc='upper left', bbox_to_anchor=(0.0, 1.15), frameon=False)
ax.plot(x, y, label='data', zorder=10, **psAm)
ax.plot(xx, yy, label='original', zorder=5, **lsB)
ax.plot(xx, yf, label='fit', zorder=7, **lsCm)
ax.legend(loc='upper left', bbox_to_anchor=(0.0, 1.15))
ax = fig.add_axes([0.42, 0.02, 0.07, 0.9])
ax.spines['left'].set_position('zero')
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.get_yaxis().set_tick_params(direction='inout', length=10, width=2)
ax.yaxis.set_ticks_position('left')
ax = fig.add_subplot(spec1[0, 1])
ax.show_spines('l')
ax.set_xticks([])
ax.set_ylim(-4.0, 12.0)
ax.set_yticks([])
bins = np.arange(-4.0, 12.1, 0.75)
ax.hist(y, bins, orientation='horizontal', zorder=10)
ax.hist(y, bins, orientation='horizontal', zorder=10, **fsA)
ax = fig.add_axes([0.6, 0.02, 0.33, 0.9])
ax.spines['left'].set_position('zero')
ax.spines['bottom'].set_position('zero')
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.get_xaxis().set_tick_params(direction='inout', length=10, width=2)
ax.get_yaxis().set_tick_params(direction='inout', length=10, width=2)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax = fig.add_subplot(spec2[0, 0])
ax.set_xticks(np.arange(0.0, 4.1))
ax.set_xlim(0.0, 4.2)
ax.set_ylim(-4.0, 12.0)
ax.set_xlabel('x')
ax.set_ylabel('y - mx')
ax.scatter(x, y - slopef*x, label='residuals', s=40, zorder=10)
#ax.legend(loc='upper left', bbox_to_anchor=(0.0, 1.0), frameon=False)
ax.plot(x, y - slopef*x, label='residuals', zorder=10, **psAm)
#ax.legend(loc='upper left', bbox_to_anchor=(0.0, 1.0))
ax = fig.add_axes([0.93, 0.02, 0.07, 0.9])
ax.spines['left'].set_position('zero')
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.get_yaxis().set_tick_params(direction='inout', length=10, width=2)
ax.yaxis.set_ticks_position('left')
ax = fig.add_subplot(spec2[0, 1])
ax.show_spines('l')
ax.set_xlim(0.0, 11.0)
ax.set_xticks([])
ax.set_ylim(-4.0, 12.0)
ax.set_yticks([])
r = y - slopef*x
ax.hist(r, bins, orientation='horizontal', zorder=10)
ax.hist(r, bins, orientation='horizontal', zorder=10, **fsA)
gx = np.arange(-4.0, 12.1, 0.1)
gy = st.norm.pdf(gx, np.mean(r), np.std(r))
ax.plot(1.0+gy*29.0, gx, 'r', lw=2, zorder=5)
ax.plot(1.0+gy*29.0, gx, zorder=5, **lsBm)
plt.savefig('mlepropline.pdf')
#plt.show();

View File

@ -13,19 +13,16 @@ ppi = 72.0
# colors:
colors = {}
colors['red'] = '#CC0000'
colors['red'] = '#DD1000'
colors['orange'] = '#FF9900'
colors['lightorange'] = '#FFCC00'
colors['yellow'] = '#FFFF66'
colors['yellow'] = '#FFF720'
colors['green'] = '#99FF00'
colors['blue'] = '#0010CC'
colors['gray'] = '#A7A7A7'
colors['black'] = '#000000'
colors['white'] = '#FFFFFF'
#colors_bendalab_vivid['red'] = '#D71000'
#colors_bendalab_vivid['orange'] = '#FF9000'
#colors_bendalab_vivid['yellow'] = '#FFF700'
#colors_bendalab_vivid['green'] = '#30D700'
#colors_bendalab_vivid['blue'] = '#0020C0'
@ -36,11 +33,14 @@ mainline = {'linestyle': '-', 'linewidth': lwthick}
minorline = {'linestyle': '-', 'linewidth': lwthin}
largemarker = {'marker': 'o', 'markersize': 9, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
smallmarker = {'marker': 'o', 'markersize': 6, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
filllw = 1.0
fillalpha = 0.5
largelinepoints = {'linestyle': '-', 'linewidth': lwthick, 'marker': 'o', 'markersize': 10, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
smalllinepoints = {'linestyle': '-', 'linewidth': lwthin, 'marker': 'o', 'markersize': 7, 'markeredgecolor': colors['white'], 'markeredgewidth': 1}
filllw = 1
fillec = colors['white']
fillalpha = 0.4
# helper lines:
lsSpine = {'c': colors['black'], 'linestyle': '-', 'linewidth': 1}
lsSpine = {'c': colors['black'], 'linestyle': '-', 'linewidth': 1, 'clip_on': False}
lsGrid = {'c': colors['gray'], 'linestyle': '--', 'linewidth': 1}
lsMarker = {'c': colors['black'], 'linestyle': '-', 'linewidth': 2}
@ -52,9 +52,13 @@ lsMarker = {'c': colors['black'], 'linestyle': '-', 'linewidth': 2}
# - plain style with a thick/solid line (e.g. lsA), and
# - minor style with a thinner or dashed line (e.g. lsAm).
# Point styles come in two variants:
# - plain style with large solid markers (e.g. psA), and
# - minor style with smaller markers (e.g. lsBm).
# Point (marker) styles come in two variants:
# - plain style with large solid markers (e.g. psB), and
# - minor style with smaller markers (e.g. psBm).
# Linepoint styles (markers connected by lines) come in two variants:
# - plain style with large solid markers (e.g. lpsA), and
# - minor style with smaller markers (e.g. lpsAm).
# Fill styles come in three variants:
# - plain (e.g. fsB) for a solid fill color and a darker edge color,
@ -65,13 +69,19 @@ lsA = dict({'color': colors['blue']}, **mainline)
lsAm = dict({'color': colors['blue']}, **minorline)
psA = dict({'color': colors['blue'], 'linestyle': 'none'}, **largemarker)
psAm = dict({'color': colors['blue'], 'linestyle': 'none'}, **smallmarker)
lpsA = dict({'color': colors['blue']}, **largelinepoints)
lpsAm = dict({'color': colors['blue']}, **smalllinepoints)
fsA = {'facecolor': colors['blue'], 'edgecolor': fillec, 'linewidth': filllw}
fsAs = {'facecolor': colors['blue'], 'edgecolor': 'none'}
fsAa = {'facecolor': colors['blue'], 'edgecolor': 'none', 'alpha': fillalpha}
lsB = dict({'color': colors['red']}, **mainline)
lsBm = dict({'color': colors['red']}, **minorline)
psB = dict({'color': colors['red'], 'linestyle': 'none'}, **largemarker)
psBm = dict({'color': colors['red'], 'linestyle': 'none'}, **smallmarker)
fsB = {'facecolor': colors['red'], 'edgecolor': colors['black'], 'linewidth': filllw}
lpsB = dict({'color': colors['red']}, **largelinepoints)
lpsBm = dict({'color': colors['red']}, **smalllinepoints)
fsB = {'facecolor': colors['red'], 'edgecolor': fillec, 'linewidth': filllw}
fsBs = {'facecolor': colors['red'], 'edgecolor': 'none'}
fsBa = {'facecolor': colors['red'], 'edgecolor': 'none', 'alpha': fillalpha}
@ -79,17 +89,25 @@ lsC = dict({'color': colors['lightorange']}, **mainline)
lsCm = dict({'color': colors['lightorange']}, **minorline)
psC = dict({'color': colors['lightorange'], 'linestyle': 'none'}, **largemarker)
psCm = dict({'color': colors['lightorange'], 'linestyle': 'none'}, **smallmarker)
fsC = {'facecolor': colors['lightorange'], 'edgecolor': colors['black'], 'linewidth': filllw}
fsC = {'facecolor': colors['lightorange'], 'edgecolor': fillec, 'linewidth': filllw}
fsCs = {'facecolor': colors['lightorange'], 'edgecolor': 'none'}
fsCa = {'facecolor': colors['lightorange'], 'edgecolor': 'none', 'alpha': fillalpha}
fsD = {'facecolor': colors['orange'], 'edgecolor': colors['black'], 'linewidth': filllw}
lsD = dict({'color': colors['orange']}, **mainline)
lsDm = dict({'color': colors['orange']}, **minorline)
psD = dict({'color': colors['orange'], 'linestyle': 'none'}, **largemarker)
psDm = dict({'color': colors['orange'], 'linestyle': 'none'}, **smallmarker)
fsD = {'facecolor': colors['orange'], 'edgecolor': fillec, 'linewidth': filllw}
fsDs = {'facecolor': colors['orange'], 'edgecolor': 'none'}
fsE = {'facecolor': colors['yellow'], 'edgecolor': colors['black'], 'linewidth': filllw}
lsE = dict({'color': colors['yellow']}, **mainline)
lsEm = dict({'color': colors['yellow']}, **minorline)
psE = dict({'color': colors['yellow'], 'linestyle': 'none'}, **largemarker)
psEm = dict({'color': colors['yellow'], 'linestyle': 'none'}, **smallmarker)
fsE = {'facecolor': colors['yellow'], 'edgecolor': fillec, 'linewidth': filllw}
fsEs = {'facecolor': colors['yellow'], 'edgecolor': 'none'}
fsF = {'facecolor': colors['green'], 'edgecolor': colors['black'], 'linewidth': filllw}
fsF = {'facecolor': colors['green'], 'edgecolor': fillec, 'linewidth': filllw}
fsFs = {'facecolor': colors['green'], 'edgecolor': 'none'}
# factor for scaling widths of bars in a bar plot:
@ -323,6 +341,7 @@ def common_format():
mpl.rcParams['grid.color'] = lsGrid['c']
mpl.rcParams['grid.linestyle'] = lsGrid['linestyle']
mpl.rcParams['grid.linewidth'] = lsGrid['linewidth']
mpl.rcParams['legend.frameon'] = False
mpl.rcParams['axes.facecolor'] = 'none'
mpl.rcParams['axes.edgecolor'] = lsSpine['c']
mpl.rcParams['axes.linewidth'] = lsSpine['linewidth']

View File

@ -16,11 +16,11 @@ def create_data():
def plot_data(ax, x, y, c):
ax.scatter(x, y, marker='o', color=colors['blue'], s=40, zorder=10)
ax.plot(x, y, zorder=10, **psAm)
xx = np.linspace(2.1, 3.9, 100)
ax.plot(xx, c*xx**3.0, color=colors['red'], lw=2, zorder=5)
ax.plot(xx, c*xx**3.0, zorder=5, **lsBm)
for cc in [0.25*c, 0.5*c, 2.0*c, 4.0*c]:
ax.plot(xx, cc*xx**3.0, color=colors['orange'], lw=1.5, zorder=5)
ax.plot(xx, cc*xx**3.0, zorder=5, **lsDm)
ax.set_xlabel('Size x', 'm')
ax.set_ylabel('Weight y', 'kg')
ax.set_xlim(2, 4)
@ -42,15 +42,15 @@ def plot_data_errors(ax, x, y, c):
xytext=(3.4, 70), textcoords='data', ha='left',
arrowprops=dict(arrowstyle="->", relpos=(0.9,1.0),
connectionstyle="angle3,angleA=50,angleB=-30") )
ax.scatter(x[:40], y[:40], color=colors['blue'], s=10, zorder=0)
ax.plot(x[:40], y[:40], zorder=0, **psAm)
inxs = [3, 10, 11, 17, 18, 21, 28, 30, 33]
ax.scatter(x[inxs], y[inxs], color=colors['blue'], s=40, zorder=10)
ax.plot(x[inxs], y[inxs], zorder=10, **psA)
xx = np.linspace(2.1, 3.9, 100)
ax.plot(xx, c*xx**3.0, color=colors['red'], lw=2)
ax.plot(xx, c*xx**3.0, **lsBm)
for i in inxs :
xx = [x[i], x[i]]
yy = [c*x[i]**3.0, y[i]]
ax.plot(xx, yy, color=colors['orange'], lw=2, zorder=5)
ax.plot(xx, yy, zorder=5, **lsDm)
def plot_error_hist(ax, x, y, c):
ax.set_xlabel('Squared error')
@ -67,7 +67,7 @@ def plot_error_hist(ax, x, y, c):
xytext=(800, 3), textcoords='data', ha='left',
arrowprops=dict(arrowstyle="->", relpos=(0.0,0.2),
connectionstyle="angle3,angleA=10,angleB=90") )
ax.hist(errors, bins, color=colors['orange'])
ax.hist(errors, bins, **fsC)

View File

@ -16,11 +16,11 @@ if __name__ == "__main__":
fig, ax = plt.subplots(figsize=cm_size(figure_width, 1.4*figure_height))
fig.subplots_adjust(**adjust_fs(left=6.0, right=1.2))
ax.scatter(x, y, marker='o', color=colors['blue'], s=40, zorder=10)
ax.plot(x, y, zorder=10, **psA)
xx = np.linspace(2.1, 3.9, 100)
ax.plot(xx, c*xx**3.0, color=colors['red'], lw=3, zorder=5)
ax.plot(xx, c*xx**3.0, zorder=5, **lsB)
for cc in [0.25*c, 0.5*c, 2.0*c, 4.0*c]:
ax.plot(xx, cc*xx**3.0, color=colors['orange'], lw=2, zorder=5)
ax.plot(xx, cc*xx**3.0, zorder=5, **lsDm)
ax.set_xlabel('Size x', 'm')
ax.set_ylabel('Weight y', 'kg')
ax.set_xlim(2, 4)

View File

@ -39,9 +39,9 @@ def plot_mse(ax, x, y, c, cs):
for i, cc in enumerate(ccs):
mses[i] = np.mean((y-(cc*x**3.0))**2.0)
ax.plot(ccs, mses, colors['blue'], lw=2, zorder=10)
ax.scatter(cs, ms, color=colors['red'], s=40, zorder=20)
ax.scatter(cs[-1], ms[-1], color=colors['orange'], s=60, zorder=30)
ax.plot(ccs, mses, zorder=10, **lsAm)
ax.plot(cs[:12], ms[:12], zorder=20, **psB)
ax.plot(cs[-1], ms[-1], zorder=30, **psC)
for i in range(4):
ax.annotate('',
xy=(cs[i+1]+0.2, ms[i+1]), xycoords='data',
@ -56,12 +56,12 @@ def plot_mse(ax, x, y, c, cs):
ax.set_yticks(np.arange(0, 30001, 10000))
def plot_descent(ax, cs, mses):
ax.plot(np.arange(len(mses))+1, mses, '-o', c=colors['red'], mew=0, ms=8)
ax.plot(np.arange(len(mses))+1, mses, **lpsBm)
ax.set_xlabel('Iteration')
#ax.set_ylabel('Mean squared error')
ax.set_xlim(0, 10.5)
ax.set_xlim(0, 12.5)
ax.set_ylim(0, 25000)
ax.set_xticks(np.arange(0.0, 10.1, 2.0))
ax.set_xticks(np.arange(0.0, 12.1, 2.0))
ax.set_yticks(np.arange(0, 30001, 10000))
ax.set_yticklabels([])

View File

@ -4,8 +4,7 @@ from plotstyle import *
plain_style()
fig = plt.figure( figsize=(2.5,3.4) )
ax = fig.add_subplot(1, 1, 1)
fig, ax = plt.subplots(figsize=(2.5,3.4))
# parabola:
x1 = -0.2
@ -14,7 +13,7 @@ x = np.linspace(x1, x2, 200)
y = x*x
ax.set_xlim(x1, x2)
ax.set_ylim(-0.2, 0.7)
ax.plot(x, y, c=colors['blue'], lw=4, zorder=0)
ax.plot(x, y, zorder=0, **lsA)
# secant:
x = np.asarray([0.1, 0.7])
y = x*x
@ -22,33 +21,33 @@ ax.set_xticks(x)
ax.set_yticks(y)
ax.set_xticklabels(['$x$','$x+\Delta x$'])
ax.set_yticklabels(['',''])
ax.scatter(x, y, c=colors['red'], edgecolor='none', s=150, zorder=10)
ax.plot(x, y, zorder=10, **psB)
# function values:
ax.plot([x[0], x[0], x1],[-0.2, y[0], y[0]], '--k', lw=1, zorder=6)
ax.plot([x[1], x[1], x1],[-0.2, y[1], y[1]], '--k', lw=1, zorder=6)
ax.plot([x[0], x[0], x1],[-0.2, y[0], y[0]], zorder=6, **lsGrid)
ax.plot([x[1], x[1], x1],[-0.2, y[1], y[1]], zorder=6, **lsGrid)
ax.text(x1+0.05, y[0]+0.05, '$f(x)$', zorder=6)
ax.text(x1+0.05, y[1]+0.05, '$f(x+\Delta x)$', zorder=6)
# slope triangle:
ax.plot([x[0], x[1], x[1]],[y[0], y[0], y[1]], '-k', lw=2, zorder=7)
ax.text(np.mean(x), y[0]-0.08, '$\Delta x$', ha='center', zorder=7)
ax.plot([x[0], x[1], x[1]],[y[0], y[0], y[1]], zorder=7, **lsMarker)
ax.text(np.mean(x), y[0]-0.07, '$\Delta x$', ha='center', zorder=7)
ax.text(x[1]+0.05, np.mean(y), '$f(x+\Delta x)-f(x)$', va='center', rotation='vertical', zorder=7)
# secant line:
m = np.diff(y)/np.diff(x)
xl = [x1, x2]
yl = m*(xl-x[0])+y[0]
ax.plot(xl, yl, c=colors['red'], lw=3, zorder=7)
ax.plot(xl, yl, zorder=7, **lsBm)
# derivative:
md = 2.0*x[0]
yl = md*(xl-x[0])+y[0]
ax.plot(xl, yl, c=colors['yellow'], lw=3, zorder=5)
ax.plot(xl, yl, zorder=5, **lsDm)
# limit:
for ml in np.linspace(md, m, 5)[1:] :
yl = ml*(xl-x[0])+y[0]
xs = 0.5*(ml+np.sqrt(ml*ml-4.0*(ml*x[0]-y[0])))
ax.scatter([xs], [xs*xs], c=colors['orange'], edgecolor='none', s=80, zorder=3)
ax.plot(xl, yl, c=colors['orange'], lw=2, zorder=3)
ax.plot([xs], [xs*xs], zorder=3, **psC)
ax.plot(xl, yl, zorder=3, **lsCm)
fig.subplots_adjust(**adjust_fs(fig, 0.5, 0.5, 1.4, 0.5))
plt.savefig('derivative.pdf')

View File

@ -14,7 +14,7 @@ def create_data():
def plot_data(ax, x, y):
ax.scatter(x, y, marker='o', color=colors['blue'], s=40)
ax.plot(x, y, **psA)
ax.set_xlabel('Input x')
ax.set_ylabel('Output y')
ax.set_xlim(0, 120)
@ -24,10 +24,10 @@ def plot_data(ax, x, y):
def plot_data_slopes(ax, x, y, m, n):
ax.scatter(x, y, marker='o', color=colors['blue'], s=40)
ax.plot(x, y, **psA)
xx = np.asarray([2, 118])
for i in np.linspace(0.3*m, 2.0*m, 5):
ax.plot(xx, i*xx+n, color=colors['red'], lw=2)
ax.plot(xx, i*xx+n, **lsBm)
ax.set_xlabel('Input x')
#ax.set_ylabel('Output y')
ax.set_xlim(0, 120)
@ -37,10 +37,10 @@ def plot_data_slopes(ax, x, y, m, n):
def plot_data_intercepts(ax, x, y, m, n):
ax.scatter(x, y, marker='o', color=colors['blue'], s=40)
ax.plot(x, y, **psA)
xx = np.asarray([2, 118])
for i in np.linspace(n-1*n, n+1*n, 5):
ax.plot(xx, m*xx + i, color=colors['red'], lw=2)
ax.plot(xx, m*xx + i, **lsBm)
ax.set_xlabel('Input x')
#ax.set_ylabel('Output y')
ax.set_xlim(0, 120)

View File

@ -25,15 +25,15 @@ def plot_data(ax, x, y, m, n):
xytext=(80, -50), textcoords='data', ha='left',
arrowprops=dict(arrowstyle="->", relpos=(0.9,1.0),
connectionstyle="angle3,angleA=50,angleB=-30") )
ax.scatter(x[:40], y[:40], color=colors['blue'], s=10, zorder=0)
ax.plot(x[:40], y[:40], zorder=0, **psAm)
inxs = [3, 13, 16, 19, 25, 34, 36]
ax.scatter(x[inxs], y[inxs], color=colors['blue'], s=40, zorder=10)
ax.plot(x[inxs], y[inxs], zorder=10, **psA)
xx = np.asarray([2, 118])
ax.plot(xx, m*xx+n, color=colors['red'], lw=2)
ax.plot(xx, m*xx+n, **lsBm)
for i in inxs :
xx = [x[i], x[i]]
yy = [m*x[i]+n, y[i]]
ax.plot(xx, yy, color=colors['orange'], lw=2, zorder=5)
ax.plot(xx, yy, zorder=5, **lsDm)
def plot_error_hist(ax, x, y, m, n):
@ -51,7 +51,7 @@ def plot_error_hist(ax, x, y, m, n):
xytext=(350, 20), textcoords='data', ha='left',
arrowprops=dict(arrowstyle="->", relpos=(0.0,0.2),
connectionstyle="angle3,angleA=10,angleB=90") )
ax.hist(errors, bins, color=colors['orange'])
ax.hist(errors, bins, **fsD)

View File

@ -24,17 +24,17 @@ ax.set_ylim(-0.05, 1.05)
ax.set_yticks(np.arange(0.0, 1.1, 0.2))
med = xs[cdf>=0.5][0]
ax.plot([-3.2, med, med], [0.5, 0.5, 0.0], zorder=-5, **lsMarker)
ax.plot([-3.2, med, med], [0.5, 0.5, 0.0], zorder=-5, **lsSpine)
ax.text(-2.8, 0.55, 'F=0.5')
ax.text(0.15, 0.25, 'median at %.2f' % med)
q3 = xs[cdf>=0.75][0]
ax.plot([-3.2, q3, q3], [0.75, 0.75, 0.0], zorder=-5, **lsMarker)
ax.plot([-3.2, q3, q3], [0.75, 0.75, 0.0], zorder=-5, **lsSpine)
ax.text(-2.8, 0.8, 'F=0.75')
ax.text(0.8, 0.5, '3. quartile at %.2f' % q3)
p = cdf[xs>=-1.0][0]
ax.plot([-3.2, -1.0, -1.0], [p, p, 0.0], zorder=-5, **lsMarker)
ax.plot([-3.2, -1.0, -1.0], [p, p, 0.0], zorder=-5, **lsSpine)
ax.text(-2.8, 0.2, 'F=%.2f' % p)
ax.text(-0.9, 0.05, '-1')

View File

@ -25,6 +25,6 @@ ax2.set_xticks(range(1, 7))
ax2.set_xlabel('x')
ax2.set_ylim(0, 0.23)
ax2.set_ylabel('Probability')
ax2.plot([0.2, 6.8], [1.0/6.0, 1.0/6.0], zorder=1, **lsAm)
ax2.hist([x2, x1], bins, normed=True, zorder=10, **fs)
ax2.plot([0.2, 6.8], [1.0/6.0, 1.0/6.0], zorder=-10, **lsAm)
ax2.hist([x2, x1], bins, normed=True, zorder=-5, **fs)
fig.savefig('diehistograms.pdf')

View File

@ -14,7 +14,7 @@ scatterpos = 1.0
barpos = 2.5
boxpos = 4.0
fig = plt.figure(figsize=cm_size(figure_width, 1.2*figure_height))
fig = plt.figure(figsize=cm_size(figure_width, 1.1*figure_height))
spec = gridspec.GridSpec(nrows=1, ncols=2, width_ratios=[3, 1], wspace=0.1,
**adjust_fs(fig, left=4.0))
@ -53,7 +53,7 @@ ax.set_xticklabels([])
ax = fig.add_subplot(spec[0, 0])
ax.set_xlim(0.0, 4.8)
ax.set_xticks([scatterpos, barpos, boxpos])
ax.set_xticklabels(['(1) data', '(2) bar\n plot', '(3) box-\nwhisker'])
ax.set_xticklabels(['(1) data', '(2) bar\n plot', '(3) box-\nwhisker'], fontsize='medium')
ax.set_ylabel('x')
ax.set_ylim( 0.0, 8.0)
@ -85,7 +85,7 @@ ax = fig.add_subplot(spec[0, 1])
ax.set_yticklabels([])
ax.set_ylim( 0.0, 8.0)
ax.set_xticks(np.arange(0.0, 0.4, 0.1))
ax.set_xlabel('(4) p(x)')
ax.set_xlabel('(4) pdf')
bw = 0.75
bins = np.arange(0, 8.0+bw, bw)
h, b = np.histogram(data, bins)

View File

@ -60,7 +60,7 @@ ax = fig.add_subplot(spec[:, 1])
ax.set_xlabel('x')
ax.set_xlim(-3.2, 3.2)
ax.set_xticks(np.arange(-3.0, 3.1, 1.0))
ax.set_ylabel('Probab. density p(x)')
ax.set_ylabel('Prob. density p(x)')
ax.set_ylim(0.0, 0.49)
ax.set_yticks(np.arange(0.0, 0.41, 0.1))
kd, xx = kerneldensity(r, -3.2, 3.2, 0.2)

View File

@ -115,17 +115,9 @@ function \mcode{median()} computes the median.
writing reliable code!
\end{exercise}
\begin{figure}[t]
\includegraphics[width=1\textwidth]{quartile}
\titlecaption{\label{quartilefig} Median and quartiles of a normal
distribution.}{ The interquartile range between the first and the
third quartile contains 50\,\% of the data and contains the
median.}
\end{figure}
The distribution of data can be further characterized by the position
of its \entermde[quartile]{Quartil}{quartiles}. Neighboring quartiles are
separated by 25\,\% of the data (\figref{quartilefig}).
separated by 25\,\% of the data.% (\figref{quartilefig}).
\entermde[percentile]{Perzentil}{Percentiles} allow us to characterize the
distribution of the data in more detail. The 3$^{\rm rd}$ quartile
corresponds to the 75$^{\rm th}$ percentile, because 75\,\% of the
@ -156,15 +148,13 @@ median that extends from the 1$^{\rm st}$ to the 3$^{\rm rd}$
quartile. The whiskers mark the minimum and maximum value of the data
set (\figref{displayunivariatedatafig} (3)).
\begin{exercise}{univariatedata.m}{}
Generate 40 normally distributed random numbers with a mean of 2 and
illustrate their distribution in a box-whisker plot
(\code{boxplot()} function), with a bar and errorbar illustrating
the mean and standard deviation (\code{bar()}, \code{errorbar()}),
and the data themselves jittered randomly (as in
\figref{displayunivariatedatafig}). How to interpret the different
plots?
\end{exercise}
% \begin{figure}[t]
% \includegraphics[width=1\textwidth]{quartile}
% \titlecaption{\label{quartilefig} Median and quartiles of a normal
% distribution.}{ The interquartile range between the first and the
% third quartile contains 50\,\% of the data and contains the
% median.}
% \end{figure}
% \begin{exercise}{boxwhisker.m}{}
% Generate a $40 \times 10$ matrix of random numbers and
@ -201,6 +191,16 @@ Histograms are often used to estimate the
\enterm[probability!distribution]{probability distribution}
(\determ[Wahrscheinlichkeits!-verteilung]{Wahrscheinlichkeitsverteilung}) of the data values.
\begin{exercise}{univariatedata.m}{}
Generate 40 normally distributed random numbers with a mean of 2 and
illustrate their distribution in a box-whisker plot
(\code{boxplot()} function), with a bar and errorbar illustrating
the mean and standard deviation (\code{bar()}, \code{errorbar()}),
and the data themselves jittered randomly (as in
\figref{displayunivariatedatafig}). How do you interpret the different
plots?
\end{exercise}
\subsection{Probabilities}
In the frequentist interpretation of probability, the
\enterm{probability} (\determ{Wahrscheinlichkeit}) of an event
@ -252,7 +252,7 @@ real number like, e.g., 0.123456789 is zero, because there are
uncountably many real numbers.
We can only ask for the probability to get a measurement value in some
range. For example, we can ask for the probability $P(1.2<x<1.3)$ to
range. For example, we can ask for the probability $P(0<x<1)$ to
get a measurement between 0 and 1 (\figref{pdfprobabilitiesfig}). More
generally, we want to know the probability $P(x_0<x<x_1)$ to obtain a
measurement between $x_0$ and $x_1$. If we define the width of the
@ -280,7 +280,7 @@ inverse of the unit of the data values --- hence the name ``density''.
\end{figure}
The probability to get a value $x$ between $x_1$ and $x_2$ is
given by the integral of the probability density:
given by an integral over the probability density:
\[ P(x_1 < x < x_2) = \int\limits_{x_1}^{x_2} p(x) \, dx \; . \]
Because the probability to get any value $x$ is one, the integral of
the probability density over the whole real axis must be one:
@ -329,7 +329,7 @@ values fall within each bin (\figref{pdfhistogramfig} left).
observe?
\end{exercise}
To turn such histograms to estimates of probability densities they
To turn such histograms into estimates of probability densities they
need to be normalized such that according to \eqnref{pdfnorm} their
integral equals one. While histograms of categorical data are
normalized such that their sum equals one, here we need to integrate
@ -343,7 +343,7 @@ and the
\[ p(x_i) = \frac{n_i}{A} = \frac{n_i}{\Delta x \sum_{i=1}^N n_i} =
\frac{n_i}{N \Delta x} \; .\]
A histogram needs to be divided by both the sum of the frequencies
$n_i$ and the bin width $\Delta x$ to results in an estimate of the
$n_i$ and the bin width $\Delta x$ to result in an estimate of the
corresponding probability density. Only then can the distribution be
compared with other distributions and in particular with theoretical
probability density functions like the one of the normal distribution
@ -371,19 +371,20 @@ probability density functions like the one of the normal distribution
A problem of using histograms for estimating probability densities is
that they have hard bin edges. Depending on where the bin edges are
placed a data value falls in one or the other bin. As a result the
shape histogram depends on the exact position of its bins
(\figref{kerneldensityfig} left).
shape of the resulting histogram depends on the exact position of its
bins (\figref{kerneldensityfig} left).
\begin{figure}[t]
\includegraphics[width=1\textwidth]{kerneldensity}
\titlecaption{\label{kerneldensityfig} Kernel densities.}{Left: The
histogram-based estimation of the probability density is dependent
on the position of the bins. In the bottom plot the bins have
\titlecaption{\label{kerneldensityfig} Kernel densities.}{The
histogram-based estimation of the probability density depends on
the position of the bins (left). In the bottom plot the bins have
been shifted by half a bin width (here $\Delta x=0.4$) and as a
result details of the probability density look different. Look,
for example, at the height of the largest bin. Right: In contrast,
a kernel density is uniquely defined for a given kernel width
(here Gaussian kernels with standard deviation of $\sigma=0.2$).}
for example, at the height of the largest bin. In contrast, a
kernel density is uniquely defined for a given kernel width
(right, Gaussian kernels with standard deviation of
$\sigma=0.2$).}
\end{figure}
To avoid this problem so-called \entermde[kernel
@ -460,7 +461,6 @@ and percentiles can be determined from the inverse cumulative function.
Use the estimate to compute the value of the 5\,\% percentile.
\end{exercise}
\newpage
\section{Correlations}
Until now we described properties of univariate data sets. In