# scientificComputing/statistics/lecture/correlation.py

import numpy as np
import matplotlib.pyplot as plt

# Render everything that follows in matplotlib's hand-drawn "xkcd" style.
plt.xkcd()

n = 200  # number of random points shown in each panel
fig = plt.figure(figsize=(6, 5))

# One row per panel:
# (correlation coefficient, caption x, caption y, caption text, caption alignment)
panels = (
    (1.0, 2.8, -2, 'positively\ncorrelated', 'right'),
    (0.6, 2.8, -2.5, 'weakly\ncorrelated', 'right'),
    (0.0, 2.8, -2.5, 'not\ncorrelated', 'right'),
    (-0.9, -2.5, -2, 'negatively\ncorrelated', 'left'),
)

for slot, (rho, cap_x, cap_y, caption, align) in enumerate(panels, start=1):
    # Mix x with independent standard-normal noise so that y keeps unit
    # variance and has correlation rho with x.
    x = np.random.randn(n)
    noise = np.random.randn(n)
    y = rho*x + np.sqrt(1.0 - rho*rho)*noise

    ax = fig.add_subplot(2, 2, slot)

    # Keep only the left and bottom axis lines, with ticks on those sides.
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')

    # Annotate the panel with its coefficient and a verbal description.
    ax.text(-2, 2.5, 'r=%.1f' % rho)
    ax.text(cap_x, cap_y, caption, ha=align)

    # Identical labels and limits on every panel so the clouds are comparable.
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_xlim(-3.0, 3.0)
    ax.set_ylim(-3.0, 3.0)

    ax.scatter(x, y)

plt.tight_layout()
plt.savefig('correlation.pdf')
# To inspect the figure interactively, call plt.show() here.