import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt

# --- Linear regression on raw data ------------------------------------------
# Synthetic sample: the noiseless line ytrue = 2*x evaluated every 0.5 on
# [0, 10), with Gaussian noise (standard deviation 5) added on top.
x = np.arange(0.0, 10.0, 0.5)
ytrue = 2.0*x
noise = 5.0*np.random.randn(x.size)
y = ytrue + noise

# Degree-1 least-squares fit: z holds the coefficients (slope first) and p is
# the callable polynomial built from them.
z = np.polyfit(x, y, deg=1)
p = np.poly1d(z)

# Pearson correlation coefficient; the accompanying p-value is not needed.
r = st.pearsonr(x, y)[0]

fitted = p(x)
residuals = y - fitted
total_var = np.var(y)
residual_var = np.var(residuals)

# "variance explained" is deliberately reported twice -- once from the
# residual variance and once as r squared -- so the two can be compared.
for template, value in (
    ('total variance: %g', total_var),
    ('residual variance: %g', residual_var),
    ('variance explained: %g', 1.0 - residual_var/total_var),
    ('r: %g', r),
    ('variance explained: %g', r*r),
    ('slope: %g', z[0]),
):
    print(template % value)

# Left panel: true line (red), noisy samples and fitted line (blue).
# Right panel: residuals of the fit, on the same axes scales.
fig, axs = plt.subplots(1, 2, sharex=True, sharey=True)
fit_ax, resid_ax = axs
fit_ax.plot(x, ytrue, 'r')
fit_ax.scatter(x, y, c='b')
fit_ax.plot(x, fitted, 'b')
resid_ax.scatter(x, residuals, c='b')
# No plt.show() here: this figure is displayed by the plt.show() call at the
# end of the script.

# --- Linear regression on standardized data ---------------------------------
# Rescale both variables to zero mean and unit standard deviation (NumPy's
# default population std, ddof=0).
xn = (x - x.mean())/x.std()
yn = (y - y.mean())/y.std()

# Refit on the standardized variables. For standardized data the least-squares
# slope equals the Pearson correlation coefficient, so the two values printed
# below are expected to agree up to floating-point error.
z = np.polyfit(xn, yn, deg=1)
p = np.poly1d(z)
r = st.pearsonr(xn, yn)[0]

for template, value in (
    ('r: %g', r),
    ('slope: %g', z[0]),
):
    print(template % value)

# Standardized samples with their fitted line.
fig, ax = plt.subplots()
ax.scatter(xn, yn, c='b')
ax.plot(xn, p(xn), 'b')

# Display every figure created so far.
plt.show()