From 82b4d5e0808ce2c56dd143d9716657f36cf81932 Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Mon, 25 Nov 2019 22:19:10 +0100
Subject: [PATCH] [statistics] improved scatter of displayunivariatedata.py

---
 statistics/lecture/displayunivariatedata.py | 90 +++++++++++----------
 1 file changed, 47 insertions(+), 43 deletions(-)

diff --git a/statistics/lecture/displayunivariatedata.py b/statistics/lecture/displayunivariatedata.py
index 55130a8..97816f2 100644
--- a/statistics/lecture/displayunivariatedata.py
+++ b/statistics/lecture/displayunivariatedata.py
@@ -1,5 +1,6 @@
 import numpy as np
 import matplotlib.pyplot as plt
+from scipy.stats import gaussian_kde
 
 #rng = np.random.RandomState(981)
 #data = rng.randn(40, 1) + 4.0
@@ -10,58 +11,23 @@ xpos = 0.08
 ypos = 0.15
 width = 0.65
 height = 0.8
-
-plt.xkcd()
-fig = plt.figure( figsize=(6,3.4) )
-
-ax = fig.add_axes([xpos, ypos, width, height])
-ax.spines['right'].set_visible(False)
-ax.spines['top'].set_visible(False)
-ax.yaxis.set_ticks_position('left')
-ax.xaxis.set_ticks_position('bottom')
-ax.set_xticklabels([])
-ax.set_xlim(0.0, 4.8)
-ax.set_ylabel('x')
-ax.set_ylim( 0.0, 8.0)
 barwidth = 0.8
-
 scatterpos = 1.0
 barpos = 2.5
 boxpos = 4.0
 
-ax.set_xticks([scatterpos, barpos, boxpos])
-ax.set_xticklabels(['(1) data', '(2) bar\n plot', '(3) box-\nwhisker'])
-
-ax.scatter(scatterpos-0.5*barwidth+rng.rand(len(data)), data, s=50)
-
-barmean = np.mean(data)
-barstd = np.std(data)
-ew = 0.2
-ax.bar([barpos-0.5*barwidth], [barmean], barwidth, color='#FFCC00')
-eargs = {'color': 'k', 'lw': 2}
-ax.plot([barpos, barpos], [barmean-barstd, barmean+barstd], **eargs)
-ax.plot([barpos-0.5*ew, barpos+0.5*ew], [barmean-barstd, barmean-barstd], **eargs)
-ax.plot([barpos-0.5*ew, barpos+0.5*ew], [barmean+barstd, barmean+barstd], **eargs)
-ax.annotate('mean',
-            xy=(barpos-0.4*barwidth, 2.7), xycoords='data',
-            xytext=(barpos-1*barwidth, 5.5), textcoords='data', ha='left',
-            arrowprops=dict(arrowstyle="->", relpos=(1.0,0.5),
-            connectionstyle="angle3,angleA=0,angleB=120") )
-ax.annotate('mean plus\nstd. dev.',
-            xy=(barpos+0.05*barwidth, 4.2), xycoords='data',
-            xytext=(barpos-1*barwidth, 7.0), textcoords='data', ha='left',
-            arrowprops=dict(arrowstyle="->", relpos=(0.5,0.0),
-            connectionstyle="angle3,angleA=-60,angleB=80") )
+plt.xkcd()
+fig = plt.figure( figsize=(6,3.4) )
 
 ax = fig.add_axes([xpos, ypos, width, height])
 ax.spines['right'].set_visible(False)
 ax.spines['top'].set_visible(False)
-ax.spines['left'].set_visible(False)
-ax.spines['bottom'].set_visible(False)
-ax.xaxis.set_ticks_position('none')
-ax.yaxis.set_ticks_position('none')
-ax.set_xticklabels([])
-ax.set_yticklabels([])
+#ax.spines['left'].set_visible(False)
+#ax.spines['bottom'].set_visible(False)
+#ax.xaxis.set_ticks_position('none')
+#ax.yaxis.set_ticks_position('none')
+#ax.set_xticklabels([])
+#ax.set_yticklabels([])
 wh = ax.boxplot( data, positions=[boxpos], widths=[barwidth], whis=100.0, patch_artist=True)
 wh['medians'][0].set_linewidth(4)
 wh['whiskers'][0].set_linewidth(2)
@@ -92,6 +58,44 @@ ax.annotate('median',
             arrowprops=dict(arrowstyle="->", relpos=(0.8,0.0),
             connectionstyle="angle3,angleA=-60,angleB=20") )
 
+ax = fig.add_axes([xpos, ypos, width, height])  # same rectangle as the axes holding the box plot
+ax.spines['right'].set_visible(False)  # keep only the left and bottom spines
+ax.spines['top'].set_visible(False)
+ax.yaxis.set_ticks_position('left')
+ax.xaxis.set_ticks_position('bottom')
+ax.set_xticklabels([])  # cleared here; the actual labels are set after set_xticks() below
+ax.set_xlim(0.0, 4.8)  # spans scatterpos, barpos and boxpos
+ax.set_ylabel('x')
+ax.set_ylim( 0.0, 8.0)
+
+ax.set_xticks([scatterpos, barpos, boxpos])  # one tick per plot type
+ax.set_xticklabels(['(1) data', '(2) bar\n plot', '(3) box-\nwhisker'])
+
+# jitter the data points horizontally, scaled by the estimated density at each point:
+kernel = gaussian_kde(data)  # Gaussian kernel density estimate of the data
+x = kernel(data)  # density evaluated at every data point
+x /= np.max(x)  # normalize so that the largest density is 1
+ax.scatter(scatterpos+barwidth*x*(rng.rand(len(data))-0.5), data, s=50)  # rng is defined outside this hunk; presumably rand() is uniform on [0, 1), giving offsets within +-0.5*barwidth
+
+barmean = np.mean(data)
+barstd = np.std(data)  # numpy default, i.e. ddof=0
+ew = 0.2  # width of the horizontal caps of the error bar
+ax.bar([barpos-0.5*barwidth], [barmean], barwidth, color='#FFCC00')  # NOTE(review): x is shifted by half a bar width, which matches the error bar at barpos only with align='edge' - confirm against the Matplotlib version in use
+eargs = {'color': 'k', 'lw': 2}  # line style shared by the three error-bar segments
+ax.plot([barpos, barpos], [barmean-barstd, barmean+barstd], **eargs)  # vertical line from mean-std to mean+std
+ax.plot([barpos-0.5*ew, barpos+0.5*ew], [barmean-barstd, barmean-barstd], **eargs)  # lower cap
+ax.plot([barpos-0.5*ew, barpos+0.5*ew], [barmean+barstd, barmean+barstd], **eargs)  # upper cap
+ax.annotate('mean',  # arrow target below is a fixed data coordinate, not computed from barmean
+            xy=(barpos-0.4*barwidth, 2.7), xycoords='data',
+            xytext=(barpos-1*barwidth, 5.5), textcoords='data', ha='left',
+            arrowprops=dict(arrowstyle="->", relpos=(1.0,0.5),
+            connectionstyle="angle3,angleA=0,angleB=120") )
+ax.annotate('mean plus\nstd. dev.',  # arrow target below is a fixed data coordinate, not computed from barmean+barstd
+            xy=(barpos+0.05*barwidth, 4.2), xycoords='data',
+            xytext=(barpos-1*barwidth, 7.0), textcoords='data', ha='left',
+            arrowprops=dict(arrowstyle="->", relpos=(0.5,0.0),
+            connectionstyle="angle3,angleA=-60,angleB=80") )
+
 ax = fig.add_axes([xpos+width+0.03, ypos, 0.98-(xpos+width+0.03), height])
 ax.spines['right'].set_visible(False)
 ax.spines['top'].set_visible(False)