pymc-devs · twiecki · May 18, 2017 · May 17, 2017 · May 18, 2017 · twiecki
diff --git a/pymc3/plots/__init__.py b/pymc3/plots/__init__.py
@@ -1,7 +1,7 @@
 from .autocorrplot import autocorrplot
 from .compareplot import compareplot
 from .forestplot import forestplot
-from .kdeplot import kdeplot, kde2plot
+from .kdeplot import kdeplot
 from .posteriorplot import plot_posterior, plot_posterior_predictive_glm
 from .traceplot import traceplot
 from .energyplot import energyplot
diff --git a/pymc3/plots/artists.py b/pymc3/plots/artists.py
@@ -1,8 +1,8 @@
 import numpy as np
-from scipy.stats import kde, mode
+from scipy.stats import mode
 
 from pymc3.stats import hpd
-from .utils import fast_kde
+from .kdeplot import fast_kde, kdeplot
 
 
 def _histplot_bins(column, bins=100):
@@ -46,26 +46,7 @@ def kdeplot_op(ax, data, prior=None, prior_alpha=1, prior_style='--'):
     return ls, pls
 
 
-def kde2plot_op(ax, x, y, grid=200, **kwargs):
-    xmin = x.min()
-    xmax = x.max()
-    ymin = y.min()
-    ymax = y.max()
-    extent = kwargs.pop('extent', [])
-    if len(extent) != 4:
-        extent = [xmin, xmax, ymin, ymax]
-
-    grid = grid * 1j
-    X, Y = np.mgrid[xmin:xmax:grid, ymin:ymax:grid]
-    positions = np.vstack([X.ravel(), Y.ravel()])
-    values = np.vstack([x, y])
-    kernel = kde.gaussian_kde(values)
-    Z = np.reshape(kernel(positions).T, X.shape)
-
-    ax.imshow(np.rot90(Z), extent=extent, **kwargs)
-
-
-def plot_posterior_op(trace_values, figsize, ax, kde_plot, point_estimate, round_to,
+def plot_posterior_op(trace_values, ax, kde_plot, point_estimate, round_to,
                       alpha_level, ref_val, rope, text_size=16, **kwargs):
     """Artist to draw posterior."""
     def format_as_percent(x, round_to=0):
@@ -139,9 +120,8 @@ def set_key_if_doesnt_exist(d, key, value):
             d[key] = value
 
     if kde_plot:
-        density, l, u = fast_kde(trace_values)
-        x = np.linspace(l, u, len(density))
-        ax.plot(x, density, figsize=figsize, **kwargs)
+        kdeplot(trace_values, alpha=0.35, ax=ax, **kwargs)
+
     else:
         set_key_if_doesnt_exist(kwargs, 'bins', 30)
         set_key_if_doesnt_exist(kwargs, 'edgecolor', 'w')
@@ -157,3 +137,5 @@ def set_key_if_doesnt_exist(d, key, value):
         display_ref_val(ref_val)
     if rope is not None:
         display_rope(rope)
+
+
diff --git a/pymc3/plots/energyplot.py b/pymc3/plots/energyplot.py
@@ -1,9 +1,10 @@
 import matplotlib.pyplot as plt
 import numpy as np
 
-from .utils import fast_kde
+from .kdeplot import kdeplot
 
-def energyplot(trace, kind='kde', figsize=None, ax=None, legend=True, lw=0, alpha=0.5, frame=True, **kwargs):
+def energyplot(trace, kind='kde', figsize=None, ax=None, legend=True, lw=0,
+               alpha=0.35, frame=True, **kwargs):
     """Plot energy transition distribution and marginal energy distribution in order
     to diagnose poor exploration by HMC algorithms.
 
@@ -37,8 +38,8 @@ def energyplot(trace, kind='kde', figsize=None, ax=None, legend=True, lw=0, alph
     except KeyError:
         print('There is no energy information in the passed trace.')
         return ax
-    series_dict = {'Marginal energy distribution': energy - energy.mean(),
-                'Energy transition distribution': np.diff(energy)}
+    series = [('Marginal energy distribution', energy - energy.mean()),
+              ('Energy transition distribution', np.diff(energy))]
 
     if figsize is None:
         figsize = (8, 6)
@@ -47,15 +48,13 @@ def energyplot(trace, kind='kde', figsize=None, ax=None, legend=True, lw=0, alph
         _, ax = plt.subplots(figsize=figsize)
 
     if kind == 'kde':
-        for series in series_dict:
-            density, l, u = fast_kde(series_dict[series])
-            x = np.linspace(l, u, len(density))
-            ax.plot(x, density, label=series, **kwargs)
-            ax.fill_between(x, density, alpha=alpha)
-
+        for label, value in series:
+            kdeplot(value, label=label, alpha=alpha, shade=True, ax=ax,
+                     **kwargs)
+
     elif kind == 'hist':
-        for series in series_dict:
-            ax.hist(series_dict[series], lw=lw, alpha=alpha, label=series, **kwargs)
+        for label, value in series:
+            ax.hist(value, lw=lw, alpha=alpha, label=label, **kwargs)
 
     else:
         raise ValueError('Plot type {} not recognized.'.format(kind))

diff --git a/pymc3/plots/kdeplot.py b/pymc3/plots/kdeplot.py
@@ -1,17 +1,73 @@
 import matplotlib.pyplot as plt
+import numpy as np
+from scipy.signal import gaussian, convolve
 
-from .artists import kdeplot_op, kde2plot_op
 
-
-def kdeplot(data, ax=None):
+def kdeplot(trace_values, label=None, alpha=0.35, shade=False, ax=None,
+             **kwargs):  # kwargs go to ax.plot and, when shading, fill_between
     if ax is None:
-        _, ax = plt.subplots(1, 1, squeeze=True)
-    kdeplot_op(ax, data)
+        _, ax = plt.subplots()  # no axes given: draw on a new figure
+    density, l, u = fast_kde(trace_values)  # density grid, data min, data max
+    x = np.linspace(l, u, len(density))  # x coordinate of each grid value
+    ax.plot(x, density, label=label, **kwargs)
+    if shade:  # alpha only affects this optional fill
+        ax.fill_between(x, density, alpha=alpha, **kwargs)
     return ax
+
+def fast_kde(x):
+    """
+    Fast Gaussian kernel density estimate (KDE) of 1D samples on a regular
+    200-point grid, obtained by smoothing a histogram with a Gaussian window.
+    The code was adapted from https://github.com/mfouesneau/faststats
+
+    Parameters
+    ----------
+    x : Numpy array or list
+        Samples. NaN and infinite entries are dropped; the input itself is
+        never modified. Raises ValueError if no finite value remains.
+
+    Returns
+    -------
+    grid: A gridded 1D KDE of the input points (x).
+    xmin: minimum value of x
+    xmax: maximum value of x
+
+    """
+    # work on floats so lists and integer arrays survive the in-place jitter
+    x = np.asarray(x, dtype=float)
+    x = x[np.isfinite(x)]  # boolean indexing copies: caller's data is safe
+    n = len(x)
+    if n == 0:
+        raise ValueError('fast_kde needs at least one finite value.')
+    nx = 200
+
+    # add small jitter in case input values are the same
+    x += np.random.uniform(-1E-12, 1E-12, size=n)
+    xmin, xmax = np.min(x), np.max(x)
+
+    # compute histogram
+    bins = np.linspace(xmin, xmax, nx)
+    xyi = np.digitize(x, bins)
+    dx = (xmax - xmin) / (nx - 1)
+    grid = np.histogram(x, bins=nx)[0]
+
+    # Scaling factor for bandwidth
+    scotts_factor = n ** (-0.2)
+    # Determine the bandwidth using Scott's rule (in units of grid cells)
+    std_x = np.std(xyi)
+    kern_nx = int(np.round(scotts_factor * 2 * np.pi * std_x))
+
+    # Evaluate the gaussian function on the kernel grid
+    kernel = np.reshape(gaussian(kern_nx, scotts_factor * std_x), kern_nx)
+
+    # Compute the KDE
+    # use symmetric padding to correct for data boundaries in the kde
+    npad = np.min((nx, 2 * kern_nx))
+    grid = np.concatenate([grid[npad: 0: -1], grid, grid[nx: nx - npad: -1]])
+    grid = convolve(grid, kernel, mode='same')[npad: npad + nx]
+
+    # rescale smoothed counts to a density (sample count * step * kernel area)
+    norm_factor = n * dx * (2 * np.pi * std_x ** 2 * scotts_factor ** 2) ** 0.5
+    return grid / norm_factor, xmin, xmax
 
 
-def kde2plot(x, y, grid=200, ax=None, **kwargs):
-    if ax is None:
-        _, ax = plt.subplots(1, 1, squeeze=True)
-    kde2plot_op(ax, x, y, grid, **kwargs)
-    return ax
diff --git a/pymc3/plots/posteriorplot.py b/pymc3/plots/posteriorplot.py
@@ -79,8 +79,8 @@ def get_trace_dict(tr, varnames):
         if figsize is None:
             figsize = (6, 2)
         if ax is None:
-            fig, ax = plt.subplots()
-        plot_posterior_op(transform(trace), figsize=figsize, ax=ax, kde_plot=kde_plot,
+            fig, ax = plt.subplots(figsize=figsize)
+        plot_posterior_op(transform(trace), ax=ax, kde_plot=kde_plot,
                           point_estimate=point_estimate, round_to=round_to,
                           alpha_level=alpha_level, ref_val=ref_val, rope=rope, text_size=text_size, **kwargs)
     else:
@@ -94,7 +94,7 @@ def get_trace_dict(tr, varnames):
 
         for a, v in zip(np.atleast_1d(ax), trace_dict):
             tr_values = transform(trace_dict[v])
-            plot_posterior_op(tr_values, figsize=figsize, ax=a, kde_plot=kde_plot,
+            plot_posterior_op(tr_values, ax=a, kde_plot=kde_plot,
                               point_estimate=point_estimate, round_to=round_to,
                               alpha_level=alpha_level, ref_val=ref_val, rope=rope, text_size=text_size, **kwargs)
             a.set_title(v)
@@ -138,3 +138,5 @@ def plot_posterior_predictive_glm(trace, eval=None, lm=None, samples=30, **kwarg
         kwargs.pop('label', None)
 
     plt.title('Posterior predictive')
+
+
diff --git a/pymc3/plots/utils.py b/pymc3/plots/utils.py
@@ -1,6 +1,5 @@
 import matplotlib.pyplot as plt
 import numpy as np
-from scipy.signal import gaussian, convolve
 # plotting utilities can all be in this namespace
 from ..util import get_default_varnames  # pylint: disable=unused-import
 
@@ -41,58 +40,3 @@ def make_2d(a):
     return a
 
 
-def fast_kde(x):
-    """
-    A fft-based Gaussian kernel density estimate (KDE) for computing
-    the KDE on a regular grid.
-    The code was adapted from https://github.com/mfouesneau/faststats
-
-    Parameters
-    ----------
-
-    x : Numpy array or list
-
-    Returns
-    -------
-
-    grid: A gridded 1D KDE of the input points (x).
-    xmin: minimum value of x
-    xmax: maximum value of x
-
-    """
-    x = x[~np.isnan(x)]
-    x = x[~np.isinf(x)]
-    n = len(x)
-    nx = 200
-
-    # add small jitter in case input values are the same
-    x += np.random.uniform(-1E-12, 1E-12, size=n)
-    xmin, xmax = np.min(x), np.max(x)
-
-    # compute histogram
-    bins = np.linspace(xmin, xmax, nx)
-    xyi = np.digitize(x, bins)
-    dx = (xmax - xmin) / (nx - 1)
-    grid = np.histogram(x, bins=nx)[0]
-
-    # Scaling factor for bandwidth
-    scotts_factor = n ** (-0.2)
-    # Determine the bandwidth using Scott's rule
-    std_x = np.std(xyi)
-    kern_nx = int(np.round(scotts_factor * 2 * np.pi * std_x))
-
-    # Evaluate the gaussian function on the kernel grid
-    kernel = np.reshape(gaussian(kern_nx, scotts_factor * std_x), kern_nx)
-
-    # Compute the KDE
-    # use symmetric padding to correct for data boundaries in the kde
-    npad = np.min((nx, 2 * kern_nx))
-
-    grid = np.concatenate([grid[npad: 0: -1], grid, grid[nx: nx - npad: -1]])
-    grid = convolve(grid, kernel, mode='same')[npad: npad + nx]
-
-    norm_factor = n * dx * (2 * np.pi * std_x ** 2 * scotts_factor ** 2) ** 0.5
-
-    grid = grid / norm_factor
-
-    return grid, xmin, xmax