diff --git a/pymc3/plots.py b/pymc3/plots.py index 44e9a7c7f9..fb7198bc46 100644 --- a/pymc3/plots.py +++ b/pymc3/plots.py @@ -4,6 +4,7 @@ import matplotlib.pyplot as plt import pymc3 as pm from .stats import quantiles, hpd +from scipy.signal import gaussian, convolve __all__ = ['traceplot', 'kdeplot', 'kde2plot', 'forestplot', 'autocorrplot', 'plot_posterior'] @@ -120,16 +121,15 @@ def kdeplot_op(ax, data, prior=None, prior_alpha=1, prior_style='--'): for i in range(data.shape[1]): d = data[:, i] try: - density = kde.gaussian_kde(d) - l = np.min(d) - u = np.max(d) - x = np.linspace(0, 1, 100) * (u - l) + l + density, l, u = fast_kde(d) + x = np.linspace(l, u, len(density)) if prior is not None: p = prior.logp(x).eval() ax.plot(x, np.exp(p), alpha=prior_alpha, ls=prior_style) - ax.plot(x, density(x)) + ax.plot(x, density) + ax.set_ylim(bottom=0) except LinAlgError: errored.append(i) @@ -721,11 +721,9 @@ def set_key_if_doesnt_exist(d, key, value): d[key] = value if kde_plot: - density = kde.gaussian_kde(trace_values) - l = np.min(trace_values) - u = np.max(trace_values) - x = np.linspace(0, 1, 100) * (u - l) + l - ax.plot(x, density(x), **kwargs) + density, l, u = fast_kde(trace_values) + x = np.linspace(l, u, len(density)) + ax.plot(x, density, **kwargs) else: set_key_if_doesnt_exist(kwargs, 'bins', 30) set_key_if_doesnt_exist(kwargs, 'edgecolor', 'w') @@ -790,3 +788,57 @@ def get_trace_dict(tr, varnames): fig.tight_layout() return ax + +def fast_kde(x): + """ + A fft-based Gaussian kernel density estimate (KDE) for computing + the KDE on a regular grid. + The code was adapted from https://github.com/mfouesneau/faststats + + Parameters + ---------- + + x : Numpy array or list + + Returns + ------- + + grid: A gridded 1D KDE of the input points (x). + xmin: minimum value of x + xmax: maximum value of x + + """ + + xmin, xmax = x.min(), x.max() + + n = len(x) + nx = 256 + + # compute histogram + bins = np.linspace(x.min(), x.max(), nx) + xyi = np.digitize(x, bins) + dx = (xmax - xmin) / (nx - 1) + grid = np.histogram(x, bins=nx)[0] + + # Scaling factor for bandwidth + scotts_factor = n ** (-0.2) + # Determine the bandwidth using Scott's rule + std_x = np.std(xyi) + kern_nx = int(np.round(scotts_factor * 2 * np.pi * std_x)) + + # Evaluate the gaussian function on the kernel grid + kernel = np.reshape(gaussian(kern_nx, scotts_factor * std_x), kern_nx) + + + # Compute the KDE + # use symmetric padding to correct for data boundaries in the kde + npad = np.min((nx, 2 * kern_nx)) + + grid = np.concatenate([grid[npad: 0: -1], grid, grid[nx: nx - npad: -1]]) + grid = convolve(grid, kernel, mode='same')[npad: npad + nx] + + norm_factor = n * dx * (2 * np.pi * std_x ** 2 * scotts_factor ** 2) ** 0.5 + + grid /= norm_factor + + return grid, xmin, xmax