Skip to content

Kde boundaries #1567

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 30, 2016
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 62 additions & 10 deletions pymc3/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import matplotlib.pyplot as plt
import pymc3 as pm
from .stats import quantiles, hpd
from scipy.signal import gaussian, convolve

__all__ = ['traceplot', 'kdeplot', 'kde2plot',
'forestplot', 'autocorrplot', 'plot_posterior']
Expand Down Expand Up @@ -120,16 +121,15 @@ def kdeplot_op(ax, data, prior=None, prior_alpha=1, prior_style='--'):
for i in range(data.shape[1]):
d = data[:, i]
try:
density = kde.gaussian_kde(d)
l = np.min(d)
u = np.max(d)
x = np.linspace(0, 1, 100) * (u - l) + l
density, l, u = fast_kde(d)
x = np.linspace(l, u, len(density))

if prior is not None:
p = prior.logp(x).eval()
ax.plot(x, np.exp(p), alpha=prior_alpha, ls=prior_style)

ax.plot(x, density(x))
ax.plot(x, density)
ax.set_ylim(bottom=0)

except LinAlgError:
errored.append(i)
Expand Down Expand Up @@ -721,11 +721,9 @@ def set_key_if_doesnt_exist(d, key, value):
d[key] = value

if kde_plot:
density = kde.gaussian_kde(trace_values)
l = np.min(trace_values)
u = np.max(trace_values)
x = np.linspace(0, 1, 100) * (u - l) + l
ax.plot(x, density(x), **kwargs)
density, l, u = fast_kde(trace_values)
x = np.linspace(l, u, len(density))
ax.plot(x, density, **kwargs)
else:
set_key_if_doesnt_exist(kwargs, 'bins', 30)
set_key_if_doesnt_exist(kwargs, 'edgecolor', 'w')
Expand Down Expand Up @@ -790,3 +788,57 @@ def get_trace_dict(tr, varnames):

fig.tight_layout()
return ax

def fast_kde(x):
"""
A fft-based Gaussian kernel density estimate (KDE) for computing
the KDE on a regular grid.
The code was adapted from https://github.com/mfouesneau/faststats

Parameters
----------

x : Numpy array or list

Returns
-------

grid: A gridded 1D KDE of the input points (x).
xmin: minimum value of x
xmax: maximum value of x

"""

xmin, xmax = x.min(), x.max()

n = len(x)
nx = 256

# compute histogram
bins = np.linspace(x.min(), x.max(), nx)
xyi = np.digitize(x, bins)
dx = (xmax - xmin) / (nx - 1)
grid = np.histogram(x, bins=nx)[0]

# Scaling factor for bandwidth
scotts_factor = n ** (-0.2)
# Determine the bandwidth using Scott's rule
std_x = np.std(xyi)
kern_nx = int(np.round(scotts_factor * 2 * np.pi * std_x))

# Evaluate the gaussian function on the kernel grid
kernel = np.reshape(gaussian(kern_nx, scotts_factor * std_x), kern_nx)


# Compute the KDE
# use symmetric padding to correct for data boundaries in the kde
npad = np.min((nx, 2 * kern_nx))

grid = np.concatenate([grid[npad: 0: -1], grid, grid[nx: nx - npad: -1]])
grid = convolve(grid, kernel, mode='same')[npad: npad + nx]

norm_factor = n * dx * (2 * np.pi * std_x ** 2 * scotts_factor ** 2) ** 0.5

grid /= norm_factor

return grid, xmin, xmax