Skip to content

ENH: Allow to plot weighted KDEs. #59337

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jul 31, 2024
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,7 @@ Period

Plotting
^^^^^^^^
- Added missing parameter ``weights`` to :meth:`DataFrame.plot.kde` for weighting the data points used in the estimation of the PDF (:issue:`59337`)
- Bug in :meth:`.DataFrameGroupBy.boxplot` failing when there were multiple groupings (:issue:`14701`)
- Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`)
- Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`)
Expand Down
6 changes: 5 additions & 1 deletion pandas/plotting/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1450,6 +1450,7 @@ def kde(
self,
bw_method: Literal["scott", "silverman"] | float | Callable | None = None,
ind: np.ndarray | int | None = None,
weights: np.ndarray | None = None,
**kwargs,
) -> PlotAccessor:
"""
Expand All @@ -1475,6 +1476,9 @@ def kde(
1000 equally spaced points are used. If `ind` is a NumPy array, the
KDE is evaluated at the points passed. If `ind` is an integer,
`ind` number of equally spaced points are used.
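weights : NumPy array, optional
Weights of the data points. This must be the same shape as the data
points that are actually used for the estimate: missing values are
dropped before the density is computed, so the weights have to line up
with the remaining points.
If None (default), the samples are assumed to be equally weighted.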
**kwargs
Additional keyword arguments are documented in
:meth:`DataFrame.plot`.
Expand Down Expand Up @@ -1560,7 +1564,7 @@ def kde(

>>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
"""
return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)
return self(kind="kde", bw_method=bw_method, ind=ind, weights=weights, **kwargs)

density = kde

Expand Down
3 changes: 2 additions & 1 deletion pandas/plotting/_matplotlib/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ def _plot( # type: ignore[override]
y: np.ndarray,
style=None,
bw_method=None,
weights=None,
ind=None,
column_num=None,
stacking_id: int | None = None,
Expand All @@ -277,7 +278,7 @@ def _plot( # type: ignore[override]
from scipy.stats import gaussian_kde

y = remove_na_arraylike(y)
gkde = gaussian_kde(y, bw_method=bw_method)
gkde = gaussian_kde(y, bw_method=bw_method, weights=weights)

y = gkde.evaluate(ind)
lines = MPLPlot._plot(ax, ind, y, style=style, **kwds)
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/plotting/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,22 @@ def test_kde_kwargs(self, ts, bw_method, ind):
pytest.importorskip("scipy")
_check_plot_works(ts.plot.kde, bw_method=bw_method, ind=ind)

@pytest.mark.parametrize(
    "bw_method, ind, weights",
    [
        ("scott", 20, None),
        (None, 20, None),
        (None, np.int_(20), None),
        (0.5, np.linspace(-100, 100, 20), None),
        ("scott", 40, np.linspace(0.0, 2.0, 50)),
    ],
)
def test_kde_kwargs_weights(self, bw_method, ind, weights):
    # GH59337
    # The KDE plot is backed by scipy; skip when it is not installed.
    pytest.importorskip("scipy")
    # 50 uniform samples from a fixed seed, so the non-None ``weights``
    # case above (also 50 entries) has one weight per sample.
    rng = np.random.default_rng(2)
    ser = Series(rng.uniform(size=50))
    _check_plot_works(ser.plot.kde, bw_method=bw_method, ind=ind, weights=weights)

def test_density_kwargs(self, ts):
pytest.importorskip("scipy")
sample_points = np.linspace(-100, 100, 20)
Expand Down
Loading