Skip to content

Commit 9c08431

Browse files
fbourgey and yuanx749 authored
ENH: Allow to plot weighted KDEs. (#59337)
* added weights argument in _plot function and modified scipy.stats.gaussian_kde accordingly
* Update pandas/plotting/_core.py
  Co-authored-by: Xiao Yuan <[email protected]>
* moved "weights" after "ind"
* added entry in whatsnew v3.0.0.rst
* added new test for weights
* removed weights
* moved message from Plotting section to Other enhancements

---------

Co-authored-by: Xiao Yuan <[email protected]>
1 parent 89c8d7a commit 9c08431

File tree

4 files changed

+24
-2
lines changed

4 files changed

+24
-2
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Other enhancements
3535
- :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`).
3636
- :meth:`Index.get_loc` now accepts also subclasses of ``tuple`` as keys (:issue:`57922`)
3737
- :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`)
38+
- Added missing parameter ``weights`` in :meth:`DataFrame.plot.kde` for the estimation of the PDF (:issue:`59337`)
3839
- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`)
3940
- Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`)
4041
- Support reading value labels from Stata 108-format (Stata 6) and earlier files (:issue:`58154`)

pandas/plotting/_core.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1450,6 +1450,7 @@ def kde(
14501450
self,
14511451
bw_method: Literal["scott", "silverman"] | float | Callable | None = None,
14521452
ind: np.ndarray | int | None = None,
1453+
weights: np.ndarray | None = None,
14531454
**kwargs,
14541455
) -> PlotAccessor:
14551456
"""
@@ -1475,6 +1476,9 @@ def kde(
14751476
1000 equally spaced points are used. If `ind` is a NumPy array, the
14761477
KDE is evaluated at the points passed. If `ind` is an integer,
14771478
`ind` number of equally spaced points are used.
1479+
weights : NumPy array, optional
1480+
Weights of datapoints. This must be the same shape as datapoints.
1481+
If None (default), the samples are assumed to be equally weighted.
14781482
**kwargs
14791483
Additional keyword arguments are documented in
14801484
:meth:`DataFrame.plot`.
@@ -1560,7 +1564,7 @@ def kde(
15601564
15611565
>>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
15621566
"""
1563-
return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)
1567+
return self(kind="kde", bw_method=bw_method, ind=ind, weights=weights, **kwargs)
15641568

15651569
density = kde
15661570

pandas/plotting/_matplotlib/hist.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ def _plot( # type: ignore[override]
269269
y: np.ndarray,
270270
style=None,
271271
bw_method=None,
272+
weights=None,
272273
ind=None,
273274
column_num=None,
274275
stacking_id: int | None = None,
@@ -277,7 +278,7 @@ def _plot( # type: ignore[override]
277278
from scipy.stats import gaussian_kde
278279

279280
y = remove_na_arraylike(y)
280-
gkde = gaussian_kde(y, bw_method=bw_method)
281+
gkde = gaussian_kde(y, bw_method=bw_method, weights=weights)
281282

282283
y = gkde.evaluate(ind)
283284
lines = MPLPlot._plot(ax, ind, y, style=style, **kwds)

pandas/tests/plotting/test_series.py

+16
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,22 @@ def test_kde_kwargs(self, ts, bw_method, ind):
538538
pytest.importorskip("scipy")
539539
_check_plot_works(ts.plot.kde, bw_method=bw_method, ind=ind)
540540

541+
@pytest.mark.parametrize(
542+
"bw_method, ind, weights",
543+
[
544+
["scott", 20, None],
545+
[None, 20, None],
546+
[None, np.int_(20), None],
547+
[0.5, np.linspace(-100, 100, 20), None],
548+
["scott", 40, np.linspace(0.0, 2.0, 50)],
549+
],
550+
)
551+
def test_kde_kwargs_weights(self, bw_method, ind, weights):
552+
# GH59337
553+
pytest.importorskip("scipy")
554+
s = Series(np.random.default_rng(2).uniform(size=50))
555+
_check_plot_works(s.plot.kde, bw_method=bw_method, ind=ind, weights=weights)
556+
541557
def test_density_kwargs(self, ts):
542558
pytest.importorskip("scipy")
543559
sample_points = np.linspace(-100, 100, 20)

0 commit comments

Comments
 (0)