diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 768b12ba1007f..1627a90fc6ac0 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -35,6 +35,7 @@ Other enhancements - :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`). - :meth:`Index.get_loc` now accepts also subclasses of ``tuple`` as keys (:issue:`57922`) - :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`) +- Added missing parameter ``weights`` in :meth:`DataFrame.plot.kde` for the estimation of the PDF (:issue:`59337`) - Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`) - Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`) - Support reading value labels from Stata 108-format (Stata 6) and earlier files (:issue:`58154`) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 17df98f026656..b60392368d944 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1450,6 +1450,7 @@ def kde( self, bw_method: Literal["scott", "silverman"] | float | Callable | None = None, ind: np.ndarray | int | None = None, + weights: np.ndarray | None = None, **kwargs, ) -> PlotAccessor: """ @@ -1475,6 +1476,9 @@ def kde( 1000 equally spaced points are used. If `ind` is a NumPy array, the KDE is evaluated at the points passed. If `ind` is an integer, `ind` number of equally spaced points are used. + weights : NumPy array, optional + Weights of datapoints. This must be the same shape as datapoints. + If None (default), the samples are assumed to be equally weighted. **kwargs Additional keyword arguments are documented in :meth:`DataFrame.plot`. @@ -1560,7 +1564,7 @@ def kde( >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6]) """ - return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs) + return self(kind="kde", bw_method=bw_method, ind=ind, weights=weights, **kwargs) density = kde diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 2c4d714bf1a0c..97e510982ab93 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -269,6 +269,7 @@ def _plot( # type: ignore[override] y: np.ndarray, style=None, bw_method=None, + weights=None, ind=None, column_num=None, stacking_id: int | None = None, @@ -277,7 +278,7 @@ def _plot( # type: ignore[override] from scipy.stats import gaussian_kde y = remove_na_arraylike(y) - gkde = gaussian_kde(y, bw_method=bw_method) + gkde = gaussian_kde(y, bw_method=bw_method, weights=weights) y = gkde.evaluate(ind) lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 2ca9dbf92e617..52ca66c218862 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -538,6 +538,22 @@ def test_kde_kwargs(self, ts, bw_method, ind): pytest.importorskip("scipy") _check_plot_works(ts.plot.kde, bw_method=bw_method, ind=ind) + @pytest.mark.parametrize( + "bw_method, ind, weights", + [ + ["scott", 20, None], + [None, 20, None], + [None, np.int_(20), None], + [0.5, np.linspace(-100, 100, 20), None], + ["scott", 40, np.linspace(0.0, 2.0, 50)], + ], + ) + def test_kde_kwargs_weights(self, bw_method, ind, weights): + # GH59337 + pytest.importorskip("scipy") + s = Series(np.random.default_rng(2).uniform(size=50)) + _check_plot_works(s.plot.kde, bw_method=bw_method, ind=ind, weights=weights) + def test_density_kwargs(self, ts): pytest.importorskip("scipy") sample_points = np.linspace(-100, 100, 20)