From 092e216f54c34cb106700258e402b01eff27fdcb Mon Sep 17 00:00:00 2001 From: fbourgey Date: Wed, 24 Jul 2024 14:00:17 -0400 Subject: [PATCH 1/7] added weights argument in _plot function and modified scipy.stats.gaussian_kde accordingly --- pandas/plotting/_core.py | 6 +++++- pandas/plotting/_matplotlib/hist.py | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 17df98f026656..2cf39408751f4 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1449,6 +1449,7 @@ def hist( def kde( self, bw_method: Literal["scott", "silverman"] | float | Callable | None = None, + weights: np.ndarray | None = None, ind: np.ndarray | int | None = None, **kwargs, ) -> PlotAccessor: @@ -1470,6 +1471,9 @@ def kde( 'scott', 'silverman', a scalar constant or a callable. If None (default), 'scott' is used. See :class:`scipy.stats.gaussian_kde` for more information. + weights : NumPy array, optional + Weights of datapoints. This must be the same shape as dataset. + If None (default), the samples are assumed to be equally weighted. ind : NumPy array or int, optional Evaluation points for the estimated PDF. If None (default), 1000 equally spaced points are used. 
If `ind` is a NumPy array, the @@ -1560,7 +1564,7 @@ def kde( >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6]) """ - return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs) + return self(kind="kde", bw_method=bw_method, weights=weights, ind=ind, **kwargs) density = kde diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 2c4d714bf1a0c..97e510982ab93 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -269,6 +269,7 @@ def _plot( # type: ignore[override] y: np.ndarray, style=None, bw_method=None, + weights=None, ind=None, column_num=None, stacking_id: int | None = None, @@ -277,7 +278,7 @@ def _plot( # type: ignore[override] from scipy.stats import gaussian_kde y = remove_na_arraylike(y) - gkde = gaussian_kde(y, bw_method=bw_method) + gkde = gaussian_kde(y, bw_method=bw_method, weights=weights) y = gkde.evaluate(ind) lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) From 380e86e304256bfcacd06db0dc1a55e735342359 Mon Sep 17 00:00:00 2001 From: Florian Bourgey Date: Tue, 30 Jul 2024 16:36:02 -0400 Subject: [PATCH 2/7] Update pandas/plotting/_core.py Co-authored-by: Xiao Yuan --- pandas/plotting/_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 2cf39408751f4..095c86b8cd4f3 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1472,7 +1472,7 @@ def kde( If None (default), 'scott' is used. See :class:`scipy.stats.gaussian_kde` for more information. weights : NumPy array, optional - Weights of datapoints. This must be the same shape as dataset. + Weights of datapoints. This must be the same shape as datapoints. If None (default), the samples are assumed to be equally weighted. ind : NumPy array or int, optional Evaluation points for the estimated PDF. 
If None (default), From c4d72c1b3423558cfcd5a9f020065d6c71fdbbdd Mon Sep 17 00:00:00 2001 From: fbourgey Date: Tue, 30 Jul 2024 16:44:22 -0400 Subject: [PATCH 3/7] moved "weights" after "ind" --- pandas/plotting/_core.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 095c86b8cd4f3..b60392368d944 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1449,8 +1449,8 @@ def hist( def kde( self, bw_method: Literal["scott", "silverman"] | float | Callable | None = None, - weights: np.ndarray | None = None, ind: np.ndarray | int | None = None, + weights: np.ndarray | None = None, **kwargs, ) -> PlotAccessor: """ @@ -1471,14 +1471,14 @@ def kde( 'scott', 'silverman', a scalar constant or a callable. If None (default), 'scott' is used. See :class:`scipy.stats.gaussian_kde` for more information. - weights : NumPy array, optional - Weights of datapoints. This must be the same shape as datapoints. - If None (default), the samples are assumed to be equally weighted. ind : NumPy array or int, optional Evaluation points for the estimated PDF. If None (default), 1000 equally spaced points are used. If `ind` is a NumPy array, the KDE is evaluated at the points passed. If `ind` is an integer, `ind` number of equally spaced points are used. + weights : NumPy array, optional + Weights of datapoints. This must have the same shape as the datapoints. + If None (default), the samples are assumed to be equally weighted. **kwargs Additional keyword arguments are documented in :meth:`DataFrame.plot`. 
@@ -1564,7 +1564,7 @@ def kde( >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6]) """ - return self(kind="kde", bw_method=bw_method, weights=weights, ind=ind, **kwargs) + return self(kind="kde", bw_method=bw_method, ind=ind, weights=weights, **kwargs) density = kde From 11a239dc914868b34f9fdc21513105de7ce97a8a Mon Sep 17 00:00:00 2001 From: fbourgey Date: Tue, 30 Jul 2024 16:55:15 -0400 Subject: [PATCH 4/7] added entry in whatsnew v3.0.0.rst --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 768b12ba1007f..6f5232dcf9ecb 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -593,6 +593,7 @@ Period Plotting ^^^^^^^^ +- Added missing parameter ``weights`` in :meth:`DataFrame.plot.kde` for the estimation of the PDF (:issue:`59337`) - Bug in :meth:`.DataFrameGroupBy.boxplot` failed when there were multiple groupings (:issue:`14701`) - Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. 
(:issue:`57587`) - Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`) From d5f260b5ba0e1ceb3373dec2c2dc6880baedbf2e Mon Sep 17 00:00:00 2001 From: fbourgey Date: Tue, 30 Jul 2024 21:06:42 -0400 Subject: [PATCH 5/7] added new test for weights --- pandas/tests/plotting/test_series.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 2ca9dbf92e617..18c000a04d937 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -538,6 +538,23 @@ def test_kde_kwargs(self, ts, bw_method, ind): pytest.importorskip("scipy") _check_plot_works(ts.plot.kde, bw_method=bw_method, ind=ind) + @pytest.mark.parametrize( + "bw_method, ind, weights", + [ + ["scott", 20, None], + [None, 20, None], + [None, np.int_(20), None], + [0.5, np.linspace(-100, 100, 20), None], + ["scott", 40, np.linspace(0.0, 2.0, 50)], + ], + ) + def test_kde_kwargs_weights(self, bw_method, ind, weights): + # GH59337 + pytest.importorskip("scipy") + s = Series(np.random.default_rng(2).uniform(size=50)) + # weights = np.linspace(0.0, 2.0, 50) + _check_plot_works(s.plot.kde, bw_method=bw_method, ind=ind, weights=weights) + def test_density_kwargs(self, ts): pytest.importorskip("scipy") sample_points = np.linspace(-100, 100, 20) From 8efd0002027f7fd29408cfa48efd41a5eb8ebb61 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Tue, 30 Jul 2024 21:07:24 -0400 Subject: [PATCH 6/7] removed weights --- pandas/tests/plotting/test_series.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 18c000a04d937..52ca66c218862 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -552,7 +552,6 @@ def test_kde_kwargs_weights(self, bw_method, ind, weights): # GH59337 pytest.importorskip("scipy") s = Series(np.random.default_rng(2).uniform(size=50)) - 
# weights = np.linspace(0.0, 2.0, 50) _check_plot_works(s.plot.kde, bw_method=bw_method, ind=ind, weights=weights) def test_density_kwargs(self, ts): From 67cb4912cb7124aea04459c24bb827b7274419b1 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Wed, 31 Jul 2024 08:22:08 -0400 Subject: [PATCH 7/7] moved message from Plotting section to Other enhancements --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 6f5232dcf9ecb..1627a90fc6ac0 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -35,6 +35,7 @@ Other enhancements - :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`). - :meth:`Index.get_loc` now accepts also subclasses of ``tuple`` as keys (:issue:`57922`) - :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`) +- Added missing parameter ``weights`` in :meth:`DataFrame.plot.kde` for the estimation of the PDF (:issue:`59337`) - Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`) - Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`) - Support reading value labels from Stata 108-format (Stata 6) and earlier files (:issue:`58154`) @@ -593,7 +594,6 @@ Period Plotting ^^^^^^^^ -- Added missing parameter ``weights`` in :meth:`DataFrame.plot.kde` for the estimation of the PDF (:issue:`59337`) - Bug in :meth:`.DataFrameGroupBy.boxplot` failed when there were multiple groupings (:issue:`14701`) - Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`) - Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`)