From a269b761780000e1c71bdee9a449454a661288f7 Mon Sep 17 00:00:00 2001 From: "wenjun.swj" Date: Wed, 24 Feb 2021 20:48:44 +0800 Subject: [PATCH 1/2] BUG: Cannot sample on DataFrameGroupBy with weights when index is specified --- pandas/core/groupby/groupby.py | 2 +- pandas/tests/groupby/test_sample.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e939c184d501a..5f42ab5c84938 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3076,7 +3076,7 @@ def sample( if weights is not None: weights = Series(weights, index=self._selected_obj.index) - ws = [weights[idx] for idx in self.indices.values()] + ws = [weights.iloc[idx] for idx in self.indices.values()] else: ws = [None] * self.ngroups diff --git a/pandas/tests/groupby/test_sample.py b/pandas/tests/groupby/test_sample.py index 13147ca704b56..1f9b587f68982 100644 --- a/pandas/tests/groupby/test_sample.py +++ b/pandas/tests/groupby/test_sample.py @@ -116,14 +116,18 @@ def test_groupby_sample_without_n_or_frac(): tm.assert_series_equal(result, expected) -def test_groupby_sample_with_weights(): +@pytest.mark.parametrize( + "index, expect_index", + [(["w", "x", "y", "z"], ["w", "w", "y", "y"]), ([3, 4, 5, 6], [3, 3, 5, 5])], +) +def test_groupby_sample_with_weights(index, expect_index): values = [1] * 2 + [2] * 2 - df = DataFrame({"a": values, "b": values}, index=Index(["w", "x", "y", "z"])) + df = DataFrame({"a": values, "b": values}, index=Index(index)) result = df.groupby("a").sample(n=2, replace=True, weights=[1, 0, 1, 0]) - expected = DataFrame({"a": values, "b": values}, index=Index(["w", "w", "y", "y"])) + expected = DataFrame({"a": values, "b": values}, index=Index(expect_index)) tm.assert_frame_equal(result, expected) result = df.groupby("a")["b"].sample(n=2, replace=True, weights=[1, 0, 1, 0]) - expected = Series(values, name="b", index=Index(["w", "w", "y", "y"])) + expected = 
Series(values, name="b", index=Index(expect_index)) tm.assert_series_equal(result, expected) From fcb102e7e9640bcac7cd324cd25b82b96e2e9a89 Mon Sep 17 00:00:00 2001 From: wjsi Date: Wed, 24 Feb 2021 22:24:50 +0800 Subject: [PATCH 2/2] Add changes docs & minor fixes --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/tests/groupby/test_sample.py | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index d5177075afda5..945dc0039fea6 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -436,6 +436,7 @@ Groupby/resample/rolling - Bug in :meth:`core.window.rolling.RollingGroupby.corr` and :meth:`core.window.expanding.ExpandingGroupby.corr` where the groupby column would return 0 instead of ``np.nan`` when providing ``other`` that was longer than each group (:issue:`39591`) - Bug in :meth:`core.window.expanding.ExpandingGroupby.corr` and :meth:`core.window.expanding.ExpandingGroupby.cov` where 1 would be returned instead of ``np.nan`` when providing ``other`` that was longer than each group (:issue:`39591`) - Bug in :meth:`.GroupBy.mean`, :meth:`.GroupBy.median` and :meth:`DataFrame.pivot_table` not propagating metadata (:issue:`28283`) +- Bug in :meth:`DataFrameGroupBy.sample` where an error was raised when ``weights`` was specified and the index was an :class:`Int64Index` (:issue:`39927`) - Reshaping diff --git a/pandas/tests/groupby/test_sample.py b/pandas/tests/groupby/test_sample.py index 1f9b587f68982..4b8b0173789ae 100644 --- a/pandas/tests/groupby/test_sample.py +++ b/pandas/tests/groupby/test_sample.py @@ -117,17 +117,18 @@ def test_groupby_sample_without_n_or_frac(): @pytest.mark.parametrize( - "index, expect_index", + "index, expected_index", [(["w", "x", "y", "z"], ["w", "w", "y", "y"]), ([3, 4, 5, 6], [3, 3, 5, 5])], ) -def test_groupby_sample_with_weights(index, expect_index): +def test_groupby_sample_with_weights(index, expected_index): + # GH 
39927 - tests for integer index needed values = [1] * 2 + [2] * 2 df = DataFrame({"a": values, "b": values}, index=Index(index)) result = df.groupby("a").sample(n=2, replace=True, weights=[1, 0, 1, 0]) - expected = DataFrame({"a": values, "b": values}, index=Index(expect_index)) + expected = DataFrame({"a": values, "b": values}, index=Index(expected_index)) tm.assert_frame_equal(result, expected) result = df.groupby("a")["b"].sample(n=2, replace=True, weights=[1, 0, 1, 0]) - expected = Series(values, name="b", index=Index(expect_index)) + expected = Series(values, name="b", index=Index(expected_index)) tm.assert_series_equal(result, expected)