diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 85dddbefcd9fa..ecb629a0391df 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -441,6 +441,7 @@ Groupby/resample/rolling - Bug in :meth:`.GroupBy.mean`, :meth:`.GroupBy.median` and :meth:`DataFrame.pivot_table` not propagating metadata (:issue:`28283`) - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly when window is an offset and dates are in descending order (:issue:`40002`) - Bug in :class:`SeriesGroupBy` and :class:`DataFrameGroupBy` on an empty ``Series`` or ``DataFrame`` would lose index, columns, and/or data types when directly using the methods ``idxmax``, ``idxmin``, ``mad``, ``min``, ``max``, ``sum``, ``prod``, and ``skew`` or using them through ``apply``, ``aggregate``, or ``resample`` (:issue:`26411`) +- Bug in :meth:`DataFrameGroupBy.sample` where an error was raised when ``weights`` was specified and the index was an :class:`Int64Index` (:issue:`39927`) - Reshaping diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b9310794d7caa..7bcdb348b8a1e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3076,7 +3076,7 @@ def sample( if weights is not None: weights = Series(weights, index=self._selected_obj.index) - ws = [weights[idx] for idx in self.indices.values()] + ws = [weights.iloc[idx] for idx in self.indices.values()] else: ws = [None] * self.ngroups diff --git a/pandas/tests/groupby/test_sample.py b/pandas/tests/groupby/test_sample.py index 13147ca704b56..4b8b0173789ae 100644 --- a/pandas/tests/groupby/test_sample.py +++ b/pandas/tests/groupby/test_sample.py @@ -116,14 +116,19 @@ def test_groupby_sample_without_n_or_frac(): tm.assert_series_equal(result, expected) -def test_groupby_sample_with_weights(): +@pytest.mark.parametrize( + "index, expected_index", + [(["w", "x", "y", "z"], ["w", "w", "y", "y"]), ([3, 4, 5, 6], [3, 
3, 5, 5])], +) +def test_groupby_sample_with_weights(index, expected_index): + # GH 39927 - parametrized to also cover an integer index values = [1] * 2 + [2] * 2 - df = DataFrame({"a": values, "b": values}, index=Index(["w", "x", "y", "z"])) + df = DataFrame({"a": values, "b": values}, index=Index(index)) result = df.groupby("a").sample(n=2, replace=True, weights=[1, 0, 1, 0]) - expected = DataFrame({"a": values, "b": values}, index=Index(["w", "w", "y", "y"])) + expected = DataFrame({"a": values, "b": values}, index=Index(expected_index)) tm.assert_frame_equal(result, expected) result = df.groupby("a")["b"].sample(n=2, replace=True, weights=[1, 0, 1, 0]) - expected = Series(values, name="b", index=Index(["w", "w", "y", "y"])) + expected = Series(values, name="b", index=Index(expected_index)) tm.assert_series_equal(result, expected)