Skip to content

Commit 85182df

Browse files
authored
BUG: Cannot sample on DataFrameGroupBy with weights when index is specified (#40015)
1 parent 408216c commit 85182df

File tree

3 files changed

+11
-5
lines changed

3 files changed

+11
-5
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,7 @@ Groupby/resample/rolling
442442
- Bug in :meth:`.GroupBy.mean`, :meth:`.GroupBy.median` and :meth:`DataFrame.pivot_table` not propagating metadata (:issue:`28283`)
443443
- Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly when window is an offset and dates are in descending order (:issue:`40002`)
444444
- Bug in :class:`SeriesGroupBy` and :class:`DataFrameGroupBy` on an empty ``Series`` or ``DataFrame`` would lose index, columns, and/or data types when directly using the methods ``idxmax``, ``idxmin``, ``mad``, ``min``, ``max``, ``sum``, ``prod``, and ``skew`` or using them through ``apply``, ``aggregate``, or ``resample`` (:issue:`26411`)
445+
- Bug in :meth:`DataFrameGroupBy.sample` where an error was raised when ``weights`` was specified and the index was an :class:`Int64Index` (:issue:`39927`)
445446
-
446447

447448
Reshaping

pandas/core/groupby/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3076,7 +3076,7 @@ def sample(
30763076

30773077
if weights is not None:
30783078
weights = Series(weights, index=self._selected_obj.index)
3079-
ws = [weights[idx] for idx in self.indices.values()]
3079+
ws = [weights.iloc[idx] for idx in self.indices.values()]
30803080
else:
30813081
ws = [None] * self.ngroups
30823082

pandas/tests/groupby/test_sample.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -116,14 +116,19 @@ def test_groupby_sample_without_n_or_frac():
116116
tm.assert_series_equal(result, expected)
117117

118118

119-
def test_groupby_sample_with_weights():
119+
@pytest.mark.parametrize(
120+
"index, expected_index",
121+
[(["w", "x", "y", "z"], ["w", "w", "y", "y"]), ([3, 4, 5, 6], [3, 3, 5, 5])],
122+
)
123+
def test_groupby_sample_with_weights(index, expected_index):
124+
# GH 39927 - weighted sampling must also work when the index holds integers
120125
values = [1] * 2 + [2] * 2
121-
df = DataFrame({"a": values, "b": values}, index=Index(["w", "x", "y", "z"]))
126+
df = DataFrame({"a": values, "b": values}, index=Index(index))
122127

123128
result = df.groupby("a").sample(n=2, replace=True, weights=[1, 0, 1, 0])
124-
expected = DataFrame({"a": values, "b": values}, index=Index(["w", "w", "y", "y"]))
129+
expected = DataFrame({"a": values, "b": values}, index=Index(expected_index))
125130
tm.assert_frame_equal(result, expected)
126131

127132
result = df.groupby("a")["b"].sample(n=2, replace=True, weights=[1, 0, 1, 0])
128-
expected = Series(values, name="b", index=Index(["w", "w", "y", "y"]))
133+
expected = Series(values, name="b", index=Index(expected_index))
129134
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)