Skip to content

Commit 382aefc

Browse files
authored
REGR: groupby.transform producing segfault (#46585)
1 parent 2555468 commit 382aefc

File tree

3 files changed: 27 additions (+27) and 13 deletions (-13).

pandas/core/groupby/groupby.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1106,7 +1106,7 @@ def _set_result_index_ordered(
11061106
# set the result index on the passed values object and
11071107
# return the new object, xref 8046
11081108

1109-
if self.grouper.is_monotonic:
1109+
if self.grouper.is_monotonic and not self.grouper.has_dropped_na:
11101110
# shortcut if we have an already ordered grouper
11111111
result.set_axis(self.obj._get_axis(self.axis), axis=self.axis, inplace=True)
11121112
return result

pandas/core/groupby/ops.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -818,7 +818,10 @@ def result_ilocs(self) -> npt.NDArray[np.intp]:
818818
# Original indices are where group_index would go via sorting.
819819
# But when dropna is true, we need to remove null values while accounting for
820820
# any gaps that then occur because of them.
821-
group_index = get_group_index(self.codes, self.shape, sort=False, xnull=True)
821+
group_index = get_group_index(
822+
self.codes, self.shape, sort=self._sort, xnull=True
823+
)
824+
group_index, _ = compress_group_index(group_index, sort=self._sort)
822825

823826
if self.has_dropped_na:
824827
mask = np.where(group_index >= 0)

pandas/tests/groupby/transform/test_transform.py

+22 -11
Original file line number | Diff line number | Diff line change
@@ -1303,23 +1303,34 @@ def test_transform_cumcount():
13031303
tm.assert_series_equal(result, expected)
13041304

13051305

1306-
def test_null_group_lambda_self(sort, dropna):
1306+
@pytest.mark.parametrize("keys", [["A1"], ["A1", "A2"]])
1307+
def test_null_group_lambda_self(request, sort, dropna, keys):
13071308
# GH 17093
1308-
np.random.seed(0)
1309-
keys = np.random.randint(0, 5, size=50).astype(float)
1310-
nulls = np.random.choice([0, 1], keys.shape).astype(bool)
1311-
keys[nulls] = np.nan
1312-
values = np.random.randint(0, 5, size=keys.shape)
1313-
df = DataFrame({"A": keys, "B": values})
1309+
if not sort and not dropna:
1310+
msg = "GH#46584: null values get sorted when sort=False"
1311+
request.node.add_marker(pytest.mark.xfail(reason=msg, strict=False))
1312+
1313+
size = 50
1314+
nulls1 = np.random.choice([False, True], size)
1315+
nulls2 = np.random.choice([False, True], size)
1316+
# Whether a group contains a null value or not
1317+
nulls_grouper = nulls1 if len(keys) == 1 else nulls1 | nulls2
1318+
1319+
a1 = np.random.randint(0, 5, size=size).astype(float)
1320+
a1[nulls1] = np.nan
1321+
a2 = np.random.randint(0, 5, size=size).astype(float)
1322+
a2[nulls2] = np.nan
1323+
values = np.random.randint(0, 5, size=a1.shape)
1324+
df = DataFrame({"A1": a1, "A2": a2, "B": values})
13141325

13151326
expected_values = values
1316-
if dropna and nulls.any():
1327+
if dropna and nulls_grouper.any():
13171328
expected_values = expected_values.astype(float)
1318-
expected_values[nulls] = np.nan
1329+
expected_values[nulls_grouper] = np.nan
13191330
expected = DataFrame(expected_values, columns=["B"])
13201331

1321-
gb = df.groupby("A", dropna=dropna, sort=sort)
1322-
result = gb.transform(lambda x: x)
1332+
gb = df.groupby(keys, dropna=dropna, sort=sort)
1333+
result = gb[["B"]].transform(lambda x: x)
13231334
tm.assert_frame_equal(result, expected)
13241335

13251336

0 commit comments

Comments
 (0)