Skip to content

Commit 55e3bff

Browse files
authored
REGR: Groupby first/last/nth treats None as an observation (#38330)
* BUG: Groupby first/last/nth treats None as an observation
* Reverted test changes, whatsnew
* Reverted test changes, whatsnew
* Remove nth from whatsnew
1 parent 22dbef1 commit 55e3bff

File tree

3 files changed

+25
-8
lines changed

3 files changed

+25
-8
lines changed

doc/source/whatsnew/v1.1.5.rst

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ Fixed regressions
2727
- Fixed regression in :meth:`DataFrame.fillna` not filling ``NaN`` after other operations such as :meth:`DataFrame.pivot` (:issue:`36495`).
2828
- Fixed performance regression in ``df.groupby(..).rolling(..)`` (:issue:`38038`)
2929
- Fixed regression in :meth:`MultiIndex.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`36915`)
30+
- Fixed regression in :meth:`.GroupBy.first` and :meth:`.GroupBy.last` where ``None`` was considered a non-NA value (:issue:`38286`)
3031

3132
.. ---------------------------------------------------------------------------
3233

pandas/_libs/groupby.pyx

+4-8
Original file line numberDiff line numberDiff line change
@@ -928,9 +928,7 @@ def group_last(rank_t[:, :] out,
928928
for j in range(K):
929929
val = values[i, j]
930930

931-
# None should not be treated like other NA-like
932-
# so that it won't be converted to nan
933-
if not checknull(val) or val is None:
931+
if not checknull(val):
934932
# NB: use _treat_as_na here once
935933
# conditional-nogil is available.
936934
nobs[lab, j] += 1
@@ -939,7 +937,7 @@ def group_last(rank_t[:, :] out,
939937
for i in range(ncounts):
940938
for j in range(K):
941939
if nobs[i, j] < min_count:
942-
out[i, j] = NAN
940+
out[i, j] = None
943941
else:
944942
out[i, j] = resx[i, j]
945943
else:
@@ -1023,9 +1021,7 @@ def group_nth(rank_t[:, :] out,
10231021
for j in range(K):
10241022
val = values[i, j]
10251023

1026-
# None should not be treated like other NA-like
1027-
# so that it won't be converted to nan
1028-
if not checknull(val) or val is None:
1024+
if not checknull(val):
10291025
# NB: use _treat_as_na here once
10301026
# conditional-nogil is available.
10311027
nobs[lab, j] += 1
@@ -1035,7 +1031,7 @@ def group_nth(rank_t[:, :] out,
10351031
for i in range(ncounts):
10361032
for j in range(K):
10371033
if nobs[i, j] < min_count:
1038-
out[i, j] = NAN
1034+
out[i, j] = None
10391035
else:
10401036
out[i, j] = resx[i, j]
10411037

pandas/tests/groupby/test_nth.py

+20
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,26 @@ def test_first_last_with_None(method):
101101
tm.assert_frame_equal(result, df)
102102

103103

104+
@pytest.mark.parametrize("method", ["first", "last"])
105+
@pytest.mark.parametrize(
106+
"df, expected",
107+
[
108+
(
109+
DataFrame({"id": "a", "value": [None, "foo", np.nan]}),
110+
DataFrame({"value": ["foo"]}, index=Index(["a"], name="id")),
111+
),
112+
(
113+
DataFrame({"id": "a", "value": [np.nan]}, dtype=object),
114+
DataFrame({"value": [None]}, index=Index(["a"], name="id")),
115+
),
116+
],
117+
)
118+
def test_first_last_with_None_expanded(method, df, expected):
119+
# GH 32800, 38286
120+
result = getattr(df.groupby("id"), method)()
121+
tm.assert_frame_equal(result, expected)
122+
123+
104124
def test_first_last_nth_dtypes(df_mixed_floats):
105125

106126
df = df_mixed_floats.copy()

0 commit comments

Comments
 (0)