Skip to content

Commit 0cf7fd6

Browse files
JDkuba authored and simonjayhawkins committed
Backport PR pandas-dev#33462 on branch 1.0.x (BUG: None converted to NaN after groupby first and last)
1 parent bd6690a commit 0cf7fd6

File tree

3 files changed

+18
-3
lines changed

3 files changed

+18
-3
lines changed

doc/source/whatsnew/v1.0.4.rst

+1 -1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ including other versions of pandas.
1515

1616
Fixed regressions
1717
~~~~~~~~~~~~~~~~~
18-
-
18+
- Bug in :meth:`GroupBy.first` and :meth:`GroupBy.last` where None is not preserved in object dtype (:issue:`32800`)
1919
-
2020

2121
.. _whatsnew_104.bug_fixes:

pandas/_libs/groupby.pyx

+6 -2
Original file line numberDiff line numberDiff line change
@@ -892,7 +892,9 @@ def group_last(rank_t[:, :] out,
892892
for j in range(K):
893893
val = values[i, j]
894894

895-
if not checknull(val):
895+
# None should not be treated like other NA-like
896+
# so that it won't be converted to nan
897+
if not checknull(val) or val is None:
896898
# NB: use _treat_as_na here once
897899
# conditional-nogil is available.
898900
nobs[lab, j] += 1
@@ -983,7 +985,9 @@ def group_nth(rank_t[:, :] out,
983985
for j in range(K):
984986
val = values[i, j]
985987

986-
if not checknull(val):
988+
# None should not be treated like other NA-like
989+
# so that it won't be converted to nan
990+
if not checknull(val) or val is None:
987991
# NB: use _treat_as_na here once
988992
# conditional-nogil is available.
989993
nobs[lab, j] += 1

pandas/tests/groupby/test_nth.py

+11
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,17 @@ def test_nth_with_na_object(index, nulls_fixture):
9494
tm.assert_frame_equal(result, expected)
9595

9696

97+
@pytest.mark.parametrize("method", ["first", "last"])
98+
def test_first_last_with_None(method):
99+
# https://github.com/pandas-dev/pandas/issues/32800
100+
# None should be preserved as object dtype
101+
df = pd.DataFrame.from_dict({"id": ["a"], "value": [None]})
102+
groups = df.groupby("id", as_index=False)
103+
result = getattr(groups, method)()
104+
105+
tm.assert_frame_equal(result, df)
106+
107+
97108
def test_first_last_nth_dtypes(df_mixed_floats):
98109

99110
df = df_mixed_floats.copy()

0 commit comments

Comments
 (0)