diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 718de09a0c3e4..f789b74e1b795 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -546,6 +546,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.agg` with dictionary input losing ``ExtensionArray`` dtypes (:issue:`32194`) - Bug in :meth:`DataFrame.resample` where an ``AmbiguousTimeError`` would be raised when the resulting timezone aware :class:`DatetimeIndex` had a DST transition at midnight (:issue:`25758`) - Bug in :meth:`DataFrame.groupby` where a ``ValueError`` would be raised when grouping by a categorical column with read-only categories and ``sort=False`` (:issue:`33410`) +- Bug in :meth:`GroupBy.first` and :meth:`GroupBy.last` where ``None`` was not preserved in object dtype (:issue:`32800`) Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index e7ac3b8442c6d..53e66c4b8723d 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -893,7 +893,9 @@ def group_last(rank_t[:, :] out, for j in range(K): val = values[i, j] - if not checknull(val): + # None should not be treated like other NA-like + # so that it won't be converted to nan + if not checknull(val) or val is None: # NB: use _treat_as_na here once # conditional-nogil is available. 
nobs[lab, j] += 1 diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index e1bc058508bee..947907caf5cbc 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -94,6 +94,17 @@ def test_nth_with_na_object(index, nulls_fixture): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize("method", ["first", "last"]) +def test_first_last_with_None(method): + # https://github.com/pandas-dev/pandas/issues/32800 + # None in an object column must be preserved; result must equal the input + df = pd.DataFrame.from_dict({"id": ["a"], "value": [None]}) + groups = df.groupby("id", as_index=False) + result = getattr(groups, method)() + + tm.assert_frame_equal(result, df) + + def test_first_last_nth_dtypes(df_mixed_floats): df = df_mixed_floats.copy()