From f724c5a04fb36b4faa76c451957f9bbbdffffcb0 Mon Sep 17 00:00:00 2001 From: JDkuba Date: Fri, 10 Apr 2020 19:06:52 +0200 Subject: [PATCH 1/2] BUG: None converted after groupby first and last --- pandas/_libs/groupby.pyx | 4 ++-- pandas/tests/groupby/test_nth.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index e7ac3b8442c6d..cc9630a33c5f8 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -893,7 +893,7 @@ def group_last(rank_t[:, :] out, for j in range(K): val = values[i, j] - if not checknull(val): + if not checknull(val) or val is None: # NB: use _treat_as_na here once # conditional-nogil is available. nobs[lab, j] += 1 @@ -986,7 +986,7 @@ def group_nth(rank_t[:, :] out, for j in range(K): val = values[i, j] - if not checknull(val): + if not checknull(val) or val is None: # NB: use _treat_as_na here once # conditional-nogil is available. nobs[lab, j] += 1 diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index b1476f1059d84..16dbd1da771e1 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -94,6 +94,16 @@ def test_nth_with_na_object(index, nulls_fixture): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize("method", ["first", "last"]) +def test_first_last_with_None(method): + # https://github.com/pandas-dev/pandas/issues/32800 + df = pd.DataFrame.from_dict({"id": ["a"], "value": [None]}) + groups = df.groupby("id", as_index=False) + result = getattr(groups, method)() + + tm.assert_frame_equal(result, df) + + def test_first_last_nth_dtypes(df_mixed_floats): df = df_mixed_floats.copy() From 41efd576464512e02e3d066c2703ba54750042c5 Mon Sep 17 00:00:00 2001 From: JDkuba Date: Sat, 11 Apr 2020 12:22:01 +0200 Subject: [PATCH 2/2] BUG: whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/_libs/groupby.pyx | 4 ++++ pandas/tests/groupby/test_nth.py | 1 + 3 files changed, 6 
insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 718de09a0c3e4..f789b74e1b795 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -546,6 +546,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.agg` with dictionary input losing ``ExtensionArray`` dtypes (:issue:`32194`) - Bug in :meth:`DataFrame.resample` where an ``AmbiguousTimeError`` would be raised when the resulting timezone aware :class:`DatetimeIndex` had a DST transition at midnight (:issue:`25758`) - Bug in :meth:`DataFrame.groupby` where a ``ValueError`` would be raised when grouping by a categorical column with read-only categories and ``sort=False`` (:issue:`33410`) +- Bug in :meth:`GroupBy.first` and :meth:`GroupBy.last` where ``None`` was not preserved in object dtype (:issue:`32800`) Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index cc9630a33c5f8..53e66c4b8723d 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -893,6 +893,8 @@ def group_last(rank_t[:, :] out, for j in range(K): val = values[i, j] + # None should not be treated like other NA-like values + # so that it won't be converted to NaN if not checknull(val) or val is None: # NB: use _treat_as_na here once # conditional-nogil is available. @@ -986,6 +988,8 @@ def group_nth(rank_t[:, :] out, for j in range(K): val = values[i, j] + # None should not be treated like other NA-like values + # so that it won't be converted to NaN if not checknull(val) or val is None: # NB: use _treat_as_na here once # conditional-nogil is available. 
diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index dc393b31c28f9..947907caf5cbc 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -97,6 +97,7 @@ def test_nth_with_na_object(index, nulls_fixture): @pytest.mark.parametrize("method", ["first", "last"]) def test_first_last_with_None(method): # https://github.com/pandas-dev/pandas/issues/32800 + # None should be preserved as object dtype df = pd.DataFrame.from_dict({"id": ["a"], "value": [None]}) groups = df.groupby("id", as_index=False) result = getattr(groups, method)()