From e0bd3ba2f34e4af66ec033e1ec395afecc1deb41 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 26 Jul 2023 20:41:07 -0400 Subject: [PATCH 1/3] Remove test_groupby_agg_extension (marked xfail) Remove special `test_groupby_agg_extension` in `test_json.py` as we can now pass the test generally with these fixes to `GroupBy.first` and `GroupBy.last`. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- doc/source/whatsnew/v2.1.0.rst | 2 ++ pandas/core/groupby/groupby.py | 4 ++-- pandas/tests/extension/json/test_json.py | 4 ---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index d0dae450735a3..cb63946164ee0 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -649,6 +649,8 @@ Other - Bug in :meth:`Series.map` when giving a callable to an empty series, the returned series had ``object`` dtype. It now keeps the original dtype (:issue:`52384`) - Bug in :meth:`Series.memory_usage` when ``deep=True`` throw an error with Series of objects and the returned value is incorrect, as it does not take into account GC corrections (:issue:`51858`) - Bug in :meth:`period_range` the default behavior when freq was not passed as an argument was incorrect(:issue:`53687`) +- Bug in :meth:`GroupBy.first` the helper function should return the NA value of the EA if all values are NA (:issue:`39098`) +- Bug in :meth:`GroupBy.last` the helper function should return the NA value of the EA if all values are NA (:issue:`39098`) - Fixed incorrect ``__name__`` attribute of ``pandas._libs.json`` (:issue:`52898`) - diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 8d8302826462e..0c30a43db43c8 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3279,7 +3279,7 @@ def first(x: Series): """Helper function for first item that isn't NA.""" arr = x.array[notna(x.array)] if not len(arr): - return np.nan + return x.array.dtype.na_value return arr[0] if isinstance(obj, DataFrame): @@ -3338,7 +3338,7 @@ def last(x: Series): """Helper function for last item that isn't NA.""" arr = x.array[notna(x.array)] if not len(arr): - return np.nan + x.array.dtype.na_value return arr[-1] if isinstance(obj, DataFrame): diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 0920e70142446..fb2208b304f9e 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -356,10 +356,6 @@ def test_groupby_extension_no_sort(self): """ super().test_groupby_extension_no_sort() - @pytest.mark.xfail(reason="GH#39098: Converts agg result to object") - def test_groupby_agg_extension(self, data_for_grouping): - super().test_groupby_agg_extension(data_for_grouping) - class TestArithmeticOps(BaseJSON, base.BaseArithmeticOpsTests): def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): From 3073ba308e50abb7b01a79b75f86392a6a5f35b5 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 26 Jul 2023 22:02:08 -0400 Subject: [PATCH 2/3] Update v2.1.0.rst Fix sort order. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- doc/source/whatsnew/v2.1.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index cb63946164ee0..2c5cc3826d0d6 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -643,14 +643,14 @@ Other - Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`) - Bug in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`DataFrameGroupBy.shift` when passing both "freq" and "fill_value" silently ignoring "fill_value" instead of raising ``ValueError`` (:issue:`53832`) - Bug in :meth:`DataFrame.shift` with ``axis=1`` on a :class:`DataFrame` with a single :class:`ExtensionDtype` column giving incorrect results (:issue:`53832`) +- Bug in :meth:`GroupBy.first` the helper function should return the NA value of the EA if all values are NA (:issue:`39098`) +- Bug in :meth:`GroupBy.last` the helper function should return the NA value of the EA if all values are NA (:issue:`39098`) - Bug in :meth:`Index.sort_values` when a ``key`` is passed (:issue:`52764`) - Bug in :meth:`Series.align`, :meth:`DataFrame.align`, :meth:`Series.reindex`, :meth:`DataFrame.reindex`, :meth:`Series.interpolate`, :meth:`DataFrame.interpolate`, incorrectly failing to raise with method="asfreq" (:issue:`53620`) - Bug in :meth:`Series.argsort` failing to raise when an invalid ``axis`` is passed (:issue:`54257`) - Bug in :meth:`Series.map` when giving a callable to an empty series, the returned series had ``object`` dtype. It now keeps the original dtype (:issue:`52384`) - Bug in :meth:`Series.memory_usage` when ``deep=True`` throw an error with Series of objects and the returned value is incorrect, as it does not take into account GC corrections (:issue:`51858`) - Bug in :meth:`period_range` the default behavior when freq was not passed as an argument was incorrect(:issue:`53687`) -- Bug in :meth:`GroupBy.first` the helper function should return the NA value of the EA if all values are NA (:issue:`39098`) -- Bug in :meth:`GroupBy.last` the helper function should return the NA value of the EA if all values are NA (:issue:`39098`) - Fixed incorrect ``__name__`` attribute of ``pandas._libs.json`` (:issue:`52898`) - From 541eaf8b34055869f491e80ad941b95d80066497 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Thu, 27 Jul 2023 12:38:41 -0400 Subject: [PATCH 3/3] Apply suggestions from code review Incorporate suggestions from @mroeschke (consolidating docs and fixing a malformed return statement). Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v2.1.0.rst | 3 +-- pandas/core/groupby/groupby.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 2c5cc3826d0d6..eab1da837a640 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -643,8 +643,7 @@ Other - Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`) - Bug in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`DataFrameGroupBy.shift` when passing both "freq" and "fill_value" silently ignoring "fill_value" instead of raising ``ValueError`` (:issue:`53832`) - Bug in :meth:`DataFrame.shift` with ``axis=1`` on a :class:`DataFrame` with a single :class:`ExtensionDtype` column giving incorrect results (:issue:`53832`) -- Bug in :meth:`GroupBy.first` the helper function should return the NA value of the EA if all values are NA (:issue:`39098`) -- Bug in :meth:`GroupBy.last` the helper function should return the NA value of the EA if all values are NA (:issue:`39098`) +- Bug in :meth:`GroupBy.first` and :meth:`GroupBy.last` where an empty group would return ``np.nan`` instead of a an ExtensionArray's NA value (:issue:`39098`) - Bug in :meth:`Index.sort_values` when a ``key`` is passed (:issue:`52764`) - Bug in :meth:`Series.align`, :meth:`DataFrame.align`, :meth:`Series.reindex`, :meth:`DataFrame.reindex`, :meth:`Series.interpolate`, :meth:`DataFrame.interpolate`, incorrectly failing to raise with method="asfreq" (:issue:`53620`) - Bug in :meth:`Series.argsort` failing to raise when an invalid ``axis`` is passed (:issue:`54257`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 0c30a43db43c8..f70fc4303f938 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3338,7 +3338,7 @@ def last(x: Series): """Helper function for last item that isn't NA.""" arr = x.array[notna(x.array)] if not len(arr): - x.array.dtype.na_value + return x.array.dtype.na_value return arr[-1] if isinstance(obj, DataFrame):