Support uncertainties as EA Dtype #53970

Closed
Changes from 7 commits
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v2.1.0.rst
@@ -90,6 +90,7 @@ Other enhancements
- :meth:`MultiIndex.sortlevel` and :meth:`Index.sortlevel` gained a new keyword ``na_position`` (:issue:`51612`)
- :meth:`arrays.DatetimeArray.map`, :meth:`arrays.TimedeltaArray.map` and :meth:`arrays.PeriodArray.map` can now take a ``na_action`` argument (:issue:`51644`)
- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`).
- :meth:`arrays.BaseMaskedArray.take` now handles a non-NA fill value that compares unequal to itself, such as the ``ufloat`` NaN from the ``uncertainties`` package (see `PR <https://github.com/pandas-dev/pandas/pull/53970>`_)
- Add :meth:`diff()` and :meth:`round()` for :class:`Index` (:issue:`19708`)
- Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`)
- Added to the escape mode "latex-math" preserving without escaping all characters between "\(" and "\)" in formatter (:issue:`51903`)
@@ -107,6 +108,7 @@ Other enhancements
- :meth:`Categorical.from_codes` has gotten a ``validate`` parameter (:issue:`50975`)
- :meth:`DataFrame.unstack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`)
- :meth:`DataFrameGroupby.agg` and :meth:`DataFrameGroupby.transform` now support grouping by multiple keys when the index is not a :class:`MultiIndex` for ``engine="numba"`` (:issue:`53486`)
- :meth:`GroupBy.first` now returns the first NA value when a series contains only NA values (which might not be ``np.nan``), instead of unconditionally returning ``np.nan`` (see `PR <https://github.com/pandas-dev/pandas/pull/53970>`_)
Member:

Not sure what this is describing.

Contributor Author:

This is describing a change of behavior of the GroupBy.first helper function to not assume that np.nan is a valid NA value for the EA in question. The idea of the change (which, if valid, should also be applied to GroupBy.last) is that if we cannot find any non-NA values in the array, but if there are NA values, the return value should be the first (or last) NA value in the array. I did this because I thought that my EA could only deal with NaN values that were UFloats (having a nominal value and a std_dev value). But as I traced through the code and observed things more closely, I see that the EA does have tolerance for np.nan as an element among UFloats, giving np.nan as the nominal value and zero as the std_dev. So I will remove this change (by being less strict in some of my own assumptions in checking EA consistency).
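The behavior described above can be sketched as a standalone helper (a minimal sketch mirroring the idea, not the pandas internals; the name first_non_na is hypothetical):

```python
import numpy as np
import pandas as pd

def first_non_na(arr):
    """Return the first non-NA element of an extension array.

    If every element is NA, return the first NA value itself rather
    than assuming np.nan is a valid NA for the array's dtype.
    """
    non_na = arr[pd.notna(arr)]
    if len(non_na):
        return non_na[0]
    na_vals = arr[pd.isna(arr)]
    if len(na_vals):
        return na_vals[0]  # preserve the EA's own NA representation
    return np.nan  # empty input: fall back to np.nan as before

print(first_non_na(pd.array([pd.NA, 3], dtype="Int64")))
```

For an all-NA ``Int64`` array this returns pd.NA, the array's own NA value, rather than np.nan.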

- :meth:`Series.explode` now supports pyarrow-backed list types (:issue:`53602`)
- :meth:`Series.str.join` now supports ``ArrowDtype(pa.string())`` (:issue:`53646`)
- :meth:`SeriesGroupby.agg` and :meth:`DataFrameGroupby.agg` now support passing in multiple functions for ``engine="numba"`` (:issue:`53486`)
@@ -552,6 +554,7 @@ Other
- Bug in :meth:`Series.map` when giving a callable to an empty series, the returned series had ``object`` dtype. It now keeps the original dtype (:issue:`52384`)
- Bug in :meth:`Series.memory_usage` when ``deep=True`` throw an error with Series of objects and the returned value is incorrect, as it does not take into account GC corrections (:issue:`51858`)
- Fixed incorrect ``__name__`` attribute of ``pandas._libs.json`` (:issue:`52898`)
- Change ``~tests/extension/test_boolean.py`` to use ``pd.NA`` instead of ``np.nan`` (following similar patterns in ``~tests/extension/test_integer.py`` and ``~tests/extension/test_float.py``), updating :func:`make_data`, :func:`data_missing`, :func:`data_missing_for_sorting`, :func:`data_for_grouping`, and :func:`_check_op` (see `PR <https://github.com/pandas-dev/pandas/pull/53970>`_)

.. ***DO NOT USE THIS SECTION***

4 changes: 3 additions & 1 deletion pandas/core/arrays/masked.py
@@ -870,7 +870,9 @@ def take(
# we only fill where the indexer is null
# not existing missing values
# TODO(jreback) what if we have a non-na float as a fill value?
if allow_fill and notna(fill_value):
# NaN with uncertainties is scalar but does not register as `isna`,
# so use fact that NaN != NaN
if allow_fill and notna(fill_value) and fill_value == fill_value:
Member:

how would you even get here with uncertainties? is your EA subclassing BaseMaskedArray?

Contributor Author:

The EA itself is a very vanilla EA. The test suite instantiates the EA in myriad ways for test purposes. In this case the EA gets instantiated as a MaskedArray (which derives from BaseMaskedArray) for testing MaskedArray functionality. I think the test suite has every right to rigorously instantiate EAs in every way imaginable, which it does.

Member:

I'm still confused here. The EA I see in hgrecco/pint-pandas#140 subclasses ExtensionArray but not BaseMaskedArray, so I don't see how the code change here would affect it.

Contributor Author:

OK... I looked more deeply into this when I got back to the computer where I did the original work (I hadn't uploaded some notes to the cloud). I'm pretty sure it's due to some conditions created when trying to merge two DataFrames that I dummied up in my notes. I'll create a test case that can go into the Pandas test suite. It won't depend on uncertainties, but if and when used with uncertainties, it should trigger the path in question. Should be done by Monday.

Contributor Author:

Well, that was a lot more challenging to reconstruct than I expected, but I re-created the condition. TL;DR: now that I use pd.NA as my NA value for my particular EA, this change that caught your eye is indeed unnecessary and I can revert it.

But, since you asked, here's the path I took initially. Starting with PintType we define this:

    @property
    def na_value(self):
        if HAS_UNCERTAINTIES:
            return self.ureg.Quantity(_ufloat_nan, self.units)
        else:
            return self.ureg.Quantity(np.nan, self.units)

The idea (before settling on pd.NA, which works without this particular change) was to use a NaN to which we could cast any numerical type (we cannot downcast uncertainties to float).

In the test code we set up data_missing and data_missing_for_sorting to use ufloat(np.nan, 0) as the NA value where appropriate. The test case test_groupby_agg_extension uses data_for_grouping, which is called with numeric_dtype as None, and this is where we start going off the rails.

Here's the data_for_grouping code:

@pytest.fixture
def data_for_grouping(numeric_dtype, USE_UNCERTAINTIES):
    a = 1.0
    b = 2.0**32 + 1
    c = 2.0**32 + 10
    if USE_UNCERTAINTIES:
        a = a + ufloat(0, 0)
        b = b + ufloat(0, 0)
        c = c + ufloat(0, 0)
        _n = _ufloat_nan
        numeric_dtype = None
    elif numeric_dtype:
        numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype)
        _n = np.nan
    else:
        _n = pd.NA
    return PintArray.from_1darray_quantity(
        ureg.Quantity(pd.array([b, b, _n, _n, a, a, b, c], dtype=numeric_dtype), ureg.m)
    )

The way the test code is run, because numeric_dtype is None we get:

(Pdb) data_for_grouping
<PintArray>
[4294967297.0, 4294967297.0, <NA>, <NA>, 1.0, 1.0, 4294967297.0, 4294967306.0]
Length: 8, dtype: pint[meter]

When we execute expected = df.iloc[[0, 2, 4, 7]] there comes a point where we come here (/Users/michael/Documents/GitHub/MichaelTiemannOSC/pandas-dev/pandas/core/internals/managers.py(646)<listcomp>()):

644  	        else:
645  	            new_blocks = [
646  ->	                blk.take_nd(
647  	                    indexer,
648  	                    axis=1,
649  	                    fill_value=(
650  	                        fill_value if fill_value is not None else blk.fill_value
651  	                    ),

With fill_value as None, we inherit the fill value from the PintArray, which is the uncertain NaN. Here's the stack trace down from there:

  pandas/core/internals/managers.py(645)reindex_indexer()
-> new_blocks = [
  pandas/core/internals/managers.py(646)<listcomp>()
-> blk.take_nd(
  pandas/core/internals/blocks.py(975)take_nd()
-> new_values = algos.take_nd(
  pandas/core/array_algos/take.py(115)take_nd()
-> return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
  pint-pandas/pint_pandas/pint_array.py(523)take()
-> result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill)
  pandas/core/algorithms.py(1307)take()
-> result = take_nd(
  pandas/core/array_algos/take.py(115)take_nd()
-> return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
> pandas/core/arrays/masked.py(879)take()
-> result[fill_mask] = fill_value

In this particular case, because none of the indexes are -1, no filling is going to happen. But should we have things to fill, they'd be filled with the uncertain NaN. And because this code really doesn't want to add new NaNs to the array, I put this extra defensive code in. And that's how we hit this condition.

I'm going to take this extraneous test out (fill_value == fill_value). Does this answer your question?
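The guard being discussed can be reproduced outside pandas. Below is a minimal sketch: FakeUFloatNaN and is_usable_fill are hypothetical names standing in for uncertainties.ufloat(np.nan, 0) and the patched condition, respectively — not the real library or the pandas code.

```python
import numpy as np
import pandas as pd

class FakeUFloatNaN:
    """Stand-in for uncertainties.ufloat(np.nan, 0): a NaN-like scalar
    that pd.isna does not recognize as missing."""

    def __eq__(self, other):
        return False  # NaN semantics: never equal, not even to itself

    def __hash__(self):
        return 0

def is_usable_fill(fill_value):
    # Mirrors the patched guard: fill only when the value is notna
    # AND equal to itself (a NaN-like scalar fails the self-comparison).
    return bool(pd.notna(fill_value)) and bool(fill_value == fill_value)

print(is_usable_fill(1.5))              # True
print(is_usable_fill(np.nan))           # False: caught by notna
print(is_usable_fill(FakeUFloatNaN()))  # False: caught by self-comparison
```

The self-comparison relies on the IEEE 754 convention that NaN compares unequal to everything, including itself, which the uncertain-NaN scalar imitates via its ``__eq__``.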

fill_mask = np.asarray(indexer) == -1
result[fill_mask] = fill_value
mask = mask ^ fill_mask
5 changes: 4 additions & 1 deletion pandas/core/groupby/groupby.py
@@ -3200,7 +3200,10 @@ def first(x: Series):
"""Helper function for first item that isn't NA."""
arr = x.array[notna(x.array)]
if not len(arr):
return np.nan
nan_arr = x.array[isna(x.array)]
if not len(nan_arr):
return np.nan
return nan_arr[0]
return arr[0]

if isinstance(obj, DataFrame):
2 changes: 1 addition & 1 deletion pandas/core/groupby/ops.py
@@ -587,7 +587,7 @@ def nkeys(self) -> int:

def get_iterator(
self, data: NDFrameT, axis: AxisInt = 0
) -> Iterator[tuple[Hashable, NDFrameT]]:
) -> Iterator[tuple[Hashable, NDFrameT]]: # Doesn't work with non-hashable EA types
Member:

pls remove

"""
Groupby iterator

7 changes: 4 additions & 3 deletions pandas/tests/extension/json/test_json.py
@@ -356,9 +356,10 @@ def test_groupby_extension_no_sort(self):
"""
super().test_groupby_extension_no_sort()

@pytest.mark.xfail(reason="GH#39098: Converts agg result to object")
def test_groupby_agg_extension(self, data_for_grouping):
super().test_groupby_agg_extension(data_for_grouping)
# GH#39098 is now fixed, so we don't need to XFAIL, nor subclass this test
# @pytest.mark.xpass(reason="GH#39098: Converts agg result to object")
# def test_groupby_agg_extension(self, data_for_grouping):
# super().test_groupby_agg_extension(data_for_grouping)


class TestArithmeticOps(BaseJSON, base.BaseArithmeticOpsTests):
10 changes: 5 additions & 5 deletions pandas/tests/extension/test_boolean.py
@@ -28,7 +28,7 @@


def make_data():
return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False]
return [True, False] * 4 + [pd.NA] + [True, False] * 44 + [pd.NA] + [True, False]
Member:

why is this necessary?

Contributor Author:

I made this change to track the identical changes in test_integer.py and test_floating.py. It does not matter whether the NA values in this case are pd.NA, np.nan, or a mixture, but I did think it better to make the three files consistent. I'm happy to revert this change if you prefer.
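The claimed equivalence is easy to check: for the masked boolean dtype, np.nan and pd.NA in the input both end up as the same missing marker (a quick sketch):

```python
import numpy as np
import pandas as pd

# Two constructions of the same masked BooleanArray, differing only in
# which NA sentinel appears in the input list.
a = pd.array([True, np.nan, False], dtype="boolean")
b = pd.array([True, pd.NA, False], dtype="boolean")

# Both inputs yield the same array; the missing slot is pd.NA either way.
print(a[1] is pd.NA and b[1] is pd.NA)    # True
print(pd.Series(a).equals(pd.Series(b)))  # True
```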

Member:

Agreed this should be removed. It's ideal to minimize extraneous changes



@pytest.fixture
@@ -48,7 +48,7 @@ def data_for_twos(dtype):

@pytest.fixture
def data_missing(dtype):
return pd.array([np.nan, True], dtype=dtype)
return pd.array([pd.NA, True], dtype=dtype)


@pytest.fixture
@@ -58,7 +58,7 @@ def data_for_sorting(dtype):

@pytest.fixture
def data_missing_for_sorting(dtype):
return pd.array([True, np.nan, False], dtype=dtype)
return pd.array([True, pd.NA, False], dtype=dtype)


@pytest.fixture
Expand All @@ -76,7 +76,7 @@ def na_value():
def data_for_grouping(dtype):
b = True
a = False
na = np.nan
na = pd.NA
return pd.array([b, b, na, na, a, a, b], dtype=dtype)


@@ -147,7 +147,7 @@ def _check_op(self, obj, op, other, op_name, exc=NotImplementedError):
expected = expected.astype("Float64")
if op_name == "__rpow__":
# for rpow, combine does not propagate NaN
expected[result.isna()] = np.nan
expected[result.isna()] = pd.NA
self.assert_equal(result, expected)
else:
with pytest.raises(exc):