Support uncertainties as EA Dtype

MichaelTiemannOSC · MichaelTiemannOSC · commit 4c04e30d5c54 · 2023-07-02T10:04:12.000-04:00
Changes, made in parallel with Pint and Pint-Pandas, to support uncertainties as an EA Dtype. See hgrecco/pint#1615 and hgrecco/pint-pandas#140. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com>
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -870,7 +870,8 @@ def take(
         # we only fill where the indexer is null
         # not existing missing values
         # TODO(jreback) what if we have a non-na float as a fill value?
-        if allow_fill and notna(fill_value):
+        # A NaN from `uncertainties` is a scalar but is not detected by `isna`, so use the fact that NaN != NaN
+        if allow_fill and notna(fill_value) and fill_value==fill_value:
             fill_mask = np.asarray(indexer) == -1
             result[fill_mask] = fill_value
             mask = mask ^ fill_mask
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -3200,7 +3200,10 @@ def first(x: Series):
                 """Helper function for first item that isn't NA."""
                 arr = x.array[notna(x.array)]
                 if not len(arr):
-                    return np.nan
+                    nan_arr = x.array[isna(x.array)]
+                    if not len(nan_arr):
+                        return np.nan
+                    return nan_arr[0]
                 return arr[0]
 
             if isinstance(obj, DataFrame):
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -587,7 +587,7 @@ def nkeys(self) -> int:
 
     def get_iterator(
         self, data: NDFrameT, axis: AxisInt = 0
-    ) -> Iterator[tuple[Hashable, NDFrameT]]:
+    ) -> Iterator[tuple[Hashable, NDFrameT]]:  # Doesn't work with non-hashable EA types
         """
         Groupby iterator
 
diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py
@@ -28,7 +28,7 @@
 
 
 def make_data():
-    return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False]
+    return [True, False] * 4 + [pd.NA] + [True, False] * 44 + [pd.NA] + [True, False]
 
 
 @pytest.fixture
@@ -48,7 +48,7 @@ def data_for_twos(dtype):
 
 @pytest.fixture
 def data_missing(dtype):
-    return pd.array([np.nan, True], dtype=dtype)
+    return pd.array([pd.NA, True], dtype=dtype)
 
 
 @pytest.fixture
@@ -58,7 +58,7 @@ def data_for_sorting(dtype):
 
 @pytest.fixture
 def data_missing_for_sorting(dtype):
-    return pd.array([True, np.nan, False], dtype=dtype)
+    return pd.array([True, pd.NA, False], dtype=dtype)
 
 
 @pytest.fixture
@@ -76,7 +76,7 @@ def na_value():
 def data_for_grouping(dtype):
     b = True
     a = False
-    na = np.nan
+    na = pd.NA
     return pd.array([b, b, na, na, a, a, b], dtype=dtype)
 
 
@@ -147,7 +147,7 @@ def _check_op(self, obj, op, other, op_name, exc=NotImplementedError):
                 expected = expected.astype("Float64")
             if op_name == "__rpow__":
                 # for rpow, combine does not propagate NaN
-                expected[result.isna()] = np.nan
+                expected[result.isna()] = pd.NA
             self.assert_equal(result, expected)
         else:
             with pytest.raises(exc):