Skip to content

ENH: recognize Decimal("NaN") in pd.isna #39409

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Feb 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ Missing
^^^^^^^

- Bug in :class:`Grouper` now correctly propagates ``dropna`` argument and :meth:`DataFrameGroupBy.transform` now correctly handles missing values for ``dropna=True`` (:issue:`35612`)
-
- Bug in :func:`isna`, and :meth:`Series.isna`, :meth:`Index.isna`, :meth:`DataFrame.isna` (and the corresponding ``notna`` functions) not recognizing ``Decimal("NaN")`` objects (:issue:`39409`)
-

MultiIndex
Expand Down
18 changes: 17 additions & 1 deletion pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from decimal import Decimal
import numbers

import cython
Expand Down Expand Up @@ -36,6 +37,8 @@ cdef:

bint is_32bit = not IS64

type cDecimal = Decimal # for faster isinstance checks


cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False):
"""
Expand Down Expand Up @@ -86,6 +89,8 @@ cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False
and util.is_timedelta64_object(right)
and get_timedelta64_value(right) == NPY_NAT
)
elif is_decimal_na(left):
return is_decimal_na(right)
return False


Expand Down Expand Up @@ -113,7 +118,18 @@ cpdef bint checknull(object val):
The difference between `checknull` and `checknull_old` is that `checknull`
does *not* consider INF or NEGINF to be NA.
"""
return val is C_NA or is_null_datetimelike(val, inat_is_null=False)
return (
val is C_NA
or is_null_datetimelike(val, inat_is_null=False)
or is_decimal_na(val)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the list of what is considered null in docstring maybe could be updated.

also for consistency when using pandas.options.mode.use_inf_as_na, what about checknull_old?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the list of what is considered null in docstring maybe could be updated.

just added this to my next "collected misc" branch

also for consistency when using pandas.options.mode.use_inf_as_na, what about checknull_old?

i guess you're referring to Decimal("inf")? my inclination is to let that sleeping dog lie

)


cdef inline bint is_decimal_na(object val):
    """
    Check whether `val` is a decimal.Decimal NaN, e.g. Decimal("NaN").

    Uses the NaN property that a NaN never compares equal to itself
    (val != val).  `cDecimal` is a module-level alias of decimal.Decimal
    kept for faster isinstance checks.
    """
    return isinstance(val, cDecimal) and val != val


cpdef bint checknull_old(object val):
Expand Down
3 changes: 2 additions & 1 deletion pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import collections
from datetime import datetime
from decimal import Decimal
from functools import wraps
import operator
import os
Expand Down Expand Up @@ -146,7 +147,7 @@
+ BYTES_DTYPES
)

NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA]
NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")]

EMPTY_STRING_PATTERN = re.compile("^$")

Expand Down
19 changes: 3 additions & 16 deletions pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import numpy as np

from pandas._libs.lib import no_default
from pandas._libs.missing import is_matching_na
import pandas._libs.testing as _testing

from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -457,22 +458,8 @@ def assert_attr_equal(attr: str, left, right, obj: str = "Attributes"):

if left_attr is right_attr:
return True
elif (
is_number(left_attr)
and np.isnan(left_attr)
and is_number(right_attr)
and np.isnan(right_attr)
):
# np.nan
return True
elif (
isinstance(left_attr, (np.datetime64, np.timedelta64))
and isinstance(right_attr, (np.datetime64, np.timedelta64))
and type(left_attr) is type(right_attr)
and np.isnat(left_attr)
and np.isnat(right_attr)
):
# np.datetime64("nat") or np.timedelta64("nat")
elif is_matching_na(left_attr, right_attr):
# e.g. both np.nan, both NaT, both pd.NA, ...
return True

try:
Expand Down
2 changes: 1 addition & 1 deletion pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ def nselect_method(request):
# ----------------------------------------------------------------
# Missing values & co.
# ----------------------------------------------------------------
@pytest.fixture(params=tm.NULL_OBJECTS, ids=str)
@pytest.fixture(params=tm.NULL_OBJECTS, ids=lambda x: type(x).__name__)
def nulls_fixture(request):
"""
Fixture for each null type in pandas.
Expand Down
19 changes: 12 additions & 7 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
missing types & inference
"""
from decimal import Decimal
from functools import partial

import numpy as np
Expand Down Expand Up @@ -610,20 +611,24 @@ def is_valid_na_for_dtype(obj, dtype: DtypeObj) -> bool:
"""
if not lib.is_scalar(obj) or not isna(obj):
return False
if dtype.kind == "M":
elif dtype.kind == "M":
if isinstance(dtype, np.dtype):
# i.e. not tzaware
return not isinstance(obj, np.timedelta64)
return not isinstance(obj, (np.timedelta64, Decimal))
# we have to rule out tznaive dt64("NaT")
return not isinstance(obj, (np.timedelta64, np.datetime64))
if dtype.kind == "m":
return not isinstance(obj, np.datetime64)
if dtype.kind in ["i", "u", "f", "c"]:
return not isinstance(obj, (np.timedelta64, np.datetime64, Decimal))
elif dtype.kind == "m":
return not isinstance(obj, (np.datetime64, Decimal))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

elif dtype.kind in ["i", "u", "f", "c"]:
# Numeric
return obj is not NaT and not isinstance(obj, (np.datetime64, np.timedelta64))

elif dtype == np.dtype(object):
# This is needed for Categorical, but is kind of weird
return True

# must be PeriodDType
return not isinstance(obj, (np.datetime64, np.timedelta64))
return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal))


def isna_all(arr: ArrayLike) -> bool:
Expand Down
16 changes: 15 additions & 1 deletion pandas/tests/dtypes/cast/test_promote.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import datetime
from decimal import Decimal

import numpy as np
import pytest
Expand Down Expand Up @@ -538,7 +539,20 @@ def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype_reduced, nulls_fi
fill_value = nulls_fixture
dtype = np.dtype(any_numpy_dtype_reduced)

if is_integer_dtype(dtype) and fill_value is not NaT:
if isinstance(fill_value, Decimal):
# Subject to change, but ATM (When Decimal(NAN) is being added to nulls_fixture)
# this is the existing behavior in maybe_promote,
# hinges on is_valid_na_for_dtype
if dtype.kind in ["i", "u", "f", "c"]:
if dtype.kind in ["i", "u"]:
expected_dtype = np.dtype(np.float64)
else:
expected_dtype = dtype
exp_val_for_scalar = np.nan
else:
expected_dtype = np.dtype(object)
exp_val_for_scalar = fill_value
elif is_integer_dtype(dtype) and fill_value is not NaT:
# integer + other missing value (np.nan / None) casts to float
expected_dtype = np.float64
exp_val_for_scalar = np.nan
Expand Down
61 changes: 48 additions & 13 deletions pandas/tests/dtypes/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,43 @@ def test_period(self):
tm.assert_series_equal(isna(s), exp)
tm.assert_series_equal(notna(s), ~exp)

def test_decimal(self):
# scalars GH#23530
a = Decimal(1.0)
assert pd.isna(a) is False
assert pd.notna(a) is True

b = Decimal("NaN")
assert pd.isna(b) is True
assert pd.notna(b) is False

# array
arr = np.array([a, b])
expected = np.array([False, True])
result = pd.isna(arr)
tm.assert_numpy_array_equal(result, expected)

result = pd.notna(arr)
tm.assert_numpy_array_equal(result, ~expected)

# series
ser = Series(arr)
expected = Series(expected)
result = pd.isna(ser)
tm.assert_series_equal(result, expected)

result = pd.notna(ser)
tm.assert_series_equal(result, ~expected)

# index
idx = pd.Index(arr)
expected = np.array([False, True])
result = pd.isna(idx)
tm.assert_numpy_array_equal(result, expected)

result = pd.notna(idx)
tm.assert_numpy_array_equal(result, ~expected)


@pytest.mark.parametrize("dtype_equal", [True, False])
def test_array_equivalent(dtype_equal):
Expand Down Expand Up @@ -619,24 +656,22 @@ def test_empty_like(self):


class TestLibMissing:
def test_checknull(self):
for value in na_vals:
assert libmissing.checknull(value)
@pytest.mark.parametrize("func", [libmissing.checknull, isna])
def test_checknull(self, func):
for value in na_vals + sometimes_na_vals:
assert func(value)

for value in inf_vals:
assert not libmissing.checknull(value)
assert not func(value)

for value in int_na_vals:
assert not libmissing.checknull(value)

for value in sometimes_na_vals:
assert not libmissing.checknull(value)
assert not func(value)

for value in never_na_vals:
assert not libmissing.checknull(value)
assert not func(value)

def test_checknull_old(self):
for value in na_vals:
for value in na_vals + sometimes_na_vals:
assert libmissing.checknull_old(value)

for value in inf_vals:
Expand All @@ -645,9 +680,6 @@ def test_checknull_old(self):
for value in int_na_vals:
assert not libmissing.checknull_old(value)

for value in sometimes_na_vals:
assert not libmissing.checknull_old(value)

for value in never_na_vals:
assert not libmissing.checknull_old(value)

Expand Down Expand Up @@ -682,6 +714,9 @@ def test_is_matching_na(self, nulls_fixture, nulls_fixture2):
elif is_float(left) and is_float(right):
# np.nan vs float("NaN") we consider as matching
assert libmissing.is_matching_na(left, right)
elif type(left) is type(right):
# e.g. both Decimal("NaN")
assert libmissing.is_matching_na(left, right)
else:
assert not libmissing.is_matching_na(left, right)

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/extension/base/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ def test_contains(self, data, data_missing):

# the data can never contain other nan-likes than na_value
for na_value_obj in tm.NULL_OBJECTS:
if na_value_obj is na_value:
if na_value_obj is na_value or type(na_value_obj) == type(na_value):
# type check for e.g. two instances of Decimal("NAN")
continue
assert na_value_obj not in data
assert na_value_obj not in data_missing
Expand Down
13 changes: 0 additions & 13 deletions pandas/tests/extension/decimal/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,13 +178,6 @@ class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests):
class TestMethods(BaseDecimal, base.BaseMethodsTests):
@pytest.mark.parametrize("dropna", [True, False])
def test_value_counts(self, all_data, dropna, request):
if any(x != x for x in all_data):
mark = pytest.mark.xfail(
reason="tm.assert_series_equal incorrectly raises",
raises=AssertionError,
)
request.node.add_marker(mark)

all_data = all_data[:10]
if dropna:
other = np.array(all_data[~all_data.isna()])
Expand Down Expand Up @@ -212,12 +205,6 @@ class TestCasting(BaseDecimal, base.BaseCastingTests):


class TestGroupby(BaseDecimal, base.BaseGroupbyTests):
def test_groupby_apply_identity(self, data_for_grouping, request):
if any(x != x for x in data_for_grouping):
mark = pytest.mark.xfail(reason="tm.assert_series_equal raises incorrectly")
request.node.add_marker(mark)
super().test_groupby_apply_identity(data_for_grouping)

def test_groupby_agg_extension(self, data_for_grouping):
super().test_groupby_agg_extension(data_for_grouping)

Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/indexes/test_index_new.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""
Tests for the Index constructor conducting inference.
"""
from decimal import Decimal

import numpy as np
import pytest

Expand Down Expand Up @@ -89,6 +91,10 @@ def test_constructor_infer_periodindex(self):
def test_constructor_infer_nat_dt_like(
self, pos, klass, dtype, ctor, nulls_fixture, request
):
if isinstance(nulls_fixture, Decimal):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe should have a nulls_fixture_compatible_datetimelike ?

# We dont cast these to datetime64/timedelta64
return

expected = klass([NaT, NaT])
assert expected.dtype == dtype
data = [ctor]
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,14 @@ def test_numeric_compat(self):
def test_insert_na(self, nulls_fixture):
# GH 18295 (test missing)
index = self.create_index()
na_val = nulls_fixture

if nulls_fixture is pd.NaT:
if na_val is pd.NaT:
expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object)
else:
expected = Float64Index([index[0], np.nan] + list(index[1:]))
result = index.insert(1, nulls_fixture)

result = index.insert(1, na_val)
tm.assert_index_equal(result, expected)


Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
from datetime import timedelta
from decimal import Decimal
from io import StringIO
import json
import os
Expand Down Expand Up @@ -1742,8 +1743,12 @@ def test_json_pandas_na(self):
result = DataFrame([[pd.NA]]).to_json()
assert result == '{"0":{"0":null}}'

def test_json_pandas_nulls(self, nulls_fixture):
def test_json_pandas_nulls(self, nulls_fixture, request):
# GH 31615
if isinstance(nulls_fixture, Decimal):
mark = pytest.mark.xfail(reason="not implemented")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

xref #28609 cc @WillAyd

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you wanted to fix this here it would just require adding the same condition in the ujson code that we have for checking floats.

if (npy_isnan(val) || npy_isinf(val)) {

The decimal check is only a few branches below that

request.node.add_marker(mark)

result = DataFrame([[nulls_fixture]]).to_json()
assert result == '{"0":{"0":null}}'

Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
datetime,
timedelta,
)
from decimal import Decimal
import locale

from dateutil.parser import parse
Expand Down Expand Up @@ -2446,9 +2447,15 @@ def test_nullable_integer_to_datetime():

@pytest.mark.parametrize("klass", [np.array, list])
def test_na_to_datetime(nulls_fixture, klass):
    na_val = nulls_fixture
    data = klass([na_val])

    if isinstance(na_val, Decimal):
        # Decimal("NaN") is not a datetime-like missing value, so
        # to_datetime refuses to coerce it
        with pytest.raises(TypeError, match="not convertible to datetime"):
            pd.to_datetime(data)
    else:
        # every other null coerces to NaT
        result = pd.to_datetime(data)
        assert result[0] is pd.NaT


def test_empty_string_datetime_coerce__format():
Expand Down
Loading