diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index e61f50afea838..77bba6c431053 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -432,7 +432,7 @@ Other - Bug in :class:`Index` constructor sometimes silently ignorning a specified ``dtype`` (:issue:`38879`) - Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`) - Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`) -- +- Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`) .. --------------------------------------------------------------------------- diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 494d9ac60dd96..024bfb02fe09d 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -459,13 +459,24 @@ def assert_attr_equal(attr: str, left, right, obj: str = "Attributes"): ): # np.nan return True + elif ( + isinstance(left_attr, (np.datetime64, np.timedelta64)) + and isinstance(right_attr, (np.datetime64, np.timedelta64)) + and type(left_attr) is type(right_attr) + and np.isnat(left_attr) + and np.isnat(right_attr) + ): + # np.datetime64("nat") or np.timedelta64("nat") + return True try: result = left_attr == right_attr except TypeError: # datetimetz on rhs may raise TypeError result = False - if not isinstance(result, bool): + if (left_attr is pd.NA) ^ (right_attr is pd.NA): + result = False + elif not isinstance(result, bool): result = result.all() if result: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 51920f1613c12..e55ddbcc783d0 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -715,6 +715,8 @@ def factorize( values, dtype = _ensure_data(values) if 
original.dtype.kind in ["m", "M"]: + # Note: factorize_array will cast NaT bc it has a __int__ + # method, but will not cast the more-correct dtype.type("nat") na_value = iNaT else: na_value = None diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 2c69096e56973..f87f40cd55e2c 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -382,6 +382,8 @@ def __init__( stacklevel=2, ) data = np.asarray(data, dtype="datetime64[ns]") + if fill_value is NaT: + fill_value = np.datetime64("NaT", "ns") data = np.asarray(data) sparse_values, sparse_index, fill_value = make_sparse( data, kind=kind, fill_value=fill_value, dtype=dtype diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index f0455c01fa085..0db0b1f6a97ef 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -559,14 +559,14 @@ def na_value_for_dtype(dtype, compat: bool = True): >>> na_value_for_dtype(np.dtype('bool')) False >>> na_value_for_dtype(np.dtype('datetime64[ns]')) - NaT + numpy.datetime64('NaT') """ dtype = pandas_dtype(dtype) if is_extension_array_dtype(dtype): return dtype.na_value - if needs_i8_conversion(dtype): - return NaT + elif needs_i8_conversion(dtype): + return dtype.type("NaT", "ns") elif is_float_dtype(dtype): return np.nan elif is_integer_dtype(dtype): diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py index 8cd0d29a34ec8..58fedbd3e4231 100644 --- a/pandas/tests/arrays/sparse/test_dtype.py +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -14,8 +14,8 @@ ("float", np.nan), ("bool", False), ("object", np.nan), - ("datetime64[ns]", pd.NaT), - ("timedelta64[ns]", pd.NaT), + ("datetime64[ns]", np.datetime64("NaT", "ns")), + ("timedelta64[ns]", np.timedelta64("NaT", "ns")), ], ) def test_inferred_dtype(dtype, fill_value): diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 
c02185dd82043..0d92ef02e07c8 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -472,8 +472,8 @@ def test_array_equivalent_nested(): "dtype, na_value", [ # Datetime-like - (np.dtype("M8[ns]"), NaT), - (np.dtype("m8[ns]"), NaT), + (np.dtype("M8[ns]"), np.datetime64("NaT", "ns")), + (np.dtype("m8[ns]"), np.timedelta64("NaT", "ns")), (DatetimeTZDtype.construct_from_string("datetime64[ns, US/Eastern]"), NaT), (PeriodDtype("M"), NaT), # Integer @@ -499,7 +499,11 @@ def test_array_equivalent_nested(): ) def test_na_value_for_dtype(dtype, na_value): result = na_value_for_dtype(dtype) - assert result is na_value + # identity check doesn't work for datetime64/timedelta64("NaT") bc they + # are not singletons + assert result is na_value or ( + isna(result) and isna(na_value) and type(result) is type(na_value) + ) class TestNAObj: diff --git a/pandas/tests/util/test_assert_attr_equal.py b/pandas/tests/util/test_assert_attr_equal.py new file mode 100644 index 0000000000000..6fad38c2cd44e --- /dev/null +++ b/pandas/tests/util/test_assert_attr_equal.py @@ -0,0 +1,30 @@ +from types import SimpleNamespace + +import pytest + +from pandas.core.dtypes.common import is_float + +import pandas._testing as tm + + +def test_assert_attr_equal(nulls_fixture): + obj = SimpleNamespace() + obj.na_value = nulls_fixture + assert tm.assert_attr_equal("na_value", obj, obj) + + +def test_assert_attr_equal_different_nulls(nulls_fixture, nulls_fixture2): + obj = SimpleNamespace() + obj.na_value = nulls_fixture + + obj2 = SimpleNamespace() + obj2.na_value = nulls_fixture2 + + if nulls_fixture is nulls_fixture2: + assert tm.assert_attr_equal("na_value", obj, obj2) + elif is_float(nulls_fixture) and is_float(nulls_fixture2): + # we consider float("nan") and np.float64("nan") to be equivalent + assert tm.assert_attr_equal("na_value", obj, obj2) + else: + with pytest.raises(AssertionError, match='"na_value" are different'): + tm.assert_attr_equal("na_value", obj, 
obj2)