From ae292d8725455c889f35a7cfce627557356504fc Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 28 Jan 2021 17:10:48 -0800 Subject: [PATCH 1/3] BUG: assert_attr_equal with numpy nat or pd.NA --- pandas/_testing/asserters.py | 13 ++++++++- pandas/core/algorithms.py | 4 ++- pandas/core/arrays/sparse/array.py | 2 ++ pandas/core/dtypes/missing.py | 4 +-- pandas/tests/arrays/sparse/test_dtype.py | 4 +-- pandas/tests/dtypes/test_missing.py | 10 ++++--- pandas/tests/util/test_assert_attr_equal.py | 30 +++++++++++++++++++++ 7 files changed, 58 insertions(+), 9 deletions(-) create mode 100644 pandas/tests/util/test_assert_attr_equal.py diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 494d9ac60dd96..024bfb02fe09d 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -459,13 +459,24 @@ def assert_attr_equal(attr: str, left, right, obj: str = "Attributes"): ): # np.nan return True + elif ( + isinstance(left_attr, (np.datetime64, np.timedelta64)) + and isinstance(right_attr, (np.datetime64, np.timedelta64)) + and type(left_attr) is type(right_attr) + and np.isnat(left_attr) + and np.isnat(right_attr) + ): + # np.datetime64("nat") or np.timedelta64("nat") + return True try: result = left_attr == right_attr except TypeError: # datetimetz on rhs may raise TypeError result = False - if not isinstance(result, bool): + if (left_attr is pd.NA) ^ (right_attr is pd.NA): + result = False + elif not isinstance(result, bool): result = result.all() if result: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ae7e8191fc482..917024f93745b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -715,7 +715,9 @@ def factorize( values, dtype = _ensure_data(values) if original.dtype.kind in ["m", "M"]: - na_value = na_value_for_dtype(original.dtype) + # Note: factorize_array will cast NaT bc it has a __int__ + # method, but will not cast the more-correct dtype.type("nat") + na_value = iNaT else: 
na_value = None diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 2c69096e56973..f87f40cd55e2c 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -382,6 +382,8 @@ def __init__( stacklevel=2, ) data = np.asarray(data, dtype="datetime64[ns]") + if fill_value is NaT: + fill_value = np.datetime64("NaT", "ns") data = np.asarray(data) sparse_values, sparse_index, fill_value = make_sparse( data, kind=kind, fill_value=fill_value, dtype=dtype diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index f0455c01fa085..0874e57afd887 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -565,8 +565,8 @@ def na_value_for_dtype(dtype, compat: bool = True): if is_extension_array_dtype(dtype): return dtype.na_value - if needs_i8_conversion(dtype): - return NaT + elif needs_i8_conversion(dtype): + return dtype.type("NaT", "ns") elif is_float_dtype(dtype): return np.nan elif is_integer_dtype(dtype): diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py index 8cd0d29a34ec8..58fedbd3e4231 100644 --- a/pandas/tests/arrays/sparse/test_dtype.py +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -14,8 +14,8 @@ ("float", np.nan), ("bool", False), ("object", np.nan), - ("datetime64[ns]", pd.NaT), - ("timedelta64[ns]", pd.NaT), + ("datetime64[ns]", np.datetime64("NaT", "ns")), + ("timedelta64[ns]", np.timedelta64("NaT", "ns")), ], ) def test_inferred_dtype(dtype, fill_value): diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index c02185dd82043..0d92ef02e07c8 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -472,8 +472,8 @@ def test_array_equivalent_nested(): "dtype, na_value", [ # Datetime-like - (np.dtype("M8[ns]"), NaT), - (np.dtype("m8[ns]"), NaT), + (np.dtype("M8[ns]"), np.datetime64("NaT", "ns")), + (np.dtype("m8[ns]"), 
np.timedelta64("NaT", "ns")), (DatetimeTZDtype.construct_from_string("datetime64[ns, US/Eastern]"), NaT), (PeriodDtype("M"), NaT), # Integer @@ -499,7 +499,11 @@ def test_array_equivalent_nested(): ) def test_na_value_for_dtype(dtype, na_value): result = na_value_for_dtype(dtype) - assert result is na_value + # identity check doesn't work for datetime64/timedelta64("NaT") bc they + # are not singletons + assert result is na_value or ( + isna(result) and isna(na_value) and type(result) is type(na_value) + ) class TestNAObj: diff --git a/pandas/tests/util/test_assert_attr_equal.py b/pandas/tests/util/test_assert_attr_equal.py new file mode 100644 index 0000000000000..6fad38c2cd44e --- /dev/null +++ b/pandas/tests/util/test_assert_attr_equal.py @@ -0,0 +1,30 @@ +from types import SimpleNamespace + +import pytest + +from pandas.core.dtypes.common import is_float + +import pandas._testing as tm + + +def test_assert_attr_equal(nulls_fixture): + obj = SimpleNamespace() + obj.na_value = nulls_fixture + assert tm.assert_attr_equal("na_value", obj, obj) + + +def test_assert_attr_equal_different_nulls(nulls_fixture, nulls_fixture2): + obj = SimpleNamespace() + obj.na_value = nulls_fixture + + obj2 = SimpleNamespace() + obj2.na_value = nulls_fixture2 + + if nulls_fixture is nulls_fixture2: + assert tm.assert_attr_equal("na_value", obj, obj2) + elif is_float(nulls_fixture) and is_float(nulls_fixture2): + # we consider float("nan") and np.float64("nan") to be equivalent + assert tm.assert_attr_equal("na_value", obj, obj2) + else: + with pytest.raises(AssertionError, match='"na_value" are different'): + tm.assert_attr_equal("na_value", obj, obj2) From d0271dbb685144c5f0fb226e4d64b0024f7d8c3e Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 28 Jan 2021 19:16:42 -0800 Subject: [PATCH 2/3] update doctest --- pandas/core/dtypes/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 
0874e57afd887..0db0b1f6a97ef 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -559,7 +559,7 @@ def na_value_for_dtype(dtype, compat: bool = True): >>> na_value_for_dtype(np.dtype('bool')) False >>> na_value_for_dtype(np.dtype('datetime64[ns]')) - NaT + numpy.datetime64('NaT') """ dtype = pandas_dtype(dtype) From b735d6591f302bd6084abd037134fa0380aeca3f Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 1 Feb 2021 18:34:28 -0800 Subject: [PATCH 3/3] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index e61f50afea838..77bba6c431053 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -432,7 +432,7 @@ Other - Bug in :class:`Index` constructor sometimes silently ignorning a specified ``dtype`` (:issue:`38879`) - Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`) - Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`) -- +- Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`) .. ---------------------------------------------------------------------------