Skip to content

Commit 48f73d8

Browse files
authored
BUG: assert_attr_equal with numpy nat or pd.NA (#39461)
1 parent 8c35fcd commit 48f73d8

File tree

8 files changed

+59
-10
lines changed

8 files changed

+59
-10
lines changed

doc/source/whatsnew/v1.3.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ Other
433433
- Bug in :class:`Index` constructor sometimes silently ignoring a specified ``dtype`` (:issue:`38879`)
434434
- Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`)
435435
- Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`)
436-
-
436+
- Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`)
437437

438438
.. ---------------------------------------------------------------------------
439439

pandas/_testing/asserters.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -459,13 +459,24 @@ def assert_attr_equal(attr: str, left, right, obj: str = "Attributes"):
459459
):
460460
# np.nan
461461
return True
462+
elif (
463+
isinstance(left_attr, (np.datetime64, np.timedelta64))
464+
and isinstance(right_attr, (np.datetime64, np.timedelta64))
465+
and type(left_attr) is type(right_attr)
466+
and np.isnat(left_attr)
467+
and np.isnat(right_attr)
468+
):
469+
# np.datetime64("nat") or np.timedelta64("nat")
470+
return True
462471

463472
try:
464473
result = left_attr == right_attr
465474
except TypeError:
466475
# datetimetz on rhs may raise TypeError
467476
result = False
468-
if not isinstance(result, bool):
477+
if (left_attr is pd.NA) ^ (right_attr is pd.NA):
478+
result = False
479+
elif not isinstance(result, bool):
469480
result = result.all()
470481

471482
if result:

pandas/core/algorithms.py

+2
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,8 @@ def factorize(
715715
values, dtype = _ensure_data(values)
716716

717717
if original.dtype.kind in ["m", "M"]:
718+
# Note: factorize_array will cast NaT bc it has a __int__
719+
# method, but will not cast the more-correct dtype.type("nat")
718720
na_value = iNaT
719721
else:
720722
na_value = None

pandas/core/arrays/sparse/array.py

+2
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,8 @@ def __init__(
382382
stacklevel=2,
383383
)
384384
data = np.asarray(data, dtype="datetime64[ns]")
385+
if fill_value is NaT:
386+
fill_value = np.datetime64("NaT", "ns")
385387
data = np.asarray(data)
386388
sparse_values, sparse_index, fill_value = make_sparse(
387389
data, kind=kind, fill_value=fill_value, dtype=dtype

pandas/core/dtypes/missing.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -559,14 +559,14 @@ def na_value_for_dtype(dtype, compat: bool = True):
559559
>>> na_value_for_dtype(np.dtype('bool'))
560560
False
561561
>>> na_value_for_dtype(np.dtype('datetime64[ns]'))
562-
NaT
562+
numpy.datetime64('NaT')
563563
"""
564564
dtype = pandas_dtype(dtype)
565565

566566
if is_extension_array_dtype(dtype):
567567
return dtype.na_value
568-
if needs_i8_conversion(dtype):
569-
return NaT
568+
elif needs_i8_conversion(dtype):
569+
return dtype.type("NaT", "ns")
570570
elif is_float_dtype(dtype):
571571
return np.nan
572572
elif is_integer_dtype(dtype):

pandas/tests/arrays/sparse/test_dtype.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
("float", np.nan),
1515
("bool", False),
1616
("object", np.nan),
17-
("datetime64[ns]", pd.NaT),
18-
("timedelta64[ns]", pd.NaT),
17+
("datetime64[ns]", np.datetime64("NaT", "ns")),
18+
("timedelta64[ns]", np.timedelta64("NaT", "ns")),
1919
],
2020
)
2121
def test_inferred_dtype(dtype, fill_value):

pandas/tests/dtypes/test_missing.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -472,8 +472,8 @@ def test_array_equivalent_nested():
472472
"dtype, na_value",
473473
[
474474
# Datetime-like
475-
(np.dtype("M8[ns]"), NaT),
476-
(np.dtype("m8[ns]"), NaT),
475+
(np.dtype("M8[ns]"), np.datetime64("NaT", "ns")),
476+
(np.dtype("m8[ns]"), np.timedelta64("NaT", "ns")),
477477
(DatetimeTZDtype.construct_from_string("datetime64[ns, US/Eastern]"), NaT),
478478
(PeriodDtype("M"), NaT),
479479
# Integer
@@ -499,7 +499,11 @@ def test_array_equivalent_nested():
499499
)
500500
def test_na_value_for_dtype(dtype, na_value):
501501
result = na_value_for_dtype(dtype)
502-
assert result is na_value
502+
# identity check doesn't work for datetime64/timedelta64("NaT") bc they
503+
# are not singletons
504+
assert result is na_value or (
505+
isna(result) and isna(na_value) and type(result) is type(na_value)
506+
)
503507

504508

505509
class TestNAObj:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from types import SimpleNamespace
2+
3+
import pytest
4+
5+
from pandas.core.dtypes.common import is_float
6+
7+
import pandas._testing as tm
8+
9+
10+
def test_assert_attr_equal(nulls_fixture):
11+
obj = SimpleNamespace()
12+
obj.na_value = nulls_fixture
13+
assert tm.assert_attr_equal("na_value", obj, obj)
14+
15+
16+
def test_assert_attr_equal_different_nulls(nulls_fixture, nulls_fixture2):
17+
obj = SimpleNamespace()
18+
obj.na_value = nulls_fixture
19+
20+
obj2 = SimpleNamespace()
21+
obj2.na_value = nulls_fixture2
22+
23+
if nulls_fixture is nulls_fixture2:
24+
assert tm.assert_attr_equal("na_value", obj, obj2)
25+
elif is_float(nulls_fixture) and is_float(nulls_fixture2):
26+
# we consider float("nan") and np.float64("nan") to be equivalent
27+
assert tm.assert_attr_equal("na_value", obj, obj2)
28+
else:
29+
with pytest.raises(AssertionError, match='"na_value" are different'):
30+
tm.assert_attr_equal("na_value", obj, obj2)

0 commit comments

Comments
 (0)