Backport PR #52821 on branch 2.0.x (BUG: Non unitless np NaT arithmetic with non-nano) (#52847)

phofl · mroeschke · web-flow · commit e52861d8acf8 · 2023-04-22T19:19:16.000+02:00
BUG: Non unitless np NaT arithmetic with non-nano (#52821) (cherry picked from commit eb9a3e8) Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst
@@ -38,6 +38,7 @@ Bug fixes
 - Bug in :meth:`DataFrame.max` and related casting different :class:`Timestamp` resolutions always to nanoseconds (:issue:`52524`)
 - Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`)
 - Bug in :meth:`Series.dt.tz_localize` incorrectly localizing timestamps with :class:`ArrowDtype` (:issue:`52677`)
+- Bug in arithmetic between ``np.datetime64`` and ``np.timedelta64`` ``NaT`` scalars with units always returning nanosecond resolution (:issue:`52295`)
 - Bug in logical and comparison operations between :class:`ArrowDtype` and numpy masked types (e.g. ``"boolean"``) (:issue:`52625`)
 - Fixed bug in :func:`merge` when merging with ``ArrowDtype`` on one side and a NumPy dtype on the other side (:issue:`52406`)
 - Fixed segfault in :meth:`Series.to_numpy` with ``null[pyarrow]`` dtype (:issue:`52443`)
diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
@@ -18,7 +18,14 @@
     lib,
     ops as libops,
 )
-from pandas._libs.tslibs import BaseOffset
+from pandas._libs.tslibs import (
+    BaseOffset,
+    get_supported_reso,
+    get_unit_from_dtype,
+    is_supported_unit,
+    is_unitless,
+    npy_unit_to_abbrev,
+)
 from pandas._typing import (
     ArrayLike,
     Shape,
@@ -475,7 +482,13 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
             from pandas.core.arrays import DatetimeArray
 
             # Avoid possible ambiguities with pd.NaT
-            obj = obj.astype("datetime64[ns]")
+            # GH 52295
+            if is_unitless(obj.dtype):
+                obj = obj.astype("datetime64[ns]")
+            elif not is_supported_unit(get_unit_from_dtype(obj.dtype)):
+                unit = get_unit_from_dtype(obj.dtype)
+                closest_unit = npy_unit_to_abbrev(get_supported_reso(unit))
+                obj = obj.astype(f"datetime64[{closest_unit}]")
             right = np.broadcast_to(obj, shape)
             return DatetimeArray(right)
 
@@ -488,7 +501,13 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
             # wrapping timedelta64("NaT") in Timedelta returns NaT,
             #  which would incorrectly be treated as a datetime-NaT, so
             #  we broadcast and wrap in a TimedeltaArray
-            obj = obj.astype("timedelta64[ns]")
+            # GH 52295
+            if is_unitless(obj.dtype):
+                obj = obj.astype("timedelta64[ns]")
+            elif not is_supported_unit(get_unit_from_dtype(obj.dtype)):
+                unit = get_unit_from_dtype(obj.dtype)
+                closest_unit = npy_unit_to_abbrev(get_supported_reso(unit))
+                obj = obj.astype(f"timedelta64[{closest_unit}]")
             right = np.broadcast_to(obj, shape)
             return TimedeltaArray(right)
 
diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
@@ -2436,3 +2436,40 @@ def test_dt64arr_addsub_object_dtype_2d():
 
     assert result2.shape == (4, 1)
     assert all(td._value == 0 for td in result2.ravel())
+
+
+def test_non_nano_dt64_addsub_np_nat_scalars():
+    # GH 52295
+    ser = Series([1233242342344, 232432434324, 332434242344], dtype="datetime64[ms]")
+    result = ser - np.datetime64("nat", "ms")
+    expected = Series([NaT] * 3, dtype="timedelta64[ms]")
+    tm.assert_series_equal(result, expected)
+
+    result = ser + np.timedelta64("nat", "ms")
+    expected = Series([NaT] * 3, dtype="datetime64[ms]")
+    tm.assert_series_equal(result, expected)
+
+
+def test_non_nano_dt64_addsub_np_nat_scalars_unitless():
+    # GH 52295
+    # TODO: Can we default to the ser unit?
+    ser = Series([1233242342344, 232432434324, 332434242344], dtype="datetime64[ms]")
+    result = ser - np.datetime64("nat")
+    expected = Series([NaT] * 3, dtype="timedelta64[ns]")
+    tm.assert_series_equal(result, expected)
+
+    result = ser + np.timedelta64("nat")
+    expected = Series([NaT] * 3, dtype="datetime64[ns]")
+    tm.assert_series_equal(result, expected)
+
+
+def test_non_nano_dt64_addsub_np_nat_scalars_unsupported_unit():
+    # GH 52295
+    ser = Series([12332, 23243, 33243], dtype="datetime64[s]")
+    result = ser - np.datetime64("nat", "D")
+    expected = Series([NaT] * 3, dtype="timedelta64[s]")
+    tm.assert_series_equal(result, expected)
+
+    result = ser + np.timedelta64("nat", "D")
+    expected = Series([NaT] * 3, dtype="datetime64[s]")
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
@@ -292,6 +292,7 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array
             np.datetime64("NaT", "ns"),
             pd.NaT,
         ],
+        ids=repr,
     )
     def test_add_sub_datetimedeltalike_invalid(
         self, numeric_idx, other, box_with_array

Original file line number	Diff line number	Diff line change
`@@ -292,6 +292,7 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array`
`292`	`292`	`np.datetime64("NaT", "ns"),`
`293`	`293`	`pd.NaT,`
`294`	`294`	`],`
	`295`	`+ ids=repr,`
`295`	`296`	`)`
`296`	`297`	`def test_add_sub_datetimedeltalike_invalid(`
`297`	`298`	`self, numeric_idx, other, box_with_array`