Skip to content

Commit e52861d

Browse files
phofl and mroeschke authored
Backport PR #52821 on branch 2.0.x (BUG: Non unitless np NaT arithmetic with non-nano) (#52847)
BUG: Non unitless np NaT arithmetic with non-nano (#52821) (cherry picked from commit eb9a3e8) Co-authored-by: Matthew Roeschke <[email protected]>
1 parent fa94d3b commit e52861d

File tree

4 files changed

+61
-3
lines changed

4 files changed

+61
-3
lines changed

doc/source/whatsnew/v2.0.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@ Bug fixes
3838
- Bug in :meth:`DataFrame.max` and related casting different :class:`Timestamp` resolutions always to nanoseconds (:issue:`52524`)
3939
- Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`)
4040
- Bug in :meth:`Series.dt.tz_localize` incorrectly localizing timestamps with :class:`ArrowDtype` (:issue:`52677`)
41+
- Bug in arithmetic between ``np.datetime64`` and ``np.timedelta64`` ``NaT`` scalars with units always returning nanosecond resolution (:issue:`52295`)
4142
- Bug in logical and comparison operations between :class:`ArrowDtype` and numpy masked types (e.g. ``"boolean"``) (:issue:`52625`)
4243
- Fixed bug in :func:`merge` when merging with ``ArrowDtype`` on one side and a NumPy dtype on the other side (:issue:`52406`)
4344
- Fixed segfault in :meth:`Series.to_numpy` with ``null[pyarrow]`` dtype (:issue:`52443`)

pandas/core/ops/array_ops.py

+22-3
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,14 @@
1818
lib,
1919
ops as libops,
2020
)
21-
from pandas._libs.tslibs import BaseOffset
21+
from pandas._libs.tslibs import (
22+
BaseOffset,
23+
get_supported_reso,
24+
get_unit_from_dtype,
25+
is_supported_unit,
26+
is_unitless,
27+
npy_unit_to_abbrev,
28+
)
2229
from pandas._typing import (
2330
ArrayLike,
2431
Shape,
@@ -475,7 +482,13 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
475482
from pandas.core.arrays import DatetimeArray
476483

477484
# Avoid possible ambiguities with pd.NaT
478-
obj = obj.astype("datetime64[ns]")
485+
# GH 52295
486+
if is_unitless(obj.dtype):
487+
obj = obj.astype("datetime64[ns]")
488+
elif not is_supported_unit(get_unit_from_dtype(obj.dtype)):
489+
unit = get_unit_from_dtype(obj.dtype)
490+
closest_unit = npy_unit_to_abbrev(get_supported_reso(unit))
491+
obj = obj.astype(f"datetime64[{closest_unit}]")
479492
right = np.broadcast_to(obj, shape)
480493
return DatetimeArray(right)
481494

@@ -488,7 +501,13 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
488501
# wrapping timedelta64("NaT") in Timedelta returns NaT,
489502
# which would incorrectly be treated as a datetime-NaT, so
490503
# we broadcast and wrap in a TimedeltaArray
491-
obj = obj.astype("timedelta64[ns]")
504+
# GH 52295
505+
if is_unitless(obj.dtype):
506+
obj = obj.astype("timedelta64[ns]")
507+
elif not is_supported_unit(get_unit_from_dtype(obj.dtype)):
508+
unit = get_unit_from_dtype(obj.dtype)
509+
closest_unit = npy_unit_to_abbrev(get_supported_reso(unit))
510+
obj = obj.astype(f"timedelta64[{closest_unit}]")
492511
right = np.broadcast_to(obj, shape)
493512
return TimedeltaArray(right)
494513

pandas/tests/arithmetic/test_datetime64.py

+37
Original file line number | Diff line number | Diff line change
@@ -2436,3 +2436,40 @@ def test_dt64arr_addsub_object_dtype_2d():
24362436

24372437
assert result2.shape == (4, 1)
24382438
assert all(td._value == 0 for td in result2.ravel())
2439+
2440+
2441+
def test_non_nano_dt64_addsub_np_nat_scalars():
2442+
# GH 52295
2443+
ser = Series([1233242342344, 232432434324, 332434242344], dtype="datetime64[ms]")
2444+
result = ser - np.datetime64("nat", "ms")
2445+
expected = Series([NaT] * 3, dtype="timedelta64[ms]")
2446+
tm.assert_series_equal(result, expected)
2447+
2448+
result = ser + np.timedelta64("nat", "ms")
2449+
expected = Series([NaT] * 3, dtype="datetime64[ms]")
2450+
tm.assert_series_equal(result, expected)
2451+
2452+
2453+
def test_non_nano_dt64_addsub_np_nat_scalars_unitless():
2454+
# GH 52295
2455+
# TODO: Can we default to the ser unit?
2456+
ser = Series([1233242342344, 232432434324, 332434242344], dtype="datetime64[ms]")
2457+
result = ser - np.datetime64("nat")
2458+
expected = Series([NaT] * 3, dtype="timedelta64[ns]")
2459+
tm.assert_series_equal(result, expected)
2460+
2461+
result = ser + np.timedelta64("nat")
2462+
expected = Series([NaT] * 3, dtype="datetime64[ns]")
2463+
tm.assert_series_equal(result, expected)
2464+
2465+
2466+
def test_non_nano_dt64_addsub_np_nat_scalars_unsupported_unit():
2467+
# GH 52295
2468+
ser = Series([12332, 23243, 33243], dtype="datetime64[s]")
2469+
result = ser - np.datetime64("nat", "D")
2470+
expected = Series([NaT] * 3, dtype="timedelta64[s]")
2471+
tm.assert_series_equal(result, expected)
2472+
2473+
result = ser + np.timedelta64("nat", "D")
2474+
expected = Series([NaT] * 3, dtype="datetime64[s]")
2475+
tm.assert_series_equal(result, expected)

pandas/tests/arithmetic/test_numeric.py

+1
Original file line number | Diff line number | Diff line change
@@ -292,6 +292,7 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array
292292
np.datetime64("NaT", "ns"),
293293
pd.NaT,
294294
],
295+
ids=repr,
295296
)
296297
def test_add_sub_datetimedeltalike_invalid(
297298
self, numeric_idx, other, box_with_array

0 commit comments

Comments
 (0)