Skip to content

BUG: all-NaT TDI division with object dtype preserve td64 #44237

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ Datetimelike

Timedelta
^^^^^^^^^
-
- Bug in division of all-``NaT`` :class:`TimedeltaIndex`, :class:`Series` or :class:`DataFrame` column with object-dtype array-like of numbers failing to infer the result as timedelta64-dtype (:issue:`39750`)
-

Timezones
Expand Down
26 changes: 20 additions & 6 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,12 +573,17 @@ def __truediv__(self, other):

# We need to do dtype inference in order to keep DataFrame ops
# behavior consistent with Series behavior
inferred = lib.infer_dtype(result)
inferred = lib.infer_dtype(result, skipna=False)
if inferred == "timedelta":
flat = result.ravel()
result = type(self)._from_sequence(flat).reshape(result.shape)
elif inferred == "floating":
result = result.astype(float)
elif inferred == "datetime":
# GH#39750 this occurs when result is all-NaT, in which case
# we want to interpret these NaTs as td64.
# We construct an all-td64NaT result.
result = self * np.nan

return result

Expand Down Expand Up @@ -679,13 +684,22 @@ def __floordiv__(self, other):
elif is_object_dtype(other.dtype):
# error: Incompatible types in assignment (expression has type
# "List[Any]", variable has type "ndarray")
result = [ # type: ignore[assignment]
self[n] // other[n] for n in range(len(self))
]
result = np.array(result)
if lib.infer_dtype(result, skipna=False) == "timedelta":
srav = self.ravel()
orav = other.ravel()
res_list = [srav[n] // orav[n] for n in range(len(srav))]
result_flat = np.asarray(res_list)
inferred = lib.infer_dtype(result_flat, skipna=False)

result = result_flat.reshape(self.shape)

if inferred == "timedelta":
result, _ = sequence_to_td64ns(result)
return type(self)(result)
if inferred == "datetime":
# GH#39750 occurs when result is all-NaT, which in this
# case should be interpreted as td64nat. This can only
# occur when self is all-td64nat
return self * np.nan
return result

elif is_integer_dtype(other.dtype) or is_float_dtype(other.dtype):
Expand Down
23 changes: 14 additions & 9 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -2022,7 +2022,7 @@ def test_td64arr_rmul_numeric_array(
ids=lambda x: type(x).__name__,
)
def test_td64arr_div_numeric_array(
self, box_with_array, vector, any_real_numpy_dtype, using_array_manager
self, box_with_array, vector, any_real_numpy_dtype
):
# GH#4521
# divide/multiply by integers
Expand Down Expand Up @@ -2062,14 +2062,6 @@ def test_td64arr_div_numeric_array(
expected = tm.box_expected(expected, xbox)
assert tm.get_dtype(expected) == "m8[ns]"

if using_array_manager and box_with_array is DataFrame:
# TODO the behaviour is buggy here (third column with all-NaT
# as result doesn't get preserved as timedelta64 dtype).
# Reported at https://github.com/pandas-dev/pandas/issues/39750
# Changing the expected instead of xfailing to continue to test
# the correct behaviour for the other columns
expected[2] = Series([NaT, NaT], dtype=object)

tm.assert_equal(result, expected)

with pytest.raises(TypeError, match=pattern):
Expand Down Expand Up @@ -2137,6 +2129,19 @@ def test_float_series_rdiv_td64arr(self, box_with_array, names):
else:
tm.assert_equal(result, expected)

def test_td64arr_all_nat_div_object_dtype_numeric(self, box_with_array):
# GH#39750 make sure we infer the result as td64
# Regression test: an all-NaT timedelta container divided by an
# object-dtype array of numbers must compare equal (per tm.assert_equal)
# to the original all-NaT container, for both `/` and `//`.
tdi = TimedeltaIndex([NaT, NaT])

# Wrap the index in the container type under test.
# NOTE(review): box_with_array is presumably a pytest fixture that
# supplies the container class -- confirm against conftest.
left = tm.box_expected(tdi, box_with_array)
# Object-dtype divisor holding one int and one float.
right = np.array([2, 2.0], dtype=object)

# True division: the result is expected to equal the all-NaT input.
result = left / right
tm.assert_equal(result, left)

# Floor division: same expectation as true division.
result = left // right
tm.assert_equal(result, left)


class TestTimedelta64ArrayLikeArithmetic:
# Arithmetic tests for timedelta64[ns] vectors fully parametrized over
Expand Down