From 2e97636d6c99195cd139ad3eda44e721962a85d8 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 29 Oct 2021 19:39:47 -0700 Subject: [PATCH 1/3] BUG: all-NaT TDI division with object dtype preserve td64 --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/core/arrays/timedeltas.py | 15 +++++++++++++-- pandas/tests/arithmetic/test_timedelta64.py | 13 +++++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 699d8a81243db..39485e825e83d 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -475,7 +475,7 @@ Datetimelike Timedelta ^^^^^^^^^ -- +- Bug in division of all-``NaT`` :class:`TimedeltaIndex`, :class:`Series` or :class:`DataFrame` column with object-dtype arraylike of numbers failing to infer the result as timedelta64-dtype (:issue:`39750`) - Timezones diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 040c7e6804f64..55e9e70a40711 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -573,12 +573,17 @@ def __truediv__(self, other): # We need to do dtype inference in order to keep DataFrame ops # behavior consistent with Series behavior - inferred = lib.infer_dtype(result) + inferred = lib.infer_dtype(result, skipna=False) if inferred == "timedelta": flat = result.ravel() result = type(self)._from_sequence(flat).reshape(result.shape) elif inferred == "floating": result = result.astype(float) + elif inferred == "datetime": + # GH#39750 this occurs when result is all-NaT, in which case + # we want to interpret these NaTs as td64. + # We construct an all-td64NaT result. 
+ result = self * np.nan return result @@ -683,9 +688,15 @@ def __floordiv__(self, other): self[n] // other[n] for n in range(len(self)) ] result = np.array(result) - if lib.infer_dtype(result, skipna=False) == "timedelta": + inferred = lib.infer_dtype(result, skipna=False) + if inferred == "timedelta": result, _ = sequence_to_td64ns(result) return type(self)(result) + if inferred == "datetime": + # GH#39750 occurs when result is all-NaT, which in this + # case should be interpreted as td64nat. This can only + # occur when self is all-td64nat + return self * np.nan return result elif is_integer_dtype(other.dtype) or is_float_dtype(other.dtype): diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 7765c29ee59c8..b5d61a2c9f004 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -2137,6 +2137,19 @@ def test_float_series_rdiv_td64arr(self, box_with_array, names): else: tm.assert_equal(result, expected) + def test_td64arr_all_nat_div_object_dtype_numeric(self, box_with_array): + # GH#39750 make sure we infer the result as td64 + tdi = TimedeltaIndex([NaT, NaT]) + + left = tm.box_expected(tdi, box_with_array) + right = np.array([2, 2.0], dtype=object) + + result = left / right + tm.assert_equal(result, left) + + result = left // right + tm.assert_equal(result, left) + class TestTimedelta64ArrayLikeArithmetic: # Arithmetic tests for timedelta64[ns] vectors fully parametrized over From fadaedd31e671ae005a582d1b028c8f0242c11fa Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 29 Oct 2021 19:42:25 -0700 Subject: [PATCH 2/3] Fix ArrayManager test --- pandas/tests/arithmetic/test_timedelta64.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index b5d61a2c9f004..0b43cb4f3d78c 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ 
b/pandas/tests/arithmetic/test_timedelta64.py @@ -2022,7 +2022,7 @@ def test_td64arr_rmul_numeric_array( ids=lambda x: type(x).__name__, ) def test_td64arr_div_numeric_array( - self, box_with_array, vector, any_real_numpy_dtype, using_array_manager + self, box_with_array, vector, any_real_numpy_dtype ): # GH#4521 # divide/multiply by integers @@ -2062,14 +2062,6 @@ def test_td64arr_div_numeric_array( expected = tm.box_expected(expected, xbox) assert tm.get_dtype(expected) == "m8[ns]" - if using_array_manager and box_with_array is DataFrame: - # TODO the behaviour is buggy here (third column with all-NaT - # as result doesn't get preserved as timedelta64 dtype). - # Reported at https://github.com/pandas-dev/pandas/issues/39750 - # Changing the expected instead of xfailing to continue to test - # the correct behaviour for the other columns - expected[2] = Series([NaT, NaT], dtype=object) - tm.assert_equal(result, expected) with pytest.raises(TypeError, match=pattern): From 606f8dcde835b7bba0447ed6dd8a34db1d6ea10a Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 30 Oct 2021 22:36:49 -0700 Subject: [PATCH 3/3] fix broken builds --- pandas/core/arrays/timedeltas.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 55e9e70a40711..3d8f9f7edcc74 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -684,11 +684,14 @@ def __floordiv__(self, other): elif is_object_dtype(other.dtype): # error: Incompatible types in assignment (expression has type # "List[Any]", variable has type "ndarray") - result = [ # type: ignore[assignment] - self[n] // other[n] for n in range(len(self)) - ] - result = np.array(result) - inferred = lib.infer_dtype(result, skipna=False) + srav = self.ravel() + orav = other.ravel() + res_list = [srav[n] // orav[n] for n in range(len(srav))] + result_flat = np.asarray(res_list) + inferred = lib.infer_dtype(result_flat, 
skipna=False) + + result = result_flat.reshape(self.shape) + if inferred == "timedelta": result, _ = sequence_to_td64ns(result) return type(self)(result)