pandas-dev · mroeschke · Dec 27, 2022 · Nov 15, 2022 · Nov 22, 2022 · Nov 23, 2022
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -341,6 +341,7 @@ Other API changes
 - Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`)
 - Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`)
 - Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`)
+- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations (:issue:`??`)
- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations (:issue:`??`)
+- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations and the resulting type will be ``object`` (:issue:`49714`)
- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations (:issue:`??`)
+- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations and the resulting type will be ``object`` (:issue:`49714`)
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -1996,6 +1996,96 @@ class Timedelta(_Timedelta):
         return div, other - div * self
 
 
+def truediv_object_array(ndarray left, ndarray right):
+    cdef:
+        ndarray[object] result = np.empty((<object>left).shape, dtype=object)
+        object td64  # really timedelta64 if we find a way to declare that
+        object obj, res_value
+        _Timedelta td
+        Py_ssize_t i
+        bint seen_numeric = False, seen_td = False
+
+    for i in range(len(left)):
+        td64 = left[i]
+        obj = right[i]
+
+        if get_timedelta64_value(td64) == NPY_NAT:
+            # td here should be interpreted as a td64 NaT
+            if _should_cast_to_timedelta(obj):
+                res_value = np.nan
+                seen_numeric = True
+            else:
+                # if its a number then let numpy handle division, otherwise
+                #  numpy will raise
+                res_value = td64 / obj
+                seen_td = True
+        else:
+            td = Timedelta(td64)
+            res_value = td / obj
+            if is_float_object(res_value) or is_integer_object(res_value):
+                seen_numeric = True
+            else:
+                seen_td = True
+
+        result[i] = res_value
+
+    if not seen_numeric:
+        # if we haven't seen any numeric results, we have all-td64, so we
+        #  can cast back
+        return result.astype(left.dtype)
+    elif not seen_td:
+        # if we haven't seen any timedelta results, we have all-numeric, and
+        #  can cast
+        return result.astype(np.float64)
+    return result
+
+
+def floordiv_object_array(ndarray left, ndarray right):
+    cdef:
+        ndarray[object] result = np.empty((<object>left).shape, dtype=object)
+        object td64  # really timedelta64 if we find a way to declare that
+        object obj, res_value
+        _Timedelta td
+        Py_ssize_t i
+        bint seen_numeric = False, seen_td = False
+
+    for i in range(len(left)):
+        td64 = left[i]
+        obj = right[i]
+
+        if get_timedelta64_value(td64) == NPY_NAT:
+            # td here should be interpreted as a td64 NaT
+            if _should_cast_to_timedelta(obj):
+                res_value = np.nan
+                seen_numeric = True
+            else:
+                # if its a number then let numpy handle division, otherwise
+                #  numpy will raise
+                res_value = td64 // obj
+                seen_td = True
+        else:
+            td = Timedelta(td64)
+            res_value = td // obj
+            if is_float_object(res_value) or is_integer_object(res_value):
+                seen_numeric = True
+            else:
+                seen_td = True
+
+        result[i] = res_value
+
+    # We can't leave this inference to numpy because it will see [td64, int]
+    #  and cast that to all-td64
+    if not seen_numeric:
+        # if we haven't seen any numeric results, we have all-td64, so we
+        #  can cast back
+        return result.astype(left.dtype)
+    elif not seen_td:
+        # if we haven't seen any timedelta results, we have all-numeric, and
+        #  can cast
+        return result.astype(np.int64)
+    return result
+
+
 cdef bint is_any_td_scalar(object obj):
     """
     Cython equivalent for `isinstance(obj, (timedelta, np.timedelta64, Tick))`

diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -32,8 +32,10 @@
 from pandas._libs.tslibs.fields import get_timedelta_field
 from pandas._libs.tslibs.timedeltas import (
     array_to_timedelta64,
+    floordiv_object_array,
     ints_to_pytimedelta,
     parse_timedelta_unit,
+    truediv_object_array,
 )
 from pandas._typing import (
     AxisInt,
@@ -57,12 +59,14 @@
     is_timedelta64_dtype,
     pandas_dtype,
 )
+from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.missing import isna
 
 from pandas.core import nanops
 from pandas.core.arrays import datetimelike as dtl
 from pandas.core.arrays._ranges import generate_regular_range
 import pandas.core.common as com
+from pandas.core.construction import extract_array
 from pandas.core.ops.common import unpack_zerodim_and_defer
 
 if TYPE_CHECKING:
@@ -495,31 +499,16 @@ def __truediv__(self, other):
             return self._ndarray / other
 
         elif is_object_dtype(other.dtype):
-            # We operate on raveled arrays to avoid problems in inference
-            #  on NaT
-            # TODO: tests with non-nano
-            srav = self.ravel()
-            orav = other.ravel()
-            result_list = [srav[n] / orav[n] for n in range(len(srav))]
-            result = np.array(result_list).reshape(self.shape)
-
-            # We need to do dtype inference in order to keep DataFrame ops
-            #  behavior consistent with Series behavior
-            inferred = lib.infer_dtype(result, skipna=False)
-            if inferred == "timedelta":
-                flat = result.ravel()
-                result = type(self)._from_sequence(flat).reshape(result.shape)
-            elif inferred == "floating":
-                result = result.astype(float)
-            elif inferred == "datetime":
-                # GH#39750 this occurs when result is all-NaT, in which case
-                #  we want to interpret these NaTs as td64.
-                #  We construct an all-td64NaT result.
-                # error: Incompatible types in assignment (expression has type
-                # "TimedeltaArray", variable has type "ndarray[Any,
-                # dtype[floating[_64Bit]]]")
-                result = self * np.nan  # type: ignore[assignment]
+            other = extract_array(other, extract_numpy=True)
+            if self.ndim > 1:
+                res_cols = [left / right for left, right in zip(self, other)]
+                res_cols2 = [x.reshape(1, -1) for x in res_cols]
+                result = concat_compat(res_cols2, axis=0)
+            else:
+                result = truediv_object_array(self._ndarray, other)
 
+            if result.dtype.kind == "m":
+                result = type(self)(result)
             return result
 
         else:
@@ -619,24 +608,16 @@ def __floordiv__(self, other):
             return result
 
         elif is_object_dtype(other.dtype):
+            other = extract_array(other, extract_numpy=True)
             # error: Incompatible types in assignment (expression has type
             # "List[Any]", variable has type "ndarray")
-            srav = self.ravel()
-            orav = other.ravel()
-            res_list = [srav[n] // orav[n] for n in range(len(srav))]
-            result_flat = np.asarray(res_list)
-            inferred = lib.infer_dtype(result_flat, skipna=False)
-
-            result = result_flat.reshape(self.shape)
-
-            if inferred == "timedelta":
-                result, _ = sequence_to_td64ns(result)
-                return type(self)(result)
-            if inferred == "datetime":
-                # GH#39750 occurs when result is all-NaT, which in this
-                #  case should be interpreted as td64nat. This can only
-                #  occur when self is all-td64nat
-                return self * np.nan
+            if self.ndim > 1:
+                result = np.array([left // right for left, right in zip(self, other)])
+            else:
+                result = floordiv_object_array(self._ndarray, other)
+
+            if result.dtype.kind == "m":
+                result = type(self)(result)
             return result
 
         elif is_integer_dtype(other.dtype) or is_float_dtype(other.dtype):

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -6672,10 +6672,10 @@ def _logical_method(self, other, op):
     def _construct_result(self, result, name):
         if isinstance(result, tuple):
             return (
-                Index._with_infer(result[0], name=name),
-                Index._with_infer(result[1], name=name),
+                Index._with_infer(result[0], name=name, dtype=result[0].dtype),
+                Index._with_infer(result[1], name=name, dtype=result[1].dtype),
             )
-        return Index._with_infer(result, name=name)
+        return Index(result, name=name, dtype=result.dtype)
 
     def _arith_method(self, other, op):
         if (

diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
@@ -232,18 +232,27 @@ def align_method_FRAME(
 
     def to_series(right):
         msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}"
+
+        # pass dtype to avoid doing inference, which would break consistency
+        #  with Index/Series ops
+        dtype = None
+        if getattr(right, "dtype", None) == object:
+            # can't pass right.dtype unconditionally as that would break on e.g.
+            #  datetime64[h] ndarray
+            dtype = object
+
         if axis is not None and left._get_axis_name(axis) == "index":
             if len(left.index) != len(right):
                 raise ValueError(
                     msg.format(req_len=len(left.index), given_len=len(right))
                 )
-            right = left._constructor_sliced(right, index=left.index)
+            right = left._constructor_sliced(right, index=left.index, dtype=dtype)
         else:
             if len(left.columns) != len(right):
                 raise ValueError(
                     msg.format(req_len=len(left.columns), given_len=len(right))
                 )
-            right = left._constructor_sliced(right, index=left.columns)
+            right = left._constructor_sliced(right, index=left.columns, dtype=dtype)
         return right
 
     if isinstance(right, np.ndarray):
@@ -252,13 +261,25 @@ def to_series(right):
             right = to_series(right)
 
         elif right.ndim == 2:
+            # We need to pass dtype=right.dtype to retain object dtype
+            #  otherwise we lose consistency with Index and array ops
+            dtype = None
+            if getattr(right, "dtype", None) == object:
+                # can't pass right.dtype unconditionally as that would break on e.g.
+                #  datetime64[h] ndarray
+                dtype = object
+
             if right.shape == left.shape:
-                right = left._constructor(right, index=left.index, columns=left.columns)
+                right = left._constructor(
+                    right, index=left.index, columns=left.columns, dtype=dtype
+                )
 
             elif right.shape[0] == left.shape[0] and right.shape[1] == 1:
                 # Broadcast across columns
                 right = np.broadcast_to(right, left.shape)
-                right = left._constructor(right, index=left.index, columns=left.columns)
+                right = left._constructor(
+                    right, index=left.index, columns=left.columns, dtype=dtype
+                )
 
             elif right.shape[1] == left.shape[1] and right.shape[0] == 1:
                 # Broadcast along rows
@@ -409,7 +430,10 @@ def _maybe_align_series_as_frame(frame: DataFrame, series: Series, axis: AxisInt
         rvalues = rvalues.reshape(1, -1)
 
     rvalues = np.broadcast_to(rvalues, frame.shape)
-    return type(frame)(rvalues, index=frame.index, columns=frame.columns)
+    # pass dtype to avoid doing inference
+    return type(frame)(
+        rvalues, index=frame.index, columns=frame.columns, dtype=series.dtype
+    )
 
 
 def flex_arith_method_FRAME(op):

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2996,9 +2996,10 @@ def _construct_result(
             assert isinstance(res2, Series)
             return (res1, res2)
 
-        # We do not pass dtype to ensure that the Series constructor
-        #  does inference in the case where `result` has object-dtype.
-        out = self._constructor(result, index=self.index)
+        # We pass dtype to ensure the constructor does not do dtype inference
+        out = self._constructor(
+            result, index=self.index, dtype=getattr(result, "dtype", None)
+        )
         out = out.__finalize__(self)
 
         # Set the result's name after __finalize__ is called because __finalize__

diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
@@ -320,6 +320,8 @@ def test_add_sub_datetimedeltalike_invalid(
                 r"operand type\(s\) all returned NotImplemented from __array_ufunc__",
                 "can only perform ops with numeric values",
                 "cannot subtract DatetimeArray from ndarray",
+                # pd.Timedelta(1) + Index([0, 1, 2])
+                "Cannot add or subtract Timedelta from integers",
             ]
         )
         assert_invalid_addsub_type(left, other, msg)

diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py
@@ -182,12 +182,14 @@ def test_operators_na_handling(self):
     @pytest.mark.parametrize("dtype", [None, object])
     def test_series_with_dtype_radd_timedelta(self, dtype):
         # note this test is _not_ aimed at timedelta64-dtyped Series
+        # as of 2.0 we retain object dtype when ser.dtype == object
         ser = Series(
             [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
             dtype=dtype,
         )
         expected = Series(
-            [pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")]
+            [pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")],
+            dtype=dtype,
         )
 
         result = pd.Timedelta("3 days") + ser
@@ -227,7 +229,10 @@ def test_mixed_timezone_series_ops_object(self):
             name="xxx",
         )
         assert ser2.dtype == object
-        exp = Series([pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx")
+        # as of 2.0 we preserve object dtype
+        exp = Series(
+            [pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx", dtype=object
+        )
         tm.assert_series_equal(ser2 - ser, exp)
         tm.assert_series_equal(ser - ser2, -exp)
 
@@ -238,7 +243,12 @@ def test_mixed_timezone_series_ops_object(self):
         )
         assert ser.dtype == object
 
-        exp = Series([pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")], name="xxx")
+        # as of 2.0 we preserve object dtype
+        exp = Series(
+            [pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")],
+            name="xxx",
+            dtype=object,
+        )
         tm.assert_series_equal(ser + pd.Timedelta("00:30:00"), exp)
         tm.assert_series_equal(pd.Timedelta("00:30:00") + ser, exp)