diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 75ba169600962..cff226c3d03a3 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -477,7 +477,7 @@ Other API changes - Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`) - Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`) - :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of a :class:`Int64Index` (:issue:`49745`) -- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations, use ``result.infer_objects()`` to do type inference on the result (:issue:`49999`) +- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations, use ``result.infer_objects(copy=False)`` to do type inference on the result (:issue:`49999`, :issue:`49714`) - Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`) - Changed behavior of :class:`Series` and :class:`DataFrame` constructors when given an integer dtype and floating-point data that is not round numbers, 
this now raises ``ValueError`` instead of silently retaining the float dtype; do ``Series(data)`` or ``DataFrame(data)`` to get the old behavior, and ``Series(data).astype(dtype)`` or ``DataFrame(data).astype(dtype)`` to get the specified dtype (:issue:`49599`) - Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`) diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index f41bea11985f2..c9904e4592329 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -76,6 +76,12 @@ def delta_to_nanoseconds( reso: int = ..., # NPY_DATETIMEUNIT round_ok: bool = ..., ) -> int: ... +def floordiv_object_array( + left: np.ndarray, right: npt.NDArray[np.object_] +) -> np.ndarray: ... +def truediv_object_array( + left: np.ndarray, right: npt.NDArray[np.object_] +) -> np.ndarray: ... 
class Timedelta(timedelta): _creso: int diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 7810bc9f75e66..8f9dd1fe02c19 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -2030,6 +2030,64 @@ class Timedelta(_Timedelta): return div, other - div * self +def truediv_object_array(ndarray left, ndarray right): + cdef: + ndarray[object] result = np.empty((left).shape, dtype=object) + object td64 # really timedelta64 if we find a way to declare that + object obj, res_value + _Timedelta td + Py_ssize_t i + + for i in range(len(left)): + td64 = left[i] + obj = right[i] + + if get_timedelta64_value(td64) == NPY_NAT: + # td64 here should be interpreted as a td64 NaT + if _should_cast_to_timedelta(obj): + res_value = np.nan + else: + # if it's a number then let numpy handle division, otherwise + # numpy will raise + res_value = td64 / obj + else: + td = Timedelta(td64) + res_value = td / obj + + result[i] = res_value + + return result + + +def floordiv_object_array(ndarray left, ndarray right): + cdef: + ndarray[object] result = np.empty((left).shape, dtype=object) + object td64 # really timedelta64 if we find a way to declare that + object obj, res_value + _Timedelta td + Py_ssize_t i + + for i in range(len(left)): + td64 = left[i] + obj = right[i] + + if get_timedelta64_value(td64) == NPY_NAT: + # td64 here should be interpreted as a td64 NaT + if _should_cast_to_timedelta(obj): + res_value = np.nan + else: + # if it's a number then let numpy handle division, otherwise + # numpy will raise + res_value = td64 // obj + else: + td = Timedelta(td64) + res_value = td // obj + + result[i] = res_value + + return result + + cdef bint is_any_td_scalar(object obj): """ Cython equivalent for `isinstance(obj, (timedelta, np.timedelta64, Tick))` diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 63940741c3fe3..e9eebf54df07f 100644 --- a/pandas/core/arrays/datetimelike.py +++ 
b/pandas/core/arrays/datetimelike.py @@ -1373,11 +1373,7 @@ def _addsub_object_array(self, other: np.ndarray, op): assert self.shape == other.shape, (self.shape, other.shape) res_values = op(self.astype("O"), np.asarray(other)) - - ext_arr = pd_array(res_values.ravel()) - result = cast(np.ndarray, extract_array(ext_arr, extract_numpy=True)) - result = result.reshape(self.shape) - return result + return res_values def _accumulate(self, name: str, *, skipna: bool = True, **kwargs): diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index aa1b826ef0876..fe7ca3b5ba4ed 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -32,8 +32,10 @@ from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import ( array_to_timedelta64, + floordiv_object_array, ints_to_pytimedelta, parse_timedelta_unit, + truediv_object_array, ) from pandas._typing import ( AxisInt, @@ -63,6 +65,7 @@ from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays._ranges import generate_regular_range import pandas.core.common as com +from pandas.core.construction import extract_array from pandas.core.ops.common import unpack_zerodim_and_defer if TYPE_CHECKING: @@ -528,30 +531,13 @@ def __truediv__(self, other): return self._ndarray / other elif is_object_dtype(other.dtype): - # We operate on raveled arrays to avoid problems in inference - # on NaT - # TODO: tests with non-nano - srav = self.ravel() - orav = other.ravel() - result_list = [srav[n] / orav[n] for n in range(len(srav))] - result = np.array(result_list).reshape(self.shape) - - # We need to do dtype inference in order to keep DataFrame ops - # behavior consistent with Series behavior - inferred = lib.infer_dtype(result, skipna=False) - if inferred == "timedelta": - flat = result.ravel() - result = type(self)._from_sequence(flat).reshape(result.shape) - elif inferred == "floating": - result = result.astype(float) - elif inferred 
== "datetime": - # GH#39750 this occurs when result is all-NaT, in which case - # we want to interpret these NaTs as td64. - # We construct an all-td64NaT result. - # error: Incompatible types in assignment (expression has type - # "TimedeltaArray", variable has type "ndarray[Any, - # dtype[floating[_64Bit]]]") - result = self * np.nan # type: ignore[assignment] + other = extract_array(other, extract_numpy=True) + if self.ndim > 1: + res_cols = [left / right for left, right in zip(self, other)] + res_cols2 = [x.reshape(1, -1) for x in res_cols] + result = np.concatenate(res_cols2, axis=0) + else: + result = truediv_object_array(self._ndarray, other) return result @@ -652,24 +638,15 @@ def __floordiv__(self, other): return result elif is_object_dtype(other.dtype): - # error: Incompatible types in assignment (expression has type - # "List[Any]", variable has type "ndarray") - srav = self.ravel() - orav = other.ravel() - res_list = [srav[n] // orav[n] for n in range(len(srav))] - result_flat = np.asarray(res_list) - inferred = lib.infer_dtype(result_flat, skipna=False) - - result = result_flat.reshape(self.shape) - - if inferred == "timedelta": - result, _ = sequence_to_td64ns(result) - return type(self)(result) - if inferred == "datetime": - # GH#39750 occurs when result is all-NaT, which in this - # case should be interpreted as td64nat. 
This can only - # occur when self is all-td64nat - return self * np.nan + other = extract_array(other, extract_numpy=True) + if self.ndim > 1: + res_cols = [left // right for left, right in zip(self, other)] + res_cols2 = [x.reshape(1, -1) for x in res_cols] + result = np.concatenate(res_cols2, axis=0) + else: + result = floordiv_object_array(self._ndarray, other) + + assert result.dtype == object return result elif is_integer_dtype(other.dtype) or is_float_dtype(other.dtype): diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index c35962d7d2e96..e840668167f99 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -34,10 +34,6 @@ date_range, ) import pandas._testing as tm -from pandas.core.arrays import ( - DatetimeArray, - TimedeltaArray, -) from pandas.core.ops import roperator from pandas.tests.arithmetic.common import ( assert_cannot_add, @@ -1023,7 +1019,7 @@ def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture): expected = dti - dti obj = tm.box_expected(dti, box_with_array) - expected = tm.box_expected(expected, box_with_array) + expected = tm.box_expected(expected, box_with_array).astype(object) with tm.assert_produces_warning(PerformanceWarning): result = obj - obj.astype(object) @@ -1572,10 +1568,13 @@ def test_dt64arr_add_sub_offset_array( other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) expected = DatetimeIndex([op(dti[n], other[n]) for n in range(len(dti))]) - expected = tm.box_expected(expected, box_with_array) + expected = tm.box_expected(expected, box_with_array).astype(object) if box_other: other = tm.box_expected(other, box_with_array) + if box_with_array is pd.array and op is roperator.radd: + # We expect a PandasArray, not ndarray[object] here + expected = pd.array(expected, dtype=object) with tm.assert_produces_warning(PerformanceWarning): res = op(dtarr, other) @@ -2373,7 +2372,7 @@ def 
test_dti_addsub_offset_arraylike( expected = DatetimeIndex( [op(dti[n], other[n]) for n in range(len(dti))], name=names[2], freq="infer" ) - expected = tm.box_expected(expected, xbox) + expected = tm.box_expected(expected, xbox).astype(object) tm.assert_equal(res, expected) @pytest.mark.parametrize("other_box", [pd.Index, np.array]) @@ -2388,14 +2387,14 @@ def test_dti_addsub_object_arraylike( xbox = get_upcast_box(dtarr, other) expected = DatetimeIndex(["2017-01-31", "2017-01-06"], tz=tz_naive_fixture) - expected = tm.box_expected(expected, xbox) + expected = tm.box_expected(expected, xbox).astype(object) with tm.assert_produces_warning(PerformanceWarning): result = dtarr + other tm.assert_equal(result, expected) expected = DatetimeIndex(["2016-12-31", "2016-12-29"], tz=tz_naive_fixture) - expected = tm.box_expected(expected, xbox) + expected = tm.box_expected(expected, xbox).astype(object) with tm.assert_produces_warning(PerformanceWarning): result = dtarr - other @@ -2435,15 +2434,11 @@ def test_dt64arr_addsub_object_dtype_2d(): with tm.assert_produces_warning(PerformanceWarning): expected = (dta[:, 0] + other[:, 0]).reshape(-1, 1) - assert isinstance(result, DatetimeArray) - assert result.freq is None - tm.assert_numpy_array_equal(result._ndarray, expected._ndarray) + tm.assert_numpy_array_equal(result, expected) with tm.assert_produces_warning(PerformanceWarning): # Case where we expect to get a TimedeltaArray back result2 = dta - dta.astype(object) - assert isinstance(result2, TimedeltaArray) assert result2.shape == (4, 1) - assert result2.freq is None - assert (result2.asi8 == 0).all() + assert all(td.value == 0 for td in result2.ravel()) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 529dd6baa70c0..1311d34c4c0f5 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -320,6 +320,8 @@ def test_add_sub_datetimedeltalike_invalid( r"operand type\(s\) all returned 
NotImplemented from __array_ufunc__", "can only perform ops with numeric values", "cannot subtract DatetimeArray from ndarray", + # pd.Timedelta(1) + Index([0, 1, 2]) + "Cannot add or subtract Timedelta from integers", ] ) assert_invalid_addsub_type(left, other, msg) diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index b94816687ecca..cacd580658149 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -194,6 +194,7 @@ def test_operators_na_handling(self): @pytest.mark.parametrize("dtype", [None, object]) def test_series_with_dtype_radd_timedelta(self, dtype): # note this test is _not_ aimed at timedelta64-dtyped Series + # as of 2.0 we retain object dtype when ser.dtype == object ser = Series( [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")], dtype=dtype, diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 56ad0d622cfb6..7fdb7423d9a1d 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -839,7 +839,7 @@ def test_pi_add_offset_array(self, box): pd.offsets.QuarterEnd(n=-2, startingMonth=12), ] ) - expected = PeriodIndex([Period("2015Q2"), Period("2015Q4")]) + expected = PeriodIndex([Period("2015Q2"), Period("2015Q4")]).astype(object) with tm.assert_produces_warning(PerformanceWarning): res = pi + offs @@ -872,6 +872,7 @@ def test_pi_sub_offset_array(self, box): ) expected = PeriodIndex([pi[n] - other[n] for n in range(len(pi))]) + expected = expected.astype(object) with tm.assert_produces_warning(PerformanceWarning): res = pi - other @@ -1301,13 +1302,13 @@ def test_parr_add_sub_object_array(self): expected = PeriodIndex( ["2001-01-01", "2001-01-03", "2001-01-05"], freq="D" - ).array + )._data.astype(object) tm.assert_equal(result, expected) with tm.assert_produces_warning(PerformanceWarning): result = parr - other - expected = PeriodIndex(["2000-12-30"] * 3, 
freq="D").array + expected = PeriodIndex(["2000-12-30"] * 3, freq="D")._data.astype(object) tm.assert_equal(result, expected) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index f3ea741607692..4e537c8c4c993 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -27,6 +27,7 @@ ) import pandas._testing as tm from pandas.core.api import NumericIndex +from pandas.core.arrays import PandasArray from pandas.tests.arithmetic.common import ( assert_invalid_addsub_type, assert_invalid_comparison, @@ -583,7 +584,7 @@ def test_tda_add_dt64_object_array(self, box_with_array, tz_naive_fixture): with tm.assert_produces_warning(PerformanceWarning): result = obj + other.astype(object) - tm.assert_equal(result, other) + tm.assert_equal(result, other.astype(object)) # ------------------------------------------------------------- # Binary operations TimedeltaIndex and timedelta-like @@ -1295,8 +1296,8 @@ def test_td64arr_add_sub_offset_index(self, names, box_with_array): ) tdi = tm.box_expected(tdi, box) - expected = tm.box_expected(expected, box) - expected_sub = tm.box_expected(expected_sub, box) + expected = tm.box_expected(expected, box).astype(object, copy=False) + expected_sub = tm.box_expected(expected_sub, box).astype(object, copy=False) with tm.assert_produces_warning(PerformanceWarning): res = tdi + other @@ -1324,7 +1325,7 @@ def test_td64arr_add_sub_offset_array(self, box_with_array): ) tdi = tm.box_expected(tdi, box) - expected = tm.box_expected(expected, box) + expected = tm.box_expected(expected, box).astype(object) with tm.assert_produces_warning(PerformanceWarning): res = tdi + other @@ -1334,7 +1335,7 @@ def test_td64arr_add_sub_offset_array(self, box_with_array): res2 = other + tdi tm.assert_equal(res2, expected) - expected_sub = tm.box_expected(expected_sub, box_with_array) + expected_sub = tm.box_expected(expected_sub, box_with_array).astype(object) 
with tm.assert_produces_warning(PerformanceWarning): res_sub = tdi - other tm.assert_equal(res_sub, expected_sub) @@ -1348,9 +1349,11 @@ def test_td64arr_with_offset_series(self, names, box_with_array): tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"], name=names[0]) other = Series([offsets.Hour(n=1), offsets.Minute(n=-2)], name=names[1]) - expected_add = Series([tdi[n] + other[n] for n in range(len(tdi))], name=exname) + expected_add = Series( + [tdi[n] + other[n] for n in range(len(tdi))], name=exname, dtype=object + ) obj = tm.box_expected(tdi, box) - expected_add = tm.box_expected(expected_add, box2) + expected_add = tm.box_expected(expected_add, box2).astype(object) with tm.assert_produces_warning(PerformanceWarning): res = obj + other @@ -1360,8 +1363,10 @@ def test_td64arr_with_offset_series(self, names, box_with_array): res2 = other + obj tm.assert_equal(res2, expected_add) - expected_sub = Series([tdi[n] - other[n] for n in range(len(tdi))], name=exname) - expected_sub = tm.box_expected(expected_sub, box2) + expected_sub = Series( + [tdi[n] - other[n] for n in range(len(tdi))], name=exname, dtype=object + ) + expected_sub = tm.box_expected(expected_sub, box2).astype(object) with tm.assert_produces_warning(PerformanceWarning): res3 = obj - other @@ -1394,7 +1399,7 @@ def test_td64arr_addsub_anchored_offset_arraylike(self, obox, box_with_array): # ------------------------------------------------------------------ # Unsorted - def test_td64arr_add_sub_object_array(self, box_with_array, using_array_manager): + def test_td64arr_add_sub_object_array(self, box_with_array): box = box_with_array xbox = np.ndarray if box is pd.array else box @@ -1409,12 +1414,7 @@ def test_td64arr_add_sub_object_array(self, box_with_array, using_array_manager) expected = pd.Index( [Timedelta(days=2), Timedelta(days=4), Timestamp("2000-01-07")] ) - expected = tm.box_expected(expected, xbox) - if not using_array_manager: - # TODO: avoid mismatched behavior. 
This occurs bc inference - # can happen within TimedeltaArray method, which means results - # depend on whether we split blocks. - expected = expected.astype(object) + expected = tm.box_expected(expected, xbox).astype(object) tm.assert_equal(result, expected) msg = "unsupported operand type|cannot subtract a datelike" @@ -1426,9 +1426,7 @@ def test_td64arr_add_sub_object_array(self, box_with_array, using_array_manager) result = other - tdarr expected = pd.Index([Timedelta(0), Timedelta(0), Timestamp("2000-01-01")]) - expected = tm.box_expected(expected, xbox) - if not using_array_manager: - expected = expected.astype(object) + expected = tm.box_expected(expected, xbox).astype(object) tm.assert_equal(result, expected) @@ -1668,7 +1666,7 @@ def test_td64arr_div_td64_ndarray(self, box_with_array): tm.assert_equal(result, expected) result = rng / other.astype(object) - tm.assert_equal(result, expected) + tm.assert_equal(result, expected.astype(object)) result = rng / list(other) tm.assert_equal(result, expected) @@ -1701,6 +1699,39 @@ def test_tdarr_div_length_mismatch(self, box_with_array): with pytest.raises(ValueError, match=msg): other / rng + def test_td64_div_object_mixed_result(self, box_with_array): + # Case where having a NaT in the result instead of timedelta64("NaT") + # is misleading + orig = timedelta_range("1 Day", periods=3).insert(1, NaT) + tdi = tm.box_expected(orig, box_with_array, transpose=False) + + other = np.array([orig[0], 1.5, 2.0, orig[2]], dtype=object) + other = tm.box_expected(other, box_with_array, transpose=False) + + res = tdi / other + + expected = pd.Index( + [1.0, np.timedelta64("NaT", "ns"), orig[0], 1.5], dtype=object + ) + expected = tm.box_expected(expected, box_with_array, transpose=False) + if isinstance(expected, PandasArray): + expected = expected.to_numpy() + tm.assert_equal(res, expected) + if box_with_array is DataFrame: + # We have a np.timedelta64(NaT), not pd.NaT + assert isinstance(res.iloc[1, 0], np.timedelta64) + 
res = tdi // other + + expected = pd.Index([1, np.timedelta64("NaT", "ns"), orig[0], 1], dtype=object) + expected = tm.box_expected(expected, box_with_array, transpose=False) + if isinstance(expected, PandasArray): + expected = expected.to_numpy() + tm.assert_equal(res, expected) + if box_with_array is DataFrame: + # We have a np.timedelta64(NaT), not pd.NaT + assert isinstance(res.iloc[1, 0], np.timedelta64) + # ------------------------------------------------------------------ # __floordiv__, __rfloordiv__ @@ -1788,6 +1819,10 @@ def test_td64arr_mod_tdscalar(self, box_with_array, three_days): warn = None if box_with_array is DataFrame and isinstance(three_days, pd.DateOffset): warn = PerformanceWarning + # TODO: making expected be object here is a result of DataFrame.__divmod__ + # being defined in a naive way that does not dispatch to the underlying + # array's __divmod__ + expected = expected.astype(object) with tm.assert_produces_warning(warn): result = divmod(tdarr, three_days) @@ -1992,11 +2027,16 @@ def test_td64arr_div_numeric_array( result = tdser / vector.astype(object) if box_with_array is DataFrame: expected = [tdser.iloc[0, n] / vector[n] for n in range(len(vector))] + expected = tm.box_expected(expected, xbox).astype(object) else: expected = [tdser[n] / vector[n] for n in range(len(tdser))] - expected = pd.Index(expected) # do dtype inference - expected = tm.box_expected(expected, xbox) - assert tm.get_dtype(expected) == "m8[ns]" + expected = [ + x if x is not NaT else np.timedelta64("NaT", "ns") for x in expected + ] + if xbox is tm.to_array: + expected = tm.to_array(expected).astype(object) + else: + expected = xbox(expected, dtype=object) tm.assert_equal(result, expected) @@ -2064,11 +2104,15 @@ def test_td64arr_all_nat_div_object_dtype_numeric(self, box_with_array): left = tm.box_expected(tdi, box_with_array) right = np.array([2, 2.0], dtype=object) + expected = pd.Index([np.timedelta64("NaT", "ns")] * 2, dtype=object) + if box_with_array is not 
pd.Index: + expected = tm.box_expected(expected, box_with_array).astype(object) + result = left / right - tm.assert_equal(result, left) + tm.assert_equal(result, expected) result = left // right - tm.assert_equal(result, left) + tm.assert_equal(result, expected) class TestTimedelta64ArrayLikeArithmetic: