Skip to content

Commit 35a7f80

Browse files
authored
API: don't do type inference on arithmetic results (#49714)
* API: don't do type inference on arithmetic results * mypy fixup * use concat_compat * don't infer in TimedeltaArray * update addsub * avoid messing with box_expected
1 parent eff6566 commit 35a7f80

File tree

10 files changed

+171
-91
lines changed

10 files changed

+171
-91
lines changed

doc/source/whatsnew/v2.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ Other API changes
478478
- Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`)
479479
- Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`)
480480
- :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of a :class:`Int64Index` (:issue:`49745`)
481-
- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations, use ``result.infer_objects()`` to do type inference on the result (:issue:`49999`)
481+
- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes: the results no longer do type inference on the result of the array operations. Use ``result.infer_objects(copy=False)`` to do type inference on the result (:issue:`49999`, :issue:`49714`)
482482
- Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`)
483483
- Changed behavior of :class:`Series` and :class:`DataFrame` constructors when given an integer dtype and floating-point data that is not round numbers, this now raises ``ValueError`` instead of silently retaining the float dtype; do ``Series(data)`` or ``DataFrame(data)`` to get the old behavior, and ``Series(data).astype(dtype)`` or ``DataFrame(data).astype(dtype)`` to get the specified dtype (:issue:`49599`)
484484
- Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`)

pandas/_libs/tslibs/timedeltas.pyi

+6
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,12 @@ def delta_to_nanoseconds(
7676
reso: int = ..., # NPY_DATETIMEUNIT
7777
round_ok: bool = ...,
7878
) -> int: ...
79+
# Stub for the elementwise floor-division helper implemented in
# timedeltas.pyx: `left` is indexed element by element against the
# object-dtype `right`, and an ndarray of the per-element results is returned.
def floordiv_object_array(
80+
left: np.ndarray, right: npt.NDArray[np.object_]
81+
) -> np.ndarray: ...
82+
# Stub for the elementwise true-division helper implemented in
# timedeltas.pyx: `left` is indexed element by element against the
# object-dtype `right`, and an ndarray of the per-element results is returned.
def truediv_object_array(
83+
left: np.ndarray, right: npt.NDArray[np.object_]
84+
) -> np.ndarray: ...
7985

8086
class Timedelta(timedelta):
8187
_creso: int

pandas/_libs/tslibs/timedeltas.pyx

+58
Original file line numberDiff line numberDiff line change
@@ -2030,6 +2030,64 @@ class Timedelta(_Timedelta):
20302030
return div, other - div * self
20312031

20322032

2033+
def truediv_object_array(ndarray left, ndarray right):
# Divide each element of `left` by the element of `right` at the same
# position and return the results in a new object-dtype ndarray with
# the same shape as `left`.
# NOTE(review): elements are accessed with a single integer index over
# range(len(left)), so both inputs are presumably 1-D and of equal
# length -- confirm against callers.
2034+
cdef:
2035+
ndarray[object] result = np.empty((<object>left).shape, dtype=object)
2036+
object td64 # really timedelta64 if we find a way to declare that
2037+
object obj, res_value
2038+
_Timedelta td
2039+
Py_ssize_t i
2040+
2041+
for i in range(len(left)):
2042+
td64 = left[i]
2043+
obj = right[i]
2044+
2045+
if get_timedelta64_value(td64) == NPY_NAT:
2046+
# td64 here should be interpreted as a td64 NaT
2047+
if _should_cast_to_timedelta(obj):
2048+
res_value = np.nan
2049+
else:
2050+
# if it's a number then let numpy handle division, otherwise
2051+
# numpy will raise
2052+
res_value = td64 / obj
2053+
else:
# non-NaT element: wrap it in a Timedelta and let Timedelta's own
# division operator handle `obj`
2054+
td = Timedelta(td64)
2055+
res_value = td / obj
2056+
2057+
result[i] = res_value
2058+
2059+
return result
2060+
2061+
2062+
def floordiv_object_array(ndarray left, ndarray right):
# Floor-divide each element of `left` by the element of `right` at the
# same position and return the results in a new object-dtype ndarray
# with the same shape as `left`.
# NOTE(review): elements are accessed with a single integer index over
# range(len(left)), so both inputs are presumably 1-D and of equal
# length -- confirm against callers.
2063+
cdef:
2064+
ndarray[object] result = np.empty((<object>left).shape, dtype=object)
2065+
object td64 # really timedelta64 if we find a way to declare that
2066+
object obj, res_value
2067+
_Timedelta td
2068+
Py_ssize_t i
2069+
2070+
for i in range(len(left)):
2071+
td64 = left[i]
2072+
obj = right[i]
2073+
2074+
if get_timedelta64_value(td64) == NPY_NAT:
2075+
# td64 here should be interpreted as a td64 NaT
2076+
if _should_cast_to_timedelta(obj):
2077+
res_value = np.nan
2078+
else:
2079+
# if it's a number then let numpy handle division, otherwise
2080+
# numpy will raise
2081+
res_value = td64 // obj
2082+
else:
# non-NaT element: wrap it in a Timedelta and let Timedelta's own
# floor-division operator handle `obj`
2083+
td = Timedelta(td64)
2084+
res_value = td // obj
2085+
2086+
result[i] = res_value
2087+
2088+
return result
2089+
2090+
20332091
cdef bint is_any_td_scalar(object obj):
20342092
"""
20352093
Cython equivalent for `isinstance(obj, (timedelta, np.timedelta64, Tick))`

pandas/core/arrays/datetimelike.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -1373,11 +1373,7 @@ def _addsub_object_array(self, other: np.ndarray, op):
13731373
assert self.shape == other.shape, (self.shape, other.shape)
13741374

13751375
res_values = op(self.astype("O"), np.asarray(other))
1376-
1377-
ext_arr = pd_array(res_values.ravel())
1378-
result = cast(np.ndarray, extract_array(ext_arr, extract_numpy=True))
1379-
result = result.reshape(self.shape)
1380-
return result
1376+
return res_values
13811377

13821378
def _accumulate(self, name: str, *, skipna: bool = True, **kwargs):
13831379

pandas/core/arrays/timedeltas.py

+19-42
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@
3232
from pandas._libs.tslibs.fields import get_timedelta_field
3333
from pandas._libs.tslibs.timedeltas import (
3434
array_to_timedelta64,
35+
floordiv_object_array,
3536
ints_to_pytimedelta,
3637
parse_timedelta_unit,
38+
truediv_object_array,
3739
)
3840
from pandas._typing import (
3941
AxisInt,
@@ -63,6 +65,7 @@
6365
from pandas.core.arrays import datetimelike as dtl
6466
from pandas.core.arrays._ranges import generate_regular_range
6567
import pandas.core.common as com
68+
from pandas.core.construction import extract_array
6669
from pandas.core.ops.common import unpack_zerodim_and_defer
6770

6871
if TYPE_CHECKING:
@@ -528,30 +531,13 @@ def __truediv__(self, other):
528531
return self._ndarray / other
529532

530533
elif is_object_dtype(other.dtype):
531-
# We operate on raveled arrays to avoid problems in inference
532-
# on NaT
533-
# TODO: tests with non-nano
534-
srav = self.ravel()
535-
orav = other.ravel()
536-
result_list = [srav[n] / orav[n] for n in range(len(srav))]
537-
result = np.array(result_list).reshape(self.shape)
538-
539-
# We need to do dtype inference in order to keep DataFrame ops
540-
# behavior consistent with Series behavior
541-
inferred = lib.infer_dtype(result, skipna=False)
542-
if inferred == "timedelta":
543-
flat = result.ravel()
544-
result = type(self)._from_sequence(flat).reshape(result.shape)
545-
elif inferred == "floating":
546-
result = result.astype(float)
547-
elif inferred == "datetime":
548-
# GH#39750 this occurs when result is all-NaT, in which case
549-
# we want to interpret these NaTs as td64.
550-
# We construct an all-td64NaT result.
551-
# error: Incompatible types in assignment (expression has type
552-
# "TimedeltaArray", variable has type "ndarray[Any,
553-
# dtype[floating[_64Bit]]]")
554-
result = self * np.nan # type: ignore[assignment]
534+
other = extract_array(other, extract_numpy=True)
535+
if self.ndim > 1:
536+
res_cols = [left / right for left, right in zip(self, other)]
537+
res_cols2 = [x.reshape(1, -1) for x in res_cols]
538+
result = np.concatenate(res_cols2, axis=0)
539+
else:
540+
result = truediv_object_array(self._ndarray, other)
555541

556542
return result
557543

@@ -652,24 +638,15 @@ def __floordiv__(self, other):
652638
return result
653639

654640
elif is_object_dtype(other.dtype):
655-
# error: Incompatible types in assignment (expression has type
656-
# "List[Any]", variable has type "ndarray")
657-
srav = self.ravel()
658-
orav = other.ravel()
659-
res_list = [srav[n] // orav[n] for n in range(len(srav))]
660-
result_flat = np.asarray(res_list)
661-
inferred = lib.infer_dtype(result_flat, skipna=False)
662-
663-
result = result_flat.reshape(self.shape)
664-
665-
if inferred == "timedelta":
666-
result, _ = sequence_to_td64ns(result)
667-
return type(self)(result)
668-
if inferred == "datetime":
669-
# GH#39750 occurs when result is all-NaT, which in this
670-
# case should be interpreted as td64nat. This can only
671-
# occur when self is all-td64nat
672-
return self * np.nan
641+
other = extract_array(other, extract_numpy=True)
642+
if self.ndim > 1:
643+
res_cols = [left // right for left, right in zip(self, other)]
644+
res_cols2 = [x.reshape(1, -1) for x in res_cols]
645+
result = np.concatenate(res_cols2, axis=0)
646+
else:
647+
result = floordiv_object_array(self._ndarray, other)
648+
649+
assert result.dtype == object
673650
return result
674651

675652
elif is_integer_dtype(other.dtype) or is_float_dtype(other.dtype):

pandas/tests/arithmetic/test_datetime64.py

+10-15
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,6 @@
3434
date_range,
3535
)
3636
import pandas._testing as tm
37-
from pandas.core.arrays import (
38-
DatetimeArray,
39-
TimedeltaArray,
40-
)
4137
from pandas.core.ops import roperator
4238
from pandas.tests.arithmetic.common import (
4339
assert_cannot_add,
@@ -1023,7 +1019,7 @@ def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture):
10231019
expected = dti - dti
10241020

10251021
obj = tm.box_expected(dti, box_with_array)
1026-
expected = tm.box_expected(expected, box_with_array)
1022+
expected = tm.box_expected(expected, box_with_array).astype(object)
10271023

10281024
with tm.assert_produces_warning(PerformanceWarning):
10291025
result = obj - obj.astype(object)
@@ -1572,10 +1568,13 @@ def test_dt64arr_add_sub_offset_array(
15721568

15731569
other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])
15741570
expected = DatetimeIndex([op(dti[n], other[n]) for n in range(len(dti))])
1575-
expected = tm.box_expected(expected, box_with_array)
1571+
expected = tm.box_expected(expected, box_with_array).astype(object)
15761572

15771573
if box_other:
15781574
other = tm.box_expected(other, box_with_array)
1575+
if box_with_array is pd.array and op is roperator.radd:
1576+
# We expect a PandasArray, not ndarray[object] here
1577+
expected = pd.array(expected, dtype=object)
15791578

15801579
with tm.assert_produces_warning(PerformanceWarning):
15811580
res = op(dtarr, other)
@@ -2373,7 +2372,7 @@ def test_dti_addsub_offset_arraylike(
23732372
expected = DatetimeIndex(
23742373
[op(dti[n], other[n]) for n in range(len(dti))], name=names[2], freq="infer"
23752374
)
2376-
expected = tm.box_expected(expected, xbox)
2375+
expected = tm.box_expected(expected, xbox).astype(object)
23772376
tm.assert_equal(res, expected)
23782377

23792378
@pytest.mark.parametrize("other_box", [pd.Index, np.array])
@@ -2388,14 +2387,14 @@ def test_dti_addsub_object_arraylike(
23882387
xbox = get_upcast_box(dtarr, other)
23892388

23902389
expected = DatetimeIndex(["2017-01-31", "2017-01-06"], tz=tz_naive_fixture)
2391-
expected = tm.box_expected(expected, xbox)
2390+
expected = tm.box_expected(expected, xbox).astype(object)
23922391

23932392
with tm.assert_produces_warning(PerformanceWarning):
23942393
result = dtarr + other
23952394
tm.assert_equal(result, expected)
23962395

23972396
expected = DatetimeIndex(["2016-12-31", "2016-12-29"], tz=tz_naive_fixture)
2398-
expected = tm.box_expected(expected, xbox)
2397+
expected = tm.box_expected(expected, xbox).astype(object)
23992398

24002399
with tm.assert_produces_warning(PerformanceWarning):
24012400
result = dtarr - other
@@ -2435,15 +2434,11 @@ def test_dt64arr_addsub_object_dtype_2d():
24352434
with tm.assert_produces_warning(PerformanceWarning):
24362435
expected = (dta[:, 0] + other[:, 0]).reshape(-1, 1)
24372436

2438-
assert isinstance(result, DatetimeArray)
2439-
assert result.freq is None
2440-
tm.assert_numpy_array_equal(result._ndarray, expected._ndarray)
2437+
tm.assert_numpy_array_equal(result, expected)
24412438

24422439
with tm.assert_produces_warning(PerformanceWarning):
24432440
# Case where we expect to get a TimedeltaArray back
24442441
result2 = dta - dta.astype(object)
24452442

2446-
assert isinstance(result2, TimedeltaArray)
24472443
assert result2.shape == (4, 1)
2448-
assert result2.freq is None
2449-
assert (result2.asi8 == 0).all()
2444+
assert all(td.value == 0 for td in result2.ravel())

pandas/tests/arithmetic/test_numeric.py

+2
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,8 @@ def test_add_sub_datetimedeltalike_invalid(
320320
r"operand type\(s\) all returned NotImplemented from __array_ufunc__",
321321
"can only perform ops with numeric values",
322322
"cannot subtract DatetimeArray from ndarray",
323+
# pd.Timedelta(1) + Index([0, 1, 2])
324+
"Cannot add or subtract Timedelta from integers",
323325
]
324326
)
325327
assert_invalid_addsub_type(left, other, msg)

pandas/tests/arithmetic/test_object.py

+1
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ def test_operators_na_handling(self):
194194
@pytest.mark.parametrize("dtype", [None, object])
195195
def test_series_with_dtype_radd_timedelta(self, dtype):
196196
# note this test is _not_ aimed at timedelta64-dtyped Series
197+
# as of 2.0 we retain object dtype when ser.dtype == object
197198
ser = Series(
198199
[pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
199200
dtype=dtype,

pandas/tests/arithmetic/test_period.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -839,7 +839,7 @@ def test_pi_add_offset_array(self, box):
839839
pd.offsets.QuarterEnd(n=-2, startingMonth=12),
840840
]
841841
)
842-
expected = PeriodIndex([Period("2015Q2"), Period("2015Q4")])
842+
expected = PeriodIndex([Period("2015Q2"), Period("2015Q4")]).astype(object)
843843

844844
with tm.assert_produces_warning(PerformanceWarning):
845845
res = pi + offs
@@ -872,6 +872,7 @@ def test_pi_sub_offset_array(self, box):
872872
)
873873

874874
expected = PeriodIndex([pi[n] - other[n] for n in range(len(pi))])
875+
expected = expected.astype(object)
875876

876877
with tm.assert_produces_warning(PerformanceWarning):
877878
res = pi - other
@@ -1301,13 +1302,13 @@ def test_parr_add_sub_object_array(self):
13011302

13021303
expected = PeriodIndex(
13031304
["2001-01-01", "2001-01-03", "2001-01-05"], freq="D"
1304-
).array
1305+
)._data.astype(object)
13051306
tm.assert_equal(result, expected)
13061307

13071308
with tm.assert_produces_warning(PerformanceWarning):
13081309
result = parr - other
13091310

1310-
expected = PeriodIndex(["2000-12-30"] * 3, freq="D").array
1311+
expected = PeriodIndex(["2000-12-30"] * 3, freq="D")._data.astype(object)
13111312
tm.assert_equal(result, expected)
13121313

13131314

0 commit comments

Comments
 (0)