Skip to content

BUG: 2D DTA/TDA arithmetic with object-dtype #32185

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 3, 2020
21 changes: 8 additions & 13 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from pandas.core.algorithms import checked_add_with_arr, take, unique1d, value_counts
from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
import pandas.core.common as com
from pandas.core.construction import array, extract_array
from pandas.core.indexers import check_array_indexer
from pandas.core.ops.common import unpack_zerodim_and_defer
from pandas.core.ops.invalid import invalid_comparison, make_invalid_op
Expand Down Expand Up @@ -623,7 +624,7 @@ def astype(self, dtype, copy=True):
dtype = pandas_dtype(dtype)

if is_object_dtype(dtype):
return self._box_values(self.asi8)
return self._box_values(self.asi8.ravel()).reshape(self.shape)
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
return self._format_native_types()
elif is_integer_dtype(dtype):
Expand Down Expand Up @@ -1256,19 +1257,13 @@ def _addsub_object_array(self, other: np.ndarray, op):
PerformanceWarning,
)

# For EA self.astype('O') returns a numpy array, not an Index
left = self.astype("O")
# Caller is responsible for broadcasting if necessary
assert self.shape == other.shape, (self.shape, other.shape)

res_values = op(left, np.array(other))
kwargs = {}
if not is_period_dtype(self):
kwargs["freq"] = "infer"
try:
res = type(self)._from_sequence(res_values, **kwargs)
except ValueError:
# e.g. we've passed a Timestamp to TimedeltaArray
res = res_values
return res
res_values = op(self.astype("O"), np.array(other))
result = array(res_values.ravel())
result = extract_array(result, extract_numpy=True).reshape(self.shape)
return result

def _time_shift(self, periods, freq=None):
"""
Expand Down
41 changes: 41 additions & 0 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
date_range,
)
import pandas._testing as tm
from pandas.core.arrays import DatetimeArray, TimedeltaArray
from pandas.core.ops import roperator
from pandas.tests.arithmetic.common import (
assert_invalid_addsub_type,
Expand Down Expand Up @@ -956,6 +957,18 @@ def test_dt64arr_sub_NaT(self, box_with_array):
# -------------------------------------------------------------
# Subtraction of datetime-like array-like

def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture):
dti = pd.date_range("2016-01-01", periods=3, tz=tz_naive_fixture)
expected = dti - dti

obj = tm.box_expected(dti, box_with_array)
expected = tm.box_expected(expected, box_with_array)

warn = PerformanceWarning if box_with_array is not pd.DataFrame else None
with tm.assert_produces_warning(warn):
result = obj - obj.astype(object)
tm.assert_equal(result, expected)

def test_dt64arr_naive_sub_dt64ndarray(self, box_with_array):
dti = pd.date_range("2016-01-01", periods=3, tz=None)
dt64vals = dti.values
Expand Down Expand Up @@ -2395,3 +2408,31 @@ def test_shift_months(years, months):
raw = [x + pd.offsets.DateOffset(years=years, months=months) for x in dti]
expected = DatetimeIndex(raw)
tm.assert_index_equal(actual, expected)


def test_dt64arr_addsub_object_dtype_2d():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok with either class based or free functions, mixing is slightly odd.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yah, this is a bit of an odd-man-out because the class-based tests are all trying to use box_with_array, whereas this is specifically aiming at the 2D array case

# block-wise DataFrame operations will require operating on 2D
# DatetimeArray/TimedeltaArray, so check that specifically.
dti = pd.date_range("1994-02-13", freq="2W", periods=4)
dta = dti._data.reshape((4, 1))

other = np.array([[pd.offsets.Day(n)] for n in range(4)])
assert other.shape == dta.shape

with tm.assert_produces_warning(PerformanceWarning):
result = dta + other
with tm.assert_produces_warning(PerformanceWarning):
expected = (dta[:, 0] + other[:, 0]).reshape(-1, 1)

assert isinstance(result, DatetimeArray)
assert result.freq is None
tm.assert_numpy_array_equal(result._data, expected._data)

with tm.assert_produces_warning(PerformanceWarning):
# Case where we expect to get a TimedeltaArray back
result2 = dta - dta.astype(object)

assert isinstance(result2, TimedeltaArray)
assert result2.shape == (4, 1)
assert result2.freq is None
assert (result2.asi8 == 0).all()
14 changes: 14 additions & 0 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,20 @@ def test_tda_add_sub_index(self):
expected = tdi - tdi
tm.assert_index_equal(result, expected)

def test_tda_add_dt64_object_array(self, box_df_fail, tz_naive_fixture):
# Result should be cast back to DatetimeArray
dti = pd.date_range("2016-01-01", periods=3, tz=tz_naive_fixture)
dti._set_freq(None)
tdi = dti - dti

obj = tm.box_expected(tdi, box_df_fail)
other = tm.box_expected(dti, box_df_fail)

warn = PerformanceWarning if box_df_fail is not pd.DataFrame else None
with tm.assert_produces_warning(warn):
result = obj + other.astype(object)
tm.assert_equal(result, other)

# -------------------------------------------------------------
# Binary operations TimedeltaIndex and timedelta-like

Expand Down