Skip to content

PERF: op(frame, series) when series is not EA #33600

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Apr 25, 2020
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
b01ff4a
PERF: operate blockwise in Frame + Series
jbrockmendel Apr 15, 2020
43518f1
Merge branch 'master' of https://github.com/pandas-dev/pandas into pe…
jbrockmendel Apr 16, 2020
f75ca17
Merge branch 'master' of https://github.com/pandas-dev/pandas into pe…
jbrockmendel Apr 16, 2020
3374660
Merge branch 'master' of https://github.com/pandas-dev/pandas into pe…
jbrockmendel Apr 16, 2020
9217e7e
benchmark, cleanup special case
jbrockmendel Apr 16, 2020
d0bc914
clean up comments
jbrockmendel Apr 16, 2020
6450b2b
Merge branch 'master' of https://github.com/pandas-dev/pandas into pe…
jbrockmendel Apr 17, 2020
6fa2da9
revert unrelated
jbrockmendel Apr 17, 2020
4f4d35a
Merge branch 'master' of https://github.com/pandas-dev/pandas into pe…
jbrockmendel Apr 17, 2020
0e31f95
npdev fix
jbrockmendel Apr 17, 2020
574afb9
Merge branch 'master' of https://github.com/pandas-dev/pandas into pe…
jbrockmendel Apr 18, 2020
63823f1
troubleshoot asv
jbrockmendel Apr 18, 2020
281807c
Merge branch 'master' of https://github.com/pandas-dev/pandas into pe…
jbrockmendel Apr 19, 2020
f8a4ea4
Merge branch 'master' of https://github.com/pandas-dev/pandas into pe…
jbrockmendel Apr 21, 2020
229369b
remove asv to troubleshoot CI
jbrockmendel Apr 21, 2020
711977e
whatsnew, asv
jbrockmendel Apr 21, 2020
86b64be
typo fixup
jbrockmendel Apr 21, 2020
0648cc8
restore asv
jbrockmendel Apr 21, 2020
2a2e9fc
div->truediv
jbrockmendel Apr 21, 2020
5cdf0b5
troubleshoot asvs
jbrockmendel Apr 21, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,11 +518,22 @@ def __truediv__(self, other):
return self._data / other

elif is_object_dtype(other.dtype):
# Note: we do not do type inference on the result, so either
# an object array or numeric-dtyped (if numpy does inference)
# will be returned. GH#23829
result = [self[n] / other[n] for n in range(len(self))]
result = np.array(result)
# We operate on raveled arrays to avoid problems in inference
# on NaT
srav = self.ravel()
orav = other.ravel()
result = [srav[n] / orav[n] for n in range(len(srav))]
result = np.array(result).reshape(self.shape)

# We need to do dtype inference in order to keep DataFrame ops
# behavior consistent with Series behavior
inferred = lib.infer_dtype(result)
if inferred == "timedelta":
flat = result.ravel()
result = type(self)._from_sequence(flat).reshape(result.shape)
elif inferred == "floating":
result = result.astype(float)

return result

else:
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,16 @@ def _combine_series_frame(left, right, func, axis: int, str_rep: str):
new_data = dispatch_to_series(left, right, func)

else:
rvalues = right._values
if isinstance(rvalues, np.ndarray):
# We can operate block-wise
rvalues = rvalues.reshape(1, -1)
rvalues = np.broadcast_to(rvalues, left.shape)

array_op = get_array_op(func, str_rep=str_rep)
bm = left._mgr.apply(array_op, right=rvalues.T, align_keys=["right"])
return type(left)(bm)

new_data = dispatch_to_series(left, right, func, axis="columns")

return left._construct_result(new_data)
Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1473,7 +1473,10 @@ def test_dt64arr_add_sub_offset_ndarray(self, tz_naive_fixture, box_with_array):

other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])

warn = None if box_with_array is pd.DataFrame else PerformanceWarning
warn = PerformanceWarning
if box_with_array is pd.DataFrame and tz is not None:
warn = None

with tm.assert_produces_warning(warn):
res = dtarr + other
expected = DatetimeIndex(
Expand Down Expand Up @@ -2434,7 +2437,10 @@ def test_dti_addsub_object_arraylike(
expected = pd.DatetimeIndex(["2017-01-31", "2017-01-06"], tz=tz_naive_fixture)
expected = tm.box_expected(expected, xbox)

warn = None if box_with_array is pd.DataFrame else PerformanceWarning
warn = PerformanceWarning
if box_with_array is pd.DataFrame and tz is not None:
warn = None

with tm.assert_produces_warning(warn):
result = dtarr + other
tm.assert_equal(result, expected)
Expand Down
36 changes: 14 additions & 22 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1323,14 +1323,11 @@ def test_td64arr_add_offset_index(self, names, box):
tdi = tm.box_expected(tdi, box)
expected = tm.box_expected(expected, box)

# The DataFrame operation is transposed and so operates as separate
# scalar operations, which do not issue a PerformanceWarning
warn = PerformanceWarning if box is not pd.DataFrame else None
with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
res = tdi + other
tm.assert_equal(res, expected)

with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
res2 = other + tdi
tm.assert_equal(res2, expected)

Expand All @@ -1349,14 +1346,11 @@ def test_td64arr_add_offset_array(self, box_with_array):
tdi = tm.box_expected(tdi, box)
expected = tm.box_expected(expected, box)

# The DataFrame operation is transposed and so operates as separate
# scalar operations, which do not issue a PerformanceWarning
warn = PerformanceWarning if box is not pd.DataFrame else None
with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
res = tdi + other
tm.assert_equal(res, expected)

with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
res2 = other + tdi
tm.assert_equal(res2, expected)

Expand Down Expand Up @@ -1385,10 +1379,7 @@ def test_td64arr_sub_offset_index(self, names, box_with_array):
tdi = tm.box_expected(tdi, box)
expected = tm.box_expected(expected, xbox)

# The DataFrame operation is transposed and so operates as separate
# scalar operations, which do not issue a PerformanceWarning
warn = PerformanceWarning if box is not pd.DataFrame else None
with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
res = tdi - other
tm.assert_equal(res, expected)

Expand All @@ -1404,10 +1395,7 @@ def test_td64arr_sub_offset_array(self, box_with_array):
tdi = tm.box_expected(tdi, box_with_array)
expected = tm.box_expected(expected, box_with_array)

# The DataFrame operation is transposed and so operates as separate
# scalar operations, which do not issue a PerformanceWarning
warn = None if box_with_array is pd.DataFrame else PerformanceWarning
with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
res = tdi - other
tm.assert_equal(res, expected)

Expand Down Expand Up @@ -1478,28 +1466,31 @@ def test_td64arr_add_sub_object_array(self, box_with_array):
[pd.Timedelta(days=1), pd.offsets.Day(2), pd.Timestamp("2000-01-04")]
)

warn = PerformanceWarning if box_with_array is not pd.DataFrame else None
with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
result = tdarr + other

expected = pd.Index(
[pd.Timedelta(days=2), pd.Timedelta(days=4), pd.Timestamp("2000-01-07")]
)
expected = tm.box_expected(expected, box_with_array)
if box_with_array is pd.DataFrame:
expected = expected.astype(object)
tm.assert_equal(result, expected)

msg = "unsupported operand type|cannot subtract a datelike"
with pytest.raises(TypeError, match=msg):
with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
tdarr - other

with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
result = other - tdarr

expected = pd.Index(
[pd.Timedelta(0), pd.Timedelta(0), pd.Timestamp("2000-01-01")]
)
expected = tm.box_expected(expected, box_with_array)
if box_with_array is pd.DataFrame:
expected = expected.astype(object)
tm.assert_equal(result, expected)


Expand Down Expand Up @@ -2039,6 +2030,7 @@ def test_td64arr_div_numeric_array(self, box_with_array, vector, any_real_dtype)
expected = [tdser.iloc[0, n] / vector[n] for n in range(len(vector))]
else:
expected = [tdser[n] / vector[n] for n in range(len(tdser))]
expected = pd.Index(expected) # do dtype inference
expected = tm.box_expected(expected, xbox)
tm.assert_equal(result, expected)

Expand Down
9 changes: 1 addition & 8 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,13 +613,6 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators):

expected = pd.DataFrame(exvals, columns=df.columns, index=df.index)

if opname in ["__rmod__", "__rfloordiv__"]:
# exvals will have dtypes [f8, i8, i8] so expected will be
# all-f8, but the DataFrame operation will return mixed dtypes;
# use exvals[-1].dtype instead of "i8" for compat with 32-bit
# systems/pythons
expected[False] = expected[False].astype(exvals[-1].dtype)

result = getattr(df, opname)(rowlike)
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -1042,7 +1035,7 @@ def test_combine_series(

# no upcast needed
added = mixed_float_frame + series
_check_mixed_float(added)
assert np.all(added.dtypes == series.dtype)

# vs mix (upcast) as needed
added = mixed_float_frame + series.astype("float32")
Expand Down
27 changes: 14 additions & 13 deletions pandas/tests/series/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,23 +266,24 @@ def test_scalar_na_logical_ops_corners(self):
result = s & list(s)
tm.assert_series_equal(result, expected)

def test_scalar_na_logical_ops_corners_aligns(self):
s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
s[::2] = np.nan
d = DataFrame({"A": s})
# TODO: Fix this exception - needs to be fixed! (see GH5035)
# (previously this was a TypeError because series returned
# NotImplemented)

# this is an alignment issue; these are equivalent
# https://github.com/pandas-dev/pandas/issues/5284
expected = DataFrame(False, index=range(9), columns=["A"] + list(range(9)))

with pytest.raises(TypeError):
d.__and__(s, axis="columns")
with pytest.raises(TypeError):
d.__and__(s, axis=1)
result = d.__and__(s, axis="columns")
tm.assert_frame_equal(result, expected)

with pytest.raises(TypeError):
s & d
with pytest.raises(TypeError):
d & s
result = d.__and__(s, axis=1)
tm.assert_frame_equal(result, expected)

result = s & d
tm.assert_frame_equal(result, expected)

result = d & s
tm.assert_frame_equal(result, expected)

expected = (s & s).to_frame("A")
result = d.__and__(s, axis="index")
Expand Down