Skip to content

Commit a7741e3

Browse files
authored
PERF: op(frame, series) when series is not EA (#33600)
1 parent ad4465e commit a7741e3

File tree

9 files changed: 73 additions (+73) and 55 deletions (-55)

asv_bench/benchmarks/arithmetic.py

+7 -3
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ def time_series_op_with_fill_value_no_nas(self):
6767
self.ser.add(self.ser, fill_value=4)
6868

6969

70-
class MixedFrameWithSeriesAxis0:
70+
class MixedFrameWithSeriesAxis:
7171
params = [
7272
[
7373
"eq",
@@ -78,7 +78,7 @@ class MixedFrameWithSeriesAxis0:
7878
"gt",
7979
"add",
8080
"sub",
81-
"div",
81+
"truediv",
8282
"floordiv",
8383
"mul",
8484
"pow",
@@ -87,15 +87,19 @@ class MixedFrameWithSeriesAxis0:
8787
param_names = ["opname"]
8888

8989
def setup(self, opname):
90-
arr = np.arange(10 ** 6).reshape(100, -1)
90+
arr = np.arange(10 ** 6).reshape(1000, -1)
9191
df = DataFrame(arr)
9292
df["C"] = 1.0
9393
self.df = df
9494
self.ser = df[0]
95+
self.row = df.iloc[0]
9596

9697
def time_frame_op_with_series_axis0(self, opname):
9798
getattr(self.df, opname)(self.ser, axis=0)
9899

100+
def time_frame_op_with_series_axis1(self, opname):
101+
getattr(operator, opname)(self.df, self.ser)
102+
99103

100104
class Ops:
101105

asv_bench/benchmarks/stat_ops.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,8 @@ class FrameOps:
1111
param_names = ["op", "dtype", "axis"]
1212

1313
def setup(self, op, dtype, axis):
14-
if op == "mad" and dtype == "Int64" and axis == 1:
15-
# GH-33036
14+
if op == "mad" and dtype == "Int64":
15+
# GH-33036, GH#33600
1616
raise NotImplementedError
1717
values = np.random.randn(100000, 4)
1818
if dtype == "Int64":

doc/source/whatsnew/v1.1.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -450,6 +450,7 @@ Performance improvements
450450
- Performance improvement in :class:`Timedelta` constructor (:issue:`30543`)
451451
- Performance improvement in :class:`Timestamp` constructor (:issue:`30543`)
452452
- Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`)
453+
- Performance improvement in arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=1`` (:issue:`33600`)
453454
- The internal index method :meth:`~Index._shallow_copy` now copies cached attributes over to the new index,
454455
avoiding creating these again on the new index. This can speed up many operations that depend on creating copies of
455456
existing indexes (:issue:`28584`, :issue:`32640`, :issue:`32669`)

pandas/core/arrays/timedeltas.py

+16 -5
Original file line number | Diff line number | Diff line change
@@ -518,11 +518,22 @@ def __truediv__(self, other):
518518
return self._data / other
519519

520520
elif is_object_dtype(other.dtype):
521-
# Note: we do not do type inference on the result, so either
522-
# an object array or numeric-dtyped (if numpy does inference)
523-
# will be returned. GH#23829
524-
result = [self[n] / other[n] for n in range(len(self))]
525-
result = np.array(result)
521+
# We operate on raveled arrays to avoid problems in inference
522+
# on NaT
523+
srav = self.ravel()
524+
orav = other.ravel()
525+
result = [srav[n] / orav[n] for n in range(len(srav))]
526+
result = np.array(result).reshape(self.shape)
527+
528+
# We need to do dtype inference in order to keep DataFrame ops
529+
# behavior consistent with Series behavior
530+
inferred = lib.infer_dtype(result)
531+
if inferred == "timedelta":
532+
flat = result.ravel()
533+
result = type(self)._from_sequence(flat).reshape(result.shape)
534+
elif inferred == "floating":
535+
result = result.astype(float)
536+
526537
return result
527538

528539
else:

pandas/core/ops/__init__.py

+10
Original file line number | Diff line number | Diff line change
@@ -522,6 +522,16 @@ def _combine_series_frame(left, right, func, axis: int, str_rep: str):
522522
new_data = dispatch_to_series(left, right, func)
523523

524524
else:
525+
rvalues = right._values
526+
if isinstance(rvalues, np.ndarray):
527+
# We can operate block-wise
528+
rvalues = rvalues.reshape(1, -1)
529+
rvalues = np.broadcast_to(rvalues, left.shape)
530+
531+
array_op = get_array_op(func, str_rep=str_rep)
532+
bm = left._mgr.apply(array_op, right=rvalues.T, align_keys=["right"])
533+
return type(left)(bm)
534+
525535
new_data = dispatch_to_series(left, right, func, axis="columns")
526536

527537
return left._construct_result(new_data)

pandas/tests/arithmetic/test_datetime64.py

+8 -2
Original file line number | Diff line number | Diff line change
@@ -1473,7 +1473,10 @@ def test_dt64arr_add_sub_offset_ndarray(self, tz_naive_fixture, box_with_array):
14731473

14741474
other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])
14751475

1476-
warn = None if box_with_array is pd.DataFrame else PerformanceWarning
1476+
warn = PerformanceWarning
1477+
if box_with_array is pd.DataFrame and tz is not None:
1478+
warn = None
1479+
14771480
with tm.assert_produces_warning(warn):
14781481
res = dtarr + other
14791482
expected = DatetimeIndex(
@@ -2438,7 +2441,10 @@ def test_dti_addsub_object_arraylike(
24382441
expected = pd.DatetimeIndex(["2017-01-31", "2017-01-06"], tz=tz_naive_fixture)
24392442
expected = tm.box_expected(expected, xbox)
24402443

2441-
warn = None if box_with_array is pd.DataFrame else PerformanceWarning
2444+
warn = PerformanceWarning
2445+
if box_with_array is pd.DataFrame and tz is not None:
2446+
warn = None
2447+
24422448
with tm.assert_produces_warning(warn):
24432449
result = dtarr + other
24442450
tm.assert_equal(result, expected)

pandas/tests/arithmetic/test_timedelta64.py

+14 -22
Original file line number | Diff line number | Diff line change
@@ -1327,14 +1327,11 @@ def test_td64arr_add_offset_index(self, names, box):
13271327
tdi = tm.box_expected(tdi, box)
13281328
expected = tm.box_expected(expected, box)
13291329

1330-
# The DataFrame operation is transposed and so operates as separate
1331-
# scalar operations, which do not issue a PerformanceWarning
1332-
warn = PerformanceWarning if box is not pd.DataFrame else None
1333-
with tm.assert_produces_warning(warn):
1330+
with tm.assert_produces_warning(PerformanceWarning):
13341331
res = tdi + other
13351332
tm.assert_equal(res, expected)
13361333

1337-
with tm.assert_produces_warning(warn):
1334+
with tm.assert_produces_warning(PerformanceWarning):
13381335
res2 = other + tdi
13391336
tm.assert_equal(res2, expected)
13401337

@@ -1353,14 +1350,11 @@ def test_td64arr_add_offset_array(self, box_with_array):
13531350
tdi = tm.box_expected(tdi, box)
13541351
expected = tm.box_expected(expected, box)
13551352

1356-
# The DataFrame operation is transposed and so operates as separate
1357-
# scalar operations, which do not issue a PerformanceWarning
1358-
warn = PerformanceWarning if box is not pd.DataFrame else None
1359-
with tm.assert_produces_warning(warn):
1353+
with tm.assert_produces_warning(PerformanceWarning):
13601354
res = tdi + other
13611355
tm.assert_equal(res, expected)
13621356

1363-
with tm.assert_produces_warning(warn):
1357+
with tm.assert_produces_warning(PerformanceWarning):
13641358
res2 = other + tdi
13651359
tm.assert_equal(res2, expected)
13661360

@@ -1389,10 +1383,7 @@ def test_td64arr_sub_offset_index(self, names, box_with_array):
13891383
tdi = tm.box_expected(tdi, box)
13901384
expected = tm.box_expected(expected, xbox)
13911385

1392-
# The DataFrame operation is transposed and so operates as separate
1393-
# scalar operations, which do not issue a PerformanceWarning
1394-
warn = PerformanceWarning if box is not pd.DataFrame else None
1395-
with tm.assert_produces_warning(warn):
1386+
with tm.assert_produces_warning(PerformanceWarning):
13961387
res = tdi - other
13971388
tm.assert_equal(res, expected)
13981389

@@ -1408,10 +1399,7 @@ def test_td64arr_sub_offset_array(self, box_with_array):
14081399
tdi = tm.box_expected(tdi, box_with_array)
14091400
expected = tm.box_expected(expected, box_with_array)
14101401

1411-
# The DataFrame operation is transposed and so operates as separate
1412-
# scalar operations, which do not issue a PerformanceWarning
1413-
warn = None if box_with_array is pd.DataFrame else PerformanceWarning
1414-
with tm.assert_produces_warning(warn):
1402+
with tm.assert_produces_warning(PerformanceWarning):
14151403
res = tdi - other
14161404
tm.assert_equal(res, expected)
14171405

@@ -1482,28 +1470,31 @@ def test_td64arr_add_sub_object_array(self, box_with_array):
14821470
[pd.Timedelta(days=1), pd.offsets.Day(2), pd.Timestamp("2000-01-04")]
14831471
)
14841472

1485-
warn = PerformanceWarning if box_with_array is not pd.DataFrame else None
1486-
with tm.assert_produces_warning(warn):
1473+
with tm.assert_produces_warning(PerformanceWarning):
14871474
result = tdarr + other
14881475

14891476
expected = pd.Index(
14901477
[pd.Timedelta(days=2), pd.Timedelta(days=4), pd.Timestamp("2000-01-07")]
14911478
)
14921479
expected = tm.box_expected(expected, box_with_array)
1480+
if box_with_array is pd.DataFrame:
1481+
expected = expected.astype(object)
14931482
tm.assert_equal(result, expected)
14941483

14951484
msg = "unsupported operand type|cannot subtract a datelike"
14961485
with pytest.raises(TypeError, match=msg):
1497-
with tm.assert_produces_warning(warn):
1486+
with tm.assert_produces_warning(PerformanceWarning):
14981487
tdarr - other
14991488

1500-
with tm.assert_produces_warning(warn):
1489+
with tm.assert_produces_warning(PerformanceWarning):
15011490
result = other - tdarr
15021491

15031492
expected = pd.Index(
15041493
[pd.Timedelta(0), pd.Timedelta(0), pd.Timestamp("2000-01-01")]
15051494
)
15061495
expected = tm.box_expected(expected, box_with_array)
1496+
if box_with_array is pd.DataFrame:
1497+
expected = expected.astype(object)
15071498
tm.assert_equal(result, expected)
15081499

15091500

@@ -2043,6 +2034,7 @@ def test_td64arr_div_numeric_array(self, box_with_array, vector, any_real_dtype)
20432034
expected = [tdser.iloc[0, n] / vector[n] for n in range(len(vector))]
20442035
else:
20452036
expected = [tdser[n] / vector[n] for n in range(len(tdser))]
2037+
expected = pd.Index(expected) # do dtype inference
20462038
expected = tm.box_expected(expected, xbox)
20472039
tm.assert_equal(result, expected)
20482040

pandas/tests/frame/test_arithmetic.py

+1 -8
Original file line number | Diff line number | Diff line change
@@ -613,13 +613,6 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators):
613613

614614
expected = pd.DataFrame(exvals, columns=df.columns, index=df.index)
615615

616-
if opname in ["__rmod__", "__rfloordiv__"]:
617-
# exvals will have dtypes [f8, i8, i8] so expected will be
618-
# all-f8, but the DataFrame operation will return mixed dtypes
619-
# use exvals[-1].dtype instead of "i8" for compat with 32-bit
620-
# systems/pythons
621-
expected[False] = expected[False].astype(exvals[-1].dtype)
622-
623616
result = getattr(df, opname)(rowlike)
624617
tm.assert_frame_equal(result, expected)
625618

@@ -1042,7 +1035,7 @@ def test_combine_series(
10421035

10431036
# no upcast needed
10441037
added = mixed_float_frame + series
1045-
_check_mixed_float(added)
1038+
assert np.all(added.dtypes == series.dtype)
10461039

10471040
# vs mix (upcast) as needed
10481041
added = mixed_float_frame + series.astype("float32")

pandas/tests/series/test_operators.py

+14 -13
Original file line number | Diff line number | Diff line change
@@ -266,23 +266,24 @@ def test_scalar_na_logical_ops_corners(self):
266266
result = s & list(s)
267267
tm.assert_series_equal(result, expected)
268268

269+
def test_scalar_na_logical_ops_corners_aligns(self):
270+
s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
271+
s[::2] = np.nan
269272
d = DataFrame({"A": s})
270-
# TODO: Fix this exception - needs to be fixed! (see GH5035)
271-
# (previously this was a TypeError because series returned
272-
# NotImplemented
273273

274-
# this is an alignment issue; these are equivalent
275-
# https://github.com/pandas-dev/pandas/issues/5284
274+
expected = DataFrame(False, index=range(9), columns=["A"] + list(range(9)))
276275

277-
with pytest.raises(TypeError):
278-
d.__and__(s, axis="columns")
279-
with pytest.raises(TypeError):
280-
d.__and__(s, axis=1)
276+
result = d.__and__(s, axis="columns")
277+
tm.assert_frame_equal(result, expected)
281278

282-
with pytest.raises(TypeError):
283-
s & d
284-
with pytest.raises(TypeError):
285-
d & s
279+
result = d.__and__(s, axis=1)
280+
tm.assert_frame_equal(result, expected)
281+
282+
result = s & d
283+
tm.assert_frame_equal(result, expected)
284+
285+
result = d & s
286+
tm.assert_frame_equal(result, expected)
286287

287288
expected = (s & s).to_frame("A")
288289
result = d.__and__(s, axis="index")

0 commit comments

Comments
 (0)