diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 2745db58e83e3..8aa29468559b2 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -67,7 +67,7 @@ def time_series_op_with_fill_value_no_nas(self): self.ser.add(self.ser, fill_value=4) -class MixedFrameWithSeriesAxis0: +class MixedFrameWithSeriesAxis: params = [ [ "eq", @@ -78,7 +78,7 @@ class MixedFrameWithSeriesAxis0: "gt", "add", "sub", - "div", + "truediv", "floordiv", "mul", "pow", @@ -87,15 +87,19 @@ class MixedFrameWithSeriesAxis0: param_names = ["opname"] def setup(self, opname): - arr = np.arange(10 ** 6).reshape(100, -1) + arr = np.arange(10 ** 6).reshape(1000, -1) df = DataFrame(arr) df["C"] = 1.0 self.df = df self.ser = df[0] + self.row = df.iloc[0] def time_frame_op_with_series_axis0(self, opname): getattr(self.df, opname)(self.ser, axis=0) + def time_frame_op_with_series_axis1(self, opname): + getattr(operator, opname)(self.df, self.ser) + class Ops: diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index ebbd3c9eddfdb..5639d6702a92c 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -11,8 +11,8 @@ class FrameOps: param_names = ["op", "dtype", "axis"] def setup(self, op, dtype, axis): - if op == "mad" and dtype == "Int64" and axis == 1: - # GH-33036 + if op == "mad" and dtype == "Int64": + # GH-33036, GH#33600 raise NotImplementedError values = np.random.randn(100000, 4) if dtype == "Int64": diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9d40f9b6ffa2c..873ca5b54dcd7 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -418,6 +418,7 @@ Performance improvements - Performance improvement in :class:`Timedelta` constructor (:issue:`30543`) - Performance improvement in :class:`Timestamp` constructor (:issue:`30543`) - Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`) +- Performance improvement in arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=1`` (:issue:`33600`) - The internal index method :meth:`~Index._shallow_copy` now copies cached attributes over to the new index, avoiding creating these again on the new index. This can speed up many operations that depend on creating copies of existing indexes (:issue:`28584`, :issue:`32640`, :issue:`32669`) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 8c93dca783113..d161501138162 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -518,11 +518,22 @@ def __truediv__(self, other): return self._data / other elif is_object_dtype(other.dtype): - # Note: we do not do type inference on the result, so either - # an object array or numeric-dtyped (if numpy does inference) - # will be returned. GH#23829 - result = [self[n] / other[n] for n in range(len(self))] - result = np.array(result) + # We operate on raveled arrays to avoid problems in inference + # on NaT + srav = self.ravel() + orav = other.ravel() + result = [srav[n] / orav[n] for n in range(len(srav))] + result = np.array(result).reshape(self.shape) + + # We need to do dtype inference in order to keep DataFrame ops + # behavior consistent with Series behavior + inferred = lib.infer_dtype(result) + if inferred == "timedelta": + flat = result.ravel() + result = type(self)._from_sequence(flat).reshape(result.shape) + elif inferred == "floating": + result = result.astype(float) + return result else: diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 9a7c9fdadf90d..57ca582384f39 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -522,6 +522,16 @@ def _combine_series_frame(left, right, func, axis: int, str_rep: str): new_data = dispatch_to_series(left, right, func) else: + rvalues = right._values + if isinstance(rvalues, np.ndarray): + # We can operate block-wise + rvalues = rvalues.reshape(1, -1) + rvalues = np.broadcast_to(rvalues, left.shape) + + array_op = get_array_op(func, str_rep=str_rep) + bm = left._mgr.apply(array_op, right=rvalues.T, align_keys=["right"]) + return type(left)(bm) + new_data = dispatch_to_series(left, right, func, axis="columns") return left._construct_result(new_data) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index a8e9ad9ff7cc9..cfe2a27a1b5eb 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1473,7 +1473,10 @@ def test_dt64arr_add_sub_offset_ndarray(self, tz_naive_fixture, box_with_array): other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) - warn = None if box_with_array is pd.DataFrame else PerformanceWarning + warn = PerformanceWarning + if box_with_array is pd.DataFrame and tz is not None: + warn = None + with tm.assert_produces_warning(warn): res = dtarr + other expected = DatetimeIndex( @@ -2434,7 +2437,10 @@ def test_dti_addsub_object_arraylike( expected = pd.DatetimeIndex(["2017-01-31", "2017-01-06"], tz=tz_naive_fixture) expected = tm.box_expected(expected, xbox) - warn = None if box_with_array is pd.DataFrame else PerformanceWarning + warn = PerformanceWarning + if box_with_array is pd.DataFrame and tz is not None: + warn = None + with tm.assert_produces_warning(warn): result = dtarr + other tm.assert_equal(result, expected) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 8387e4d708662..0dc97f0780085 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1323,14 +1323,11 @@ def test_td64arr_add_offset_index(self, names, box): tdi = tm.box_expected(tdi, box) expected = tm.box_expected(expected, box) - # The DataFrame operation is transposed and so operates as separate - # scalar operations, which do not issue a PerformanceWarning - warn = PerformanceWarning if box is not pd.DataFrame else None - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): res = tdi + other tm.assert_equal(res, expected) - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): res2 = other + tdi tm.assert_equal(res2, expected) @@ -1349,14 +1346,11 @@ def test_td64arr_add_offset_array(self, box_with_array): tdi = tm.box_expected(tdi, box) expected = tm.box_expected(expected, box) - # The DataFrame operation is transposed and so operates as separate - # scalar operations, which do not issue a PerformanceWarning - warn = PerformanceWarning if box is not pd.DataFrame else None - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): res = tdi + other tm.assert_equal(res, expected) - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): res2 = other + tdi tm.assert_equal(res2, expected) @@ -1385,10 +1379,7 @@ def test_td64arr_sub_offset_index(self, names, box_with_array): tdi = tm.box_expected(tdi, box) expected = tm.box_expected(expected, xbox) - # The DataFrame operation is transposed and so operates as separate - # scalar operations, which do not issue a PerformanceWarning - warn = PerformanceWarning if box is not pd.DataFrame else None - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): res = tdi - other tm.assert_equal(res, expected) @@ -1404,10 +1395,7 @@ def test_td64arr_sub_offset_array(self, box_with_array): tdi = tm.box_expected(tdi, box_with_array) expected = tm.box_expected(expected, box_with_array) - # The DataFrame operation is transposed and so operates as separate - # scalar operations, which do not issue a PerformanceWarning - warn = None if box_with_array is pd.DataFrame else PerformanceWarning - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): res = tdi - other tm.assert_equal(res, expected) @@ -1478,28 +1466,31 @@ def test_td64arr_add_sub_object_array(self, box_with_array): [pd.Timedelta(days=1), pd.offsets.Day(2), pd.Timestamp("2000-01-04")] ) - warn = PerformanceWarning if box_with_array is not pd.DataFrame else None - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): result = tdarr + other expected = pd.Index( [pd.Timedelta(days=2), pd.Timedelta(days=4), pd.Timestamp("2000-01-07")] ) expected = tm.box_expected(expected, box_with_array) + if box_with_array is pd.DataFrame: + expected = expected.astype(object) tm.assert_equal(result, expected) msg = "unsupported operand type|cannot subtract a datelike" with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): tdarr - other - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): result = other - tdarr expected = pd.Index( [pd.Timedelta(0), pd.Timedelta(0), pd.Timestamp("2000-01-01")] ) expected = tm.box_expected(expected, box_with_array) + if box_with_array is pd.DataFrame: + expected = expected.astype(object) tm.assert_equal(result, expected) @@ -2039,6 +2030,7 @@ def test_td64arr_div_numeric_array(self, box_with_array, vector, any_real_dtype) expected = [tdser.iloc[0, n] / vector[n] for n in range(len(vector))] else: expected = [tdser[n] / vector[n] for n in range(len(tdser))] + expected = pd.Index(expected) # do dtype inference expected = tm.box_expected(expected, xbox) tm.assert_equal(result, expected) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index d929d3e030508..d75f1f14b6369 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -613,13 +613,6 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators): expected = pd.DataFrame(exvals, columns=df.columns, index=df.index) - if opname in ["__rmod__", "__rfloordiv__"]: - # exvals will have dtypes [f8, i8, i8] so expected will be - # all-f8, but the DataFrame operation will return mixed dtypes - # use exvals[-1].dtype instead of "i8" for compat with 32-bit - # systems/pythons - expected[False] = expected[False].astype(exvals[-1].dtype) - result = getattr(df, opname)(rowlike) tm.assert_frame_equal(result, expected) @@ -1042,7 +1035,7 @@ def test_combine_series( # no upcast needed added = mixed_float_frame + series - _check_mixed_float(added) + assert np.all(added.dtypes == series.dtype) # vs mix (upcast) as needed added = mixed_float_frame + series.astype("float32") diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 1340f514e31ce..ba1b3e9d0ca8e 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -266,23 +266,24 @@ def test_scalar_na_logical_ops_corners(self): result = s & list(s) tm.assert_series_equal(result, expected) + def test_scalar_na_logical_ops_corners_aligns(self): + s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)]) + s[::2] = np.nan d = DataFrame({"A": s}) - # TODO: Fix this exception - needs to be fixed! (see GH5035) - # (previously this was a TypeError because series returned - # NotImplemented - # this is an alignment issue; these are equivalent - # https://github.com/pandas-dev/pandas/issues/5284 + expected = DataFrame(False, index=range(9), columns=["A"] + list(range(9))) - with pytest.raises(TypeError): - d.__and__(s, axis="columns") - with pytest.raises(TypeError): - d.__and__(s, axis=1) + result = d.__and__(s, axis="columns") + tm.assert_frame_equal(result, expected) - with pytest.raises(TypeError): - s & d - with pytest.raises(TypeError): - d & s + result = d.__and__(s, axis=1) + tm.assert_frame_equal(result, expected) + + result = s & d + tm.assert_frame_equal(result, expected) + + result = d & s + tm.assert_frame_equal(result, expected) expected = (s & s).to_frame("A") result = d.__and__(s, axis="index")