From b01ff4abbe36e1f22cbb15a59949d7b0aaacd703 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Apr 2020 19:40:25 -0700 Subject: [PATCH 01/12] PERF: operate blockwise in Frame + Series --- pandas/core/arrays/timedeltas.py | 12 ++++++- pandas/core/internals/managers.py | 16 +++++----- pandas/core/ops/__init__.py | 18 +++++++++-- pandas/tests/arithmetic/test_datetime64.py | 10 ++++-- pandas/tests/arithmetic/test_timedelta64.py | 35 ++++++++------------- pandas/tests/frame/test_arithmetic.py | 9 +----- pandas/tests/series/test_operators.py | 27 ++++++++-------- 7 files changed, 71 insertions(+), 56 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 8c93dca783113..3db7575cbd0d3 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -39,7 +39,7 @@ from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays import datetimelike as dtl import pandas.core.common as com -from pandas.core.construction import extract_array +from pandas.core.construction import array as pd_array, extract_array from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import Tick @@ -521,8 +521,18 @@ def __truediv__(self, other): # Note: we do not do type inference on the result, so either # an object array or numeric-dtyped (if numpy does inference) # will be returned. GH#23829 + # FIXME: the above comment is no longer accurate... sometimes result = [self[n] / other[n] for n in range(len(self))] result = np.array(result) + + if self.ndim == 2: + # FIXME: kludge, just trying to get the tests passing + res = extract_array(pd_array(result.ravel()), extract_numpy=True) + result = res.reshape(result.shape) + if result.dtype.kind == "m": + # TODO: no real reason for this, but we test it + result = np.asarray(result) + return result else: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e693341d10a55..8d4289e9a33e6 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -390,11 +390,7 @@ def apply(self: T, f, align_keys=None, **kwargs) -> T: if f == "where": align_copy = True - aligned_args = { - k: kwargs[k] - for k in align_keys - if isinstance(kwargs[k], (ABCSeries, ABCDataFrame)) - } + aligned_args = {k: kwargs[k] for k in align_keys} for b in self.blocks: @@ -402,8 +398,14 @@ def apply(self: T, f, align_keys=None, **kwargs) -> T: b_items = self.items[b.mgr_locs.indexer] for k, obj in aligned_args.items(): - axis = obj._info_axis_number - kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)._values + if isinstance(obj, (ABCSeries, ABCDataFrame)): + axis = obj._info_axis_number + kwargs[k] = obj.reindex( + b_items, axis=axis, copy=align_copy + )._values + else: + # e.g. ndarray passed from combine_series_frame + kwargs[k] = obj[b.mgr_locs.indexer] if callable(f): applied = b.apply(f, **kwargs) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index c14c4a311d66c..accf70bb6442d 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -340,18 +340,20 @@ def column_op(a, b): right = np.asarray(right) def column_op(a, b): - return {i: func(a.iloc[:, i], b[i]) for i in range(len(a.columns))} + return {i: func(a._ixs(i, axis=1), b[i]) for i in range(len(a.columns))} else: def column_op(a, b): - return {i: func(a.iloc[:, i], b.iloc[i]) for i in range(len(a.columns))} + return { + i: func(a._ixs(i, axis=1), b.iloc[i]) for i in range(len(a.columns)) + } elif isinstance(right, ABCSeries): assert right.index.equals(left.index) # Handle other cases later def column_op(a, b): - return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))} + return {i: func(a._get_column_array(i), b) for i in range(len(a.columns))} else: # Remaining cases have less-obvious dispatch rules @@ -520,6 +522,16 @@ def _combine_series_frame(left, right, func, axis: int, str_rep: str): new_data = dispatch_to_series(left, right, func) else: + rvalues = right._values + if isinstance(rvalues, np.ndarray): + # We can operate block-wise + rvalues = rvalues.reshape(1, -1) + rvalues = np.broadcast_to(rvalues, left.shape) + + array_op = get_array_op(func, str_rep=str_rep) + bm = left._mgr.apply(array_op, right=rvalues.T, align_keys=["right"]) + return type(left)(bm) + new_data = dispatch_to_series(left, right, func, axis="columns") return left._construct_result(new_data) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 56c5647d865d3..bacbe2acd801b 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1473,7 +1473,10 @@ def test_dt64arr_add_sub_offset_ndarray(self, tz_naive_fixture, box_with_array): other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) - warn = None if box_with_array is pd.DataFrame else PerformanceWarning + warn = PerformanceWarning + if box_with_array is pd.DataFrame and tz is not None: + warn = None + with tm.assert_produces_warning(warn): res = dtarr + other expected = DatetimeIndex( @@ -2434,7 +2437,10 @@ def test_dti_addsub_object_arraylike( expected = pd.DatetimeIndex(["2017-01-31", "2017-01-06"], tz=tz_naive_fixture) expected = tm.box_expected(expected, xbox) - warn = None if box_with_array is pd.DataFrame else PerformanceWarning + warn = PerformanceWarning + if box_with_array is pd.DataFrame and tz is not None: + warn = None + with tm.assert_produces_warning(warn): result = dtarr + other tm.assert_equal(result, expected) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index beb16c9549cc4..b8f66bf13bc56 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1318,14 +1318,11 @@ def test_td64arr_add_offset_index(self, names, box): tdi = tm.box_expected(tdi, box) expected = tm.box_expected(expected, box) - # The DataFrame operation is transposed and so operates as separate - # scalar operations, which do not issue a PerformanceWarning - warn = PerformanceWarning if box is not pd.DataFrame else None - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): res = tdi + other tm.assert_equal(res, expected) - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): res2 = other + tdi tm.assert_equal(res2, expected) @@ -1344,14 +1341,11 @@ def test_td64arr_add_offset_array(self, box_with_array): tdi = tm.box_expected(tdi, box) expected = tm.box_expected(expected, box) - # The DataFrame operation is transposed and so operates as separate - # scalar operations, which do not issue a PerformanceWarning - warn = PerformanceWarning if box is not pd.DataFrame else None - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): res = tdi + other tm.assert_equal(res, expected) - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): res2 = other + tdi tm.assert_equal(res2, expected) @@ -1380,10 +1374,7 @@ def test_td64arr_sub_offset_index(self, names, box_with_array): tdi = tm.box_expected(tdi, box) expected = tm.box_expected(expected, xbox) - # The DataFrame operation is transposed and so operates as separate - # scalar operations, which do not issue a PerformanceWarning - warn = PerformanceWarning if box is not pd.DataFrame else None - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): res = tdi - other tm.assert_equal(res, expected) @@ -1399,10 +1390,7 @@ def test_td64arr_sub_offset_array(self, box_with_array): tdi = tm.box_expected(tdi, box_with_array) expected = tm.box_expected(expected, box_with_array) - # The DataFrame operation is transposed and so operates as separate - # scalar operations, which do not issue a PerformanceWarning - warn = None if box_with_array is pd.DataFrame else PerformanceWarning - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): res = tdi - other tm.assert_equal(res, expected) @@ -1473,28 +1461,31 @@ def test_td64arr_add_sub_object_array(self, box_with_array): [pd.Timedelta(days=1), pd.offsets.Day(2), pd.Timestamp("2000-01-04")] ) - warn = PerformanceWarning if box_with_array is not pd.DataFrame else None - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): result = tdarr + other expected = pd.Index( [pd.Timedelta(days=2), pd.Timedelta(days=4), pd.Timestamp("2000-01-07")] ) expected = tm.box_expected(expected, box_with_array) + if box_with_array is pd.DataFrame: + expected = expected.astype(object) tm.assert_equal(result, expected) msg = "unsupported operand type|cannot subtract a datelike" with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): tdarr - other - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): result = other - tdarr expected = pd.Index( [pd.Timedelta(0), pd.Timedelta(0), pd.Timestamp("2000-01-01")] ) expected = tm.box_expected(expected, box_with_array) + if box_with_array is pd.DataFrame: + expected = expected.astype(object) tm.assert_equal(result, expected) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index d929d3e030508..d75f1f14b6369 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -613,13 +613,6 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators): expected = pd.DataFrame(exvals, columns=df.columns, index=df.index) - if opname in ["__rmod__", "__rfloordiv__"]: - # exvals will have dtypes [f8, i8, i8] so expected will be - # all-f8, but the DataFrame operation will return mixed dtypes - # use exvals[-1].dtype instead of "i8" for compat with 32-bit - # systems/pythons - expected[False] = expected[False].astype(exvals[-1].dtype) - result = getattr(df, opname)(rowlike) tm.assert_frame_equal(result, expected) @@ -1042,7 +1035,7 @@ def test_combine_series( # no upcast needed added = mixed_float_frame + series - _check_mixed_float(added) + assert np.all(added.dtypes == series.dtype) # vs mix (upcast) as needed added = mixed_float_frame + series.astype("float32") diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 1340f514e31ce..ba1b3e9d0ca8e 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -266,23 +266,24 @@ def test_scalar_na_logical_ops_corners(self): result = s & list(s) tm.assert_series_equal(result, expected) + def test_scalar_na_logical_ops_corners_aligns(self): + s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)]) + s[::2] = np.nan d = DataFrame({"A": s}) - # TODO: Fix this exception - needs to be fixed! (see GH5035) - # (previously this was a TypeError because series returned - # NotImplemented - # this is an alignment issue; these are equivalent - # https://github.com/pandas-dev/pandas/issues/5284 + expected = DataFrame(False, index=range(9), columns=["A"] + list(range(9))) - with pytest.raises(TypeError): - d.__and__(s, axis="columns") - with pytest.raises(TypeError): - d.__and__(s, axis=1) + result = d.__and__(s, axis="columns") + tm.assert_frame_equal(result, expected) - with pytest.raises(TypeError): - s & d - with pytest.raises(TypeError): - d & s + result = d.__and__(s, axis=1) + tm.assert_frame_equal(result, expected) + + result = s & d + tm.assert_frame_equal(result, expected) + + result = d & s + tm.assert_frame_equal(result, expected) expected = (s & s).to_frame("A") result = d.__and__(s, axis="index") From 9217e7ed139c86fe08a356c0296c40a4f22a0b59 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 16 Apr 2020 16:50:23 -0700 Subject: [PATCH 02/12] benchmark, cleanup special case --- asv_bench/benchmarks/arithmetic.py | 6 +++++- pandas/core/arrays/timedeltas.py | 18 ++++++++++-------- pandas/tests/arithmetic/test_timedelta64.py | 1 + 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 2745db58e83e3..46657088e865d 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -67,7 +67,7 @@ def time_series_op_with_fill_value_no_nas(self): self.ser.add(self.ser, fill_value=4) -class MixedFrameWithSeriesAxis0: +class MixedFrameWithSeries: params = [ [ "eq", @@ -92,10 +92,14 @@ def setup(self, opname): df["C"] = 1.0 self.df = df self.ser = df[0] + self.row = df.iloc[0] def time_frame_op_with_series_axis0(self, opname): getattr(self.df, opname)(self.ser, axis=0) + def time_frame_op_with_series_axis1(self, opname): + getattr(self.df, f"__{opname}__")(self.row) + class Ops: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 3db7575cbd0d3..3987be6bce242 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -39,7 +39,7 @@ from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays import datetimelike as dtl import pandas.core.common as com -from pandas.core.construction import array as pd_array, extract_array +from pandas.core.construction import extract_array from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import Tick @@ -518,6 +518,7 @@ def __truediv__(self, other): return self._data / other elif is_object_dtype(other.dtype): + # We do inference on the result if we're 2-dimensional # Note: we do not do type inference on the result, so either # an object array or numeric-dtyped (if numpy does inference) # will be returned. GH#23829 @@ -525,13 +526,14 @@ def __truediv__(self, other): result = [self[n] / other[n] for n in range(len(self))] result = np.array(result) - if self.ndim == 2: - # FIXME: kludge, just trying to get the tests passing - res = extract_array(pd_array(result.ravel()), extract_numpy=True) - result = res.reshape(result.shape) - if result.dtype.kind == "m": - # TODO: no real reason for this, but we test it - result = np.asarray(result) + # We need to do dtype inference in order to keep DataFrame ops + # behavior consistent with Series behavior + inferred = lib.infer_dtype(result) + if inferred == "timedelta": + flat = result.ravel() + result = type(self)._from_sequence(flat).reshape(result.shape) + elif inferred == "floating": + result = result.astype(float) return result diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index b8f66bf13bc56..126c1c107473d 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -2025,6 +2025,7 @@ def test_td64arr_div_numeric_array(self, box_with_array, vector, any_real_dtype) expected = [tdser.iloc[0, n] / vector[n] for n in range(len(vector))] else: expected = [tdser[n] / vector[n] for n in range(len(tdser))] + expected = pd.Index(expected) # do dtype inference expected = tm.box_expected(expected, xbox) tm.assert_equal(result, expected) From d0bc9145ecb0b6dd85acb8a4fb00a795502ee546 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 16 Apr 2020 16:55:51 -0700 Subject: [PATCH 03/12] clean up comments --- pandas/core/arrays/timedeltas.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 3987be6bce242..b6633305c433e 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -518,11 +518,6 @@ def __truediv__(self, other): return self._data / other elif is_object_dtype(other.dtype): - # We do inference on the result if we're 2-dimensional - # Note: we do not do type inference on the result, so either - # an object array or numeric-dtyped (if numpy does inference) - # will be returned. GH#23829 - # FIXME: the above comment is no longer accurate... sometimes result = [self[n] / other[n] for n in range(len(self))] result = np.array(result) From 6fa2da9554385dd23ab87c7f45c74f9828a33285 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 16 Apr 2020 17:38:46 -0700 Subject: [PATCH 04/12] revert unrelated --- pandas/core/ops/__init__.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 8cce91d2e8952..57ca582384f39 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -340,20 +340,18 @@ def column_op(a, b): right = np.asarray(right) def column_op(a, b): - return {i: func(a._ixs(i, axis=1), b[i]) for i in range(len(a.columns))} + return {i: func(a.iloc[:, i], b[i]) for i in range(len(a.columns))} else: def column_op(a, b): - return { - i: func(a._ixs(i, axis=1), b.iloc[i]) for i in range(len(a.columns)) - } + return {i: func(a.iloc[:, i], b.iloc[i]) for i in range(len(a.columns))} elif isinstance(right, ABCSeries): assert right.index.equals(left.index) # Handle other cases later def column_op(a, b): - return {i: func(a._get_column_array(i), b) for i in range(len(a.columns))} + return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))} else: # Remaining cases have less-obvious dispatch rules From 0e31f95122b9e7d384e82015415ccd3801743221 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 17 Apr 2020 11:31:55 -0700 Subject: [PATCH 05/12] npdev fix --- asv_bench/benchmarks/arithmetic.py | 2 +- pandas/core/arrays/timedeltas.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 46657088e865d..df960c6acc86c 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -87,7 +87,7 @@ class MixedFrameWithSeries: param_names = ["opname"] def setup(self, opname): - arr = np.arange(10 ** 6).reshape(100, -1) + arr = np.arange(10 ** 6).reshape(1000, -1) df = DataFrame(arr) df["C"] = 1.0 self.df = df diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index b6633305c433e..d161501138162 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -518,8 +518,12 @@ def __truediv__(self, other): return self._data / other elif is_object_dtype(other.dtype): - result = [self[n] / other[n] for n in range(len(self))] - result = np.array(result) + # We operate on raveled arrays to avoid problems in inference + # on NaT + srav = self.ravel() + orav = other.ravel() + result = [srav[n] / orav[n] for n in range(len(srav))] + result = np.array(result).reshape(self.shape) # We need to do dtype inference in order to keep DataFrame ops # behavior consistent with Series behavior From 63823f16e423e9047c531c3ada8a41ad1af58c6f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 18 Apr 2020 10:06:47 -0700 Subject: [PATCH 06/12] troubleshoot asv --- asv_bench/benchmarks/arithmetic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index df960c6acc86c..4cd37adbca4fd 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -98,7 +98,7 @@ def time_frame_op_with_series_axis0(self, opname): getattr(self.df, opname)(self.ser, axis=0) def time_frame_op_with_series_axis1(self, opname): - getattr(self.df, f"__{opname}__")(self.row) + getattr(self.df, opname)(self.row, axis=1) class Ops: From 229369b50ddf3ced3ba7257ac2ed1d22b16dea22 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 20 Apr 2020 18:01:48 -0700 Subject: [PATCH 07/12] remove asv to troubleshoot CI --- asv_bench/benchmarks/arithmetic.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 4cd37adbca4fd..2745db58e83e3 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -67,7 +67,7 @@ def time_series_op_with_fill_value_no_nas(self): self.ser.add(self.ser, fill_value=4) -class MixedFrameWithSeries: +class MixedFrameWithSeriesAxis0: params = [ [ "eq", @@ -87,19 +87,15 @@ class MixedFrameWithSeries: param_names = ["opname"] def setup(self, opname): - arr = np.arange(10 ** 6).reshape(1000, -1) + arr = np.arange(10 ** 6).reshape(100, -1) df = DataFrame(arr) df["C"] = 1.0 self.df = df self.ser = df[0] - self.row = df.iloc[0] def time_frame_op_with_series_axis0(self, opname): getattr(self.df, opname)(self.ser, axis=0) - def time_frame_op_with_series_axis1(self, opname): - getattr(self.df, opname)(self.row, axis=1) - class Ops: From 711977e5da6b80e8f97a3b1e9af0775646d026d1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 21 Apr 2020 08:42:44 -0700 Subject: [PATCH 08/12] whatsnew, asv --- asv_bench/benchmarks/arithmetic.py | 32 ++++++++++++++++++++++++++++++ doc/source/whatsnew/v1.1.0.rst | 1 + 2 files changed, 33 insertions(+) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 2745db58e83e3..92619ba54999f 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -97,6 +97,38 @@ def time_frame_op_with_series_axis0(self, opname): getattr(self.df, opname)(self.ser, axis=0) +class MixedFrameWithSeriesAxis1: + # TODO: combine this with MixedFrameWithSeriesAxis0. + # Initial attempts have failed the CI for reasons unknown, see GH#33600 + params = [ + [ + "eq", + "ne", + "lt", + "le", + "ge", + "gt", + "add", + "sub", + "div", + "floordiv", + "mul", + "pow", + ] + ] + param_names = ["opname"] + + def setup(self, opname): + arr = np.arange(10 ** 6).reshape(1000, -1) + df = DataFrame(arr) + df["C"] = 1.0 + self.df = df + self.row = df.iloc[0] + + def time_frame_op_with_series_axis1(self, opname): + getattr(opname)(self.df, self.ser) + + class Ops: params = [[True, False], ["default", 1]] diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9d40f9b6ffa2c..873ca5b54dcd7 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -418,6 +418,7 @@ Performance improvements - Performance improvement in :class:`Timedelta` constructor (:issue:`30543`) - Performance improvement in :class:`Timestamp` constructor (:issue:`30543`) - Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`) +- Performance improvement in arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=1`` (:issue:`33600`) - The internal index method :meth:`~Index._shallow_copy` now copies cached attributes over to the new index, avoiding creating these again on the new index. This can speed up many operations that depend on creating copies of existing indexes (:issue:`28584`, :issue:`32640`, :issue:`32669`) From 86b64bef384356413735963db3f8b63a846cb2b5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 21 Apr 2020 10:44:11 -0700 Subject: [PATCH 09/12] typo fixup --- asv_bench/benchmarks/arithmetic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 92619ba54999f..1505c1feeb0fa 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -126,7 +126,7 @@ def setup(self, opname): self.row = df.iloc[0] def time_frame_op_with_series_axis1(self, opname): - getattr(opname)(self.df, self.ser) + getattr(operator, opname)(self.df, self.ser) class Ops: From 0648cc805d8ac88e38367cb00149e3a52d381b46 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 21 Apr 2020 11:46:22 -0700 Subject: [PATCH 10/12] restore asv --- asv_bench/benchmarks/arithmetic.py | 34 +++--------------------------- asv_bench/benchmarks/stat_ops.py | 4 ++-- 2 files changed, 5 insertions(+), 33 deletions(-) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 1505c1feeb0fa..b23141af8b57b 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -67,7 +67,7 @@ def time_series_op_with_fill_value_no_nas(self): self.ser.add(self.ser, fill_value=4) -class MixedFrameWithSeriesAxis0: +class MixedFrameWithSeriesAxis: params = [ [ "eq", @@ -87,44 +87,16 @@ class MixedFrameWithSeriesAxis0: param_names = ["opname"] def setup(self, opname): - arr = np.arange(10 ** 6).reshape(100, -1) + arr = np.arange(10 ** 6).reshape(1000, -1) df = DataFrame(arr) df["C"] = 1.0 self.df = df self.ser = df[0] + self.row = df.iloc[0] def time_frame_op_with_series_axis0(self, opname): getattr(self.df, opname)(self.ser, axis=0) - -class MixedFrameWithSeriesAxis1: - # TODO: combine this with MixedFrameWithSeriesAxis0. - # Initial attempts have failed the CI for reasons unknown, see GH#33600 - params = [ - [ - "eq", - "ne", - "lt", - "le", - "ge", - "gt", - "add", - "sub", - "div", - "floordiv", - "mul", - "pow", - ] - ] - param_names = ["opname"] - - def setup(self, opname): - arr = np.arange(10 ** 6).reshape(1000, -1) - df = DataFrame(arr) - df["C"] = 1.0 - self.df = df - self.row = df.iloc[0] - def time_frame_op_with_series_axis1(self, opname): getattr(operator, opname)(self.df, self.ser) diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index ebbd3c9eddfdb..66176816c6fe8 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -11,8 +11,8 @@ class FrameOps: param_names = ["op", "dtype", "axis"] def setup(self, op, dtype, axis): - if op == "mad" and dtype == "Int64" and axis == 1: - # GH-33036 + if op == "mad" and dtype == "Int64" and axis == 0: + # GH-33036, GH#33600 raise NotImplementedError values = np.random.randn(100000, 4) if dtype == "Int64": From 2a2e9fc6f89d83ae6bdf429fb6032b0a0257ff3c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 21 Apr 2020 12:26:57 -0700 Subject: [PATCH 11/12] div->truediv --- asv_bench/benchmarks/arithmetic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index b23141af8b57b..8aa29468559b2 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -78,7 +78,7 @@ class MixedFrameWithSeriesAxis: "gt", "add", "sub", - "div", + "truediv", "floordiv", "mul", "pow", From 5cdf0b59b840c8348658d3bd6eb5a1347169a92c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 21 Apr 2020 13:47:07 -0700 Subject: [PATCH 12/12] troubleshoot asvs --- asv_bench/benchmarks/stat_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 66176816c6fe8..5639d6702a92c 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -11,7 +11,7 @@ class FrameOps: param_names = ["op", "dtype", "axis"] def setup(self, op, dtype, axis): - if op == "mad" and dtype == "Int64" and axis == 0: + if op == "mad" and dtype == "Int64": # GH-33036, GH#33600 raise NotImplementedError values = np.random.randn(100000, 4)