From 3deef7c75acd39cba6b96f06e30d1b573cba1483 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 7 Jul 2022 16:18:36 -0700 Subject: [PATCH 01/18] start adding arith tests --- pandas/core/arrays/arrow/array.py | 51 ++++++++++++++++++++++++++++ pandas/tests/extension/test_arrow.py | 32 +++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 92aedbb836b38..27483e721041f 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -57,6 +57,34 @@ "ge": pc.greater_equal, } + ARROW_LOGICAL_FUNCS = { + "and": pc.and_kleene, + "rand": lambda x, y: pc.and_kleene(y, x), + "or": pc.or_kleene, + "ror": lambda x, y: pc.or_kleene(y, x), + "xor": pc.xor, + "rxor": lambda x, y: pc.xor(y, x), + } + + ARROW_ARITHMETIC_FUNCS = { + "add": pc.add_checked, + "radd": lambda x, y: pc.add(y, x), + "sub": pc.subtract_checked, + "rsub": lambda x, y: pc.subtract_checked(y, x), + "mul": pc.multiply_checked, + "rmul": lambda x, y: pc.multiply_checked(y, x), + "truediv": NotImplemented, # pc.divide_checked, + "rtruediv": NotImplemented, # lambda x, y: pc.divide_checked(y, x), + "floordiv": NotImplemented, + "rfloordiv": NotImplemented, + "mod": NotImplemented, + "rmod": NotImplemented, + "divmod": NotImplemented, + "rdivmod": NotImplemented, + "pow": pc.power_checked, + "rpow": lambda x, y: pc.power_checked(y, x), + } + if TYPE_CHECKING: from pandas import Series @@ -74,6 +102,7 @@ def to_pyarrow_type( elif isinstance(dtype, pa.DataType): pa_dtype = dtype elif dtype: + # Accepts python types too pa_dtype = pa.from_numpy_dtype(dtype) else: pa_dtype = None @@ -263,6 +292,28 @@ def _cmp_method(self, other, op): result = result.to_numpy() return BooleanArray._from_sequence(result) + def _evaluate_op_method(self, other, op, arrow_funcs): + pc_func = arrow_funcs[op.__name__] + if pc_func is NotImplemented: + raise NotImplementedError(f"{op.__name__} not implemented.") + if isinstance(other, ArrowExtensionArray): + result = pc_func(self._data, other._data) + elif isinstance(other, (np.ndarray, list)): + result = pc_func(self._data, other) + elif is_scalar(other): + result = pc_func(self._data, pa.scalar(other)) + else: + raise NotImplementedError( + f"{op.__name__} not implemented for {type(other)}" + ) + return type(self)(result) + + def _logical_method(self, other, op): + return self._evaluate_op_method(other, op, ARROW_LOGICAL_FUNCS) + + def _arith_method(self, other, op): + return self._evaluate_op_method(other, op, ARROW_ARITHMETIC_FUNCS) + def equals(self, other) -> bool: if not isinstance(other, ArrowExtensionArray): return False diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 861cc44310751..985f00e7aa3ba 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1295,6 +1295,38 @@ def test_where_series(self, data, na_value, as_frame, request, using_array_manag super().test_where_series(data, na_value, as_frame) +class TestBaseArithmeticOps(base.BaseArithmeticOpsTests): + series_scalar_exc = None + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): + pa_dtype = data.dtype.pyarrow_dtype + if all_arithmetic_operators in { + "__truediv__", + "__rtruediv__", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", + }: + self.series_scalar_exc = NotImplementedError + elif not (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)): + self.series_scalar_exc = pa.ArrowNotImplementedError + else: + self.series_scalar_exc = None + if all_arithmetic_operators == "__rpow__" and ( + pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) + ): + request.node.add_marker( + pytest.mark.xfail( + reason=( + f"GH 29997: 1**pandas.NA == 1 while 1**pyarrow.NA == NULL " + f"for {pa_dtype}" + ) + ) + ) + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_arrowdtype_construct_from_string_type_with_unsupported_parameters(): with pytest.raises(NotImplementedError, match="Passing pyarrow type"): ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]") From 82d7734f93fac05f6c81245ff71f3d712d439354 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 7 Jul 2022 18:16:24 -0700 Subject: [PATCH 02/18] Add more arithmetic tests --- pandas/tests/extension/test_arrow.py | 58 +++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 985f00e7aa3ba..35030eee07358 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1296,8 +1296,6 @@ def test_where_series(self, data, na_value, as_frame, request, using_array_manag class TestBaseArithmeticOps(base.BaseArithmeticOpsTests): - series_scalar_exc = None - def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): pa_dtype = data.dtype.pyarrow_dtype if all_arithmetic_operators in { @@ -1326,6 +1324,62 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) ) super().test_arith_series_with_scalar(data, all_arithmetic_operators) + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): + pa_dtype = data.dtype.pyarrow_dtype + if all_arithmetic_operators in { + "__truediv__", + "__rtruediv__", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", + }: + self.frame_scalar_exc = NotImplementedError + elif not (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)): + self.frame_scalar_exc = pa.ArrowNotImplementedError + else: + self.frame_scalar_exc = None + if all_arithmetic_operators == "__rpow__" and ( + pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) + ): + request.node.add_marker( + pytest.mark.xfail( + reason=( + f"GH 29997: 1**pandas.NA == 1 while 1**pyarrow.NA == NULL " + f"for {pa_dtype}" + ) + ) + ) + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_array(self, data, all_arithmetic_operators, request): + pa_dtype = data.dtype.pyarrow_dtype + if all_arithmetic_operators in { + "__truediv__", + "__rtruediv__", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", + }: + self.series_array_exc = NotImplementedError + elif not (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)): + self.series_array_exc = pa.ArrowNotImplementedError + else: + self.series_array_exc = None + if all_arithmetic_operators == "__rpow__" and ( + pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) + ): + request.node.add_marker( + pytest.mark.xfail( + reason=( + f"GH 29997: 1**pandas.NA == 1 while 1**pyarrow.NA == NULL " + f"for {pa_dtype}" + ) + ) + ) + super().test_arith_series_with_array(data, all_arithmetic_operators) + def test_arrowdtype_construct_from_string_type_with_unsupported_parameters(): with pytest.raises(NotImplementedError, match="Passing pyarrow type"): From 4957475319e67f2625e5c4a1e52a161bfc8f7a16 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 7 Jul 2022 18:55:12 -0700 Subject: [PATCH 03/18] Override _combine in the future --- pandas/tests/extension/test_arrow.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 35030eee07358..a746181a9a4b2 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1378,7 +1378,15 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators, request): ) ) ) - super().test_arith_series_with_array(data, all_arithmetic_operators) + op_name = all_arithmetic_operators + ser = pd.Series(data) + # pd.Series([ser.iloc[0]] * len(ser)) may not return ArrowExtensionArray + # since ser.iloc[0] is a python scalar + other = pd.Series(pd.array([ser.iloc[0]] * len(ser), dtype=data.dtype)) + if pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype): + # BaseOpsUtil._combine upcasts results while ops maintain original type + pass + self.check_opname(ser, op_name, other, exc=self.series_array_exc) def test_arrowdtype_construct_from_string_type_with_unsupported_parameters(): From 5c7d4bf19b6df45820784216628c0bdd9c77b4af Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 8 Jul 2022 12:58:40 -0700 Subject: [PATCH 04/18] Finalize tests --- pandas/tests/extension/test_arrow.py | 57 ++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 3bd1fd5e6c92a..39470db4d2318 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -179,6 +179,16 @@ def data_missing_for_sorting(data_for_grouping): ) +@pytest.fixture +def data_for_twos(data): + """Length-100 array in which all the elements are two.""" + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype): + return pd.array([2] * 100, dtype=data.dtype) + # tests will be xfailed where 2 is not a valid scalar for pa_dtype + return data + + @pytest.fixture def na_value(): """The scalar missing value for this type. Default 'None'""" @@ -1492,6 +1502,9 @@ def test_where_series(self, data, na_value, as_frame, request, using_array_manag class TestBaseArithmeticOps(base.BaseArithmeticOpsTests): + + divmod_exc = NotImplementedError + def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): pa_dtype = data.dtype.pyarrow_dtype if all_arithmetic_operators in { @@ -1548,7 +1561,9 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): ) super().test_arith_frame_with_scalar(data, all_arithmetic_operators) - def test_arith_series_with_array(self, data, all_arithmetic_operators, request): + def test_arith_series_with_array( + self, data, all_arithmetic_operators, request, monkeypatch + ): pa_dtype = data.dtype.pyarrow_dtype if all_arithmetic_operators in { "__truediv__", @@ -1574,16 +1589,52 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators, request): ) ) ) + elif all_arithmetic_operators in ( + "__sub__", + "__rsub__", + ) and pa.types.is_unsigned_integer(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowInvalid, + reason=( + f"Implemented pyarrow.compute.subtract_checked " + f"which raises on overflow for {pa_dtype}" + ), + ) + ) op_name = all_arithmetic_operators ser = pd.Series(data) # pd.Series([ser.iloc[0]] * len(ser)) may not return ArrowExtensionArray # since ser.iloc[0] is a python scalar other = pd.Series(pd.array([ser.iloc[0]] * len(ser), dtype=data.dtype)) if pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype): - # BaseOpsUtil._combine upcasts results while ops maintain original type - pass + # BaseOpsUtil._combine can upcast expected dtype + # (because it generates expected on python scalars) + # while ArrowExtensionArray maintains original type + super_combine = TestBaseArithmeticOps._combine + + def _patch_combine(self, obj, other, op): + expected = super_combine(self, obj, other, op) + if isinstance(expected, pd.Series): + pa_array = pa.array(expected._values).cast(obj.dtype.pyarrow_dtype) + pd_array = type(expected._values)(pa_array) + expected = pd.Series(pd_array) + return expected + + monkeypatch.setattr(TestBaseArithmeticOps, "_combine", _patch_combine) self.check_opname(ser, op_name, other, exc=self.series_array_exc) + def test_add_series_with_extension_array(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if not (pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype)): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"add_checked not implemented for {pa_dtype}", + ) + ) + super().test_add_series_with_extension_array(data) + def test_arrowdtype_construct_from_string_type_with_unsupported_parameters(): with pytest.raises(NotImplementedError, match="Passing pyarrow type"): From f38bf941b07d367475d5d70c530eae3221e72902 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 8 Jul 2022 12:59:46 -0700 Subject: [PATCH 05/18] Add checked --- pandas/core/arrays/arrow/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 27483e721041f..0991c4742a76e 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -68,7 +68,7 @@ ARROW_ARITHMETIC_FUNCS = { "add": pc.add_checked, - "radd": lambda x, y: pc.add(y, x), + "radd": lambda x, y: pc.add_checked(y, x), "sub": pc.subtract_checked, "rsub": lambda x, y: pc.subtract_checked(y, x), "mul": pc.multiply_checked, From 6f5b57b16d19886ef51c5b972950cabf9ce07b31 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 8 Jul 2022 17:11:27 -0700 Subject: [PATCH 06/18] Can raise NotImplimented instead of TypeError now --- pandas/core/strings/object_array.py | 2 +- pandas/tests/arrays/string_/test_string.py | 6 +++--- pandas/tests/extension/test_arrow.py | 14 ++++++++++++++ pandas/tests/strings/test_api.py | 2 +- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 7421645baa463..f884264e9ab75 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -360,7 +360,7 @@ def _str_get_dummies(self, sep="|"): arr = Series(self).fillna("") try: arr = sep + arr + sep - except TypeError: + except (TypeError, NotImplementedError): arr = sep + arr.astype(str) + sep tags: set[str] = set() diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index b563f84207b22..a5eb6189db6f1 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -101,7 +101,7 @@ def test_add(dtype, request): "unsupported operand type(s) for +: 'ArrowStringArray' and " "'ArrowStringArray'" ) - mark = pytest.mark.xfail(raises=TypeError, reason=reason) + mark = pytest.mark.xfail(raises=NotImplementedError, reason=reason) request.node.add_marker(mark) a = pd.Series(["a", "b", "c", None, None], dtype=dtype) @@ -142,7 +142,7 @@ def test_add_2d(dtype, request): def test_add_sequence(dtype, request): if dtype.storage == "pyarrow": reason = "unsupported operand type(s) for +: 'ArrowStringArray' and 'list'" - mark = pytest.mark.xfail(raises=TypeError, reason=reason) + mark = pytest.mark.xfail(raises=NotImplementedError, reason=reason) request.node.add_marker(mark) a = pd.array(["a", "b", None, None], dtype=dtype) @@ -160,7 +160,7 @@ def test_add_sequence(dtype, request): def test_mul(dtype, request): if dtype.storage == "pyarrow": reason = "unsupported operand type(s) for *: 'ArrowStringArray' and 'int'" - mark = pytest.mark.xfail(raises=TypeError, reason=reason) + mark = pytest.mark.xfail(raises=NotImplementedError, reason=reason) request.node.add_marker(mark) a = pd.array(["a", "b", None], dtype=dtype) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 39470db4d2318..4255a2d32f9a1 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1221,6 +1221,20 @@ def test_EA_types(self, engine, data, request): class TestBaseMethods(base.BaseMethodsTests): + @pytest.mark.parametrize("periods", [1, -2]) + def test_diff(self, data, periods, request): + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_unsigned_integer(pa_dtype) and periods == 1: + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowInvalid, + reason=( + f"diff with {pa_dtype} and periods={periods} will overflow" + ), + ) + ) + super().test_diff(data, periods) + @pytest.mark.parametrize("dropna", [True, False]) def test_value_counts(self, all_data, dropna, request): pa_dtype = all_data.dtype.pyarrow_dtype diff --git a/pandas/tests/strings/test_api.py b/pandas/tests/strings/test_api.py index 974ecc152f17b..d76ed65be9e1b 100644 --- a/pandas/tests/strings/test_api.py +++ b/pandas/tests/strings/test_api.py @@ -132,7 +132,7 @@ def test_api_for_categorical(any_string_method, any_string_dtype, request): any_string_dtype == "string" and get_option("string_storage") == "pyarrow" ): # unsupported operand type(s) for +: 'ArrowStringArray' and 'str' - mark = pytest.mark.xfail(raises=TypeError, reason="Not Implemented") + mark = pytest.mark.xfail(raises=NotImplementedError, reason="Not Implemented") request.node.add_marker(mark) s = Series(list("aabb"), dtype=any_string_dtype) From 3d5d96d6899faf2f1d49b9b1b4ef6e188e9f1598 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 9 Jul 2022 18:31:51 -0700 Subject: [PATCH 07/18] Fix typing, compute kernel compat --- pandas/core/arrays/arrow/array.py | 23 ++++++++++++++++++----- pandas/tests/extension/base/ops.py | 8 ++++---- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 0991c4742a76e..f7955af0f0dda 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -58,14 +58,27 @@ } ARROW_LOGICAL_FUNCS = { - "and": pc.and_kleene, - "rand": lambda x, y: pc.and_kleene(y, x), - "or": pc.or_kleene, - "ror": lambda x, y: pc.or_kleene(y, x), + "and": NotImplemented if pa_version_under2p0 else pc.and_kleene, + "rand": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.and_kleene(y, x), + "or": NotImplemented if pa_version_under2p0 else pc.or_kleene, + "ror": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.or_kleene(y, x), "xor": pc.xor, "rxor": lambda x, y: pc.xor(y, x), } + def divide_compat(arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar): + # https://github.com/pandas-dev/pandas/pull/47645#discussion_r917247366= + # Ensure int / int -> float to align with numpy & python + if pa.types.is_integer(arrow_array.type) and pa.types.is_integer( + pa_object.type + ): + arrow_array = arrow_array.cast(pa.float64()) + return pc.divide_checked(arrow_array, pa_object) + ARROW_ARITHMETIC_FUNCS = { "add": pc.add_checked, "radd": lambda x, y: pc.add_checked(y, x), @@ -299,7 +312,7 @@ def _evaluate_op_method(self, other, op, arrow_funcs): if isinstance(other, ArrowExtensionArray): result = pc_func(self._data, other._data) elif isinstance(other, (np.ndarray, list)): - result = pc_func(self._data, other) + result = pc_func(self._data, pa.array(other, from_pandas=True)) elif is_scalar(other): result = pc_func(self._data, pa.scalar(other)) else: diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index a1d232b737da7..569782e55fd72 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -67,10 +67,10 @@ class BaseArithmeticOpsTests(BaseOpsUtil): * divmod_exc = TypeError """ - series_scalar_exc: type[TypeError] | None = TypeError - frame_scalar_exc: type[TypeError] | None = TypeError - series_array_exc: type[TypeError] | None = TypeError - divmod_exc: type[TypeError] | None = TypeError + series_scalar_exc: type[Exception] | None = TypeError + frame_scalar_exc: type[Exception] | None = TypeError + series_array_exc: type[Exception] | None = TypeError + divmod_exc: type[Exception] | None = TypeError def test_arith_series_with_scalar(self, data, all_arithmetic_operators): # series & scalar From f03f774df18d58edb3b15a8d322a394b8f776cf5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 9 Jul 2022 18:52:20 -0700 Subject: [PATCH 08/18] pyarrow 8 supports some duration ops --- pandas/tests/extension/test_arrow.py | 58 ++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 4255a2d32f9a1..b8ae63cd1d399 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -24,6 +24,7 @@ from pandas.compat import ( pa_version_under2p0, pa_version_under3p0, + pa_version_under8p0, ) import pandas as pd @@ -1521,6 +1522,17 @@ class TestBaseArithmeticOps(base.BaseArithmeticOpsTests): def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): pa_dtype = data.dtype.pyarrow_dtype + + arrow_temporal_supported = not pa_version_under8p0 and ( + all_arithmetic_operators in ("__add__", "__radd__") + and pa.types.is_duration(pa_dtype) + or all_arithmetic_operators in ("__sub__", "__rsub__") + and ( + pa.types.is_date(pa_dtype) + or pa.types.is_time(pa_dtype) + or pa.types.is_duration(pa_dtype) + ) + ) if all_arithmetic_operators in { "__truediv__", "__rtruediv__", @@ -1530,7 +1542,11 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) "__rmod__", }: self.series_scalar_exc = NotImplementedError - elif not (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)): + elif not ( + pa.types.is_floating(pa_dtype) + or pa.types.is_integer(pa_dtype) + or arrow_temporal_supported + ): self.series_scalar_exc = pa.ArrowNotImplementedError else: self.series_scalar_exc = None @@ -1549,6 +1565,17 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): pa_dtype = data.dtype.pyarrow_dtype + + arrow_temporal_supported = not pa_version_under8p0 and ( + all_arithmetic_operators in ("__add__", "__radd__") + and pa.types.is_duration(pa_dtype) + or all_arithmetic_operators in ("__sub__", "__rsub__") + and ( + pa.types.is_date(pa_dtype) + or pa.types.is_time(pa_dtype) + or pa.types.is_duration(pa_dtype) + ) + ) if all_arithmetic_operators in { "__truediv__", "__rtruediv__", @@ -1558,7 +1585,11 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): "__rmod__", }: self.frame_scalar_exc = NotImplementedError - elif not (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)): + elif not ( + pa.types.is_floating(pa_dtype) + or pa.types.is_integer(pa_dtype) + or arrow_temporal_supported + ): self.frame_scalar_exc = pa.ArrowNotImplementedError else: self.frame_scalar_exc = None @@ -1579,6 +1610,17 @@ def test_arith_series_with_array( self, data, all_arithmetic_operators, request, monkeypatch ): pa_dtype = data.dtype.pyarrow_dtype + + arrow_temporal_supported = not pa_version_under8p0 and ( + all_arithmetic_operators in ("__add__", "__radd__") + and pa.types.is_duration(pa_dtype) + or all_arithmetic_operators in ("__sub__", "__rsub__") + and ( + pa.types.is_date(pa_dtype) + or pa.types.is_time(pa_dtype) + or pa.types.is_duration(pa_dtype) + ) + ) if all_arithmetic_operators in { "__truediv__", "__rtruediv__", @@ -1588,7 +1630,11 @@ def test_arith_series_with_array( "__rmod__", }: self.series_array_exc = NotImplementedError - elif not (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)): + elif not ( + pa.types.is_floating(pa_dtype) + or pa.types.is_integer(pa_dtype) + or arrow_temporal_supported + ): self.series_array_exc = pa.ArrowNotImplementedError else: self.series_array_exc = None @@ -1640,7 +1686,11 @@ def _patch_combine(self, obj, other, op): def test_add_series_with_extension_array(self, data, request): pa_dtype = data.dtype.pyarrow_dtype - if not (pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype)): + if not ( + pa.types.is_integer(pa_dtype) + or pa.types.is_floating(pa_dtype) + or (not pa_version_under8p0 and pa.types.is_duration(pa_dtype)), + ): request.node.add_marker( pytest.mark.xfail( raises=pa.ArrowNotImplementedError, From 726c6b719ea1e23d984741a363a61c7c81f69eca Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 9 Jul 2022 22:10:19 -0700 Subject: [PATCH 09/18] Add to pandas compat --- pandas/compat/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 2ab710a5762d3..5db859897b663 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -27,6 +27,7 @@ pa_version_under5p0, pa_version_under6p0, pa_version_under7p0, + pa_version_under8p0, ) if TYPE_CHECKING: @@ -158,4 +159,5 @@ def get_lzma_file() -> type[lzma.LZMAFile]: "pa_version_under5p0", "pa_version_under6p0", "pa_version_under7p0", + "pa_version_under8p0", ] From 2de348cb4650ad695b7ac9e52b4e6126ee8d1b57 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 9 Jul 2022 22:55:06 -0700 Subject: [PATCH 10/18] xor not implememnted in min pyarrow --- pandas/core/arrays/arrow/array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index f7955af0f0dda..5690a5e08aa7b 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -66,8 +66,8 @@ "ror": NotImplemented if pa_version_under2p0 else lambda x, y: pc.or_kleene(y, x), - "xor": pc.xor, - "rxor": lambda x, y: pc.xor(y, x), + "xor": NotImplemented if pa_version_under2p0 else pc.xor, + "rxor": NotImplemented if pa_version_under2p0 else lambda x, y: pc.xor(y, x), } def divide_compat(arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar): From 1dd2f7914858dec6d61af87796e0e2ae3ed3f02d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 9 Jul 2022 23:26:31 -0700 Subject: [PATCH 11/18] xor not implememnted in min pyarrow --- pandas/core/arrays/arrow/array.py | 33 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 5690a5e08aa7b..4103a3ee9aa6d 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -70,22 +70,19 @@ "rxor": NotImplemented if pa_version_under2p0 else lambda x, y: pc.xor(y, x), } - def divide_compat(arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar): - # https://github.com/pandas-dev/pandas/pull/47645#discussion_r917247366= - # Ensure int / int -> float to align with numpy & python - if pa.types.is_integer(arrow_array.type) and pa.types.is_integer( - pa_object.type - ): - arrow_array = arrow_array.cast(pa.float64()) - return pc.divide_checked(arrow_array, pa_object) - ARROW_ARITHMETIC_FUNCS = { - "add": pc.add_checked, - "radd": lambda x, y: pc.add_checked(y, x), - "sub": pc.subtract_checked, - "rsub": lambda x, y: pc.subtract_checked(y, x), - "mul": pc.multiply_checked, - "rmul": lambda x, y: pc.multiply_checked(y, x), + "add": NotImplemented if pa_version_under2p0 else pc.add_checked, + "radd": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.add_checked(y, x), + "sub": NotImplemented if pa_version_under2p0 else pc.subtract_checked, + "rsub": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.subtract_checked(y, x), + "mul": NotImplemented if pa_version_under2p0 else pc.multiply_checked, + "rmul": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.multiply_checked(y, x), "truediv": NotImplemented, # pc.divide_checked, "rtruediv": NotImplemented, # lambda x, y: pc.divide_checked(y, x), "floordiv": NotImplemented, @@ -94,8 +91,10 @@ def divide_compat(arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar) "rmod": NotImplemented, "divmod": NotImplemented, "rdivmod": NotImplemented, - "pow": pc.power_checked, - "rpow": lambda x, y: pc.power_checked(y, x), + "pow": NotImplemented if pa_version_under2p0 else pc.power_checked, + "rpow": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.power_checked(y, x), } if TYPE_CHECKING: From 0aed029b18dc4db319034b637c444c9a33a989c3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 10 Jul 2022 13:44:41 -0700 Subject: [PATCH 12/18] Fix pyarrow=8 temporal condition --- pandas/tests/extension/test_arrow.py | 68 +++++++++++++++++----------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index b8ae63cd1d399..5ec3f0dab5e43 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1527,11 +1527,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) all_arithmetic_operators in ("__add__", "__radd__") and pa.types.is_duration(pa_dtype) or all_arithmetic_operators in ("__sub__", "__rsub__") - and ( - pa.types.is_date(pa_dtype) - or pa.types.is_time(pa_dtype) - or pa.types.is_duration(pa_dtype) - ) + and pa.types.is_temporal(pa_dtype) ) if all_arithmetic_operators in { "__truediv__", @@ -1542,6 +1538,8 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) "__rmod__", }: self.series_scalar_exc = NotImplementedError + elif arrow_temporal_supported: + self.series_scalar_exc = None elif not ( pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) @@ -1561,6 +1559,16 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) ) ) ) + elif arrow_temporal_supported: + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason=( + f"{all_arithmetic_operators} not supported between" + f"pd.NA and {pa_dtype} Python scalar" + ), + ) + ) super().test_arith_series_with_scalar(data, all_arithmetic_operators) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): @@ -1570,11 +1578,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): all_arithmetic_operators in ("__add__", "__radd__") and pa.types.is_duration(pa_dtype) or all_arithmetic_operators in ("__sub__", "__rsub__") - and ( - pa.types.is_date(pa_dtype) - or pa.types.is_time(pa_dtype) - or pa.types.is_duration(pa_dtype) - ) + and pa.types.is_temporal(pa_dtype) ) if all_arithmetic_operators in { "__truediv__", @@ -1585,11 +1589,9 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): "__rmod__", }: self.frame_scalar_exc = NotImplementedError - elif not ( - pa.types.is_floating(pa_dtype) - or pa.types.is_integer(pa_dtype) - or arrow_temporal_supported - ): + elif arrow_temporal_supported: + self.frame_scalar_exc = None + elif not (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)): self.frame_scalar_exc = pa.ArrowNotImplementedError else: self.frame_scalar_exc = None @@ -1604,6 +1606,16 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): ) ) ) + elif arrow_temporal_supported: + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason=( + f"{all_arithmetic_operators} not supported between" + f"pd.NA and {pa_dtype} Python scalar" + ), + ) + ) super().test_arith_frame_with_scalar(data, all_arithmetic_operators) def test_arith_series_with_array( @@ -1615,11 +1627,7 @@ def test_arith_series_with_array( all_arithmetic_operators in ("__add__", "__radd__") and pa.types.is_duration(pa_dtype) or all_arithmetic_operators in ("__sub__", "__rsub__") - and ( - pa.types.is_date(pa_dtype) - or pa.types.is_time(pa_dtype) - or pa.types.is_duration(pa_dtype) - ) + and pa.types.is_temporal(pa_dtype) ) if all_arithmetic_operators in { "__truediv__", @@ -1630,11 +1638,9 @@ def test_arith_series_with_array( "__rmod__", }: self.series_array_exc = NotImplementedError - elif not ( - pa.types.is_floating(pa_dtype) - or pa.types.is_integer(pa_dtype) - or arrow_temporal_supported - ): + elif arrow_temporal_supported: + self.series_array_exc = None + elif not (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)): self.series_array_exc = pa.ArrowNotImplementedError else: self.series_array_exc = None @@ -1662,6 +1668,16 @@ def test_arith_series_with_array( ), ) ) + elif arrow_temporal_supported: + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason=( + f"{all_arithmetic_operators} not supported between" + f"pd.NA and {pa_dtype} Python scalar" + ), + ) + ) op_name = all_arithmetic_operators ser = pd.Series(data) # pd.Series([ser.iloc[0]] * len(ser)) may not return ArrowExtensionArray @@ -1689,7 +1705,7 @@ def test_add_series_with_extension_array(self, data, request): if not ( pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype) - or (not pa_version_under8p0 and pa.types.is_duration(pa_dtype)), + or (not pa_version_under8p0 and pa.types.is_duration(pa_dtype)) ): request.node.add_marker( pytest.mark.xfail( From d30877f1e8cf651e23198bf85ce6bdfb9f519a4f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 10 Jul 2022 16:19:23 -0700 Subject: [PATCH 13/18] min version compat --- pandas/tests/extension/test_arrow.py | 67 ++++++++++++++++++---------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 5ec3f0dab5e43..bc554c8f9d879 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1529,14 +1529,18 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) or all_arithmetic_operators in ("__sub__", "__rsub__") and pa.types.is_temporal(pa_dtype) ) - if all_arithmetic_operators in { - "__truediv__", - "__rtruediv__", - "__floordiv__", - "__rfloordiv__", - "__mod__", - "__rmod__", - }: + if ( + all_arithmetic_operators + in { + "__truediv__", + "__rtruediv__", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", + } + or pa_version_under2p0 + ): self.series_scalar_exc = NotImplementedError elif arrow_temporal_supported: self.series_scalar_exc = None @@ -1580,14 +1584,18 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): or all_arithmetic_operators in ("__sub__", "__rsub__") and pa.types.is_temporal(pa_dtype) ) - if all_arithmetic_operators in { - "__truediv__", - "__rtruediv__", - "__floordiv__", - "__rfloordiv__", - "__mod__", - "__rmod__", - }: + if ( + all_arithmetic_operators + in { + "__truediv__", + "__rtruediv__", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", + } + or pa_version_under2p0 + ): self.frame_scalar_exc = NotImplementedError elif arrow_temporal_supported: self.frame_scalar_exc = None @@ -1629,14 +1637,18 @@ def test_arith_series_with_array( or all_arithmetic_operators in ("__sub__", "__rsub__") and pa.types.is_temporal(pa_dtype) ) - if all_arithmetic_operators in { - "__truediv__", - "__rtruediv__", - "__floordiv__", - "__rfloordiv__", - "__mod__", - "__rmod__", - }: + if ( + all_arithmetic_operators + in { + "__truediv__", + "__rtruediv__", + "__floordiv__", + "__rfloordiv__", + "__mod__", + "__rmod__", + } + or pa_version_under2p0 + ): self.series_array_exc = NotImplementedError elif arrow_temporal_supported: self.series_array_exc = None @@ -1713,6 +1725,13 @@ def test_add_series_with_extension_array(self, data, request): reason=f"add_checked not implemented for {pa_dtype}", ) ) + elif pa_version_under2p0: + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason=f"add_checked not implemented for {pa_dtype}", + ) + ) super().test_add_series_with_extension_array(data) From f37445171d628e35ced62fb4de2412a44f7ddb72 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 11 Jul 2022 12:56:17 -0700 Subject: [PATCH 14/18] more compat --- pandas/tests/extension/test_arrow.py | 49 ++++++++++++++++------------ 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index bc554c8f9d879..6da3ad0dcc2bf 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1552,8 +1552,10 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) self.series_scalar_exc = pa.ArrowNotImplementedError else: self.series_scalar_exc = None - if all_arithmetic_operators == "__rpow__" and ( - pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) + if ( + all_arithmetic_operators == "__rpow__" + and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) + and not pa_version_under2p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1603,8 +1605,10 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): self.frame_scalar_exc = pa.ArrowNotImplementedError else: self.frame_scalar_exc = None - if all_arithmetic_operators == "__rpow__" and ( - pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) + if ( + all_arithmetic_operators == "__rpow__" + and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) + and not pa_version_under2p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1656,8 +1660,10 @@ def test_arith_series_with_array( self.series_array_exc = pa.ArrowNotImplementedError else: self.series_array_exc = None - if all_arithmetic_operators == "__rpow__" and ( - pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) + if ( + all_arithmetic_operators == "__rpow__" + and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) + and not pa_version_under2p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1667,10 +1673,15 @@ def test_arith_series_with_array( ) ) ) - elif all_arithmetic_operators in ( - "__sub__", - "__rsub__", - ) and pa.types.is_unsigned_integer(pa_dtype): + elif ( + all_arithmetic_operators + in ( + "__sub__", + "__rsub__", + ) + and pa.types.is_unsigned_integer(pa_dtype) + and not pa_version_under2p0 + ): request.node.add_marker( pytest.mark.xfail( raises=pa.ArrowInvalid, @@ -1714,18 +1725,14 @@ def _patch_combine(self, obj, other, op): def test_add_series_with_extension_array(self, data, request): pa_dtype = data.dtype.pyarrow_dtype - if not ( - pa.types.is_integer(pa_dtype) - or pa.types.is_floating(pa_dtype) - or (not pa_version_under8p0 and pa.types.is_duration(pa_dtype)) - ): - request.node.add_marker( - pytest.mark.xfail( - raises=pa.ArrowNotImplementedError, - reason=f"add_checked not implemented for {pa_dtype}", - ) + if ( + not ( + pa.types.is_integer(pa_dtype) + or pa.types.is_floating(pa_dtype) + or (not pa_version_under8p0 and pa.types.is_duration(pa_dtype)) ) - elif pa_version_under2p0: + or pa_version_under2p0 + ): request.node.add_marker( pytest.mark.xfail( raises=NotImplementedError, From 88449db14202182fb4e0c7f13c2f941507a80d1f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 11 Jul 2022 15:51:57 -0700 Subject: [PATCH 15/18] Add support for truediv --- pandas/core/arrays/arrow/array.py | 19 ++++++++++++-- pandas/tests/extension/test_arrow.py | 37 ++++++++++++++++++++++------ 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 4103a3ee9aa6d..848c51728194c 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -70,6 +70,17 @@ "rxor": NotImplemented if pa_version_under2p0 else lambda x, y: pc.xor(y, x), } + def cast_for_division( + arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar + ) -> pa.ChunkedArray: + # Ensure int / int = float mirroring Python/Numpy behavior + # as pc.divide_checked(int, int) -> int + if pa.types.is_integer(arrow_array.type) and pa.types.is_integer( + pa_object.type + ): + return arrow_array.cast(pa.float64()) + return arrow_array + ARROW_ARITHMETIC_FUNCS = { "add": NotImplemented if pa_version_under2p0 else pc.add_checked, "radd": NotImplemented @@ -83,8 +94,12 @@ "rmul": NotImplemented if pa_version_under2p0 else lambda x, y: pc.multiply_checked(y, x), - "truediv": NotImplemented, # pc.divide_checked, - "rtruediv": NotImplemented, # lambda x, y: pc.divide_checked(y, x), + "truediv": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.divide_checked(cast_for_division(x, y), y), + "rtruediv": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.divide_checked(y, cast_for_division(x, y)), "floordiv": NotImplemented, "rfloordiv": NotImplemented, "mod": NotImplemented, diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 6da3ad0dcc2bf..0d22f873b6b7d 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1532,8 +1532,6 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) if ( all_arithmetic_operators in { - "__truediv__", - "__rtruediv__", "__floordiv__", "__rfloordiv__", "__mod__", @@ -1575,6 +1573,15 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) ), ) ) + elif all_arithmetic_operators == "__rtruediv__" and ( + pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) + ): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowInvalid, + reason="divide by 0", + ) + ) super().test_arith_series_with_scalar(data, all_arithmetic_operators) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): @@ -1589,8 +1596,6 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): if ( all_arithmetic_operators in { - "__truediv__", - "__rtruediv__", "__floordiv__", "__rfloordiv__", "__mod__", @@ -1628,6 +1633,15 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): ), ) ) + elif all_arithmetic_operators == "__rtruediv__" and ( + pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) + ): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowInvalid, + reason="divide by 0", + ) + ) super().test_arith_frame_with_scalar(data, all_arithmetic_operators) def test_arith_series_with_array( @@ -1644,8 +1658,6 @@ def test_arith_series_with_array( if ( all_arithmetic_operators in { - "__truediv__", - "__rtruediv__", "__floordiv__", "__rfloordiv__", "__mod__", @@ -1701,12 +1713,23 @@ def test_arith_series_with_array( ), ) ) + elif all_arithmetic_operators == "__rtruediv__" and ( + pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) + ): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowInvalid, + reason="divide by 0", + ) + ) op_name = all_arithmetic_operators ser = pd.Series(data) # pd.Series([ser.iloc[0]] * len(ser)) may not return ArrowExtensionArray # since ser.iloc[0] is a python scalar other = pd.Series(pd.array([ser.iloc[0]] * len(ser), dtype=data.dtype)) - if pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype): + if pa.types.is_floating(pa_dtype) or ( + pa.types.is_integer(pa_dtype) and all_arithmetic_operators != "__truediv__" + ): # BaseOpsUtil._combine can upcast expected dtype # (because it generates expected on python scalars) # while ArrowExtensionArray maintains original type From 4034a1cc3536c04ba6919029708fdcf6cece7a7f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 11 Jul 2022 16:59:03 -0700 Subject: [PATCH 16/18] Add floordiv --- pandas/core/arrays/arrow/array.py | 27 +++++++++--- pandas/tests/extension/test_arrow.py | 66 +++++++++++++++++----------- 2 files changed, 62 insertions(+), 31 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 848c51728194c..07b09d78016fd 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -70,10 +70,10 @@ "rxor": NotImplemented if pa_version_under2p0 else lambda x, y: pc.xor(y, x), } - def cast_for_division( + def cast_for_truediv( arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar ) -> pa.ChunkedArray: - # Ensure int / int = float mirroring Python/Numpy behavior + # Ensure int / int -> float mirroring Python/Numpy behavior # as pc.divide_checked(int, int) -> int if pa.types.is_integer(arrow_array.type) and pa.types.is_integer( pa_object.type @@ -81,6 +81,17 @@ def cast_for_division( return arrow_array.cast(pa.float64()) return arrow_array + def floordiv_compat( + left: pa.ChunkedArray | pa.Array | pa.Scalar, + right: pa.ChunkedArray | pa.Array | pa.Scalar, + ) -> pa.ChunkedArray: + # Ensure int // int -> int mirroring Python/Numpy behavior + # as pc.floor(pc.divide_checked(int, int)) -> float + result = pc.floor(pc.divide_checked(left, right)) + if pa.types.is_integer(left.type) and pa.types.is_integer(right.type): + result = result.cast(left.type) + return result + ARROW_ARITHMETIC_FUNCS = { "add": NotImplemented if pa_version_under2p0 else pc.add_checked, "radd": NotImplemented @@ -96,12 +107,16 @@ def cast_for_division( else lambda x, y: pc.multiply_checked(y, x), "truediv": NotImplemented if pa_version_under2p0 - else lambda x, y: pc.divide_checked(cast_for_division(x, y), y), + else lambda x, y: pc.divide_checked(cast_for_truediv(x, y), y), "rtruediv": NotImplemented if pa_version_under2p0 - else lambda x, y: pc.divide_checked(y, cast_for_division(x, y)), - "floordiv": NotImplemented, - "rfloordiv": NotImplemented, + else lambda x, y: pc.divide_checked(y, cast_for_truediv(x, y)), + "floordiv": NotImplemented + if pa_version_under2p0 + else lambda x, y: floordiv_compat(x, y), + "rfloordiv": NotImplemented + if pa_version_under2p0 + else lambda x, y: floordiv_compat(y, x), "mod": NotImplemented, "rmod": NotImplemented, "divmod": NotImplemented, diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 0d22f873b6b7d..7ad5c899f8b2e 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1520,7 +1520,32 @@ class TestBaseArithmeticOps(base.BaseArithmeticOpsTests): divmod_exc = NotImplementedError - def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): + def _patch_combine(self, obj, other, op): + # BaseOpsUtil._combine can upcast expected dtype + # (because it generates expected on python scalars) + # while ArrowExtensionArray maintains original type + expected = base.BaseArithmeticOpsTests._combine(self, obj, other, op) + was_frame = False + if isinstance(expected, pd.DataFrame): + was_frame = True + expected_data = expected.iloc[:, 0] + original_dtype = obj.iloc[:, 0].dtype + else: + expected_data = expected + original_dtype = obj.dtype + pa_array = pa.array(expected_data._values).cast(original_dtype.pyarrow_dtype) + pd_array = type(expected_data._values)(pa_array) + if was_frame: + expected = pd.DataFrame( + pd_array, index=expected.index, columns=expected.columns + ) + else: + expected = pd.Series(pd_array) + return expected + + def test_arith_series_with_scalar( + self, data, all_arithmetic_operators, request, monkeypatch + ): pa_dtype = data.dtype.pyarrow_dtype arrow_temporal_supported = not pa_version_under8p0 and ( @@ -1532,8 +1557,6 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) if ( all_arithmetic_operators in { - "__floordiv__", - "__rfloordiv__", "__mod__", "__rmod__", } @@ -1573,7 +1596,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) ), ) ) - elif all_arithmetic_operators == "__rtruediv__" and ( + elif all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} and ( pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) ): request.node.add_marker( @@ -1582,9 +1605,15 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) reason="divide by 0", ) ) + if all_arithmetic_operators == "__floordiv__" and pa.types.is_integer(pa_dtype): + # BaseOpsUtil._combine always returns int64, while ArrowExtensionArray does + # not upcast + monkeypatch.setattr(TestBaseArithmeticOps, "_combine", self._patch_combine) super().test_arith_series_with_scalar(data, all_arithmetic_operators) - def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): + def test_arith_frame_with_scalar( + self, data, all_arithmetic_operators, request, monkeypatch + ): pa_dtype = data.dtype.pyarrow_dtype arrow_temporal_supported = not pa_version_under8p0 and ( @@ -1596,8 +1625,6 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): if ( all_arithmetic_operators in { - "__floordiv__", - "__rfloordiv__", "__mod__", "__rmod__", } @@ -1633,7 +1660,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): ), ) ) - elif all_arithmetic_operators == "__rtruediv__" and ( + elif all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} and ( pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) ): request.node.add_marker( @@ -1642,6 +1669,10 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): reason="divide by 0", ) ) + if all_arithmetic_operators == "__floordiv__" and pa.types.is_integer(pa_dtype): + # BaseOpsUtil._combine always returns int64, while ArrowExtensionArray does + # not upcast + monkeypatch.setattr(TestBaseArithmeticOps, "_combine", self._patch_combine) super().test_arith_frame_with_scalar(data, all_arithmetic_operators) def test_arith_series_with_array( @@ -1658,8 +1689,6 @@ def test_arith_series_with_array( if ( all_arithmetic_operators in { - "__floordiv__", - "__rfloordiv__", "__mod__", "__rmod__", } @@ -1713,7 +1742,7 @@ def test_arith_series_with_array( ), ) ) - elif all_arithmetic_operators == "__rtruediv__" and ( + elif all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} and ( pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) ): request.node.add_marker( @@ -1730,20 +1759,7 @@ def test_arith_series_with_array( if pa.types.is_floating(pa_dtype) or ( pa.types.is_integer(pa_dtype) and all_arithmetic_operators != "__truediv__" ): - # BaseOpsUtil._combine can upcast expected dtype - # (because it generates expected on python scalars) - # while ArrowExtensionArray maintains original type - super_combine = TestBaseArithmeticOps._combine - - def _patch_combine(self, obj, other, op): - expected = super_combine(self, obj, other, op) - if isinstance(expected, pd.Series): - pa_array = pa.array(expected._values).cast(obj.dtype.pyarrow_dtype) - pd_array = type(expected._values)(pa_array) - expected = pd.Series(pd_array) - return expected - - monkeypatch.setattr(TestBaseArithmeticOps, "_combine", _patch_combine) + monkeypatch.setattr(TestBaseArithmeticOps, "_combine", self._patch_combine) self.check_opname(ser, op_name, other, exc=self.series_array_exc) def test_add_series_with_extension_array(self, data, request): From 81c609fe74303cbd78f88e5fba48f8c6b5a93403 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 12 Jul 2022 10:58:46 -0700 Subject: [PATCH 17/18] min version compat --- pandas/tests/extension/test_arrow.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 7ad5c899f8b2e..4f0b91d965d9b 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1596,8 +1596,10 @@ def test_arith_series_with_scalar( ), ) ) - elif all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} and ( - pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) + elif ( + all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} + and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) + and not pa_version_under2p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1660,8 +1662,10 @@ def test_arith_frame_with_scalar( ), ) ) - elif all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} and ( - pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) + elif ( + all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} + and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) + and not pa_version_under2p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1742,8 +1746,10 @@ def test_arith_series_with_array( ), ) ) - elif all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} and ( - pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype) + elif ( + all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} + and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) + and not pa_version_under2p0 ): request.node.add_marker( pytest.mark.xfail( From 72e8923fb5cd28c0549f4343d18b35c87fed9bcd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 14 Jul 2022 12:01:27 -0700 Subject: [PATCH 18/18] Add comparison tests --- pandas/tests/extension/test_arrow.py | 48 ++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 4f0b91d965d9b..ef576692c83b6 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1787,6 +1787,54 @@ def test_add_series_with_extension_array(self, data, request): super().test_add_series_with_extension_array(data) +class TestBaseComparisonOps(base.BaseComparisonOpsTests): + def assert_series_equal(self, left, right, *args, **kwargs): + # Series.combine for "expected" retains bool[pyarrow] dtype + # While "result" return "boolean" dtype + right = pd.Series(right._values.to_numpy(), dtype="boolean") + super().assert_series_equal(left, right, *args, **kwargs) + + def test_compare_array(self, data, comparison_op, na_value, request): + pa_dtype = data.dtype.pyarrow_dtype + ser = pd.Series(data) + # pd.Series([ser.iloc[0]] * len(ser)) may not return ArrowExtensionArray + # since ser.iloc[0] is a python scalar + other = pd.Series(pd.array([ser.iloc[0]] * len(ser), dtype=data.dtype)) + if comparison_op.__name__ in ["eq", "ne"]: + # comparison should match point-wise comparisons + result = comparison_op(ser, other) + # Series.combine does not calculate the NA mask correctly + # when comparing over an array + assert result[8] is na_value + assert result[97] is na_value + expected = ser.combine(other, comparison_op) + expected[8] = na_value + expected[97] = na_value + self.assert_series_equal(result, expected) + + else: + exc = None + try: + result = comparison_op(ser, other) + except Exception as err: + exc = err + + if exc is None: + # Didn't error, then should match point-wise behavior + if pa.types.is_temporal(pa_dtype): + # point-wise comparison with pd.NA raises TypeError + assert result[8] is na_value + assert result[97] is na_value + result = result.drop([8, 97]).reset_index(drop=True) + ser = ser.drop([8, 97]) + other = other.drop([8, 97]) + expected = ser.combine(other, comparison_op) + self.assert_series_equal(result, expected) + else: + with pytest.raises(type(exc)): + ser.combine(other, comparison_op) + + def test_arrowdtype_construct_from_string_type_with_unsupported_parameters(): with pytest.raises(NotImplementedError, match="Passing pyarrow type"): ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]")