From 2b3c1ecadb947534eab5bf699782769e317a5357 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Sun, 17 Dec 2023 14:21:49 -0500 Subject: [PATCH 1/7] allow repeat count to be a series --- pandas/core/arrays/arrow/array.py | 26 ++++++++++++++++---------- pandas/tests/extension/test_arrow.py | 19 +++++++++++++++++++ 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 6f7f42eca3794..7a73001216eb4 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -695,22 +695,21 @@ def _evaluate_op_method(self, other, op, arrow_funcs): other = self._box_pa(other) if pa.types.is_string(pa_type) or pa.types.is_binary(pa_type): - if op in [operator.add, roperator.radd, operator.mul, roperator.rmul]: + if op in [operator.add, roperator.radd]: sep = pa.scalar("", type=pa_type) if op is operator.add: result = pc.binary_join_element_wise(self._pa_array, other, sep) elif op is roperator.radd: result = pc.binary_join_element_wise(other, self._pa_array, sep) - else: - if not ( - isinstance(other, pa.Scalar) and pa.types.is_integer(other.type) - ): - raise TypeError("Can only string multiply by an integer.") - result = pc.binary_join_element_wise( - *([self._pa_array] * other.as_py()), sep - ) return type(self)(result) - + elif op in [operator.mul, roperator.rmul]: + result = type(self)._evaluate_binary_repeat(self._pa_array, other) + return type(self)(result) + elif pa.types.is_integer(pa_type) and ( + pa.types.is_string(other.type) or pa.types.is_binary(other.type) + ): + result = type(self)._evaluate_binary_repeat(other, self._pa_array) + return type(self)(result) if ( isinstance(other, pa.Scalar) and pc.is_null(other).as_py() @@ -726,6 +725,13 @@ def _evaluate_op_method(self, other, op, arrow_funcs): result = pc_func(self._pa_array, other) return type(self)(result) + @staticmethod + def _evaluate_binary_repeat(binary, integral): + if not pa.types.is_integer(integral.type): + raise TypeError("Can only string multiply by an integer.") + pa_integral = pc.if_else(pc.less(integral, 0), 0, integral) + return pc.binary_repeat(binary, pa_integral) + def _logical_method(self, other, op): # For integer types `^`, `|`, `&` are bitwise operators and return # integer types. Otherwise these are boolean ops. diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index de9f872aca01d..133091f5c04f0 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1330,6 +1330,25 @@ def test_arrowdtype_construct_from_string_type_only_one_pyarrow(): pd.Series(range(3), dtype=invalid) +def test_arrow_string_multiplication(): + binary = pd.Series(["abc", "defg"], dtype=ArrowDtype(pa.string())) + repeat = pd.Series([2, -2], dtype="int64[pyarrow]") + result = binary * repeat + expected = pd.Series(["abcabc", ""], dtype=ArrowDtype(pa.string())) + tm.assert_series_equal(result, expected) + reflected_result = repeat * binary + tm.assert_series_equal(result, reflected_result) + + +def test_arrow_string_multiplication_scalar_repeat(): + binary = pd.Series(["abc", "defg"], dtype=ArrowDtype(pa.string())) + result = binary * 2 + expected = pd.Series(["abcabc", "defgdefg"], dtype=ArrowDtype(pa.string())) + tm.assert_series_equal(result, expected) + reflected_result = 2 * binary + tm.assert_series_equal(reflected_result, expected) + + @pytest.mark.parametrize( "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] ) From 02ead68614ec3e4663acdcf985308684c27f669f Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Sun, 17 Dec 2023 14:24:24 -0500 Subject: [PATCH 2/7] fix validation --- pandas/core/arrays/arrow/array.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 7a73001216eb4..665e38e32f24b 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -705,8 +705,10 @@ def _evaluate_op_method(self, other, op, arrow_funcs): elif op in [operator.mul, roperator.rmul]: result = type(self)._evaluate_binary_repeat(self._pa_array, other) return type(self)(result) - elif pa.types.is_integer(pa_type) and ( - pa.types.is_string(other.type) or pa.types.is_binary(other.type) + elif ( + pa.types.is_integer(pa_type) + and (pa.types.is_string(other.type) or pa.types.is_binary(other.type)) + and op in [operator.mul, roperator.rmul] ): result = type(self)._evaluate_binary_repeat(other, self._pa_array) return type(self)(result) From f1d14aa633920db7b3968a673906d41a51355af6 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Sun, 17 Dec 2023 14:29:38 -0500 Subject: [PATCH 3/7] gh reference --- pandas/tests/extension/test_arrow.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 133091f5c04f0..276bd1917c010 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1331,6 +1331,7 @@ def test_arrowdtype_construct_from_string_type_only_one_pyarrow(): def test_arrow_string_multiplication(): + # GH 56537 binary = pd.Series(["abc", "defg"], dtype=ArrowDtype(pa.string())) repeat = pd.Series([2, -2], dtype="int64[pyarrow]") result = binary * repeat From 15f19901744ddcc72562efa8da6508d81abbf2f5 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Mon, 18 Dec 2023 14:22:27 -0500 Subject: [PATCH 4/7] fix conditional logic --- pandas/core/arrays/arrow/array.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 665e38e32f24b..4284271ae69dc 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -702,16 +702,16 @@ def _evaluate_op_method(self, other, op, arrow_funcs): elif op is roperator.radd: result = pc.binary_join_element_wise(other, self._pa_array, sep) return type(self)(result) - elif op in [operator.mul, roperator.rmul]: - result = type(self)._evaluate_binary_repeat(self._pa_array, other) - return type(self)(result) - elif ( - pa.types.is_integer(pa_type) - and (pa.types.is_string(other.type) or pa.types.is_binary(other.type)) - and op in [operator.mul, roperator.rmul] - ): - result = type(self)._evaluate_binary_repeat(other, self._pa_array) - return type(self)(result) + + if op in [operator.mul, roperator.rmul]: + if pa.types.is_integer(other.type) and ( + pa.types.is_string(pa_type) or pa.types.is_binary(pa_type) + ): + return type(self)._evaluate_binary_repeat(self._pa_array, other) + elif pa.types.is_integer(pa_type) and ( + pa.types.is_string(other.type) or pa.types.is_binary(other.type) + ): + return type(self)._evaluate_binary_repeat(other, self._pa_array) if ( isinstance(other, pa.Scalar) and pc.is_null(other).as_py() @@ -727,12 +727,12 @@ def _evaluate_op_method(self, other, op, arrow_funcs): result = pc_func(self._pa_array, other) return type(self)(result) - @staticmethod - def _evaluate_binary_repeat(binary, integral): + @classmethod + def _evaluate_binary_repeat(cls, binary, integral): if not pa.types.is_integer(integral.type): raise TypeError("Can only string multiply by an integer.") pa_integral = pc.if_else(pc.less(integral, 0), 0, integral) - return pc.binary_repeat(binary, pa_integral) + return cls(pc.binary_repeat(binary, pa_integral)) def _logical_method(self, other, op): # For integer types `^`, `|`, `&` are bitwise operators and return From 188e6098fecf8f211eed1b34a037c4fe32b027e3 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Mon, 18 Dec 2023 15:20:15 -0500 Subject: [PATCH 5/7] Revert "fix conditional logic" This reverts commit 15f19901744ddcc72562efa8da6508d81abbf2f5. --- pandas/core/arrays/arrow/array.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 4284271ae69dc..665e38e32f24b 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -702,16 +702,16 @@ def _evaluate_op_method(self, other, op, arrow_funcs): elif op is roperator.radd: result = pc.binary_join_element_wise(other, self._pa_array, sep) return type(self)(result) - - if op in [operator.mul, roperator.rmul]: - if pa.types.is_integer(other.type) and ( - pa.types.is_string(pa_type) or pa.types.is_binary(pa_type) - ): - return type(self)._evaluate_binary_repeat(self._pa_array, other) - elif pa.types.is_integer(pa_type) and ( - pa.types.is_string(other.type) or pa.types.is_binary(other.type) - ): - return type(self)._evaluate_binary_repeat(other, self._pa_array) + elif op in [operator.mul, roperator.rmul]: + result = type(self)._evaluate_binary_repeat(self._pa_array, other) + return type(self)(result) + elif ( + pa.types.is_integer(pa_type) + and (pa.types.is_string(other.type) or pa.types.is_binary(other.type)) + and op in [operator.mul, roperator.rmul] + ): + result = type(self)._evaluate_binary_repeat(other, self._pa_array) + return type(self)(result) if ( isinstance(other, pa.Scalar) and pc.is_null(other).as_py() @@ -727,12 +727,12 @@ def _evaluate_op_method(self, other, op, arrow_funcs): result = pc_func(self._pa_array, other) return type(self)(result) - @classmethod - def _evaluate_binary_repeat(cls, binary, integral): + @staticmethod + def _evaluate_binary_repeat(binary, integral): if not pa.types.is_integer(integral.type): raise TypeError("Can only string multiply by an integer.") pa_integral = pc.if_else(pc.less(integral, 0), 0, integral) - return cls(pc.binary_repeat(binary, pa_integral)) + return pc.binary_repeat(binary, pa_integral) def _logical_method(self, other, op): # For integer types `^`, `|`, `&` are bitwise operators and return From c619cb89b428cc6d76a1f305607b556c0964b4c6 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Tue, 19 Dec 2023 13:09:32 -0500 Subject: [PATCH 6/7] remove condition --- pandas/core/arrays/arrow/array.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 665e38e32f24b..956c6bd03607e 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -706,10 +706,8 @@ def _evaluate_op_method(self, other, op, arrow_funcs): result = type(self)._evaluate_binary_repeat(self._pa_array, other) return type(self)(result) elif ( - pa.types.is_integer(pa_type) - and (pa.types.is_string(other.type) or pa.types.is_binary(other.type)) - and op in [operator.mul, roperator.rmul] - ): + pa.types.is_string(other.type) or pa.types.is_binary(other.type) + ) and op in [operator.mul, roperator.rmul]: result = type(self)._evaluate_binary_repeat(other, self._pa_array) return type(self)(result) if ( From defdc64d7a1137eba25e13a039fe4f235e1cc544 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Tue, 19 Dec 2023 17:04:58 -0500 Subject: [PATCH 7/7] inline --- pandas/core/arrays/arrow/array.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 956c6bd03607e..d62a11f900694 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -703,12 +703,22 @@ def _evaluate_op_method(self, other, op, arrow_funcs): result = pc.binary_join_element_wise(other, self._pa_array, sep) return type(self)(result) elif op in [operator.mul, roperator.rmul]: - result = type(self)._evaluate_binary_repeat(self._pa_array, other) + binary = self._pa_array + integral = other + if not pa.types.is_integer(integral.type): + raise TypeError("Can only string multiply by an integer.") + pa_integral = pc.if_else(pc.less(integral, 0), 0, integral) + result = pc.binary_repeat(binary, pa_integral) return type(self)(result) elif ( pa.types.is_string(other.type) or pa.types.is_binary(other.type) ) and op in [operator.mul, roperator.rmul]: - result = type(self)._evaluate_binary_repeat(other, self._pa_array) + binary = other + integral = self._pa_array + if not pa.types.is_integer(integral.type): + raise TypeError("Can only string multiply by an integer.") + pa_integral = pc.if_else(pc.less(integral, 0), 0, integral) + result = pc.binary_repeat(binary, pa_integral) return type(self)(result) if ( isinstance(other, pa.Scalar) @@ -725,13 +735,6 @@ def _evaluate_op_method(self, other, op, arrow_funcs): result = pc_func(self._pa_array, other) return type(self)(result) - @staticmethod - def _evaluate_binary_repeat(binary, integral): - if not pa.types.is_integer(integral.type): - raise TypeError("Can only string multiply by an integer.") - pa_integral = pc.if_else(pc.less(integral, 0), 0, integral) - return pc.binary_repeat(binary, pa_integral) - def _logical_method(self, other, op): # For integer types `^`, `|`, `&` are bitwise operators and return # integer types. Otherwise these are boolean ops.