Skip to content

Commit 98e1d2f

Browse files
rohanjain101 (Rohan Jain) and Rohan Jain authored
Support multiplication of pd.ArrowDtype(pa.string()) and integral value where integral value is a series (pandas-dev#56538)
* allow repeat count to be a series
* fix validation
* gh reference
* fix conditional logic
* Revert "fix conditional logic" (This reverts commit 15f1990.)
* remove condition
* inline

Co-authored-by: Rohan Jain <[email protected]>
1 parent 38c2877 commit 98e1d2f

File tree

2 files changed

+39
-10
lines changed

2 files changed

+39
-10
lines changed

pandas/core/arrays/arrow/array.py

+19-10
Original file line numberDiff line numberDiff line change
@@ -693,22 +693,31 @@ def _evaluate_op_method(self, other, op, arrow_funcs):
693693
other = self._box_pa(other)
694694

695695
if pa.types.is_string(pa_type) or pa.types.is_binary(pa_type):
696-
if op in [operator.add, roperator.radd, operator.mul, roperator.rmul]:
696+
if op in [operator.add, roperator.radd]:
697697
sep = pa.scalar("", type=pa_type)
698698
if op is operator.add:
699699
result = pc.binary_join_element_wise(self._pa_array, other, sep)
700700
elif op is roperator.radd:
701701
result = pc.binary_join_element_wise(other, self._pa_array, sep)
702-
else:
703-
if not (
704-
isinstance(other, pa.Scalar) and pa.types.is_integer(other.type)
705-
):
706-
raise TypeError("Can only string multiply by an integer.")
707-
result = pc.binary_join_element_wise(
708-
*([self._pa_array] * other.as_py()), sep
709-
)
710702
return type(self)(result)
711-
703+
elif op in [operator.mul, roperator.rmul]:
704+
binary = self._pa_array
705+
integral = other
706+
if not pa.types.is_integer(integral.type):
707+
raise TypeError("Can only string multiply by an integer.")
708+
pa_integral = pc.if_else(pc.less(integral, 0), 0, integral)
709+
result = pc.binary_repeat(binary, pa_integral)
710+
return type(self)(result)
711+
elif (
712+
pa.types.is_string(other.type) or pa.types.is_binary(other.type)
713+
) and op in [operator.mul, roperator.rmul]:
714+
binary = other
715+
integral = self._pa_array
716+
if not pa.types.is_integer(integral.type):
717+
raise TypeError("Can only string multiply by an integer.")
718+
pa_integral = pc.if_else(pc.less(integral, 0), 0, integral)
719+
result = pc.binary_repeat(binary, pa_integral)
720+
return type(self)(result)
712721
if (
713722
isinstance(other, pa.Scalar)
714723
and pc.is_null(other).as_py()

pandas/tests/extension/test_arrow.py

+20
Original file line numberDiff line numberDiff line change
@@ -1334,6 +1334,26 @@ def test_arrowdtype_construct_from_string_type_only_one_pyarrow():
13341334
pd.Series(range(3), dtype=invalid)
13351335

13361336

1337+
def test_arrow_string_multiplication():
    """Multiply an Arrow string Series element-wise by an integer Series.

    Each string is repeated by the count in the same position; a negative
    count is expected to yield an empty string. The reflected form
    (``repeat * binary``) must produce the same values.
    """
    # GH 56537
    binary = pd.Series(["abc", "defg"], dtype=ArrowDtype(pa.string()))
    repeat = pd.Series([2, -2], dtype="int64[pyarrow]")
    expected = pd.Series(["abcabc", ""], dtype=ArrowDtype(pa.string()))

    result = binary * repeat
    tm.assert_series_equal(result, expected)

    # Compare the reflected product with ``expected`` directly (actual first,
    # expected second) so a failure here reports against the known-good
    # values rather than against another computed result.
    reflected_result = repeat * binary
    tm.assert_series_equal(reflected_result, expected)
1346+
1347+
1348+
def test_arrow_string_multiplication_scalar_repeat():
    """Multiply an Arrow string Series by a plain integer, in both orders."""
    string_dtype = ArrowDtype(pa.string())
    strings = pd.Series(["abc", "defg"], dtype=string_dtype)
    doubled = pd.Series(["abcabc", "defgdefg"], dtype=string_dtype)

    # Series on the left-hand side: ``Series * int``.
    tm.assert_series_equal(strings * 2, doubled)
    # Series on the right-hand side: ``int * Series``.
    tm.assert_series_equal(2 * strings, doubled)
1355+
1356+
13371357
@pytest.mark.parametrize(
13381358
"interpolation", ["linear", "lower", "higher", "nearest", "midpoint"]
13391359
)

0 commit comments

Comments
 (0)