Skip to content

Commit 46c8da3

Browse files
authored
Fix negative n for str.replace with arrow string (pandas-dev#56406)
1 parent 68c1af5 commit 46c8da3

File tree

3 files changed

+18
-1
lines changed

3 files changed

+18
-1
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,7 @@ Strings
577577
- Bug in :func:`pandas.api.types.is_string_dtype` while checking object array with no elements is of the string dtype (:issue:`54661`)
578578
- Bug in :meth:`DataFrame.apply` failing when ``engine="numba"`` and columns or index have ``StringDtype`` (:issue:`56189`)
579579
- Bug in :meth:`Series.__mul__` for :class:`ArrowDtype` with ``pyarrow.string`` dtype and ``string[pyarrow]`` for the pyarrow backend (:issue:`51970`)
580+
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`56404`)
580581
- Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with arguments of type ``tuple[str, ...]`` for ``string[pyarrow]`` (:issue:`54942`)
581582

582583
Interval

pandas/core/arrays/arrow/array.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -2155,7 +2155,15 @@ def _str_replace(
21552155
)
21562156

21572157
func = pc.replace_substring_regex if regex else pc.replace_substring
2158-
result = func(self._pa_array, pattern=pat, replacement=repl, max_replacements=n)
2158+
# https://github.com/apache/arrow/issues/39149
2159+
# GH 56404, unexpected behavior with negative max_replacements with pyarrow.
2160+
pa_max_replacements = None if n < 0 else n
2161+
result = func(
2162+
self._pa_array,
2163+
pattern=pat,
2164+
replacement=repl,
2165+
max_replacements=pa_max_replacements,
2166+
)
21592167
return type(self)(result)
21602168

21612169
def _str_repeat(self, repeats: int | Sequence[int]):

pandas/tests/extension/test_arrow.py

+8
Original file line numberDiff line numberDiff line change
@@ -1776,6 +1776,14 @@ def test_str_replace(pat, repl, n, regex, exp):
17761776
tm.assert_series_equal(result, expected)
17771777

17781778

1779+
def test_str_replace_negative_n():
    """Negative ``n`` on an Arrow-backed string Series replaces every match."""
    # GH 56404
    arrow_string = ArrowDtype(pa.string())
    ser = pd.Series(["abc", "aaaaaa"], dtype=arrow_string)
    # The fourth positional parameter of ``Series.str.replace`` is ``case``
    # (not ``regex``), so name the arguments to keep the call unambiguous.
    actual = ser.str.replace("a", "", n=-3, case=True)
    expected = pd.Series(["bc", ""], dtype=arrow_string)
    tm.assert_series_equal(expected, actual)
1785+
1786+
17791787
def test_str_repeat_unsupported():
17801788
ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string()))
17811789
with pytest.raises(NotImplementedError, match="repeat is not"):

0 commit comments

Comments
 (0)