Skip to content

Commit d43af63

Browse files
Backport PR #56691 on branch 2.2.x (Bug pyarrow implementation of str.fullmatch matches partial string. issue #56652) (#56715)
Backport PR #56691: Bug pyarrow implementation of str.fullmatch matches partial string. issue #56652 Co-authored-by: JackCollins91 <[email protected]>
1 parent ee4c377 commit d43af63

File tree

5 files changed

+24
-9
lines changed

5 files changed

+24
-9
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -805,6 +805,7 @@ Strings
805805
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`56404`)
806806
- Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with arguments of type ``tuple[str, ...]`` for :class:`ArrowDtype` with ``pyarrow.string`` dtype (:issue:`56579`)
807807
- Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with arguments of type ``tuple[str, ...]`` for ``string[pyarrow]`` (:issue:`54942`)
808+
- Bug in :meth:`Series.str.fullmatch` when ``dtype=pandas.ArrowDtype(pyarrow.string())`` allowing partial matches when the regex ends in an escaped literal dollar sign (``\$``) (:issue:`56652`)
808809
- Bug in comparison operations for ``dtype="string[pyarrow_numpy]"`` raising if dtypes can't be compared (:issue:`56008`)
809810

810811
Interval

pandas/core/arrays/arrow/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2277,7 +2277,7 @@ def _str_match(
22772277
def _str_fullmatch(
22782278
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
22792279
):
2280-
if not pat.endswith("$") or pat.endswith("//$"):
2280+
if not pat.endswith("$") or pat.endswith("\\$"):
22812281
pat = f"{pat}$"
22822282
return self._str_match(pat, case, flags, na)
22832283

pandas/core/arrays/string_arrow.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ def _str_match(
433433
def _str_fullmatch(
434434
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
435435
):
436-
if not pat.endswith("$") or pat.endswith("//$"):
436+
if not pat.endswith("$") or pat.endswith("\\$"):
437437
pat = f"{pat}$"
438438
return self._str_match(pat, case, flags, na)
439439

pandas/tests/extension/test_arrow.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -1903,16 +1903,21 @@ def test_str_match(pat, case, na, exp):
19031903
@pytest.mark.parametrize(
19041904
"pat, case, na, exp",
19051905
[
1906-
["abc", False, None, [True, None]],
1907-
["Abc", True, None, [False, None]],
1908-
["bc", True, None, [False, None]],
1909-
["ab", False, True, [True, True]],
1910-
["a[a-z]{2}", False, None, [True, None]],
1911-
["A[a-z]{1}", True, None, [False, None]],
1906+
["abc", False, None, [True, True, False, None]],
1907+
["Abc", True, None, [False, False, False, None]],
1908+
["bc", True, None, [False, False, False, None]],
1909+
["ab", False, None, [True, True, False, None]],
1910+
["a[a-z]{2}", False, None, [True, True, False, None]],
1911+
["A[a-z]{1}", True, None, [False, False, False, None]],
1912+
# GH Issue: #56652
1913+
["abc$", False, None, [True, False, False, None]],
1914+
["abc\\$", False, None, [False, True, False, None]],
1915+
["Abc$", True, None, [False, False, False, None]],
1916+
["Abc\\$", True, None, [False, False, False, None]],
19121917
],
19131918
)
19141919
def test_str_fullmatch(pat, case, na, exp):
1915-
ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string()))
1920+
ser = pd.Series(["abc", "abc$", "$abc", None], dtype=ArrowDtype(pa.string()))
19161921
result = ser.str.match(pat, case=case, na=na)
19171922
expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_()))
19181923
tm.assert_series_equal(result, expected)

pandas/tests/strings/test_find_replace.py

+9
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,15 @@ def test_fullmatch(any_string_dtype):
730730
tm.assert_series_equal(result, expected)
731731

732732

733+
def test_fullmatch_dollar_literal(any_string_dtype):
734+
# GH 56652
735+
ser = Series(["foo", "foo$foo", np.nan, "foo$"], dtype=any_string_dtype)
736+
result = ser.str.fullmatch("foo\\$")
737+
expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
738+
expected = Series([False, False, np.nan, True], dtype=expected_dtype)
739+
tm.assert_series_equal(result, expected)
740+
741+
733742
def test_fullmatch_na_kwarg(any_string_dtype):
734743
ser = Series(
735744
["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype=any_string_dtype

0 commit comments

Comments
 (0)