Skip to content

Commit eefe5c1

Browse files
committed
BUG-Pyarrow-implementation-of-str.fullmatch-matches-partial-string.-Issue-pandas-dev#56652
Changed array.py: makes Series(["abc$abc"]).str.fullmatch("abc\\$") give the same result when dtype = pyarrow.string as it does when dtype = str. The issue reporter (Issue-pandas-dev#56652) requested changing "//$" to "\$", but this resulted in a DeprecationWarning, so "\\$" was used instead. Changed test_arrow.py: updated test_str_fullmatch to account for edge cases where the string starts with or ends with a literal $, as well as where the pattern is anchored to the end of the string.
1 parent 86ad444 commit eefe5c1

File tree

2 files changed

+14
-8
lines changed

2 files changed

+14
-8
lines changed

pandas/core/arrays/arrow/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2277,7 +2277,7 @@ def _str_match(
22772277
def _str_fullmatch(
22782278
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
22792279
):
2280-
if not pat.endswith("$") or pat.endswith("//$"):
2280+
if not pat.endswith("$") or pat.endswith("\\$"):
22812281
pat = f"{pat}$"
22822282
return self._str_match(pat, case, flags, na)
22832283

pandas/tests/extension/test_arrow.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -1903,18 +1903,24 @@ def test_str_match(pat, case, na, exp):
19031903
@pytest.mark.parametrize(
19041904
"pat, case, na, exp",
19051905
[
1906-
["abc", False, None, [True, None]],
1907-
["Abc", True, None, [False, None]],
1908-
["bc", True, None, [False, None]],
1909-
["ab", False, True, [True, True]],
1910-
["a[a-z]{2}", False, None, [True, None]],
1911-
["A[a-z]{1}", True, None, [False, None]],
1906+
["abc", False, None, [True, True, False, None]],
1907+
["Abc", True, None, [False, False, False, None]],
1908+
["bc", True, None, [False, False, False, None]],
1909+
["ab", False, None, [True, True, False, None]],
1910+
["a[a-z]{2}", False, None, [True, True, False, None]],
1911+
["A[a-z]{1}", True, None, [False, False, False, None]],
1912+
# GH Issue: #56652
1913+
["abc$", False, None, [True, False, False, None]],
1914+
["abc\\$", False, None, [False, True, False, None]],
1915+
["Abc$", True, None, [False, False, False, None]],
1916+
["Abc\\$", True, None, [False, False, False, None]],
19121917
],
19131918
)
19141919
def test_str_fullmatch(pat, case, na, exp):
1915-
ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string()))
1920+
ser = pd.Series(["abc", "abc$", "$abc", None], dtype=ArrowDtype(pa.string()))
19161921
result = ser.str.match(pat, case=case, na=na)
19171922
expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_()))
1923+
print(result)
19181924
tm.assert_series_equal(result, expected)
19191925

19201926

0 commit comments

Comments
 (0)