diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index 0f68d1043b49d..2560d6726249e 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -83,6 +83,9 @@ def time_find(self, dtype):
     def time_rfind(self, dtype):
         self.s.str.rfind("[A-Z]+")
 
+    def time_fullmatch(self, dtype):
+        self.s.str.fullmatch("A")
+
     def time_get(self, dtype):
         self.s.str.get(0)
 
diff --git a/doc/redirects.csv b/doc/redirects.csv
index de69d0168835d..9b8a5a73dedff 100644
--- a/doc/redirects.csv
+++ b/doc/redirects.csv
@@ -1197,6 +1197,7 @@ generated/pandas.Series.str.extractall,../reference/api/pandas.Series.str.extrac
 generated/pandas.Series.str.extract,../reference/api/pandas.Series.str.extract
 generated/pandas.Series.str.findall,../reference/api/pandas.Series.str.findall
 generated/pandas.Series.str.find,../reference/api/pandas.Series.str.find
+generated/pandas.Series.str.fullmatch,../reference/api/pandas.Series.str.fullmatch
 generated/pandas.Series.str.get_dummies,../reference/api/pandas.Series.str.get_dummies
 generated/pandas.Series.str.get,../reference/api/pandas.Series.str.get
 generated/pandas.Series.str,../reference/api/pandas.Series.str
diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst
index cc2937695e80f..3ff3b2bb53fda 100644
--- a/doc/source/reference/series.rst
+++ b/doc/source/reference/series.rst
@@ -415,6 +415,7 @@ strings and apply several methods to it. These can be accessed like
    Series.str.extractall
    Series.str.find
    Series.str.findall
+   Series.str.fullmatch
    Series.str.get
    Series.str.index
    Series.str.join
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 252bc215869ac..d5ee28eb7017e 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -844,6 +844,23 @@ def _str_match(
             pat = "^" + pat
         return self._str_contains(pat, case, flags, na, regex=True)
 
+    def _str_fullmatch(self, pat, case: bool = True, flags: int = 0, na: Scalar = None):
+        """
+        Test whether each string is matched in its entirety by the regex ``pat``.
+
+        Implemented by anchoring ``pat`` at the end and delegating to
+        ``_str_match``; falls back to the parent implementation when
+        ``pa_version_under4p0`` is true.
+        """
+        if pa_version_under4p0:
+            return super()._str_fullmatch(pat, case, flags, na)
+
+        # A trailing "$" only anchors the pattern when it is not escaped; a
+        # pattern ending in a literal "\$" still needs the anchor appended.
+        if not pat.endswith("$") or pat.endswith("\\$"):
+            pat = pat + "$"
+        return self._str_match(pat, case, flags, na)
+
     def _str_isalnum(self):
         result = pc.utf8_is_alnum(self._data)
         return BooleanDtype().__from_arrow__(result)
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 5606380908f38..696b06f174e28 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -1170,7 +1170,7 @@ def match(self, pat, case=True, flags=0, na=None):
 
         Returns
         -------
-        Series/array of boolean values
+        Series/Index/array of boolean values
 
         See Also
         --------
@@ -1197,14 +1197,14 @@ def fullmatch(self, pat, case=True, flags=0, na=None):
             If True, case sensitive.
         flags : int, default 0 (no flags)
             Regex module flags, e.g. re.IGNORECASE.
-        na : scalar, optional.
+        na : scalar, optional
             Fill value for missing values. The default depends on dtype of the
             array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``,
             ``pandas.NA`` is used.
 
         Returns
         -------
-        Series/array of boolean values
+        Series/Index/array of boolean values
 
         See Also
         --------
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index 843b0ba55e691..0815d23f2b493 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -494,10 +494,32 @@ def test_fullmatch(any_string_dtype):
     expected = Series([True, False, np.nan, False], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
+
+def test_fullmatch_na_kwarg(any_string_dtype):
+    ser = Series(
+        ["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype=any_string_dtype
+    )
+    result = ser.str.fullmatch(".*BAD[_]+.*BAD", na=False)
+    expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean"
+    expected = Series([True, False, False, False], dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+def test_fullmatch_case_kwarg(any_string_dtype):
     ser = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)
-    result = ser.str.fullmatch("ab", case=False)
     expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean"
+
+    expected = Series([True, False, False, False], dtype=expected_dtype)
+
+    result = ser.str.fullmatch("ab", case=True)
+    tm.assert_series_equal(result, expected)
+
     expected = Series([True, True, False, False], dtype=expected_dtype)
+
+    result = ser.str.fullmatch("ab", case=False)
+    tm.assert_series_equal(result, expected)
+
+    result = ser.str.fullmatch("ab", flags=re.IGNORECASE)
     tm.assert_series_equal(result, expected)
 
 