diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 1b2c80f90f97b..528440f454e57 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1057,14 +1057,14 @@ You can check whether elements contain a pattern: .. ipython:: python pattern = r'[a-z][0-9]' - Series(['1', '2', '3a', '3b', '03c']).contains(pattern) + Series(['1', '2', '3a', '3b', '03c']).str.contains(pattern) or match a pattern: .. ipython:: python - Series(['1', '2', '3a', '3b', '03c']).match(pattern, as_indexer=True) + Series(['1', '2', '3a', '3b', '03c']).str.match(pattern, as_indexer=True) The distinction between ``match`` and ``contains`` is strictness: ``match`` relies on strict ``re.match``, while ``contains`` relies on ``re.search``. diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 02f422bb0b635..1d9139fa9a1c7 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -333,15 +333,11 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=False): Returns ------- - boolean Series + Series of boolean values if as_indexer=True Series of tuples if as_indexer=False, default but deprecated - Returns - ------- - Series of boolean values - See Also -------- contains : analagous, but less strict, relying on re.search instead of @@ -414,14 +410,27 @@ def str_extract(arr, pat, flags=0): A pattern with more than one group will return a DataFrame. >>> Series(['a1', 'b2', 'c3']).str.extract('([ab])(\d)') + 0 1 + 0 a 1 + 1 b 2 + 2 NaN NaN A pattern may contain optional groups. >>> Series(['a1', 'b2', 'c3']).str.extract('([ab])?(\d)') + 0 1 + 0 a 1 + 1 b 2 + 2 NaN 3 Named groups will become column names in the result. >>> Series(['a1', 'b2', 'c3']).str.extract('(?P<letter>[ab])(?P<digit>\d)') + letter digit + 0 a 1 + 1 b 2 + 2 NaN NaN + """ regex = re.compile(pat, flags=flags)