@@ -840,19 +840,22 @@ def _str_extract_frame(arr, pat, flags=0):
840
840
841
841
def str_extract (arr , pat , flags = 0 , expand = True ):
842
842
r"""
843
+ Extract capture groups in the regex `pat` as columns in a DataFrame.
844
+
843
845
For each subject string in the Series, extract groups from the
844
- first match of regular expression pat.
846
+ first match of regular expression `pat`.
845
847
846
848
Parameters
847
849
----------
848
850
pat : string
849
- Regular expression pattern with capturing groups
851
+ Regular expression pattern with capturing groups.
850
852
flags : int, default 0 (no flags)
851
- re module flags, e.g. re.IGNORECASE
852
-
853
+ ``re`` module flags, e.g. ``re.IGNORECASE``.
854
+ See :mod:`re`
853
855
expand : bool, default True
854
- * If True, return DataFrame.
855
- * If False, return Series/Index/DataFrame.
856
+ If True, return DataFrame with one column per capture group.
857
+ If False, return a Series/Index if there is one capture group
858
+ or DataFrame if there are multiple capture groups.
856
859
857
860
.. versionadded:: 0.18.0
858
861
@@ -875,7 +878,7 @@ def str_extract(arr, pat, flags=0, expand=True):
875
878
A pattern with two groups will return a DataFrame with two columns.
876
879
Non-matches will be NaN.
877
880
878
- >>> s = Series(['a1', 'b2', 'c3'])
881
+ >>> s = pd.Series(['a1', 'b2', 'c3'])
879
882
>>> s.str.extract(r'([ab])(\d)')
880
883
0 1
881
884
0 a 1
@@ -914,7 +917,6 @@ def str_extract(arr, pat, flags=0, expand=True):
914
917
1 2
915
918
2 NaN
916
919
dtype: object
917
-
918
920
"""
919
921
if not isinstance (expand , bool ):
920
922
raise ValueError ("expand must be True or False" )
0 commit comments