From effaedee2e8ebc94894de1d574896d4a7f48e676 Mon Sep 17 00:00:00 2001
From: Arjun <arjunsharma147@yahoo.com>
Date: Sat, 10 Mar 2018 19:06:07 +0530
Subject: [PATCH 1/5] Modified docstring of str.extract

---
 pandas/core/strings.py | 103 +++++++----------------------------------
 1 file changed, 16 insertions(+), 87 deletions(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index fac607f4621a8..daae98086dae9 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -633,21 +633,22 @@ def _str_extract_frame(arr, pat, flags=0):
 
 def str_extract(arr, pat, flags=0, expand=True):
     r"""
+    Return the match object corresponding to regex `pat`.
+
     For each subject string in the Series, extract groups from the
-    first match of regular expression pat.
+    first match of regular expression `pat`.
 
     Parameters
     ----------
     pat : string
-        Regular expression pattern with capturing groups
+        Regular expression pattern with capturing groups.
     flags : int, default 0 (no flags)
-        re module flags, e.g. re.IGNORECASE
+        Re module flags, e.g. re.IGNORECASE.
 
     expand : bool, default True
-        * If True, return DataFrame.
-        * If False, return Series/Index/DataFrame.
-
-        .. versionadded:: 0.18.0
+        If True, return DataFrame, else return Series/Index/DataFrame.
+        
+        .. versionadded:: 0.18.0.
 
     Returns
     -------
@@ -668,7 +669,7 @@ def str_extract(arr, pat, flags=0, expand=True):
     A pattern with two groups will return a DataFrame with two columns.
     Non-matches will be NaN.
 
-    >>> s = Series(['a1', 'b2', 'c3'])
+    >>> s = pd.Series(['a1', 'b2', 'c3'])
     >>> s.str.extract(r'([ab])(\d)')
          0    1
     0    a    1
@@ -707,7 +708,6 @@ def str_extract(arr, pat, flags=0, expand=True):
     1      2
     2    NaN
     dtype: object
-
     """
     if not isinstance(expand, bool):
         raise ValueError("expand must be True or False")
@@ -898,94 +898,23 @@ def str_join(arr, sep):
 
 def str_findall(arr, pat, flags=0):
     """
-    Find all occurrences of pattern or regular expression in the Series/Index.
-
-    Equivalent to applying :func:`re.findall` to all the elements in the
-    Series/Index.
+    Find all occurrences of pattern or regular expression in the
+    Series/Index. Equivalent to :func:`re.findall`.
 
     Parameters
     ----------
     pat : string
-        Pattern or regular expression.
-    flags : int, default 0
-        ``re`` module flags, e.g. `re.IGNORECASE` (default is 0, which means
-        no flags).
+        Pattern or regular expression
+    flags : int, default 0 (no flags)
+        re module flags, e.g. re.IGNORECASE
 
     Returns
     -------
-    Series/Index of lists of strings
-        All non-overlapping matches of pattern or regular expression in each
-        string of this Series/Index.
+    matches : Series/Index of lists
 
     See Also
     --------
-    count : Count occurrences of pattern or regular expression in each string
-        of the Series/Index.
-    extractall : For each string in the Series, extract groups from all matches
-        of regular expression and return a DataFrame with one row for each
-        match and one column for each group.
-    re.findall : The equivalent ``re`` function to all non-overlapping matches
-        of pattern or regular expression in string, as a list of strings.
-
-    Examples
-    --------
-
-    >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit'])
-
-    The search for the pattern 'Monkey' returns one match:
-
-    >>> s.str.findall('Monkey')
-    0          []
-    1    [Monkey]
-    2          []
-    dtype: object
-
-    On the other hand, the search for the pattern 'MONKEY' doesn't return any
-    match:
-
-    >>> s.str.findall('MONKEY')
-    0    []
-    1    []
-    2    []
-    dtype: object
-
-    Flags can be added to the pattern or regular expression. For instance,
-    to find the pattern 'MONKEY' ignoring the case:
-
-    >>> import re
-    >>> s.str.findall('MONKEY', flags=re.IGNORECASE)
-    0          []
-    1    [Monkey]
-    2          []
-    dtype: object
-
-    When the pattern matches more than one string in the Series, all matches
-    are returned:
-
-    >>> s.str.findall('on')
-    0    [on]
-    1    [on]
-    2      []
-    dtype: object
-
-    Regular expressions are supported too. For instance, the search for all the
-    strings ending with the word 'on' is shown next:
-
-    >>> s.str.findall('on$')
-    0    [on]
-    1      []
-    2      []
-    dtype: object
-
-    If the pattern is found more than once in the same string, then a list of
-    multiple strings is returned:
-
-    >>> s.str.findall('b')
-    0        []
-    1        []
-    2    [b, b]
-    dtype: object
-
+    extractall : returns DataFrame with one column per capture group
     """
     regex = re.compile(pat, flags=flags)
     return _na_map(regex.findall, arr)

From c9f4ac881654f4316a8329e7752b1ac573507a3d Mon Sep 17 00:00:00 2001
From: Arjun <arjunsharma147@yahoo.com>
Date: Sat, 10 Mar 2018 19:23:09 +0530
Subject: [PATCH 2/5] Updated docstring of str.extract

---
 pandas/core/strings.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index daae98086dae9..e2ac43b0b92e9 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -644,7 +644,6 @@ def str_extract(arr, pat, flags=0, expand=True):
         Regular expression pattern with capturing groups.
     flags : int, default 0 (no flags)
         Re module flags, e.g. re.IGNORECASE.
-
     expand : bool, default True
         If True, return DataFrame, else return Series/Index/DataFrame.
         

From 3f7aadf2e5866b4e7d7f86caa4f51079fca071d6 Mon Sep 17 00:00:00 2001
From: Arjun <arjunsharma147@yahoo.com>
Date: Sat, 10 Mar 2018 19:27:39 +0530
Subject: [PATCH 3/5] Updated docstring of str.extract

---
 pandas/core/strings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index e2ac43b0b92e9..f6637585ce204 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -646,7 +646,7 @@ def str_extract(arr, pat, flags=0, expand=True):
         Re module flags, e.g. re.IGNORECASE.
     expand : bool, default True
         If True, return DataFrame, else return Series/Index/DataFrame.
-        
+
         .. versionadded:: 0.18.0.
 
     Returns

From 3900c1a2fe2d5f5981cf82b08096563a5ab0f7e5 Mon Sep 17 00:00:00 2001
From: Arjun <arjunsharma147@yahoo.com>
Date: Thu, 15 Mar 2018 22:14:28 +0530
Subject: [PATCH 4/5] Minor changes to str.extract

---
 pandas/core/strings.py | 87 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 79 insertions(+), 8 deletions(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 1c4faf2b8adb7..8bd1c136cf30a 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -703,7 +703,7 @@ def str_extract(arr, pat, flags=0, expand=True):
     pat : string
         Regular expression pattern with capturing groups.
     flags : int, default 0 (no flags)
-        Re module flags, e.g. re.IGNORECASE.
+        ``re`` module flags, e.g. ``re.IGNORECASE``.
     expand : bool, default True
         If True, return DataFrame, else return Series/Index/DataFrame.
 
@@ -957,23 +957,94 @@ def str_join(arr, sep):
 
 def str_findall(arr, pat, flags=0):
     """
-    Find all occurrences of pattern or regular expression in the
-    Series/Index. Equivalent to :func:`re.findall`.
+    Find all occurrences of pattern or regular expression in the Series/Index.
+
+    Equivalent to applying :func:`re.findall` to all the elements in the
+    Series/Index.
 
     Parameters
     ----------
     pat : string
-        Pattern or regular expression
-    flags : int, default 0 (no flags)
-        re module flags, e.g. re.IGNORECASE
+        Pattern or regular expression.
+    flags : int, default 0
+        ``re`` module flags, e.g. ``re.IGNORECASE`` (default is 0, which means
+        no flags).
 
     Returns
     -------
-    matches : Series/Index of lists
+    Series/Index of lists of strings
+        All non-overlapping matches of pattern or regular expression in each
+        string of this Series/Index.
 
     See Also
     --------
-    extractall : returns DataFrame with one column per capture group
+    count : Count occurrences of pattern or regular expression in each string
+        of the Series/Index.
+    extractall : For each string in the Series, extract groups from all matches
+        of regular expression and return a DataFrame with one row for each
+        match and one column for each group.
+    re.findall : The equivalent ``re`` function to all non-overlapping matches
+        of pattern or regular expression in string, as a list of strings.
+
+    Examples
+    --------
+
+    >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit'])
+
+    The search for the pattern 'Monkey' returns one match:
+
+    >>> s.str.findall('Monkey')
+    0          []
+    1    [Monkey]
+    2          []
+    dtype: object
+
+    On the other hand, the search for the pattern 'MONKEY' doesn't return any
+    match:
+
+    >>> s.str.findall('MONKEY')
+    0    []
+    1    []
+    2    []
+    dtype: object
+
+    Flags can be added to the pattern or regular expression. For instance,
+    to find the pattern 'MONKEY' ignoring the case:
+
+    >>> import re
+    >>> s.str.findall('MONKEY', flags=re.IGNORECASE)
+    0          []
+    1    [Monkey]
+    2          []
+    dtype: object
+
+    When the pattern matches more than one string in the Series, all matches
+    are returned:
+
+    >>> s.str.findall('on')
+    0    [on]
+    1    [on]
+    2      []
+    dtype: object
+
+    Regular expressions are supported too. For instance, the search for all the
+    strings ending with the word 'on' is shown next:
+
+    >>> s.str.findall('on$')
+    0    [on]
+    1      []
+    2      []
+    dtype: object
+
+    If the pattern is found more than once in the same string, then a list of
+    multiple strings is returned:
+
+    >>> s.str.findall('b')
+    0        []
+    1        []
+    2    [b, b]
+    dtype: object
+
     """
     regex = re.compile(pat, flags=flags)
     return _na_map(regex.findall, arr)

From ec8bd444625645386a94387fb8dad94b3b1ddf6b Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sat, 7 Jul 2018 10:21:42 -0500
Subject: [PATCH 5/5] small cleanup

---
 pandas/core/strings.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index a79edf9a6f1ce..9028ce1a77304 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -840,7 +840,7 @@ def _str_extract_frame(arr, pat, flags=0):
 
 def str_extract(arr, pat, flags=0, expand=True):
     r"""
-    Return the match object corresponding to regex `pat`.
+    Extract capture groups in the regex `pat` as columns in a DataFrame.
 
     For each subject string in the Series, extract groups from the
     first match of regular expression `pat`.
@@ -851,10 +851,13 @@ def str_extract(arr, pat, flags=0, expand=True):
         Regular expression pattern with capturing groups.
     flags : int, default 0 (no flags)
         ``re`` module flags, e.g. ``re.IGNORECASE``.
+        See :mod:`re`.
     expand : bool, default True
-        If True, return DataFrame, else return Series/Index/DataFrame.
+        If True, return DataFrame with one column per capture group.
+        If False, return a Series/Index if there is one capture group
+        or DataFrame if there are multiple capture groups.
 
-        .. versionadded:: 0.18.0.
+        .. versionadded:: 0.18.0
 
     Returns
     -------