simplify str_extract(), pass name into _wrap_result()

mortada · mortada · commit a0545a8c8a9d · 2015-05-07T13:42:50.000-07:00
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
@@ -484,17 +484,12 @@ def f(x):
             return empty_row
 
     if regex.groups == 1:
-        if isinstance(arr, Index):
-            result = Index([f(val)[0] for val in arr],
-                           name=_get_single_group_name(regex),
-                           dtype=object)
-        else:
-            result = Series([f(val)[0] for val in arr],
-                            name=_get_single_group_name(regex),
-                            index=arr.index, dtype=object)
+        result = np.array([f(val)[0] for val in arr], dtype=object)
+        name = _get_single_group_name(regex)
     else:
         if isinstance(arr, Index):
             raise ValueError("only one regex group is supported with Index")
+        name = None
         names = dict(zip(regex.groupindex.values(), regex.groupindex.keys()))
         columns = [names.get(1 + i, i) for i in range(regex.groups)]
         if arr.empty:
@@ -504,7 +499,7 @@ def f(x):
                                columns=columns,
                                index=arr.index,
                                dtype=object)
-    return result
+    return result, name
 
 
 def str_get_dummies(arr, sep='|'):
@@ -1005,7 +1000,7 @@ def __iter__(self):
             i += 1
             g = self.get(i)
 
-    def _wrap_result(self, result):
+    def _wrap_result(self, result, **kwargs):
         # leave as it is to keep extract and get_dummies results
         # can be merged to _wrap_result_expand in v0.17
         from pandas.core.series import Series
@@ -1014,16 +1009,20 @@ def _wrap_result(self, result):
 
         if not hasattr(result, 'ndim'):
             return result
-        elif result.ndim == 1:
-            name = getattr(result, 'name', None)
+
+        if 'name' in kwargs:
+            name = kwargs['name']
+        else:
+            name = getattr(result, 'name', None) or self.series.name
+
+        if result.ndim == 1:
             if isinstance(self.series, Index):
                 # if result is a boolean np.array, return the np.array
                 # instead of wrapping it into a boolean Index (GH 8875)
                 if is_bool_dtype(result):
                     return result
-                return Index(result, name=name or self.series.name)
-            return Series(result, index=self.series.index,
-                          name=name or self.series.name)
+                return Index(result, name=name)
+            return Series(result, index=self.series.index, name=name)
         else:
             assert result.ndim < 3
             return DataFrame(result, index=self.series.index)
@@ -1271,7 +1270,11 @@ def get_dummies(self, sep='|'):
     startswith = _pat_wrapper(str_startswith, na=True)
     endswith = _pat_wrapper(str_endswith, na=True)
     findall = _pat_wrapper(str_findall, flags=True)
-    extract = _pat_wrapper(str_extract, flags=True)
+
+    @copy(str_extract)
+    def extract(self, pat, flags=0):
+        result, name = str_extract(self.series, pat, flags=flags)
+        return self._wrap_result(result, name=name)
 
     _shared_docs['find'] = ("""
     Return %(side)s indexes in each strings in the Series/Index