PERF: Allow str.split callers to skip expensive post-processing #35223

Closed
wants to merge 6 commits into from
64 changes: 44 additions & 20 deletions pandas/core/strings.py
@@ -2184,6 +2184,7 @@ def _wrap_result(
expand=None,
fill_value=np.nan,
returns_string=True,
pad_sequences=True,
):

from pandas import Index, Series, MultiIndex
@@ -2217,22 +2218,19 @@ def _wrap_result(
expand = result.ndim != 1

elif expand is True and not isinstance(self._orig, ABCIndexClass):
# required when expand=True is explicitly specified
# not needed when inferred

def cons_row(x):
Contributor

you could just write a short cython routine which does all of this
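
For context, "all of this" refers to the scalar-wrapping and NaN-padding pass shown in the surrounding diff. A minimal pure-Python sketch of what such a consolidated helper would do (the name `_pad_rows` is hypothetical and not part of this PR) might look like:

```python
import numpy as np
from pandas.api.types import is_list_like

def _pad_rows(result):
    # Wrap bare scalars so every row is a sequence.
    rows = [x if is_list_like(x) else [x] for x in result]
    if rows:
        # Repeat rows that are empty or start with NaN max_len times, so a
        # lone NaN expands to fill every column (GH 18450); this mirrors the
        # list comprehension in the diff below.
        max_len = max(len(x) for x in rows)
        rows = [x * max_len if len(x) == 0 or x[0] is np.nan else x for x in rows]
    return rows
```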

if is_list_like(x):
return x
else:
return [x]

result = [cons_row(x) for x in result]
if result:
# propagate nan values to match longest sequence (GH 18450)
max_len = max(len(x) for x in result)
result = [
x * max_len if len(x) == 0 or x[0] is np.nan else x for x in result
]
if pad_sequences:
# required when expand=True is explicitly specified
# not needed when inferred
result = [x if is_list_like(x) else [x] for x in result]
if result:
# propagate nan values to match longest sequence (GH 18450)
max_len = max(len(x) for x in result)
result = [
x * max_len if len(x) == 0 or x[0] is np.nan else x
for x in result
]
else:
result = result.tolist()

if not isinstance(expand, bool):
raise ValueError("expand must be True or False")
@@ -2569,6 +2567,13 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):

* If ``True``, return DataFrame/MultiIndex expanding dimensionality.
* If ``False``, return Series/Index, containing lists of strings.
pad_sequences : bool, default True
When expand is ``True``, pad the end of each resulting sequence with
``nan`` to the length of the longest sequence, ensuring each row in the
resulting DataFrame has the same number of columns.
If ``False``, you must ensure beforehand that the split strings will have
uniform lengths.
Has no effect when expand is ``False``.

Returns
-------
@@ -2681,19 +2686,38 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
>>> s.str.split(r"\+|=", expand=True)
0 1 2
0 1 1 2

When using ``expand=True``, if you have already verified that the
particular split will result in sequences of uniform length, you may opt
out of the (sometimes expensive) length normalization process with
``pad_sequences=False``.

>>> s = pd.Series(["foo bar", "baz qaz"])
>>> s
0 foo bar
1 baz qaz
dtype: object
>>> s.str.split(expand=True, pad_sequences=False)
0 1
0 foo bar
1 baz qaz
"""

@Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"})
@forbid_nonstring_types(["bytes"])
def split(self, pat=None, n=-1, expand=False):
def split(self, pat=None, n=-1, expand=False, pad_sequences=True):
result = str_split(self._parent, pat, n=n)
return self._wrap_result(result, expand=expand, returns_string=expand)
return self._wrap_result(
result, expand=expand, returns_string=expand, pad_sequences=pad_sequences
)

@Appender(_shared_docs["str_split"] % {"side": "end", "method": "rsplit"})
@forbid_nonstring_types(["bytes"])
def rsplit(self, pat=None, n=-1, expand=False):
def rsplit(self, pat=None, n=-1, expand=False, pad_sequences=True):
result = str_rsplit(self._parent, pat, n=n)
return self._wrap_result(result, expand=expand, returns_string=expand)
return self._wrap_result(
result, expand=expand, returns_string=expand, pad_sequences=pad_sequences
)

_shared_docs[
"str_partition"
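
As a closing illustration of the intended call pattern, a rough comparison sketch (illustrative only: it assumes a pandas build containing this branch, and the timings are machine-dependent) for a caller who already knows the split is uniform:

```python
import timeit

import pandas as pd

# Every row splits into exactly two tokens, so the padding pass is redundant.
s = pd.Series(["foo bar"] * 1_000_000)

default = timeit.timeit(lambda: s.str.split(expand=True), number=1)
skipped = timeit.timeit(
    lambda: s.str.split(expand=True, pad_sequences=False), number=1
)
print(f"default: {default:.2f}s  pad_sequences=False: {skipped:.2f}s")
```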