BUG: Don't raise exceptions splitting a blank string (#20067)

montanalow · TomAugspurger · commit a192650d2b56 · 2018-03-17T14:42:53.000-05:00
* don't raise exceptions splitting a blank string
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -843,6 +843,7 @@ Categorical
   ``self`` but in a different order (:issue:`19551`)
 - Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`)
 - Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`)
+- Bug in :meth:`Series.str.split` with ``expand=True`` incorrectly raising an IndexError on empty strings (:issue:`20002`).
 - Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`)
 - Bug in :class:`Series` constructor with scalar and ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19565`)
 - Bug in ``Categorical.__iter__`` not converting to Python types (:issue:`19909`)
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
@@ -1749,7 +1749,8 @@ def cons_row(x):
             if result:
                 # propagate nan values to match longest sequence (GH 18450)
                 max_len = max(len(x) for x in result)
-                result = [x * max_len if x[0] is np.nan else x for x in result]
+                result = [x * max_len if len(x) == 0 or x[0] is np.nan
+                          else x for x in result]
 
         if not isinstance(expand, bool):
             raise ValueError("expand must be True or False")
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
@@ -1992,6 +1992,19 @@ def test_rsplit(self):
         exp = Series([['a_b', 'c'], ['c_d', 'e'], NA, ['f_g', 'h']])
         tm.assert_series_equal(result, exp)
 
+    def test_split_blank_string(self):
+        # expand blank split GH 20067
+        values = Series([''], name='test')
+        result = values.str.split(expand=True)
+        exp = DataFrame([[]])
+        tm.assert_frame_equal(result, exp)
+
+        values = Series(['a b c', 'a b', '', ' '], name='test')
+        result = values.str.split(expand=True)
+        exp = DataFrame([['a', 'b', 'c'], ['a', 'b', np.nan],
+                         [np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]])
+        tm.assert_frame_equal(result, exp)
+
     def test_split_noargs(self):
         # #1859
         s = Series(['Wes McKinney', 'Travis  Oliphant'])