From 72dd2524182a725fe644100ab2916ba86daa89d7 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Thu, 8 Mar 2018 19:13:34 -0800 Subject: [PATCH 1/2] don't raise exceptions splitting a blank string --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/strings.py | 3 ++- pandas/tests/test_strings.py | 13 +++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 791365295c268..f451399be59da 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -839,6 +839,7 @@ Categorical ``self`` but in a different order (:issue:`19551`) - Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) - Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) +- Bug in :meth:`Series.str.split` with `expand=True` on blank strings causing exceptions. - Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`) - Bug in :class:`Series` constructor with scalar and ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19565`) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index cb55108e9d05a..75ff1ba9d5a5e 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1633,7 +1633,8 @@ def cons_row(x): if result: # propagate nan values to match longest sequence (GH 18450) max_len = max(len(x) for x in result) - result = [x * max_len if x[0] is np.nan else x for x in result] + result = [x * max_len if len(x) == 0 or x[0] is np.nan + else x for x in result] if not isinstance(expand, bool): raise ValueError("expand must be True or False") diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index a878d6ed7b052..aa94b992facfc 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1992,6 +1992,19 @@ def test_rsplit(self): exp = Series([['a_b', 'c'], ['c_d', 'e'], NA, ['f_g', 'h']]) tm.assert_series_equal(result, exp) + def test_split_blank_string(self): + # expand blank split GH 20067 + values = Series([''], name='test') + result = values.str.split(expand=True) + exp = DataFrame([[]]) + tm.assert_frame_equal(result, exp) + + values = Series(['a b c', 'a b', '', ' '], name='test') + result = values.str.split(expand=True) + exp = DataFrame([['a', 'b', 'c'], ['a', 'b', np.nan], + [np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]]) + tm.assert_frame_equal(result, exp) + def test_split_noargs(self): # #1859 s = Series(['Wes McKinney', 'Travis Oliphant']) From 7fb3297272a4c94deb37b2c786b4aa686bc5f783 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 17 Mar 2018 14:42:10 -0500 Subject: [PATCH 2/2] Release note [ci skip] [ci skip] --- doc/source/whatsnew/v0.23.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index f451399be59da..cb06d4e197859 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -839,7 +839,7 @@ Categorical ``self`` but in a different order (:issue:`19551`) - Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) - Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) -- Bug in :meth:`Series.str.split` with `expand=True` on blank strings causing exceptions. +- Bug in :meth:`Series.str.split` with ``expand=True`` incorrectly raising an IndexError on empty strings (:issue:`20002`). - Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`) - Bug in :class:`Series` constructor with scalar and ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19565`)