From a08e9403b796743ecd00980e57f85cfbb051166f Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Thu, 23 Nov 2017 21:46:57 -0500
Subject: [PATCH 1/2] Propagating NaN values when using str.split (#18450)

---
 doc/source/whatsnew/v0.21.1.txt | 2 +-
 pandas/core/strings.py          | 4 ++++
 pandas/tests/test_strings.py    | 9 +++++++++
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt
index 637ccf0603e0f..a0d19fa25f188 100644
--- a/doc/source/whatsnew/v0.21.1.txt
+++ b/doc/source/whatsnew/v0.21.1.txt
@@ -141,6 +141,6 @@ Categorical
 Other
 ^^^^^
 
--
+- :meth:`Series.str.split()` will now propogate ``NaN`` values across all expanded columns instead of ``None`` (:issue:`18450`)
 -
 -
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index abef6f6086dbd..9614641aa1abf 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -1423,6 +1423,10 @@ def cons_row(x):
                     return [x]
 
             result = [cons_row(x) for x in result]
+            if result:
+                # propagate nan values to match longest sequence (GH 18450)
+                max_len = max(len(x) for x in result)
+                result = [x * max_len if x[0] is np.nan else x for x in result]
 
         if not isinstance(expand, bool):
             raise ValueError("expand must be True or False")
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index f1b97081b6d93..512579cdc8ab9 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -2086,6 +2086,15 @@ def test_rsplit_to_multiindex_expand(self):
         tm.assert_index_equal(result, exp)
         assert result.nlevels == 2
 
+    def test_split_nan_expand(self):
+        s = Series(["foo,bar,baz", NA])
+        result = s.str.split(",", expand=True)
+        exp = DataFrame([["foo", "bar", "baz"], [NA, NA, NA]])
+        tm.assert_frame_equal(result, exp)
+
+        # extra nan check - see GH 18463
+        assert all(np.isnan(x) for x in result.iloc[1])
+
     def test_split_with_name(self):
         # GH 12617
 

From 5c644e86969745cd5c43df76316962c260d0e316 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sat, 25 Nov 2017 16:47:18 -0500
Subject: [PATCH 2/2] small doc edits

---
 doc/source/whatsnew/v0.21.1.txt | 6 +++++-
 pandas/tests/test_strings.py    | 5 ++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt
index a0d19fa25f188..576b22fb990b1 100644
--- a/doc/source/whatsnew/v0.21.1.txt
+++ b/doc/source/whatsnew/v0.21.1.txt
@@ -138,9 +138,13 @@ Categorical
 - ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`)
 - Bug in ``Categorical.unique()`` returning read-only ``codes``  array when all categories were ``NaN`` (:issue:`18051`)
 
+String
+^^^^^^
+
+- :meth:`Series.str.split()` will now propagate ``NaN`` values across all expanded columns instead of ``None`` (:issue:`18450`)
+
 Other
 ^^^^^
 
-- :meth:`Series.str.split()` will now propogate ``NaN`` values across all expanded columns instead of ``None`` (:issue:`18450`)
 -
 -
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index 512579cdc8ab9..8aa69bcbfdf7f 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -2087,12 +2087,15 @@ def test_rsplit_to_multiindex_expand(self):
         assert result.nlevels == 2
 
     def test_split_nan_expand(self):
+        # gh-18450
         s = Series(["foo,bar,baz", NA])
         result = s.str.split(",", expand=True)
         exp = DataFrame([["foo", "bar", "baz"], [NA, NA, NA]])
         tm.assert_frame_equal(result, exp)
 
-        # extra nan check - see GH 18463
+        # check that these are actually np.nan and not None
+        # TODO see GH 18463
+        # tm.assert_frame_equal does not differentiate
         assert all(np.isnan(x) for x in result.iloc[1])
 
     def test_split_with_name(self):