Skip to content

Commit a08e940

Browse files
committed
Propagating NaN values when using str.split (#18450)
1 parent be66ef8 commit a08e940

File tree

3 files changed

+14
-1
lines changed

3 files changed

+14
-1
lines changed

doc/source/whatsnew/v0.21.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,6 @@ Categorical
141141
Other
142142
^^^^^
143143

144-
-
144+
- :meth:`Series.str.split()` will now propagate ``NaN`` values across all expanded columns instead of ``None`` (:issue:`18450`)
145145
-
146146
-

pandas/core/strings.py

+4
Original file line numberDiff line numberDiff line change
@@ -1423,6 +1423,10 @@ def cons_row(x):
14231423
return [x]
14241424

14251425
result = [cons_row(x) for x in result]
1426+
if result:
1427+
# propagate nan values to match longest sequence (GH 18450)
1428+
max_len = max(len(x) for x in result)
1429+
result = [x * max_len if x[0] is np.nan else x for x in result]
14261430

14271431
if not isinstance(expand, bool):
14281432
raise ValueError("expand must be True or False")

pandas/tests/test_strings.py

+9
Original file line numberDiff line numberDiff line change
@@ -2086,6 +2086,15 @@ def test_rsplit_to_multiindex_expand(self):
20862086
tm.assert_index_equal(result, exp)
20872087
assert result.nlevels == 2
20882088

2089+
def test_split_nan_expand(self):
2090+
s = Series(["foo,bar,baz", NA])
2091+
result = s.str.split(",", expand=True)
2092+
exp = DataFrame([["foo", "bar", "baz"], [NA, NA, NA]])
2093+
tm.assert_frame_equal(result, exp)
2094+
2095+
# extra nan check - see GH 18463
2096+
assert all(np.isnan(x) for x in result.iloc[1])
2097+
20892098
def test_split_with_name(self):
20902099
# GH 12617
20912100

0 commit comments

Comments
 (0)