Skip to content

Commit d64995a

Browse files
committed
Propagating NaN values when using str.split (#18450)
1 parent e6eac0b commit d64995a

File tree

2 files changed

+10
-0
lines changed

2 files changed

+10
-0
lines changed

pandas/core/strings.py

+4
Original file line numberDiff line numberDiff line change
@@ -1423,6 +1423,10 @@ def cons_row(x):
14231423
return [x]
14241424

14251425
result = [cons_row(x) for x in result]
1426+
if result:
1427+
# propagate NaN values to match longest sequence (GH 18450)
1428+
max_len = max(len(x) for x in result)
1429+
result = [x * max_len if x[0] is np.nan else x for x in result]
14261430

14271431
if not isinstance(expand, bool):
14281432
raise ValueError("expand must be True or False")

pandas/tests/test_strings.py

+6
Original file line numberDiff line numberDiff line change
@@ -2002,6 +2002,12 @@ def test_split_to_dataframe(self):
20022002
5: [NA, 'not']})
20032003
tm.assert_frame_equal(result, exp)
20042004

2005+
# make sure we propagate NaN values across all columns
2006+
s = Series(["foo,bar,baz", NA])
2007+
result = s.str.split(",", expand=True)
2008+
exp = DataFrame([["foo", "bar", "baz"], [NA, NA, NA]])
2009+
tm.assert_frame_equal(result, exp)
2010+
20052011
s = Series(['some_splits', 'with_index'], index=['preserve', 'me'])
20062012
result = s.str.split('_', expand=True)
20072013
exp = DataFrame({0: ['some', 'with'], 1: ['splits', 'index']},

0 commit comments

Comments
 (0)