Skip to content

Commit d881f94

Browse files
committed
Propagating NaN values when using str.split (pandas-dev#18450)
1 parent e6eac0b commit d881f94

File tree

2 files changed

+10
-1
lines changed

2 files changed

+10
-1
lines changed

pandas/core/strings.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1407,7 +1407,6 @@ def _wrap_result(self, result, use_codes=True,
14071407
if not hasattr(result, 'ndim') or not hasattr(result, 'dtype'):
14081408
return result
14091409
assert result.ndim < 3
1410-
14111410
if expand is None:
14121411
# infer from ndim if expand is not specified
14131412
expand = False if result.ndim == 1 else True
@@ -1423,6 +1422,10 @@ def cons_row(x):
14231422
return [x]
14241423

14251424
result = [cons_row(x) for x in result]
1425+
if result:
1426+
# propagate NaN values to match longest sequence (GH 18450)
1427+
max_len = max(len(x) for x in result)
1428+
result = [x * max_len if x[0] is np.nan else x for x in result]
14261429

14271430
if not isinstance(expand, bool):
14281431
raise ValueError("expand must be True or False")

pandas/tests/test_strings.py

+6
Original file line numberDiff line numberDiff line change
@@ -2002,6 +2002,12 @@ def test_split_to_dataframe(self):
20022002
5: [NA, 'not']})
20032003
tm.assert_frame_equal(result, exp)
20042004

2005+
# make sure we propagate NaN values across all columns
2006+
s = Series(["foo,bar,baz", NA])
2007+
result = s.str.split(",", expand=True)
2008+
exp = DataFrame([["foo", "bar", "baz"], [NA, NA, NA]])
2009+
tm.assert_frame_equal(result, exp)
2010+
20052011
s = Series(['some_splits', 'with_index'], index=['preserve', 'me'])
20062012
result = s.str.split('_', expand=True)
20072013
exp = DataFrame({0: ['some', 'with'], 1: ['splits', 'index']},

0 commit comments

Comments
 (0)