Skip to content

BUG: Index.str.partition not nan-safe (#23558) #23618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Nov 18, 2018
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1192,7 +1192,7 @@ Numeric
Strings
^^^^^^^

-
- Bug in :meth:`Index.str.partition` was not nan-safe (:issue:`23558`)
-
-

Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2274,7 +2274,7 @@ def to_object_array_tuples(rows: list):

k = 0
for i in range(n):
tmp = len(rows[i])
tmp = 1 if util.is_nan(rows[i]) else len(rows[i])
if tmp > k:
k = tmp

Expand All @@ -2288,7 +2288,7 @@ def to_object_array_tuples(rows: list):
except Exception:
# upcast any subclasses to tuple
for i in range(n):
row = tuple(rows[i])
row = (rows[i],) if util.is_nan(rows[i]) else tuple(rows[i])
for j in range(len(row)):
result[i, j] = row[j]

Expand Down
18 changes: 10 additions & 8 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2493,7 +2493,7 @@ def test_partition_series(self):
('f g', ' ', 'h')])
tm.assert_series_equal(result, exp)

# Not splited
# Not split
values = Series(['abc', 'cde', NA, 'fgh'])
result = values.str.partition('_', expand=False)
exp = Series([('abc', '', ''), ('cde', '', ''), NA, ('fgh', '', '')])
Expand Down Expand Up @@ -2524,28 +2524,30 @@ def test_partition_series(self):
assert result == [v.rpartition('_') for v in values]

def test_partition_index(self):
values = Index(['a_b_c', 'c_d_e', 'f_g_h'])
values = Index(['a_b_c', 'c_d_e', 'f_g_h', np.nan])

result = values.str.partition('_', expand=False)
exp = Index(np.array([('a', '_', 'b_c'), ('c', '_', 'd_e'), ('f', '_',
'g_h')]))
exp = Index(np.array([('a', '_', 'b_c'), ('c', '_', 'd_e'),
('f', '_', 'g_h'), np.nan]))
tm.assert_index_equal(result, exp)
assert result.nlevels == 1

result = values.str.rpartition('_', expand=False)
exp = Index(np.array([('a_b', '_', 'c'), ('c_d', '_', 'e'), (
'f_g', '_', 'h')]))
exp = Index(np.array([('a_b', '_', 'c'), ('c_d', '_', 'e'),
('f_g', '_', 'h'), np.nan]))
tm.assert_index_equal(result, exp)
assert result.nlevels == 1

result = values.str.partition('_')
exp = Index([('a', '_', 'b_c'), ('c', '_', 'd_e'), ('f', '_', 'g_h')])
exp = Index([('a', '_', 'b_c'), ('c', '_', 'd_e'),
('f', '_', 'g_h'), (np.nan, np.nan, np.nan)])
tm.assert_index_equal(result, exp)
assert isinstance(result, MultiIndex)
assert result.nlevels == 3

result = values.str.rpartition('_')
exp = Index([('a_b', '_', 'c'), ('c_d', '_', 'e'), ('f_g', '_', 'h')])
exp = Index([('a_b', '_', 'c'), ('c_d', '_', 'e'),
('f_g', '_', 'h'), (np.nan, np.nan, np.nan)])
tm.assert_index_equal(result, exp)
assert isinstance(result, MultiIndex)
assert result.nlevels == 3
Expand Down