Skip to content

Commit baf54ed

Browse files
committed
BUG: Series.str.split bug if split on more than 1 char #2513
1 parent 709d8ad commit baf54ed

File tree

2 files changed

+35
-2
lines changed

2 files changed

+35
-2
lines changed

pandas/core/strings.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@ def str_center(arr, width):
392392
return str_pad(arr, width, side='both')
393393

394394

395-
def str_split(arr, pat=None, n=-1):
395+
def str_split(arr, pat=None, n=None):
396396
"""
397397
Split each string (a la re.split) in array by given pattern, propagating NA
398398
values
@@ -401,18 +401,28 @@ def str_split(arr, pat=None, n=-1):
401401
----------
402402
pat : string, default None
403403
String or regular expression to split on. If None, splits on whitespace
404-
n : int, default -1 (all)
404+
n : int, default None (all)
405+
406+
Notes
407+
-----
408+
Both 0 and -1 are interpreted as "return all splits".
405409
406410
Returns
407411
-------
408412
split : array
409413
"""
410414
if pat is None:
415+
if n is None or n == 0:
416+
n = -1
411417
f = lambda x: x.split()
412418
else:
413419
if len(pat) == 1:
420+
if n is None or n == 0:
421+
n = -1
414422
f = lambda x: x.split(pat, n)
415423
else:
424+
if n is None or n == -1:
425+
n = 0
416426
regex = re.compile(pat)
417427
f = lambda x: regex.split(x, maxsplit=n)
418428

pandas/tests/test_strings.py

+23
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,11 @@ def test_split(self):
456456
exp = Series([['a', 'b', 'c'], ['c', 'd', 'e'], NA, ['f', 'g', 'h']])
457457
tm.assert_series_equal(result, exp)
458458

459+
#more than one char
460+
values = Series(['a__b__c', 'c__d__e', NA, 'f__g__h'])
461+
result = values.str.split('__')
462+
tm.assert_series_equal(result, exp)
463+
459464
#mixed
460465
mixed = Series(['a_b_c', NA, 'd_e_f', True, datetime.today(),
461466
None, 1, 2.])
@@ -482,6 +487,24 @@ def test_split_noargs(self):
482487
result = s.str.split()
483488
self.assertEquals(result[1], ['Travis', 'Oliphant'])
484489

490+
def test_split_maxsplit(self):
491+
#re.split 0, str.split -1
492+
s = Series(['bd asdf jfg', 'kjasdflqw asdfnfk'])
493+
494+
result = s.str.split(n=-1)
495+
xp = s.str.split()
496+
tm.assert_series_equal(result, xp)
497+
498+
result = s.str.split(n=0)
499+
tm.assert_series_equal(result, xp)
500+
501+
xp = s.str.split('asdf')
502+
result = s.str.split('asdf', n=0)
503+
tm.assert_series_equal(result, xp)
504+
505+
result = s.str.split('asdf', n=-1)
506+
tm.assert_series_equal(result, xp)
507+
485508
def test_pipe_failures(self):
486509
# #2119
487510
s = Series(['A|B|C'])

0 commit comments

Comments
 (0)