Skip to content

Commit fe5cc26

Browse files
committed
BUG: fix str.split failure with pipe '|' pattern. close #2119
1 parent 0a04723 commit fe5cc26

File tree

3 files changed

+36
-14
lines changed

3 files changed

+36
-14
lines changed

RELEASE.rst

+1
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ pandas 0.9.1
4242
- Fix zero-trimming DataFrame formatting bug
4343
- Correctly compute/box datetime64 min/max values from Series.min/max (#2083)
4444
- Fix unstacking edge case with unrepresented groups (#2100)
45+
- Fix Series.str failures when using pipe pattern '|' (#2119)
4546
4647
pandas 0.9.0
4748
============

pandas/core/strings.py

+21-14
Original file line number | Diff line number | Diff line change
@@ -299,7 +299,7 @@ def str_upper(arr):
299299
return _na_map(lambda x: x.upper(), arr)
300300

301301

302-
def str_replace(arr, pat, repl, n=0, case=True, flags=0):
302+
def str_replace(arr, pat, repl, n=-1, case=True, flags=0):
303303
"""
304304
Replace
305305
@@ -309,7 +309,7 @@ def str_replace(arr, pat, repl, n=0, case=True, flags=0):
309309
Character sequence or regular expression
310310
repl : string
311311
Replacement sequence
312-
n : int, default 0 (all)
312+
n : int, default -1 (all)
313313
Number of replacements to make from start
314314
case : boolean, default True
315315
If True, case sensitive
@@ -320,13 +320,17 @@ def str_replace(arr, pat, repl, n=0, case=True, flags=0):
320320
-------
321321
replaced : array
322322
"""
323-
if not case:
324-
flags |= re.IGNORECASE
325-
326-
regex = re.compile(pat, flags=flags)
323+
use_re = not case or len(pat) > 1 or flags
327324

328-
def f(x):
329-
return regex.sub(repl, x, count=n)
325+
if use_re:
326+
if not case:
327+
flags |= re.IGNORECASE
328+
regex = re.compile(pat, flags=flags)
329+
n = n if n >= 0 else 0
330+
def f(x):
331+
return regex.sub(repl, x, count=n)
332+
else:
333+
f = lambda x: x.replace(pat, repl, n)
330334

331335
return _na_map(f, arr)
332336

@@ -480,7 +484,7 @@ def str_center(arr, width):
480484
return str_pad(arr, width, side='both')
481485

482486

483-
def str_split(arr, pat=None, n=0):
487+
def str_split(arr, pat=None, n=-1):
484488
"""
485489
Split each string (a la re.split) in array by given pattern, propagating NA
486490
values
@@ -489,7 +493,7 @@ def str_split(arr, pat=None, n=0):
489493
----------
490494
pat : string, default None
491495
String or regular expression to split on. If None, splits on whitespace
492-
n : int, default 0 (all)
496+
n : int, default -1 (all)
493497
494498
Returns
495499
-------
@@ -498,8 +502,11 @@ def str_split(arr, pat=None, n=0):
498502
if pat is None:
499503
f = lambda x: x.split()
500504
else:
501-
regex = re.compile(pat)
502-
f = lambda x: regex.split(x, maxsplit=n)
505+
if len(pat) == 1:
506+
f = lambda x: x.split(pat, n)
507+
else:
508+
regex = re.compile(pat)
509+
f = lambda x: regex.split(x, maxsplit=n)
503510

504511
return _na_map(f, arr)
505512

@@ -709,7 +716,7 @@ def cat(self, others=None, sep=None, na_rep=None):
709716
return self._wrap_result(result)
710717

711718
@copy(str_split)
712-
def split(self, pat=None, n=0):
719+
def split(self, pat=None, n=-1):
713720
result = str_split(self.series, pat, n=n)
714721
return self._wrap_result(result)
715722

@@ -730,7 +737,7 @@ def contains(self, pat, case=True, flags=0, na=np.nan):
730737
return self._wrap_result(result)
731738

732739
@copy(str_replace)
733-
def replace(self, pat, repl, n=0, case=True):
740+
def replace(self, pat, repl, n=-1, case=True):
734741
result = str_replace(self.series, pat, repl, n=n, case=case)
735742
return self._wrap_result(result)
736743

pandas/tests/test_strings.py

+14
Original file line number | Diff line number | Diff line change
@@ -480,6 +480,20 @@ def test_split_noargs(self):
480480
result = s.str.split()
481481
self.assertEquals(result[1], ['Travis', 'Oliphant'])
482482

483+
def test_pipe_failures(self):
484+
# #2119
485+
s = Series(['A|B|C'])
486+
487+
result = s.str.split('|')
488+
exp = Series([['A', 'B', 'C']])
489+
490+
tm.assert_series_equal(result, exp)
491+
492+
result = s.str.replace('|', ' ')
493+
exp = Series(['A B C'])
494+
495+
tm.assert_series_equal(result, exp)
496+
483497
def test_slice(self):
484498
values = Series(['aafootwo','aabartwo', NA, 'aabazqux'])
485499

0 commit comments

Comments
 (0)