Skip to content

Commit 70038ac

Browse files
committed
PERF: increase performance of string split when expand=True
1 parent e34e4be commit 70038ac

File tree

3 files changed

+8
-2
lines changed

3 files changed

+8
-2
lines changed

doc/source/whatsnew/v0.16.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ Performance Improvements
4747
~~~~~~~~~~~~~~~~~~~~~~~~
4848

4949
- Improved ``Series.resample`` performance with dtype=datetime64[ns] (:issue:`7754`)
50+
- Improved performance of ``Series.str.split`` when ``expand=True`` (:issue:`10081`)
5051

5152
.. _whatsnew_0162.bug_fixes:
5253

pandas/core/strings.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22

33
from pandas.compat import zip
4-
from pandas.core.common import isnull, _values_from_object, is_bool_dtype
4+
from pandas.core.common import isnull, _values_from_object, is_bool_dtype, is_list_like
55
import pandas.compat as compat
66
from pandas.util.decorators import Appender, deprecate_kwarg
77
import re
@@ -1090,7 +1090,11 @@ def _wrap_result_expand(self, result, expand=False):
10901090
else:
10911091
index = self.series.index
10921092
if expand:
1093-
cons_row = self.series._constructor
1093+
def cons_row(x):
1094+
if is_list_like(x):
1095+
return x
1096+
else:
1097+
return [ x ]
10941098
cons = self.series._constructor_expanddim
10951099
data = [cons_row(x) for x in result]
10961100
return cons(data, index=index)

vb_suite/strings.py

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def make_series(letters, strlen, size):
3535
strings_match = Benchmark("many.str.match(r'mat..this')", setup)
3636
strings_extract = Benchmark("many.str.extract(r'(\w*)matchthis(\w*)')", setup)
3737
strings_join_split = Benchmark("many.str.join(r'--').str.split('--')", setup)
38+
strings_join_split_expand = Benchmark("many.str.join(r'--').str.split('--',expand=True)", setup)
3839
strings_len = Benchmark("many.str.len()", setup)
3940
strings_findall = Benchmark("many.str.findall(r'[A-Z]+')", setup)
4041
strings_pad = Benchmark("many.str.pad(100, side='both')", setup)

0 commit comments

Comments
 (0)