Skip to content

Commit 6da5a72

Browse files
h-vetinari authored and
TomAugspurger committed
ASV: more for str.cat (pandas-dev#22652)
1 parent 59cfd8c commit 6da5a72

File tree

1 file changed

+27
-4
lines changed

1 file changed

+27
-4
lines changed

asv_bench/benchmarks/strings.py

+27 -4
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import warnings
22

33
import numpy as np
4-
from pandas import Series
4+
from pandas import Series, DataFrame
55
import pandas.util.testing as tm
66

77

@@ -12,9 +12,6 @@ class Methods(object):
1212
def setup(self):
1313
self.s = Series(tm.makeStringIndex(10**5))
1414

15-
def time_cat(self):
16-
self.s.str.cat(sep=',')
17-
1815
def time_center(self):
1916
self.s.str.center(100)
2017

@@ -87,6 +84,32 @@ def time_repeat(self, repeats):
8784
self.s.str.repeat(self.repeat)
8885

8986

87+
class Cat(object):
    """Benchmark ``Series.str.cat`` over a grid of argument combinations.

    The grid varies the number of additional columns to concatenate, the
    separator, the NaN replacement and the fraction of missing values.
    Each combination from ``params`` is handed to ``setup`` and
    ``time_cat`` under the names listed in ``param_names``.
    """

    # same timing target as the sibling benchmark classes in this file
    goal_time = 0.2
    # One list of candidate values per entry of ``param_names``:
    #   other_cols - number of extra string columns passed as ``others``
    #   sep        - value forwarded as ``sep`` to ``str.cat``
    #   na_rep     - value forwarded as ``na_rep`` to ``str.cat``
    #   na_frac    - probability that a single element is masked to NaN
    params = ([0, 3], [None, ','], [None, '-'], [0.0, 0.001, 0.15])
    param_names = ['other_cols', 'sep', 'na_rep', 'na_frac']

    def setup(self, other_cols, sep, na_rep, na_frac):
        """Create the calling Series and the optional ``others`` frame.

        ``sep`` and ``na_rep`` are not used here; they are part of the
        signature only because every parameter is passed to ``setup``.
        """
        N = 10 ** 5

        def mask_gen():
            # ``where`` keeps the entries whose mask value is True, so True
            # is drawn with probability 1 - na_frac and the remaining
            # (False) positions become NaN.
            return np.random.choice([True, False], N,
                                    p=[1 - na_frac, na_frac])

        # a fresh mask is drawn for the caller and for every extra column
        self.s = Series(tm.makeStringIndex(N)).where(mask_gen())
        if other_cols == 0:
            # str.cat self-concatenates only for others=None
            self.others = None
        else:
            self.others = DataFrame({i: tm.makeStringIndex(N).where(mask_gen())
                                     for i in range(other_cols)})

    def time_cat(self, other_cols, sep, na_rep, na_frac):
        """Time one ``str.cat`` call on the data prepared by ``setup``."""
        # before the concatenation (one caller + other_cols columns), the total
        # expected fraction of rows containing any NaN is:
        # reduce(lambda t, _: t + (1 - t) * na_frac, range(other_cols + 1), 0)
        # for other_cols=3 and na_frac=0.15, this works out to ~48%
        self.s.str.cat(others=self.others, sep=sep, na_rep=na_rep)
111+
112+
90113
class Contains(object):
91114

92115
goal_time = 0.2

0 commit comments

Comments
 (0)