1
1
import warnings
2
2
3
3
import numpy as np
4
- from pandas import Series
4
+ from pandas import Series , DataFrame
5
5
import pandas .util .testing as tm
6
6
7
7
@@ -12,9 +12,6 @@ class Methods(object):
12
12
def setup(self):
    """Prepare the shared benchmark input.

    Stores in ``self.s`` a Series built from the result of
    ``tm.makeStringIndex`` called with ``10 ** 5``.
    """
    size = 10 ** 5
    strings = tm.makeStringIndex(size)
    self.s = Series(strings)
14
14
15
- def time_cat (self ):
16
- self .s .str .cat (sep = ',' )
17
-
18
15
def time_center(self):
    """Run ``str.center`` with a width of 100 on ``self.s``; the result is discarded."""
    width = 100
    self.s.str.center(width)
20
17
@@ -87,6 +84,32 @@ def time_repeat(self, repeats):
87
84
self .s .str .repeat (self .repeat )
88
85
89
86
87
class Cat(object):
    """Parametrised benchmark for ``Series.str.cat``.

    The parameter grid varies the number of extra columns passed as
    ``others``, the separator, the NaN replacement string and the
    fraction of missing values in the inputs.
    """

    goal_time = 0.2
    params = ([0, 3], [None, ','], [None, '-'], [0.0, 0.001, 0.15])
    param_names = ['other_cols', 'sep', 'na_rep', 'na_frac']

    def setup(self, other_cols, sep, na_rep, na_frac):
        """Build ``self.s`` and ``self.others`` for one parameter combination.

        Parameters
        ----------
        other_cols : int
            Number of columns to put into ``self.others``; ``0`` leaves
            ``self.others`` as ``None``.
        sep, na_rep
            Not read here; accepted so the signature matches ``param_names``.
        na_frac : float
            Probability that any single mask entry is ``False``, i.e. that
            the corresponding value is dropped by ``where``.
        """
        size = 10 ** 5
        mask_probabilities = [1 - na_frac, na_frac]

        def random_mask():
            # A fresh boolean mask per call: True keeps the value, False
            # (drawn with probability na_frac) is blanked out by ``where``.
            return np.random.choice([True, False], size,
                                    p=mask_probabilities)

        self.s = Series(tm.makeStringIndex(size)).where(random_mask())

        # str.cat self-concatenates only for others=None
        self.others = None
        if other_cols != 0:
            columns = {}
            for col in range(other_cols):
                columns[col] = tm.makeStringIndex(size).where(random_mask())
            self.others = DataFrame(columns)

    def time_cat(self, other_cols, sep, na_rep, na_frac):
        """Run a single ``str.cat`` call on the data prepared by ``setup``.

        Only ``sep`` and ``na_rep`` are forwarded; ``other_cols`` and
        ``na_frac`` already took effect when ``setup`` built the inputs.
        """
        # The caller and each of the other_cols columns are masked
        # independently, so before concatenation the expected fraction of
        # rows containing at least one NaN is
        #     reduce(lambda t, _: t + (1 - t) * na_frac,
        #            range(other_cols + 1), 0)
        # which equals 1 - (1 - na_frac) ** (other_cols + 1).
        # For other_cols=3 and na_frac=0.15 this works out to ~48%.
        self.s.str.cat(others=self.others, sep=sep, na_rep=na_rep)
111
+
112
+
90
113
class Contains (object ):
91
114
92
115
goal_time = 0.2
0 commit comments