11
11
from .pandas_vb_common import tm
12
12
13
13
14
+ class Dtypes :
15
+ params = ["str" , "string" , "arrow_string" ]
16
+ param_names = ["dtype" ]
17
+
18
+ def setup (self , dtype ):
19
+ from pandas .core .arrays .string_arrow import ArrowStringDtype # noqa: F401
20
+
21
+ try :
22
+ self .s = Series (tm .makeStringIndex (10 ** 5 ), dtype = dtype )
23
+ except ImportError :
24
+ raise NotImplementedError
25
+
26
+
14
27
class Construction :
15
28
16
29
params = ["str" , "string" ]
@@ -49,18 +62,7 @@ def peakmem_cat_frame_construction(self, dtype):
49
62
DataFrame (self .frame_cat_arr , dtype = dtype )
50
63
51
64
52
- class Methods :
53
- params = ["str" , "string" , "arrow_string" ]
54
- param_names = ["dtype" ]
55
-
56
- def setup (self , dtype ):
57
- from pandas .core .arrays .string_arrow import ArrowStringDtype # noqa: F401
58
-
59
- try :
60
- self .s = Series (tm .makeStringIndex (10 ** 5 ), dtype = dtype )
61
- except ImportError :
62
- raise NotImplementedError
63
-
65
+ class Methods (Dtypes ):
64
66
def time_center (self , dtype ):
65
67
self .s .str .center (100 )
66
68
@@ -83,6 +85,9 @@ def time_find(self, dtype):
83
85
def time_rfind (self , dtype ):
84
86
self .s .str .rfind ("[A-Z]+" )
85
87
88
+ def time_fullmatch (self , dtype ):
89
+ self .s .str .fullmatch ("A" )
90
+
86
91
def time_get (self , dtype ):
87
92
self .s .str .get (0 )
88
93
@@ -211,43 +216,53 @@ def time_cat(self, other_cols, sep, na_rep, na_frac):
211
216
self .s .str .cat (others = self .others , sep = sep , na_rep = na_rep )
212
217
213
218
214
- class Contains :
219
+ class Contains ( Dtypes ) :
215
220
216
- params = ([ "str" , "string" , "arrow_string" ] , [True , False ])
221
+ params = (Dtypes . params , [True , False ])
217
222
param_names = ["dtype" , "regex" ]
218
223
219
224
def setup (self , dtype , regex ):
220
- from pandas .core .arrays .string_arrow import ArrowStringDtype # noqa: F401
221
-
222
- try :
223
- self .s = Series (tm .makeStringIndex (10 ** 5 ), dtype = dtype )
224
- except ImportError :
225
- raise NotImplementedError
225
+ super ().setup (dtype )
226
226
227
227
def time_contains (self , dtype , regex ):
228
228
self .s .str .contains ("A" , regex = regex )
229
229
230
230
231
- class Split :
231
+ class Split ( Dtypes ) :
232
232
233
- params = [True , False ]
234
- param_names = ["expand" ]
233
+ params = ( Dtypes . params , [True , False ])
234
+ param_names = ["dtype" , "expand" ]
235
235
236
- def setup (self , expand ):
237
- self .s = Series (tm .makeStringIndex (10 ** 5 )).str .join ("--" )
236
+ def setup (self , dtype , expand ):
237
+ super ().setup (dtype )
238
+ self .s = self .s .str .join ("--" )
238
239
239
- def time_split (self , expand ):
240
+ def time_split (self , dtype , expand ):
240
241
self .s .str .split ("--" , expand = expand )
241
242
242
- def time_rsplit (self , expand ):
243
+ def time_rsplit (self , dtype , expand ):
243
244
self .s .str .rsplit ("--" , expand = expand )
244
245
245
246
246
- class Dummies :
247
- def setup (self ):
248
- self .s = Series (tm .makeStringIndex (10 ** 5 )).str .join ("|" )
247
+ class Extract (Dtypes ):
248
+
249
+ params = (Dtypes .params , [True , False ])
250
+ param_names = ["dtype" , "expand" ]
251
+
252
+ def setup (self , dtype , expand ):
253
+ super ().setup (dtype )
254
+
255
+ def time_extract_single_group (self , dtype , expand ):
256
+ with warnings .catch_warnings (record = True ):
257
+ self .s .str .extract ("(\\w*)A" , expand = expand )
249
258
250
- def time_get_dummies (self ):
259
+
260
+ class Dummies (Dtypes ):
261
+ def setup (self , dtype ):
262
+ super ().setup (dtype )
263
+ self .s = self .s .str .join ("|" )
264
+
265
+ def time_get_dummies (self , dtype ):
251
266
self .s .str .get_dummies ("|" )
252
267
253
268
@@ -266,3 +281,9 @@ def setup(self):
266
281
def time_vector_slice (self ):
267
282
# GH 2602
268
283
self .s .str [:5 ]
284
+
285
+
286
+ class Iter (Dtypes ):
287
+ def time_iter (self , dtype ):
288
+ for i in self .s :
289
+ pass
0 commit comments