11
11
from .pandas_vb_common import tm
12
12
13
13
14
class Dtypes:
    """Shared base for string benchmarks parametrized over string dtypes.

    Subclasses reuse ``params``/``param_names`` and call ``setup`` to get
    ``self.s``, a string Series of the requested dtype.
    """

    # dtype spellings each benchmark is run against
    params = ["str", "string", "arrow_string"]
    param_names = ["dtype"]

    def setup(self, dtype):
        """Build ``self.s`` from ``tm.makeStringIndex(10 ** 5)`` with ``dtype``.

        Raises
        ------
        NotImplementedError
            If importing the Arrow string array module or constructing the
            Series raises ImportError (presumably so the benchmark runner
            skips this parameter combination -- confirm against the runner).
        """
        try:
            # Imported only for its side effect; kept inside the ``try`` so an
            # ImportError raised by the import itself is converted as well.
            # NOTE(review): presumably this registers "arrow_string" -- confirm.
            from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401

            self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
        except ImportError as err:
            # Chain the cause so the underlying import failure stays visible.
            raise NotImplementedError from err
25
+
26
+
14
27
class Construction :
15
28
16
29
params = ["str" , "string" ]
@@ -49,18 +62,7 @@ def peakmem_cat_frame_construction(self, dtype):
49
62
DataFrame (self .frame_cat_arr , dtype = dtype )
50
63
51
64
52
- class Methods :
53
- params = ["str" , "string" , "arrow_string" ]
54
- param_names = ["dtype" ]
55
-
56
- def setup (self , dtype ):
57
- from pandas .core .arrays .string_arrow import ArrowStringDtype # noqa: F401
58
-
59
- try :
60
- self .s = Series (tm .makeStringIndex (10 ** 5 ), dtype = dtype )
61
- except ImportError :
62
- raise NotImplementedError
63
-
65
+ class Methods (Dtypes ):
64
66
def time_center (self , dtype ):
65
67
self .s .str .center (100 )
66
68
@@ -211,35 +213,26 @@ def time_cat(self, other_cols, sep, na_rep, na_frac):
211
213
self .s .str .cat (others = self .others , sep = sep , na_rep = na_rep )
212
214
213
215
214
- class Contains :
216
class Contains(Dtypes):
    """Time ``Series.str.contains`` per dtype, with ``regex`` on and off."""

    # dtypes come from the shared base; second axis is the ``regex`` flag
    params = (Dtypes.params, [True, False])
    param_names = ["dtype", "regex"]

    def setup(self, dtype, regex):
        """Build ``self.s``; ``regex`` is only used by the timed call."""
        super().setup(dtype)

    def time_contains(self, dtype, regex):
        """Search every element for the literal/regex pattern ``"A"``."""
        strings = self.s.str
        strings.contains("A", regex=regex)
229
226
230
227
231
- class Split :
228
+ class Split ( Dtypes ) :
232
229
233
- params = ([ "str" , "string" , "arrow_string" ] , [True , False ])
230
+ params = (Dtypes . params , [True , False ])
234
231
param_names = ["dtype" , "expand" ]
235
232
236
233
def setup (self , dtype , expand ):
237
- from pandas .core .arrays .string_arrow import ArrowStringDtype # noqa: F401
238
-
239
- try :
240
- self .s = Series (tm .makeStringIndex (10 ** 5 ), dtype = dtype ).str .join ("--" )
241
- except ImportError :
242
- raise NotImplementedError
234
+ super ().setup (dtype )
235
+ self .s = self .s .str .join ("--" )
243
236
244
237
def time_split (self , dtype , expand ):
245
238
self .s .str .split ("--" , expand = expand )
@@ -248,17 +241,23 @@ def time_rsplit(self, dtype, expand):
248
241
self .s .str .rsplit ("--" , expand = expand )
249
242
250
243
251
- class Dummies :
252
- params = ["str" , "string" , "arrow_string" ]
253
- param_names = ["dtype" ]
244
class Extract(Dtypes):
    """Time ``Series.str.extract`` with a single capture group per dtype."""

    # dtypes come from the shared base; second axis is the ``expand`` flag
    params = (Dtypes.params, [True, False])
    param_names = ["dtype", "expand"]

    def setup(self, dtype, expand):
        """Build ``self.s``; ``expand`` is only used by the timed call."""
        super().setup(dtype)

    def time_extract_single_group(self, dtype, expand):
        """Extract the run of word characters preceding an ``A``."""
        # Warnings are recorded rather than emitted while the call runs.
        with warnings.catch_warnings(record=True):
            # Raw string: one capture group of word characters followed by "A".
            self.s.str.extract(r"(\w*)A", expand=expand)
255
+
256
+
257
class Dummies(Dtypes):
    """Time ``Series.str.get_dummies`` on ``"|"``-joined strings per dtype."""

    def setup(self, dtype):
        """Build the base Series, then join each element's characters with ``"|"``."""
        super().setup(dtype)
        pipe_joined = self.s.str.join("|")
        self.s = pipe_joined

    def time_get_dummies(self, dtype):
        """Split on ``"|"`` and build the indicator columns."""
        strings = self.s.str
        strings.get_dummies("|")
@@ -279,3 +278,9 @@ def setup(self):
279
278
def time_vector_slice (self ):
280
279
# GH 2602
281
280
self .s .str [:5 ]
281
+
282
+
283
class Iter(Dtypes):
    """Time plain Python iteration over the string Series per dtype."""

    def time_iter(self, dtype):
        """Walk every element of ``self.s`` and discard it."""
        # The loop body is intentionally empty: only iteration cost is measured.
        for _ in self.s:
            pass
0 commit comments