1
1
"""
2
- Benchmarks in this file depend exclusively on code in _libs/
2
+ Benchmarks in this file depend mostly on code in _libs/
3
+
4
+ We have to create masked arrays to test the masked engine though. The
5
+ array is unpacked on the Cython level.
3
6
4
7
If a PR does not edit anything in _libs, it is very unlikely that benchmarks
5
8
in this file will be affected.
9
12
10
13
from pandas ._libs import index as libindex
11
14
15
+ from pandas .core .arrays import BaseMaskedArray
16
+
12
17
13
18
def _get_numeric_engines ():
14
19
engine_names = [
@@ -30,6 +35,26 @@ def _get_numeric_engines():
30
35
]
31
36
32
37
38
def _get_masked_engines():
    """
    Collect the masked index engines that ``libindex`` provides.

    Returns
    -------
    list of tuple
        ``(engine_class, dtype_name)`` pairs, where ``dtype_name`` is the
        capitalised dtype string paired with that engine (e.g. ``"Int64"``).
        Engines that are not attributes of ``libindex`` are left out.
    """
    # Every name must follow the "Masked<Dtype>Engine" pattern exactly: the
    # hasattr filter below silently drops any name that is misspelled, which
    # would remove that dtype from the benchmark without any error.
    engine_names = [
        ("MaskedInt64Engine", "Int64"),
        ("MaskedInt32Engine", "Int32"),
        ("MaskedInt16Engine", "Int16"),
        ("MaskedInt8Engine", "Int8"),
        ("MaskedUInt64Engine", "UInt64"),
        ("MaskedUInt32Engine", "UInt32"),
        ("MaskedUInt16Engine", "UInt16"),
        ("MaskedUInt8Engine", "UInt8"),
        ("MaskedFloat64Engine", "Float64"),
        ("MaskedFloat32Engine", "Float32"),
    ]
    return [
        (getattr(libindex, engine_name), dtype)
        for engine_name, dtype in engine_names
        if hasattr(libindex, engine_name)
    ]
56
+
57
+
33
58
class NumericEngineIndexing :
34
59
35
60
params = [
@@ -80,6 +105,61 @@ def time_get_loc_near_middle(self, engine_and_dtype, index_type, unique, N):
80
105
self .data .get_loc (self .key_middle )
81
106
82
107
108
class MaskedNumericEngineIndexing:
    """
    Time ``get_loc`` on the masked numeric index engines.

    Each benchmark is parametrised over the engine/dtype pair, the ordering
    of the underlying values, whether the values are unique, and the size
    factor ``N`` (the arrays built in ``setup`` hold ``3 * N`` elements).
    """

    params = [
        _get_masked_engines(),
        ["monotonic_incr", "monotonic_decr", "non_monotonic"],
        [True, False],
        [10**5, 2 * 10**6],  # 2e6 is above SIZE_CUTOFF
    ]
    param_names = ["engine_and_dtype", "index_type", "unique", "N"]

    def setup(self, engine_and_dtype, index_type, unique, N):
        """Build the engine under test and pick the keys to look up."""
        engine, dtype = engine_and_dtype
        # The numpy dtype string is the lower-cased masked dtype name.
        np_dtype = dtype.lower()
        size = N * 3

        if index_type in ("monotonic_incr", "monotonic_decr"):
            if unique:
                arr = np.arange(size, dtype=np_dtype)
            else:
                arr = np.array([1] * N + [2] * N + [3] * N, dtype=np_dtype)
            if index_type == "monotonic_decr":
                # The decreasing layout is the increasing one, reversed.
                arr = arr[::-1]
        else:
            assert index_type == "non_monotonic"
            if unique:
                arr = np.zeros(size, dtype=np_dtype)
                arr[:N] = np.arange(N * 2, size, dtype=np_dtype)
                arr[N:] = np.arange(N * 2, dtype=np_dtype)
            else:
                arr = np.array([1, 2, 3] * N, dtype=np_dtype)

        mask = np.zeros(size, dtype=np.bool_)
        if index_type == "non_monotonic":
            # Only the non-monotonic layout marks an element as masked.
            mask[-1] = True

        self.key_early = arr[2]
        self.key_middle = arr[len(arr) // 2]

        self.data = engine(BaseMaskedArray(arr, mask))
        # One lookup up front, so that populating the mapping etc. does not
        # happen while timing.
        self.data.get_loc(2)

    def time_get_loc(self, engine_and_dtype, index_type, unique, N):
        self.data.get_loc(self.key_early)

    def time_get_loc_near_middle(self, engine_and_dtype, index_type, unique, N):
        # searchsorted performance may differ near the middle of a range
        # compared with near an endpoint
        self.data.get_loc(self.key_middle)
161
+
162
+
83
163
class ObjectEngineIndexing :
84
164
85
165
params = [("monotonic_incr" , "monotonic_decr" , "non_monotonic" )]
0 commit comments