Skip to content

Commit 5ac96c2

Browse files
jbrockmendel authored and gasparitiago committed
BENCH: indexing_engines (pandas-dev#43916)
1 parent e1175ed commit 5ac96c2

File tree

1 file changed

+35
-11
lines changed

1 file changed

+35
-11
lines changed

asv_bench/benchmarks/indexing_engines.py

+35 -11
Original file line number | Diff line number | Diff line change
@@ -35,25 +35,49 @@ class NumericEngineIndexing:
3535
params = [
3636
_get_numeric_engines(),
3737
["monotonic_incr", "monotonic_decr", "non_monotonic"],
38+
[True, False],
39+
[10 ** 5, 2 * 10 ** 6], # 2e6 is above SIZE_CUTOFF
3840
]
39-
param_names = ["engine_and_dtype", "index_type"]
41+
param_names = ["engine_and_dtype", "index_type", "unique", "N"]
4042

41-
def setup(self, engine_and_dtype, index_type):
43+
def setup(self, engine_and_dtype, index_type, unique, N):
4244
engine, dtype = engine_and_dtype
43-
N = 10 ** 5
44-
values = list([1] * N + [2] * N + [3] * N)
45-
arr = {
46-
"monotonic_incr": np.array(values, dtype=dtype),
47-
"monotonic_decr": np.array(list(reversed(values)), dtype=dtype),
48-
"non_monotonic": np.array([1, 2, 3] * N, dtype=dtype),
49-
}[index_type]
45+
46+
if index_type == "monotonic_incr":
47+
if unique:
48+
arr = np.arange(N * 3, dtype=dtype)
49+
else:
50+
values = list([1] * N + [2] * N + [3] * N)
51+
arr = np.array(values, dtype=dtype)
52+
elif index_type == "monotonic_decr":
53+
if unique:
54+
arr = np.arange(N * 3, dtype=dtype)[::-1]
55+
else:
56+
values = list([1] * N + [2] * N + [3] * N)
57+
arr = np.array(values, dtype=dtype)[::-1]
58+
else:
59+
assert index_type == "non_monotonic"
60+
if unique:
61+
arr = np.empty(N * 3, dtype=dtype)
62+
arr[:N] = np.arange(N * 2, N * 3, dtype=dtype)
63+
arr[N:] = np.arange(N * 2, dtype=dtype)
64+
else:
65+
arr = np.array([1, 2, 3] * N, dtype=dtype)
5066

5167
self.data = engine(arr)
5268
# the call below populates the mapping etc. up front, so that cost is excluded while timing.
5369
self.data.get_loc(2)
5470

55-
def time_get_loc(self, engine_and_dtype, index_type):
56-
self.data.get_loc(2)
71+
self.key_middle = arr[len(arr) // 2]
72+
self.key_early = arr[2]
73+
74+
def time_get_loc(self, engine_and_dtype, index_type, unique, N):
75+
self.data.get_loc(self.key_early)
76+
77+
def time_get_loc_near_middle(self, engine_and_dtype, index_type, unique, N):
78+
# searchsorted performance may be different near the middle of a range
79+
# vs near an endpoint
80+
self.data.get_loc(self.key_middle)
5781

5882

5983
class ObjectEngineIndexing:

0 commit comments

Comments
 (0)