diff --git a/asv_bench/benchmarks/indexing_engines.py b/asv_bench/benchmarks/indexing_engines.py index 0cbc300ee2fc4..60e07a9d1469c 100644 --- a/asv_bench/benchmarks/indexing_engines.py +++ b/asv_bench/benchmarks/indexing_engines.py @@ -35,25 +35,49 @@ class NumericEngineIndexing: params = [ _get_numeric_engines(), ["monotonic_incr", "monotonic_decr", "non_monotonic"], + [True, False], + [10 ** 5, 2 * 10 ** 6], # 2e6 is above SIZE_CUTOFF ] - param_names = ["engine_and_dtype", "index_type"] + param_names = ["engine_and_dtype", "index_type", "unique", "N"] - def setup(self, engine_and_dtype, index_type): + def setup(self, engine_and_dtype, index_type, unique, N): engine, dtype = engine_and_dtype - N = 10 ** 5 - values = list([1] * N + [2] * N + [3] * N) - arr = { - "monotonic_incr": np.array(values, dtype=dtype), - "monotonic_decr": np.array(list(reversed(values)), dtype=dtype), - "non_monotonic": np.array([1, 2, 3] * N, dtype=dtype), - }[index_type] + + if index_type == "monotonic_incr": + if unique: + arr = np.arange(N * 3, dtype=dtype) + else: + values = list([1] * N + [2] * N + [3] * N) + arr = np.array(values, dtype=dtype) + elif index_type == "monotonic_decr": + if unique: + arr = np.arange(N * 3, dtype=dtype)[::-1] + else: + values = list([1] * N + [2] * N + [3] * N) + arr = np.array(values, dtype=dtype)[::-1] + else: + assert index_type == "non_monotonic" + if unique: + arr = np.empty(N * 3, dtype=dtype) + arr[:N] = np.arange(N * 2, N * 3, dtype=dtype) + arr[N:] = np.arange(N * 2, dtype=dtype) + else: + arr = np.array([1, 2, 3] * N, dtype=dtype) self.data = engine(arr) # code belows avoids populating the mapping etc. while timing. self.data.get_loc(2) - def time_get_loc(self, engine_and_dtype, index_type): - self.data.get_loc(2) + self.key_middle = arr[len(arr) // 2] + self.key_early = arr[2] + + def time_get_loc(self, engine_and_dtype, index_type, unique, N): + self.data.get_loc(self.key_early) + + def time_get_loc_near_middle(self, engine_and_dtype, index_type, unique, N): + # searchsorted performance may be different near the middle of a range + # vs near an endpoint + self.data.get_loc(self.key_middle) class ObjectEngineIndexing: