diff --git a/asv_bench/benchmarks/indexing_engines.py b/asv_bench/benchmarks/indexing_engines.py index 30ef7f63dc0dc..dc9576bc94d4c 100644 --- a/asv_bench/benchmarks/indexing_engines.py +++ b/asv_bench/benchmarks/indexing_engines.py @@ -48,7 +48,7 @@ def setup(self, engine_and_dtype, index_type): "non_monotonic": np.array([1, 2, 3] * N, dtype=dtype), }[index_type] - self.data = engine(lambda: arr, len(arr)) + self.data = engine(arr) # code belows avoids populating the mapping etc. while timing. self.data.get_loc(2) @@ -70,7 +70,7 @@ def setup(self, index_type): "non_monotonic": np.array(list("abc") * N, dtype=object), }[index_type] - self.data = libindex.ObjectEngine(lambda: arr, len(arr)) + self.data = libindex.ObjectEngine(arr) # code belows avoids populating the mapping etc. while timing. self.data.get_loc("b") diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi index c4a695acc2768..9f526dd3fe653 100644 --- a/pandas/_libs/index.pyi +++ b/pandas/_libs/index.pyi @@ -6,7 +6,7 @@ from pandas import MultiIndex class IndexEngine: over_size_threshold: bool - def __init__(self, vgetter, n: int): ... + def __init__(self, values: np.ndarray): ... def __contains__(self, val: object) -> bool: ... # -> int | slice | np.ndarray[bool] def get_loc(self, val: object) -> int | slice | np.ndarray: ... diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 7aff683173855..c511fa85d2ec4 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -53,7 +53,7 @@ _SIZE_CUTOFF = 1_000_000 cdef class IndexEngine: cdef readonly: - object vgetter + ndarray values HashTable mapping bint over_size_threshold @@ -61,10 +61,10 @@ cdef class IndexEngine: bint unique, monotonic_inc, monotonic_dec bint need_monotonic_check, need_unique_check - def __init__(self, vgetter, n): - self.vgetter = vgetter + def __init__(self, ndarray values): + self.values = values - self.over_size_threshold = n >= _SIZE_CUTOFF + self.over_size_threshold = len(values) >= _SIZE_CUTOFF self.clear_mapping() def __contains__(self, val: object) -> bool: @@ -214,8 +214,8 @@ cdef class IndexEngine: self.unique = 1 self.need_unique_check = 0 - cdef _get_index_values(self): - return self.vgetter() + cdef ndarray _get_index_values(self): + return self.values cdef _call_monotonic(self, values): return algos.is_monotonic(values, timelike=False) @@ -438,8 +438,8 @@ cdef class DatetimeEngine(Int64Engine): self._ensure_mapping_populated() return conv in self.mapping - cdef _get_index_values(self): - return self.vgetter().view('i8') + cdef ndarray _get_index_values(self): + return self.values.view('i8') cdef _call_monotonic(self, values): return algos.is_monotonic(values, timelike=True) @@ -537,9 +537,6 @@ cdef class PeriodEngine(Int64Engine): return Int64Engine.get_loc(self, conv) - cdef _get_index_values(self): - return super(PeriodEngine, self).vgetter().view("i8") - cdef _call_monotonic(self, values): return algos.is_monotonic(values, timelike=True) @@ -598,7 +595,7 @@ cdef class BaseMultiIndexCodesEngine: # Initialize underlying index (e.g. libindex.UInt64Engine) with # integers representing labels: we will use its get_loc and get_indexer - self._base.__init__(self, lambda: lab_ints, len(lab_ints)) + self._base.__init__(self, lab_ints) def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray: raise NotImplementedError("Implemented by subclass") diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index f0351e06f2b8c..d6c5bfc9d1839 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -28,8 +28,6 @@ dtypes = [('Float64', 'float64'), cdef class {{name}}Engine(IndexEngine): - # constructor-caller is responsible for ensuring that vgetter() - # returns an ndarray with dtype {{dtype}}_t cdef _make_hash_table(self, Py_ssize_t n): return _hash.{{name}}HashTable(n) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 45f450e11c16c..104dcbf62940c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -819,7 +819,7 @@ def _engine(self) -> libindex.IndexEngine: # to avoid a reference cycle, bind `target_values` to a local variable, so # `self` is not passed into the lambda. target_values = self._get_engine_target() - return self._engine_type(lambda: target_values, len(self)) + return self._engine_type(target_values) @final @cache_readonly diff --git a/pandas/tests/indexes/test_engines.py b/pandas/tests/indexes/test_engines.py index 9f41c68909f6e..45e8e56d46381 100644 --- a/pandas/tests/indexes/test_engines.py +++ b/pandas/tests/indexes/test_engines.py @@ -96,18 +96,18 @@ def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype): arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype) # monotonic increasing - engine = engine_type(lambda: arr, len(arr)) + engine = engine_type(arr) assert engine.is_monotonic_increasing is True assert engine.is_monotonic_decreasing is False # monotonic decreasing - engine = engine_type(lambda: arr[::-1], len(arr)) + engine = engine_type(arr[::-1]) assert engine.is_monotonic_increasing is False assert engine.is_monotonic_decreasing is True # neither monotonic increasing or decreasing arr = np.array([1] * num + [2] * num + [1] * num, dtype=dtype) - engine = engine_type(lambda: arr[::-1], len(arr)) + engine = engine_type(arr[::-1]) assert engine.is_monotonic_increasing is False assert engine.is_monotonic_decreasing is False @@ -116,12 +116,12 @@ def test_is_unique(self, numeric_indexing_engine_type_and_dtype): # unique arr = np.array([1, 3, 2], dtype=dtype) - engine = engine_type(lambda: arr, len(arr)) + engine = engine_type(arr) assert engine.is_unique is True # not unique arr = np.array([1, 2, 1], dtype=dtype) - engine = engine_type(lambda: arr, len(arr)) + engine = engine_type(arr) assert engine.is_unique is False def test_get_loc(self, numeric_indexing_engine_type_and_dtype): @@ -129,18 +129,18 @@ def test_get_loc(self, numeric_indexing_engine_type_and_dtype): # unique arr = np.array([1, 2, 3], dtype=dtype) - engine = engine_type(lambda: arr, len(arr)) + engine = engine_type(arr) assert engine.get_loc(2) == 1 # monotonic num = 1000 arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype) - engine = engine_type(lambda: arr, len(arr)) + engine = engine_type(arr) assert engine.get_loc(2) == slice(1000, 2000) # not monotonic arr = np.array([1, 2, 3] * num, dtype=dtype) - engine = engine_type(lambda: arr, len(arr)) + engine = engine_type(arr) expected = np.array([False, True, False] * num, dtype=bool) result = engine.get_loc(2) assert (result == expected).all() @@ -149,7 +149,7 @@ def test_get_backfill_indexer(self, numeric_indexing_engine_type_and_dtype): engine_type, dtype = numeric_indexing_engine_type_and_dtype arr = np.array([1, 5, 10], dtype=dtype) - engine = engine_type(lambda: arr, len(arr)) + engine = engine_type(arr) new = np.arange(12, dtype=dtype) result = engine.get_backfill_indexer(new) @@ -161,7 +161,7 @@ def test_get_pad_indexer(self, numeric_indexing_engine_type_and_dtype): engine_type, dtype = numeric_indexing_engine_type_and_dtype arr = np.array([1, 5, 10], dtype=dtype) - engine = engine_type(lambda: arr, len(arr)) + engine = engine_type(arr) new = np.arange(12, dtype=dtype) result = engine.get_pad_indexer(new) @@ -181,54 +181,54 @@ def test_is_monotonic(self): arr = np.array(["a"] * num + ["a"] * num + ["c"] * num, dtype=self.dtype) # monotonic increasing - engine = self.engine_type(lambda: arr, len(arr)) + engine = self.engine_type(arr) assert engine.is_monotonic_increasing is True assert engine.is_monotonic_decreasing is False # monotonic decreasing - engine = self.engine_type(lambda: arr[::-1], len(arr)) + engine = self.engine_type(arr[::-1]) assert engine.is_monotonic_increasing is False assert engine.is_monotonic_decreasing is True # neither monotonic increasing or decreasing arr = np.array(["a"] * num + ["b"] * num + ["a"] * num, dtype=self.dtype) - engine = self.engine_type(lambda: arr[::-1], len(arr)) + engine = self.engine_type(arr[::-1]) assert engine.is_monotonic_increasing is False assert engine.is_monotonic_decreasing is False def test_is_unique(self): # unique arr = np.array(self.values, dtype=self.dtype) - engine = self.engine_type(lambda: arr, len(arr)) + engine = self.engine_type(arr) assert engine.is_unique is True # not unique arr = np.array(["a", "b", "a"], dtype=self.dtype) - engine = self.engine_type(lambda: arr, len(arr)) + engine = self.engine_type(arr) assert engine.is_unique is False def test_get_loc(self): # unique arr = np.array(self.values, dtype=self.dtype) - engine = self.engine_type(lambda: arr, len(arr)) + engine = self.engine_type(arr) assert engine.get_loc("b") == 1 # monotonic num = 1000 arr = np.array(["a"] * num + ["b"] * num + ["c"] * num, dtype=self.dtype) - engine = self.engine_type(lambda: arr, len(arr)) + engine = self.engine_type(arr) assert engine.get_loc("b") == slice(1000, 2000) # not monotonic arr = np.array(self.values * num, dtype=self.dtype) - engine = self.engine_type(lambda: arr, len(arr)) + engine = self.engine_type(arr) expected = np.array([False, True, False] * num, dtype=bool) result = engine.get_loc("b") assert (result == expected).all() def test_get_backfill_indexer(self): arr = np.array(["a", "e", "j"], dtype=self.dtype) - engine = self.engine_type(lambda: arr, len(arr)) + engine = self.engine_type(arr) new = np.array(list("abcdefghij"), dtype=self.dtype) result = engine.get_backfill_indexer(new) @@ -238,7 +238,7 @@ def test_get_backfill_indexer(self): def test_get_pad_indexer(self): arr = np.array(["a", "e", "j"], dtype=self.dtype) - engine = self.engine_type(lambda: arr, len(arr)) + engine = self.engine_type(arr) new = np.array(list("abcdefghij"), dtype=self.dtype) result = engine.get_pad_indexer(new)