Skip to content

Commit d09adae

Browse files
authored
REF: simplify IndexEngine signature (#43661)
1 parent e7e7b40 commit d09adae

File tree

6 files changed

+33
-38
lines changed

6 files changed

+33
-38
lines changed

asv_bench/benchmarks/indexing_engines.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def setup(self, engine_and_dtype, index_type):
4848
"non_monotonic": np.array([1, 2, 3] * N, dtype=dtype),
4949
}[index_type]
5050

51-
self.data = engine(lambda: arr, len(arr))
51+
self.data = engine(arr)
5252
# code below avoids populating the mapping etc. while timing.
5353
self.data.get_loc(2)
5454

@@ -70,7 +70,7 @@ def setup(self, index_type):
7070
"non_monotonic": np.array(list("abc") * N, dtype=object),
7171
}[index_type]
7272

73-
self.data = libindex.ObjectEngine(lambda: arr, len(arr))
73+
self.data = libindex.ObjectEngine(arr)
7474
# code below avoids populating the mapping etc. while timing.
7575
self.data.get_loc("b")
7676

pandas/_libs/index.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ from pandas import MultiIndex
66

77
class IndexEngine:
88
over_size_threshold: bool
9-
def __init__(self, vgetter, n: int): ...
9+
def __init__(self, values: np.ndarray): ...
1010
def __contains__(self, val: object) -> bool: ...
1111
# -> int | slice | np.ndarray[bool]
1212
def get_loc(self, val: object) -> int | slice | np.ndarray: ...

pandas/_libs/index.pyx

+9-12
Original file line numberDiff line numberDiff line change
@@ -53,18 +53,18 @@ _SIZE_CUTOFF = 1_000_000
5353
cdef class IndexEngine:
5454

5555
cdef readonly:
56-
object vgetter
56+
ndarray values
5757
HashTable mapping
5858
bint over_size_threshold
5959

6060
cdef:
6161
bint unique, monotonic_inc, monotonic_dec
6262
bint need_monotonic_check, need_unique_check
6363

64-
def __init__(self, vgetter, n):
65-
self.vgetter = vgetter
64+
def __init__(self, ndarray values):
65+
self.values = values
6666

67-
self.over_size_threshold = n >= _SIZE_CUTOFF
67+
self.over_size_threshold = len(values) >= _SIZE_CUTOFF
6868
self.clear_mapping()
6969

7070
def __contains__(self, val: object) -> bool:
@@ -214,8 +214,8 @@ cdef class IndexEngine:
214214
self.unique = 1
215215
self.need_unique_check = 0
216216

217-
cdef _get_index_values(self):
218-
return self.vgetter()
217+
cdef ndarray _get_index_values(self):
218+
return self.values
219219

220220
cdef _call_monotonic(self, values):
221221
return algos.is_monotonic(values, timelike=False)
@@ -438,8 +438,8 @@ cdef class DatetimeEngine(Int64Engine):
438438
self._ensure_mapping_populated()
439439
return conv in self.mapping
440440

441-
cdef _get_index_values(self):
442-
return self.vgetter().view('i8')
441+
cdef ndarray _get_index_values(self):
442+
return self.values.view('i8')
443443

444444
cdef _call_monotonic(self, values):
445445
return algos.is_monotonic(values, timelike=True)
@@ -537,9 +537,6 @@ cdef class PeriodEngine(Int64Engine):
537537

538538
return Int64Engine.get_loc(self, conv)
539539

540-
cdef _get_index_values(self):
541-
return super(PeriodEngine, self).vgetter().view("i8")
542-
543540
cdef _call_monotonic(self, values):
544541
return algos.is_monotonic(values, timelike=True)
545542

@@ -598,7 +595,7 @@ cdef class BaseMultiIndexCodesEngine:
598595

599596
# Initialize underlying index (e.g. libindex.UInt64Engine) with
600597
# integers representing labels: we will use its get_loc and get_indexer
601-
self._base.__init__(self, lambda: lab_ints, len(lab_ints))
598+
self._base.__init__(self, lab_ints)
602599

603600
def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray:
604601
raise NotImplementedError("Implemented by subclass")

pandas/_libs/index_class_helper.pxi.in

-2
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@ dtypes = [('Float64', 'float64'),
2828

2929

3030
cdef class {{name}}Engine(IndexEngine):
31-
# constructor-caller is responsible for ensuring that vgetter()
32-
# returns an ndarray with dtype {{dtype}}_t
3331

3432
cdef _make_hash_table(self, Py_ssize_t n):
3533
return _hash.{{name}}HashTable(n)

pandas/core/indexes/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -819,7 +819,7 @@ def _engine(self) -> libindex.IndexEngine:
819819
# to avoid a reference cycle, bind `target_values` to a local variable, so
820820
# `self` is not passed into the lambda.
821821
target_values = self._get_engine_target()
822-
return self._engine_type(lambda: target_values, len(self))
822+
return self._engine_type(target_values)
823823

824824
@final
825825
@cache_readonly

pandas/tests/indexes/test_engines.py

+20-20
Original file line numberDiff line numberDiff line change
@@ -96,18 +96,18 @@ def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype):
9696
arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
9797

9898
# monotonic increasing
99-
engine = engine_type(lambda: arr, len(arr))
99+
engine = engine_type(arr)
100100
assert engine.is_monotonic_increasing is True
101101
assert engine.is_monotonic_decreasing is False
102102

103103
# monotonic decreasing
104-
engine = engine_type(lambda: arr[::-1], len(arr))
104+
engine = engine_type(arr[::-1])
105105
assert engine.is_monotonic_increasing is False
106106
assert engine.is_monotonic_decreasing is True
107107

108108
# neither monotonic increasing nor decreasing
109109
arr = np.array([1] * num + [2] * num + [1] * num, dtype=dtype)
110-
engine = engine_type(lambda: arr[::-1], len(arr))
110+
engine = engine_type(arr[::-1])
111111
assert engine.is_monotonic_increasing is False
112112
assert engine.is_monotonic_decreasing is False
113113

@@ -116,31 +116,31 @@ def test_is_unique(self, numeric_indexing_engine_type_and_dtype):
116116

117117
# unique
118118
arr = np.array([1, 3, 2], dtype=dtype)
119-
engine = engine_type(lambda: arr, len(arr))
119+
engine = engine_type(arr)
120120
assert engine.is_unique is True
121121

122122
# not unique
123123
arr = np.array([1, 2, 1], dtype=dtype)
124-
engine = engine_type(lambda: arr, len(arr))
124+
engine = engine_type(arr)
125125
assert engine.is_unique is False
126126

127127
def test_get_loc(self, numeric_indexing_engine_type_and_dtype):
128128
engine_type, dtype = numeric_indexing_engine_type_and_dtype
129129

130130
# unique
131131
arr = np.array([1, 2, 3], dtype=dtype)
132-
engine = engine_type(lambda: arr, len(arr))
132+
engine = engine_type(arr)
133133
assert engine.get_loc(2) == 1
134134

135135
# monotonic
136136
num = 1000
137137
arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
138-
engine = engine_type(lambda: arr, len(arr))
138+
engine = engine_type(arr)
139139
assert engine.get_loc(2) == slice(1000, 2000)
140140

141141
# not monotonic
142142
arr = np.array([1, 2, 3] * num, dtype=dtype)
143-
engine = engine_type(lambda: arr, len(arr))
143+
engine = engine_type(arr)
144144
expected = np.array([False, True, False] * num, dtype=bool)
145145
result = engine.get_loc(2)
146146
assert (result == expected).all()
@@ -149,7 +149,7 @@ def test_get_backfill_indexer(self, numeric_indexing_engine_type_and_dtype):
149149
engine_type, dtype = numeric_indexing_engine_type_and_dtype
150150

151151
arr = np.array([1, 5, 10], dtype=dtype)
152-
engine = engine_type(lambda: arr, len(arr))
152+
engine = engine_type(arr)
153153

154154
new = np.arange(12, dtype=dtype)
155155
result = engine.get_backfill_indexer(new)
@@ -161,7 +161,7 @@ def test_get_pad_indexer(self, numeric_indexing_engine_type_and_dtype):
161161
engine_type, dtype = numeric_indexing_engine_type_and_dtype
162162

163163
arr = np.array([1, 5, 10], dtype=dtype)
164-
engine = engine_type(lambda: arr, len(arr))
164+
engine = engine_type(arr)
165165

166166
new = np.arange(12, dtype=dtype)
167167
result = engine.get_pad_indexer(new)
@@ -181,54 +181,54 @@ def test_is_monotonic(self):
181181
arr = np.array(["a"] * num + ["a"] * num + ["c"] * num, dtype=self.dtype)
182182

183183
# monotonic increasing
184-
engine = self.engine_type(lambda: arr, len(arr))
184+
engine = self.engine_type(arr)
185185
assert engine.is_monotonic_increasing is True
186186
assert engine.is_monotonic_decreasing is False
187187

188188
# monotonic decreasing
189-
engine = self.engine_type(lambda: arr[::-1], len(arr))
189+
engine = self.engine_type(arr[::-1])
190190
assert engine.is_monotonic_increasing is False
191191
assert engine.is_monotonic_decreasing is True
192192

193193
# neither monotonic increasing nor decreasing
194194
arr = np.array(["a"] * num + ["b"] * num + ["a"] * num, dtype=self.dtype)
195-
engine = self.engine_type(lambda: arr[::-1], len(arr))
195+
engine = self.engine_type(arr[::-1])
196196
assert engine.is_monotonic_increasing is False
197197
assert engine.is_monotonic_decreasing is False
198198

199199
def test_is_unique(self):
200200
# unique
201201
arr = np.array(self.values, dtype=self.dtype)
202-
engine = self.engine_type(lambda: arr, len(arr))
202+
engine = self.engine_type(arr)
203203
assert engine.is_unique is True
204204

205205
# not unique
206206
arr = np.array(["a", "b", "a"], dtype=self.dtype)
207-
engine = self.engine_type(lambda: arr, len(arr))
207+
engine = self.engine_type(arr)
208208
assert engine.is_unique is False
209209

210210
def test_get_loc(self):
211211
# unique
212212
arr = np.array(self.values, dtype=self.dtype)
213-
engine = self.engine_type(lambda: arr, len(arr))
213+
engine = self.engine_type(arr)
214214
assert engine.get_loc("b") == 1
215215

216216
# monotonic
217217
num = 1000
218218
arr = np.array(["a"] * num + ["b"] * num + ["c"] * num, dtype=self.dtype)
219-
engine = self.engine_type(lambda: arr, len(arr))
219+
engine = self.engine_type(arr)
220220
assert engine.get_loc("b") == slice(1000, 2000)
221221

222222
# not monotonic
223223
arr = np.array(self.values * num, dtype=self.dtype)
224-
engine = self.engine_type(lambda: arr, len(arr))
224+
engine = self.engine_type(arr)
225225
expected = np.array([False, True, False] * num, dtype=bool)
226226
result = engine.get_loc("b")
227227
assert (result == expected).all()
228228

229229
def test_get_backfill_indexer(self):
230230
arr = np.array(["a", "e", "j"], dtype=self.dtype)
231-
engine = self.engine_type(lambda: arr, len(arr))
231+
engine = self.engine_type(arr)
232232

233233
new = np.array(list("abcdefghij"), dtype=self.dtype)
234234
result = engine.get_backfill_indexer(new)
@@ -238,7 +238,7 @@ def test_get_backfill_indexer(self):
238238

239239
def test_get_pad_indexer(self):
240240
arr = np.array(["a", "e", "j"], dtype=self.dtype)
241-
engine = self.engine_type(lambda: arr, len(arr))
241+
engine = self.engine_type(arr)
242242

243243
new = np.array(list("abcdefghij"), dtype=self.dtype)
244244
result = engine.get_pad_indexer(new)

0 commit comments

Comments
 (0)