Skip to content

REF: simplify IndexEngine signature #43661

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/indexing_engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def setup(self, engine_and_dtype, index_type):
"non_monotonic": np.array([1, 2, 3] * N, dtype=dtype),
}[index_type]

self.data = engine(lambda: arr, len(arr))
self.data = engine(arr)
# code below avoids populating the mapping etc. while timing.
self.data.get_loc(2)

Expand All @@ -70,7 +70,7 @@ def setup(self, index_type):
"non_monotonic": np.array(list("abc") * N, dtype=object),
}[index_type]

self.data = libindex.ObjectEngine(lambda: arr, len(arr))
self.data = libindex.ObjectEngine(arr)
# code below avoids populating the mapping etc. while timing.
self.data.get_loc("b")

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/index.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ from pandas import MultiIndex

class IndexEngine:
over_size_threshold: bool
def __init__(self, vgetter, n: int): ...
def __init__(self, values: np.ndarray): ...
def __contains__(self, val: object) -> bool: ...
# -> int | slice | np.ndarray[bool]
def get_loc(self, val: object) -> int | slice | np.ndarray: ...
Expand Down
21 changes: 9 additions & 12 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,18 @@ _SIZE_CUTOFF = 1_000_000
cdef class IndexEngine:

cdef readonly:
object vgetter
ndarray values
HashTable mapping
bint over_size_threshold

cdef:
bint unique, monotonic_inc, monotonic_dec
bint need_monotonic_check, need_unique_check

def __init__(self, vgetter, n):
self.vgetter = vgetter
def __init__(self, ndarray values):
self.values = values

self.over_size_threshold = n >= _SIZE_CUTOFF
self.over_size_threshold = len(values) >= _SIZE_CUTOFF
self.clear_mapping()

def __contains__(self, val: object) -> bool:
Expand Down Expand Up @@ -214,8 +214,8 @@ cdef class IndexEngine:
self.unique = 1
self.need_unique_check = 0

cdef _get_index_values(self):
return self.vgetter()
cdef ndarray _get_index_values(self):
return self.values

cdef _call_monotonic(self, values):
return algos.is_monotonic(values, timelike=False)
Expand Down Expand Up @@ -438,8 +438,8 @@ cdef class DatetimeEngine(Int64Engine):
self._ensure_mapping_populated()
return conv in self.mapping

cdef _get_index_values(self):
return self.vgetter().view('i8')
cdef ndarray _get_index_values(self):
return self.values.view('i8')

cdef _call_monotonic(self, values):
return algos.is_monotonic(values, timelike=True)
Expand Down Expand Up @@ -537,9 +537,6 @@ cdef class PeriodEngine(Int64Engine):

return Int64Engine.get_loc(self, conv)

cdef _get_index_values(self):
return super(PeriodEngine, self).vgetter().view("i8")

cdef _call_monotonic(self, values):
return algos.is_monotonic(values, timelike=True)

Expand Down Expand Up @@ -598,7 +595,7 @@ cdef class BaseMultiIndexCodesEngine:

# Initialize underlying index (e.g. libindex.UInt64Engine) with
# integers representing labels: we will use its get_loc and get_indexer
self._base.__init__(self, lambda: lab_ints, len(lab_ints))
self._base.__init__(self, lab_ints)

def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray:
raise NotImplementedError("Implemented by subclass")
Expand Down
2 changes: 0 additions & 2 deletions pandas/_libs/index_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ dtypes = [('Float64', 'float64'),


cdef class {{name}}Engine(IndexEngine):
# constructor-caller is responsible for ensuring that vgetter()
# returns an ndarray with dtype {{dtype}}_t

cdef _make_hash_table(self, Py_ssize_t n):
return _hash.{{name}}HashTable(n)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,7 @@ def _engine(self) -> libindex.IndexEngine:
# to avoid a reference cycle, bind `target_values` to a local variable, so
# `self` is not passed into the lambda.
target_values = self._get_engine_target()
return self._engine_type(lambda: target_values, len(self))
return self._engine_type(target_values)

@final
@cache_readonly
Expand Down
40 changes: 20 additions & 20 deletions pandas/tests/indexes/test_engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,18 +96,18 @@ def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype):
arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)

# monotonic increasing
engine = engine_type(lambda: arr, len(arr))
engine = engine_type(arr)
assert engine.is_monotonic_increasing is True
assert engine.is_monotonic_decreasing is False

# monotonic decreasing
engine = engine_type(lambda: arr[::-1], len(arr))
engine = engine_type(arr[::-1])
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is True

# neither monotonic increasing nor decreasing
arr = np.array([1] * num + [2] * num + [1] * num, dtype=dtype)
engine = engine_type(lambda: arr[::-1], len(arr))
engine = engine_type(arr[::-1])
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is False

Expand All @@ -116,31 +116,31 @@ def test_is_unique(self, numeric_indexing_engine_type_and_dtype):

# unique
arr = np.array([1, 3, 2], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
engine = engine_type(arr)
assert engine.is_unique is True

# not unique
arr = np.array([1, 2, 1], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
engine = engine_type(arr)
assert engine.is_unique is False

def test_get_loc(self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype

# unique
arr = np.array([1, 2, 3], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
engine = engine_type(arr)
assert engine.get_loc(2) == 1

# monotonic
num = 1000
arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
engine = engine_type(arr)
assert engine.get_loc(2) == slice(1000, 2000)

# not monotonic
arr = np.array([1, 2, 3] * num, dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
engine = engine_type(arr)
expected = np.array([False, True, False] * num, dtype=bool)
result = engine.get_loc(2)
assert (result == expected).all()
Expand All @@ -149,7 +149,7 @@ def test_get_backfill_indexer(self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype

arr = np.array([1, 5, 10], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
engine = engine_type(arr)

new = np.arange(12, dtype=dtype)
result = engine.get_backfill_indexer(new)
Expand All @@ -161,7 +161,7 @@ def test_get_pad_indexer(self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype

arr = np.array([1, 5, 10], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
engine = engine_type(arr)

new = np.arange(12, dtype=dtype)
result = engine.get_pad_indexer(new)
Expand All @@ -181,54 +181,54 @@ def test_is_monotonic(self):
arr = np.array(["a"] * num + ["a"] * num + ["c"] * num, dtype=self.dtype)

# monotonic increasing
engine = self.engine_type(lambda: arr, len(arr))
engine = self.engine_type(arr)
assert engine.is_monotonic_increasing is True
assert engine.is_monotonic_decreasing is False

# monotonic decreasing
engine = self.engine_type(lambda: arr[::-1], len(arr))
engine = self.engine_type(arr[::-1])
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is True

# neither monotonic increasing nor decreasing
arr = np.array(["a"] * num + ["b"] * num + ["a"] * num, dtype=self.dtype)
engine = self.engine_type(lambda: arr[::-1], len(arr))
engine = self.engine_type(arr[::-1])
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is False

def test_is_unique(self):
# unique
arr = np.array(self.values, dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
engine = self.engine_type(arr)
assert engine.is_unique is True

# not unique
arr = np.array(["a", "b", "a"], dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
engine = self.engine_type(arr)
assert engine.is_unique is False

def test_get_loc(self):
# unique
arr = np.array(self.values, dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
engine = self.engine_type(arr)
assert engine.get_loc("b") == 1

# monotonic
num = 1000
arr = np.array(["a"] * num + ["b"] * num + ["c"] * num, dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
engine = self.engine_type(arr)
assert engine.get_loc("b") == slice(1000, 2000)

# not monotonic
arr = np.array(self.values * num, dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
engine = self.engine_type(arr)
expected = np.array([False, True, False] * num, dtype=bool)
result = engine.get_loc("b")
assert (result == expected).all()

def test_get_backfill_indexer(self):
arr = np.array(["a", "e", "j"], dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
engine = self.engine_type(arr)

new = np.array(list("abcdefghij"), dtype=self.dtype)
result = engine.get_backfill_indexer(new)
Expand All @@ -238,7 +238,7 @@ def test_get_backfill_indexer(self):

def test_get_pad_indexer(self):
arr = np.array(["a", "e", "j"], dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
engine = self.engine_type(arr)

new = np.array(list("abcdefghij"), dtype=self.dtype)
result = engine.get_pad_indexer(new)
Expand Down