Skip to content

REF: unify i8 casting in DatetimeEngine #43679

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits on Sep 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 10 additions & 45 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ cdef class IndexEngine:
if self.over_size_threshold and self.is_monotonic_increasing:
if not self.is_unique:
return self._get_loc_duplicates(val)
values = self._get_index_values()
values = self.values

self._check_type(val)
try:
Expand Down Expand Up @@ -116,7 +116,7 @@ cdef class IndexEngine:
Py_ssize_t diff

if self.is_monotonic_increasing:
values = self._get_index_values()
values = self.values
try:
left = values.searchsorted(val, side='left')
right = values.searchsorted(val, side='right')
Expand All @@ -139,7 +139,7 @@ cdef class IndexEngine:
cdef:
ndarray[uint8_t, ndim=1, cast=True] indexer

indexer = self._get_index_values() == val
indexer = self.values == val
return self._unpack_bool_indexer(indexer, val)

cdef _unpack_bool_indexer(self,
Expand Down Expand Up @@ -199,7 +199,7 @@ cdef class IndexEngine:
cdef:
bint is_unique
try:
values = self._get_index_values()
values = self.values
self.monotonic_inc, self.monotonic_dec, is_unique = \
self._call_monotonic(values)
except TypeError:
Expand All @@ -214,17 +214,14 @@ cdef class IndexEngine:
self.unique = 1
self.need_unique_check = 0

cdef ndarray _get_index_values(self):
return self.values

cdef _call_monotonic(self, values):
return algos.is_monotonic(values, timelike=False)

def get_backfill_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
return algos.backfill(self._get_index_values(), other, limit=limit)
return algos.backfill(self.values, other, limit=limit)

def get_pad_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
return algos.pad(self._get_index_values(), other, limit=limit)
return algos.pad(self.values, other, limit=limit)

cdef _make_hash_table(self, Py_ssize_t n):
raise NotImplementedError
Expand All @@ -243,7 +240,7 @@ cdef class IndexEngine:

if not self.is_mapping_populated:

values = self._get_index_values()
values = self.values
self.mapping = self._make_hash_table(len(values))
self._call_map_locations(values)

Expand Down Expand Up @@ -291,7 +288,7 @@ cdef class IndexEngine:
bint d_has_nan = False, stargets_has_nan = False, need_nan_check = True

self._ensure_mapping_populated()
values = np.array(self._get_index_values(), copy=False)
values = self.values
stargets = set(targets)

n = len(values)
Expand Down Expand Up @@ -411,9 +408,6 @@ cdef class ObjectEngine(IndexEngine):

cdef class DatetimeEngine(Int64Engine):

cdef str _get_box_dtype(self):
return 'M8[ns]'

cdef int64_t _unbox_scalar(self, scalar) except? -1:
# NB: caller is responsible for ensuring tzawareness compat
# before we get here
Expand All @@ -431,16 +425,13 @@ cdef class DatetimeEngine(Int64Engine):
if self.over_size_threshold and self.is_monotonic_increasing:
if not self.is_unique:
return self._get_loc_duplicates(conv)
values = self._get_index_values()
values = self.values
loc = values.searchsorted(conv, side='left')
return values[loc] == conv

self._ensure_mapping_populated()
return conv in self.mapping

cdef ndarray _get_index_values(self):
return self.values.view('i8')

cdef _call_monotonic(self, values):
return algos.is_monotonic(values, timelike=True)

Expand All @@ -462,7 +453,7 @@ cdef class DatetimeEngine(Int64Engine):
if self.over_size_threshold and self.is_monotonic_increasing:
if not self.is_unique:
return self._get_loc_duplicates(conv)
values = self._get_index_values()
values = self.values

loc = values.searchsorted(conv, side='left')

Expand All @@ -479,35 +470,9 @@ cdef class DatetimeEngine(Int64Engine):
except KeyError:
raise KeyError(val)

def get_indexer_non_unique(self, ndarray targets):
# we may get datetime64[ns] or timedelta64[ns], cast these to int64
return super().get_indexer_non_unique(targets.view("i8"))

def get_indexer(self, ndarray values) -> np.ndarray:
self._ensure_mapping_populated()
if values.dtype != self._get_box_dtype():
return np.repeat(-1, len(values)).astype(np.intp)
values = np.asarray(values).view('i8')
return self.mapping.lookup(values)

def get_pad_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
if other.dtype != self._get_box_dtype():
return np.repeat(-1, len(other)).astype(np.intp)
other = np.asarray(other).view('i8')
return algos.pad(self._get_index_values(), other, limit=limit)

def get_backfill_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
if other.dtype != self._get_box_dtype():
return np.repeat(-1, len(other)).astype(np.intp)
other = np.asarray(other).view('i8')
return algos.backfill(self._get_index_values(), other, limit=limit)


cdef class TimedeltaEngine(DatetimeEngine):

cdef str _get_box_dtype(self):
return 'm8[ns]'

cdef int64_t _unbox_scalar(self, scalar) except? -1:
if not (isinstance(scalar, _Timedelta) or scalar is NaT):
raise TypeError(scalar)
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/index_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ cdef class {{name}}Engine(IndexEngine):

self._check_type(val)

values = self._get_index_values()
values = self.values
try:
with warnings.catch_warnings():
# e.g. if values is float64 and `val` is a str, suppress warning
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,7 +890,12 @@ def ravel(self, order="C"):
FutureWarning,
stacklevel=2,
)
values = self._get_engine_target()
if needs_i8_conversion(self.dtype):
# Item "ndarray[Any, Any]" of "Union[ExtensionArray, ndarray[Any, Any]]"
# has no attribute "_ndarray"
values = self._data._ndarray # type: ignore[union-attr]
else:
values = self._get_engine_target()
return values.ravel(order=order)

def view(self, cls=None):
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,8 @@ def _wrap_joined_index(self, joined, other):
result._data._freq = self._get_join_freq(other)
return result

def _get_join_target(self) -> np.ndarray:
def _get_engine_target(self) -> np.ndarray:
# engine methods and libjoin methods need dt64/td64 values cast to i8
return self._data._ndarray.view("i8")

def _from_join_target(self, result: np.ndarray):
Expand Down