Skip to content

BUG: get_loc with time object matching NaT micros #35114

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -976,6 +976,7 @@ Indexing
- Bug in :meth:`DataFrame.loc` with a dictionary of values changing columns with ``int`` dtype to ``float`` (:issue:`34573`)
- Bug in :meth:`Series.loc` when used with a :class:`MultiIndex` would raise an IndexingError when accessing a None value (:issue:`34318`)
- Bug in :meth:`DataFrame.reset_index` and :meth:`Series.reset_index` would not preserve data types on an empty :class:`DataFrame` or :class:`Series` with a :class:`MultiIndex` (:issue:`19602`)
- Bug in :class:`Series` and :class:`DataFrame` indexing with a ``time`` key on a :class:`DatetimeIndex` with ``NaT`` entries, where the key could incorrectly match the ``NaT`` entries (:issue:`35114`)

Missing
^^^^^^^
Expand Down
22 changes: 0 additions & 22 deletions pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ from pandas._libs.tslibs.ccalendar import (
get_locale_names, MONTHS_FULL, DAYS_FULL,
)
from pandas._libs.tslibs.ccalendar cimport (
DAY_NANOS,
get_days_in_month, is_leapyear, dayofweek, get_week_of_year,
get_day_of_year, get_iso_calendar, iso_calendar_t,
month_offset,
Expand All @@ -26,27 +25,6 @@ from pandas._libs.tslibs.np_datetime cimport (
from pandas._libs.tslibs.nattype cimport NPY_NAT


def get_time_micros(const int64_t[:] dtindex):
    """
    Return the number of microseconds in the time component of each
    nanosecond timestamp, with ``-1`` for NaT entries.

    Parameters
    ----------
    dtindex : ndarray[int64_t]
        Nanosecond timestamps; missing values are marked with ``NPY_NAT``.

    Returns
    -------
    micros : ndarray[int64_t]
        Same length as ``dtindex``. Entries for real timestamps are
        non-negative; entries for NaT are ``-1`` so that they can never
        compare equal to the microsecond count of an actual time of day.
    """
    cdef:
        ndarray[int64_t] micros

    values = np.asarray(dtindex)

    micros = np.mod(values, DAY_NANOS, dtype=np.int64)
    micros //= 1000

    # NPY_NAT is a sentinel, not a timestamp.  Left unmasked, its modulo is
    # an ordinary-looking microsecond count that can collide with a genuine
    # time-of-day key, so overwrite those positions with -1.
    micros[values == NPY_NAT] = -1
    return micros


@cython.wraparound(False)
@cython.boundscheck(False)
def build_field_sarray(const int64_t[:] dtindex):
Expand Down
23 changes: 17 additions & 6 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np

from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib
from pandas._libs.tslibs import Resolution, fields, parsing, timezones, to_offset
from pandas._libs.tslibs import Resolution, parsing, timezones, to_offset
from pandas._libs.tslibs.offsets import prefix_mapping
from pandas._typing import DtypeObj, Label
from pandas.errors import InvalidIndexError
Expand Down Expand Up @@ -86,7 +86,6 @@ def _new_DatetimeIndex(cls, d):
"tzinfo",
"dtype",
"to_pydatetime",
"_local_timestamps",
"_has_same_tz",
"_format_native_types",
"date",
Expand Down Expand Up @@ -380,10 +379,22 @@ def union_many(self, others):
# --------------------------------------------------------------------

def _get_time_micros(self):
    """
    Return the number of microseconds since midnight.

    NaT entries are reported as ``-1`` so that they can never compare
    equal to the microsecond count of an actual ``time`` key.

    Returns
    -------
    ndarray[int64_t]
    """
    values = self.asi8
    if self.tz is not None and not timezones.is_utc(self.tz):
        # tz-aware, non-UTC index: compute on the values returned by
        # _local_timestamps() instead of the raw asi8 values
        values = self._data._local_timestamps()

    # nanoseconds within the day, then truncate to whole microseconds
    nanos = values % (24 * 3600 * 1_000_000_000)
    micros = nanos // 1000

    # The modulo of the NaT sentinel is an ordinary-looking value; mask it
    # out so NaT positions never match a real time of day.
    micros[self._isnan] = -1
    return micros

def to_series(self, keep_tz=lib.no_default, index=None, name=None):
"""
Expand Down Expand Up @@ -1094,6 +1105,6 @@ def bdate_range(
)


def _time_to_micros(time_obj: time) -> int:
    """
    Convert a ``datetime.time`` to the number of microseconds since midnight.

    Only the hour, minute, second and microsecond fields are read.

    Parameters
    ----------
    time_obj : datetime.time

    Returns
    -------
    int
    """
    seconds = time_obj.hour * 60 * 60 + 60 * time_obj.minute + time_obj.second
    return 1_000_000 * seconds + time_obj.microsecond
10 changes: 10 additions & 0 deletions pandas/tests/indexes/datetimes/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,16 @@ def test_get_loc(self):
with pytest.raises(NotImplementedError, match=msg):
idx.get_loc(time(12, 30), method="pad")

def test_get_loc_time_nat(self):
    # GH#35114
    # Case where the key's total microseconds (763145224) happens to match
    #  iNaT % (24 * 3600 * 10**9) // 1000, i.e. the value the
    #  nanoseconds-per-day modulo yields for NaT when NaT is not masked out
    tic = time(minute=12, second=43, microsecond=145224)
    dti = pd.DatetimeIndex([pd.NaT])

    # NaT must not be reported as a match, so the expected result is empty
    loc = dti.get_loc(tic)
    expected = np.array([], dtype=np.intp)
    tm.assert_numpy_array_equal(loc, expected)

def test_get_loc_tz_aware(self):
# https://github.com/pandas-dev/pandas/issues/32140
dti = pd.date_range(
Expand Down