From 0a01634ab054022cf1b6ae9d41de66ba00098b8d Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 Jul 2020 11:36:47 -0700 Subject: [PATCH 1/3] BUG: get_loc with time object matching NaT micros --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/_libs/tslibs/fields.pyx | 22 --------------- pandas/core/indexes/datetimes.py | 27 +++++++++++++------ .../tests/indexes/datetimes/test_indexing.py | 9 +++++++ 4 files changed, 29 insertions(+), 30 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9bd4ddbb624d9..1f7188147c865 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -976,6 +976,7 @@ Indexing - Bug in :meth:`DataFrame.loc` with dictionary of values changes columns with dtype of ``int`` to ``float`` (:issue:`34573`) - Bug in :meth:`Series.loc` when used with a :class:`MultiIndex` would raise an IndexingError when accessing a None value (:issue:`34318`) - Bug in :meth:`DataFrame.reset_index` and :meth:`Series.reset_index` would not preserve data types on an empty :class:`DataFrame` or :class:`Series` with a :class:`MultiIndex` (:issue:`19602`) +- Bug in :class:`Series` and :class:`DataFrame` indexing with a ``time`` key on a :class:`DatetimeIndex` with ``NaT`` entries (:issue:`???`) Missing ^^^^^^^ diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 126deb67e4189..582191838b648 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -15,7 +15,6 @@ from pandas._libs.tslibs.ccalendar import ( get_locale_names, MONTHS_FULL, DAYS_FULL, ) from pandas._libs.tslibs.ccalendar cimport ( - DAY_NANOS, get_days_in_month, is_leapyear, dayofweek, get_week_of_year, get_day_of_year, get_iso_calendar, iso_calendar_t, month_offset, @@ -26,27 +25,6 @@ from pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.nattype cimport NPY_NAT -def get_time_micros(const int64_t[:] dtindex): - """ - Return the number of microseconds in the time component of a - nanosecond timestamp. - - Parameters - ---------- - dtindex : ndarray[int64_t] - - Returns - ------- - micros : ndarray[int64_t] - """ - cdef: - ndarray[int64_t] micros - - micros = np.mod(dtindex, DAY_NANOS, dtype=np.int64) - micros //= 1000 - return micros - - @cython.wraparound(False) @cython.boundscheck(False) def build_field_sarray(const int64_t[:] dtindex): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 86c6cdf5b15c7..672331319c4f1 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -6,7 +6,7 @@ import numpy as np from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib -from pandas._libs.tslibs import Resolution, fields, parsing, timezones, to_offset +from pandas._libs.tslibs import Resolution, parsing, timezones, to_offset from pandas._libs.tslibs.offsets import prefix_mapping from pandas._typing import DtypeObj, Label from pandas.errors import InvalidIndexError @@ -86,7 +86,6 @@ def _new_DatetimeIndex(cls, d): "tzinfo", "dtype", "to_pydatetime", - "_local_timestamps", "_has_same_tz", "_format_native_types", "date", @@ -379,11 +378,23 @@ def union_many(self, others): # -------------------------------------------------------------------- - def _get_time_micros(self): + def _get_time_micros(self) -> np.ndarray: + """ + Return the number of microseconds since midnight. + + Returns + ------- + ndarray[int64_t] + """ values = self.asi8 if self.tz is not None and not timezones.is_utc(self.tz): values = self._data._local_timestamps() - return fields.get_time_micros(values) + + nanos = values % (24 * 3600 * 1_000_000_000) + micros = nanos // 1000 + + micros[self._isnan] = -1 + return micros def to_series(self, keep_tz=lib.no_default, index=None, name=None): """ @@ -734,7 +745,7 @@ def inferred_type(self) -> str: # sure we can't have ambiguous indexing return "datetime64" - def indexer_at_time(self, time, asof=False): + def indexer_at_time(self, time, asof: bool = False) -> np.ndarray: """ Return index locations of values at particular time of day (e.g. 9:30AM). @@ -1094,6 +1105,6 @@ def bdate_range( ) -def _time_to_micros(time): - seconds = time.hour * 60 * 60 + 60 * time.minute + time.second - return 1000000 * seconds + time.microsecond +def _time_to_micros(time_obj: time) -> int: + seconds = time_obj.hour * 60 * 60 + 60 * time_obj.minute + time_obj.second + return 1_000_000 * seconds + time_obj.microsecond diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index b1faaa2115f55..c3a03ac9bd23a 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -471,6 +471,15 @@ def test_get_loc(self): with pytest.raises(NotImplementedError, match=msg): idx.get_loc(time(12, 30), method="pad") + def test_get_loc_time_nat(self): + # Case where key's total microseconds happens to match iNaT % 1e6 // 1000 + tic = time(minute=12, second=43, microsecond=145224) + dti = pd.DatetimeIndex([pd.NaT]) + + loc = dti.get_loc(tic) + expected = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(loc, expected) + def test_get_loc_tz_aware(self): # https://github.com/pandas-dev/pandas/issues/32140 dti = pd.date_range( From 5564706dd9963a28d01ba0c6b043bfa34af4dcd9 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 Jul 2020 11:38:14 -0700 Subject: [PATCH 2/3] add GH refs --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/tests/indexes/datetimes/test_indexing.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 1f7188147c865..95a9a331e7f6b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -976,7 +976,7 @@ Indexing - Bug in :meth:`DataFrame.loc` with dictionary of values changes columns with dtype of ``int`` to ``float`` (:issue:`34573`) - Bug in :meth:`Series.loc` when used with a :class:`MultiIndex` would raise an IndexingError when accessing a None value (:issue:`34318`) - Bug in :meth:`DataFrame.reset_index` and :meth:`Series.reset_index` would not preserve data types on an empty :class:`DataFrame` or :class:`Series` with a :class:`MultiIndex` (:issue:`19602`) -- Bug in :class:`Series` and :class:`DataFrame` indexing with a ``time`` key on a :class:`DatetimeIndex` with ``NaT`` entries (:issue:`???`) +- Bug in :class:`Series` and :class:`DataFrame` indexing with a ``time`` key on a :class:`DatetimeIndex` with ``NaT`` entries (:issue:`35114`) Missing ^^^^^^^ diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index c3a03ac9bd23a..2678eb89d1112 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -472,6 +472,7 @@ def test_get_loc(self): idx.get_loc(time(12, 30), method="pad") def test_get_loc_time_nat(self): + # GH#35114 # Case where key's total microseconds happens to match iNaT % 1e6 // 1000 tic = time(minute=12, second=43, microsecond=145224) dti = pd.DatetimeIndex([pd.NaT]) From 1c88a6231ea639495cc2167c21cda2f26e2b4ed8 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 Jul 2020 18:30:19 -0700 Subject: [PATCH 3/3] mypy fixup --- pandas/core/indexes/datetimes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 672331319c4f1..0317d0b93859b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -378,7 +378,7 @@ def union_many(self, others): # -------------------------------------------------------------------- - def _get_time_micros(self) -> np.ndarray: + def _get_time_micros(self): """ Return the number of microseconds since midnight. @@ -745,7 +745,7 @@ def inferred_type(self) -> str: # sure we can't have ambiguous indexing return "datetime64" - def indexer_at_time(self, time, asof: bool = False) -> np.ndarray: + def indexer_at_time(self, time, asof=False): """ Return index locations of values at particular time of day (e.g. 9:30AM).