From 6fcdfb02d0a59a61266e59ea9caa9eb826600540 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 25 May 2022 20:10:25 -0700 Subject: [PATCH 1/2] infer_freq handle non-nano --- pandas/_libs/tslibs/__init__.py | 8 ++- pandas/_libs/tslibs/dtypes.pxd | 2 +- pandas/_libs/tslibs/dtypes.pyi | 2 + pandas/_libs/tslibs/dtypes.pyx | 2 +- pandas/_libs/tslibs/fields.pyi | 3 +- pandas/_libs/tslibs/fields.pyx | 6 +- pandas/core/arrays/datetimelike.py | 4 +- .../tseries/frequencies/test_inference.py | 16 +++++ pandas/tests/tslibs/test_api.py | 2 + pandas/tseries/frequencies.py | 66 ++++++++++++------- 10 files changed, 78 insertions(+), 33 deletions(-) diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 72bc6886b5175..73c93eb905ab2 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -26,11 +26,16 @@ "BaseOffset", "tz_compare", "is_unitless", + "get_unit_from_dtype", + "periods_per_day", ] from pandas._libs.tslibs import dtypes from pandas._libs.tslibs.conversion import localize_pydatetime -from pandas._libs.tslibs.dtypes import Resolution +from pandas._libs.tslibs.dtypes import ( + Resolution, + periods_per_day, +) from pandas._libs.tslibs.nattype import ( NaT, NaTType, @@ -41,6 +46,7 @@ OutOfBoundsDatetime, OutOfBoundsTimedelta, is_unitless, + py_get_unit_from_dtype as get_unit_from_dtype, ) from pandas._libs.tslibs.offsets import ( BaseOffset, diff --git a/pandas/_libs/tslibs/dtypes.pxd b/pandas/_libs/tslibs/dtypes.pxd index 833ba4ce70bd7..84a3af0490c0a 100644 --- a/pandas/_libs/tslibs/dtypes.pxd +++ b/pandas/_libs/tslibs/dtypes.pxd @@ -5,7 +5,7 @@ from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit) cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil -cdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1 +cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1 cdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? 
-1 cdef dict attrname_to_abbrevs diff --git a/pandas/_libs/tslibs/dtypes.pyi b/pandas/_libs/tslibs/dtypes.pyi index 31ed25791389f..edc3de05f5df2 100644 --- a/pandas/_libs/tslibs/dtypes.pyi +++ b/pandas/_libs/tslibs/dtypes.pyi @@ -5,6 +5,8 @@ from enum import Enum _attrname_to_abbrevs: dict[str, str] _period_code_map: dict[str, int] +def periods_per_day(reso: int) -> int: ... + class PeriodDtypeBase: _dtype_code: int # PeriodDtypeCode diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 3be21ba754f27..5da3944bfb147 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -318,7 +318,7 @@ cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil: # TODO: use in _matplotlib.converter? -cdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns) except? -1: +cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns) except? -1: """ How many of the given time units fit into a single day? """ diff --git a/pandas/_libs/tslibs/fields.pyi b/pandas/_libs/tslibs/fields.pyi index b1d9e0342f81e..71363ad836370 100644 --- a/pandas/_libs/tslibs/fields.pyi +++ b/pandas/_libs/tslibs/fields.pyi @@ -4,6 +4,7 @@ from pandas._typing import npt def build_field_sarray( dtindex: npt.NDArray[np.int64], # const int64_t[:] + reso: int, # NPY_DATETIMEUNIT ) -> np.ndarray: ... def month_position_check(fields, weekdays) -> str | None: ... def get_date_name_field( @@ -33,7 +34,7 @@ def isleapyear_arr( ) -> npt.NDArray[np.bool_]: ... def build_isocalendar_sarray( dtindex: npt.NDArray[np.int64], # const int64_t[:] - reso: int = ..., # NPY_DATETIMEUNIT + reso: int, # NPY_DATETIMEUNIT ) -> np.ndarray: ... def _get_locale_names(name_type: str, locale: str | None = ...): ... 
diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 5865b8c6877b0..e5ca8b373519a 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -56,7 +56,7 @@ from pandas._libs.tslibs.np_datetime cimport ( @cython.wraparound(False) @cython.boundscheck(False) -def build_field_sarray(const int64_t[:] dtindex): +def build_field_sarray(const int64_t[:] dtindex, NPY_DATETIMEUNIT reso): """ Datetime as int64 representation to a structured array of fields """ @@ -86,7 +86,7 @@ def build_field_sarray(const int64_t[:] dtindex): mus = out['u'] for i in range(count): - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) years[i] = dts.year months[i] = dts.month days[i] = dts.day @@ -565,7 +565,7 @@ cpdef isleapyear_arr(ndarray years): @cython.wraparound(False) @cython.boundscheck(False) -def build_isocalendar_sarray(const int64_t[:] dtindex, NPY_DATETIMEUNIT reso=NPY_FR_ns): +def build_isocalendar_sarray(const int64_t[:] dtindex, NPY_DATETIMEUNIT reso): """ Given a int64-based datetime array, return the ISO 8601 year, week, and day as a structured array. 
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4ee5838ab5c17..ad346dec01f2f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -35,6 +35,7 @@ Tick, Timestamp, delta_to_nanoseconds, + get_unit_from_dtype, iNaT, ints_to_pydatetime, ints_to_pytimedelta, @@ -44,7 +45,6 @@ RoundTo, round_nsint64, ) -from pandas._libs.tslibs.np_datetime import py_get_unit_from_dtype from pandas._libs.tslibs.timestamps import integer_op_not_supported from pandas._typing import ( ArrayLike, @@ -1813,7 +1813,7 @@ class TimelikeOps(DatetimeLikeArrayMixin): @cache_readonly def _reso(self) -> int: - return py_get_unit_from_dtype(self._ndarray.dtype) + return get_unit_from_dtype(self._ndarray.dtype) def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): if ( diff --git a/pandas/tests/tseries/frequencies/test_inference.py b/pandas/tests/tseries/frequencies/test_inference.py index d15164bfeac64..396cb950bd8b2 100644 --- a/pandas/tests/tseries/frequencies/test_inference.py +++ b/pandas/tests/tseries/frequencies/test_inference.py @@ -22,6 +22,10 @@ period_range, ) import pandas._testing as tm +from pandas.core.arrays import ( + DatetimeArray, + TimedeltaArray, +) from pandas.core.tools.datetimes import to_datetime import pandas.tseries.frequencies as frequencies @@ -506,3 +510,15 @@ def test_ms_vs_capital_ms(): def test_infer_freq_warn_deprecated(): with tm.assert_produces_warning(FutureWarning): frequencies.infer_freq(date_range(2022, periods=3), warn=False) + + +def test_infer_freq_non_nano(): + arr = np.arange(10).astype(np.int64).view("M8[s]") + dta = DatetimeArray._simple_new(arr, dtype=arr.dtype) + res = frequencies.infer_freq(dta) + assert res == "S" + + arr2 = arr.view("m8[ms]") + tda = TimedeltaArray._simple_new(arr2, dtype=arr2.dtype) + res2 = frequencies.infer_freq(tda) + assert res2 == "L" diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index 
273a7985ff50b..5021b85867903 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -51,6 +51,8 @@ def test_namespace(): "to_offset", "tz_compare", "is_unitless", + "get_unit_from_dtype", + "periods_per_day", ] expected = set(submodules + api) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index bc851447b59e1..c541003f1160c 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -7,6 +7,8 @@ from pandas._libs.algos import unique_deltas from pandas._libs.tslibs import ( Timestamp, + get_unit_from_dtype, + periods_per_day, tz_convert_from_utc, ) from pandas._libs.tslibs.ccalendar import ( @@ -37,17 +39,13 @@ is_period_dtype, is_timedelta64_dtype, ) -from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) from pandas.core.algorithms import unique -_ONE_MICRO = 1000 -_ONE_MILLI = _ONE_MICRO * 1000 -_ONE_SECOND = _ONE_MILLI * 1000 -_ONE_MINUTE = 60 * _ONE_SECOND -_ONE_HOUR = 60 * _ONE_MINUTE -_ONE_DAY = 24 * _ONE_HOUR - # --------------------------------------------------------------------- # Offset names ("time rules") and related functions @@ -213,6 +211,18 @@ def __init__(self, index, warn: bool = True) -> None: self.index = index self.i8values = index.asi8 + # For get_unit_from_dtype we need the dtype of the underlying ndarray, + # which for tz-aware is not the same as index.dtype + if isinstance(index, ABCIndex): + # error: Item "ndarray[Any, Any]" of "Union[ExtensionArray, + # ndarray[Any, Any]]" has no attribute "_ndarray" + self._reso = get_unit_from_dtype( + index._data._ndarray.dtype # type: ignore[union-attr] + ) + else: + # otherwise we have DTA/TDA + self._reso = get_unit_from_dtype(index._ndarray.dtype) + # This moves the values, which are implicitly in UTC, to the # the timezone so they are in local time if hasattr(index, "tz"): @@ -266,7 +276,8 @@ def get_freq(self) -> str | None: return None delta = 
self.deltas[0] - if delta and _is_multiple(delta, _ONE_DAY): + ppd = periods_per_day(self._reso) + if delta and _is_multiple(delta, ppd): return self._infer_daily_rule() # Business hourly, maybe. 17: one day / 65: one weekend @@ -280,36 +291,41 @@ def get_freq(self) -> str | None: return None delta = self.deltas_asi8[0] - if _is_multiple(delta, _ONE_HOUR): + pph = ppd // 24 + ppm = pph // 60 + pps = ppm // 60 + if _is_multiple(delta, pph): # Hours - return _maybe_add_count("H", delta / _ONE_HOUR) - elif _is_multiple(delta, _ONE_MINUTE): + return _maybe_add_count("H", delta / pph) + elif _is_multiple(delta, ppm): # Minutes - return _maybe_add_count("T", delta / _ONE_MINUTE) - elif _is_multiple(delta, _ONE_SECOND): + return _maybe_add_count("T", delta / ppm) + elif _is_multiple(delta, pps): # Seconds - return _maybe_add_count("S", delta / _ONE_SECOND) - elif _is_multiple(delta, _ONE_MILLI): + return _maybe_add_count("S", delta / pps) + elif _is_multiple(delta, (pps // 1000)): # Milliseconds - return _maybe_add_count("L", delta / _ONE_MILLI) - elif _is_multiple(delta, _ONE_MICRO): + return _maybe_add_count("L", delta / (pps // 1000)) + elif _is_multiple(delta, (pps // 1_000_000)): # Microseconds - return _maybe_add_count("U", delta / _ONE_MICRO) + return _maybe_add_count("U", delta / (pps // 1_000_000)) else: # Nanoseconds return _maybe_add_count("N", delta) @cache_readonly def day_deltas(self): - return [x / _ONE_DAY for x in self.deltas] + ppd = periods_per_day(self._reso) + return [x / ppd for x in self.deltas] @cache_readonly def hour_deltas(self): - return [x / _ONE_HOUR for x in self.deltas] + pph = periods_per_day(self._reso) // 24 + return [x / pph for x in self.deltas] @cache_readonly def fields(self) -> np.ndarray: # structured array of fields - return build_field_sarray(self.i8values) + return build_field_sarray(self.i8values, reso=self._reso) @cache_readonly def rep_stamp(self): @@ -360,7 +376,8 @@ def _infer_daily_rule(self) -> str | None: return None def 
_get_daily_rule(self) -> str | None: - days = self.deltas[0] / _ONE_DAY + ppd = periods_per_day(self._reso) + days = self.deltas[0] / ppd if days % 7 == 0: # Weekly wd = int_to_weekday[self.rep_stamp.weekday()] @@ -403,7 +420,8 @@ def _is_business_daily(self) -> bool: # probably business daily, but need to confirm first_weekday = self.index[0].weekday() shifts = np.diff(self.index.asi8) - shifts = np.floor_divide(shifts, _ONE_DAY) + ppd = periods_per_day(self._reso) + shifts = np.floor_divide(shifts, ppd) weekdays = np.mod(first_weekday + np.cumsum(shifts), 7) return bool( From f3b7040a0360db493fbceebc598d47fcbdcd4d13 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 25 May 2022 20:12:16 -0700 Subject: [PATCH 2/2] remove unused import --- pandas/_libs/tslibs/fields.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index e5ca8b373519a..bc5e5b37b9a76 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -45,7 +45,6 @@ from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, - dt64_to_dtstruct, get_unit_from_dtype, npy_datetimestruct, pandas_datetime_to_datetimestruct,