Skip to content

infer_freq handle non-nano #47126

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,16 @@
"BaseOffset",
"tz_compare",
"is_unitless",
"get_unit_from_dtype",
"periods_per_day",
]

from pandas._libs.tslibs import dtypes
from pandas._libs.tslibs.conversion import localize_pydatetime
from pandas._libs.tslibs.dtypes import Resolution
from pandas._libs.tslibs.dtypes import (
Resolution,
periods_per_day,
)
from pandas._libs.tslibs.nattype import (
NaT,
NaTType,
Expand All @@ -41,6 +46,7 @@
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
is_unitless,
py_get_unit_from_dtype as get_unit_from_dtype,
)
from pandas._libs.tslibs.offsets import (
BaseOffset,
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/dtypes.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT

cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil
cdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
cdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1

cdef dict attrname_to_abbrevs
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/dtypes.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ from enum import Enum
_attrname_to_abbrevs: dict[str, str]
_period_code_map: dict[str, int]

# How many units of resolution `reso` (an NPY_DATETIMEUNIT code) fit into a
# single day.  `reso` is optional: the Cython implementation declares a
# default for it (NPY_FR_ns), so the stub marks it as defaulted too.
def periods_per_day(reso: int = ...) -> int: ...

class PeriodDtypeBase:
_dtype_code: int # PeriodDtypeCode

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil:


# TODO: use in _matplotlib.converter?
cdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns) except? -1:
cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns) except? -1:
"""
How many of the given time units fit into a single day?
"""
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/tslibs/fields.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ from pandas._typing import npt

# Stub for the Cython function that converts an int64 datetime representation
# into a structured array of fields.  `reso` is the NPY_DATETIMEUNIT code of
# the values in `dtindex`; it is a required argument (no default).
def build_field_sarray(
dtindex: npt.NDArray[np.int64], # const int64_t[:]
reso: int, # NPY_DATETIMEUNIT
) -> np.ndarray: ...
def month_position_check(fields, weekdays) -> str | None: ...
def get_date_name_field(
Expand Down Expand Up @@ -33,7 +34,7 @@ def isleapyear_arr(
) -> npt.NDArray[np.bool_]: ...
def build_isocalendar_sarray(
dtindex: npt.NDArray[np.int64], # const int64_t[:]
reso: int = ..., # NPY_DATETIMEUNIT
reso: int, # NPY_DATETIMEUNIT
) -> np.ndarray: ...
def _get_locale_names(name_type: str, locale: str | None = ...): ...

Expand Down
7 changes: 3 additions & 4 deletions pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ from pandas._libs.tslibs.nattype cimport NPY_NAT
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
NPY_FR_ns,
dt64_to_dtstruct,
get_unit_from_dtype,
npy_datetimestruct,
pandas_datetime_to_datetimestruct,
Expand All @@ -56,7 +55,7 @@ from pandas._libs.tslibs.np_datetime cimport (

@cython.wraparound(False)
@cython.boundscheck(False)
def build_field_sarray(const int64_t[:] dtindex):
def build_field_sarray(const int64_t[:] dtindex, NPY_DATETIMEUNIT reso):
"""
Datetime as int64 representation to a structured array of fields
"""
Expand Down Expand Up @@ -86,7 +85,7 @@ def build_field_sarray(const int64_t[:] dtindex):
mus = out['u']

for i in range(count):
dt64_to_dtstruct(dtindex[i], &dts)
pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts)
years[i] = dts.year
months[i] = dts.month
days[i] = dts.day
Expand Down Expand Up @@ -565,7 +564,7 @@ cpdef isleapyear_arr(ndarray years):

@cython.wraparound(False)
@cython.boundscheck(False)
def build_isocalendar_sarray(const int64_t[:] dtindex, NPY_DATETIMEUNIT reso=NPY_FR_ns):
def build_isocalendar_sarray(const int64_t[:] dtindex, NPY_DATETIMEUNIT reso):
"""
Given an int64-based datetime array, return the ISO 8601 year, week, and day
as a structured array.
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
Tick,
Timestamp,
delta_to_nanoseconds,
get_unit_from_dtype,
iNaT,
ints_to_pydatetime,
ints_to_pytimedelta,
Expand All @@ -44,7 +45,6 @@
RoundTo,
round_nsint64,
)
from pandas._libs.tslibs.np_datetime import py_get_unit_from_dtype
from pandas._libs.tslibs.timestamps import integer_op_not_supported
from pandas._typing import (
ArrayLike,
Expand Down Expand Up @@ -1813,7 +1813,7 @@ class TimelikeOps(DatetimeLikeArrayMixin):

@cache_readonly
def _reso(self) -> int:
return py_get_unit_from_dtype(self._ndarray.dtype)
return get_unit_from_dtype(self._ndarray.dtype)

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
if (
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/tseries/frequencies/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
period_range,
)
import pandas._testing as tm
from pandas.core.arrays import (
DatetimeArray,
TimedeltaArray,
)
from pandas.core.tools.datetimes import to_datetime

import pandas.tseries.frequencies as frequencies
Expand Down Expand Up @@ -506,3 +510,15 @@ def test_ms_vs_capital_ms():
# Checks that calling infer_freq with the `warn` keyword supplied (here
# warn=False) emits a FutureWarning.
def test_infer_freq_warn_deprecated():
with tm.assert_produces_warning(FutureWarning):
frequencies.infer_freq(date_range(2022, periods=3), warn=False)


# infer_freq should work on DatetimeArray/TimedeltaArray whose underlying
# ndarray has a non-nanosecond resolution.
def test_infer_freq_non_nano():
# Ten consecutive int64 values viewed as datetime64[s]: one second apart.
arr = np.arange(10).astype(np.int64).view("M8[s]")
dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
res = frequencies.infer_freq(dta)
assert res == "S"

# Same integers reinterpreted as timedelta64[ms]: one millisecond apart.
arr2 = arr.view("m8[ms]")
tda = TimedeltaArray._simple_new(arr2, dtype=arr2.dtype)
res2 = frequencies.infer_freq(tda)
assert res2 == "L"
2 changes: 2 additions & 0 deletions pandas/tests/tslibs/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ def test_namespace():
"to_offset",
"tz_compare",
"is_unitless",
"get_unit_from_dtype",
"periods_per_day",
]

expected = set(submodules + api)
Expand Down
66 changes: 42 additions & 24 deletions pandas/tseries/frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from pandas._libs.algos import unique_deltas
from pandas._libs.tslibs import (
Timestamp,
get_unit_from_dtype,
periods_per_day,
tz_convert_from_utc,
)
from pandas._libs.tslibs.ccalendar import (
Expand Down Expand Up @@ -37,17 +39,13 @@
is_period_dtype,
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.generic import (
ABCIndex,
ABCSeries,
)

from pandas.core.algorithms import unique

_ONE_MICRO = 1000
_ONE_MILLI = _ONE_MICRO * 1000
_ONE_SECOND = _ONE_MILLI * 1000
_ONE_MINUTE = 60 * _ONE_SECOND
_ONE_HOUR = 60 * _ONE_MINUTE
_ONE_DAY = 24 * _ONE_HOUR

# ---------------------------------------------------------------------
# Offset names ("time rules") and related functions

Expand Down Expand Up @@ -213,6 +211,18 @@ def __init__(self, index, warn: bool = True) -> None:
self.index = index
self.i8values = index.asi8

# For get_unit_from_dtype we need the dtype of the underlying ndarray,
# which for tz-aware is not the same as index.dtype
if isinstance(index, ABCIndex):
# error: Item "ndarray[Any, Any]" of "Union[ExtensionArray,
# ndarray[Any, Any]]" has no attribute "_ndarray"
self._reso = get_unit_from_dtype(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the DTI/TDI cases already tested? I saw the DTA/TDA cases you added.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes

index._data._ndarray.dtype # type: ignore[union-attr]
)
else:
# otherwise we have DTA/TDA
self._reso = get_unit_from_dtype(index._ndarray.dtype)

# This moves the values, which are implicitly in UTC, to the
# timezone so they are in local time
if hasattr(index, "tz"):
Expand Down Expand Up @@ -266,7 +276,8 @@ def get_freq(self) -> str | None:
return None

delta = self.deltas[0]
if delta and _is_multiple(delta, _ONE_DAY):
ppd = periods_per_day(self._reso)
if delta and _is_multiple(delta, ppd):
return self._infer_daily_rule()

# Business hourly, maybe. 17: one day / 65: one weekend
Expand All @@ -280,36 +291,41 @@ def get_freq(self) -> str | None:
return None

delta = self.deltas_asi8[0]
if _is_multiple(delta, _ONE_HOUR):
pph = ppd // 24
ppm = pph // 60
pps = ppm // 60
if _is_multiple(delta, pph):
# Hours
return _maybe_add_count("H", delta / _ONE_HOUR)
elif _is_multiple(delta, _ONE_MINUTE):
return _maybe_add_count("H", delta / pph)
elif _is_multiple(delta, ppm):
# Minutes
return _maybe_add_count("T", delta / _ONE_MINUTE)
elif _is_multiple(delta, _ONE_SECOND):
return _maybe_add_count("T", delta / ppm)
elif _is_multiple(delta, pps):
# Seconds
return _maybe_add_count("S", delta / _ONE_SECOND)
elif _is_multiple(delta, _ONE_MILLI):
return _maybe_add_count("S", delta / pps)
elif _is_multiple(delta, (pps // 1000)):
# Milliseconds
return _maybe_add_count("L", delta / _ONE_MILLI)
elif _is_multiple(delta, _ONE_MICRO):
return _maybe_add_count("L", delta / (pps // 1000))
elif _is_multiple(delta, (pps // 1_000_000)):
# Microseconds
return _maybe_add_count("U", delta / _ONE_MICRO)
return _maybe_add_count("U", delta / (pps // 1_000_000))
else:
# Nanoseconds
return _maybe_add_count("N", delta)

@cache_readonly
def day_deltas(self):
return [x / _ONE_DAY for x in self.deltas]
ppd = periods_per_day(self._reso)
return [x / ppd for x in self.deltas]

@cache_readonly
def hour_deltas(self):
return [x / _ONE_HOUR for x in self.deltas]
pph = periods_per_day(self._reso) // 24
return [x / pph for x in self.deltas]

@cache_readonly
def fields(self) -> np.ndarray: # structured array of fields
return build_field_sarray(self.i8values)
return build_field_sarray(self.i8values, reso=self._reso)

@cache_readonly
def rep_stamp(self):
Expand Down Expand Up @@ -360,7 +376,8 @@ def _infer_daily_rule(self) -> str | None:
return None

def _get_daily_rule(self) -> str | None:
days = self.deltas[0] / _ONE_DAY
ppd = periods_per_day(self._reso)
days = self.deltas[0] / ppd
if days % 7 == 0:
# Weekly
wd = int_to_weekday[self.rep_stamp.weekday()]
Expand Down Expand Up @@ -403,7 +420,8 @@ def _is_business_daily(self) -> bool:
# probably business daily, but need to confirm
first_weekday = self.index[0].weekday()
shifts = np.diff(self.index.asi8)
shifts = np.floor_divide(shifts, _ONE_DAY)
ppd = periods_per_day(self._reso)
shifts = np.floor_divide(shifts, ppd)
weekdays = np.mod(first_weekday + np.cumsum(shifts), 7)

return bool(
Expand Down