
Commit e9350a4

infer_freq handle non-nano (#47126)
* infer_freq handle non-nano
* remove unused import
1 parent adc9830 commit e9350a4
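
In effect, frequency inference now works for DatetimeArray / TimedeltaArray backed by non-nanosecond numpy dtypes. A minimal sketch of the new behaviour, mirroring the test added in this commit (it uses the private _simple_new constructor, just as the test does):

    import numpy as np
    from pandas.core.arrays import DatetimeArray
    import pandas.tseries.frequencies as frequencies

    # ten int64 values 0..9 reinterpreted as second-resolution datetimes
    arr = np.arange(10).astype(np.int64).view("M8[s]")
    dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)

    # the 1-second spacing is now recognised instead of being read as nanoseconds
    assert frequencies.infer_freq(dta) == "S"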

File tree: 10 files changed, +78 -34 lines

pandas/_libs/tslibs/__init__.py

+7 -1

@@ -26,11 +26,16 @@
     "BaseOffset",
     "tz_compare",
     "is_unitless",
+    "get_unit_from_dtype",
+    "periods_per_day",
 ]

 from pandas._libs.tslibs import dtypes
 from pandas._libs.tslibs.conversion import localize_pydatetime
-from pandas._libs.tslibs.dtypes import Resolution
+from pandas._libs.tslibs.dtypes import (
+    Resolution,
+    periods_per_day,
+)
 from pandas._libs.tslibs.nattype import (
     NaT,
     NaTType,
@@ -41,6 +46,7 @@
     OutOfBoundsDatetime,
     OutOfBoundsTimedelta,
     is_unitless,
+    py_get_unit_from_dtype as get_unit_from_dtype,
 )
 from pandas._libs.tslibs.offsets import (
     BaseOffset,
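
The two new re-exports give Python-level code a way to go from a numpy datetime64/timedelta64 dtype to its resolution code and from there to period counts. A rough usage sketch, assuming a second-resolution datetime64 dtype (the return value of get_unit_from_dtype is the integer NPY_DATETIMEUNIT code):

    import numpy as np
    from pandas._libs.tslibs import get_unit_from_dtype, periods_per_day

    reso = get_unit_from_dtype(np.dtype("M8[s]"))  # NPY_DATETIMEUNIT code for seconds
    ppd = periods_per_day(reso)                    # 86_400 seconds in a day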

pandas/_libs/tslibs/dtypes.pxd

+1 -1

@@ -5,7 +5,7 @@ from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT

 cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
 cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil
-cdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
+cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
 cdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1

 cdef dict attrname_to_abbrevs

pandas/_libs/tslibs/dtypes.pyi

+2

@@ -5,6 +5,8 @@ from enum import Enum
 _attrname_to_abbrevs: dict[str, str]
 _period_code_map: dict[str, int]

+def periods_per_day(reso: int) -> int: ...
+
 class PeriodDtypeBase:
     _dtype_code: int  # PeriodDtypeCode

pandas/_libs/tslibs/dtypes.pyx

+1 -1

@@ -318,7 +318,7 @@ cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil:


 # TODO: use in _matplotlib.converter?
-cdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns) except? -1:
+cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns) except? -1:
     """
     How many of the given time units fit into a single day?
     """

pandas/_libs/tslibs/fields.pyi

+2 -1

@@ -4,6 +4,7 @@ from pandas._typing import npt

 def build_field_sarray(
     dtindex: npt.NDArray[np.int64],  # const int64_t[:]
+    reso: int,  # NPY_DATETIMEUNIT
 ) -> np.ndarray: ...
 def month_position_check(fields, weekdays) -> str | None: ...
 def get_date_name_field(
@@ -33,7 +34,7 @@ def isleapyear_arr(
 ) -> npt.NDArray[np.bool_]: ...
 def build_isocalendar_sarray(
     dtindex: npt.NDArray[np.int64],  # const int64_t[:]
-    reso: int = ...,  # NPY_DATETIMEUNIT
+    reso: int,  # NPY_DATETIMEUNIT
 ) -> np.ndarray: ...
 def _get_locale_names(name_type: str, locale: str | None = ...): ...

pandas/_libs/tslibs/fields.pyx

+3 -4

@@ -45,7 +45,6 @@ from pandas._libs.tslibs.nattype cimport NPY_NAT
 from pandas._libs.tslibs.np_datetime cimport (
     NPY_DATETIMEUNIT,
     NPY_FR_ns,
-    dt64_to_dtstruct,
     get_unit_from_dtype,
     npy_datetimestruct,
     pandas_datetime_to_datetimestruct,
@@ -56,7 +55,7 @@ from pandas._libs.tslibs.np_datetime cimport (

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def build_field_sarray(const int64_t[:] dtindex):
+def build_field_sarray(const int64_t[:] dtindex, NPY_DATETIMEUNIT reso):
     """
     Datetime as int64 representation to a structured array of fields
     """
@@ -86,7 +85,7 @@ def build_field_sarray(const int64_t[:] dtindex):
     mus = out['u']

     for i in range(count):
-        dt64_to_dtstruct(dtindex[i], &dts)
+        pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts)
         years[i] = dts.year
         months[i] = dts.month
         days[i] = dts.day
@@ -565,7 +564,7 @@ cpdef isleapyear_arr(ndarray years):

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def build_isocalendar_sarray(const int64_t[:] dtindex, NPY_DATETIMEUNIT reso=NPY_FR_ns):
+def build_isocalendar_sarray(const int64_t[:] dtindex, NPY_DATETIMEUNIT reso):
     """
     Given a int64-based datetime array, return the ISO 8601 year, week, and day
     as a structured array.
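
build_field_sarray and build_isocalendar_sarray now take the resolution explicitly because the int64 values they decompose are counts of whatever unit the backing array stores, not necessarily nanoseconds. A numpy-level illustration of why the unit matters (not the Cython code path itself):

    import numpy as np

    i8 = np.int64(86_400)
    np.array(i8, dtype="M8[s]")   # 1970-01-02T00:00:00 if read as seconds
    np.array(i8, dtype="M8[ns]")  # 1970-01-01T00:00:00.000086400 if read as nanoseconds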

pandas/core/arrays/datetimelike.py

+2 -2

@@ -35,6 +35,7 @@
     Tick,
     Timestamp,
     delta_to_nanoseconds,
+    get_unit_from_dtype,
     iNaT,
     ints_to_pydatetime,
     ints_to_pytimedelta,
@@ -44,7 +45,6 @@
     RoundTo,
     round_nsint64,
 )
-from pandas._libs.tslibs.np_datetime import py_get_unit_from_dtype
 from pandas._libs.tslibs.timestamps import integer_op_not_supported
 from pandas._typing import (
     ArrayLike,
@@ -1807,7 +1807,7 @@ class TimelikeOps(DatetimeLikeArrayMixin):

     @cache_readonly
     def _reso(self) -> int:
-        return py_get_unit_from_dtype(self._ndarray.dtype)
+        return get_unit_from_dtype(self._ndarray.dtype)

     def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
         if (

pandas/tests/tseries/frequencies/test_inference.py

+16

@@ -22,6 +22,10 @@
     period_range,
 )
 import pandas._testing as tm
+from pandas.core.arrays import (
+    DatetimeArray,
+    TimedeltaArray,
+)
 from pandas.core.tools.datetimes import to_datetime

 import pandas.tseries.frequencies as frequencies
@@ -506,3 +510,15 @@ def test_ms_vs_capital_ms():
 def test_infer_freq_warn_deprecated():
     with tm.assert_produces_warning(FutureWarning):
         frequencies.infer_freq(date_range(2022, periods=3), warn=False)
+
+
+def test_infer_freq_non_nano():
+    arr = np.arange(10).astype(np.int64).view("M8[s]")
+    dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
+    res = frequencies.infer_freq(dta)
+    assert res == "S"
+
+    arr2 = arr.view("m8[ms]")
+    tda = TimedeltaArray._simple_new(arr2, dtype=arr2.dtype)
+    res2 = frequencies.infer_freq(tda)
+    assert res2 == "L"

pandas/tests/tslibs/test_api.py

+2

@@ -51,6 +51,8 @@ def test_namespace():
         "to_offset",
         "tz_compare",
         "is_unitless",
+        "get_unit_from_dtype",
+        "periods_per_day",
     ]

     expected = set(submodules + api)

pandas/tseries/frequencies.py

+42 -24

@@ -7,6 +7,8 @@
 from pandas._libs.algos import unique_deltas
 from pandas._libs.tslibs import (
     Timestamp,
+    get_unit_from_dtype,
+    periods_per_day,
     tz_convert_from_utc,
 )
 from pandas._libs.tslibs.ccalendar import (
@@ -37,17 +39,13 @@
     is_period_dtype,
     is_timedelta64_dtype,
 )
-from pandas.core.dtypes.generic import ABCSeries
+from pandas.core.dtypes.generic import (
+    ABCIndex,
+    ABCSeries,
+)

 from pandas.core.algorithms import unique

-_ONE_MICRO = 1000
-_ONE_MILLI = _ONE_MICRO * 1000
-_ONE_SECOND = _ONE_MILLI * 1000
-_ONE_MINUTE = 60 * _ONE_SECOND
-_ONE_HOUR = 60 * _ONE_MINUTE
-_ONE_DAY = 24 * _ONE_HOUR
-
 # ---------------------------------------------------------------------
 # Offset names ("time rules") and related functions

@@ -213,6 +211,18 @@ def __init__(self, index, warn: bool = True) -> None:
         self.index = index
         self.i8values = index.asi8

+        # For get_unit_from_dtype we need the dtype to the underlying ndarray,
+        # which for tz-aware is not the same as index.dtype
+        if isinstance(index, ABCIndex):
+            # error: Item "ndarray[Any, Any]" of "Union[ExtensionArray,
+            # ndarray[Any, Any]]" has no attribute "_ndarray"
+            self._reso = get_unit_from_dtype(
+                index._data._ndarray.dtype  # type: ignore[union-attr]
+            )
+        else:
+            # otherwise we have DTA/TDA
+            self._reso = get_unit_from_dtype(index._ndarray.dtype)
+
         # This moves the values, which are implicitly in UTC, to the
         # the timezone so they are in local time
         if hasattr(index, "tz"):
@@ -266,7 +276,8 @@ def get_freq(self) -> str | None:
             return None

         delta = self.deltas[0]
-        if delta and _is_multiple(delta, _ONE_DAY):
+        ppd = periods_per_day(self._reso)
+        if delta and _is_multiple(delta, ppd):
             return self._infer_daily_rule()

         # Business hourly, maybe. 17: one day / 65: one weekend
@@ -280,36 +291,41 @@ def get_freq(self) -> str | None:
             return None

         delta = self.deltas_asi8[0]
-        if _is_multiple(delta, _ONE_HOUR):
+        pph = ppd // 24
+        ppm = pph // 60
+        pps = ppm // 60
+        if _is_multiple(delta, pph):
             # Hours
-            return _maybe_add_count("H", delta / _ONE_HOUR)
-        elif _is_multiple(delta, _ONE_MINUTE):
+            return _maybe_add_count("H", delta / pph)
+        elif _is_multiple(delta, ppm):
             # Minutes
-            return _maybe_add_count("T", delta / _ONE_MINUTE)
-        elif _is_multiple(delta, _ONE_SECOND):
+            return _maybe_add_count("T", delta / ppm)
+        elif _is_multiple(delta, pps):
             # Seconds
-            return _maybe_add_count("S", delta / _ONE_SECOND)
-        elif _is_multiple(delta, _ONE_MILLI):
+            return _maybe_add_count("S", delta / pps)
+        elif _is_multiple(delta, (pps // 1000)):
             # Milliseconds
-            return _maybe_add_count("L", delta / _ONE_MILLI)
-        elif _is_multiple(delta, _ONE_MICRO):
+            return _maybe_add_count("L", delta / (pps // 1000))
+        elif _is_multiple(delta, (pps // 1_000_000)):
             # Microseconds
-            return _maybe_add_count("U", delta / _ONE_MICRO)
+            return _maybe_add_count("U", delta / (pps // 1_000_000))
         else:
             # Nanoseconds
             return _maybe_add_count("N", delta)

     @cache_readonly
     def day_deltas(self):
-        return [x / _ONE_DAY for x in self.deltas]
+        ppd = periods_per_day(self._reso)
+        return [x / ppd for x in self.deltas]

     @cache_readonly
     def hour_deltas(self):
-        return [x / _ONE_HOUR for x in self.deltas]
+        pph = periods_per_day(self._reso) // 24
+        return [x / pph for x in self.deltas]

     @cache_readonly
     def fields(self) -> np.ndarray:  # structured array of fields
-        return build_field_sarray(self.i8values)
+        return build_field_sarray(self.i8values, reso=self._reso)

     @cache_readonly
     def rep_stamp(self):
@@ -360,7 +376,8 @@ def _infer_daily_rule(self) -> str | None:
         return None

     def _get_daily_rule(self) -> str | None:
-        days = self.deltas[0] / _ONE_DAY
+        ppd = periods_per_day(self._reso)
+        days = self.deltas[0] / ppd
         if days % 7 == 0:
             # Weekly
             wd = int_to_weekday[self.rep_stamp.weekday()]
@@ -403,7 +420,8 @@ def _is_business_daily(self) -> bool:
         # probably business daily, but need to confirm
         first_weekday = self.index[0].weekday()
         shifts = np.diff(self.index.asi8)
-        shifts = np.floor_divide(shifts, _ONE_DAY)
+        ppd = periods_per_day(self._reso)
+        shifts = np.floor_divide(shifts, ppd)
         weekdays = np.mod(first_weekday + np.cumsum(shifts), 7)

         return bool(
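
The thresholds in get_freq are thus derived from the array's resolution rather than from hard-coded nanosecond constants. A small worked sketch of the arithmetic, assuming second-resolution data spaced two hours apart:

    # assuming reso == seconds, so periods_per_day(reso) == 86_400
    ppd = 86_400
    pph = ppd // 24      # 3_600 periods (seconds) per hour
    ppm = pph // 60      # 60 periods per minute
    pps = ppm // 60      # 1 period per second

    delta = 2 * 3_600    # observed spacing: two hours, expressed in seconds
    assert delta % pph == 0 and delta // pph == 2   # -> inferred freq "2H"

Note that at second resolution pps == 1, so every delta satisfies the seconds branch before any sub-second check is reached; the millisecond and microsecond branches only come into play for arrays stored at finer resolutions.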
