From 361858a7a036aeb8ebe4b918692b6bdbbd9c3ff7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 27 May 2020 19:11:45 -0700 Subject: [PATCH 1/7] REF: make Resolution an Enum --- pandas/_libs/tslibs/resolution.pyx | 96 +++++++++++++++++------------- pandas/core/arrays/datetimelike.py | 12 +++- pandas/tseries/frequencies.py | 8 ++- 3 files changed, 69 insertions(+), 47 deletions(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 3d76483f76600..52851533cb159 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -1,3 +1,5 @@ +from enum import Enum + import numpy as np from numpy cimport ndarray, int64_t, int32_t @@ -25,6 +27,41 @@ cdef: int RESO_HR = 5 int RESO_DAY = 6 +reso_str_bump_map = { + "D": "H", + "H": "T", + "T": "S", + "S": "L", + "L": "U", + "U": "N", + "N": None, +} + +_freq_reso_map = {v: k for k, v in attrname_to_abbrevs.items()} + +_reso_str_map = { + RESO_NS: "nanosecond", + RESO_US: "microsecond", + RESO_MS: "millisecond", + RESO_SEC: "second", + RESO_MIN: "minute", + RESO_HR: "hour", + RESO_DAY: "day", +} + +_str_reso_map = {v: k for k, v in _reso_str_map.items()} + +# factor to multiply a value by to convert it to the next finer grained +# resolution +_reso_mult_map = { + RESO_NS: None, + RESO_US: 1000, + RESO_MS: 1000, + RESO_SEC: 1000, + RESO_MIN: 60, + RESO_HR: 60, + RESO_DAY: 24, +} # ---------------------------------------------------------------------- @@ -36,7 +73,7 @@ cpdef resolution(const int64_t[:] stamps, tz=None): if tz is not None: tz = maybe_get_tz(tz) - return _reso_local(stamps, tz) + return Resolution(_reso_local(stamps, tz)) cdef _reso_local(const int64_t[:] stamps, object tz): @@ -107,7 +144,7 @@ cdef inline int _reso_stamp(npy_datetimestruct *dts): return RESO_DAY -class Resolution: +class Resolution(Enum): # Note: cython won't allow us to reference the cdef versions at the # module level @@ -119,41 +156,14 @@ class Resolution: RESO_HR = 5 RESO_DAY = 6 - _reso_str_map = { - RESO_NS: 'nanosecond', - RESO_US: 'microsecond', - RESO_MS: 'millisecond', - RESO_SEC: 'second', - RESO_MIN: 'minute', - RESO_HR: 'hour', - RESO_DAY: 'day'} - - # factor to multiply a value by to convert it to the next finer grained - # resolution - _reso_mult_map = { - RESO_NS: None, - RESO_US: 1000, - RESO_MS: 1000, - RESO_SEC: 1000, - RESO_MIN: 60, - RESO_HR: 60, - RESO_DAY: 24} - - reso_str_bump_map = { - 'D': 'H', - 'H': 'T', - 'T': 'S', - 'S': 'L', - 'L': 'U', - 'U': 'N', - 'N': None} - - _str_reso_map = {v: k for k, v in _reso_str_map.items()} - - _freq_reso_map = {v: k for k, v in attrname_to_abbrevs.items()} + def __lt__(self, other): + return self.value < other.value + + def __ge__(self, other): + return self.value >= other.value @classmethod - def get_str(cls, reso: int) -> str: + def get_str(cls, reso: "Resolution") -> str: """ Return resolution str against resolution code. @@ -162,10 +172,10 @@ class Resolution: >>> Resolution.get_str(Resolution.RESO_SEC) 'second' """ - return cls._reso_str_map.get(reso, 'day') + return _reso_str_map[reso.value] @classmethod - def get_reso(cls, resostr: str) -> int: + def get_reso(cls, resostr: str) -> "Resolution": """ Return resolution str against resolution code. @@ -177,7 +187,7 @@ class Resolution: >>> Resolution.get_reso('second') == Resolution.RESO_SEC True """ - return cls._str_reso_map.get(resostr, cls.RESO_DAY) + return cls(_str_reso_map[resostr]) @classmethod def get_str_from_freq(cls, freq: str) -> str: @@ -189,10 +199,10 @@ class Resolution: >>> Resolution.get_str_from_freq('H') 'hour' """ - return cls._freq_reso_map.get(freq, 'day') + return _freq_reso_map[freq] @classmethod - def get_reso_from_freq(cls, freq: str) -> int: + def get_reso_from_freq(cls, freq: str) -> "Resolution": """ Return resolution code against frequency str. @@ -237,13 +247,13 @@ class Resolution: return int(value), freq else: start_reso = cls.get_reso_from_freq(freq) - if start_reso == 0: + if start_reso.value == 0: raise ValueError( "Could not convert to integer offset at any resolution" ) - next_value = cls._reso_mult_map[start_reso] * value - next_name = cls.reso_str_bump_map[freq] + next_value = _reso_mult_map[start_reso.value] * value + next_name = reso_str_bump_map[freq] return cls.get_stride_from_decimal(next_value, next_name) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index cf3cde155a3bb..07eaefb2b4164 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -804,7 +804,7 @@ def _validate_scalar(self, value, msg: str, cast_str: bool = False): return value def _validate_listlike( - self, value, opname: str, cast_str: bool = False, allow_object: bool = False, + self, value, opname: str, cast_str: bool = False, allow_object: bool = False ): if isinstance(value, type(self)): return value @@ -1104,13 +1104,21 @@ def inferred_freq(self): @property # NB: override with cache_readonly in immutable subclasses def _resolution(self): - return Resolution.get_reso_from_freq(self.freqstr) + try: + return Resolution.get_reso_from_freq(self.freqstr) + except KeyError: + return None @property # NB: override with cache_readonly in immutable subclasses def resolution(self) -> str: """ Returns day, hour, minute, second, millisecond or microsecond """ + if self._resolution is None: + if is_period_dtype(self.dtype): + # somewhere in the past it was decided we default to day + return "day" + # otherwise we fall through and will raise return Resolution.get_str(self._resolution) @classmethod diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 84113afdb0969..88a1d6a7bb411 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -12,7 +12,11 @@ import pandas._libs.tslibs.frequencies as libfreqs from pandas._libs.tslibs.offsets import _offset_to_period_map from pandas._libs.tslibs.parsing import get_rule_month -from pandas._libs.tslibs.resolution import Resolution, month_position_check +from pandas._libs.tslibs.resolution import ( + Resolution, + month_position_check, + reso_str_bump_map, +) from pandas._libs.tslibs.timezones import UTC from pandas._libs.tslibs.tzconversion import tz_convert from pandas.util._decorators import cache_readonly @@ -159,7 +163,7 @@ def to_offset(freq) -> Optional[DateOffset]: stride_sign = -1 if stride.startswith("-") else 1 if not stride: stride = 1 - if prefix in Resolution.reso_str_bump_map: + if prefix in reso_str_bump_map: stride, name = Resolution.get_stride_from_decimal( float(stride), prefix ) From 9c0e91f543a59df47248ce8f973ce8e0b428e6e9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 28 May 2020 08:53:57 -0700 Subject: [PATCH 2/7] more informative names --- pandas/_libs/tslibs/resolution.pyx | 16 +++++++++------- .../tests/tseries/frequencies/test_freq_code.py | 6 +++--- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index fb32286eee535..33915e481a8f8 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -37,7 +37,7 @@ reso_str_bump_map = { "N": None, } -_freq_reso_map = {v: k for k, v in attrname_to_abbrevs.items()} +_abbrev_to_attrnames = {v: k for k, v in attrname_to_abbrevs.items()} _reso_str_map = { RESO_NS: "nanosecond", @@ -182,22 +182,24 @@ class Resolution(Enum): return cls(_str_reso_map[resostr]) @classmethod - def get_str_from_freq(cls, freq: str) -> str: + def get_attrname_from_abbrev(cls, freq: str) -> str: """ Return resolution str against frequency str. Examples -------- - >>> Resolution.get_str_from_freq('H') + >>> Resolution.get_attrname_from_abbrev('H') 'hour' """ - return _freq_reso_map[freq] + return _abbrev_to_attrnames[freq] @classmethod def get_reso_from_freq(cls, freq: str) -> "Resolution": """ Return resolution code against frequency str. + `freq` is given the `offset.freqstr` for some DateOffset object. + Examples -------- >>> Resolution.get_reso_from_freq('H') @@ -206,16 +208,16 @@ class Resolution(Enum): >>> Resolution.get_reso_from_freq('H') == Resolution.RESO_HR True """ - return cls.get_reso(cls.get_str_from_freq(freq)) + return cls.get_reso(cls.get_attrname_from_abbrev(freq)) @classmethod - def get_stride_from_decimal(cls, value, freq): + def get_stride_from_decimal(cls, value: float, freq: str): """ Convert freq with decimal stride into a higher freq with integer stride Parameters ---------- - value : int or float + value : float freq : str Frequency string diff --git a/pandas/tests/tseries/frequencies/test_freq_code.py b/pandas/tests/tseries/frequencies/test_freq_code.py index 1c51ad0c45238..51554854378ea 100644 --- a/pandas/tests/tseries/frequencies/test_freq_code.py +++ b/pandas/tests/tseries/frequencies/test_freq_code.py @@ -104,13 +104,13 @@ def test_get_to_timestamp_base(freqstr, exp_freqstr): ("N", "nanosecond"), ], ) -def test_get_str_from_freq(freqstr, expected): - assert _reso.get_str_from_freq(freqstr) == expected +def test_get_attrname_from_abbrev(freqstr, expected): + assert _reso.get_attrname_from_abbrev(freqstr) == expected @pytest.mark.parametrize("freq", ["A", "Q", "M", "D", "H", "T", "S", "L", "U", "N"]) def test_get_freq_roundtrip(freq): - result = _attrname_to_abbrevs[_reso.get_str_from_freq(freq)] + result = _attrname_to_abbrevs[_reso.get_attrname_from_abbrev(freq)] assert freq == result From d242adecdc181b3227548d1510cc8c72ceb004b8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 28 May 2020 20:07:34 -0700 Subject: [PATCH 3/7] post-rebase cleanup --- pandas/_libs/tslibs/offsets.pyx | 2 +- pandas/_libs/tslibs/resolution.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index a27b0903e9d75..a56d5014713c2 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -3660,7 +3660,7 @@ cpdef to_offset(freq): from .resolution import Resolution # TODO: avoid runtime import - if prefix in Resolution.reso_str_bump_map: + if prefix in {"D", "H", "T", "S", "L", "U", "N"}: stride, name = Resolution.get_stride_from_decimal( float(stride), prefix ) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 33915e481a8f8..5d9db6ac8e989 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -198,7 +198,7 @@ class Resolution(Enum): """ Return resolution code against frequency str. - `freq` is given the `offset.freqstr` for some DateOffset object. + `freq` is given by the `offset.freqstr` for some DateOffset object. Examples -------- From f0a2f0a9d2ca0e960e428458a5b434ff94fd2f42 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 29 May 2020 10:04:00 -0700 Subject: [PATCH 4/7] rename --- pandas/_libs/tslibs/offsets.pyx | 5 +++-- pandas/_libs/tslibs/resolution.pyx | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index eb12ab3f80587..7f7dd62540387 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -3598,9 +3598,10 @@ cpdef to_offset(freq): if not stride: stride = 1 - from .resolution import Resolution # TODO: avoid runtime import + # TODO: avoid runtime import + from .resolution import Resolution, reso_str_bump_map - if prefix in {"D", "H", "T", "S", "L", "U", "N"}: + if prefix in reso_str_bump_map: stride, name = Resolution.get_stride_from_decimal( float(stride), prefix ) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 5d9db6ac8e989..4224a3b72021d 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -65,7 +65,7 @@ _reso_mult_map = { # ---------------------------------------------------------------------- -def resolution(const int64_t[:] stamps, tz=None): +def get_resolution(const int64_t[:] stamps, tz=None): cdef: Py_ssize_t i, n = len(stamps) npy_datetimestruct dts From b85421407ee7db806b8c9c87e25ffdff36c2f8b1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 29 May 2020 10:08:54 -0700 Subject: [PATCH 5/7] update, annotate --- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/datetimes.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 07eaefb2b4164..41eddd83edcef 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1103,7 +1103,7 @@ def inferred_freq(self): return None @property # NB: override with cache_readonly in immutable subclasses - def _resolution(self): + def _resolution(self) -> Optional[Resolution]: try: return Resolution.get_reso_from_freq(self.freqstr) except KeyError: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 894a519cb693e..4e31477571a5f 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -538,8 +538,8 @@ def is_normalized(self): return conversion.is_date_array_normalized(self.asi8, self.tz) @property # NB: override with cache_readonly in immutable subclasses - def _resolution(self): - return libresolution.resolution(self.asi8, self.tz) + def _resolution(self) -> libresolution.Resolution: + return libresolution.get_resolution(self.asi8, self.tz) # ---------------------------------------------------------------- # Array-Like / EA-Interface Methods From 10995f700552c9e154c77155b7cabf320278f945 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 29 May 2020 10:41:24 -0700 Subject: [PATCH 6/7] fixup missing import --- pandas/core/arrays/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 41eddd83edcef..b9f712e4d64fe 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta import operator -from typing import Any, Callable, Sequence, Tuple, Type, TypeVar, Union, cast +from typing import Any, Callable, Optional, Sequence, Tuple, Type, TypeVar, Union, cast import warnings import numpy as np From e68b2f46a442fc97cf2a834ad59c38e2dc78284a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 29 May 2020 11:37:11 -0700 Subject: [PATCH 7/7] lint fixup --- pandas/_libs/tslibs/resolution.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 4224a3b72021d..b3fc1e32f68e8 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -65,6 +65,7 @@ _reso_mult_map = { # ---------------------------------------------------------------------- + def get_resolution(const int64_t[:] stamps, tz=None): cdef: Py_ssize_t i, n = len(stamps)