Skip to content

Commit 3334c8c

Browse files
authored
ENH: Resolutions for month/qtr/year (#34587)
1 parent a7c9877 commit 3334c8c

File tree

7 files changed

+98
-53
lines changed

7 files changed

+98
-53
lines changed

pandas/_libs/tslibs/resolution.pyx

+50-2
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,10 @@ from pandas._libs.tslibs.util cimport get_nat
88
from pandas._libs.tslibs.np_datetime cimport (
99
npy_datetimestruct, dt64_to_dtstruct)
1010
from pandas._libs.tslibs.frequencies cimport attrname_to_abbrevs
11+
from pandas._libs.tslibs.frequencies import FreqGroup
1112
from pandas._libs.tslibs.timezones cimport (
1213
is_utc, is_tzlocal, maybe_get_tz, get_dst_info)
13-
from pandas._libs.tslibs.ccalendar cimport get_days_in_month
14+
from pandas._libs.tslibs.ccalendar cimport get_days_in_month, c_MONTH_NUMBERS
1415
from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal
1516

1617
# ----------------------------------------------------------------------
@@ -26,6 +27,9 @@ cdef:
2627
int RESO_MIN = 4
2728
int RESO_HR = 5
2829
int RESO_DAY = 6
30+
int RESO_MTH = 7
31+
int RESO_QTR = 8
32+
int RESO_YR = 9
2933

3034
_abbrev_to_attrnames = {v: k for k, v in attrname_to_abbrevs.items()}
3135

@@ -37,6 +41,9 @@ _reso_str_map = {
3741
RESO_MIN: "minute",
3842
RESO_HR: "hour",
3943
RESO_DAY: "day",
44+
RESO_MTH: "month",
45+
RESO_QTR: "quarter",
46+
RESO_YR: "year",
4047
}
4148

4249
_str_reso_map = {v: k for k, v in _reso_str_map.items()}
@@ -126,13 +133,42 @@ class Resolution(Enum):
126133
RESO_MIN = 4
127134
RESO_HR = 5
128135
RESO_DAY = 6
136+
RESO_MTH = 7
137+
RESO_QTR = 8
138+
RESO_YR = 9
129139

130140
def __lt__(self, other):
131141
return self.value < other.value
132142

133143
def __ge__(self, other):
134144
return self.value >= other.value
135145

146+
@property
147+
def freq_group(self):
148+
# TODO: annotate as returning FreqGroup once that is an enum
149+
if self == Resolution.RESO_NS:
150+
return FreqGroup.FR_NS
151+
elif self == Resolution.RESO_US:
152+
return FreqGroup.FR_US
153+
elif self == Resolution.RESO_MS:
154+
return FreqGroup.FR_MS
155+
elif self == Resolution.RESO_SEC:
156+
return FreqGroup.FR_SEC
157+
elif self == Resolution.RESO_MIN:
158+
return FreqGroup.FR_MIN
159+
elif self == Resolution.RESO_HR:
160+
return FreqGroup.FR_HR
161+
elif self == Resolution.RESO_DAY:
162+
return FreqGroup.FR_DAY
163+
elif self == Resolution.RESO_MTH:
164+
return FreqGroup.FR_MTH
165+
elif self == Resolution.RESO_QTR:
166+
return FreqGroup.FR_QTR
167+
elif self == Resolution.RESO_YR:
168+
return FreqGroup.FR_ANN
169+
else:
170+
raise ValueError(self)
171+
136172
@property
137173
def attrname(self) -> str:
138174
"""
@@ -175,7 +211,19 @@ class Resolution(Enum):
175211
>>> Resolution.get_reso_from_freq('H') == Resolution.RESO_HR
176212
True
177213
"""
178-
attr_name = _abbrev_to_attrnames[freq]
214+
try:
215+
attr_name = _abbrev_to_attrnames[freq]
216+
except KeyError:
217+
# Quarterly and yearly freqs may carry an anchor-month suffix
218+
# (e.g. "Q-DEC"); strip it and look up the base abbreviation.
219+
split_freq = freq.split("-")
220+
if len(split_freq) != 2:
221+
raise
222+
if split_freq[1] not in c_MONTH_NUMBERS:
223+
# i.e. we want e.g. "Q-DEC", not "Q-INVALID"
224+
raise
225+
attr_name = _abbrev_to_attrnames[split_freq[0]]
226+
179227
return cls.from_attrname(attr_name)
180228

181229

pandas/core/arrays/datetimelike.py

-5
Original file line numberDiff line numberDiff line change
@@ -1122,11 +1122,6 @@ def resolution(self) -> str:
11221122
"""
11231123
Returns year, quarter, month, day, hour, minute, second, millisecond or microsecond
11241124
"""
1125-
if self._resolution_obj is None:
1126-
if is_period_dtype(self.dtype):
1127-
# somewhere in the past it was decided we default to day
1128-
return "day"
1129-
# otherwise we fall through and will raise
11301125
return self._resolution_obj.attrname # type: ignore
11311126

11321127
@classmethod

pandas/core/indexes/datetimelike.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -363,19 +363,23 @@ def _format_attrs(self):
363363
# --------------------------------------------------------------------
364364
# Indexing Methods
365365

366-
def _validate_partial_date_slice(self, reso: str):
366+
def _validate_partial_date_slice(self, reso: Resolution):
367367
raise NotImplementedError
368368

369-
def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
369+
def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
370370
raise NotImplementedError
371371

372372
def _partial_date_slice(
373-
self, reso: str, parsed: datetime, use_lhs: bool = True, use_rhs: bool = True
373+
self,
374+
reso: Resolution,
375+
parsed: datetime,
376+
use_lhs: bool = True,
377+
use_rhs: bool = True,
374378
):
375379
"""
376380
Parameters
377381
----------
378-
reso : str
382+
reso : Resolution
379383
parsed : datetime
380384
use_lhs : bool, default True
381385
use_rhs : bool, default True

pandas/core/indexes/datetimes.py

+10-7
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib
99
from pandas._libs.tslibs import Resolution, fields, parsing, timezones, to_offset
10-
from pandas._libs.tslibs.frequencies import get_freq_group
1110
from pandas._libs.tslibs.offsets import prefix_mapping
1211
from pandas._typing import DtypeObj, Label
1312
from pandas.util._decorators import cache_readonly
@@ -470,7 +469,7 @@ def snap(self, freq="S"):
470469
dta = DatetimeArray(snapped, dtype=self.dtype)
471470
return DatetimeIndex._simple_new(dta, name=self.name)
472471

473-
def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
472+
def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
474473
"""
475474
Calculate datetime bounds for parsed time string and its resolution.
476475
@@ -485,6 +484,7 @@ def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
485484
-------
486485
lower, upper: pd.Timestamp
487486
"""
487+
assert isinstance(reso, Resolution), (type(reso), reso)
488488
valid_resos = {
489489
"year",
490490
"month",
@@ -497,10 +497,10 @@ def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
497497
"second",
498498
"microsecond",
499499
}
500-
if reso not in valid_resos:
500+
if reso.attrname not in valid_resos:
501501
raise KeyError
502502

503-
grp = get_freq_group(reso)
503+
grp = reso.freq_group
504504
per = Period(parsed, freq=grp)
505505
start, end = per.start_time, per.end_time
506506

@@ -521,11 +521,12 @@ def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
521521
end = end.tz_localize(self.tz)
522522
return start, end
523523

524-
def _validate_partial_date_slice(self, reso: str):
524+
def _validate_partial_date_slice(self, reso: Resolution):
525+
assert isinstance(reso, Resolution), (type(reso), reso)
525526
if (
526527
self.is_monotonic
527-
and reso in ["day", "hour", "minute", "second"]
528-
and self._resolution_obj >= Resolution.from_attrname(reso)
528+
and reso.attrname in ["day", "hour", "minute", "second"]
529+
and self._resolution_obj >= reso
529530
):
530531
# These resolution/monotonicity validations came from GH3931,
531532
# GH3452 and GH2369.
@@ -625,6 +626,7 @@ def _maybe_cast_slice_bound(self, label, side: str, kind):
625626
if isinstance(label, str):
626627
freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
627628
parsed, reso = parsing.parse_time_string(label, freq)
629+
reso = Resolution.from_attrname(reso)
628630
lower, upper = self._parsed_string_to_bounds(reso, parsed)
629631
# lower, upper form the half-open interval:
630632
# [parsed, parsed + 1 freq)
@@ -641,6 +643,7 @@ def _maybe_cast_slice_bound(self, label, side: str, kind):
641643
def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True):
642644
freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
643645
parsed, reso = parsing.parse_time_string(key, freq)
646+
reso = Resolution.from_attrname(reso)
644647
loc = self._partial_date_slice(reso, parsed, use_lhs=use_lhs, use_rhs=use_rhs)
645648
return loc
646649

pandas/core/indexes/period.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from pandas._libs import index as libindex
77
from pandas._libs.lib import no_default
8-
from pandas._libs.tslibs import Period
8+
from pandas._libs.tslibs import Period, Resolution
99
from pandas._libs.tslibs.frequencies import get_freq_group
1010
from pandas._libs.tslibs.parsing import DateParseError, parse_time_string
1111
from pandas._typing import DtypeObj, Label
@@ -501,7 +501,8 @@ def get_loc(self, key, method=None, tolerance=None):
501501
# A string with invalid format
502502
raise KeyError(f"Cannot interpret '{key}' as period") from err
503503

504-
grp = get_freq_group(reso)
504+
reso = Resolution.from_attrname(reso)
505+
grp = reso.freq_group
505506
freqn = get_freq_group(self.freq)
506507

507508
# _get_string_slice will handle cases where grp < freqn
@@ -558,6 +559,7 @@ def _maybe_cast_slice_bound(self, label, side: str, kind: str):
558559
elif isinstance(label, str):
559560
try:
560561
parsed, reso = parse_time_string(label, self.freq)
562+
reso = Resolution.from_attrname(reso)
561563
bounds = self._parsed_string_to_bounds(reso, parsed)
562564
return bounds[0 if side == "left" else 1]
563565
except ValueError as err:
@@ -569,16 +571,14 @@ def _maybe_cast_slice_bound(self, label, side: str, kind: str):
569571

570572
return label
571573

572-
def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
573-
if reso not in ["year", "month", "quarter", "day", "hour", "minute", "second"]:
574-
raise KeyError(reso)
575-
576-
grp = get_freq_group(reso)
574+
def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
575+
grp = reso.freq_group
577576
iv = Period(parsed, freq=grp)
578577
return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))
579578

580-
def _validate_partial_date_slice(self, reso: str):
581-
grp = get_freq_group(reso)
579+
def _validate_partial_date_slice(self, reso: Resolution):
580+
assert isinstance(reso, Resolution), (type(reso), reso)
581+
grp = reso.freq_group
582582
freqn = get_freq_group(self.freq)
583583

584584
if not grp < freqn:
@@ -590,7 +590,7 @@ def _validate_partial_date_slice(self, reso: str):
590590
def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True):
591591
# TODO: Check for non-True use_lhs/use_rhs
592592
parsed, reso = parse_time_string(key, self.freq)
593-
593+
reso = Resolution.from_attrname(reso)
594594
try:
595595
return self._partial_date_slice(reso, parsed, use_lhs, use_rhs)
596596
except KeyError as err:

pandas/tests/indexes/period/test_ops.py

+17-18
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,23 @@
77

88

99
class TestPeriodIndexOps:
10-
def test_resolution(self):
11-
for freq, expected in zip(
12-
["A", "Q", "M", "D", "H", "T", "S", "L", "U"],
13-
[
14-
"day",
15-
"day",
16-
"day",
17-
"day",
18-
"hour",
19-
"minute",
20-
"second",
21-
"millisecond",
22-
"microsecond",
23-
],
24-
):
25-
26-
idx = pd.period_range(start="2013-04-01", periods=30, freq=freq)
27-
assert idx.resolution == expected
10+
@pytest.mark.parametrize(
11+
"freq,expected",
12+
[
13+
("A", "year"),
14+
("Q", "quarter"),
15+
("M", "month"),
16+
("D", "day"),
17+
("H", "hour"),
18+
("T", "minute"),
19+
("S", "second"),
20+
("L", "millisecond"),
21+
("U", "microsecond"),
22+
],
23+
)
24+
def test_resolution(self, freq, expected):
25+
idx = pd.period_range(start="2013-04-01", periods=30, freq=freq)
26+
assert idx.resolution == expected
2827

2928
def test_value_counts_unique(self):
3029
# GH 7735

pandas/tests/tseries/frequencies/test_freq_code.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ def test_get_to_timestamp_base(freqstr, exp_freqstr):
9090
@pytest.mark.parametrize(
9191
"freqstr,expected",
9292
[
93+
("A", "year"),
94+
("Q", "quarter"),
95+
("M", "month"),
9396
("D", "day"),
9497
("H", "hour"),
9598
("T", "minute"),
@@ -103,13 +106,6 @@ def test_get_attrname_from_abbrev(freqstr, expected):
103106
assert Resolution.get_reso_from_freq(freqstr).attrname == expected
104107

105108

106-
@pytest.mark.parametrize("freq", ["A", "Q", "M"])
107-
def test_get_freq_unsupported_(freq):
108-
# Lowest-frequency resolution is for Day
109-
with pytest.raises(KeyError, match=freq.lower()):
110-
Resolution.get_reso_from_freq(freq)
111-
112-
113109
@pytest.mark.parametrize("freq", ["D", "H", "T", "S", "L", "U", "N"])
114110
def test_get_freq_roundtrip2(freq):
115111
obj = Resolution.get_reso_from_freq(freq)

0 commit comments

Comments
 (0)