Skip to content

ENH: Resolutions for month/qtr/year #34587

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Jun 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 50 additions & 2 deletions pandas/_libs/tslibs/resolution.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ from pandas._libs.tslibs.util cimport get_nat
from pandas._libs.tslibs.np_datetime cimport (
npy_datetimestruct, dt64_to_dtstruct)
from pandas._libs.tslibs.frequencies cimport attrname_to_abbrevs
from pandas._libs.tslibs.frequencies import FreqGroup
from pandas._libs.tslibs.timezones cimport (
is_utc, is_tzlocal, maybe_get_tz, get_dst_info)
from pandas._libs.tslibs.ccalendar cimport get_days_in_month
from pandas._libs.tslibs.ccalendar cimport get_days_in_month, c_MONTH_NUMBERS
from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal

# ----------------------------------------------------------------------
Expand All @@ -26,6 +27,9 @@ cdef:
int RESO_MIN = 4
int RESO_HR = 5
int RESO_DAY = 6
int RESO_MTH = 7
int RESO_QTR = 8
int RESO_YR = 9

_abbrev_to_attrnames = {v: k for k, v in attrname_to_abbrevs.items()}

Expand All @@ -37,6 +41,9 @@ _reso_str_map = {
RESO_MIN: "minute",
RESO_HR: "hour",
RESO_DAY: "day",
RESO_MTH: "month",
RESO_QTR: "quarter",
RESO_YR: "year",
}

_str_reso_map = {v: k for k, v in _reso_str_map.items()}
Expand Down Expand Up @@ -126,13 +133,42 @@ class Resolution(Enum):
RESO_MIN = 4
RESO_HR = 5
RESO_DAY = 6
RESO_MTH = 7
RESO_QTR = 8
RESO_YR = 9

def __lt__(self, other):
    """
    Return True when this member's ``value`` is strictly smaller than
    ``other.value``.
    """
    own_value = self.value
    other_value = other.value
    return own_value < other_value

def __ge__(self, other):
    """
    Return True when this member's ``value`` is greater than or equal to
    ``other.value``.
    """
    own_value = self.value
    other_value = other.value
    return own_value >= other_value

@property
def freq_group(self):
    """
    Return the ``FreqGroup`` attribute that corresponds to this resolution.

    Raises
    ------
    ValueError
        If this member has no entry in the lookup table below.
    """
    # TODO: annotate as returning FreqGroup once that is an enum
    # Resolution member -> name of the matching FreqGroup attribute.  The
    # attribute is resolved only for the member actually requested.
    group_attr_by_reso = {
        Resolution.RESO_NS: "FR_NS",
        Resolution.RESO_US: "FR_US",
        Resolution.RESO_MS: "FR_MS",
        Resolution.RESO_SEC: "FR_SEC",
        Resolution.RESO_MIN: "FR_MIN",
        Resolution.RESO_HR: "FR_HR",
        Resolution.RESO_DAY: "FR_DAY",
        Resolution.RESO_MTH: "FR_MTH",
        Resolution.RESO_QTR: "FR_QTR",
        Resolution.RESO_YR: "FR_ANN",
    }
    group_attr = group_attr_by_reso.get(self)
    if group_attr is None:
        raise ValueError(self)
    return getattr(FreqGroup, group_attr)

@property
def attrname(self) -> str:
"""
Expand Down Expand Up @@ -175,7 +211,19 @@ class Resolution(Enum):
>>> Resolution.get_reso_from_freq('H') == Resolution.RESO_HR
True
"""
attr_name = _abbrev_to_attrnames[freq]
try:
attr_name = _abbrev_to_attrnames[freq]
except KeyError:
# For quarterly and yearly resolutions, we need to chop off
# a month string.
split_freq = freq.split("-")
if len(split_freq) != 2:
raise
if split_freq[1] not in c_MONTH_NUMBERS:
# i.e. we want e.g. "Q-DEC", not "Q-INVALID"
raise
attr_name = _abbrev_to_attrnames[split_freq[0]]

return cls.from_attrname(attr_name)


Expand Down
5 changes: 0 additions & 5 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1122,11 +1122,6 @@ def resolution(self) -> str:
"""
Returns year, quarter, month, day, hour, minute, second, millisecond or microsecond
"""
if self._resolution_obj is None:
if is_period_dtype(self.dtype):
# somewhere in the past it was decided we default to day
return "day"
# otherwise we fall through and will raise
return self._resolution_obj.attrname # type: ignore

@classmethod
Expand Down
12 changes: 8 additions & 4 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,19 +363,23 @@ def _format_attrs(self):
# --------------------------------------------------------------------
# Indexing Methods

def _validate_partial_date_slice(self, reso: str):
def _validate_partial_date_slice(self, reso: Resolution):
raise NotImplementedError

def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
raise NotImplementedError

def _partial_date_slice(
self, reso: str, parsed: datetime, use_lhs: bool = True, use_rhs: bool = True
self,
reso: Resolution,
parsed: datetime,
use_lhs: bool = True,
use_rhs: bool = True,
):
"""
Parameters
----------
reso : str
reso : Resolution
parsed : datetime
use_lhs : bool, default True
use_rhs : bool, default True
Expand Down
17 changes: 10 additions & 7 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib
from pandas._libs.tslibs import Resolution, fields, parsing, timezones, to_offset
from pandas._libs.tslibs.frequencies import get_freq_group
from pandas._libs.tslibs.offsets import prefix_mapping
from pandas._typing import DtypeObj, Label
from pandas.util._decorators import cache_readonly
Expand Down Expand Up @@ -470,7 +469,7 @@ def snap(self, freq="S"):
dta = DatetimeArray(snapped, dtype=self.dtype)
return DatetimeIndex._simple_new(dta, name=self.name)

def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
"""
Calculate datetime bounds for parsed time string and its resolution.

Expand All @@ -485,6 +484,7 @@ def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
-------
lower, upper: pd.Timestamp
"""
assert isinstance(reso, Resolution), (type(reso), reso)
valid_resos = {
"year",
"month",
Expand All @@ -497,10 +497,10 @@ def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
"second",
"microsecond",
}
if reso not in valid_resos:
if reso.attrname not in valid_resos:
raise KeyError

grp = get_freq_group(reso)
grp = reso.freq_group
per = Period(parsed, freq=grp)
start, end = per.start_time, per.end_time

Expand All @@ -521,11 +521,12 @@ def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
end = end.tz_localize(self.tz)
return start, end

def _validate_partial_date_slice(self, reso: str):
def _validate_partial_date_slice(self, reso: Resolution):
assert isinstance(reso, Resolution), (type(reso), reso)
if (
self.is_monotonic
and reso in ["day", "hour", "minute", "second"]
and self._resolution_obj >= Resolution.from_attrname(reso)
and reso.attrname in ["day", "hour", "minute", "second"]
and self._resolution_obj >= reso
):
# These resolution/monotonicity validations came from GH3931,
# GH3452 and GH2369.
Expand Down Expand Up @@ -625,6 +626,7 @@ def _maybe_cast_slice_bound(self, label, side: str, kind):
if isinstance(label, str):
freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
parsed, reso = parsing.parse_time_string(label, freq)
reso = Resolution.from_attrname(reso)
lower, upper = self._parsed_string_to_bounds(reso, parsed)
# lower, upper form the half-open interval:
# [parsed, parsed + 1 freq)
Expand All @@ -641,6 +643,7 @@ def _maybe_cast_slice_bound(self, label, side: str, kind):
def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True):
    """
    Parse ``key`` as a time string and delegate to ``_partial_date_slice``.

    The resolution name returned by ``parsing.parse_time_string`` is
    converted to a ``Resolution`` member before being passed on.
    """
    # The fallback is looked up unconditionally, exactly as a nested
    # ``getattr`` default would be.
    fallback_freq = getattr(self, "inferred_freq", None)
    freq = getattr(self, "freqstr", fallback_freq)

    parsed, reso_name = parsing.parse_time_string(key, freq)
    reso = Resolution.from_attrname(reso_name)
    return self._partial_date_slice(reso, parsed, use_lhs=use_lhs, use_rhs=use_rhs)

Expand Down
20 changes: 10 additions & 10 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from pandas._libs import index as libindex
from pandas._libs.lib import no_default
from pandas._libs.tslibs import Period
from pandas._libs.tslibs import Period, Resolution
from pandas._libs.tslibs.frequencies import get_freq_group
from pandas._libs.tslibs.parsing import DateParseError, parse_time_string
from pandas._typing import DtypeObj, Label
Expand Down Expand Up @@ -501,7 +501,8 @@ def get_loc(self, key, method=None, tolerance=None):
# A string with invalid format
raise KeyError(f"Cannot interpret '{key}' as period") from err

grp = get_freq_group(reso)
reso = Resolution.from_attrname(reso)
grp = reso.freq_group
freqn = get_freq_group(self.freq)

# _get_string_slice will handle cases where grp < freqn
Expand Down Expand Up @@ -558,6 +559,7 @@ def _maybe_cast_slice_bound(self, label, side: str, kind: str):
elif isinstance(label, str):
try:
parsed, reso = parse_time_string(label, self.freq)
reso = Resolution.from_attrname(reso)
bounds = self._parsed_string_to_bounds(reso, parsed)
return bounds[0 if side == "left" else 1]
except ValueError as err:
Expand All @@ -569,16 +571,14 @@ def _maybe_cast_slice_bound(self, label, side: str, kind: str):

return label

def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
if reso not in ["year", "month", "quarter", "day", "hour", "minute", "second"]:
raise KeyError(reso)

grp = get_freq_group(reso)
def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
    """
    Return the ``(start, end)`` pair for ``parsed`` at resolution ``reso``.

    A ``Period`` is built from ``parsed`` using ``reso.freq_group`` as its
    frequency, then converted to ``self.freq`` once with ``how="start"`` and
    once with ``how="end"``.
    """
    span = Period(parsed, freq=reso.freq_group)
    lower = span.asfreq(self.freq, how="start")
    upper = span.asfreq(self.freq, how="end")
    return (lower, upper)

def _validate_partial_date_slice(self, reso: str):
grp = get_freq_group(reso)
def _validate_partial_date_slice(self, reso: Resolution):
assert isinstance(reso, Resolution), (type(reso), reso)
grp = reso.freq_group
freqn = get_freq_group(self.freq)

if not grp < freqn:
Expand All @@ -590,7 +590,7 @@ def _validate_partial_date_slice(self, reso: str):
def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True):
# TODO: Check for non-True use_lhs/use_rhs
parsed, reso = parse_time_string(key, self.freq)

reso = Resolution.from_attrname(reso)
try:
return self._partial_date_slice(reso, parsed, use_lhs, use_rhs)
except KeyError as err:
Expand Down
35 changes: 17 additions & 18 deletions pandas/tests/indexes/period/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,23 @@


class TestPeriodIndexOps:
def test_resolution(self):
for freq, expected in zip(
["A", "Q", "M", "D", "H", "T", "S", "L", "U"],
[
"day",
"day",
"day",
"day",
"hour",
"minute",
"second",
"millisecond",
"microsecond",
],
):

idx = pd.period_range(start="2013-04-01", periods=30, freq=freq)
assert idx.resolution == expected
@pytest.mark.parametrize(
    ["freq", "expected"],
    [
        ("A", "year"),
        ("Q", "quarter"),
        ("M", "month"),
        ("D", "day"),
        ("H", "hour"),
        ("T", "minute"),
        ("S", "second"),
        ("L", "millisecond"),
        ("U", "microsecond"),
    ],
)
def test_resolution(self, freq, expected):
    # A period range built with each frequency alias must report the
    # matching resolution name.
    index = pd.period_range(start="2013-04-01", periods=30, freq=freq)
    assert index.resolution == expected

def test_value_counts_unique(self):
# GH 7735
Expand Down
10 changes: 3 additions & 7 deletions pandas/tests/tseries/frequencies/test_freq_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ def test_get_to_timestamp_base(freqstr, exp_freqstr):
@pytest.mark.parametrize(
"freqstr,expected",
[
("A", "year"),
("Q", "quarter"),
("M", "month"),
("D", "day"),
("H", "hour"),
("T", "minute"),
Expand All @@ -103,13 +106,6 @@ def test_get_attrname_from_abbrev(freqstr, expected):
assert Resolution.get_reso_from_freq(freqstr).attrname == expected


@pytest.mark.parametrize("freq", ["A", "Q", "M"])
def test_get_freq_unsupported_(freq):
# Lowest-frequency resolution is for Day
with pytest.raises(KeyError, match=freq.lower()):
Resolution.get_reso_from_freq(freq)


@pytest.mark.parametrize("freq", ["D", "H", "T", "S", "L", "U", "N"])
def test_get_freq_roundtrip2(freq):
obj = Resolution.get_reso_from_freq(freq)
Expand Down