Skip to content

REF/PERF: PeriodDtype decouple from DateOffset #34499

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 4, 2020
56 changes: 56 additions & 0 deletions pandas/_libs/tslibs/dtypes.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@

cdef enum PeriodDtypeCode:
    # Integer codes identifying a Period frequency.  Each frequency family
    # owns one block of a thousand (1000 annual, 2000 quarterly, 3000 monthly,
    # 4000 weekly, 5000 business day, 6000 daily, ... 12000 nanosecond); the
    # low digits select the variant inside the family (fiscal year-end month
    # or week-ending weekday).  The values are the same ones listed in
    # `_period_code_map` in dtypes.pyx and must be kept in sync with it.

    # Annual freqs with various fiscal year ends.
    # e.g. 2005 for A_FEB runs Mar 1, 2004 to Feb 28, 2005
    A = 1000      # Default alias (same value as A_DEC)
    A_DEC = 1000  # Annual - December year end
    A_JAN = 1001  # Annual - January year end
    A_FEB = 1002  # Annual - February year end
    A_MAR = 1003  # Annual - March year end
    A_APR = 1004  # Annual - April year end
    A_MAY = 1005  # Annual - May year end
    A_JUN = 1006  # Annual - June year end
    A_JUL = 1007  # Annual - July year end
    A_AUG = 1008  # Annual - August year end
    A_SEP = 1009  # Annual - September year end
    A_OCT = 1010  # Annual - October year end
    A_NOV = 1011  # Annual - November year end

    # Quarterly frequencies with various fiscal year ends.
    # e.g. Q42005 for Q_OCT runs Aug 1, 2005 to Oct 31, 2005
    Q_DEC = 2000  # Quarterly - December year end
    Q_JAN = 2001  # Quarterly - January year end
    Q_FEB = 2002  # Quarterly - February year end
    Q_MAR = 2003  # Quarterly - March year end
    Q_APR = 2004  # Quarterly - April year end
    Q_MAY = 2005  # Quarterly - May year end
    Q_JUN = 2006  # Quarterly - June year end
    Q_JUL = 2007  # Quarterly - July year end
    Q_AUG = 2008  # Quarterly - August year end
    Q_SEP = 2009  # Quarterly - September year end
    Q_OCT = 2010  # Quarterly - October year end
    Q_NOV = 2011  # Quarterly - November year end

    M = 3000      # Monthly

    # Weekly frequencies; the suffix is the weekday on which the week ends.
    W_SUN = 4000  # Weekly - Sunday end of week
    W_MON = 4001  # Weekly - Monday end of week
    W_TUE = 4002  # Weekly - Tuesday end of week
    W_WED = 4003  # Weekly - Wednesday end of week
    W_THU = 4004  # Weekly - Thursday end of week
    W_FRI = 4005  # Weekly - Friday end of week
    W_SAT = 4006  # Weekly - Saturday end of week

    B = 5000      # Business days
    D = 6000      # Daily
    H = 7000      # Hourly
    T = 8000      # Minutely
    S = 9000      # Secondly
    L = 10000     # Millisecondly
    U = 11000     # Microsecondly
    N = 12000     # Nanosecondly


cdef class PeriodPseudoDtype:
    # C-level declaration of the extension type implemented in dtypes.pyx.
    cdef readonly:
        # The PeriodDtypeCode value this object wraps.  `readonly` makes the
        # attribute visible from Python but not assignable from Python.
        PeriodDtypeCode dtype_code
108 changes: 108 additions & 0 deletions pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# period frequency constants corresponding to scikits timeseries
# originals


cdef class PeriodPseudoDtype:
    """
    Similar to an actual dtype, this contains all of the information
    describing a PeriodDtype in an integer code.

    The wrapped ``dtype_code`` is one of the ``PeriodDtypeCode`` values.
    DateOffset classes that can be the freq of a Period carry the matching
    code in their ``_period_dtype_code`` attribute; ``from_date_offset``
    builds an instance from such an offset and ``date_offset`` converts the
    code back to a DateOffset.  This lets period code work from the integer
    code instead of from a DateOffset object.
    """
# cdef readonly:
Copy link
Member

@jorisvandenbossche jorisvandenbossche Jun 2, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a docstring explaining for what / where this is used?

# PeriodDtypeCode dtype_code

def __cinit__(self, PeriodDtypeCode code):
    # Store the frequency code this instance represents.  The parameter is
    # declared as PeriodDtypeCode, so the argument is converted to that C
    # enum type on the way in.
    self.dtype_code = code

def __eq__(self, other):
    """
    Two PeriodPseudoDtype objects are equal when their codes are equal.

    Comparison with any other type returns False rather than raising.
    """
    if not isinstance(other, PeriodPseudoDtype):
        return False
    if not isinstance(self, PeriodPseudoDtype):
        # cython semantics, this is a reversed op
        return False
    return self.dtype_code == other.dtype_code

def __hash__(self):
    """
    Hash on the integer code, consistent with ``__eq__``.

    A type that defines ``__eq__`` without ``__hash__`` is unhashable, so
    without this method instances could not be used as dict keys or set
    members (for example in a cache keyed by dtype).
    """
    return hash(self.dtype_code)

@property
def date_offset(self):
    """
    DateOffset object matching this dtype code.

    Kept mainly so that callers that still expect a DateOffset keep
    working.  The code is translated to its canonical frequency string
    and that string is handed to ``to_offset``.
    """
    # Imported here rather than at module level, as in the original code.
    from .offsets import to_offset

    # Same result as libfrequencies.get_freq_str(self.dtype_code).
    return to_offset(_reverse_period_code_map.get(self.dtype_code))

@classmethod
def from_date_offset(cls, offset):
    """
    Alternate constructor: wrap the period code carried by ``offset``.

    ``offset`` must expose a ``_period_dtype_code`` attribute; the value of
    that attribute is passed straight to the class constructor.
    """
    return cls(offset._period_dtype_code)


# Frequency alias -> integer period code.  Every frequency family owns one
# block of a thousand; the low digits pick the variant inside the family.
_period_code_map = {
    # Annual; the suffix names the month in which the fiscal year ends.
    # e.g. 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005
    "A-DEC": 1000,  # year ends in December
    "A-JAN": 1001,  # year ends in January
    "A-FEB": 1002,  # year ends in February
    "A-MAR": 1003,  # year ends in March
    "A-APR": 1004,  # year ends in April
    "A-MAY": 1005,  # year ends in May
    "A-JUN": 1006,  # year ends in June
    "A-JUL": 1007,  # year ends in July
    "A-AUG": 1008,  # year ends in August
    "A-SEP": 1009,  # year ends in September
    "A-OCT": 1010,  # year ends in October
    "A-NOV": 1011,  # year ends in November

    # Quarterly; the suffix names the month in which the fiscal year ends.
    # e.g. Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005
    "Q-DEC": 2000,  # year ends in December
    "Q-JAN": 2001,  # year ends in January
    "Q-FEB": 2002,  # year ends in February
    "Q-MAR": 2003,  # year ends in March
    "Q-APR": 2004,  # year ends in April
    "Q-MAY": 2005,  # year ends in May
    "Q-JUN": 2006,  # year ends in June
    "Q-JUL": 2007,  # year ends in July
    "Q-AUG": 2008,  # year ends in August
    "Q-SEP": 2009,  # year ends in September
    "Q-OCT": 2010,  # year ends in October
    "Q-NOV": 2011,  # year ends in November

    "M": 3000,      # Monthly

    # Weekly; the suffix names the weekday on which the week ends.
    "W-SUN": 4000,  # week ends on Sunday
    "W-MON": 4001,  # week ends on Monday
    "W-TUE": 4002,  # week ends on Tuesday
    "W-WED": 4003,  # week ends on Wednesday
    "W-THU": 4004,  # week ends on Thursday
    "W-FRI": 4005,  # week ends on Friday
    "W-SAT": 4006,  # week ends on Saturday

    "B": 5000,      # Business days
    "D": 6000,      # Daily
    "H": 7000,      # Hourly
    "T": 8000,      # Minutely
    "S": 9000,      # Secondly
    "L": 10000,     # Millisecondly
    "U": 11000,     # Microsecondly
    "N": 12000,     # Nanosecondly
}

# Code -> canonical alias.  Must be built at this point, before any synonym
# is added below, so that each code maps back to exactly one name.
_reverse_period_code_map = {
    code: alias for alias, code in _period_code_map.items()
}

# "Y-<MON>" is a synonym for "A-<MON>"; careful not to put these in
# _reverse_period_code_map.
_period_code_map.update({
    "Y" + alias[1:]: code
    for alias, code in _period_code_map.items()
    if alias.startswith("A-")
})

# Bare aliases resolve to the default member of their family.
_period_code_map["Q"] = 2000  # Quarterly - December year end (default quarterly)
_period_code_map["A"] = 1000  # Annual
_period_code_map["W"] = 4000  # Weekly
_period_code_map["C"] = 5000  # Custom Business Day
69 changes: 2 additions & 67 deletions pandas/_libs/tslibs/frequencies.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ from pandas._libs.tslibs.offsets import (
opattern,
)

from .dtypes import _period_code_map, _reverse_period_code_map

# ---------------------------------------------------------------------
# Period codes

Expand All @@ -31,73 +33,6 @@ class FreqGroup:
FR_NS = 12000


# period frequency constants corresponding to scikits timeseries
# originals
#
# Frequency alias -> integer period code.  Every frequency family owns one
# block of a thousand; the low digits pick the variant inside the family.
_period_code_map = {
    # Annual; the suffix names the month in which the fiscal year ends.
    # e.g. 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005
    "A-DEC": 1000,  # year ends in December
    "A-JAN": 1001,  # year ends in January
    "A-FEB": 1002,  # year ends in February
    "A-MAR": 1003,  # year ends in March
    "A-APR": 1004,  # year ends in April
    "A-MAY": 1005,  # year ends in May
    "A-JUN": 1006,  # year ends in June
    "A-JUL": 1007,  # year ends in July
    "A-AUG": 1008,  # year ends in August
    "A-SEP": 1009,  # year ends in September
    "A-OCT": 1010,  # year ends in October
    "A-NOV": 1011,  # year ends in November

    # Quarterly; the suffix names the month in which the fiscal year ends.
    # e.g. Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005
    "Q-DEC": 2000,  # year ends in December
    "Q-JAN": 2001,  # year ends in January
    "Q-FEB": 2002,  # year ends in February
    "Q-MAR": 2003,  # year ends in March
    "Q-APR": 2004,  # year ends in April
    "Q-MAY": 2005,  # year ends in May
    "Q-JUN": 2006,  # year ends in June
    "Q-JUL": 2007,  # year ends in July
    "Q-AUG": 2008,  # year ends in August
    "Q-SEP": 2009,  # year ends in September
    "Q-OCT": 2010,  # year ends in October
    "Q-NOV": 2011,  # year ends in November

    "M": 3000,      # Monthly

    # Weekly; the suffix names the weekday on which the week ends.
    "W-SUN": 4000,  # week ends on Sunday
    "W-MON": 4001,  # week ends on Monday
    "W-TUE": 4002,  # week ends on Tuesday
    "W-WED": 4003,  # week ends on Wednesday
    "W-THU": 4004,  # week ends on Thursday
    "W-FRI": 4005,  # week ends on Friday
    "W-SAT": 4006,  # week ends on Saturday

    "B": 5000,      # Business days
    "D": 6000,      # Daily
    "H": 7000,      # Hourly
    "T": 8000,      # Minutely
    "S": 9000,      # Secondly
    "L": 10000,     # Millisecondly
    "U": 11000,     # Microsecondly
    "N": 12000,     # Nanosecondly
}

# Code -> canonical alias.  Must be built at this point, before any synonym
# is added below, so that each code maps back to exactly one name.
_reverse_period_code_map = {
    code: alias for alias, code in _period_code_map.items()
}

# "Y-<MON>" is a synonym for "A-<MON>"; careful not to put these in
# _reverse_period_code_map.
_period_code_map.update({
    "Y" + alias[1:]: code
    for alias, code in _period_code_map.items()
    if alias.startswith("A-")
})

# Bare aliases resolve to the default member of their family.
_period_code_map["Q"] = 2000  # Quarterly - December year end (default quarterly)
_period_code_map["A"] = 1000  # Annual
_period_code_map["W"] = 4000  # Weekly
_period_code_map["C"] = 5000  # Custom Business Day

# Map attribute-name resolutions to resolution abbreviations
_attrname_to_abbrevs = {
"year": "A",
Expand Down
31 changes: 30 additions & 1 deletion pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ from pandas._libs.tslibs.np_datetime cimport (
from pandas._libs.tslibs.timezones cimport utc_pytz as UTC
from pandas._libs.tslibs.tzconversion cimport tz_convert_single

from .dtypes cimport PeriodDtypeCode
from .timedeltas cimport delta_to_nanoseconds


Expand Down Expand Up @@ -892,36 +893,43 @@ cdef class Tick(SingleConstructorOffset):
cdef class Day(Tick):
    # Tick subclass for a span of one day.
    _nanos_inc = 24 * 3600 * 1_000_000_000  # 24 h * 3600 s/h * 1e9 ns/s
    _prefix = "D"
    # Period frequency code for this offset (PeriodDtypeCode.D is "Daily").
    _period_dtype_code = PeriodDtypeCode.D


cdef class Hour(Tick):
    # Tick subclass for a span of one hour.
    _nanos_inc = 3600 * 1_000_000_000  # 3600 s * 1e9 ns/s
    _prefix = "H"
    # Period frequency code for this offset (PeriodDtypeCode.H is "Hourly").
    _period_dtype_code = PeriodDtypeCode.H


cdef class Minute(Tick):
    # Tick subclass for a span of one minute.
    _nanos_inc = 60 * 1_000_000_000  # 60 s * 1e9 ns/s
    _prefix = "T"
    # Period frequency code for this offset (PeriodDtypeCode.T is "Minutely").
    _period_dtype_code = PeriodDtypeCode.T


cdef class Second(Tick):
    # Tick subclass for a span of one second.
    _nanos_inc = 1_000_000_000  # 1e9 ns
    _prefix = "S"
    # Period frequency code for this offset (PeriodDtypeCode.S is "Secondly").
    _period_dtype_code = PeriodDtypeCode.S


cdef class Milli(Tick):
    # Tick subclass for a span of one millisecond.
    _nanos_inc = 1_000_000  # 1e6 ns
    _prefix = "L"
    # Period frequency code for this offset (PeriodDtypeCode.L is
    # "Millisecondly").
    _period_dtype_code = PeriodDtypeCode.L


cdef class Micro(Tick):
    # Tick subclass for a span of one microsecond.
    _nanos_inc = 1000  # 1e3 ns
    _prefix = "U"
    # Period frequency code for this offset (PeriodDtypeCode.U is
    # "Microsecondly").
    _period_dtype_code = PeriodDtypeCode.U


cdef class Nano(Tick):
    # Tick subclass for a span of one nanosecond.
    _nanos_inc = 1
    _prefix = "N"
    # Period frequency code for this offset (PeriodDtypeCode.N is
    # "Nanosecondly").
    _period_dtype_code = PeriodDtypeCode.N


def delta_to_tick(delta: timedelta) -> Tick:
Expand Down Expand Up @@ -1281,7 +1289,7 @@ cdef class BusinessDay(BusinessMixin):
"""
DateOffset subclass representing possibly n business days.
"""

_period_dtype_code = PeriodDtypeCode.B
_prefix = "B"
_attributes = tuple(["n", "normalize", "offset"])

Expand Down Expand Up @@ -1945,6 +1953,15 @@ cdef class YearEnd(YearOffset):
_prefix = "A"
_day_opt = "end"

cdef readonly:
int _period_dtype_code

def __init__(self, n=1, normalize=False, month=None):
    # Because YearEnd can be the freq for a Period, define its
    # _period_dtype_code at construction for performance
    YearOffset.__init__(self, n, normalize, month)
    # PeriodDtypeCode.A is 1000 (the December year end); `month % 12` turns
    # month 12 into offset 0 and months 1..11 into offsets 1..11, which
    # matches A_DEC = 1000, A_JAN = 1001, ..., A_NOV = 1011.
    # NOTE(review): assumes self.month has been set by YearOffset.__init__
    # to an integer in 1..12 -- confirm against YearOffset.
    self._period_dtype_code = PeriodDtypeCode.A + self.month % 12


cdef class YearBegin(YearOffset):
"""
Expand Down Expand Up @@ -2099,6 +2116,14 @@ cdef class QuarterEnd(QuarterOffset):
_prefix = "Q"
_day_opt = "end"

cdef readonly:
int _period_dtype_code

def __init__(self, n=1, normalize=False, startingMonth=None):
    # Because QuarterEnd can be the freq for a Period, define its
    # _period_dtype_code at construction for performance
    QuarterOffset.__init__(self, n, normalize, startingMonth)
    # PeriodDtypeCode.Q_DEC is 2000; `startingMonth % 12` turns month 12 into
    # offset 0 and months 1..11 into offsets 1..11, which matches
    # Q_DEC = 2000, Q_JAN = 2001, ..., Q_NOV = 2011.
    # NOTE(review): assumes self.startingMonth has been set by
    # QuarterOffset.__init__ to an integer in 1..12 -- confirm.
    self._period_dtype_code = PeriodDtypeCode.Q_DEC + self.startingMonth % 12

cdef class QuarterBegin(QuarterOffset):
"""
Expand Down Expand Up @@ -2148,6 +2173,7 @@ cdef class MonthEnd(MonthOffset):
"""
DateOffset of one month end.
"""
_period_dtype_code = PeriodDtypeCode.M
_prefix = "M"
_day_opt = "end"

Expand Down Expand Up @@ -2452,6 +2478,7 @@ cdef class Week(SingleConstructorOffset):

cdef readonly:
object weekday # int or None
int _period_dtype_code

def __init__(self, n=1, normalize=False, weekday=None):
BaseOffset.__init__(self, n, normalize)
Expand All @@ -2461,6 +2488,8 @@ cdef class Week(SingleConstructorOffset):
if self.weekday < 0 or self.weekday > 6:
raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}")

self._period_dtype_code = PeriodDtypeCode.W_SUN + (weekday + 1) % 7

cpdef __setstate__(self, state):
self.n = state.pop("n")
self.normalize = state.pop("normalize")
Expand Down
Loading