Skip to content

Commit 826f0d3

Browse files
authored
ENH: Add HalfYear offsets (#60946)
* ENH: Add HalfYear offsets * Add entry to whatsnew * Resolve cython typing issue
1 parent 8e9487a commit 826f0d3

File tree

7 files changed

+947
-7
lines changed

7 files changed

+947
-7
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ Other enhancements
7272
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
7373
- :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
7474
- Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`).
75+
- Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`)
7576
- Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`)
7677
- Implemented :meth:`Series.str.isascii` (:issue:`59091`)
7778
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)

pandas/_libs/tslibs/offsets.pyi

+10
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,16 @@ class BQuarterEnd(QuarterOffset): ...
168168
class BQuarterBegin(QuarterOffset): ...
169169
class QuarterEnd(QuarterOffset): ...
170170
class QuarterBegin(QuarterOffset): ...
171+
172+
class HalfYearOffset(SingleConstructorOffset):
173+
def __init__(
174+
self, n: int = ..., normalize: bool = ..., startingMonth: int | None = ...
175+
) -> None: ...
176+
177+
class BHalfYearEnd(HalfYearOffset): ...
178+
class BHalfYearBegin(HalfYearOffset): ...
179+
class HalfYearEnd(HalfYearOffset): ...
180+
class HalfYearBegin(HalfYearOffset): ...
171181
class MonthOffset(SingleConstructorOffset): ...
172182
class MonthEnd(MonthOffset): ...
173183
class MonthBegin(MonthOffset): ...

pandas/_libs/tslibs/offsets.pyx

+230-5
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ cnp.import_array()
3232

3333
# TODO: formalize having _libs.properties "above" tslibs in the dependency structure
3434

35+
from typing import ClassVar
36+
3537
from pandas._libs.properties import cache_readonly
3638

3739
from pandas._libs.tslibs cimport util
@@ -2524,8 +2526,7 @@ cdef class YearOffset(SingleConstructorOffset):
25242526
"""
25252527
_attributes = tuple(["n", "normalize", "month"])
25262528

2527-
# FIXME(cython#4446): python annotation here gives compile-time errors
2528-
# _default_month: int
2529+
_default_month: ClassVar[int]
25292530

25302531
cdef readonly:
25312532
int month
@@ -2788,9 +2789,8 @@ cdef class QuarterOffset(SingleConstructorOffset):
27882789
# point. Also apply_index, is_on_offset, rule_code if
27892790
# startingMonth vs month attr names are resolved
27902791

2791-
# FIXME(cython#4446): python annotation here gives compile-time errors
2792-
# _default_starting_month: int
2793-
# _from_name_starting_month: int
2792+
_default_starting_month: ClassVar[int]
2793+
_from_name_starting_month: ClassVar[int]
27942794

27952795
cdef readonly:
27962796
int startingMonth
@@ -3011,6 +3011,227 @@ cdef class QuarterBegin(QuarterOffset):
30113011
_day_opt = "start"
30123012

30133013

3014+
# ----------------------------------------------------------------------
3015+
# HalfYear-Based Offset Classes
3016+
3017+
cdef class HalfYearOffset(SingleConstructorOffset):
3018+
_attributes = tuple(["n", "normalize", "startingMonth"])
3019+
# TODO: Consider combining HalfYearOffset, QuarterOffset and YearOffset
3020+
3021+
_default_starting_month: ClassVar[int]
3022+
_from_name_starting_month: ClassVar[int]
3023+
3024+
cdef readonly:
3025+
int startingMonth
3026+
3027+
def __init__(self, n=1, normalize=False, startingMonth=None):
3028+
BaseOffset.__init__(self, n, normalize)
3029+
3030+
if startingMonth is None:
3031+
startingMonth = self._default_starting_month
3032+
self.startingMonth = startingMonth
3033+
3034+
cpdef __setstate__(self, state):
3035+
self.startingMonth = state.pop("startingMonth")
3036+
self.n = state.pop("n")
3037+
self.normalize = state.pop("normalize")
3038+
3039+
@classmethod
3040+
def _from_name(cls, suffix=None):
3041+
kwargs = {}
3042+
if suffix:
3043+
kwargs["startingMonth"] = MONTH_TO_CAL_NUM[suffix]
3044+
else:
3045+
if cls._from_name_starting_month is not None:
3046+
kwargs["startingMonth"] = cls._from_name_starting_month
3047+
return cls(**kwargs)
3048+
3049+
@property
3050+
def rule_code(self) -> str:
3051+
month = MONTH_ALIASES[self.startingMonth]
3052+
return f"{self._prefix}-{month}"
3053+
3054+
def is_on_offset(self, dt: datetime) -> bool:
3055+
if self.normalize and not _is_normalized(dt):
3056+
return False
3057+
mod_month = (dt.month - self.startingMonth) % 6
3058+
return mod_month == 0 and dt.day == self._get_offset_day(dt)
3059+
3060+
@apply_wraps
3061+
def _apply(self, other: datetime) -> datetime:
3062+
# months_since: find the calendar half containing other.month,
3063+
# e.g. if other.month == 8, the calendar half is [Jul, Aug, Sep, ..., Dec].
3064+
# Then find the month in that half containing an is_on_offset date for
3065+
# self. `months_since` is the number of months to shift other.month
3066+
# to get to this on-offset month.
3067+
months_since = other.month % 6 - self.startingMonth % 6
3068+
hlvs = roll_qtrday(
3069+
other, self.n, self.startingMonth, day_opt=self._day_opt, modby=6
3070+
)
3071+
months = hlvs * 6 - months_since
3072+
return shift_month(other, months, self._day_opt)
3073+
3074+
def _apply_array(self, dtarr: np.ndarray) -> np.ndarray:
3075+
reso = get_unit_from_dtype(dtarr.dtype)
3076+
shifted = shift_quarters(
3077+
dtarr.view("i8"),
3078+
self.n,
3079+
self.startingMonth,
3080+
self._day_opt,
3081+
modby=6,
3082+
reso=reso,
3083+
)
3084+
return shifted
3085+
3086+
3087+
cdef class BHalfYearEnd(HalfYearOffset):
3088+
"""
3089+
DateOffset increments between the last business day of each half-year.
3090+
3091+
startingMonth = 1 corresponds to dates like 1/31/2007, 7/31/2007, ...
3092+
startingMonth = 2 corresponds to dates like 2/28/2007, 8/31/2007, ...
3093+
startingMonth = 6 corresponds to dates like 6/29/2007, 12/31/2007, ...
3094+
3095+
Attributes
3096+
----------
3097+
n : int, default 1
3098+
The number of half-years represented.
3099+
normalize : bool, default False
3100+
Normalize start/end dates to midnight before generating date range.
3101+
startingMonth : int, default 6
3102+
A specific integer for the month of the year from which we start half-years.
3103+
3104+
See Also
3105+
--------
3106+
:class:`~pandas.tseries.offsets.DateOffset` : Standard kind of date increment.
3107+
3108+
Examples
3109+
--------
3110+
>>> from pandas.tseries.offsets import BHalfYearEnd
3111+
>>> ts = pd.Timestamp('2020-05-24 05:01:15')
3112+
>>> ts + BHalfYearEnd()
3113+
Timestamp('2020-06-30 05:01:15')
3114+
>>> ts + BHalfYearEnd(2)
3115+
Timestamp('2020-12-31 05:01:15')
3116+
>>> ts + BHalfYearEnd(1, startingMonth=2)
3117+
Timestamp('2020-08-31 05:01:15')
3118+
>>> ts + BHalfYearEnd(startingMonth=2)
3119+
Timestamp('2020-08-31 05:01:15')
3120+
"""
3121+
_output_name = "BusinessHalfYearEnd"
3122+
_default_starting_month = 6
3123+
_from_name_starting_month = 12
3124+
_prefix = "BHYE"
3125+
_day_opt = "business_end"
3126+
3127+
3128+
cdef class BHalfYearBegin(HalfYearOffset):
3129+
"""
3130+
DateOffset increments between the first business day of each half-year.
3131+
3132+
startingMonth = 1 corresponds to dates like 1/01/2007, 7/02/2007, ...
3133+
startingMonth = 2 corresponds to dates like 2/01/2007, 8/01/2007, ...
3134+
startingMonth = 3 corresponds to dates like 3/01/2007, 9/03/2007, ...
3135+
3136+
Attributes
3137+
----------
3138+
n : int, default 1
3139+
The number of half-years represented.
3140+
normalize : bool, default False
3141+
Normalize start/end dates to midnight before generating date range.
3142+
startingMonth : int, default 1
3143+
A specific integer for the month of the year from which we start half-years.
3144+
3145+
See Also
3146+
--------
3147+
:class:`~pandas.tseries.offsets.DateOffset` : Standard kind of date increment.
3148+
3149+
Examples
3150+
--------
3151+
>>> from pandas.tseries.offsets import BHalfYearBegin
3152+
>>> ts = pd.Timestamp('2020-05-24 05:01:15')
3153+
>>> ts + BHalfYearBegin()
3154+
Timestamp('2020-07-01 05:01:15')
3155+
>>> ts + BHalfYearBegin(2)
3156+
Timestamp('2021-01-01 05:01:15')
3157+
>>> ts + BHalfYearBegin(startingMonth=2)
3158+
Timestamp('2020-08-03 05:01:15')
3159+
>>> ts + BHalfYearBegin(-1)
3160+
Timestamp('2020-01-01 05:01:15')
3161+
"""
3162+
_output_name = "BusinessHalfYearBegin"
3163+
_default_starting_month = 1
3164+
_from_name_starting_month = 1
3165+
_prefix = "BHYS"
3166+
_day_opt = "business_start"
3167+
3168+
3169+
cdef class HalfYearEnd(HalfYearOffset):
3170+
"""
3171+
DateOffset increments between half-year end dates.
3172+
3173+
startingMonth = 1 corresponds to dates like 1/31/2007, 7/31/2007, ...
3174+
startingMonth = 2 corresponds to dates like 2/28/2007, 8/31/2007, ...
3175+
startingMonth = 6 corresponds to dates like 6/30/2007, 12/31/2007, ...
3176+
3177+
Attributes
3178+
----------
3179+
n : int, default 1
3180+
The number of half-years represented.
3181+
normalize : bool, default False
3182+
Normalize start/end dates to midnight before generating date range.
3183+
startingMonth : int, default 6
3184+
A specific integer for the month of the year from which we start half-years.
3185+
3186+
See Also
3187+
--------
3188+
:class:`~pandas.tseries.offsets.DateOffset` : Standard kind of date increment.
3189+
3190+
Examples
3191+
--------
3192+
>>> ts = pd.Timestamp(2022, 1, 1)
3193+
>>> ts + pd.offsets.HalfYearEnd()
3194+
Timestamp('2022-06-30 00:00:00')
3195+
"""
3196+
_default_starting_month = 6
3197+
_from_name_starting_month = 12
3198+
_prefix = "HYE"
3199+
_day_opt = "end"
3200+
3201+
3202+
cdef class HalfYearBegin(HalfYearOffset):
3203+
"""
3204+
DateOffset increments between half-year start dates.
3205+
3206+
startingMonth = 1 corresponds to dates like 1/01/2007, 7/01/2007, ...
3207+
startingMonth = 2 corresponds to dates like 2/01/2007, 8/01/2007, ...
3208+
startingMonth = 3 corresponds to dates like 3/01/2007, 9/01/2007, ...
3209+
3210+
Attributes
3211+
----------
3212+
n : int, default 1
3213+
The number of half-years represented.
3214+
normalize : bool, default False
3215+
Normalize start/end dates to midnight before generating date range.
3216+
startingMonth : int, default 1
3217+
A specific integer for the month of the year from which we start half-years.
3218+
3219+
See Also
3220+
--------
3221+
:class:`~pandas.tseries.offsets.DateOffset` : Standard kind of date increment.
3222+
3223+
Examples
3224+
--------
3225+
>>> ts = pd.Timestamp(2022, 2, 1)
3226+
>>> ts + pd.offsets.HalfYearBegin()
3227+
Timestamp('2022-07-01 00:00:00')
3228+
"""
3229+
_default_starting_month = 1
3230+
_from_name_starting_month = 1
3231+
_prefix = "HYS"
3232+
_day_opt = "start"
3233+
3234+
30143235
# ----------------------------------------------------------------------
30153236
# Month-Based Offset Classes
30163237

@@ -4823,6 +5044,8 @@ prefix_mapping = {
48235044
BusinessMonthEnd, # 'BME'
48245045
BQuarterEnd, # 'BQE'
48255046
BQuarterBegin, # 'BQS'
5047+
BHalfYearEnd, # 'BHYE'
5048+
BHalfYearBegin, # 'BHYS'
48265049
BusinessHour, # 'bh'
48275050
CustomBusinessDay, # 'C'
48285051
CustomBusinessMonthEnd, # 'CBME'
@@ -4839,6 +5062,8 @@ prefix_mapping = {
48395062
Micro, # 'us'
48405063
QuarterEnd, # 'QE'
48415064
QuarterBegin, # 'QS'
5065+
HalfYearEnd, # 'HYE'
5066+
HalfYearBegin, # 'HYS'
48425067
Milli, # 'ms'
48435068
Hour, # 'h'
48445069
Day, # 'D'

0 commit comments

Comments
 (0)