Skip to content

Commit bd66592

Browse files
adrienemeryjreback
authored andcommitted
ENH: Add SemiMonthEnd and SemiMonthBegin offsets #1543
closes #1543 Author: Adrien Emery <[email protected]> Closes #13315 from adrienemery/semi-monthly-offset and squashes the following commits: fe221b2 [Adrien Emery] ENH: Add SemiMonthEnd and SemiMonthBegin offsets #1543
1 parent 07761c5 commit bd66592

File tree

7 files changed

+750
-13
lines changed

7 files changed

+750
-13
lines changed

asv_bench/benchmarks/timeseries.py

+60
Original file line numberDiff line numberDiff line change
@@ -1155,3 +1155,63 @@ def setup(self):
11551155

11561156
def time_timeseries_year_incr(self):
11571157
(self.date + self.year)
1158+
1159+
1160+
class timeseries_semi_month_offset(object):
    """Benchmarks for the ``SemiMonthEnd`` and ``SemiMonthBegin`` offsets.

    Each ``time_*`` method exercises one way of applying an offset: to a
    single datetime (``apply``, ``+``, ``-``, optionally scaled by 10) or
    to a whole ``date_range`` (``apply_index``, ``+``, ``-``).
    """
    goal_time = 0.2

    def setup(self):
        """Build the scalar date, the index and the two offsets under test."""
        # date is not on an offset which will be slowest case
        self.date = dt.datetime(2011, 1, 2)
        self.semi_month_end = pd.offsets.SemiMonthEnd()
        self.semi_month_begin = pd.offsets.SemiMonthBegin()

        # minute-frequency index used by the vectorised benchmarks
        self.N = 100000
        self.rng = date_range(start='1/1/2000', periods=self.N, freq='T')

    # ------------------------------------------------------------------
    # SemiMonthEnd

    def time_semi_month_end_apply(self):
        """Call ``SemiMonthEnd.apply`` on a single datetime."""
        self.semi_month_end.apply(self.date)

    def time_semi_month_end_incr(self):
        """Add one ``SemiMonthEnd`` to a single datetime."""
        self.date + self.semi_month_end

    def time_semi_month_end_incr_n(self):
        """Add ten ``SemiMonthEnd`` steps to a single datetime."""
        self.date + 10 * self.semi_month_end

    def time_semi_month_end_decr(self):
        """Subtract one ``SemiMonthEnd`` from a single datetime."""
        self.date - self.semi_month_end

    def time_semi_month_end_decr_n(self):
        """Subtract ten ``SemiMonthEnd`` steps from a single datetime."""
        self.date - 10 * self.semi_month_end

    def time_semi_month_end_apply_index(self):
        """Call ``SemiMonthEnd.apply_index`` on the whole index."""
        self.semi_month_end.apply_index(self.rng)

    def time_semi_month_end_incr_rng(self):
        """Add one ``SemiMonthEnd`` to the whole index."""
        self.rng + self.semi_month_end

    def time_semi_month_end_decr_rng(self):
        """Subtract one ``SemiMonthEnd`` from the whole index."""
        self.rng - self.semi_month_end

    # ------------------------------------------------------------------
    # SemiMonthBegin

    def time_semi_month_begin_apply(self):
        """Call ``SemiMonthBegin.apply`` on a single datetime."""
        self.semi_month_begin.apply(self.date)

    def time_semi_month_begin_incr(self):
        """Add one ``SemiMonthBegin`` to a single datetime."""
        self.date + self.semi_month_begin

    def time_semi_month_begin_incr_n(self):
        """Add ten ``SemiMonthBegin`` steps to a single datetime."""
        self.date + 10 * self.semi_month_begin

    def time_semi_month_begin_decr(self):
        """Subtract one ``SemiMonthBegin`` from a single datetime."""
        self.date - self.semi_month_begin

    def time_semi_month_begin_decr_n(self):
        """Subtract ten ``SemiMonthBegin`` steps from a single datetime."""
        self.date - 10 * self.semi_month_begin

    def time_semi_month_begin_apply_index(self):
        """Call ``SemiMonthBegin.apply_index`` on the whole index."""
        self.semi_month_begin.apply_index(self.rng)

    def time_semi_month_begin_incr_rng(self):
        """Add one ``SemiMonthBegin`` to the whole index."""
        self.rng + self.semi_month_begin

    def time_semi_month_begin_decr_rng(self):
        """Subtract one ``SemiMonthBegin`` from the whole index."""
        self.rng - self.semi_month_begin

doc/source/timeseries.rst

+4
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,8 @@ frequency increment. Specific offset logic like "month", "business day", or
589589
BMonthBegin, "business month begin"
590590
CBMonthEnd, "custom business month end"
591591
CBMonthBegin, "custom business month begin"
592+
SemiMonthEnd, "15th (or other day_of_month) and calendar month end"
593+
SemiMonthBegin, "15th (or other day_of_month) and calendar month begin"
592594
QuarterEnd, "calendar quarter end"
593595
QuarterBegin, "calendar quarter begin"
594596
BQuarterEnd, "business quarter end"
@@ -967,9 +969,11 @@ frequencies. We will refer to these aliases as *offset aliases*
967969
"D", "calendar day frequency"
968970
"W", "weekly frequency"
969971
"M", "month end frequency"
972+
"SM", "semi-month end frequency (15th and end of month)"
970973
"BM", "business month end frequency"
971974
"CBM", "custom business month end frequency"
972975
"MS", "month start frequency"
976+
"SMS", "semi-month start frequency (1st and 15th)"
973977
"BMS", "business month start frequency"
974978
"CBMS", "custom business month start frequency"
975979
"Q", "quarter end frequency"

doc/source/whatsnew/v0.18.2.txt

+37
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,43 @@ New behaviour:
5151

5252
In [2]: pd.read_csv(StringIO(data), names=names)
5353

54+
.. _whatsnew_0182.enhancements.semi_month_offsets:
55+
56+
Semi-Month Offsets
57+
^^^^^^^^^^^^^^^^^^
58+
59+
Pandas has gained new frequency offsets, ``SemiMonthEnd`` ('SM') and ``SemiMonthBegin`` ('SMS').
60+
These provide date offsets anchored (by default) to the 15th and the end of the month, and to the 1st and the 15th of the month, respectively.
61+
(:issue:`1543`)
62+
63+
.. ipython:: python
64+
65+
from pandas.tseries.offsets import SemiMonthEnd, SemiMonthBegin
66+
67+
SemiMonthEnd:
68+
69+
.. ipython:: python
70+
71+
Timestamp('2016-01-01') + SemiMonthEnd()
72+
73+
pd.date_range('2015-01-01', freq='SM', periods=4)
74+
75+
SemiMonthBegin:
76+
77+
.. ipython:: python
78+
79+
Timestamp('2016-01-01') + SemiMonthBegin()
80+
81+
pd.date_range('2015-01-01', freq='SMS', periods=4)
82+
83+
Using the anchoring suffix, you can also specify the day of month to use instead of the 15th.
84+
85+
.. ipython:: python
86+
87+
pd.date_range('2015-01-01', freq='SMS-16', periods=4)
88+
89+
pd.date_range('2015-01-01', freq='SM-14', periods=4)
90+
5491
.. _whatsnew_0182.enhancements.other:
5592

5693
Other enhancements

pandas/tseries/offsets.py

+213-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
import numpy as np
55

66
from pandas.tseries.tools import to_datetime, normalize_date
7-
from pandas.core.common import ABCSeries, ABCDatetimeIndex, ABCPeriod
7+
from pandas.core.common import (ABCSeries, ABCDatetimeIndex, ABCPeriod,
8+
AbstractMethodError)
89

910
# import after tools, dateutil check
1011
from dateutil.relativedelta import relativedelta, weekday
@@ -18,6 +19,7 @@
1819
__all__ = ['Day', 'BusinessDay', 'BDay', 'CustomBusinessDay', 'CDay',
1920
'CBMonthEnd', 'CBMonthBegin',
2021
'MonthBegin', 'BMonthBegin', 'MonthEnd', 'BMonthEnd',
22+
'SemiMonthEnd', 'SemiMonthBegin',
2123
'BusinessHour', 'CustomBusinessHour',
2224
'YearBegin', 'BYearBegin', 'YearEnd', 'BYearEnd',
2325
'QuarterBegin', 'BQuarterBegin', 'QuarterEnd', 'BQuarterEnd',
@@ -1160,6 +1162,214 @@ def onOffset(self, dt):
11601162
_prefix = 'MS'
11611163

11621164

1165+
class SemiMonthOffset(DateOffset):
    """
    Common base for offsets that land on two days of every month.

    One of the two days is ``day_of_month``; the other one is decided by
    the subclass, which has to supply ``onOffset``, ``_prefix``,
    ``_apply``, ``_get_roll`` and ``_apply_index_days``.

    Parameters
    ----------
    n : int, default 1
        Coerced with ``int()``.
    day_of_month : int, optional
        Coerced with ``int()``; must satisfy
        ``_min_day_of_month <= day_of_month <= 27``. When None,
        ``_default_day_of_month`` (15) is used.
    normalize : bool, default False
    **kwds
        Stored as ``self.kwds`` with a ``'day_of_month'`` entry added.

    Raises
    ------
    ValueError
        If ``day_of_month`` falls outside the allowed range.
    """
    # NOTE(review): read by offset machinery outside this class - confirm
    _adjust_dst = True
    _default_day_of_month = 15
    _min_day_of_month = 2

    def __init__(self, n=1, day_of_month=None, normalize=False, **kwds):
        self.day_of_month = (self._default_day_of_month
                             if day_of_month is None
                             else int(day_of_month))
        lowest = self._min_day_of_month
        if not (lowest <= self.day_of_month <= 27):
            msg = 'day_of_month must be {}<=day_of_month<=27, got {}'
            raise ValueError(msg.format(lowest, self.day_of_month))
        self.n = int(n)
        self.normalize = normalize
        self.kwds = kwds
        self.kwds['day_of_month'] = self.day_of_month

    @classmethod
    def _from_name(cls, suffix=None):
        """Build an instance whose ``day_of_month`` is the given suffix."""
        return cls(day_of_month=suffix)

    @property
    def rule_code(self):
        """``_prefix`` followed by ``-<day_of_month>``."""
        return self._prefix + '-{}'.format(self.day_of_month)

    @apply_wraps
    def apply(self, other):
        """
        Snap an off-offset ``other`` to ``day_of_month``, correct the step
        count for that move, then hand over to the subclass ``_apply``.
        """
        steps = self.n
        if not self.onOffset(other):
            _, month_length = tslib.monthrange(other.year, other.month)
            anchor = self.day_of_month
            current_day = other.day
            in_first_part = 1 < current_day < anchor
            in_second_part = anchor < current_day < month_length

            if in_first_part or in_second_part:
                other += relativedelta(day=anchor)

            if in_first_part:
                if steps > 0:
                    # rollforward so subtract 1
                    steps -= 1
            elif in_second_part:
                if steps < 0:
                    # rollforward in the negative direction so add 1
                    steps += 1
                elif steps == 0:
                    steps = 1

        return self._apply(steps, other)

    def _apply(self, n, other):
        """Handle specific apply logic for child classes"""
        raise AbstractMethodError(self)

    @apply_index_wraps
    def apply_index(self, i):
        """Vectorised counterpart of ``apply`` for a datetime index ``i``."""
        # determine how many days away from the 1st of the month we are
        since_month_start = i.to_perioddelta('M').asi8
        anchor_delta = Timedelta(days=self.day_of_month - 1).value

        # boolean masks: strictly before / strictly after day_of_month
        is_before = since_month_start < anchor_delta
        is_after = since_month_start > anchor_delta

        # determine the correct n for each date in i
        roll = self._get_roll(i, is_before, is_after)

        # isolate the time since it will be stripped away on the next line
        time_of_day = i.to_perioddelta('D')

        # apply the correct number of months
        shifted = (i.to_period('M') + (roll // 2)).to_timestamp()

        # apply the correct day, then put the time back
        return self._apply_index_days(shifted, roll) + time_of_day

    def _get_roll(self, i, before_day_of_month, after_day_of_month):
        """Return an array with the correct n for each date in i.

        The roll array is based on the fact that i gets rolled back to
        the first day of the month.
        """
        raise AbstractMethodError(self)

    def _apply_index_days(self, i, roll):
        """Apply the correct day for each date in i"""
        raise AbstractMethodError(self)
1254+
1255+
1256+
class SemiMonthEnd(SemiMonthOffset):
    """
    Two DateOffset's per month repeating on the last
    day of the month and day_of_month.

    .. versionadded:: 0.18.2

    Parameters
    ----------
    n : int
    normalize : bool, default False
    day_of_month : int, {1, 2,...,27}, default 15
    """
    _prefix = 'SM'
    _min_day_of_month = 1

    def onOffset(self, dt):
        """True when ``dt`` is on ``day_of_month`` or the month's last day."""
        if self.normalize and not _is_normalized(dt):
            return False
        last_day = tslib.monthrange(dt.year, dt.month)[1]
        return dt.day in (self.day_of_month, last_day)

    def _apply(self, n, other):
        """Move ``other`` by ``n`` semi-month-end steps."""
        anchor = self.day_of_month

        # if other.day is not day_of_month move to day_of_month and update n
        if other.day != anchor:
            came_from_before = other.day < anchor
            other += relativedelta(day=anchor)
            if came_from_before:
                if n > 0:
                    n -= 1
            elif n == 0:
                n = 1
            else:
                n += 1

        # an odd step count lands on month end (day=31), even on the anchor
        months, is_odd = divmod(n, 2)
        target_day = 31 if is_odd else anchor
        return other + relativedelta(months=months, day=target_day)

    def _get_roll(self, i, before_day_of_month, after_day_of_month):
        """Per-element step count for ``apply_index``, chosen by ``self.n``."""
        n = self.n
        is_month_end = i.is_month_end

        if n > 0:
            base = np.where(before_day_of_month, n, n + 1)
            extra_for_month_end = np.where(is_month_end, 1, 0)
            return extra_for_month_end + base

        if n == 0:
            on_or_before = np.where(~after_day_of_month, 1, 0)
            after = np.where(after_day_of_month, 2, 0)
            return on_or_before + after

        return np.where(after_day_of_month, n + 2, n + 1)

    def _apply_index_days(self, i, roll):
        """Add ``day_of_month`` days where ``roll`` is odd, then step back
        one day."""
        odd_shift = (roll % 2) * Timedelta(days=self.day_of_month).value
        i += odd_shift
        return i + Timedelta(days=-1)
1313+
1314+
1315+
class SemiMonthBegin(SemiMonthOffset):
    """
    Two DateOffset's per month repeating on the first
    day of the month and day_of_month.

    .. versionadded:: 0.18.2

    Parameters
    ----------
    n : int
    normalize : bool, default False
    day_of_month : int, {2, 3,...,27}, default 15
    """
    _prefix = 'SMS'

    def onOffset(self, dt):
        """True when ``dt`` is on the 1st or on ``day_of_month``."""
        if self.normalize and not _is_normalized(dt):
            return False
        return dt.day in (1, self.day_of_month)

    def _apply(self, n, other):
        """Move ``other`` by ``n`` semi-month-begin steps."""
        anchor = self.day_of_month

        # if other.day is not day_of_month move to day_of_month and update n
        if other.day != anchor:
            came_from_before = other.day < anchor
            other += relativedelta(day=anchor)
            if came_from_before:
                # covers both the n == 0 (-> -1) and n != 0 (-> n - 1) cases
                n -= 1
            elif n == 0:
                n = 1
            elif n < 0:
                n += 1

        # an odd step count lands on the 1st, an even one on the anchor
        half, is_odd = divmod(n, 2)
        target_day = 1 if is_odd else anchor
        return other + relativedelta(months=half + is_odd, day=target_day)

    def _get_roll(self, i, before_day_of_month, after_day_of_month):
        """Per-element step count for ``apply_index``, chosen by ``self.n``."""
        n = self.n
        is_month_start = i.is_month_start

        if n > 0:
            return np.where(before_day_of_month, n, n + 1)

        if n == 0:
            not_on_first = np.where(is_month_start, 0, 1)
            past_anchor = np.where(after_day_of_month, 1, 0)
            return not_on_first + past_anchor

        base = np.where(after_day_of_month, n + 2, n + 1)
        on_first_adjust = np.where(is_month_start, -1, 0)
        return base + on_first_adjust

    def _apply_index_days(self, i, roll):
        """Add ``day_of_month - 1`` days wherever ``roll`` is odd."""
        odd_shift = (roll % 2) * Timedelta(days=self.day_of_month - 1).value
        return i + odd_shift
1371+
1372+
11631373
class BusinessMonthEnd(MonthOffset):
11641374
"""DateOffset increments between business EOM dates"""
11651375

@@ -2720,6 +2930,8 @@ def generate_range(start=None, end=None, periods=None,
27202930
CustomBusinessHour, # 'CBH'
27212931
MonthEnd, # 'M'
27222932
MonthBegin, # 'MS'
2933+
SemiMonthEnd, # 'SM'
2934+
SemiMonthBegin, # 'SMS'
27232935
Week, # 'W'
27242936
Second, # 'S'
27252937
Minute, # 'T'

0 commit comments

Comments
 (0)