diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 61c05d1b226e0..8a6e0c24e235d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -649,6 +649,7 @@ Other API Changes - :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`) - Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`) - Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`) +- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments, which could lead to unexpected behavior. Now, only valid arguments will be accepted (:issue:`17176`). - Pandas no longer registers matplotlib converters on import. 
The converters will be registered and used when the first plot is drawn (:issue:`17710`) diff --git a/pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_2.7.14.pickle b/pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_2.7.14.pickle new file mode 100644 index 0000000000000..555be58cc33ac Binary files /dev/null and b/pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_2.7.14.pickle differ diff --git a/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_2.7.14.pickle b/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_2.7.14.pickle new file mode 100644 index 0000000000000..963e533c4d2b4 Binary files /dev/null and b/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_2.7.14.pickle differ diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py index 1cb2081409312..0b60d37d36c08 100755 --- a/pandas/tests/io/generate_legacy_storage_files.py +++ b/pandas/tests/io/generate_legacy_storage_files.py @@ -45,7 +45,10 @@ from pandas.tseries.offsets import ( DateOffset, Hour, Minute, Day, MonthBegin, MonthEnd, YearBegin, - YearEnd, Week, + YearEnd, Week, WeekOfMonth, LastWeekOfMonth, + BusinessDay, BusinessHour, CustomBusinessDay, FY5253, + Easter, + SemiMonthEnd, SemiMonthBegin, QuarterBegin, QuarterEnd) from pandas.compat import u import os @@ -53,7 +56,7 @@ import numpy as np import pandas import platform as pl - +from datetime import timedelta _loose_version = LooseVersion(pandas.__version__) @@ -201,6 +204,12 @@ def create_data(): freq='M') off = {'DateOffset': DateOffset(years=1), + 'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824), + 'BusinessDay': BusinessDay(offset=timedelta(seconds=9)), + 'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'), + 'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'), + 'SemiMonthBegin': SemiMonthBegin(day_of_month=9), + 'SemiMonthEnd': SemiMonthEnd(day_of_month=24), 'MonthBegin': MonthBegin(1), 
'MonthEnd': MonthEnd(1), 'QuarterBegin': QuarterBegin(1), @@ -209,6 +218,11 @@ def create_data(): 'YearBegin': YearBegin(1), 'YearEnd': YearEnd(1), 'Week': Week(1), + 'Week_Tues': Week(2, normalize=False, weekday=1), + 'WeekOfMonth': WeekOfMonth(week=3, weekday=4), + 'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3), + 'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"), + 'Easter': Easter(), 'Hour': Hour(1), 'Minute': Minute(1)} diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 3a2a613986dca..c65691618e654 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -581,6 +581,7 @@ def __setstate__(self, state): if '_offset' in state: # pragma: no cover raise ValueError('Unexpected key `_offset`') state['_offset'] = state.pop('offset') + state['kwds']['offset'] = state['_offset'] self.__dict__ = state if 'weekmask' in state and 'holidays' in state: calendar, holidays = _get_calendar(weekmask=self.weekmask, @@ -598,11 +599,11 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset): _prefix = 'B' _adjust_dst = True - def __init__(self, n=1, normalize=False, **kwds): + def __init__(self, n=1, normalize=False, offset=timedelta(0)): self.n = int(n) self.normalize = normalize - self.kwds = kwds - self._offset = kwds.get('offset', timedelta(0)) + self.kwds = {'offset': offset} + self._offset = offset def _offset_str(self): def get_str(td): @@ -693,14 +694,13 @@ def onOffset(self, dt): class BusinessHourMixin(BusinessMixin): - def __init__(self, **kwds): + def __init__(self, start='09:00', end='17:00', offset=timedelta(0)): # must be validated here to equality check - kwds['start'] = self._validate_time(kwds.get('start', '09:00')) - kwds['end'] = self._validate_time(kwds.get('end', '17:00')) + kwds = {'offset': offset} + self.start = kwds['start'] = self._validate_time(start) + self.end = kwds['end'] = self._validate_time(end) self.kwds = kwds - self._offset = kwds.get('offset', timedelta(0)) - self.start = 
kwds.get('start', '09:00') - self.end = kwds.get('end', '17:00') + self._offset = offset def _validate_time(self, t_input): from datetime import time as dt_time @@ -923,10 +923,11 @@ class BusinessHour(BusinessHourMixin, SingleConstructorOffset): _prefix = 'BH' _anchor = 0 - def __init__(self, n=1, normalize=False, **kwds): + def __init__(self, n=1, normalize=False, start='09:00', + end='17:00', offset=timedelta(0)): self.n = int(n) self.normalize = normalize - super(BusinessHour, self).__init__(**kwds) + super(BusinessHour, self).__init__(start=start, end=end, offset=offset) @cache_readonly def next_bday(self): @@ -960,11 +961,11 @@ class CustomBusinessDay(BusinessDay): _prefix = 'C' def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', - holidays=None, calendar=None, **kwds): + holidays=None, calendar=None, offset=timedelta(0)): self.n = int(n) self.normalize = normalize - self.kwds = kwds - self._offset = kwds.get('offset', timedelta(0)) + self._offset = offset + self.kwds = {} calendar, holidays = _get_calendar(weekmask=weekmask, holidays=holidays, @@ -976,6 +977,7 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.kwds['weekmask'] = self.weekmask = weekmask self.kwds['holidays'] = self.holidays = holidays self.kwds['calendar'] = self.calendar = calendar + self.kwds['offset'] = offset @apply_wraps def apply(self, other): @@ -1026,10 +1028,12 @@ class CustomBusinessHour(BusinessHourMixin, SingleConstructorOffset): _anchor = 0 def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', - holidays=None, calendar=None, **kwds): + holidays=None, calendar=None, + start='09:00', end='17:00', offset=timedelta(0)): self.n = int(n) self.normalize = normalize - super(CustomBusinessHour, self).__init__(**kwds) + super(CustomBusinessHour, self).__init__(start=start, + end=end, offset=offset) calendar, holidays = _get_calendar(weekmask=weekmask, holidays=holidays, @@ -1121,7 +1125,7 @@ class 
SemiMonthOffset(DateOffset): _default_day_of_month = 15 _min_day_of_month = 2 - def __init__(self, n=1, day_of_month=None, normalize=False, **kwds): + def __init__(self, n=1, normalize=False, day_of_month=None): if day_of_month is None: self.day_of_month = self._default_day_of_month else: @@ -1132,8 +1136,7 @@ def __init__(self, n=1, day_of_month=None, normalize=False, **kwds): day=self.day_of_month)) self.n = int(n) self.normalize = normalize - self.kwds = kwds - self.kwds['day_of_month'] = self.day_of_month + self.kwds = {'day_of_month': self.day_of_month} @classmethod def _from_name(cls, suffix=None): @@ -1408,11 +1411,11 @@ class CustomBusinessMonthEnd(BusinessMixin, MonthOffset): _prefix = 'CBM' def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', - holidays=None, calendar=None, **kwds): + holidays=None, calendar=None, offset=timedelta(0)): self.n = int(n) self.normalize = normalize - self.kwds = kwds - self._offset = kwds.get('offset', timedelta(0)) + self._offset = offset + self.kwds = {} calendar, holidays = _get_calendar(weekmask=weekmask, holidays=holidays, @@ -1420,6 +1423,7 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.kwds['weekmask'] = self.weekmask = weekmask self.kwds['holidays'] = self.holidays = holidays self.kwds['calendar'] = self.calendar = calendar + self.kwds['offset'] = offset @cache_readonly def cbday(self): @@ -1430,7 +1434,7 @@ def cbday(self): def m_offset(self): kwds = self.kwds kwds = {key: kwds[key] for key in kwds - if key not in ['calendar', 'weekmask', 'holidays']} + if key not in ['calendar', 'weekmask', 'holidays', 'offset']} return MonthEnd(n=1, normalize=self.normalize, **kwds) @apply_wraps @@ -1478,20 +1482,21 @@ class CustomBusinessMonthBegin(BusinessMixin, MonthOffset): _prefix = 'CBMS' def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', - holidays=None, calendar=None, **kwds): + holidays=None, calendar=None, offset=timedelta(0)): self.n = int(n) 
self.normalize = normalize - self.kwds = kwds - self._offset = kwds.get('offset', timedelta(0)) + self._offset = offset + self.kwds = {} # _get_calendar does validation and possible transformation # of calendar and holidays. calendar, holidays = _get_calendar(weekmask=weekmask, holidays=holidays, calendar=calendar) - kwds['calendar'] = self.calendar = calendar - kwds['weekmask'] = self.weekmask = weekmask - kwds['holidays'] = self.holidays = holidays + self.kwds['calendar'] = self.calendar = calendar + self.kwds['weekmask'] = self.weekmask = weekmask + self.kwds['holidays'] = self.holidays = holidays + self.kwds['offset'] = offset @cache_readonly def cbday(self): @@ -1502,7 +1507,7 @@ def cbday(self): def m_offset(self): kwds = self.kwds kwds = {key: kwds[key] for key in kwds - if key not in ['calendar', 'weekmask', 'holidays']} + if key not in ['calendar', 'weekmask', 'holidays', 'offset']} return MonthBegin(n=1, normalize=self.normalize, **kwds) @apply_wraps @@ -1540,17 +1545,17 @@ class Week(DateOffset): _adjust_dst = True _inc = timedelta(weeks=1) - def __init__(self, n=1, normalize=False, **kwds): + def __init__(self, n=1, normalize=False, weekday=None): self.n = n self.normalize = normalize - self.weekday = kwds.get('weekday', None) + self.weekday = weekday if self.weekday is not None: if self.weekday < 0 or self.weekday > 6: raise ValueError('Day must be 0<=day<=6, got {day}' .format(day=self.weekday)) - self.kwds = kwds + self.kwds = {'weekday': weekday} def isAnchored(self): return (self.n == 1 and self.weekday is not None) @@ -1642,9 +1647,9 @@ class WeekOfMonth(DateOffset): Parameters ---------- n : int - week : {0, 1, 2, 3, ...} + week : {0, 1, 2, 3, ...}, default None 0 is 1st week of month, 1 2nd week, etc. 
- weekday : {0, 1, ..., 6} + weekday : {0, 1, ..., 6}, default None 0: Mondays 1: Tuesdays 2: Wednesdays @@ -1656,11 +1661,11 @@ class WeekOfMonth(DateOffset): _adjust_dst = True - def __init__(self, n=1, normalize=False, **kwds): + def __init__(self, n=1, normalize=False, week=None, weekday=None): self.n = n self.normalize = normalize - self.weekday = kwds['weekday'] - self.week = kwds['week'] + self.weekday = weekday + self.week = week if self.n == 0: raise ValueError('N cannot be 0') @@ -1672,7 +1677,7 @@ def __init__(self, n=1, normalize=False, **kwds): raise ValueError('Week must be 0<=week<=3, got {week}' .format(week=self.week)) - self.kwds = kwds + self.kwds = {'weekday': weekday, 'week': week} @apply_wraps def apply(self, other): @@ -1742,8 +1747,8 @@ class LastWeekOfMonth(DateOffset): Parameters ---------- - n : int - weekday : {0, 1, ..., 6} + n : int, default 1 + weekday : {0, 1, ..., 6}, default None 0: Mondays 1: Tuesdays 2: Wednesdays @@ -1751,12 +1756,13 @@ class LastWeekOfMonth(DateOffset): 4: Fridays 5: Saturdays 6: Sundays + """ - def __init__(self, n=1, normalize=False, **kwds): + def __init__(self, n=1, normalize=False, weekday=None): self.n = n self.normalize = normalize - self.weekday = kwds['weekday'] + self.weekday = weekday if self.n == 0: raise ValueError('N cannot be 0') @@ -1765,7 +1771,7 @@ def __init__(self, n=1, normalize=False, **kwds): raise ValueError('Day must be 0<=day<=6, got {day}' .format(day=self.weekday)) - self.kwds = kwds + self.kwds = {'weekday': weekday} @apply_wraps def apply(self, other): @@ -1829,13 +1835,14 @@ class QuarterOffset(DateOffset): # TODO: Consider combining QuarterOffset and YearOffset __init__ at some # point - def __init__(self, n=1, normalize=False, **kwds): + def __init__(self, n=1, normalize=False, startingMonth=None): self.n = n self.normalize = normalize - self.startingMonth = kwds.get('startingMonth', - self._default_startingMonth) + if startingMonth is None: + startingMonth = 
self._default_startingMonth + self.startingMonth = startingMonth - self.kwds = kwds + self.kwds = {'startingMonth': startingMonth} def isAnchored(self): return (self.n == 1 and self.startingMonth is not None) @@ -2017,13 +2024,14 @@ class YearOffset(DateOffset): """DateOffset that just needs a month""" _adjust_dst = True - def __init__(self, n=1, normalize=False, **kwds): - self.month = kwds.get('month', self._default_month) + def __init__(self, n=1, normalize=False, month=None): + month = month if month is not None else self._default_month + self.month = month if self.month < 1 or self.month > 12: raise ValueError('Month must go from 1 to 12') - DateOffset.__init__(self, n=n, normalize=normalize, **kwds) + DateOffset.__init__(self, n=n, normalize=normalize, month=month) @classmethod def _from_name(cls, suffix=None): @@ -2262,15 +2270,17 @@ class FY5253(DateOffset): _suffix_prefix_nearest = 'N' _adjust_dst = True - def __init__(self, n=1, normalize=False, **kwds): + def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1, + variation="nearest"): self.n = n self.normalize = normalize - self.startingMonth = kwds['startingMonth'] - self.weekday = kwds["weekday"] + self.startingMonth = startingMonth + self.weekday = weekday - self.variation = kwds["variation"] + self.variation = variation - self.kwds = kwds + self.kwds = {'weekday': weekday, 'startingMonth': startingMonth, + 'variation': variation} if self.n == 0: raise ValueError('N cannot be 0') @@ -2510,24 +2520,29 @@ class FY5253Quarter(DateOffset): _prefix = 'REQ' _adjust_dst = True - def __init__(self, n=1, normalize=False, **kwds): + def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1, + qtr_with_extra_week=1, variation="nearest"): self.n = n self.normalize = normalize - self.qtr_with_extra_week = kwds["qtr_with_extra_week"] + self.weekday = weekday + self.startingMonth = startingMonth + self.qtr_with_extra_week = qtr_with_extra_week + self.variation = variation - self.kwds = kwds + 
self.kwds = {'weekday': weekday, 'startingMonth': startingMonth, + 'qtr_with_extra_week': qtr_with_extra_week, + 'variation': variation} if self.n == 0: raise ValueError('N cannot be 0') @cache_readonly def _offset(self): - kwds = self.kwds return FY5253( - startingMonth=kwds['startingMonth'], - weekday=kwds["weekday"], - variation=kwds["variation"]) + startingMonth=self.startingMonth, + weekday=self.weekday, + variation=self.variation) def isAnchored(self): return self.n == 1 and self._offset.isAnchored()