diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 7229bd38fffa9..0ffc02aa72456 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -79,7 +79,7 @@ Other API Changes - :func:`Dataframe.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`) - :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) - Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) - +- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). .. _whatsnew_0220.deprecations: diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index b03d48bba1649..4ed4d4a9b7b99 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -261,7 +261,7 @@ def _validate_business_time(t_input): # --------------------------------------------------------------------- # Constructor Helpers -_rd_kwds = set([ +relativedelta_kwds = set([ 'years', 'months', 'weeks', 'days', 'year', 'month', 'week', 'day', 'weekday', 'hour', 'minute', 'second', 'microsecond', @@ -406,6 +406,33 @@ class _BaseOffset(object): # will raise NotImplementedError. return get_day_of_month(other, self._day_opt) + def _validate_n(self, n): + """ + Require that `n` be a nonzero integer. + + Parameters + ---------- + n : int + + Returns + ------- + nint : int + + Raises + ------ + TypeError if `int(n)` raises + ValueError if n != int(n) + """ + try: + nint = int(n) + except (ValueError, TypeError): + raise TypeError('`n` argument must be an integer, ' + 'got {ntype}'.format(ntype=type(n))) + if n != nint: + raise ValueError('`n` argument must be an integer, ' + 'got {n}'.format(n=n)) + return nint + class BaseOffset(_BaseOffset): # Here we add __rfoo__ methods that don't play well with cdef classes diff --git a/pandas/tests/tseries/offsets/conftest.py b/pandas/tests/tseries/offsets/conftest.py index 25446c24b28c0..76f24123ea0e1 100644 --- a/pandas/tests/tseries/offsets/conftest.py +++ b/pandas/tests/tseries/offsets/conftest.py @@ -7,6 +7,19 @@ def offset_types(request): return request.param +@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__ if + issubclass(getattr(offsets, o), offsets.MonthOffset) + and o != 'MonthOffset']) +def month_classes(request): + return request.param + + +@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__ if + issubclass(getattr(offsets, o), offsets.Tick)]) +def tick_classes(request): + return request.param + + @pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/Asia/Tokyo', 'dateutil/US/Pacific']) def tz(request): diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 6821017c89c3a..357c95282e78d 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -17,6 +17,7 @@ get_offset, get_standard_freq) from pandas.core.indexes.datetimes import ( _to_m8, DatetimeIndex, _daterange_cache) +import pandas._libs.tslibs.offsets as liboffsets from pandas._libs.tslibs.offsets import WeekDay, CacheableOffset from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd, BusinessHour, WeekOfMonth, CBMonthEnd, @@ -4682,9 +4683,45 @@ def test_all_offset_classes(self, tup): assert first == second +# --------------------------------------------------------------------- def test_get_offset_day_error(): # subclass of _BaseOffset must override _day_opt attribute, or we should # get a NotImplementedError with pytest.raises(NotImplementedError): DateOffset()._get_offset_day(datetime.now()) + + +@pytest.mark.parametrize('kwd', sorted(list(liboffsets.relativedelta_kwds))) +def test_valid_month_attributes(kwd, month_classes): + # GH#18226 + cls = month_classes + # check that we cannot create e.g. MonthEnd(weeks=3) + with pytest.raises(TypeError): + cls(**{kwd: 3}) + + +@pytest.mark.parametrize('kwd', sorted(list(liboffsets.relativedelta_kwds))) +def test_valid_tick_attributes(kwd, tick_classes): + # GH#18226 + cls = tick_classes + # check that we cannot create e.g. Hour(weeks=3) + with pytest.raises(TypeError): + cls(**{kwd: 3}) + + +def test_validate_n_error(): + with pytest.raises(TypeError): + DateOffset(n='Doh!') + + with pytest.raises(TypeError): + MonthBegin(n=timedelta(1)) + + with pytest.raises(TypeError): + BDay(n=np.array([1, 2], dtype=np.int64)) + + +def test_require_integers(offset_types): + cls = offset_types + with pytest.raises(ValueError): + cls(n=1.5) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 90496729554f8..7b699349c3f07 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- +from datetime import date, datetime, timedelta import functools import operator -from datetime import date, datetime, timedelta from pandas.compat import range from pandas import compat import numpy as np @@ -166,7 +166,7 @@ def __add__(date): normalize = False def __init__(self, n=1, normalize=False, **kwds): - self.n = int(n) + self.n = self._validate_n(n) self.normalize = normalize self.kwds = kwds @@ -473,7 +473,7 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset): _adjust_dst = True def __init__(self, n=1, normalize=False, offset=timedelta(0)): - self.n = int(n) + self.n = self._validate_n(n) self.normalize = normalize self.kwds = {'offset': offset} self._offset = offset @@ -782,7 +782,7 @@ class BusinessHour(BusinessHourMixin, SingleConstructorOffset): def __init__(self, n=1, normalize=False, start='09:00', end='17:00', offset=timedelta(0)): - self.n = int(n) + self.n = self._validate_n(n) self.normalize = normalize super(BusinessHour, self).__init__(start=start, end=end, offset=offset) @@ -819,7 +819,7 @@ class CustomBusinessDay(BusinessDay): def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', holidays=None, calendar=None, offset=timedelta(0)): - self.n = int(n) + self.n = self._validate_n(n) self.normalize = normalize self._offset = offset self.kwds = {} @@ -887,7 +887,7 @@ class CustomBusinessHour(BusinessHourMixin, SingleConstructorOffset): def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', holidays=None, calendar=None, start='09:00', end='17:00', offset=timedelta(0)): - self.n = int(n) + self.n = self._validate_n(n) self.normalize = normalize super(CustomBusinessHour, self).__init__(start=start, end=end, offset=offset) @@ -919,6 +919,11 @@ def next_bday(self): class MonthOffset(SingleConstructorOffset): _adjust_dst = True + def __init__(self, n=1, normalize=False): + self.n = self._validate_n(n) + self.normalize = normalize + self.kwds = {} + @property def name(self): if self.isAnchored: @@ -994,7 +999,8 @@ def __init__(self, n=1, normalize=False, day_of_month=None): msg = 'day_of_month must be {min}<=day_of_month<=27, got {day}' raise ValueError(msg.format(min=self._min_day_of_month, day=self.day_of_month)) - self.n = int(n) + + self.n = self._validate_n(n) self.normalize = normalize self.kwds = {'day_of_month': self.day_of_month} @@ -1205,7 +1211,7 @@ class CustomBusinessMonthEnd(BusinessMixin, MonthOffset): def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', holidays=None, calendar=None, offset=timedelta(0)): - self.n = int(n) + self.n = self._validate_n(n) self.normalize = normalize self._offset = offset self.kwds = {} @@ -1278,7 +1284,7 @@ class CustomBusinessMonthBegin(BusinessMixin, MonthOffset): def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', holidays=None, calendar=None, offset=timedelta(0)): - self.n = int(n) + self.n = self._validate_n(n) self.normalize = normalize self._offset = offset self.kwds = {} @@ -1345,7 +1351,7 @@ class Week(EndMixin, DateOffset): _prefix = 'W' def __init__(self, n=1, normalize=False, weekday=None): - self.n = n + self.n = self._validate_n(n) self.normalize = normalize self.weekday = weekday @@ -1424,7 +1430,7 @@ class WeekOfMonth(DateOffset): _adjust_dst = True def __init__(self, n=1, normalize=False, week=None, weekday=None): - self.n = n + self.n = self._validate_n(n) self.normalize = normalize self.weekday = weekday self.week = week @@ -1509,7 +1515,7 @@ class LastWeekOfMonth(DateOffset): _prefix = 'LWOM' def __init__(self, n=1, normalize=False, weekday=None): - self.n = n + self.n = self._validate_n(n) self.normalize = normalize self.weekday = weekday @@ -1575,7 +1581,7 @@ class QuarterOffset(DateOffset): # point def __init__(self, n=1, normalize=False, startingMonth=None): - self.n = n + self.n = self._validate_n(n) self.normalize = normalize if startingMonth is None: startingMonth = self._default_startingMonth @@ -1820,7 +1826,7 @@ class FY5253(DateOffset): def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1, variation="nearest"): - self.n = n + self.n = self._validate_n(n) self.normalize = normalize self.startingMonth = startingMonth self.weekday = weekday @@ -2032,7 +2038,7 @@ class FY5253Quarter(DateOffset): def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1, qtr_with_extra_week=1, variation="nearest"): - self.n = n + self.n = self._validate_n(n) self.normalize = normalize self.weekday = weekday @@ -2158,6 +2164,11 @@ class Easter(DateOffset): """ _adjust_dst = True + def __init__(self, n=1, normalize=False): + self.n = self._validate_n(n) + self.normalize = normalize + self.kwds = {} + @apply_wraps def apply(self, other): current_easter = easter(other.year) @@ -2199,6 +2210,12 @@ class Tick(SingleConstructorOffset): _inc = Timedelta(microseconds=1000) _prefix = 'undefined' + def __init__(self, n=1, normalize=False): + # TODO: do Tick classes with normalize=True make sense? + self.n = self._validate_n(n) + self.normalize = normalize + self.kwds = {} + __gt__ = _tick_comp(operator.gt) __ge__ = _tick_comp(operator.ge) __lt__ = _tick_comp(operator.lt) @@ -2257,6 +2274,7 @@ def delta(self): def nanos(self): return delta_to_nanoseconds(self.delta) + # TODO: Should Tick have its own apply_index? def apply(self, other): # Timestamp can handle tz and nano sec, thus no need to use apply_wraps if isinstance(other, Timestamp):