Skip to content

Commit 06518b2

Browse files
jbrockmendel authored and jreback committed
Prevent passing invalid kwds to DateOffset constructors (#18226)
1 parent 0bcd77e commit 06518b2

File tree

5 files changed

+112
-17
lines changed

5 files changed

+112
-17
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ Other API Changes
7979
- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`)
8080
- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)
8181
- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)
82-
82+
- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`)
8383

8484
.. _whatsnew_0220.deprecations:
8585

pandas/_libs/tslibs/offsets.pyx

+28-1
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ def _validate_business_time(t_input):
261261
# ---------------------------------------------------------------------
262262
# Constructor Helpers
263263

264-
_rd_kwds = set([
264+
relativedelta_kwds = set([
265265
'years', 'months', 'weeks', 'days',
266266
'year', 'month', 'week', 'day', 'weekday',
267267
'hour', 'minute', 'second', 'microsecond',
@@ -406,6 +406,33 @@ class _BaseOffset(object):
406406
# will raise NotImplementedError.
407407
return get_day_of_month(other, self._day_opt)
408408

409+
def _validate_n(self, n):
    """
    Require that `n` be an integer (zero is allowed).

    Parameters
    ----------
    n : int
        Value to validate.  Anything that compares equal to its own
        ``int()`` conversion is accepted (e.g. ``3`` or ``3.0``).

    Returns
    -------
    nint : int
        `n` converted with ``int()``.

    Raises
    ------
    TypeError
        If ``int(n)`` raises (e.g. ``'Doh!'``, a ``timedelta`` or
        ``float('inf')``).
    ValueError
        If ``n != int(n)``, i.e. the conversion would change the value
        (e.g. ``1.5``).
    """
    try:
        nint = int(n)
    except (ValueError, TypeError, OverflowError):
        # OverflowError comes from int(float('inf')); that is still a case
        # of "int(n) raises", so report it as the documented TypeError
        # instead of letting it leak to the caller.
        raise TypeError('`n` argument must be an integer, '
                        'got {ntype}'.format(ntype=type(n)))
    if n != nint:
        # The conversion truncated something (e.g. 1.5 -> 1): reject
        # rather than silently rounding.
        raise ValueError('`n` argument must be an integer, '
                         'got {n}'.format(n=n))
    return nint
435+
409436

410437
class BaseOffset(_BaseOffset):
411438
# Here we add __rfoo__ methods that don't play well with cdef classes

pandas/tests/tseries/offsets/conftest.py

+13
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,19 @@ def offset_types(request):
77
return request.param
88

99

10+
@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__ if
11+
issubclass(getattr(offsets, o), offsets.MonthOffset)
12+
and o != 'MonthOffset'])
13+
def month_classes(request):
14+
return request.param
15+
16+
17+
@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__ if
18+
issubclass(getattr(offsets, o), offsets.Tick)])
19+
def tick_classes(request):
20+
return request.param
21+
22+
1023
@pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', 'US/Eastern',
1124
'dateutil/Asia/Tokyo', 'dateutil/US/Pacific'])
1225
def tz(request):

pandas/tests/tseries/offsets/test_offsets.py

+37
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
get_offset, get_standard_freq)
1818
from pandas.core.indexes.datetimes import (
1919
_to_m8, DatetimeIndex, _daterange_cache)
20+
import pandas._libs.tslibs.offsets as liboffsets
2021
from pandas._libs.tslibs.offsets import WeekDay, CacheableOffset
2122
from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd,
2223
BusinessHour, WeekOfMonth, CBMonthEnd,
@@ -4682,9 +4683,45 @@ def test_all_offset_classes(self, tup):
46824683
assert first == second
46834684

46844685

4686+
# ---------------------------------------------------------------------
46854687
def test_get_offset_day_error():
46864688
# subclass of _BaseOffset must override _day_opt attribute, or we should
46874689
# get a NotImplementedError
46884690

46894691
with pytest.raises(NotImplementedError):
46904692
DateOffset()._get_offset_day(datetime.now())
4693+
4694+
4695+
@pytest.mark.parametrize('kwd', sorted(list(liboffsets.relativedelta_kwds)))
4696+
def test_valid_month_attributes(kwd, month_classes):
4697+
# GH#18226
4698+
cls = month_classes
4699+
# check that we cannot create e.g. MonthEnd(weeks=3)
4700+
with pytest.raises(TypeError):
4701+
cls(**{kwd: 3})
4702+
4703+
4704+
@pytest.mark.parametrize('kwd', sorted(list(liboffsets.relativedelta_kwds)))
4705+
def test_valid_tick_attributes(kwd, tick_classes):
4706+
# GH#18226
4707+
cls = tick_classes
4708+
# check that we cannot create e.g. Hour(weeks=3)
4709+
with pytest.raises(TypeError):
4710+
cls(**{kwd: 3})
4711+
4712+
4713+
def test_validate_n_error():
4714+
with pytest.raises(TypeError):
4715+
DateOffset(n='Doh!')
4716+
4717+
with pytest.raises(TypeError):
4718+
MonthBegin(n=timedelta(1))
4719+
4720+
with pytest.raises(TypeError):
4721+
BDay(n=np.array([1, 2], dtype=np.int64))
4722+
4723+
4724+
def test_require_integers(offset_types):
4725+
cls = offset_types
4726+
with pytest.raises(ValueError):
4727+
cls(n=1.5)

pandas/tseries/offsets.py

+33-15
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# -*- coding: utf-8 -*-
2+
from datetime import date, datetime, timedelta
23
import functools
34
import operator
45

5-
from datetime import date, datetime, timedelta
66
from pandas.compat import range
77
from pandas import compat
88
import numpy as np
@@ -166,7 +166,7 @@ def __add__(date):
166166
normalize = False
167167

168168
def __init__(self, n=1, normalize=False, **kwds):
169-
self.n = int(n)
169+
self.n = self._validate_n(n)
170170
self.normalize = normalize
171171
self.kwds = kwds
172172

@@ -473,7 +473,7 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset):
473473
_adjust_dst = True
474474

475475
def __init__(self, n=1, normalize=False, offset=timedelta(0)):
476-
self.n = int(n)
476+
self.n = self._validate_n(n)
477477
self.normalize = normalize
478478
self.kwds = {'offset': offset}
479479
self._offset = offset
@@ -782,7 +782,7 @@ class BusinessHour(BusinessHourMixin, SingleConstructorOffset):
782782

783783
def __init__(self, n=1, normalize=False, start='09:00',
784784
end='17:00', offset=timedelta(0)):
785-
self.n = int(n)
785+
self.n = self._validate_n(n)
786786
self.normalize = normalize
787787
super(BusinessHour, self).__init__(start=start, end=end, offset=offset)
788788

@@ -819,7 +819,7 @@ class CustomBusinessDay(BusinessDay):
819819

820820
def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
821821
holidays=None, calendar=None, offset=timedelta(0)):
822-
self.n = int(n)
822+
self.n = self._validate_n(n)
823823
self.normalize = normalize
824824
self._offset = offset
825825
self.kwds = {}
@@ -887,7 +887,7 @@ class CustomBusinessHour(BusinessHourMixin, SingleConstructorOffset):
887887
def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
888888
holidays=None, calendar=None,
889889
start='09:00', end='17:00', offset=timedelta(0)):
890-
self.n = int(n)
890+
self.n = self._validate_n(n)
891891
self.normalize = normalize
892892
super(CustomBusinessHour, self).__init__(start=start,
893893
end=end, offset=offset)
@@ -919,6 +919,11 @@ def next_bday(self):
919919
class MonthOffset(SingleConstructorOffset):
920920
_adjust_dst = True
921921

922+
def __init__(self, n=1, normalize=False):
923+
self.n = self._validate_n(n)
924+
self.normalize = normalize
925+
self.kwds = {}
926+
922927
@property
923928
def name(self):
924929
if self.isAnchored:
@@ -994,7 +999,8 @@ def __init__(self, n=1, normalize=False, day_of_month=None):
994999
msg = 'day_of_month must be {min}<=day_of_month<=27, got {day}'
9951000
raise ValueError(msg.format(min=self._min_day_of_month,
9961001
day=self.day_of_month))
997-
self.n = int(n)
1002+
1003+
self.n = self._validate_n(n)
9981004
self.normalize = normalize
9991005
self.kwds = {'day_of_month': self.day_of_month}
10001006

@@ -1205,7 +1211,7 @@ class CustomBusinessMonthEnd(BusinessMixin, MonthOffset):
12051211

12061212
def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
12071213
holidays=None, calendar=None, offset=timedelta(0)):
1208-
self.n = int(n)
1214+
self.n = self._validate_n(n)
12091215
self.normalize = normalize
12101216
self._offset = offset
12111217
self.kwds = {}
@@ -1278,7 +1284,7 @@ class CustomBusinessMonthBegin(BusinessMixin, MonthOffset):
12781284

12791285
def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
12801286
holidays=None, calendar=None, offset=timedelta(0)):
1281-
self.n = int(n)
1287+
self.n = self._validate_n(n)
12821288
self.normalize = normalize
12831289
self._offset = offset
12841290
self.kwds = {}
@@ -1345,7 +1351,7 @@ class Week(EndMixin, DateOffset):
13451351
_prefix = 'W'
13461352

13471353
def __init__(self, n=1, normalize=False, weekday=None):
1348-
self.n = n
1354+
self.n = self._validate_n(n)
13491355
self.normalize = normalize
13501356
self.weekday = weekday
13511357

@@ -1424,7 +1430,7 @@ class WeekOfMonth(DateOffset):
14241430
_adjust_dst = True
14251431

14261432
def __init__(self, n=1, normalize=False, week=None, weekday=None):
1427-
self.n = n
1433+
self.n = self._validate_n(n)
14281434
self.normalize = normalize
14291435
self.weekday = weekday
14301436
self.week = week
@@ -1509,7 +1515,7 @@ class LastWeekOfMonth(DateOffset):
15091515
_prefix = 'LWOM'
15101516

15111517
def __init__(self, n=1, normalize=False, weekday=None):
1512-
self.n = n
1518+
self.n = self._validate_n(n)
15131519
self.normalize = normalize
15141520
self.weekday = weekday
15151521

@@ -1575,7 +1581,7 @@ class QuarterOffset(DateOffset):
15751581
# point
15761582

15771583
def __init__(self, n=1, normalize=False, startingMonth=None):
1578-
self.n = n
1584+
self.n = self._validate_n(n)
15791585
self.normalize = normalize
15801586
if startingMonth is None:
15811587
startingMonth = self._default_startingMonth
@@ -1820,7 +1826,7 @@ class FY5253(DateOffset):
18201826

18211827
def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1,
18221828
variation="nearest"):
1823-
self.n = n
1829+
self.n = self._validate_n(n)
18241830
self.normalize = normalize
18251831
self.startingMonth = startingMonth
18261832
self.weekday = weekday
@@ -2032,7 +2038,7 @@ class FY5253Quarter(DateOffset):
20322038

20332039
def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1,
20342040
qtr_with_extra_week=1, variation="nearest"):
2035-
self.n = n
2041+
self.n = self._validate_n(n)
20362042
self.normalize = normalize
20372043

20382044
self.weekday = weekday
@@ -2158,6 +2164,11 @@ class Easter(DateOffset):
21582164
"""
21592165
_adjust_dst = True
21602166

2167+
def __init__(self, n=1, normalize=False):
2168+
self.n = self._validate_n(n)
2169+
self.normalize = normalize
2170+
self.kwds = {}
2171+
21612172
@apply_wraps
21622173
def apply(self, other):
21632174
current_easter = easter(other.year)
@@ -2199,6 +2210,12 @@ class Tick(SingleConstructorOffset):
21992210
_inc = Timedelta(microseconds=1000)
22002211
_prefix = 'undefined'
22012212

2213+
def __init__(self, n=1, normalize=False):
2214+
# TODO: do Tick classes with normalize=True make sense?
2215+
self.n = self._validate_n(n)
2216+
self.normalize = normalize
2217+
self.kwds = {}
2218+
22022219
__gt__ = _tick_comp(operator.gt)
22032220
__ge__ = _tick_comp(operator.ge)
22042221
__lt__ = _tick_comp(operator.lt)
@@ -2257,6 +2274,7 @@ def delta(self):
22572274
def nanos(self):
22582275
return delta_to_nanoseconds(self.delta)
22592276

2277+
# TODO: Should Tick have its own apply_index?
22602278
def apply(self, other):
22612279
# Timestamp can handle tz and nano sec, thus no need to use apply_wraps
22622280
if isinstance(other, Timestamp):

0 commit comments

Comments
 (0)