Skip to content

Fixes to resample over DST boundaries #9623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.16.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ New features
- Added ``StringMethods.zfill()``, which behaves the same as the standard ``str.zfill()`` (:issue:`9387`)
- Added ``days_in_month`` (compatibility alias ``daysinmonth``) property to ``Timestamp``, ``DatetimeIndex``, ``Period``, ``PeriodIndex``, and ``Series.dt`` (:issue:`9572`)
- Added ``decimal`` option in ``to_csv`` to provide formatting for non-'.' decimal separators (:issue:`781`)
- Added ``normalize`` option for ``Timestamp`` to normalize to midnight (:issue:`8794`)

.. _whatsnew_0160.enhancements.assign:

Expand Down Expand Up @@ -461,6 +462,7 @@ Bug Fixes
To reproduce the old behavior, simply add more precision to the label (e.g., use ``2000-02-01`` instead of ``2000-02``).
- Bug where adding ``offsets.Nano`` to other offsets raises ``TypeError`` (:issue:`9284`)
- Bug in ``DatetimeIndex`` iteration, related to (:issue:`8890`), fixed in (:issue:`9100`)
- Bug in ``resample`` around DST transitions (:issue:`5172`, :issue:`8744`, :issue:`8653`, :issue:`9173`, :issue:`9468`). This required fixing offset classes so they behave correctly on DST transitions.
- Bug in binary operator method (eg ``.mul()``) alignment with integer levels (:issue:`9463`).
- Bug where boxplot, scatter and hexbin plots may show an unnecessary warning (:issue:`8877`)
- Bug in subplot with ``layout`` kw may show unnecessary warning (:issue:`9464`)
Expand Down
35 changes: 24 additions & 11 deletions pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,16 @@ def wrapper(self, other):
nano = getattr(other, 'nanosecond', 0)

try:
result = func(self, other)
if self._adjust_dst and isinstance(other, Timestamp):
other = other.tz_localize(None)

if self.normalize:
# normalize_date returns normal datetime
result = tslib.normalize_date(result)
result = func(self, other)
if self._adjust_dst:
result = tslib._localize_pydatetime(result, tz)

result = Timestamp(result)
if self.normalize:
result = result.normalize()

# nanosecond may be deleted depending on offset process
if not self.normalize and nano != 0:
Expand All @@ -79,7 +83,7 @@ def wrapper(self, other):

if self.normalize:
# normalize_date returns normal datetime
result = tslib.normalize_date(result)
result = normalize_date(result)

if tz is not None and result.tzinfo is None:
result = tslib._localize_pydatetime(result, tz)
Expand Down Expand Up @@ -158,6 +162,7 @@ def __add__(date):
'hour', 'minute', 'second', 'microsecond'
)
_use_relativedelta = False
_adjust_dst = False

# default for prior pickles
normalize = False
Expand Down Expand Up @@ -380,16 +385,15 @@ def freqstr(self):

return fstr


class SingleConstructorOffset(DateOffset):
    """DateOffset subclass that is rebuilt from its name with no arguments."""

    @classmethod
    def _from_name(cls, suffix=None):
        """Construct ``cls`` from a frequency name.

        The default implementation calls ``cls`` with no arguments, so a
        non-empty ``suffix`` cannot be interpreted and raises ``ValueError``.
        """
        if not suffix:
            # Nothing to parse: build the offset with its defaults.
            return cls()
        raise ValueError("Bad freq suffix %s" % suffix)


class BusinessMixin(object):
""" mixin to business types to provide related functions """

Expand Down Expand Up @@ -425,6 +429,7 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset):
DateOffset subclass representing possibly n business days
"""
_prefix = 'B'
_adjust_dst = True

def __init__(self, n=1, normalize=False, **kwds):
self.n = int(n)
Expand Down Expand Up @@ -685,6 +690,8 @@ def onOffset(self, dt):


class MonthOffset(SingleConstructorOffset):
_adjust_dst = True

@property
def name(self):
if self.isAnchored:
Expand Down Expand Up @@ -925,7 +932,7 @@ class Week(DateOffset):
weekday : int, default None
Always generate specific day of week. 0 for Monday
"""

_adjust_dst = True
def __init__(self, n=1, normalize=False, **kwds):
self.n = n
self.normalize = normalize
Expand Down Expand Up @@ -1031,7 +1038,9 @@ class WeekOfMonth(DateOffset):
5: Saturdays
6: Sundays
"""


_adjust_dst = True

def __init__(self, n=1, normalize=False, **kwds):
self.n = n
self.normalize = normalize
Expand Down Expand Up @@ -1190,7 +1199,7 @@ class QuarterOffset(DateOffset):
_default_startingMonth = None
#: default month in _from_name
_from_name_startingMonth = None

_adjust_dst = True
# TODO: Consider combining QuarterOffset and YearOffset __init__ at some
# point
def __init__(self, n=1, normalize=False, **kwds):
Expand Down Expand Up @@ -1395,7 +1404,7 @@ def apply(self, other):

class YearOffset(DateOffset):
"""DateOffset that just needs a month"""

_adjust_dst = True
def __init__(self, n=1, normalize=False, **kwds):
self.month = kwds.get('month', self._default_month)

Expand Down Expand Up @@ -1627,6 +1636,7 @@ class FY5253(DateOffset):
_prefix = 'RE'
_suffix_prefix_last = 'L'
_suffix_prefix_nearest = 'N'
_adjust_dst = True

def __init__(self, n=1, normalize=False, **kwds):
self.n = n
Expand Down Expand Up @@ -1848,6 +1858,7 @@ class FY5253Quarter(DateOffset):
"""

_prefix = 'REQ'
_adjust_dst = True

def __init__(self, n=1, normalize=False, **kwds):
self.n = n
Expand Down Expand Up @@ -1966,6 +1977,8 @@ class Easter(DateOffset):
the revised method which is valid in years
1583-4099.
'''
_adjust_dst = True

def __init__(self, n=1, **kwds):
super(Easter, self).__init__(n, **kwds)

Expand Down
18 changes: 12 additions & 6 deletions pandas/tseries/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,8 +395,8 @@ def _get_range_edges(first, last, offset, closed='left', base=0):

if not isinstance(offset, Tick): # and first.time() != last.time():
# hack!
first = tools.normalize_date(first)
last = tools.normalize_date(last)
first = first.normalize()
last = last.normalize()

if closed == 'left':
first = Timestamp(offset.rollback(first))
Expand All @@ -409,15 +409,18 @@ def _get_range_edges(first, last, offset, closed='left', base=0):


def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
from pandas.tseries.tools import normalize_date
# from pandas.tseries.tools import normalize_date

# First and last offsets should be calculated from the start day to fix an
# error caused by resampling across multiple days when a one day period is
# not a multiple of the frequency.
#
# See https://github.com/pydata/pandas/issues/8683

start_day_nanos = Timestamp(normalize_date(first)).value
first_tzinfo = first.tzinfo
first = first.tz_localize(None)
last = last.tz_localize(None)
start_day_nanos = first.normalize().value

base_nanos = (base % offset.n) * offset.nanos // offset.n
start_day_nanos += base_nanos
Expand Down Expand Up @@ -451,8 +454,11 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
else:
lresult = last.value + offset.nanos

return (Timestamp(fresult, tz=first.tz),
Timestamp(lresult, tz=last.tz))
# return (Timestamp(fresult, tz=first.tz),
# Timestamp(lresult, tz=last.tz))

return (Timestamp(fresult).tz_localize(first_tzinfo),
Timestamp(lresult).tz_localize(first_tzinfo))


def asfreq(obj, freq, method=None, how=None, normalize=False):
Expand Down
187 changes: 187 additions & 0 deletions pandas/tseries/tests/data/dateoffset_0_15_2.pickle
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
(dp0
S'YearBegin'
p1
ccopy_reg
_reconstructor
p2
(cpandas.tseries.offsets
YearBegin
p3
c__builtin__
object
p4
Ntp5
Rp6
(dp7
S'normalize'
p8
I00
sS'kwds'
p9
(dp10
sS'n'
p11
I1
sS'_offset'
p12
cdatetime
timedelta
p13
(I1
I0
I0
tp14
Rp15
sS'month'
p16
I1
sS'_use_relativedelta'
p17
I00
sbsS'Week'
p18
g2
(cpandas.tseries.offsets
Week
p19
g4
Ntp20
Rp21
(dp22
g8
I00
sS'_inc'
p23
g13
(I7
I0
I0
tp24
Rp25
sg9
(dp26
sS'weekday'
p27
Nsg11
I1
sbsS'MonthBegin'
p28
g2
(cpandas.tseries.offsets
MonthBegin
p29
g4
Ntp30
Rp31
(dp32
g8
I00
sg12
g13
(I1
I0
I0
tp33
Rp34
sg17
I00
sg9
(dp35
sg11
I1
sbsS'Day'
p36
g2
(cpandas.tseries.offsets
Day
p37
g4
Ntp38
Rp39
(dp40
g8
I00
sg12
g13
(I1
I0
I0
tp41
Rp42
sg17
I00
sg9
(dp43
sg11
I1
sbsS'DateOffset'
p44
g2
(cpandas.tseries.offsets
DateOffset
p45
g4
Ntp46
Rp47
(dp48
g8
I00
sg12
g2
(cdateutil.relativedelta
relativedelta
p49
g4
Ntp50
Rp51
(dp52
S'_has_time'
p53
I0
sS'hour'
p54
NsS'seconds'
p55
I0
sS'months'
p56
I0
sS'year'
p57
NsS'days'
p58
I0
sS'years'
p59
I1
sS'hours'
p60
I0
sS'second'
p61
NsS'microsecond'
p62
Nsg16
NsS'microseconds'
p63
I0
sS'leapdays'
p64
I0
sS'minutes'
p65
I0
sS'day'
p66
NsS'minute'
p67
Nsg27
Nsbsg17
I01
sg9
(dp68
g59
I1
ssg11
I1
sbs.
Loading