Skip to content

Commit dcc68d7

Browse files
rockgjreback
authored and committed
Fixes to resample over DST boundaries. This requires
changes to offset classes that weren't working over such boundaries as well as adding normalize() on Timestamp.
1 parent 28b5ef9 commit dcc68d7

File tree

7 files changed

+326
-20
lines changed

7 files changed

+326
-20
lines changed

doc/source/whatsnew/v0.16.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ New features
6969
- Added ``StringMethods.zfill()`` which behaves the same as the standard ``str.zfill`` (:issue:`9387`)
7070
- Added ``days_in_month`` (compatibility alias ``daysinmonth``) property to ``Timestamp``, ``DatetimeIndex``, ``Period``, ``PeriodIndex``, and ``Series.dt`` (:issue:`9572`)
7171
- Added ``decimal`` option in ``to_csv`` to provide formatting for non-'.' decimal separators (:issue:`781`)
72+
- Added ``normalize`` method to ``Timestamp`` to normalize a timestamp to midnight (:issue:`8794`)
7273

7374
.. _whatsnew_0160.enhancements.assign:
7475

@@ -463,6 +464,7 @@ Bug Fixes
463464
To reproduce the old behavior, simply add more precision to the label (e.g., use ``2000-02-01`` instead of ``2000-02``).
464465
- Bug in adding ``offsets.Nano`` to other offsets raising ``TypeError`` (:issue:`9284`)
465466
- Bug in ``DatetimeIndex`` iteration, related to (:issue:`8890`), fixed in (:issue:`9100`)
467+
- Bug in ``resample`` around DST transitions (:issue:`5172`, :issue:`8744`, :issue:`8653`, :issue:`9173`, :issue:`9468`). This required fixing offset classes so they behave correctly on DST transitions.
466468
- Bug in binary operator method (e.g. ``.mul()``) alignment with integer levels (:issue:`9463`).
467469
- Bug in boxplot, scatter and hexbin plot may show an unnecessary warning (:issue:`8877`)
468470
- Bug in subplot with ``layout`` kw may show unnecessary warning (:issue:`9464`)

pandas/tseries/offsets.py

+24-11
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,16 @@ def wrapper(self, other):
5454
nano = getattr(other, 'nanosecond', 0)
5555

5656
try:
57-
result = func(self, other)
57+
if self._adjust_dst and isinstance(other, Timestamp):
58+
other = other.tz_localize(None)
5859

59-
if self.normalize:
60-
# normalize_date returns normal datetime
61-
result = tslib.normalize_date(result)
60+
result = func(self, other)
61+
if self._adjust_dst:
62+
result = tslib._localize_pydatetime(result, tz)
63+
6264
result = Timestamp(result)
65+
if self.normalize:
66+
result = result.normalize()
6367

6468
# nanosecond may be deleted depending on offset process
6569
if not self.normalize and nano != 0:
@@ -79,7 +83,7 @@ def wrapper(self, other):
7983

8084
if self.normalize:
8185
# normalize_date returns normal datetime
82-
result = tslib.normalize_date(result)
86+
result = normalize_date(result)
8387

8488
if tz is not None and result.tzinfo is None:
8589
result = tslib._localize_pydatetime(result, tz)
@@ -158,6 +162,7 @@ def __add__(date):
158162
'hour', 'minute', 'second', 'microsecond'
159163
)
160164
_use_relativedelta = False
165+
_adjust_dst = False
161166

162167
# default for prior pickles
163168
normalize = False
@@ -380,16 +385,15 @@ def freqstr(self):
380385

381386
return fstr
382387

383-
384388
class SingleConstructorOffset(DateOffset):
389+
385390
@classmethod
386391
def _from_name(cls, suffix=None):
387392
# default _from_name calls cls with no args
388393
if suffix:
389394
raise ValueError("Bad freq suffix %s" % suffix)
390395
return cls()
391396

392-
393397
class BusinessMixin(object):
394398
""" mixin to business types to provide related functions """
395399

@@ -425,6 +429,7 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset):
425429
DateOffset subclass representing possibly n business days
426430
"""
427431
_prefix = 'B'
432+
_adjust_dst = True
428433

429434
def __init__(self, n=1, normalize=False, **kwds):
430435
self.n = int(n)
@@ -685,6 +690,8 @@ def onOffset(self, dt):
685690

686691

687692
class MonthOffset(SingleConstructorOffset):
693+
_adjust_dst = True
694+
688695
@property
689696
def name(self):
690697
if self.isAnchored:
@@ -925,7 +932,7 @@ class Week(DateOffset):
925932
weekday : int, default None
926933
Always generate specific day of week. 0 for Monday
927934
"""
928-
935+
_adjust_dst = True
929936
def __init__(self, n=1, normalize=False, **kwds):
930937
self.n = n
931938
self.normalize = normalize
@@ -1031,7 +1038,9 @@ class WeekOfMonth(DateOffset):
10311038
5: Saturdays
10321039
6: Sundays
10331040
"""
1034-
1041+
1042+
_adjust_dst = True
1043+
10351044
def __init__(self, n=1, normalize=False, **kwds):
10361045
self.n = n
10371046
self.normalize = normalize
@@ -1190,7 +1199,7 @@ class QuarterOffset(DateOffset):
11901199
_default_startingMonth = None
11911200
#: default month in _from_name
11921201
_from_name_startingMonth = None
1193-
1202+
_adjust_dst = True
11941203
# TODO: Consider combining QuarterOffset and YearOffset __init__ at some
11951204
# point
11961205
def __init__(self, n=1, normalize=False, **kwds):
@@ -1395,7 +1404,7 @@ def apply(self, other):
13951404

13961405
class YearOffset(DateOffset):
13971406
"""DateOffset that just needs a month"""
1398-
1407+
_adjust_dst = True
13991408
def __init__(self, n=1, normalize=False, **kwds):
14001409
self.month = kwds.get('month', self._default_month)
14011410

@@ -1627,6 +1636,7 @@ class FY5253(DateOffset):
16271636
_prefix = 'RE'
16281637
_suffix_prefix_last = 'L'
16291638
_suffix_prefix_nearest = 'N'
1639+
_adjust_dst = True
16301640

16311641
def __init__(self, n=1, normalize=False, **kwds):
16321642
self.n = n
@@ -1848,6 +1858,7 @@ class FY5253Quarter(DateOffset):
18481858
"""
18491859

18501860
_prefix = 'REQ'
1861+
_adjust_dst = True
18511862

18521863
def __init__(self, n=1, normalize=False, **kwds):
18531864
self.n = n
@@ -1966,6 +1977,8 @@ class Easter(DateOffset):
19661977
the revised method which is valid in years
19671978
1583-4099.
19681979
'''
1980+
_adjust_dst = True
1981+
19691982
def __init__(self, n=1, **kwds):
19701983
super(Easter, self).__init__(n, **kwds)
19711984

pandas/tseries/resample.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -395,8 +395,8 @@ def _get_range_edges(first, last, offset, closed='left', base=0):
395395

396396
if not isinstance(offset, Tick): # and first.time() != last.time():
397397
# hack!
398-
first = tools.normalize_date(first)
399-
last = tools.normalize_date(last)
398+
first = first.normalize()
399+
last = last.normalize()
400400

401401
if closed == 'left':
402402
first = Timestamp(offset.rollback(first))
@@ -409,15 +409,18 @@ def _get_range_edges(first, last, offset, closed='left', base=0):
409409

410410

411411
def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
412-
from pandas.tseries.tools import normalize_date
412+
# from pandas.tseries.tools import normalize_date
413413

414414
# First and last offsets should be calculated from the start day to fix an
415415
# error cause by resampling across multiple days when a one day period is
416416
# not a multiple of the frequency.
417417
#
418418
# See https://github.com/pydata/pandas/issues/8683
419419

420-
start_day_nanos = Timestamp(normalize_date(first)).value
420+
first_tzinfo = first.tzinfo
421+
first = first.tz_localize(None)
422+
last = last.tz_localize(None)
423+
start_day_nanos = first.normalize().value
421424

422425
base_nanos = (base % offset.n) * offset.nanos // offset.n
423426
start_day_nanos += base_nanos
@@ -451,8 +454,11 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
451454
else:
452455
lresult = last.value + offset.nanos
453456

454-
return (Timestamp(fresult, tz=first.tz),
455-
Timestamp(lresult, tz=last.tz))
457+
# return (Timestamp(fresult, tz=first.tz),
458+
# Timestamp(lresult, tz=last.tz))
459+
460+
return (Timestamp(fresult).tz_localize(first_tzinfo),
461+
Timestamp(lresult).tz_localize(first_tzinfo))
456462

457463

458464
def asfreq(obj, freq, method=None, how=None, normalize=False):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
(dp0
2+
S'YearBegin'
3+
p1
4+
ccopy_reg
5+
_reconstructor
6+
p2
7+
(cpandas.tseries.offsets
8+
YearBegin
9+
p3
10+
c__builtin__
11+
object
12+
p4
13+
Ntp5
14+
Rp6
15+
(dp7
16+
S'normalize'
17+
p8
18+
I00
19+
sS'kwds'
20+
p9
21+
(dp10
22+
sS'n'
23+
p11
24+
I1
25+
sS'_offset'
26+
p12
27+
cdatetime
28+
timedelta
29+
p13
30+
(I1
31+
I0
32+
I0
33+
tp14
34+
Rp15
35+
sS'month'
36+
p16
37+
I1
38+
sS'_use_relativedelta'
39+
p17
40+
I00
41+
sbsS'Week'
42+
p18
43+
g2
44+
(cpandas.tseries.offsets
45+
Week
46+
p19
47+
g4
48+
Ntp20
49+
Rp21
50+
(dp22
51+
g8
52+
I00
53+
sS'_inc'
54+
p23
55+
g13
56+
(I7
57+
I0
58+
I0
59+
tp24
60+
Rp25
61+
sg9
62+
(dp26
63+
sS'weekday'
64+
p27
65+
Nsg11
66+
I1
67+
sbsS'MonthBegin'
68+
p28
69+
g2
70+
(cpandas.tseries.offsets
71+
MonthBegin
72+
p29
73+
g4
74+
Ntp30
75+
Rp31
76+
(dp32
77+
g8
78+
I00
79+
sg12
80+
g13
81+
(I1
82+
I0
83+
I0
84+
tp33
85+
Rp34
86+
sg17
87+
I00
88+
sg9
89+
(dp35
90+
sg11
91+
I1
92+
sbsS'Day'
93+
p36
94+
g2
95+
(cpandas.tseries.offsets
96+
Day
97+
p37
98+
g4
99+
Ntp38
100+
Rp39
101+
(dp40
102+
g8
103+
I00
104+
sg12
105+
g13
106+
(I1
107+
I0
108+
I0
109+
tp41
110+
Rp42
111+
sg17
112+
I00
113+
sg9
114+
(dp43
115+
sg11
116+
I1
117+
sbsS'DateOffset'
118+
p44
119+
g2
120+
(cpandas.tseries.offsets
121+
DateOffset
122+
p45
123+
g4
124+
Ntp46
125+
Rp47
126+
(dp48
127+
g8
128+
I00
129+
sg12
130+
g2
131+
(cdateutil.relativedelta
132+
relativedelta
133+
p49
134+
g4
135+
Ntp50
136+
Rp51
137+
(dp52
138+
S'_has_time'
139+
p53
140+
I0
141+
sS'hour'
142+
p54
143+
NsS'seconds'
144+
p55
145+
I0
146+
sS'months'
147+
p56
148+
I0
149+
sS'year'
150+
p57
151+
NsS'days'
152+
p58
153+
I0
154+
sS'years'
155+
p59
156+
I1
157+
sS'hours'
158+
p60
159+
I0
160+
sS'second'
161+
p61
162+
NsS'microsecond'
163+
p62
164+
Nsg16
165+
NsS'microseconds'
166+
p63
167+
I0
168+
sS'leapdays'
169+
p64
170+
I0
171+
sS'minutes'
172+
p65
173+
I0
174+
sS'day'
175+
p66
176+
NsS'minute'
177+
p67
178+
Nsg27
179+
Nsbsg17
180+
I01
181+
sg9
182+
(dp68
183+
g59
184+
I1
185+
ssg11
186+
I1
187+
sbs.

0 commit comments

Comments
 (0)