Skip to content

Commit fa24af9

Browse files
mroeschke authored and jreback committed
API/BUG: Enforce "normalized" pytz timezones for DatetimeIndex (pandas-dev#20510)
1 parent d91b706 commit fa24af9

File tree

9 files changed

+114
-14
lines changed

9 files changed

+114
-14
lines changed

doc/source/whatsnew/v0.23.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -770,6 +770,8 @@ Datetimelike API Changes
770770
- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`)
771771
- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`)
772772
- :class:`Timestamp` constructor now accepts a ``nanosecond`` keyword or positional argument (:issue:`18898`)
773+
- :class:`DatetimeIndex` will now raise an ``AttributeError`` when the ``tz`` attribute is set after instantiation (:issue:`3746`)
774+
- :class:`DatetimeIndex` with a ``pytz`` timezone will now return a consistent ``pytz`` timezone (:issue:`18595`)
773775

774776
.. _whatsnew_0230.api.other:
775777

@@ -1127,6 +1129,7 @@ Groupby/Resample/Rolling
11271129
- Bug in :func:`DataFrame.resample().aggregate` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`)
11281130
- Fixed a performance regression for ``GroupBy.nth`` and ``GroupBy.last`` with some object columns (:issue:`19283`)
11291131
- Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` when ``skipna`` was passed (:issue:`19806`)
1132+
- Bug in :func:`DataFrame.resample` that dropped timezone information (:issue:`13238`)
11301133

11311134
Sparse
11321135
^^^^^^

pandas/_libs/tslibs/timestamps.pyx

+6
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,12 @@ class Timestamp(_Timestamp):
700700
"""
701701
return self.tzinfo
702702

703+
@tz.setter
704+
def tz(self, value):
705+
# GH 3746: Prevent localizing or converting the timestamp by setting tz
706+
raise AttributeError("Cannot directly set timezone. Use tz_localize() "
707+
"or tz_convert() as appropriate")
708+
703709
def __setstate__(self, state):
704710
self.value = state[0]
705711
self.freq = state[1]

pandas/_libs/tslibs/timezones.pyx

+38
Original file line numberDiff line numberDiff line change
@@ -314,3 +314,41 @@ cpdef bint tz_compare(object start, object end):
314314
"""
315315
# GH 18523
316316
return get_timezone(start) == get_timezone(end)
317+
318+
319+
cpdef tz_standardize(object tz):
320+
"""
321+
If the passed tz is a pytz timezone object, "normalize" it to a
322+
consistent version
323+
324+
Parameters
325+
----------
326+
tz : tz object
327+
328+
Returns
329+
-------
330+
tz object
331+
332+
Examples
333+
--------
334+
>>> tz
335+
<DstTzInfo 'US/Pacific' PST-1 day, 16:00:00 STD>
336+
337+
>>> tz_standardize(tz)
338+
<DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
339+
340+
>>> tz
341+
<DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
342+
343+
>>> tz_standardize(tz)
344+
<DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
345+
346+
>>> tz
347+
dateutil.tz.tz.tzutc
348+
349+
>>> tz_standardize(tz)
350+
dateutil.tz.tz.tzutc
351+
"""
352+
if treat_tz_as_pytz(tz):
353+
return pytz.timezone(str(tz))
354+
return tz

pandas/core/indexes/datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1005,7 +1005,7 @@ def shift(self, n, freq=None):
10051005
result = self + offset
10061006

10071007
if hasattr(self, 'tz'):
1008-
result.tz = self.tz
1008+
result._tz = self.tz
10091009

10101010
return result
10111011

pandas/core/indexes/datetimes.py

+17-11
Original file line numberDiff line numberDiff line change
@@ -511,13 +511,7 @@ def _generate(cls, start, end, periods, name, offset,
511511
'different timezones')
512512

513513
inferred_tz = timezones.maybe_get_tz(inferred_tz)
514-
515-
# these may need to be localized
516514
tz = timezones.maybe_get_tz(tz)
517-
if tz is not None:
518-
date = start or end
519-
if date.tzinfo is not None and hasattr(tz, 'localize'):
520-
tz = tz.localize(date.replace(tzinfo=None)).tzinfo
521515

522516
if tz is not None and inferred_tz is not None:
523517
if not timezones.tz_compare(inferred_tz, tz):
@@ -654,7 +648,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None,
654648
result._data = values
655649
result.name = name
656650
result.offset = freq
657-
result.tz = timezones.maybe_get_tz(tz)
651+
result._tz = timezones.maybe_get_tz(tz)
652+
result._tz = timezones.tz_standardize(result._tz)
658653
result._reset_identity()
659654
return result
660655

@@ -684,6 +679,17 @@ def _values(self):
684679
else:
685680
return self.values
686681

682+
@property
683+
def tz(self):
684+
# GH 18595
685+
return self._tz
686+
687+
@tz.setter
688+
def tz(self, value):
689+
# GH 3746: Prevent localizing or converting the index by setting tz
690+
raise AttributeError("Cannot directly set timezone. Use tz_localize() "
691+
"or tz_convert() as appropriate")
692+
687693
@property
688694
def tzinfo(self):
689695
"""
@@ -754,7 +760,7 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None,
754760

755761
cachedRange = DatetimeIndex._simple_new(arr)
756762
cachedRange.offset = offset
757-
cachedRange.tz = None
763+
cachedRange = cachedRange.tz_localize(None)
758764
cachedRange.name = None
759765
drc[offset] = cachedRange
760766
else:
@@ -831,7 +837,7 @@ def __setstate__(self, state):
831837

832838
self.name = own_state[0]
833839
self.offset = own_state[1]
834-
self.tz = own_state[2]
840+
self._tz = timezones.tz_standardize(own_state[2])
835841

836842
# provide numpy < 1.7 compat
837843
if nd_state[2] == 'M8[us]':
@@ -1175,7 +1181,7 @@ def union(self, other):
11751181
else:
11761182
result = Index.union(this, other)
11771183
if isinstance(result, DatetimeIndex):
1178-
result.tz = this.tz
1184+
result._tz = timezones.tz_standardize(this.tz)
11791185
if (result.freq is None and
11801186
(this.freq is not None or other.freq is not None)):
11811187
result.offset = to_offset(result.inferred_freq)
@@ -1223,7 +1229,7 @@ def union_many(self, others):
12231229
tz = this.tz
12241230
this = Index.union(this, other)
12251231
if isinstance(this, DatetimeIndex):
1226-
this.tz = tz
1232+
this._tz = timezones.tz_standardize(tz)
12271233

12281234
if this.freq is None:
12291235
this.offset = to_offset(this.inferred_freq)

pandas/tests/frame/test_alter_axes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -249,8 +249,8 @@ def test_set_index_cast_datetimeindex(self):
249249
# convert to utc
250250
df['C'] = i.to_series().reset_index(drop=True)
251251
result = df['C']
252-
comp = pd.DatetimeIndex(expected.values).copy()
253-
comp.tz = None
252+
comp = pd.DatetimeIndex(expected.values)
253+
comp = comp.tz_localize(None)
254254
tm.assert_numpy_array_equal(result.values, comp.values)
255255

256256
# list of datetimes with a tz

pandas/tests/indexes/datetimes/test_construction.py

+28
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,34 @@ def test_000constructor_resolution(self):
441441

442442
assert idx.nanosecond[0] == t1.nanosecond
443443

444+
def test_disallow_setting_tz(self):
445+
# GH 3746
446+
dti = DatetimeIndex(['2010'], tz='UTC')
447+
with pytest.raises(AttributeError):
448+
dti.tz = pytz.timezone('US/Pacific')
449+
450+
@pytest.mark.parametrize('tz', [
451+
None, 'America/Los_Angeles', pytz.timezone('America/Los_Angeles'),
452+
Timestamp('2000', tz='America/Los_Angeles').tz])
453+
def test_constructor_start_end_with_tz(self, tz):
454+
# GH 18595
455+
start = Timestamp('2013-01-01 06:00:00', tz='America/Los_Angeles')
456+
end = Timestamp('2013-01-02 06:00:00', tz='America/Los_Angeles')
457+
result = DatetimeIndex(freq='D', start=start, end=end, tz=tz)
458+
expected = DatetimeIndex(['2013-01-01 06:00:00',
459+
'2013-01-02 06:00:00'],
460+
tz='America/Los_Angeles')
461+
tm.assert_index_equal(result, expected)
462+
# Especially assert that the timezone is consistent for pytz
463+
assert pytz.timezone('America/Los_Angeles') is result.tz
464+
465+
@pytest.mark.parametrize('tz', ['US/Pacific', 'US/Eastern', 'Asia/Tokyo'])
466+
def test_constructor_with_non_normalized_pytz(self, tz):
467+
# GH 18595
468+
non_norm_tz = Timestamp('2010', tz=tz).tz
469+
result = DatetimeIndex(['2010'], tz=non_norm_tz)
470+
assert pytz.timezone(tz) is result.tz
471+
444472

445473
class TestTimeSeries(object):
446474

pandas/tests/scalar/timestamp/test_timestamp.py

+7
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,13 @@ def test_today(self):
521521
assert (abs(ts_from_string_tz.tz_localize(None) -
522522
ts_from_method_tz.tz_localize(None)) < delta)
523523

524+
@pytest.mark.parametrize('tz', [None, pytz.timezone('US/Pacific')])
525+
def test_disallow_setting_tz(self, tz):
526+
# GH 3746
527+
ts = Timestamp('2010')
528+
with pytest.raises(AttributeError):
529+
ts.tz = tz
530+
524531

525532
class TestTimestamp(object):
526533

pandas/tests/test_resample.py

+12
Original file line numberDiff line numberDiff line change
@@ -2532,6 +2532,18 @@ def test_with_local_timezone_pytz(self):
25322532
expected = Series(1, index=expected_index)
25332533
assert_series_equal(result, expected)
25342534

2535+
def test_resample_with_pytz(self):
2536+
# GH 13238
2537+
s = Series(2, index=pd.date_range('2017-01-01', periods=48, freq="H",
2538+
tz="US/Eastern"))
2539+
result = s.resample("D").mean()
2540+
expected = Series(2, index=pd.DatetimeIndex(['2017-01-01',
2541+
'2017-01-02'],
2542+
tz="US/Eastern"))
2543+
assert_series_equal(result, expected)
2544+
# Especially assert that the timezone is LMT for pytz
2545+
assert result.index.tz == pytz.timezone('US/Eastern')
2546+
25352547
def test_with_local_timezone_dateutil(self):
25362548
# see gh-5430
25372549
local_timezone = 'dateutil/America/Los_Angeles'

0 commit comments

Comments
 (0)