Skip to content

Commit 702e3bb

Browse files
committed
Merge pull request #4706 from rockg/tz-localize-aware
ENH: Ability to tz localize when index is implicitly in tz
2 parents 33fbcbb + e5ea6c8 commit 702e3bb

File tree

10 files changed

+290
-16
lines changed

10 files changed

+290
-16
lines changed

doc/source/api.rst

+146
Original file line numberDiff line numberDiff line change
@@ -868,3 +868,149 @@ Serialization / IO / Conversion
868868
Panel.to_frame
869869
Panel.to_clipboard
870870

871+
.. currentmodule:: pandas.core.index
872+
873+
.. _api.index:
874+
875+
Index
876+
-----
877+
878+
**Many of these methods or variants thereof are available on the objects that contain an index (Series/DataFrame)
879+
and those should most likely be used before calling these methods directly.**
880+
881+
* **values**
882+
Modifying and Computations
883+
~~~~~~~~~~~~~~~~~~~~~~~~~~
884+
.. autosummary::
885+
:toctree: generated/
886+
887+
Index.copy
888+
Index.delete
889+
Index.diff
890+
Index.drop
891+
Index.equals
892+
Index.identical
893+
Index.insert
894+
Index.order
895+
Index.reindex
896+
Index.repeat
897+
Index.set_names
898+
Index.unique
899+
900+
Conversion
901+
~~~~~~~~~~
902+
.. autosummary::
903+
:toctree: generated/
904+
905+
Index.astype
906+
Index.tolist
907+
Index.to_datetime
908+
Index.to_series
909+
910+
Sorting
911+
~~~~~~~
912+
.. autosummary::
913+
:toctree: generated/
914+
915+
Index.argsort
916+
Index.order
917+
Index.sort
918+
919+
Time-specific operations
920+
~~~~~~~~~~~~~~~~~~~~~~~~
921+
.. autosummary::
922+
:toctree: generated/
923+
924+
Index.shift
925+
926+
Combining / joining / merging
927+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
928+
.. autosummary::
929+
:toctree: generated/
930+
931+
Index.append
932+
Index.intersection
933+
Index.join
934+
Index.union
935+
936+
Selecting
937+
~~~~~~~~~
938+
.. autosummary::
939+
:toctree: generated/
940+
941+
Index.get_indexer
942+
Index.get_indexer_non_unique
943+
Index.get_level_values
944+
Index.get_loc
945+
Index.get_value
946+
Index.isin
947+
Index.slice_indexer
948+
Index.slice_locs
949+
950+
Properties
951+
~~~~~~~~~~
952+
.. autosummary::
953+
:toctree: generated/
954+
955+
Index.is_monotonic
956+
Index.is_numeric
957+
958+
.. currentmodule:: pandas.tseries.index
959+
960+
.. _api.datetimeindex:
961+
962+
DatetimeIndex
963+
-------------
964+
965+
Time/Date Components
966+
~~~~~~~~~~~~~~~~~~~~
967+
* **year**
968+
* **month**
969+
* **day**
970+
* **hour**
971+
* **minute**
972+
* **second**
973+
* **microsecond**
974+
* **nanosecond**
975+
976+
* **weekofyear**
977+
* **week**: Same as weekofyear
978+
* **dayofweek**: (0=Monday, 6=Sunday)
979+
* **weekday**: (0=Monday, 6=Sunday)
980+
* **dayofyear**
981+
* **quarter**
982+
983+
* **date**: Returns date component of Timestamps
984+
* **time**: Returns time component of Timestamps
985+
986+
987+
Selecting
988+
~~~~~~~~~
989+
.. autosummary::
990+
:toctree: generated/
991+
992+
DatetimeIndex.indexer_at_time
993+
DatetimeIndex.indexer_between_time
994+
995+
996+
Time-specific operations
997+
~~~~~~~~~~~~~~~~~~~~~~~~
998+
.. autosummary::
999+
:toctree: generated/
1000+
1001+
DatetimeIndex.normalize
1002+
DatetimeIndex.snap
1003+
DatetimeIndex.tz_convert
1004+
DatetimeIndex.tz_localize
1005+
1006+
1007+
Conversion
1008+
~~~~~~~~~~
1009+
.. autosummary::
1010+
:toctree: generated/
1011+
1012+
DatetimeIndex.to_datetime
1013+
DatetimeIndex.to_period
1014+
DatetimeIndex.to_pydatetime
1015+
1016+

doc/source/release.rst

+3
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,9 @@ Improvements to existing features
160160
:issue:`4998`)
161161
- ``to_dict`` now takes ``records`` as a possible outtype. Returns an array
162162
of column-keyed dictionaries. (:issue:`4936`)
163+
- ``tz_localize`` can infer a fall daylight savings transition based on the
164+
structure of unlocalized data (:issue:`4230`)
165+
- DatetimeIndex is now in the API documentation
163166

164167
API Changes
165168
~~~~~~~~~~~

doc/source/timeseries.rst

+14
Original file line numberDiff line numberDiff line change
@@ -1108,6 +1108,20 @@ TimeSeries, aligning the data on the UTC timestamps:
11081108
11091109
.. _timeseries.timedeltas:
11101110

1111+
In some cases, ``tz_localize`` cannot determine the DST and non-DST hours when there are
1112+
duplicates. This often happens when reading files that simply duplicate the hours.
1113+
The ``infer_dst`` argument in ``tz_localize`` will attempt
1114+
to determine the right offset.
1115+
1116+
.. ipython:: python
1117+
1118+
rng_hourly = DatetimeIndex(['11/06/2011 00:00', '11/06/2011 01:00',
1119+
'11/06/2011 01:00', '11/06/2011 02:00',
1120+
'11/06/2011 03:00'])
1121+
rng_hourly.tz_localize('US/Eastern')
1122+
rng_hourly_eastern = rng_hourly.tz_localize('US/Eastern', infer_dst=True)
1123+
rng_hourly_eastern.values
1124+
11111125
Time Deltas
11121126
-----------
11131127

doc/source/v0.13.0.txt

+5-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ enhancements along with a large number of bug fixes.
88

99
.. warning::
1010

11-
In 0.13.0 ``Series`` has internaly been refactored to no longer sub-class ``ndarray``
11+
In 0.13.0 ``Series`` has internally been refactored to no longer sub-class ``ndarray``
1212
but instead subclass ``NDFrame``, similarly to the rest of the pandas containers. This should be
1313
a transparent change with only very limited API implications. See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
1414

@@ -481,6 +481,10 @@ Enhancements
481481

482482
:ref:`See the docs<indexing.basics.indexing_isin>` for more.
483483

484+
- ``tz_localize`` can infer a fall daylight savings transition based on the structure
485+
of the unlocalized data (:issue:`4230`), see :ref:`here<timeseries.timezone>`
486+
- DatetimeIndex is now in the API documentation, see :ref:`here<api.datetimeindex>`
487+
484488
.. _whatsnew_0130.experimental:
485489

486490
Experimental

pandas/core/generic.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -2752,7 +2752,7 @@ def tz_convert(self, tz, axis=0, copy=True):
27522752

27532753
return new_obj
27542754

2755-
def tz_localize(self, tz, axis=0, copy=True):
2755+
def tz_localize(self, tz, axis=0, copy=True, infer_dst=False):
27562756
"""
27572757
Localize tz-naive TimeSeries to target time zone
27582758
@@ -2761,6 +2761,8 @@ def tz_localize(self, tz, axis=0, copy=True):
27612761
tz : string or pytz.timezone object
27622762
copy : boolean, default True
27632763
Also make a copy of the underlying data
2764+
infer_dst : boolean, default False
2765+
Attempt to infer fall dst-transition hours based on order
27642766
27652767
Returns
27662768
-------
@@ -2778,7 +2780,7 @@ def tz_localize(self, tz, axis=0, copy=True):
27782780
new_data = new_data.copy()
27792781

27802782
new_obj = self._constructor(new_data)
2781-
new_ax = ax.tz_localize(tz)
2783+
new_ax = ax.tz_localize(tz, infer_dst=infer_dst)
27822784

27832785
if axis == 0:
27842786
new_obj._set_axis(1, new_ax)

pandas/core/series.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -2331,7 +2331,7 @@ def tz_convert(self, tz, copy=True):
23312331

23322332
return self._constructor(new_values, index=new_index, name=self.name)
23332333

2334-
def tz_localize(self, tz, copy=True):
2334+
def tz_localize(self, tz, copy=True, infer_dst=False):
23352335
"""
23362336
Localize tz-naive TimeSeries to target time zone
23372337
Entries will retain their "naive" value but will be annotated as
@@ -2345,6 +2345,8 @@ def tz_localize(self, tz, copy=True):
23452345
tz : string or pytz.timezone object
23462346
copy : boolean, default True
23472347
Also make a copy of the underlying data
2348+
infer_dst : boolean, default False
2349+
Attempt to infer fall dst-transition hours based on order
23482350
23492351
Returns
23502352
-------
@@ -2358,7 +2360,7 @@ def tz_localize(self, tz, copy=True):
23582360

23592361
new_index = DatetimeIndex([], tz=tz)
23602362
else:
2361-
new_index = self.index.tz_localize(tz)
2363+
new_index = self.index.tz_localize(tz, infer_dst=infer_dst)
23622364

23632365
new_values = self.values
23642366
if copy:

pandas/tseries/index.py

+19-7
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ def __new__(cls, data=None,
147147

148148
dayfirst = kwds.pop('dayfirst', None)
149149
yearfirst = kwds.pop('yearfirst', None)
150+
infer_dst = kwds.pop('infer_dst', False)
150151
warn = False
151152
if 'offset' in kwds and kwds['offset']:
152153
freq = kwds['offset']
@@ -183,7 +184,8 @@ def __new__(cls, data=None,
183184

184185
if data is None:
185186
return cls._generate(start, end, periods, name, offset,
186-
tz=tz, normalize=normalize)
187+
tz=tz, normalize=normalize,
188+
infer_dst=infer_dst)
187189

188190
if not isinstance(data, np.ndarray):
189191
if np.isscalar(data):
@@ -209,7 +211,7 @@ def __new__(cls, data=None,
209211
data.name = name
210212

211213
if tz is not None:
212-
return data.tz_localize(tz)
214+
return data.tz_localize(tz, infer_dst=infer_dst)
213215

214216
return data
215217

@@ -261,7 +263,8 @@ def __new__(cls, data=None,
261263
getattr(data, 'tz', None) is None):
262264
# Convert tz-naive to UTC
263265
ints = subarr.view('i8')
264-
subarr = tslib.tz_localize_to_utc(ints, tz)
266+
subarr = tslib.tz_localize_to_utc(ints, tz,
267+
infer_dst=infer_dst)
265268

266269
subarr = subarr.view(_NS_DTYPE)
267270

@@ -286,7 +289,7 @@ def __new__(cls, data=None,
286289

287290
@classmethod
288291
def _generate(cls, start, end, periods, name, offset,
289-
tz=None, normalize=False):
292+
tz=None, normalize=False, infer_dst=False):
290293
if com._count_not_none(start, end, periods) != 2:
291294
raise ValueError('Must specify two of start, end, or periods')
292295

@@ -375,7 +378,8 @@ def _generate(cls, start, end, periods, name, offset,
375378
index = _generate_regular_range(start, end, periods, offset)
376379

377380
if tz is not None and getattr(index, 'tz', None) is None:
378-
index = tslib.tz_localize_to_utc(com._ensure_int64(index), tz)
381+
index = tslib.tz_localize_to_utc(com._ensure_int64(index), tz,
382+
infer_dst=infer_dst)
379383
index = index.view(_NS_DTYPE)
380384

381385
index = index.view(cls)
@@ -1537,9 +1541,17 @@ def tz_convert(self, tz):
15371541
# No conversion since timestamps are all UTC to begin with
15381542
return self._simple_new(self.values, self.name, self.offset, tz)
15391543

1540-
def tz_localize(self, tz):
1544+
def tz_localize(self, tz, infer_dst=False):
15411545
"""
15421546
Localize tz-naive DatetimeIndex to given time zone (using pytz)
1547+
1548+
Parameters
1549+
----------
1550+
tz : string or pytz.timezone
1551+
Time zone to localize to. The timestamps keep their naive wall-clock
1552+
values and are interpreted as local times in this time zone
1553+
infer_dst : boolean, default False
1554+
Attempt to infer fall dst-transition hours based on order
15431555
15441556
Returns
15451557
-------
@@ -1550,7 +1562,7 @@ def tz_localize(self, tz):
15501562
tz = tools._maybe_get_tz(tz)
15511563

15521564
# Convert to UTC
1553-
new_dates = tslib.tz_localize_to_utc(self.asi8, tz)
1565+
new_dates = tslib.tz_localize_to_utc(self.asi8, tz, infer_dst=infer_dst)
15541566
new_dates = new_dates.view(_NS_DTYPE)
15551567

15561568
return self._simple_new(new_dates, self.name, self.offset, tz)

pandas/tseries/tests/test_timezones.py

+26
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,32 @@ def test_with_tz_ambiguous_times(self):
360360
dr = date_range(datetime(2011, 3, 13), periods=48,
361361
freq=datetools.Minute(30), tz=pytz.utc)
362362

363+
def test_infer_dst(self):
364+
# November 6, 2011, fall back, repeat 1 AM hour
365+
# With no repeated hours, we cannot infer the transition
366+
tz = pytz.timezone('US/Eastern')
367+
dr = date_range(datetime(2011, 11, 6, 0), periods=5,
368+
freq=datetools.Hour())
369+
self.assertRaises(pytz.AmbiguousTimeError, dr.tz_localize,
370+
tz, infer_dst=True)
371+
372+
# With repeated hours, we can infer the transition
373+
dr = date_range(datetime(2011, 11, 6, 0), periods=5,
374+
freq=datetools.Hour(), tz=tz)
375+
di = DatetimeIndex(['11/06/2011 00:00', '11/06/2011 01:00',
376+
'11/06/2011 01:00', '11/06/2011 02:00',
377+
'11/06/2011 03:00'])
378+
localized = di.tz_localize(tz, infer_dst=True)
379+
self.assert_(np.array_equal(dr, localized))
380+
381+
# When there is no dst transition, nothing special happens
382+
dr = date_range(datetime(2011, 6, 1, 0), periods=10,
383+
freq=datetools.Hour())
384+
localized = dr.tz_localize(tz)
385+
localized_infer = dr.tz_localize(tz, infer_dst=True)
386+
self.assert_(np.array_equal(localized, localized_infer))
387+
388+
363389
# test utility methods
364390
def test_infer_tz(self):
365391
eastern = pytz.timezone('US/Eastern')

0 commit comments

Comments
 (0)