Skip to content

Commit 79b9639

Browse files
mroeschke authored and victor committed
API/BUG: DatetimeIndex correctly localizes integer data (pandas-dev#21216)
1 parent bc4ce26 commit 79b9639

File tree

6 files changed

+105
-82
lines changed

6 files changed

+105
-82
lines changed

doc/source/whatsnew/v0.24.0.txt

+2-2
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ Datetimelike API Changes
3636
Other API Changes
3737
^^^^^^^^^^^^^^^^^
3838

39-
-
39+
- :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`)
4040
-
4141
-
4242

@@ -92,7 +92,7 @@ Datetimelike
9292
^^^^^^^^^^^^
9393

9494
- Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`)
95-
-
95+
- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`)
9696
-
9797

9898
Timedelta

pandas/core/indexes/base.py

+4
Original file line number | Diff line number | Diff line change
@@ -1175,6 +1175,10 @@ def astype(self, dtype, copy=True):
11751175
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
11761176
copy=copy)
11771177
try:
1178+
if is_datetime64tz_dtype(dtype):
1179+
from pandas.core.indexes.datetimes import DatetimeIndex
1180+
return DatetimeIndex(self.values, name=self.name, dtype=dtype,
1181+
copy=copy)
11781182
return Index(self.values.astype(dtype, copy=copy), name=self.name,
11791183
dtype=dtype)
11801184
except (TypeError, ValueError):

pandas/core/indexes/datetimes.py

+33-49
Original file line number | Diff line number | Diff line change
@@ -395,57 +395,43 @@ def __new__(cls, data=None,
395395

396396
# data must be Index or np.ndarray here
397397
if not (is_datetime64_dtype(data) or is_datetimetz(data) or
398-
is_integer_dtype(data)):
398+
is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'):
399399
data = tools.to_datetime(data, dayfirst=dayfirst,
400400
yearfirst=yearfirst)
401401

402-
if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data):
403-
404-
if isinstance(data, DatetimeIndex):
405-
if tz is None:
406-
tz = data.tz
407-
elif data.tz is None:
408-
data = data.tz_localize(tz, ambiguous=ambiguous)
409-
else:
410-
# the tz's must match
411-
if str(tz) != str(data.tz):
412-
msg = ('data is already tz-aware {0}, unable to '
413-
'set specified tz: {1}')
414-
raise TypeError(msg.format(data.tz, tz))
402+
if isinstance(data, DatetimeIndex):
403+
if tz is None:
404+
tz = data.tz
405+
elif data.tz is None:
406+
data = data.tz_localize(tz, ambiguous=ambiguous)
407+
else:
408+
# the tz's must match
409+
if str(tz) != str(data.tz):
410+
msg = ('data is already tz-aware {0}, unable to '
411+
'set specified tz: {1}')
412+
raise TypeError(msg.format(data.tz, tz))
415413

416-
subarr = data.values
414+
subarr = data.values
417415

418-
if freq is None:
419-
freq = data.freq
420-
verify_integrity = False
421-
else:
422-
if data.dtype != _NS_DTYPE:
423-
subarr = conversion.ensure_datetime64ns(data)
424-
else:
425-
subarr = data
416+
if freq is None:
417+
freq = data.freq
418+
verify_integrity = False
419+
elif issubclass(data.dtype.type, np.datetime64):
420+
if data.dtype != _NS_DTYPE:
421+
data = conversion.ensure_datetime64ns(data)
422+
if tz is not None:
423+
# Convert tz-naive to UTC
424+
tz = timezones.maybe_get_tz(tz)
425+
data = conversion.tz_localize_to_utc(data.view('i8'), tz,
426+
ambiguous=ambiguous)
427+
subarr = data.view(_NS_DTYPE)
426428
else:
427429
# must be integer dtype otherwise
428-
if isinstance(data, Int64Index):
429-
raise TypeError('cannot convert Int64Index->DatetimeIndex')
430+
# assume this data are epoch timestamps
430431
if data.dtype != _INT64_DTYPE:
431-
data = data.astype(np.int64)
432+
data = data.astype(np.int64, copy=False)
432433
subarr = data.view(_NS_DTYPE)
433434

434-
if isinstance(subarr, DatetimeIndex):
435-
if tz is None:
436-
tz = subarr.tz
437-
else:
438-
if tz is not None:
439-
tz = timezones.maybe_get_tz(tz)
440-
441-
if (not isinstance(data, DatetimeIndex) or
442-
getattr(data, 'tz', None) is None):
443-
# Convert tz-naive to UTC
444-
ints = subarr.view('i8')
445-
subarr = conversion.tz_localize_to_utc(ints, tz,
446-
ambiguous=ambiguous)
447-
subarr = subarr.view(_NS_DTYPE)
448-
449435
subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz)
450436
if dtype is not None:
451437
if not is_dtype_equal(subarr.dtype, dtype):
@@ -807,8 +793,9 @@ def _mpl_repr(self):
807793

808794
@cache_readonly
809795
def _is_dates_only(self):
796+
"""Return a boolean if we are only dates (and don't have a timezone)"""
810797
from pandas.io.formats.format import _is_dates_only
811-
return _is_dates_only(self.values)
798+
return _is_dates_only(self.values) and self.tz is None
812799

813800
@property
814801
def _formatter_func(self):
@@ -1244,7 +1231,7 @@ def join(self, other, how='left', level=None, return_indexers=False,
12441231
See Index.join
12451232
"""
12461233
if (not isinstance(other, DatetimeIndex) and len(other) > 0 and
1247-
other.inferred_type not in ('floating', 'mixed-integer',
1234+
other.inferred_type not in ('floating', 'integer', 'mixed-integer',
12481235
'mixed-integer-float', 'mixed')):
12491236
try:
12501237
other = DatetimeIndex(other)
@@ -2100,8 +2087,9 @@ def normalize(self):
21002087
dtype='datetime64[ns, Asia/Calcutta]', freq=None)
21012088
"""
21022089
new_values = conversion.date_normalize(self.asi8, self.tz)
2103-
return DatetimeIndex(new_values, freq='infer', name=self.name,
2104-
tz=self.tz)
2090+
return DatetimeIndex(new_values,
2091+
freq='infer',
2092+
name=self.name).tz_localize(self.tz)
21052093

21062094
@Substitution(klass='DatetimeIndex')
21072095
@Appender(_shared_docs['searchsorted'])
@@ -2182,8 +2170,6 @@ def insert(self, loc, item):
21822170
try:
21832171
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
21842172
self[loc:].asi8))
2185-
if self.tz is not None:
2186-
new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz)
21872173
return DatetimeIndex(new_dates, name=self.name, freq=freq,
21882174
tz=self.tz)
21892175
except (AttributeError, TypeError):
@@ -2221,8 +2207,6 @@ def delete(self, loc):
22212207
if (loc.start in (0, None) or loc.stop in (len(self), None)):
22222208
freq = self.freq
22232209

2224-
if self.tz is not None:
2225-
new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz)
22262210
return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)
22272211

22282212
def tz_convert(self, tz):

pandas/tests/indexes/datetimes/test_astype.py

+10
Original file line number | Diff line number | Diff line change
@@ -225,6 +225,16 @@ def _check_rng(rng):
225225
_check_rng(rng_eastern)
226226
_check_rng(rng_utc)
227227

228+
@pytest.mark.parametrize('tz, dtype', [
229+
['US/Pacific', 'datetime64[ns, US/Pacific]'],
230+
[None, 'datetime64[ns]']])
231+
def test_integer_index_astype_datetime(self, tz, dtype):
232+
# GH 20997, 20964
233+
val = [pd.Timestamp('2018-01-01', tz=tz).value]
234+
result = pd.Index(val).astype(dtype)
235+
expected = pd.DatetimeIndex(['2018-01-01'], tz=tz)
236+
tm.assert_index_equal(result, expected)
237+
228238

229239
class TestToPeriod(object):
230240

pandas/tests/indexes/datetimes/test_construction.py

+39-21
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,10 @@
1-
import pytest
1+
from datetime import timedelta
2+
from operator import attrgetter
3+
from functools import partial
24

5+
import pytest
36
import pytz
47
import numpy as np
5-
from datetime import timedelta
68

79
import pandas as pd
810
from pandas import offsets
@@ -26,25 +28,28 @@ def test_construction_caching(self):
2628
freq='ns')})
2729
assert df.dttz.dtype.tz.zone == 'US/Eastern'
2830

29-
def test_construction_with_alt(self):
30-
31-
i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern')
32-
i2 = DatetimeIndex(i, dtype=i.dtype)
33-
tm.assert_index_equal(i, i2)
34-
assert i.tz.zone == 'US/Eastern'
35-
36-
i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz)
37-
tm.assert_index_equal(i, i2)
38-
assert i.tz.zone == 'US/Eastern'
39-
40-
i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype)
41-
tm.assert_index_equal(i, i2)
42-
assert i.tz.zone == 'US/Eastern'
43-
44-
i2 = DatetimeIndex(
45-
i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz)
46-
tm.assert_index_equal(i, i2)
47-
assert i.tz.zone == 'US/Eastern'
31+
@pytest.mark.parametrize('kwargs', [
32+
{'tz': 'dtype.tz'},
33+
{'dtype': 'dtype'},
34+
{'dtype': 'dtype', 'tz': 'dtype.tz'}])
35+
def test_construction_with_alt(self, kwargs, tz_aware_fixture):
36+
tz = tz_aware_fixture
37+
i = pd.date_range('20130101', periods=5, freq='H', tz=tz)
38+
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
39+
result = DatetimeIndex(i, **kwargs)
40+
tm.assert_index_equal(i, result)
41+
42+
@pytest.mark.parametrize('kwargs', [
43+
{'tz': 'dtype.tz'},
44+
{'dtype': 'dtype'},
45+
{'dtype': 'dtype', 'tz': 'dtype.tz'}])
46+
def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
47+
tz = tz_aware_fixture
48+
i = pd.date_range('20130101', periods=5, freq='H', tz=tz)
49+
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
50+
result = DatetimeIndex(i.tz_localize(None).asi8, **kwargs)
51+
expected = i.tz_localize(None).tz_localize('UTC').tz_convert(tz)
52+
tm.assert_index_equal(result, expected)
4853

4954
# localize into the provided tz
5055
i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC')
@@ -478,6 +483,19 @@ def test_constructor_timestamp_near_dst(self):
478483
ts[1].to_pydatetime()])
479484
tm.assert_index_equal(result, expected)
480485

486+
@pytest.mark.parametrize('klass', [Index, DatetimeIndex])
487+
@pytest.mark.parametrize('box', [
488+
np.array, partial(np.array, dtype=object), list])
489+
@pytest.mark.parametrize('tz, dtype', [
490+
['US/Pacific', 'datetime64[ns, US/Pacific]'],
491+
[None, 'datetime64[ns]']])
492+
def test_constructor_with_int_tz(self, klass, box, tz, dtype):
493+
# GH 20997, 20964
494+
ts = Timestamp('2018-01-01', tz=tz)
495+
result = klass(box([ts.value]), dtype=dtype)
496+
expected = klass([ts])
497+
assert result == expected
498+
481499

482500
class TestTimeSeries(object):
483501

pandas/tests/indexes/test_base.py

+17-10
Original file line number | Diff line number | Diff line change
@@ -402,26 +402,33 @@ def test_constructor_dtypes_to_timedelta(self, cast_index, vals):
402402
index = Index(vals)
403403
assert isinstance(index, TimedeltaIndex)
404404

405-
@pytest.mark.parametrize("values", [
406-
# pass values without timezone, as DatetimeIndex localizes it
407-
pd.date_range('2011-01-01', periods=5).values,
408-
pd.date_range('2011-01-01', periods=5).asi8])
405+
@pytest.mark.parametrize("attr, utc", [
406+
['values', False],
407+
['asi8', True]])
409408
@pytest.mark.parametrize("klass", [pd.Index, pd.DatetimeIndex])
410-
def test_constructor_dtypes_datetime(self, tz_naive_fixture, values,
409+
def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, utc,
411410
klass):
412-
index = pd.date_range('2011-01-01', periods=5, tz=tz_naive_fixture)
411+
# Test constructing with a datetimetz dtype
412+
# .values produces numpy datetimes, so these are considered naive
413+
# .asi8 produces integers, so these are considered epoch timestamps
414+
index = pd.date_range('2011-01-01', periods=5)
415+
arg = getattr(index, attr)
416+
if utc:
417+
index = index.tz_localize('UTC').tz_convert(tz_naive_fixture)
418+
else:
419+
index = index.tz_localize(tz_naive_fixture)
413420
dtype = index.dtype
414421

415-
result = klass(values, tz=tz_naive_fixture)
422+
result = klass(arg, tz=tz_naive_fixture)
416423
tm.assert_index_equal(result, index)
417424

418-
result = klass(values, dtype=dtype)
425+
result = klass(arg, dtype=dtype)
419426
tm.assert_index_equal(result, index)
420427

421-
result = klass(list(values), tz=tz_naive_fixture)
428+
result = klass(list(arg), tz=tz_naive_fixture)
422429
tm.assert_index_equal(result, index)
423430

424-
result = klass(list(values), dtype=dtype)
431+
result = klass(list(arg), dtype=dtype)
425432
tm.assert_index_equal(result, index)
426433

427434
@pytest.mark.parametrize("attr", ['values', 'asi8'])

0 commit comments

Comments
 (0)