-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
API/BUG: DatetimeIndex correctly localizes integer data #21216
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 18 commits
9626044
f5235eb
82fbf0c
c95ca51
af3c615
cc68764
ab00008
86a40a1
069774b
74661ca
de64718
abf3efc
3a8a714
ec4795b
b71de82
37484ea
763a675
dc0a3fe
2bc293d
b9801a3
dc7e5c0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -394,57 +394,43 @@ def __new__(cls, data=None, | |
|
||
# data must be Index or np.ndarray here | ||
if not (is_datetime64_dtype(data) or is_datetimetz(data) or | ||
is_integer_dtype(data)): | ||
is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'): | ||
data = tools.to_datetime(data, dayfirst=dayfirst, | ||
yearfirst=yearfirst) | ||
|
||
if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data): | ||
|
||
if isinstance(data, DatetimeIndex): | ||
if tz is None: | ||
tz = data.tz | ||
elif data.tz is None: | ||
data = data.tz_localize(tz, ambiguous=ambiguous) | ||
else: | ||
# the tz's must match | ||
if str(tz) != str(data.tz): | ||
msg = ('data is already tz-aware {0}, unable to ' | ||
'set specified tz: {1}') | ||
raise TypeError(msg.format(data.tz, tz)) | ||
if isinstance(data, DatetimeIndex): | ||
if tz is None: | ||
tz = data.tz | ||
elif data.tz is None: | ||
data = data.tz_localize(tz, ambiguous=ambiguous) | ||
else: | ||
# the tz's must match | ||
if str(tz) != str(data.tz): | ||
msg = ('data is already tz-aware {0}, unable to ' | ||
'set specified tz: {1}') | ||
raise TypeError(msg.format(data.tz, tz)) | ||
|
||
subarr = data.values | ||
subarr = data.values | ||
|
||
if freq is None: | ||
freq = data.freq | ||
verify_integrity = False | ||
else: | ||
if data.dtype != _NS_DTYPE: | ||
subarr = conversion.ensure_datetime64ns(data) | ||
else: | ||
subarr = data | ||
if freq is None: | ||
freq = data.freq | ||
verify_integrity = False | ||
elif issubclass(data.dtype.type, np.datetime64): | ||
if data.dtype != _NS_DTYPE: | ||
data = conversion.ensure_datetime64ns(data) | ||
if tz is not None: | ||
# Convert tz-naive to UTC | ||
tz = timezones.maybe_get_tz(tz) | ||
data = conversion.tz_localize_to_utc(data.view('i8'), tz, | ||
ambiguous=ambiguous) | ||
subarr = data.view(_NS_DTYPE) | ||
else: | ||
# must be integer dtype otherwise | ||
if isinstance(data, Int64Index): | ||
raise TypeError('cannot convert Int64Index->DatetimeIndex') | ||
# assume this data are epoch timestamps | ||
if data.dtype != _INT64_DTYPE: | ||
data = data.astype(np.int64) | ||
data = data.astype(np.int64, copy=False) | ||
subarr = data.view(_NS_DTYPE) | ||
|
||
if isinstance(subarr, DatetimeIndex): | ||
if tz is None: | ||
tz = subarr.tz | ||
else: | ||
if tz is not None: | ||
tz = timezones.maybe_get_tz(tz) | ||
|
||
if (not isinstance(data, DatetimeIndex) or | ||
getattr(data, 'tz', None) is None): | ||
# Convert tz-naive to UTC | ||
ints = subarr.view('i8') | ||
subarr = conversion.tz_localize_to_utc(ints, tz, | ||
ambiguous=ambiguous) | ||
subarr = subarr.view(_NS_DTYPE) | ||
|
||
subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz) | ||
if dtype is not None: | ||
if not is_dtype_equal(subarr.dtype, dtype): | ||
|
@@ -806,8 +792,9 @@ def _mpl_repr(self): | |
|
||
@cache_readonly | ||
def _is_dates_only(self): | ||
"""Return a boolean if we are only dates (and don't have a timezone)""" | ||
from pandas.io.formats.format import _is_dates_only | ||
return _is_dates_only(self.values) | ||
return _is_dates_only(self.values) and self.tz is None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why did this need changing? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. After this refactor, I was getting errors where the repr didn't look correct but the i8 data was correct in the case when a tz was passed in the
but checks for timezones by re-passing |
||
|
||
@property | ||
def _formatter_func(self): | ||
|
@@ -1243,7 +1230,7 @@ def join(self, other, how='left', level=None, return_indexers=False, | |
See Index.join | ||
""" | ||
if (not isinstance(other, DatetimeIndex) and len(other) > 0 and | ||
other.inferred_type not in ('floating', 'mixed-integer', | ||
other.inferred_type not in ('floating', 'integer', 'mixed-integer', | ||
'mixed-integer-float', 'mixed')): | ||
try: | ||
other = DatetimeIndex(other) | ||
|
@@ -2081,8 +2068,9 @@ def normalize(self): | |
dtype='datetime64[ns, Asia/Calcutta]', freq=None) | ||
""" | ||
new_values = conversion.date_normalize(self.asi8, self.tz) | ||
return DatetimeIndex(new_values, freq='infer', name=self.name, | ||
tz=self.tz) | ||
return DatetimeIndex(new_values, | ||
freq='infer', | ||
name=self.name).tz_localize(self.tz) | ||
|
||
@Substitution(klass='DatetimeIndex') | ||
@Appender(_shared_docs['searchsorted']) | ||
|
@@ -2163,8 +2151,6 @@ def insert(self, loc, item): | |
try: | ||
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)], | ||
self[loc:].asi8)) | ||
if self.tz is not None: | ||
new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz) | ||
return DatetimeIndex(new_dates, name=self.name, freq=freq, | ||
tz=self.tz) | ||
except (AttributeError, TypeError): | ||
|
@@ -2202,8 +2188,6 @@ def delete(self, loc): | |
if (loc.start in (0, None) or loc.stop in (len(self), None)): | ||
freq = self.freq | ||
|
||
if self.tz is not None: | ||
new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz) | ||
return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz) | ||
|
||
def tz_convert(self, tz): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,10 @@ | ||
import pytest | ||
from datetime import timedelta | ||
from operator import attrgetter | ||
from functools import partial | ||
|
||
import pytest | ||
import pytz | ||
import numpy as np | ||
from datetime import timedelta | ||
|
||
import pandas as pd | ||
from pandas import offsets | ||
|
@@ -26,25 +28,28 @@ def test_construction_caching(self): | |
freq='ns')}) | ||
assert df.dttz.dtype.tz.zone == 'US/Eastern' | ||
|
||
def test_construction_with_alt(self): | ||
|
||
i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') | ||
i2 = DatetimeIndex(i, dtype=i.dtype) | ||
tm.assert_index_equal(i, i2) | ||
assert i.tz.zone == 'US/Eastern' | ||
|
||
i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz) | ||
tm.assert_index_equal(i, i2) | ||
assert i.tz.zone == 'US/Eastern' | ||
|
||
i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I actually like this construction. Can you add a test that does both (e.g. both uses and does not localize)? Maybe split it out and parameterize. |
||
tm.assert_index_equal(i, i2) | ||
assert i.tz.zone == 'US/Eastern' | ||
|
||
i2 = DatetimeIndex( | ||
i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz) | ||
tm.assert_index_equal(i, i2) | ||
assert i.tz.zone == 'US/Eastern' | ||
@pytest.mark.parametrize('kwargs', [ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you parameterize on tz_aware_fixture (which doesn't include None), and then parameterize as you are doing? |
||
{'tz': 'dtype.tz'}, | ||
{'dtype': 'dtype'}, | ||
{'dtype': 'dtype', 'tz': 'dtype.tz'}]) | ||
def test_construction_with_alt(self, kwargs, tz_aware_fixture): | ||
tz = tz_aware_fixture | ||
i = pd.date_range('20130101', periods=5, freq='H', tz=tz) | ||
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} | ||
result = DatetimeIndex(i, **kwargs) | ||
tm.assert_index_equal(i, result) | ||
|
||
@pytest.mark.parametrize('kwargs', [ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same |
||
{'tz': 'dtype.tz'}, | ||
{'dtype': 'dtype'}, | ||
{'dtype': 'dtype', 'tz': 'dtype.tz'}]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you explain how this test is different from the one above? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am preserving the construction of |
||
def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture): | ||
tz = tz_aware_fixture | ||
i = pd.date_range('20130101', periods=5, freq='H', tz=tz) | ||
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} | ||
result = DatetimeIndex(i.tz_localize(None).asi8, **kwargs) | ||
expected = i.tz_localize(None).tz_localize('UTC').tz_convert(tz) | ||
tm.assert_index_equal(result, expected) | ||
|
||
# localize into the provided tz | ||
i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC') | ||
|
@@ -469,6 +474,19 @@ def test_constructor_with_non_normalized_pytz(self, tz): | |
result = DatetimeIndex(['2010'], tz=non_norm_tz) | ||
assert pytz.timezone(tz) is result.tz | ||
|
||
@pytest.mark.parametrize('klass', [Index, DatetimeIndex]) | ||
@pytest.mark.parametrize('box', [ | ||
np.array, partial(np.array, dtype=object), list]) | ||
@pytest.mark.parametrize('tz, dtype', [ | ||
['US/Pacific', 'datetime64[ns, US/Pacific]'], | ||
[None, 'datetime64[ns]']]) | ||
def test_constructor_with_int_tz(self, klass, box, tz, dtype): | ||
# GH 20997, 20964 | ||
ts = Timestamp('2018-01-01', tz=tz) | ||
result = klass(box([ts.value]), dtype=dtype) | ||
expected = klass([ts]) | ||
tm.assert_index_equal(result, expected) | ||
|
||
|
||
class TestTimeSeries(object): | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -402,26 +402,33 @@ def test_constructor_dtypes_to_timedelta(self, cast_index, vals): | |
index = Index(vals) | ||
assert isinstance(index, TimedeltaIndex) | ||
|
||
@pytest.mark.parametrize("values", [ | ||
# pass values without timezone, as DatetimeIndex localizes it | ||
pd.date_range('2011-01-01', periods=5).values, | ||
pd.date_range('2011-01-01', periods=5).asi8]) | ||
@pytest.mark.parametrize("attr, utc", [ | ||
['values', False], | ||
['asi8', True]]) | ||
@pytest.mark.parametrize("klass", [pd.Index, pd.DatetimeIndex]) | ||
def test_constructor_dtypes_datetime(self, tz_naive_fixture, values, | ||
def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, utc, | ||
klass): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a bit of commentary on what is tested here? |
||
index = pd.date_range('2011-01-01', periods=5, tz=tz_naive_fixture) | ||
# Test constructing with a datetimetz dtype | ||
# .values produces numpy datetimes, so these are considered naive | ||
# .asi8 produces integers, so these are considered epoch timestamps | ||
index = pd.date_range('2011-01-01', periods=5) | ||
arg = getattr(index, attr) | ||
if utc: | ||
index = index.tz_localize('UTC').tz_convert(tz_naive_fixture) | ||
else: | ||
index = index.tz_localize(tz_naive_fixture) | ||
dtype = index.dtype | ||
|
||
result = klass(values, tz=tz_naive_fixture) | ||
result = klass(arg, tz=tz_naive_fixture) | ||
tm.assert_index_equal(result, index) | ||
|
||
result = klass(values, dtype=dtype) | ||
result = klass(arg, dtype=dtype) | ||
tm.assert_index_equal(result, index) | ||
|
||
result = klass(list(values), tz=tz_naive_fixture) | ||
result = klass(list(arg), tz=tz_naive_fixture) | ||
tm.assert_index_equal(result, index) | ||
|
||
result = klass(list(values), dtype=dtype) | ||
result = klass(list(arg), dtype=dtype) | ||
tm.assert_index_equal(result, index) | ||
|
||
@pytest.mark.parametrize("attr", ['values', 'asi8']) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Side issue (for another PR): the imports inside functions should be as simple as possible, e.g.
`from pandas import DatetimeIndex` (there are several occurrences of this).