From 962604458d91ab0bb5ee1b59247e8b309959f4d6 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 12 May 2018 14:07:13 -0700 Subject: [PATCH 01/11] BUG: Localize Index with datetime dtype and integer data correctly --- pandas/core/indexes/datetimes.py | 10 +++++----- pandas/tests/indexes/datetimes/test_construction.py | 8 ++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 9761974d77d4b..8db3e14426308 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -394,10 +394,10 @@ def __new__(cls, data=None, # data must be Index or np.ndarray here if not (is_datetime64_dtype(data) or is_datetimetz(data) or - is_integer_dtype(data)): + is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'): data = tools.to_datetime(data, dayfirst=dayfirst, yearfirst=yearfirst) - + passed_integer_data = False if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data): if isinstance(data, DatetimeIndex): @@ -424,10 +424,9 @@ def __new__(cls, data=None, subarr = data else: # must be integer dtype otherwise - if isinstance(data, Int64Index): - raise TypeError('cannot convert Int64Index->DatetimeIndex') if data.dtype != _INT64_DTYPE: data = data.astype(np.int64) + passed_integer_data = True subarr = data.view(_NS_DTYPE) if isinstance(subarr, DatetimeIndex): @@ -438,7 +437,8 @@ def __new__(cls, data=None, tz = timezones.maybe_get_tz(tz) if (not isinstance(data, DatetimeIndex) or - getattr(data, 'tz', None) is None): + getattr(data, 'tz', None) is None) and + not passed_integer_data: # Convert tz-naive to UTC ints = subarr.view('i8') subarr = conversion.tz_localize_to_utc(ints, tz, diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index dae69a86910af..4dafb46a64a1a 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -469,6 +469,14 @@ def test_constructor_with_non_normalized_pytz(self, tz): result = DatetimeIndex(['2010'], tz=non_norm_tz) assert pytz.timezone(tz) is result.tz + @pytest.mark.parametrize('klass', [Index, DatetimeIndex]) + @pytest.mark.parametrize('box', [np.array, list]) + def test_constructor_with_int_tz(self, klass): + ts = Timestamp('2018-01-01', tz='US/Pacific') + result = klass(box(ts.value), dtype='datetime64[ns, US/Pacific]') + expected = klass([ts]) + tm.assert_index_equal(result, expected) + class TestTimeSeries(object): From f5235eb3f8c110231d1ed3074ebe8b81e5e579f0 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 13 May 2018 21:32:01 -0700 Subject: [PATCH 02/11] Adjust tests --- pandas/core/indexes/datetimes.py | 3 +-- pandas/tests/indexes/datetimes/test_construction.py | 11 +++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 8db3e14426308..8594b1d5e176f 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -437,8 +437,7 @@ def __new__(cls, data=None, tz = timezones.maybe_get_tz(tz) if (not isinstance(data, DatetimeIndex) or - getattr(data, 'tz', None) is None) and - not passed_integer_data: + getattr(data, 'tz', None) is None) and not passed_integer_data: # Convert tz-naive to UTC ints = subarr.view('i8') subarr = conversion.tz_localize_to_utc(ints, tz, diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 4dafb46a64a1a..0fddac4cd22e4 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -33,16 +33,15 @@ def test_construction_with_alt(self): tm.assert_index_equal(i, i2) assert i.tz.zone == 'US/Eastern' - i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz) + i2 = DatetimeIndex(i.asi8, tz=i.dtype.tz) tm.assert_index_equal(i, i2) assert i.tz.zone == 'US/Eastern' - i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype) + i2 = DatetimeIndex(i.asi8, dtype=i.dtype) tm.assert_index_equal(i, i2) assert i.tz.zone == 'US/Eastern' - i2 = DatetimeIndex( - i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz) + i2 = DatetimeIndex(i.asi8, dtype=i.dtype, tz=i.dtype.tz) tm.assert_index_equal(i, i2) assert i.tz.zone == 'US/Eastern' @@ -471,9 +470,9 @@ def test_constructor_with_non_normalized_pytz(self, tz): @pytest.mark.parametrize('klass', [Index, DatetimeIndex]) @pytest.mark.parametrize('box', [np.array, list]) - def test_constructor_with_int_tz(self, klass): + def test_constructor_with_int_tz(self, klass, box): ts = Timestamp('2018-01-01', tz='US/Pacific') - result = klass(box(ts.value), dtype='datetime64[ns, US/Pacific]') + result = klass(box([ts.value]), dtype='datetime64[ns, US/Pacific]') expected = klass([ts]) tm.assert_index_equal(result, expected) From c95ca51fb25a9768d0a347b51ca0152218b5d0cf Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 19 May 2018 16:38:06 -0700 Subject: [PATCH 03/11] Adjust tests --- pandas/core/indexes/datetimes.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 9bba7044d6b69..e191a9d0695c6 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -437,7 +437,7 @@ def __new__(cls, data=None, tz = timezones.maybe_get_tz(tz) if (not isinstance(data, DatetimeIndex) or - getattr(data, 'tz', None) is None) and not passed_integer_data: + getattr(data, 'tz', None) is None) and not passed_integer_data: # Convert tz-naive to UTC ints = subarr.view('i8') subarr = conversion.tz_localize_to_utc(ints, tz, @@ -1242,7 +1242,7 @@ def join(self, other, how='left', level=None, return_indexers=False, See Index.join """ if (not isinstance(other, DatetimeIndex) and len(other) > 0 and - other.inferred_type not in ('floating', 'mixed-integer', + other.inferred_type not in ('floating', 'integer', 'mixed-integer', 'mixed-integer-float', 'mixed')): try: other = DatetimeIndex(other) @@ -2080,8 +2080,9 @@ def normalize(self): dtype='datetime64[ns, Asia/Calcutta]', freq=None) """ new_values = conversion.date_normalize(self.asi8, self.tz) - return DatetimeIndex(new_values, freq='infer', name=self.name, - tz=self.tz) + return DatetimeIndex(new_values, + freq='infer', + name=self.name).tz_localize(self.tz) @Substitution(klass='DatetimeIndex') @Appender(_shared_docs['searchsorted']) @@ -2162,8 +2163,6 @@ def insert(self, loc, item): try: new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8)) - if self.tz is not None: - new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz) return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz) except (AttributeError, TypeError): @@ -2201,8 +2200,6 @@ def delete(self, loc): if (loc.start in (0, None) or loc.stop in (len(self), None)): freq = self.freq - if self.tz is not None: - new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz) return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz) def tz_convert(self, tz): From cc687644fd13613330fd19d79698a196d5a6eeec Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 20 May 2018 09:31:18 -0700 Subject: [PATCH 04/11] Refactor processing logic --- pandas/core/indexes/datetimes.py | 69 +++++++++++++------------------- pandas/io/formats/format.py | 6 ++- 2 files changed, 32 insertions(+), 43 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e191a9d0695c6..330644beab906 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -397,53 +397,40 @@ def __new__(cls, data=None, is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'): data = tools.to_datetime(data, dayfirst=dayfirst, yearfirst=yearfirst) - passed_integer_data = False - if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data): - - if isinstance(data, DatetimeIndex): - if tz is None: - tz = data.tz - elif data.tz is None: - data = data.tz_localize(tz, ambiguous=ambiguous) - else: - # the tz's must match - if str(tz) != str(data.tz): - msg = ('data is already tz-aware {0}, unable to ' - 'set specified tz: {1}') - raise TypeError(msg.format(data.tz, tz)) + import pdb; pdb.set_trace() + if isinstance(data, DatetimeIndex): + if tz is None: + tz = data.tz + elif data.tz is None: + data = data.tz_localize(tz, ambiguous=ambiguous) + else: + # the tz's must match + if str(tz) != str(data.tz): + msg = ('data is already tz-aware {0}, unable to ' + 'set specified tz: {1}') + raise TypeError(msg.format(data.tz, tz)) - subarr = data.values + subarr = data.values - if freq is None: - freq = data.freq - verify_integrity = False - else: - if data.dtype != _NS_DTYPE: - subarr = conversion.ensure_datetime64ns(data) - else: - subarr = data + if freq is None: + freq = data.freq + verify_integrity = False + elif issubclass(data.dtype.type, np.datetime64): + if data.dtype != _NS_DTYPE: + data = conversion.ensure_datetime64ns(data) + if tz is not None: + # Convert tz-naive to UTC + tz = timezones.maybe_get_tz(tz) + data = conversion.tz_localize_to_utc(data.view('i8'), tz, + ambiguous=ambiguous) + subarr = data.view(_NS_DTYPE) else: # must be integer dtype otherwise + # assume this data are epoch timestamps if data.dtype != _INT64_DTYPE: data = data.astype(np.int64) - passed_integer_data = True subarr = data.view(_NS_DTYPE) - if isinstance(subarr, DatetimeIndex): - if tz is None: - tz = subarr.tz - else: - if tz is not None: - tz = timezones.maybe_get_tz(tz) - - if (not isinstance(data, DatetimeIndex) or - getattr(data, 'tz', None) is None) and not passed_integer_data: - # Convert tz-naive to UTC - ints = subarr.view('i8') - subarr = conversion.tz_localize_to_utc(ints, tz, - ambiguous=ambiguous) - subarr = subarr.view(_NS_DTYPE) - subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz) if dtype is not None: if not is_dtype_equal(subarr.dtype, dtype): @@ -630,7 +617,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, we require the we have a dtype compat for the values if we are passed a non-dtype compat, then coerce using the constructor """ - + import pdb; pdb.set_trace() if getattr(values, 'dtype', None) is None: # empty, but with dtype compat if values is None: @@ -806,7 +793,7 @@ def _mpl_repr(self): @cache_readonly def _is_dates_only(self): from pandas.io.formats.format import _is_dates_only - return _is_dates_only(self.values) + return _is_dates_only(self.values) and self.tz is None @property def _formatter_func(self): diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 12201f62946ac..e03a1e98311e2 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1237,6 +1237,7 @@ def format_percentiles(percentiles): def _is_dates_only(values): + import pdb; pdb.set_trace() # return a boolean if we are only dates (and don't have a timezone) values = DatetimeIndex(values) if values.tz is not None: @@ -1253,6 +1254,7 @@ def _is_dates_only(values): def _format_datetime64(x, tz=None, nat_rep='NaT'): + import pdb; pdb.set_trace() if x is None or (is_scalar(x) and isna(x)): return nat_rep @@ -1276,7 +1278,7 @@ def _format_datetime64_dateonly(x, nat_rep='NaT', date_format=None): def _get_format_datetime64(is_dates_only, nat_rep='NaT', date_format=None): - + import pdb; pdb.set_trace() if is_dates_only: return lambda x, tz=None: _format_datetime64_dateonly( x, nat_rep=nat_rep, date_format=date_format) @@ -1296,7 +1298,7 @@ class Datetime64TZFormatter(Datetime64Formatter): def _format_strings(self): """ we by definition have a TZ """ - + import pdb; pdb.set_trace() values = self.values.astype(object) is_dates_only = _is_dates_only(values) formatter = (self.formatter or From 86a40a147867485b797ee739831d27ad80a006db Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 21 May 2018 22:39:09 -0700 Subject: [PATCH 05/11] Adjust test --- pandas/core/indexes/datetimes.py | 3 +-- pandas/io/formats/format.py | 8 ++++---- pandas/tests/indexes/test_base.py | 24 ++++++++++++++---------- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 330644beab906..02be4b0c80041 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -397,7 +397,7 @@ def __new__(cls, data=None, is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'): data = tools.to_datetime(data, dayfirst=dayfirst, yearfirst=yearfirst) - import pdb; pdb.set_trace() + if isinstance(data, DatetimeIndex): if tz is None: tz = data.tz @@ -617,7 +617,6 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, we require the we have a dtype compat for the values if we are passed a non-dtype compat, then coerce using the constructor """ - import pdb; pdb.set_trace() if getattr(values, 'dtype', None) is None: # empty, but with dtype compat if values is None: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index e03a1e98311e2..fee63957a9397 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1237,7 +1237,7 @@ def format_percentiles(percentiles): def _is_dates_only(values): - import pdb; pdb.set_trace() + # return a boolean if we are only dates (and don't have a timezone) values = DatetimeIndex(values) if values.tz is not None: @@ -1254,7 +1254,7 @@ def _is_dates_only(values): def _format_datetime64(x, tz=None, nat_rep='NaT'): - import pdb; pdb.set_trace() + if x is None or (is_scalar(x) and isna(x)): return nat_rep @@ -1278,7 +1278,7 @@ def _format_datetime64_dateonly(x, nat_rep='NaT', date_format=None): def _get_format_datetime64(is_dates_only, nat_rep='NaT', date_format=None): - import pdb; pdb.set_trace() + if is_dates_only: return lambda x, tz=None: _format_datetime64_dateonly( x, nat_rep=nat_rep, date_format=date_format) @@ -1298,7 +1298,7 @@ class Datetime64TZFormatter(Datetime64Formatter): def _format_strings(self): """ we by definition have a TZ """ - import pdb; pdb.set_trace() + values = self.values.astype(object) is_dates_only = _is_dates_only(values) formatter = (self.formatter or diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index f9f16dc0ce8b7..9730c224e18e2 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -402,26 +402,30 @@ def test_constructor_dtypes_to_timedelta(self, cast_index, vals): index = Index(vals) assert isinstance(index, TimedeltaIndex) - @pytest.mark.parametrize("values", [ - # pass values without timezone, as DatetimeIndex localizes it - pd.date_range('2011-01-01', periods=5).values, - pd.date_range('2011-01-01', periods=5).asi8]) + @pytest.mark.parametrize("attr, utc", [ + ['values', False], + ['asi8', True]]) @pytest.mark.parametrize("klass", [pd.Index, pd.DatetimeIndex]) - def test_constructor_dtypes_datetime(self, tz_naive_fixture, values, + def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, utc, klass): - index = pd.date_range('2011-01-01', periods=5, tz=tz_naive_fixture) + index = pd.date_range('2011-01-01', periods=5) + arg = getattr(index, attr) + if utc: + index = index.tz_localize('UTC').tz_convert(tz_naive_fixture) + else: + index = index.tz_localize(tz_naive_fixture) dtype = index.dtype - result = klass(values, tz=tz_naive_fixture) + result = klass(arg, tz=tz_naive_fixture) tm.assert_index_equal(result, index) - result = klass(values, dtype=dtype) + result = klass(arg, dtype=dtype) tm.assert_index_equal(result, index) - result = klass(list(values), tz=tz_naive_fixture) + result = klass(list(arg), tz=tz_naive_fixture) tm.assert_index_equal(result, index) - result = klass(list(values), dtype=dtype) + result = klass(list(arg), dtype=dtype) tm.assert_index_equal(result, index) @pytest.mark.parametrize("attr", ['values', 'asi8']) From 74661ca2734f72028ebb21fc797397ec6f399398 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 23 May 2018 11:41:14 -0700 Subject: [PATCH 06/11] add additional comment --- pandas/core/indexes/datetimes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 02be4b0c80041..6df495fe252c8 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -791,6 +791,7 @@ def _mpl_repr(self): @cache_readonly def _is_dates_only(self): + """Return a boolean if we are only dates (and don't have a timezone)""" from pandas.io.formats.format import _is_dates_only return _is_dates_only(self.values) and self.tz is None From abf3efc8e4020e39b93b920132b583bddea052a8 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 25 May 2018 22:25:52 -0700 Subject: [PATCH 07/11] adjust blank spaces --- pandas/core/indexes/datetimes.py | 1 + pandas/io/formats/format.py | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 6df495fe252c8..f3ef18c89174d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -617,6 +617,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, we require the we have a dtype compat for the values if we are passed a non-dtype compat, then coerce using the constructor """ + if getattr(values, 'dtype', None) is None: # empty, but with dtype compat if values is None: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index fee63957a9397..12201f62946ac 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1237,7 +1237,6 @@ def format_percentiles(percentiles): def _is_dates_only(values): - # return a boolean if we are only dates (and don't have a timezone) values = DatetimeIndex(values) if values.tz is not None: @@ -1254,7 +1253,6 @@ def _is_dates_only(values): def _format_datetime64(x, tz=None, nat_rep='NaT'): - if x is None or (is_scalar(x) and isna(x)): return nat_rep From ec4795b72b2f7cb72a364cdf35404d0b3ccd901a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 29 May 2018 23:24:39 -0700 Subject: [PATCH 08/11] Address review --- doc/source/whatsnew/v0.24.0.txt | 4 +- pandas/core/indexes/base.py | 4 ++ pandas/tests/indexes/datetimes/test_astype.py | 7 +++ .../indexes/datetimes/test_construction.py | 44 +++++++++++-------- pandas/tests/indexes/test_base.py | 3 ++ 5 files changed, 41 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index e931450cb5c01..2dee2048eb842 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -34,7 +34,7 @@ Datetimelike API Changes Other API Changes ^^^^^^^^^^^^^^^^^ -- +- :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`) - - @@ -90,7 +90,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ -- +- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) - - diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 229624c7e6645..dc4644157592d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1163,6 +1163,10 @@ def astype(self, dtype, copy=True): from .category import CategoricalIndex return CategoricalIndex(self.values, name=self.name, dtype=dtype, copy=copy) + elif is_datetime64tz_dtype(dtype): + from pandas.core.indexes.datetimes import DatetimeIndex + return DatetimeIndex(self.values, name=self.name, dtype=dtype, + copy=copy) try: return Index(self.values.astype(dtype, copy=copy), name=self.name, dtype=dtype) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 8acdd301f241a..2b22938d8a1ac 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -225,6 +225,13 @@ def _check_rng(rng): _check_rng(rng_eastern) _check_rng(rng_utc) + def test_integer_index_astype_datetimetz_dtype(self): + # GH 20997, 20964 + val = [pd.Timestamp('2018-01-01', tz='US/Pacific').value] + result = pd.Index(val).astype('datetime64[ns, US/Pacific]') + expected = pd.DatetimeIndex(['2018-01-01'], tz='US/Pacific') + tm.assert_index_equal(result, expected) + class TestToPeriod(object): diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 0fddac4cd22e4..bfc964634ee13 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -1,8 +1,9 @@ -import pytest +from datetime import timedelta +from operator import attrgetter +import pytest import pytz import numpy as np -from datetime import timedelta import pandas as pd from pandas import offsets @@ -26,24 +27,28 @@ def test_construction_caching(self): freq='ns')}) assert df.dttz.dtype.tz.zone == 'US/Eastern' - def test_construction_with_alt(self): - + @pytest.mark.parametrize('kwargs', [ + {'tz': 'dtype.tz'}, + {'dtype': 'dtype'}, + {'dtype': 'dtype', 'tz': 'dtype.tz'}]) + def test_construction_with_alt(self, kwargs): i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') - i2 = DatetimeIndex(i, dtype=i.dtype) - tm.assert_index_equal(i, i2) - assert i.tz.zone == 'US/Eastern' - - i2 = DatetimeIndex(i.asi8, tz=i.dtype.tz) - tm.assert_index_equal(i, i2) - assert i.tz.zone == 'US/Eastern' - - i2 = DatetimeIndex(i.asi8, dtype=i.dtype) - tm.assert_index_equal(i, i2) - assert i.tz.zone == 'US/Eastern' - - i2 = DatetimeIndex(i.asi8, dtype=i.dtype, tz=i.dtype.tz) - tm.assert_index_equal(i, i2) - assert i.tz.zone == 'US/Eastern' + kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} + result = DatetimeIndex(i, **kwargs) + tm.assert_index_equal(i, result) + assert result.tz.zone == 'US/Eastern' + + @pytest.mark.parametrize('kwargs', [ + {'tz': 'dtype.tz'}, + {'dtype': 'dtype'}, + {'dtype': 'dtype', 'tz': 'dtype.tz'}]) + def test_construction_with_alt_tz_localize(self, kwargs): + i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') + kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} + result = DatetimeIndex(i.tz_localize(None).asi8, **kwargs) + expected = i - pd.Timedelta(hours=5) + tm.assert_index_equal(result, expected) + assert result.tz.zone == 'US/Eastern' # localize into the provided tz i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC') @@ -471,6 +476,7 @@ def test_constructor_with_non_normalized_pytz(self, tz): @pytest.mark.parametrize('klass', [Index, DatetimeIndex]) @pytest.mark.parametrize('box', [np.array, list]) def test_constructor_with_int_tz(self, klass, box): + # GH 20997, 20964 ts = Timestamp('2018-01-01', tz='US/Pacific') result = klass(box([ts.value]), dtype='datetime64[ns, US/Pacific]') expected = klass([ts]) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 9730c224e18e2..dc1c70bd5bc44 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -408,6 +408,9 @@ def test_constructor_dtypes_to_timedelta(self, cast_index, vals): @pytest.mark.parametrize("klass", [pd.Index, pd.DatetimeIndex]) def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, utc, klass): + # Test constructing with a datetimetz dtype + # .values produces numpy datetimes, so these are considered naive + # .asi8 produces integers, so these are considered epoch timestamps index = pd.date_range('2011-01-01', periods=5) arg = getattr(index, attr) if utc: From 37484eaeab309050fa0fa23cd8312023c7a7117e Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 2 Jun 2018 13:42:05 -0700 Subject: [PATCH 09/11] Adjust astype --- pandas/core/indexes/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index dc4644157592d..d3ed0f7b8ae55 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1163,11 +1163,11 @@ def astype(self, dtype, copy=True): from .category import CategoricalIndex return CategoricalIndex(self.values, name=self.name, dtype=dtype, copy=copy) - elif is_datetime64tz_dtype(dtype): - from pandas.core.indexes.datetimes import DatetimeIndex - return DatetimeIndex(self.values, name=self.name, dtype=dtype, - copy=copy) try: + if is_datetime64tz_dtype(dtype): + from pandas.core.indexes.datetimes import DatetimeIndex + return DatetimeIndex(self.values, name=self.name, dtype=dtype, + copy=copy) return Index(self.values.astype(dtype, copy=copy), name=self.name, dtype=dtype) except (TypeError, ValueError): From dc0a3fe834a73af7ca8b00f450b14be467299b1f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 6 Jun 2018 18:08:26 -0700 Subject: [PATCH 10/11] Add fixture tests, test with object data, dont copy with astype --- pandas/core/indexes/datetimes.py | 2 +- pandas/tests/indexes/datetimes/test_astype.py | 11 +++++--- .../indexes/datetimes/test_construction.py | 27 +++++++++++-------- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f3ef18c89174d..35f6e751caa6c 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -428,7 +428,7 @@ def __new__(cls, data=None, # must be integer dtype otherwise # assume this data are epoch timestamps if data.dtype != _INT64_DTYPE: - data = data.astype(np.int64) + data = data.astype(np.int64, copy=False) subarr = data.view(_NS_DTYPE) subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 2b22938d8a1ac..64b8f48f6a4e1 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -225,11 +225,14 @@ def _check_rng(rng): _check_rng(rng_eastern) _check_rng(rng_utc) - def test_integer_index_astype_datetimetz_dtype(self): + @pytest.mark.parametrize('tz, dtype', [ + ['US/Pacific', 'datetime64[ns, US/Pacific]'], + [None, 'datetime64[ns]']]) + def test_integer_index_astype_datetime(self, tz, dtype): # GH 20997, 20964 - val = [pd.Timestamp('2018-01-01', tz='US/Pacific').value] - result = pd.Index(val).astype('datetime64[ns, US/Pacific]') - expected = pd.DatetimeIndex(['2018-01-01'], tz='US/Pacific') + val = [pd.Timestamp('2018-01-01', tz=tz).value] + result = pd.Index(val).astype(dtype) + expected = pd.DatetimeIndex(['2018-01-01'], tz=tz) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index bfc964634ee13..360cc42afc1c4 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -1,5 +1,6 @@ from datetime import timedelta from operator import attrgetter +from functools import partial import pytest import pytz @@ -31,24 +32,24 @@ def test_construction_caching(self): {'tz': 'dtype.tz'}, {'dtype': 'dtype'}, {'dtype': 'dtype', 'tz': 'dtype.tz'}]) - def test_construction_with_alt(self, kwargs): - i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') + def test_construction_with_alt(self, kwargs, tz_aware_fixture): + tz = tz_aware_fixture + i = pd.date_range('20130101', periods=5, freq='H', tz=tz) kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} result = DatetimeIndex(i, **kwargs) tm.assert_index_equal(i, result) - assert result.tz.zone == 'US/Eastern' @pytest.mark.parametrize('kwargs', [ {'tz': 'dtype.tz'}, {'dtype': 'dtype'}, {'dtype': 'dtype', 'tz': 'dtype.tz'}]) - def test_construction_with_alt_tz_localize(self, kwargs): - i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') + def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture): + tz = tz_aware_fixture + i = pd.date_range('20130101', periods=5, freq='H', tz=tz) kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} result = DatetimeIndex(i.tz_localize(None).asi8, **kwargs) - expected = i - pd.Timedelta(hours=5) + expected = i.tz_localize(None).tz_localize('UTC').tz_convert(tz) tm.assert_index_equal(result, expected) - assert result.tz.zone == 'US/Eastern' # localize into the provided tz i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC') @@ -474,11 +475,15 @@ def test_constructor_with_non_normalized_pytz(self, tz): assert pytz.timezone(tz) is result.tz @pytest.mark.parametrize('klass', [Index, DatetimeIndex]) - @pytest.mark.parametrize('box', [np.array, list]) - def test_constructor_with_int_tz(self, klass, box): + @pytest.mark.parametrize('box', [ + np.array, partial(np.array, dtype=object), list]) + @pytest.mark.parametrize('tz, dtype', [ + ['US/Pacific', 'datetime64[ns, US/Pacific]'], + [None, 'datetime64[ns]']]) + def test_constructor_with_int_tz(self, klass, box, tz, dtype): # GH 20997, 20964 - ts = Timestamp('2018-01-01', tz='US/Pacific') - result = klass(box([ts.value]), dtype='datetime64[ns, US/Pacific]') + ts = Timestamp('2018-01-01', tz=tz) + result = klass(box([ts.value]), dtype=dtype) expected = klass([ts]) tm.assert_index_equal(result, expected) From dc7e5c03d9abba90e3e9b4d3bc3f6f7f77a1aaa4 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 13 Jun 2018 23:34:00 -0700 Subject: [PATCH 11/11] add assertion back after merge conflict --- pandas/tests/indexes/datetimes/test_construction.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 318a3234024a6..f7682a965c038 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -494,6 +494,7 @@ def test_constructor_with_int_tz(self, klass, box, tz, dtype): ts = Timestamp('2018-01-01', tz=tz) result = klass(box([ts.value]), dtype=dtype) expected = klass([ts]) + assert result == expected class TestTimeSeries(object):