Skip to content

Commit 7c4efe0

Browse files
DEPR/API: Revert handling of i8values to DatetimeIndex (#24708)
1 parent 3fe28fc commit 7c4efe0

File tree

12 files changed

+179
-31
lines changed

12 files changed

+179
-31
lines changed

doc/source/whatsnew/v0.24.0.rst

+46-1
Original file line numberDiff line numberDiff line change
@@ -1235,7 +1235,6 @@ Datetimelike API Changes
12351235
- :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return an object-dtype :class:`Index` of :class:`DateOffset` objects instead of raising a ``TypeError`` (:issue:`20049`)
12361236
- :func:`cut` and :func:`qcut` now return :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins when the input is datetime or timedelta dtype respectively and ``retbins=True`` (:issue:`19891`)
12371237
- :meth:`DatetimeIndex.to_period` and :meth:`Timestamp.to_period` will issue a warning when timezone information will be lost (:issue:`21333`)
1238-
- :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`)
12391238
- :meth:`PeriodIndex.tz_convert` and :meth:`PeriodIndex.tz_localize` have been removed (:issue:`21781`)
12401239

12411240
.. _whatsnew_0240.api.other:
@@ -1353,6 +1352,52 @@ the object's ``freq`` attribute (:issue:`21939`, :issue:`23878`).
13531352
dti + pd.Index([1 * dti.freq, 2 * dti.freq])
13541353
13551354
1355+
.. _whatsnew_0240.deprecations.integer_tz:
1356+
1357+
Passing Integer data and a timezone to DatetimeIndex
1358+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1359+
1360+
The behavior of :class:`DatetimeIndex` when passed integer data and
1361+
a timezone is changing in a future version of pandas. Previously, these
1362+
were interpreted as wall times in the desired timezone. In the future,
1363+
these will be interpreted as wall times in UTC, which are then converted
1364+
to the desired timezone (:issue:`24559`).
1365+
1366+
The default behavior remains the same, but issues a warning:
1367+
1368+
.. code-block:: ipython
1369+
1370+
In [3]: pd.DatetimeIndex([946684800000000000], tz="US/Central")
1371+
/bin/ipython:1: FutureWarning:
1372+
Passing integer-dtype data and a timezone to DatetimeIndex. Integer values
1373+
will be interpreted differently in a future version of pandas. Previously,
1374+
these were viewed as datetime64[ns] values representing the wall time
1375+
*in the specified timezone*. In the future, these will be viewed as
1376+
datetime64[ns] values representing the wall time *in UTC*. This is similar
1377+
to a nanosecond-precision UNIX epoch. To accept the future behavior, use
1378+
1379+
pd.to_datetime(integer_data, utc=True).tz_convert(tz)
1380+
1381+
To keep the previous behavior, use
1382+
1383+
pd.to_datetime(integer_data).tz_localize(tz)
1384+
1385+
#!/bin/python3
1386+
Out[3]: DatetimeIndex(['2000-01-01 00:00:00-06:00'], dtype='datetime64[ns, US/Central]', freq=None)
1387+
1388+
As the warning message explains, opt in to the future behavior by specifying that
1389+
the integer values are UTC, and then converting to the final timezone:
1390+
1391+
.. ipython:: python
1392+
1393+
pd.to_datetime([946684800000000000], utc=True).tz_convert('US/Central')
1394+
1395+
The old behavior can be retained by localizing directly to the final timezone:
1396+
1397+
.. ipython:: python
1398+
1399+
pd.to_datetime([946684800000000000]).tz_localize('US/Central')
1400+
13561401
.. _whatsnew_0240.deprecations.tz_aware_array:
13571402

13581403
Converting Timezone-Aware Series and Index to NumPy Arrays

pandas/core/arrays/datetimes.py

+40-3
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,21 @@
3333
from pandas.tseries.offsets import Day, Tick
3434

3535
_midnight = time(0, 0)
36+
# TODO(GH-24559): Remove warning, int_as_wall_time parameter.
37+
_i8_message = """
38+
Passing integer-dtype data and a timezone to DatetimeIndex. Integer values
39+
will be interpreted differently in a future version of pandas. Previously,
40+
these were viewed as datetime64[ns] values representing the wall time
41+
*in the specified timezone*. In the future, these will be viewed as
42+
datetime64[ns] values representing the wall time *in UTC*. This is similar
43+
to a nanosecond-precision UNIX epoch. To accept the future behavior, use
44+
45+
pd.to_datetime(integer_data, utc=True).tz_convert(tz)
46+
47+
To keep the previous behavior, use
48+
49+
pd.to_datetime(integer_data).tz_localize(tz)
50+
"""
3651

3752

3853
def tz_to_dtype(tz):
@@ -342,13 +357,15 @@ def _simple_new(cls, values, freq=None, dtype=_NS_DTYPE):
342357
@classmethod
343358
def _from_sequence(cls, data, dtype=None, copy=False,
344359
tz=None, freq=None,
345-
dayfirst=False, yearfirst=False, ambiguous='raise'):
360+
dayfirst=False, yearfirst=False, ambiguous='raise',
361+
int_as_wall_time=False):
346362

347363
freq, freq_infer = dtl.maybe_infer_freq(freq)
348364

349365
subarr, tz, inferred_freq = sequence_to_dt64ns(
350366
data, dtype=dtype, copy=copy, tz=tz,
351-
dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous)
367+
dayfirst=dayfirst, yearfirst=yearfirst,
368+
ambiguous=ambiguous, int_as_wall_time=int_as_wall_time)
352369

353370
freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq,
354371
freq_infer)
@@ -1649,7 +1666,8 @@ def to_julian_date(self):
16491666

16501667
def sequence_to_dt64ns(data, dtype=None, copy=False,
16511668
tz=None,
1652-
dayfirst=False, yearfirst=False, ambiguous='raise'):
1669+
dayfirst=False, yearfirst=False, ambiguous='raise',
1670+
int_as_wall_time=False):
16531671
"""
16541672
Parameters
16551673
----------
@@ -1661,6 +1679,13 @@ def sequence_to_dt64ns(data, dtype=None, copy=False,
16611679
yearfirst : bool, default False
16621680
ambiguous : str, bool, or arraylike, default 'raise'
16631681
See pandas._libs.tslibs.conversion.tz_localize_to_utc
1682+
int_as_wall_time : bool, default False
1683+
Whether to treat ints as wall time in specified timezone, or as
1684+
nanosecond-precision UNIX epoch (wall time in UTC).
1685+
This is used in DatetimeIndex.__init__ to deprecate the wall-time
1686+
behaviour.
1687+
1688+
.. versionadded:: 0.24.0
16641689
16651690
Returns
16661691
-------
@@ -1717,6 +1742,10 @@ def sequence_to_dt64ns(data, dtype=None, copy=False,
17171742
data, inferred_tz = objects_to_datetime64ns(
17181743
data, dayfirst=dayfirst, yearfirst=yearfirst)
17191744
tz = maybe_infer_tz(tz, inferred_tz)
1745+
# When a sequence of timestamp objects is passed, we always
1746+
# want to treat the (now i8-valued) data as UTC timestamps,
1747+
# not wall times.
1748+
int_as_wall_time = False
17201749

17211750
# `data` may have originally been a Categorical[datetime64[ns, tz]],
17221751
# so we need to handle these types.
@@ -1744,8 +1773,16 @@ def sequence_to_dt64ns(data, dtype=None, copy=False,
17441773
else:
17451774
# must be integer dtype otherwise
17461775
# assume this data are epoch timestamps
1776+
if tz:
1777+
tz = timezones.maybe_get_tz(tz)
1778+
17471779
if data.dtype != _INT64_DTYPE:
17481780
data = data.astype(np.int64, copy=False)
1781+
if int_as_wall_time and tz is not None and not timezones.is_utc(tz):
1782+
warnings.warn(_i8_message, FutureWarning, stacklevel=4)
1783+
data = conversion.tz_localize_to_utc(data.view('i8'), tz,
1784+
ambiguous=ambiguous)
1785+
data = data.view(_NS_DTYPE)
17491786
result = data.view(_NS_DTYPE)
17501787

17511788
if copy:

pandas/core/indexes/base.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
is_dtype_union_equal, is_extension_array_dtype, is_float, is_float_dtype,
2323
is_hashable, is_integer, is_integer_dtype, is_interval_dtype, is_iterator,
2424
is_list_like, is_object_dtype, is_period_dtype, is_scalar,
25-
is_signed_integer_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype)
25+
is_signed_integer_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype,
26+
pandas_dtype)
2627
import pandas.core.dtypes.concat as _concat
2728
from pandas.core.dtypes.generic import (
2829
ABCDataFrame, ABCDateOffset, ABCDatetimeArray, ABCIndexClass,
@@ -732,6 +733,13 @@ def astype(self, dtype, copy=True):
732733
from .category import CategoricalIndex
733734
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
734735
copy=copy)
736+
elif is_datetime64tz_dtype(dtype):
737+
# TODO(GH-24559): Remove this block, use the following elif.
738+
# avoid FutureWarning from DatetimeIndex constructor.
739+
from pandas import DatetimeIndex
740+
tz = pandas_dtype(dtype).tz
741+
return (DatetimeIndex(np.asarray(self))
742+
.tz_localize("UTC").tz_convert(tz))
735743

736744
elif is_extension_array_dtype(dtype):
737745
return Index(np.asarray(self), dtype=dtype, copy=copy)

pandas/core/indexes/datetimes.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,8 @@ def __new__(cls, data=None,
299299

300300
dtarr = DatetimeArray._from_sequence(
301301
data, dtype=dtype, copy=copy, tz=tz, freq=freq,
302-
dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous)
302+
dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous,
303+
int_as_wall_time=True)
303304

304305
subarr = cls._simple_new(dtarr, name=name,
305306
freq=dtarr.freq, tz=dtarr.tz)

pandas/core/reshape/tile.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,10 @@ def _convert_bin_to_datelike_type(bins, dtype):
449449
bins : Array-like of bins, DatetimeIndex or TimedeltaIndex if dtype is
450450
datelike
451451
"""
452-
if is_datetime64tz_dtype(dtype) or is_datetime_or_timedelta_dtype(dtype):
452+
if is_datetime64tz_dtype(dtype):
453+
bins = to_datetime(bins.astype(np.int64),
454+
utc=True).tz_convert(dtype.tz)
455+
elif is_datetime_or_timedelta_dtype(dtype):
453456
bins = Index(bins.astype(np.int64), dtype=dtype)
454457
return bins
455458

pandas/tests/dtypes/test_common.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,8 @@ def test_is_datetime64tz_dtype():
209209
assert not com.is_datetime64tz_dtype(object)
210210
assert not com.is_datetime64tz_dtype([1, 2, 3])
211211
assert not com.is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3]))
212-
assert com.is_datetime64tz_dtype(pd.DatetimeIndex(
213-
[1, 2, 3], tz="US/Eastern"))
212+
assert com.is_datetime64tz_dtype(pd.DatetimeIndex(['2000'],
213+
tz="US/Eastern"))
214214

215215

216216
def test_is_timedelta64_dtype():
@@ -286,7 +286,7 @@ def test_is_datetimelike():
286286
assert com.is_datetimelike(pd.PeriodIndex([], freq="A"))
287287
assert com.is_datetimelike(np.array([], dtype=np.datetime64))
288288
assert com.is_datetimelike(pd.Series([], dtype="timedelta64[ns]"))
289-
assert com.is_datetimelike(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern"))
289+
assert com.is_datetimelike(pd.DatetimeIndex(["2000"], tz="US/Eastern"))
290290

291291
dtype = DatetimeTZDtype("ns", tz="US/Eastern")
292292
s = pd.Series([], dtype=dtype)
@@ -480,7 +480,7 @@ def test_needs_i8_conversion():
480480
assert com.needs_i8_conversion(np.datetime64)
481481
assert com.needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]"))
482482
assert com.needs_i8_conversion(pd.DatetimeIndex(
483-
[1, 2, 3], tz="US/Eastern"))
483+
["2000"], tz="US/Eastern"))
484484

485485

486486
def test_is_numeric_dtype():
@@ -541,7 +541,7 @@ def test_is_extension_type(check_scipy):
541541
assert com.is_extension_type(pd.Series(cat))
542542
assert com.is_extension_type(pd.SparseArray([1, 2, 3]))
543543
assert com.is_extension_type(pd.SparseSeries([1, 2, 3]))
544-
assert com.is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern"))
544+
assert com.is_extension_type(pd.DatetimeIndex(['2000'], tz="US/Eastern"))
545545

546546
dtype = DatetimeTZDtype("ns", tz="US/Eastern")
547547
s = pd.Series([], dtype=dtype)
@@ -635,8 +635,8 @@ def test__get_dtype_fails(input_param):
635635
(pd.DatetimeIndex([1, 2]), np.datetime64),
636636
(pd.DatetimeIndex([1, 2]).dtype, np.datetime64),
637637
('<M8[ns]', np.datetime64),
638-
(pd.DatetimeIndex([1, 2], tz='Europe/London'), pd.Timestamp),
639-
(pd.DatetimeIndex([1, 2], tz='Europe/London').dtype,
638+
(pd.DatetimeIndex(['2000'], tz='Europe/London'), pd.Timestamp),
639+
(pd.DatetimeIndex(['2000'], tz='Europe/London').dtype,
640640
pd.Timestamp),
641641
('datetime64[ns, Europe/London]', pd.Timestamp),
642642
(pd.SparseSeries([1, 2], dtype='int32'), np.int32),

pandas/tests/indexes/datetimes/test_astype.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -238,10 +238,10 @@ def _check_rng(rng):
238238
['US/Pacific', 'datetime64[ns, US/Pacific]'],
239239
[None, 'datetime64[ns]']])
240240
def test_integer_index_astype_datetime(self, tz, dtype):
241-
# GH 20997, 20964
241+
# GH 20997, 20964, 24559
242242
val = [pd.Timestamp('2018-01-01', tz=tz).value]
243243
result = pd.Index(val).astype(dtype)
244-
expected = pd.DatetimeIndex(['2018-01-01'], tz=tz)
244+
expected = pd.DatetimeIndex(["2018-01-01"], tz=tz)
245245
tm.assert_index_equal(result, expected)
246246

247247

pandas/tests/indexes/datetimes/test_construction.py

+33-4
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,15 @@ def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
118118
tz = tz_aware_fixture
119119
i = pd.date_range('20130101', periods=5, freq='H', tz=tz)
120120
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
121-
result = DatetimeIndex(i.tz_localize(None).asi8, **kwargs)
122-
expected = i.tz_localize(None).tz_localize('UTC').tz_convert(tz)
121+
122+
if str(tz) in ('UTC', 'tzutc()'):
123+
warn = None
124+
else:
125+
warn = FutureWarning
126+
127+
with tm.assert_produces_warning(warn, check_stacklevel=False):
128+
result = DatetimeIndex(i.tz_localize(None).asi8, **kwargs)
129+
expected = DatetimeIndex(i, **kwargs)
123130
tm.assert_index_equal(result, expected)
124131

125132
# localize into the provided tz
@@ -377,6 +384,19 @@ def test_range_kwargs_deprecated(self):
377384
with tm.assert_produces_warning(FutureWarning):
378385
DatetimeIndex(start='1/1/2000', end='1/10/2000', freq='D')
379386

387+
def test_integer_values_and_tz_deprecated(self):
388+
# GH-24559
389+
values = np.array([946684800000000000])
390+
with tm.assert_produces_warning(FutureWarning):
391+
result = DatetimeIndex(values, tz='US/Central')
392+
expected = pd.DatetimeIndex(['2000-01-01T00:00:00'], tz="US/Central")
393+
tm.assert_index_equal(result, expected)
394+
395+
# but UTC is *not* deprecated.
396+
with tm.assert_produces_warning(None):
397+
result = DatetimeIndex(values, tz='UTC')
398+
expected = pd.DatetimeIndex(['2000-01-01T00:00:00'], tz="US/Central")
399+
380400
def test_constructor_coverage(self):
381401
rng = date_range('1/1/2000', periods=10.5)
382402
exp = date_range('1/1/2000', periods=10)
@@ -555,21 +575,30 @@ def test_constructor_timestamp_near_dst(self):
555575
ts[1].to_pydatetime()])
556576
tm.assert_index_equal(result, expected)
557577

578+
# TODO(GH-24559): Remove the xfail for the tz-aware case.
558579
@pytest.mark.parametrize('klass', [Index, DatetimeIndex])
559580
@pytest.mark.parametrize('box', [
560581
np.array, partial(np.array, dtype=object), list])
561582
@pytest.mark.parametrize('tz, dtype', [
562-
['US/Pacific', 'datetime64[ns, US/Pacific]'],
563-
[None, 'datetime64[ns]']])
583+
pytest.param('US/Pacific', 'datetime64[ns, US/Pacific]',
584+
marks=[pytest.mark.xfail(),
585+
pytest.mark.filterwarnings(
586+
"ignore:\\n Passing:FutureWarning")]),
587+
[None, 'datetime64[ns]'],
588+
])
564589
def test_constructor_with_int_tz(self, klass, box, tz, dtype):
565590
# GH 20997, 20964
566591
ts = Timestamp('2018-01-01', tz=tz)
567592
result = klass(box([ts.value]), dtype=dtype)
568593
expected = klass([ts])
569594
assert result == expected
570595

596+
# This is the desired future behavior
597+
@pytest.mark.xfail(reason="Future behavior", strict=False)
598+
@pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning")
571599
def test_construction_int_rountrip(self, tz_naive_fixture):
572600
# GH 12619
601+
# TODO(GH-24559): Remove xfail
573602
tz = tz_naive_fixture
574603
result = 1293858000000000000
575604
expected = DatetimeIndex([1293858000000000000], tz=tz).asi8[0]

pandas/tests/indexes/multi/test_integrity.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,9 @@ def test_values_multiindex_datetimeindex():
5050
# Test to ensure we hit the boxing / nobox part of MI.values
5151
ints = np.arange(10 ** 18, 10 ** 18 + 5)
5252
naive = pd.DatetimeIndex(ints)
53-
aware = pd.DatetimeIndex(ints, tz='US/Central')
53+
# TODO(GH-24559): Remove the FutureWarning
54+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
55+
aware = pd.DatetimeIndex(ints, tz='US/Central')
5456

5557
idx = pd.MultiIndex.from_arrays([naive, aware])
5658
result = idx.values

pandas/tests/indexes/test_base.py

+25-8
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from datetime import datetime, timedelta
55
from decimal import Decimal
66
import math
7+
import sys
78

89
import numpy as np
910
import pytest
@@ -401,24 +402,40 @@ def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, utc,
401402
# Test constructing with a datetimetz dtype
402403
# .values produces numpy datetimes, so these are considered naive
403404
# .asi8 produces integers, so these are considered epoch timestamps
405+
# ^the above will be true in a later version. Right now we `.view`
406+
# the i8 values as NS_DTYPE, effectively treating them as wall times.
404407
index = pd.date_range('2011-01-01', periods=5)
405408
arg = getattr(index, attr)
406-
if utc:
407-
index = index.tz_localize('UTC').tz_convert(tz_naive_fixture)
408-
else:
409-
index = index.tz_localize(tz_naive_fixture)
409+
index = index.tz_localize(tz_naive_fixture)
410410
dtype = index.dtype
411411

412-
result = klass(arg, tz=tz_naive_fixture)
412+
# TODO(GH-24559): Remove the sys.modules and warnings
413+
# not sure what this is from. It's Py2 only.
414+
modules = [sys.modules['pandas.core.indexes.base']]
415+
416+
if (tz_naive_fixture and attr == "asi8" and
417+
str(tz_naive_fixture) not in ('UTC', 'tzutc()')):
418+
ex_warn = FutureWarning
419+
else:
420+
ex_warn = None
421+
422+
# stacklevel is checked elsewhere. We don't do it here since
423+
# Index will have an extra frame, throwing off the expected stacklevel.
424+
with tm.assert_produces_warning(ex_warn, check_stacklevel=False,
425+
clear=modules):
426+
result = klass(arg, tz=tz_naive_fixture)
413427
tm.assert_index_equal(result, index)
414428

415-
result = klass(arg, dtype=dtype)
429+
with tm.assert_produces_warning(ex_warn, check_stacklevel=False):
430+
result = klass(arg, dtype=dtype)
416431
tm.assert_index_equal(result, index)
417432

418-
result = klass(list(arg), tz=tz_naive_fixture)
433+
with tm.assert_produces_warning(ex_warn, check_stacklevel=False):
434+
result = klass(list(arg), tz=tz_naive_fixture)
419435
tm.assert_index_equal(result, index)
420436

421-
result = klass(list(arg), dtype=dtype)
437+
with tm.assert_produces_warning(ex_warn, check_stacklevel=False):
438+
result = klass(list(arg), dtype=dtype)
422439
tm.assert_index_equal(result, index)
423440

424441
@pytest.mark.parametrize("attr", ['values', 'asi8'])

0 commit comments

Comments
 (0)