Skip to content

Commit 7d37ab5

Browse files
committed
DEPR: __array__ for tz-aware Series/Index
This deprecates the current behavior when converting tz-aware Series or Index to an ndarray. Previously, we converted to M8[ns], throwing away the timezone information. In the future, we will return an object-dtype array filled with Timestamps, each of which has the correct tz. ```python In [1]: import pandas as pd; import numpy as np In [2]: ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) In [3]: np.asarray(ser) /bin/ipython:1: FutureWarning: Converting timezone-aware DatetimeArray to timezone-naive ndarray with 'datetime64[ns]' dtype. In the future, this will return an ndarray with 'object' dtype where each element is a 'pandas.Timestamp' with the correct 'tz'. To accept the future behavior, pass 'dtype=object'. To keep the old behavior, pass 'dtype="datetime64[ns]"'. #!/Users/taugspurger/Envs/pandas-dev/bin/python3 Out[3]: array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00.000000000'], dtype='datetime64[ns]') ``` xref pandas-dev#23569
1 parent db051b9 commit 7d37ab5

File tree

16 files changed

+301
-27
lines changed

16 files changed

+301
-27
lines changed

doc/source/whatsnew/v0.24.0.rst

+55-1
Original file line numberDiff line numberDiff line change
@@ -1228,7 +1228,7 @@ Deprecations
12281228
.. _whatsnew_0240.deprecations.datetimelike_int_ops:
12291229

12301230
Integer Addition/Subtraction with Datetimes and Timedeltas is Deprecated
1231-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1231+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
12321232

12331233
In the past, users could—in some cases—add or subtract integers or integer-dtype
12341234
arrays from :class:`Timestamp`, :class:`DatetimeIndex` and :class:`TimedeltaIndex`.
@@ -1266,6 +1266,60 @@ the object's ``freq`` attribute (:issue:`21939`, :issue:`23878`).
12661266
dti = pd.date_range('2001-01-01', periods=2, freq='7D')
12671267
dti + pd.Index([1 * dti.freq, 2 * dti.freq])
12681268
1269+
1270+
.. _whatsnew_0240.deprecations.tz_aware_array:
1271+
1272+
Converting Timezone-Aware Series and Index to NumPy Arrays
1273+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1274+
1275+
The conversion from a :class:`Series` or :class:`Index` with timezone-aware
1276+
datetime data will change to preserve timezones by default (:issue:`23569`).
1277+
1278+
NumPy doesn't have a dedicated dtype for timezone-aware datetimes.
1279+
In the past, converting a :class:`Series` or :class:`DatetimeIndex` with
1280+
timezone-aware datetimes would convert to a NumPy array by
1281+
1282+
1. converting the tz-aware data to UTC
1283+
2. dropping the timezone-info
1284+
3. returning a :class:`numpy.ndarray` with ``datetime64[ns]`` dtype
1285+
1286+
Future versions of pandas will preserve the timezone information by returning an
1287+
object-dtype NumPy array where each value is a :class:`Timestamp` with the correct
1288+
timezone attached.
1289+
1290+
.. ipython:: python
1291+
1292+
ser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
1293+
ser
1294+
1295+
The default behavior remains the same, but issues a warning
1296+
1297+
.. code-block:: python
1298+
1299+
In [8]: np.asarray(ser)
1300+
/bin/ipython:1: FutureWarning: Converting timezone-aware DatetimeArray to timezone-naive
1301+
ndarray with 'datetime64[ns]' dtype. In the future, this will return an ndarray
1302+
with 'object' dtype where each element is a 'pandas.Timestamp' with the correct 'tz'.
1303+
1304+
To accept the future behavior, pass 'dtype=object'.
1305+
To keep the old behavior, pass 'dtype="datetime64[ns]"'.
1306+
#!/bin/python3
1307+
Out[8]:
1308+
array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00.000000000'],
1309+
dtype='datetime64[ns]')
1310+
1311+
The old or new behavior can be obtained by specifying the ``dtype``
1312+
1313+
.. ipython:: python
1314+
:okwarning:
1315+
1316+
# Old behavior
1317+
np.asarray(ser, dtype='datetime64[ns]')
1318+
1319+
# New behavior
1320+
np.asarray(ser, dtype=object)
1321+
1322+
12691323
.. _whatsnew_0240.prior_deprecations:
12701324

12711325
Removal of prior version deprecations/changes

pandas/core/arrays/datetimes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ def _resolution(self):
522522
# Array-Like / EA-Interface Methods
523523

524524
def __array__(self, dtype=None):
525-
if is_object_dtype(dtype):
525+
if is_object_dtype(dtype) or (dtype is None and self.tz):
526526
return np.array(list(self), dtype=object)
527527
elif is_int64_dtype(dtype):
528528
return self.asi8

pandas/core/dtypes/cast.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1020,7 +1020,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'):
10201020
# datetime64tz is assumed to be naive which should
10211021
# be localized to the timezone.
10221022
is_dt_string = is_string_dtype(value)
1023-
value = to_datetime(value, errors=errors)
1023+
value = to_datetime(value, errors=errors).array
10241024
if is_dt_string:
10251025
# Strings here are naive, so directly localize
10261026
value = value.tz_localize(dtype.tz)

pandas/core/dtypes/dtypes.py

+6
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ def _hash_categories(categories, ordered=True):
318318
from pandas.core.util.hashing import (
319319
hash_array, _combine_hash_arrays, hash_tuples
320320
)
321+
from pandas.core.dtypes.common import is_datetime64tz_dtype, _NS_DTYPE
321322

322323
if len(categories) and isinstance(categories[0], tuple):
323324
# assumes if any individual category is a tuple, then all our. ATM
@@ -335,6 +336,11 @@ def _hash_categories(categories, ordered=True):
335336
# find a better solution
336337
hashed = hash((tuple(categories), ordered))
337338
return hashed
339+
340+
if is_datetime64tz_dtype(categories.dtype):
341+
# Avoid future warning.
342+
categories = categories.astype(_NS_DTYPE)
343+
338344
cat_array = hash_array(np.asarray(categories), categorize=False)
339345
if ordered:
340346
cat_array = np.vstack([

pandas/core/groupby/groupby.py

+20-3
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ class providing the base-class of operations.
2626

2727
from pandas.core.dtypes.cast import maybe_downcast_to_dtype
2828
from pandas.core.dtypes.common import (
29-
ensure_float, is_extension_array_dtype, is_numeric_dtype, is_scalar)
29+
_NS_DTYPE, ensure_float, is_datetime64tz_dtype, is_extension_array_dtype,
30+
is_numeric_dtype, is_scalar)
3031
from pandas.core.dtypes.missing import isna, notna
3132

3233
import pandas.core.algorithms as algorithms
@@ -1269,10 +1270,18 @@ def f(self, **kwargs):
12691270
return f
12701271

12711272
def first_compat(x, axis=0):
1273+
# This is a bit strange.
1274+
# We only hit this block when grouping a DatetimeTZBlock *and*
1275+
# a categorical. Something strange going on with first for
1276+
# categorical dta.
1277+
if is_datetime64tz_dtype(x.dtype):
1278+
dtype = _NS_DTYPE
1279+
else:
1280+
dtype = None
12721281

12731282
def first(x):
12741283

1275-
x = np.asarray(x)
1284+
x = np.asarray(x, dtype=dtype)
12761285
x = x[notna(x)]
12771286
if len(x) == 0:
12781287
return np.nan
@@ -1284,10 +1293,18 @@ def first(x):
12841293
return first(x)
12851294

12861295
def last_compat(x, axis=0):
1296+
# This is a bit strange.
1297+
# We only hit this block when grouping a DatetimeTZBlock *and*
1298+
# a categorical. Something strange going on with last for
1299+
# categorical dta.
1300+
if is_datetime64tz_dtype(x.dtype):
1301+
dtype = _NS_DTYPE
1302+
else:
1303+
dtype = None
12871304

12881305
def last(x):
12891306

1290-
x = np.asarray(x)
1307+
x = np.asarray(x, dtype=dtype)
12911308
x = x[notna(x)]
12921309
if len(x) == 0:
12931310
return np.nan

pandas/core/indexes/datetimes.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,21 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None):
339339

340340
# --------------------------------------------------------------------
341341

342+
def __array__(self, dtype=None):
343+
if dtype is None and isinstance(self._data, DatetimeArray)\
344+
and getattr(self.dtype, 'tz', None):
345+
msg = (
346+
"Converting timezone-aware DatetimeArray to timezone-naive "
347+
"ndarray with 'datetime64[ns]' dtype. In the future, this "
348+
"will return an ndarray with 'object' dtype where each "
349+
"element is a 'pandas.Timestamp' with the correct 'tz'.\n\t"
350+
"To accept the future behavior, pass 'dtype=object'.\n\t"
351+
"To keep the old behavior, pass 'dtype=\"datetime64[ns]\"'."
352+
)
353+
warnings.warn(msg, FutureWarning, stacklevel=3)
354+
dtype = 'M8[ns]'
355+
return np.asarray(self._data, dtype=dtype)
356+
342357
@property
343358
def dtype(self):
344359
return self._eadata.dtype
@@ -1114,7 +1129,6 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
11141129

11151130
strftime = ea_passthrough(DatetimeArray.strftime)
11161131
_has_same_tz = ea_passthrough(DatetimeArray._has_same_tz)
1117-
__array__ = ea_passthrough(DatetimeArray.__array__)
11181132

11191133
@property
11201134
def offset(self):

pandas/core/indexing.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,12 @@ def can_do_equal_len():
581581
setter(item, v)
582582

583583
# we have an equal len ndarray/convertible to our labels
584-
elif np.array(value).ndim == 2:
584+
# hasattr first, to avoid coercing to ndarray without reason.
585+
# But we may be relying on the ndarray coercion to check ndim.
586+
# Why not just convert to an ndarray earlier on if needed?
587+
elif ((hasattr(value, 'ndim') and value.ndim == 2)
588+
or (not hasattr(value, 'ndim') and
589+
np.array(value).ndim) == 2):
585590

586591
# note that this coerces the dtype if we are mixed
587592
# GH 7551

pandas/core/internals/blocks.py

+40
Original file line numberDiff line numberDiff line change
@@ -2456,6 +2456,12 @@ def get_values(self, dtype=None):
24562456
values = values.reshape(1, -1)
24572457
return values
24582458

2459+
def to_dense(self):
2460+
# we request M8[ns] dtype here, even though it discards tzinfo,
2461+
# as lots of code (e.g. anything using values_from_object)
2462+
# expects that behavior.
2463+
return np.asarray(self.values, dtype=_NS_DTYPE)
2464+
24592465
def _slice(self, slicer):
24602466
""" return a slice of my values """
24612467
if isinstance(slicer, tuple):
@@ -2600,6 +2606,40 @@ def setitem(self, indexer, value):
26002606
klass=ObjectBlock,)
26012607
return newb.setitem(indexer, value)
26022608

2609+
def quantile(self, qs, interpolation='linear', axis=0, axes=None):
2610+
# TODO: Add quantile as a reduction method.
2611+
# We can't just use Block.quantile, as that converts the DTA
2612+
# to an ndarray[object] via get_values.
2613+
# This method
2614+
# 1. Convert DatetimeTZBlock -> DatetimeBlock
2615+
# 2. Perform the op via Block.quantile
2616+
# 3. Converts back to tz-aware
2617+
# Alternatively, we could special case the call to `get_values`
2618+
# in Block.quantile for DatetimeTZ.
2619+
2620+
new_values = np.asarray(self.values, dtype=_NS_DTYPE)
2621+
if self.ndim == 2:
2622+
new_values = new_values[None, :]
2623+
2624+
new_block = DatetimeBlock(new_values, placement=self.mgr_locs)
2625+
2626+
ax, naive = new_block.quantile(qs, interpolation=interpolation,
2627+
axis=axis, axes=axes)
2628+
2629+
ndim = getattr(naive, 'ndim', None) or 0
2630+
if ndim == 0:
2631+
return ax, self.make_block_scalar(
2632+
tslibs.Timestamp(naive.values.value, tz=self.values.tz)
2633+
)
2634+
else:
2635+
naive = naive.values.ravel()
2636+
2637+
result = DatetimeArray(naive, dtype=self.values.dtype)
2638+
2639+
return ax, make_block(result,
2640+
placement=np.arange(len(result)),
2641+
ndim=ndim)
2642+
26032643

26042644
class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
26052645
__slots__ = ()

pandas/core/internals/construction.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from pandas.core.indexes import base as ibase
3535
from pandas.core.internals import (
3636
create_block_manager_from_arrays, create_block_manager_from_blocks)
37+
from pandas.core.internals.arrays import extract_array
3738

3839
# ---------------------------------------------------------------------
3940
# BlockManager Interface
@@ -539,7 +540,6 @@ def sanitize_array(data, index, dtype=None, copy=False,
539540
Sanitize input data to an ndarray, copy if specified, coerce to the
540541
dtype if specified.
541542
"""
542-
543543
if dtype is not None:
544544
dtype = pandas_dtype(dtype)
545545

@@ -551,8 +551,10 @@ def sanitize_array(data, index, dtype=None, copy=False,
551551
else:
552552
data = data.copy()
553553

554+
data = extract_array(data, extract_numpy=True)
555+
554556
# GH#846
555-
if isinstance(data, (np.ndarray, Index, ABCSeries)):
557+
if isinstance(data, np.ndarray):
556558

557559
if dtype is not None:
558560
subarr = np.array(data, copy=False)

pandas/core/reshape/tile.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pandas._libs.lib import infer_dtype
99

1010
from pandas.core.dtypes.common import (
11-
ensure_int64, is_categorical_dtype, is_datetime64_dtype,
11+
_NS_DTYPE, ensure_int64, is_categorical_dtype, is_datetime64_dtype,
1212
is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, is_integer,
1313
is_scalar, is_timedelta64_dtype)
1414
from pandas.core.dtypes.missing import isna
@@ -226,7 +226,10 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
226226
raise ValueError('Overlapping IntervalIndex is not accepted.')
227227

228228
else:
229-
bins = np.asarray(bins)
229+
if is_datetime64tz_dtype(bins):
230+
bins = np.asarray(bins, dtype=_NS_DTYPE)
231+
else:
232+
bins = np.asarray(bins)
230233
bins = _convert_bin_to_numeric_type(bins, dtype)
231234
if (np.diff(bins) < 0).any():
232235
raise ValueError('bins must increase monotonically.')

pandas/core/series.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
is_extension_array_dtype, is_extension_type, is_hashable, is_integer,
2222
is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype)
2323
from pandas.core.dtypes.generic import (
24-
ABCDataFrame, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries)
24+
ABCDataFrame, ABCDatetimeArray, ABCDatetimeIndex, ABCSeries,
25+
ABCSparseArray, ABCSparseSeries)
2526
from pandas.core.dtypes.missing import (
2627
isna, na_value_for_dtype, notna, remove_na_arraylike)
2728

@@ -665,7 +666,20 @@ def __array__(self, result=None):
665666
"""
666667
The array interface, return my values.
667668
"""
668-
return self.get_values()
669+
# TODO: change the keyword name from result to dtype?
670+
if (result is None and isinstance(self.array, ABCDatetimeArray)
671+
and getattr(self.dtype, 'tz', None)):
672+
msg = (
673+
"Converting timezone-aware DatetimeArray to timezone-naive "
674+
"ndarray with 'datetime64[ns]' dtype. In the future, this "
675+
"will return an ndarray with 'object' dtype where each "
676+
"element is a 'pandas.Timestamp' with the correct 'tz'.\n\t"
677+
"To accept the future behavior, pass 'dtype=object'.\n\t"
678+
"To keep the old behavior, pass 'dtype=\"datetime64[ns]\"'."
679+
)
680+
warnings.warn(msg, FutureWarning, stacklevel=3)
681+
result = 'M8[ns]'
682+
return np.asarray(self.array, result)
669683

670684
def __array_wrap__(self, result, context=None):
671685
"""

pandas/tests/arrays/test_datetimelike.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -264,11 +264,11 @@ def test_array(self, tz_naive_fixture):
264264
arr = DatetimeArray(dti)
265265

266266
expected = dti.asi8.view('M8[ns]')
267-
result = np.array(arr)
267+
result = np.array(arr, dtype='M8[ns]')
268268
tm.assert_numpy_array_equal(result, expected)
269269

270270
# check that we are not making copies when setting copy=False
271-
result = np.array(arr, copy=False)
271+
result = np.array(arr, dtype='M8[ns]', copy=False)
272272
assert result.base is expected.base
273273
assert result.base is not None
274274

pandas/tests/arrays/test_datetimes.py

+33
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,39 @@ def test_fillna_preserves_tz(self, method):
178178
assert arr[2] is pd.NaT
179179
assert dti[2] == pd.Timestamp('2000-01-03', tz='US/Central')
180180

181+
def test_array_interface_tz(self):
182+
tz = "US/Central"
183+
data = DatetimeArray(pd.date_range('2017', periods=2, tz=tz))
184+
result = np.asarray(data)
185+
186+
expected = np.array([pd.Timestamp('2017-01-01T00:00:00', tz=tz),
187+
pd.Timestamp('2017-01-02T00:00:00', tz=tz)],
188+
dtype=object)
189+
tm.assert_numpy_array_equal(result, expected)
190+
191+
result = np.asarray(data, dtype=object)
192+
tm.assert_numpy_array_equal(result, expected)
193+
194+
result = np.asarray(data, dtype='M8[ns]')
195+
196+
expected = np.array(['2017-01-01T06:00:00',
197+
'2017-01-02T06:00:00'], dtype="M8[ns]")
198+
tm.assert_numpy_array_equal(result, expected)
199+
200+
def test_array_interface(self):
201+
data = DatetimeArray(pd.date_range('2017', periods=2))
202+
expected = np.array(['2017-01-01T00:00:00', '2017-01-02T00:00:00'],
203+
dtype='datetime64[ns]')
204+
205+
result = np.asarray(data)
206+
tm.assert_numpy_array_equal(result, expected)
207+
208+
result = np.asarray(data, dtype=object)
209+
expected = np.array([pd.Timestamp('2017-01-01T00:00:00'),
210+
pd.Timestamp('2017-01-02T00:00:00')],
211+
dtype=object)
212+
tm.assert_numpy_array_equal(result, expected)
213+
181214

182215
class TestSequenceToDT64NS(object):
183216

0 commit comments

Comments
 (0)