Skip to content

Commit 4ed3de0

Browse files
TomAugspurger authored and Pingviinituutti committed
API: Datetime/TimedeltaArray from to_datetime (pandas-dev#24660)
* API: Datetime/TimedeltaArray from to_datetime Closes pandas-dev#24656
1 parent e53258f commit 4ed3de0

File tree

3 files changed

+77
-4
lines changed

3 files changed

+77
-4
lines changed

pandas/core/arrays/array_.py

+33-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from pandas._libs import lib, tslibs
22

3-
from pandas.core.dtypes.common import is_extension_array_dtype
3+
from pandas.core.dtypes.common import (
4+
is_datetime64_ns_dtype, is_extension_array_dtype, is_timedelta64_ns_dtype)
45
from pandas.core.dtypes.dtypes import registry
56

67
from pandas import compat
@@ -75,9 +76,10 @@ def array(data, # type: Sequence[object]
7576
See Also
7677
--------
7778
numpy.array : Construct a NumPy array.
78-
arrays.PandasArray : ExtensionArray wrapping a NumPy array.
7979
Series : Construct a pandas Series.
8080
Index : Construct a pandas Index.
81+
arrays.PandasArray : ExtensionArray wrapping a NumPy array.
82+
Series.array : Extract the array stored within a Series.
8183
8284
Notes
8385
-----
@@ -120,6 +122,26 @@ def array(data, # type: Sequence[object]
120122
['a', 'b']
121123
Length: 2, dtype: str32
122124
125+
Finally, Pandas has arrays that mostly overlap with NumPy:
126+
127+
* :class:`arrays.DatetimeArray`
128+
* :class:`arrays.TimedeltaArray`
129+
130+
When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is
131+
passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray``
132+
rather than a ``PandasArray``. This is for symmetry with the case of
133+
timezone-aware data, which NumPy does not natively support.
134+
135+
>>> pd.array(['2015', '2016'], dtype='datetime64[ns]')
136+
<DatetimeArray>
137+
['2015-01-01 00:00:00', '2016-01-01 00:00:00']
138+
Length: 2, dtype: datetime64[ns]
139+
140+
>>> pd.array(["1H", "2H"], dtype='timedelta64[ns]')
141+
<TimedeltaArray>
142+
['01:00:00', '02:00:00']
143+
Length: 2, dtype: timedelta64[ns]
144+
123145
Examples
124146
--------
125147
If a dtype is not specified, `data` is passed through to
@@ -239,5 +261,14 @@ def array(data, # type: Sequence[object]
239261

240262
# TODO(BooleanArray): handle this type
241263

264+
# Pandas overrides NumPy for
265+
# 1. datetime64[ns]
266+
# 2. timedelta64[ns]
267+
# so that a DatetimeArray or TimedeltaArray is returned.
268+
if is_datetime64_ns_dtype(dtype):
269+
return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy)
270+
elif is_timedelta64_ns_dtype(dtype):
271+
return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy)
272+
242273
result = PandasArray._from_sequence(data, dtype=dtype, copy=copy)
243274
return result

pandas/tests/arrays/test_array.py

+30-2
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,36 @@
3636
3737
# Datetime (naive)
3838
([1, 2], np.dtype('datetime64[ns]'),
39-
PandasArray(np.array([1, 2], dtype='datetime64[ns]'))),
40-
# TODO(DatetimeArray): add here
39+
pd.arrays.DatetimeArray._from_sequence(
40+
np.array([1, 2], dtype='datetime64[ns]'))),
41+
42+
(np.array([1, 2], dtype='datetime64[ns]'), None,
43+
pd.arrays.DatetimeArray._from_sequence(
44+
np.array([1, 2], dtype='datetime64[ns]'))),
45+
46+
(pd.DatetimeIndex(['2000', '2001']), np.dtype('datetime64[ns]'),
47+
pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])),
48+
49+
(pd.DatetimeIndex(['2000', '2001']), None,
50+
pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])),
51+
52+
(['2000', '2001'], np.dtype('datetime64[ns]'),
53+
pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])),
54+
55+
# Datetime (tz-aware)
56+
(['2000', '2001'], pd.DatetimeTZDtype(tz="CET"),
57+
pd.arrays.DatetimeArray._from_sequence(
58+
['2000', '2001'], dtype=pd.DatetimeTZDtype(tz="CET"))),
59+
60+
# Timedelta
61+
(['1H', '2H'], np.dtype('timedelta64[ns]'),
62+
pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])),
63+
64+
(pd.TimedeltaIndex(['1H', '2H']), np.dtype('timedelta64[ns]'),
65+
pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])),
66+
67+
(pd.TimedeltaIndex(['1H', '2H']), None,
68+
pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])),
4169
4270
# Category
4371
(['a', 'b'], 'category', pd.Categorical(['a', 'b'])),

pandas/tests/series/test_internals.py

+14
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,20 @@ def test_constructor_no_pandas_array(self):
313313
tm.assert_series_equal(ser, result)
314314
assert isinstance(result._data.blocks[0], IntBlock)
315315

316+
def test_from_array(self):
317+
result = pd.Series(pd.array(['1H', '2H'], dtype='timedelta64[ns]'))
318+
assert result._data.blocks[0].is_extension is False
319+
320+
result = pd.Series(pd.array(['2015'], dtype='datetime64[ns]'))
321+
assert result._data.blocks[0].is_extension is False
322+
323+
def test_from_list_dtype(self):
324+
result = pd.Series(['1H', '2H'], dtype='timedelta64[ns]')
325+
assert result._data.blocks[0].is_extension is False
326+
327+
result = pd.Series(['2015'], dtype='datetime64[ns]')
328+
assert result._data.blocks[0].is_extension is False
329+
316330

317331
def test_hasnans_unchached_for_series():
318332
# GH#19700

0 commit comments

Comments
 (0)