Skip to content

Commit c217656

Browse files
TomAugspurger authored and jreback committed
Added Datetime & Timedelta inference to array (#24571)
1 parent a422da1 commit c217656

File tree

2 files changed

+76
-9
lines changed

2 files changed

+76
-9
lines changed

pandas/core/arrays/array_.py

+25-9
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,14 @@ def array(data, # type: Sequence[object]
4646
4747
Currently, pandas will infer an extension dtype for sequences of
4848
49-
========================== ==================================
50-
scalar type Array Type
51-
========================== ==================================
52-
* :class:`pandas.Interval` :class:`pandas.IntervalArray`
53-
* :class:`pandas.Period` :class:`pandas.arrays.PeriodArray`
54-
========================== ==================================
49+
============================== =====================================
50+
scalar type Array Type
51+
============================= =====================================
52+
* :class:`pandas.Interval` :class:`pandas.IntervalArray`
53+
* :class:`pandas.Period` :class:`pandas.arrays.PeriodArray`
54+
* :class:`datetime.datetime` :class:`pandas.arrays.DatetimeArray`
55+
* :class:`datetime.timedelta` :class:`pandas.arrays.TimedeltaArray`
56+
============================= =====================================
5557
5658
For all other cases, NumPy's usual inference rules will be used.
5759
@@ -62,7 +64,8 @@ def array(data, # type: Sequence[object]
6264
6365
Returns
6466
-------
65-
array : ExtensionArray
67+
ExtensionArray
68+
The newly created array.
6669
6770
Raises
6871
------
@@ -180,7 +183,9 @@ def array(data, # type: Sequence[object]
180183
ValueError: Cannot pass scalar '1' to 'pandas.array'.
181184
"""
182185
from pandas.core.arrays import (
183-
period_array, ExtensionArray, IntervalArray, PandasArray
186+
period_array, ExtensionArray, IntervalArray, PandasArray,
187+
DatetimeArrayMixin,
188+
TimedeltaArrayMixin,
184189
)
185190
from pandas.core.internals.arrays import extract_array
186191

@@ -220,7 +225,18 @@ def array(data, # type: Sequence[object]
220225
# We choose to return an ndarray, rather than raising.
221226
pass
222227

223-
# TODO(DatetimeArray): handle this type
228+
elif inferred_dtype.startswith('datetime'):
229+
# datetime, datetime64
230+
try:
231+
return DatetimeArrayMixin._from_sequence(data, copy=copy)
232+
except ValueError:
233+
# Mixture of timezones, fall back to PandasArray
234+
pass
235+
236+
elif inferred_dtype.startswith('timedelta'):
237+
# timedelta, timedelta64
238+
return TimedeltaArrayMixin._from_sequence(data, copy=copy)
239+
224240
# TODO(BooleanArray): handle this type
225241

226242
result = PandasArray._from_sequence(data, dtype=dtype, copy=copy)

pandas/tests/arrays/test_array.py

+51
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import datetime
12
import decimal
23

34
import numpy as np
45
import pytest
6+
import pytz
57

68
from pandas.core.dtypes.dtypes import registry
79

@@ -89,11 +91,51 @@ def test_array_copy():
8991
assert np.shares_memory(a, b._ndarray) is True
9092

9193

94+
cet = pytz.timezone("CET")
95+
96+
9297
@pytest.mark.parametrize('data, expected', [
98+
# period
9399
([pd.Period("2000", "D"), pd.Period("2001", "D")],
94100
period_array(["2000", "2001"], freq="D")),
101+
102+
# interval
95103
([pd.Interval(0, 1), pd.Interval(1, 2)],
96104
pd.IntervalArray.from_breaks([0, 1, 2])),
105+
106+
# datetime
107+
([pd.Timestamp('2000',), pd.Timestamp('2001')],
108+
pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])),
109+
110+
([datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)],
111+
pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])),
112+
113+
(np.array([1, 2], dtype='M8[ns]'),
114+
pd.arrays.DatetimeArray(np.array([1, 2], dtype='M8[ns]'))),
115+
116+
(np.array([1, 2], dtype='M8[us]'),
117+
pd.arrays.DatetimeArray(np.array([1000, 2000], dtype='M8[ns]'))),
118+
119+
# datetimetz
120+
([pd.Timestamp('2000', tz='CET'), pd.Timestamp('2001', tz='CET')],
121+
pd.arrays.DatetimeArray._from_sequence(
122+
['2000', '2001'], dtype=pd.DatetimeTZDtype(tz='CET'))),
123+
124+
([datetime.datetime(2000, 1, 1, tzinfo=cet),
125+
datetime.datetime(2001, 1, 1, tzinfo=cet)],
126+
pd.arrays.DatetimeArray._from_sequence(['2000', '2001'],
127+
tz=cet)),
128+
129+
# timedelta
130+
([pd.Timedelta('1H'), pd.Timedelta('2H')],
131+
pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])),
132+
133+
(np.array([1, 2], dtype='m8[ns]'),
134+
pd.arrays.TimedeltaArray(np.array([1, 2], dtype='m8[ns]'))),
135+
136+
(np.array([1, 2], dtype='m8[us]'),
137+
pd.arrays.TimedeltaArray(np.array([1000, 2000], dtype='m8[ns]'))),
138+
97139
])
98140
def test_array_inference(data, expected):
99141
result = pd.array(data)
@@ -105,6 +147,15 @@ def test_array_inference(data, expected):
105147
[pd.Period("2000", "D"), pd.Period("2001", "A")],
106148
# mix of closed
107149
[pd.Interval(0, 1, closed='left'), pd.Interval(1, 2, closed='right')],
150+
# Mix of timezones
151+
[pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000", tz="UTC")],
152+
# Mix of tz-aware and tz-naive
153+
[pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000")],
154+
# GH-24569
155+
pytest.param(
156+
np.array([pd.Timestamp('2000'), pd.Timestamp('2000', tz='CET')]),
157+
marks=pytest.mark.xfail(reason="bug in DTA._from_sequence")
158+
),
108159
])
109160
def test_array_inference_fails(data):
110161
result = pd.array(data)

0 commit comments

Comments
 (0)