From e1bedfb0c6710bf3b6f4c080ad1ed633ff12f010 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 7 Jan 2019 07:06:31 -0600 Subject: [PATCH 1/2] API: Datetime/TimedeltaArray from to_datetime Closes https://github.com/pandas-dev/pandas/issues/24656 --- pandas/core/arrays/array_.py | 35 +++++++++++++++++++++++++-- pandas/tests/arrays/test_array.py | 26 ++++++++++++++++++-- pandas/tests/series/test_internals.py | 14 +++++++++++ 3 files changed, 71 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/array_.py b/pandas/core/arrays/array_.py index 32c08e40b8033..c7be8e3f745c4 100644 --- a/pandas/core/arrays/array_.py +++ b/pandas/core/arrays/array_.py @@ -1,6 +1,7 @@ from pandas._libs import lib, tslibs -from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.common import ( + is_datetime64_ns_dtype, is_extension_array_dtype, is_timedelta64_ns_dtype) from pandas.core.dtypes.dtypes import registry from pandas import compat @@ -75,9 +76,10 @@ def array(data, # type: Sequence[object] See Also -------- numpy.array : Construct a NumPy array. - arrays.PandasArray : ExtensionArray wrapping a NumPy array. Series : Construct a pandas Series. Index : Construct a pandas Index. + arrays.PandasArray : ExtensionArray wrapping a NumPy array. + Series.array : Extract the array stored within a Series. Notes ----- @@ -120,6 +122,26 @@ def array(data, # type: Sequence[object] ['a', 'b'] Length: 2, dtype: str32 + Finally, Pandas has arrays that mostly overlap with NumPy + + * :class:`arrays.DatetimeArray` + * :class:`arrays.TimedeltaArray` + + When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is + passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray`` + rather than a ``PandasArray``. This is for symmetry with the case of + timezone-aware data, which NumPy does not natively support. 
+ + >>> pd.array(['2015', '2016'], dtype='datetime64[ns]') + <DatetimeArray> + ['2015-01-01 00:00:00', '2016-01-01 00:00:00'] + Length: 2, dtype: datetime64[ns] + + >>> pd.array(["1H", "2H"], dtype='timedelta64[ns]') + <TimedeltaArray> + ['01:00:00', '02:00:00'] + Length: 2, dtype: timedelta64[ns] + Examples -------- If a dtype is not specified, `data` is passed through to @@ -239,5 +261,14 @@ def array(data, # type: Sequence[object] # TODO(BooleanArray): handle this type + # Pandas overrides NumPy for + # 1. datetime64[ns] + # 2. timedelta64[ns] + # so that a DatetimeArray or TimedeltaArray (respectively) is returned. + if is_datetime64_ns_dtype(dtype): + return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy) + elif is_timedelta64_ns_dtype(dtype): + return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy) + result = PandasArray._from_sequence(data, dtype=dtype, copy=copy) return result diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 1d09a1f65e43f..9393a840fd762 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -36,8 +36,30 @@ # Datetime (naive) ([1, 2], np.dtype('datetime64[ns]'), - PandasArray(np.array([1, 2], dtype='datetime64[ns]'))), - # TODO(DatetimeArray): add here + pd.arrays.DatetimeArray._from_sequence( + np.array([1, 2], dtype='datetime64[ns]'))), + + (np.array([1, 2], dtype='datetime64[ns]'), None, + pd.arrays.DatetimeArray._from_sequence( + np.array([1, 2], dtype='datetime64[ns]'))), + + (pd.DatetimeIndex(['2000', '2001']), np.dtype('datetime64[ns]'), + pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])), + + (['2000', '2001'], np.dtype('datetime64[ns]'), + pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])), + + # Datetime (tz-aware) + (['2000', '2001'], pd.DatetimeTZDtype(tz="CET"), + pd.arrays.DatetimeArray._from_sequence( + ['2000', '2001'], dtype=pd.DatetimeTZDtype(tz="CET"))), + + # Timedelta + (['1H', '2H'], np.dtype('timedelta64[ns]'), + pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])), + + 
(pd.TimedeltaIndex(['1H', '2H']), np.dtype('timedelta64[ns]'), + pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])), # Category (['a', 'b'], 'category', pd.Categorical(['a', 'b'])), diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index 31cbea8f95090..772617c494aef 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -313,6 +313,20 @@ def test_constructor_no_pandas_array(self): tm.assert_series_equal(ser, result) assert isinstance(result._data.blocks[0], IntBlock) + def test_from_array(self): + result = pd.Series(pd.array(['1H', '2H'], dtype='timedelta64[ns]')) + assert result._data.blocks[0].is_extension is False + + result = pd.Series(pd.array(['2015'], dtype='datetime64[ns]')) + assert result._data.blocks[0].is_extension is False + + def test_from_list_dtype(self): + result = pd.Series(['1H', '2H'], dtype='timedelta64[ns]') + assert result._data.blocks[0].is_extension is False + + result = pd.Series(['2015'], dtype='datetime64[ns]') + assert result._data.blocks[0].is_extension is False + def test_hasnans_unchached_for_series(): # GH#19700 From 351c781c55b5391833ab8cccf23669226cc8de52 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 8 Jan 2019 09:24:29 -0600 Subject: [PATCH 2/2] more tests --- pandas/tests/arrays/test_array.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 9393a840fd762..69221c5048307 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -46,6 +46,9 @@ (pd.DatetimeIndex(['2000', '2001']), np.dtype('datetime64[ns]'), pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])), + (pd.DatetimeIndex(['2000', '2001']), None, + pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])), + (['2000', '2001'], np.dtype('datetime64[ns]'), pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])), @@ -61,6 +64,9 @@ (pd.TimedeltaIndex(['1H', 
'2H']), np.dtype('timedelta64[ns]'), pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])), + (pd.TimedeltaIndex(['1H', '2H']), None, + pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])), + # Category (['a', 'b'], 'category', pd.Categorical(['a', 'b'])), (['a', 'b'], pd.CategoricalDtype(None, ordered=True),