From ee1e0af4145ffedbd7bc7735a5db0a67a2a6faeb Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 4 Jun 2022 10:00:05 -0700 Subject: [PATCH 1/4] ENH: preserve non-nano DTA/TDA in Index/Series/DataFrame --- pandas/core/indexes/datetimes.py | 12 +++++++ pandas/core/indexes/timedeltas.py | 1 + pandas/core/internals/construction.py | 11 +++--- pandas/tests/frame/test_constructors.py | 47 +++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e7b810dacdf57..4bc01b8f4ddb0 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -326,6 +326,18 @@ def __new__( name = maybe_extract_name(name, data, cls) + if ( + isinstance(data, DatetimeArray) + and freq is lib.no_default + and tz is None + and dtype is None + ): + # fastpath, similar logic in TimedeltaIndex.__new__; + # Note in this particular case we retain non-nano. + if copy: + data = data.copy() + return cls._simple_new(data, name=name) + dtarr = DatetimeArray._from_sequence_not_strict( data, dtype=dtype, diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 0249bf51f71b7..cdf09bbc3b78c 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -132,6 +132,7 @@ def __new__( "represent unambiguous timedelta values durations." ) + # FIXME: need to check for dtype/data match if isinstance(data, TimedeltaArray) and freq is lib.no_default: if copy: data = data.copy() diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 7a5db56cb48fe..c19bb8156db60 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -537,15 +537,16 @@ def treat_as_nested(data) -> bool: # --------------------------------------------------------------------- -def _prep_ndarray(values, copy: bool = True) -> np.ndarray: +def _prep_ndarray( + values, copy: bool = True +) -> np.ndarray | DatetimeArray | TimedeltaArray: if isinstance(values, TimedeltaArray) or ( isinstance(values, DatetimeArray) and values.tz is None ): - # On older numpy, np.asarray below apparently does not call __array__, - # so nanoseconds get dropped. - values = values._ndarray + # By retaining DTA/TDA instead of unpacking, we end up retaining non-nano + pass - if not isinstance(values, (np.ndarray, ABCSeries, Index)): + elif not isinstance(values, (np.ndarray, ABCSeries, Index)): if len(values) == 0: return np.empty((0, 0), dtype=object) elif isinstance(values, range): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index e62c050fbf812..6578b9c8bdb22 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -53,6 +53,7 @@ IntervalArray, PeriodArray, SparseArray, + TimedeltaArray, ) from pandas.core.api import Int64Index @@ -3086,3 +3087,49 @@ def test_tzaware_data_tznaive_dtype(self, constructor): assert np.all(result.dtypes == "M8[ns]") assert np.all(result == ts_naive) + + +# TODO: better location for this test? +class TestAllowNonNano: + # Until 2.0, we do not preserve non-nano dt64/td64 when passed as ndarray, + # but do preserve it when passed as DTA/TDA + + @pytest.fixture(params=[True, False]) + def as_td(self, request): + return request.param + + @pytest.fixture + def arr(self, as_td): + values = np.arange(5).astype(np.int64).view("M8[s]") + if as_td: + values = values - values[0] + return TimedeltaArray._simple_new(values, dtype=values.dtype) + else: + return DatetimeArray._simple_new(values, dtype=values.dtype) + + def test_index_allow_non_nano(self, arr): + idx = Index(arr) + assert idx.dtype == arr.dtype + + def test_dti_tdi_allow_non_nano(self, arr, as_td): + if as_td: + idx = pd.TimedeltaIndex(arr) + else: + idx = DatetimeIndex(arr) + assert idx.dtype == arr.dtype + + def test_series_allow_non_nano(self, arr): + ser = Series(arr) + assert ser.dtype == arr.dtype + + def test_frame_allow_non_nano(self, arr): + df = DataFrame(arr) + assert df.dtypes[0] == arr.dtype + + @pytest.mark.xfail( + reason="stack_arrays converts TDA to ndarray, then goes " + "through ensure_wrapped_if_datetimelike" + ) + def test_frame_from_dict_allow_non_nano(self, arr): + df = DataFrame({0: arr}) + assert df.dtypes[0] == arr.dtype From 6d1c2f536799973a406286e4f0ff9b78661d082d Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 4 Jun 2022 12:47:34 -0700 Subject: [PATCH 2/4] tighten xfail --- pandas/tests/frame/test_constructors.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 6578b9c8bdb22..ed4f0b498eb74 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -18,7 +18,10 @@ import pytest import pytz -from pandas.compat import np_version_under1p19 +from pandas.compat import ( + is_platform_mac, + np_version_under1p19, +) import pandas.util._test_decorators as td from pandas.core.dtypes.common import is_integer_dtype @@ -3127,8 +3130,9 @@ def test_frame_allow_non_nano(self, arr): assert df.dtypes[0] == arr.dtype @pytest.mark.xfail( + is_platform_mac(), reason="stack_arrays converts TDA to ndarray, then goes " - "through ensure_wrapped_if_datetimelike" + "through ensure_wrapped_if_datetimelike", ) def test_frame_from_dict_allow_non_nano(self, arr): df = DataFrame({0: arr}) From 7ddc1d85238e6eb939066f13d611ce68b3ab4e2a Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 5 Jun 2022 17:43:50 -0700 Subject: [PATCH 3/4] _prep_ndarray->_prep_ndarraylike --- pandas/core/internals/construction.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index c19bb8156db60..626809eab304e 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -326,7 +326,7 @@ def ndarray_to_mgr( else: # by definition an array here # the dtypes will be coerced to a single dtype - values = _prep_ndarray(values, copy=copy_on_sanitize) + values = _prep_ndarraylike(values, copy=copy_on_sanitize) if dtype is not None and not is_dtype_equal(values.dtype, dtype): # GH#40110 see similar check inside sanitize_array @@ -341,7 +341,7 @@ def ndarray_to_mgr( allow_2d=True, ) - # _prep_ndarray ensures that values.ndim == 2 at this point + # _prep_ndarraylike ensures that values.ndim == 2 at this point index, columns = _get_axes( values.shape[0], values.shape[1], index=index, columns=columns ) @@ -537,7 +537,7 @@ def treat_as_nested(data) -> bool: # --------------------------------------------------------------------- -def _prep_ndarray( +def _prep_ndarraylike( values, copy: bool = True ) -> np.ndarray | DatetimeArray | TimedeltaArray: if isinstance(values, TimedeltaArray) or ( From 1426d4b0b18ad93f92488dc901fb7fcfa32463d9 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 7 Jun 2022 15:40:46 -0700 Subject: [PATCH 4/4] xfail non-strict --- pandas/tests/frame/test_constructors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 00e36a2202c20..ea367ae4afb74 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -18,7 +18,6 @@ import pytest import pytz -from pandas.compat import is_platform_mac import pandas.util._test_decorators as td from pandas.core.dtypes.common import is_integer_dtype @@ -3126,7 +3125,8 @@ def test_frame_allow_non_nano(self, arr): assert df.dtypes[0] == arr.dtype @pytest.mark.xfail( - is_platform_mac(), + # TODO(2.0): xfail should become unnecessary + strict=False, reason="stack_arrays converts TDA to ndarray, then goes " "through ensure_wrapped_if_datetimelike", )