Skip to content

Commit f2dff26

Browse files
jbrockmendel authored and yehoshuadimarsky committed
ENH: preserve non-nano DTA/TDA in Index/Series/DataFrame (pandas-dev#47230)
* ENH: preserve non-nano DTA/TDA in Index/Series/DataFrame
* tighten xfail
* _prep_ndarray->_prep_ndarraylike
* xfail non-strict
1 parent eaad290 commit f2dff26

File tree

4 files changed

+75
-12
lines changed

4 files changed

+75
-12
lines changed

pandas/core/indexes/datetimes.py

+12
Original file line number | Diff line number | Diff line change
@@ -326,6 +326,18 @@ def __new__(
326326

327327
name = maybe_extract_name(name, data, cls)
328328

329+
if (
330+
isinstance(data, DatetimeArray)
331+
and freq is lib.no_default
332+
and tz is None
333+
and dtype is None
334+
):
335+
# fastpath, similar logic in TimedeltaIndex.__new__;
336+
# Note in this particular case we retain non-nano.
337+
if copy:
338+
data = data.copy()
339+
return cls._simple_new(data, name=name)
340+
329341
dtarr = DatetimeArray._from_sequence_not_strict(
330342
data,
331343
dtype=dtype,

pandas/core/indexes/timedeltas.py

+1
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,7 @@ def __new__(
132132
"represent unambiguous timedelta values durations."
133133
)
134134

135+
# FIXME: need to check for dtype/data match
135136
if isinstance(data, TimedeltaArray) and freq is lib.no_default:
136137
if copy:
137138
data = data.copy()

pandas/core/internals/construction.py

+8-7
Original file line number | Diff line number | Diff line change
@@ -326,7 +326,7 @@ def ndarray_to_mgr(
326326
else:
327327
# by definition an array here
328328
# the dtypes will be coerced to a single dtype
329-
values = _prep_ndarray(values, copy=copy_on_sanitize)
329+
values = _prep_ndarraylike(values, copy=copy_on_sanitize)
330330

331331
if dtype is not None and not is_dtype_equal(values.dtype, dtype):
332332
# GH#40110 see similar check inside sanitize_array
@@ -341,7 +341,7 @@ def ndarray_to_mgr(
341341
allow_2d=True,
342342
)
343343

344-
# _prep_ndarray ensures that values.ndim == 2 at this point
344+
# _prep_ndarraylike ensures that values.ndim == 2 at this point
345345
index, columns = _get_axes(
346346
values.shape[0], values.shape[1], index=index, columns=columns
347347
)
@@ -537,15 +537,16 @@ def treat_as_nested(data) -> bool:
537537
# ---------------------------------------------------------------------
538538

539539

540-
def _prep_ndarray(values, copy: bool = True) -> np.ndarray:
540+
def _prep_ndarraylike(
541+
values, copy: bool = True
542+
) -> np.ndarray | DatetimeArray | TimedeltaArray:
541543
if isinstance(values, TimedeltaArray) or (
542544
isinstance(values, DatetimeArray) and values.tz is None
543545
):
544-
# On older numpy, np.asarray below apparently does not call __array__,
545-
# so nanoseconds get dropped.
546-
values = values._ndarray
546+
# By retaining DTA/TDA instead of unpacking, we end up retaining non-nano
547+
pass
547548

548-
if not isinstance(values, (np.ndarray, ABCSeries, Index)):
549+
elif not isinstance(values, (np.ndarray, ABCSeries, Index)):
549550
if len(values) == 0:
550551
return np.empty((0, 0), dtype=object)
551552
elif isinstance(values, range):

pandas/tests/frame/test_constructors.py

+54-5
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@
5252
IntervalArray,
5353
PeriodArray,
5454
SparseArray,
55+
TimedeltaArray,
5556
)
5657
from pandas.core.api import Int64Index
5758

@@ -2665,6 +2666,12 @@ def test_from_dict_with_missing_copy_false(self):
26652666
)
26662667
tm.assert_frame_equal(df, expected)
26672668

2669+
def test_construction_empty_array_multi_column_raises(self):
2670+
# GH#46822
2671+
msg = "Empty data passed with indices specified."
2672+
with pytest.raises(ValueError, match=msg):
2673+
DataFrame(data=np.array([]), columns=["a", "b"])
2674+
26682675

26692676
class TestDataFrameConstructorIndexInference:
26702677
def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self):
@@ -3086,8 +3093,50 @@ def test_tzaware_data_tznaive_dtype(self, constructor):
30863093
assert np.all(result.dtypes == "M8[ns]")
30873094
assert np.all(result == ts_naive)
30883095

3089-
def test_construction_empty_array_multi_column_raises(self):
3090-
# GH#46822
3091-
msg = "Empty data passed with indices specified."
3092-
with pytest.raises(ValueError, match=msg):
3093-
DataFrame(data=np.array([]), columns=["a", "b"])
3096+
3097+
# TODO: better location for this test?
3098+
class TestAllowNonNano:
3099+
# Until 2.0, we do not preserve non-nano dt64/td64 when passed as ndarray,
3100+
# but do preserve it when passed as DTA/TDA
3101+
3102+
@pytest.fixture(params=[True, False])
3103+
def as_td(self, request):
3104+
return request.param
3105+
3106+
@pytest.fixture
3107+
def arr(self, as_td):
3108+
values = np.arange(5).astype(np.int64).view("M8[s]")
3109+
if as_td:
3110+
values = values - values[0]
3111+
return TimedeltaArray._simple_new(values, dtype=values.dtype)
3112+
else:
3113+
return DatetimeArray._simple_new(values, dtype=values.dtype)
3114+
3115+
def test_index_allow_non_nano(self, arr):
3116+
idx = Index(arr)
3117+
assert idx.dtype == arr.dtype
3118+
3119+
def test_dti_tdi_allow_non_nano(self, arr, as_td):
3120+
if as_td:
3121+
idx = pd.TimedeltaIndex(arr)
3122+
else:
3123+
idx = DatetimeIndex(arr)
3124+
assert idx.dtype == arr.dtype
3125+
3126+
def test_series_allow_non_nano(self, arr):
3127+
ser = Series(arr)
3128+
assert ser.dtype == arr.dtype
3129+
3130+
def test_frame_allow_non_nano(self, arr):
3131+
df = DataFrame(arr)
3132+
assert df.dtypes[0] == arr.dtype
3133+
3134+
@pytest.mark.xfail(
3135+
# TODO(2.0): xfail should become unnecessary
3136+
strict=False,
3137+
reason="stack_arrays converts TDA to ndarray, then goes "
3138+
"through ensure_wrapped_if_datetimelike",
3139+
)
3140+
def test_frame_from_dict_allow_non_nano(self, arr):
3141+
df = DataFrame({0: arr})
3142+
assert df.dtypes[0] == arr.dtype

0 commit comments

Comments
 (0)