Skip to content

Commit 09296b8

Browse files
authored
ENH/BUG: DataFrame(object_array_of_periods_or_intervals) (#41812)
1 parent ad58cf6 commit 09296b8

File tree

8 files changed

+73
-15
lines changed

8 files changed

+73
-15
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -892,6 +892,7 @@ Conversion
892892
- Bug in :meth:`qcut` raising error when taking ``Float64Dtype`` as input (:issue:`40730`)
893893
- Bug in :class:`DataFrame` and :class:`Series` construction with ``datetime64[ns]`` data and ``dtype=object`` resulting in ``datetime`` objects instead of :class:`Timestamp` objects (:issue:`41599`)
894894
- Bug in :class:`DataFrame` and :class:`Series` construction with ``timedelta64[ns]`` data and ``dtype=object`` resulting in ``np.timedelta64`` objects instead of :class:`Timedelta` objects (:issue:`41599`)
895+
- Bug in :class:`DataFrame` construction when given a two-dimensional object-dtype ``np.ndarray`` of :class:`Period` or :class:`Interval` objects failing to cast to :class:`PeriodDtype` or :class:`IntervalDtype`, respectively (:issue:`41812`)
895896

896897
Strings
897898
^^^^^^^

pandas/_libs/lib.pyi

+5
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ def maybe_convert_objects(
7373
convert_datetime: Literal[False] = ...,
7474
convert_timedelta: bool = ...,
7575
convert_period: Literal[False] = ...,
76+
convert_interval: Literal[False] = ...,
7677
convert_to_nullable_integer: Literal[False] = ...,
7778
dtype_if_all_nat: DtypeObj | None = ...,
7879
) -> np.ndarray: ...
@@ -86,6 +87,7 @@ def maybe_convert_objects(
8687
convert_datetime: bool = ...,
8788
convert_timedelta: bool = ...,
8889
convert_period: bool = ...,
90+
convert_interval: bool = ...,
8991
convert_to_nullable_integer: Literal[True] = ...,
9092
dtype_if_all_nat: DtypeObj | None = ...,
9193
) -> ArrayLike: ...
@@ -99,6 +101,7 @@ def maybe_convert_objects(
99101
convert_datetime: Literal[True] = ...,
100102
convert_timedelta: bool = ...,
101103
convert_period: bool = ...,
104+
convert_interval: bool = ...,
102105
convert_to_nullable_integer: bool = ...,
103106
dtype_if_all_nat: DtypeObj | None = ...,
104107
) -> ArrayLike: ...
@@ -112,6 +115,7 @@ def maybe_convert_objects(
112115
convert_datetime: bool = ...,
113116
convert_timedelta: bool = ...,
114117
convert_period: Literal[True] = ...,
118+
convert_interval: bool = ...,
115119
convert_to_nullable_integer: bool = ...,
116120
dtype_if_all_nat: DtypeObj | None = ...,
117121
) -> ArrayLike: ...
@@ -125,6 +129,7 @@ def maybe_convert_objects(
125129
convert_datetime: bool = ...,
126130
convert_timedelta: bool = ...,
127131
convert_period: bool = ...,
132+
convert_interval: bool = ...,
128133
convert_to_nullable_integer: bool = ...,
129134
dtype_if_all_nat: DtypeObj | None = ...,
130135
) -> ArrayLike: ...

pandas/_libs/lib.pyx

+17
Original file line numberDiff line numberDiff line change
@@ -1573,6 +1573,7 @@ def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]:
15731573
bint seen_timedelta = False, seen_date = False, seen_datetime = False
15741574
bint seen_tz_aware = False, seen_tz_naive = False
15751575
bint seen_nat = False, seen_str = False
1576+
bint seen_period = False, seen_interval = False
15761577
list objs = []
15771578
object v
15781579

@@ -1610,9 +1611,25 @@ def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]:
16101611
elif is_timedelta(v):
16111612
# timedelta, or timedelta64
16121613
seen_timedelta = True
1614+
elif is_period_object(v):
1615+
seen_period = True
1616+
break
1617+
elif is_interval(v):
1618+
seen_interval = True
1619+
break
16131620
else:
16141621
return "mixed", seen_str
16151622

1623+
if seen_period:
1624+
if is_period_array(arr):
1625+
return "period", seen_str
1626+
return "mixed", seen_str
1627+
1628+
if seen_interval:
1629+
if is_interval_array(arr):
1630+
return "interval", seen_str
1631+
return "mixed", seen_str
1632+
16161633
if seen_date and not (seen_datetime or seen_timedelta):
16171634
return "date", seen_str
16181635
elif seen_datetime and not seen_timedelta:

pandas/core/construction.py

+3-11
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
is_integer_dtype,
4949
is_list_like,
5050
is_object_dtype,
51-
is_string_dtype,
5251
is_timedelta64_ns_dtype,
5352
)
5453
from pandas.core.dtypes.dtypes import DatetimeTZDtype
@@ -565,18 +564,11 @@ def sanitize_array(
565564

566565
subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)
567566

568-
if not (
569-
isinstance(subarr.dtype, ExtensionDtype) or isinstance(dtype, ExtensionDtype)
570-
):
567+
if isinstance(subarr, np.ndarray):
568+
# at this point we should have dtype be None or subarr.dtype == dtype
569+
dtype = cast(np.dtype, dtype)
571570
subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)
572571

573-
is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(dtype)
574-
if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
575-
inferred = lib.infer_dtype(subarr, skipna=False)
576-
if inferred in {"interval", "period"}:
577-
subarr = array(subarr)
578-
subarr = extract_array(subarr, extract_numpy=True)
579-
580572
return subarr
581573

582574

pandas/core/dtypes/cast.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -1464,7 +1464,7 @@ def convert_dtypes(
14641464

14651465
def maybe_infer_to_datetimelike(
14661466
value: np.ndarray,
1467-
) -> np.ndarray | DatetimeArray | TimedeltaArray:
1467+
) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray | IntervalArray:
14681468
"""
14691469
we might have an array (or single object) that is datetime like,
14701470
and no dtype is passed don't change the value unless we find a
@@ -1479,7 +1479,7 @@ def maybe_infer_to_datetimelike(
14791479
14801480
Returns
14811481
-------
1482-
np.ndarray, DatetimeArray, or TimedeltaArray
1482+
np.ndarray, DatetimeArray, TimedeltaArray, PeriodArray, or IntervalArray
14831483
14841484
"""
14851485
if not isinstance(value, np.ndarray) or value.dtype != object:
@@ -1528,6 +1528,13 @@ def try_timedelta(v: np.ndarray) -> np.ndarray:
15281528
return td_values.reshape(shape)
15291529

15301530
inferred_type, seen_str = lib.infer_datetimelike_array(ensure_object(v))
1531+
if inferred_type in ["period", "interval"]:
1532+
# Incompatible return value type (got "Union[ExtensionArray, ndarray]",
1533+
# expected "Union[ndarray, DatetimeArray, TimedeltaArray, PeriodArray,
1534+
# IntervalArray]")
1535+
return lib.maybe_convert_objects( # type: ignore[return-value]
1536+
v, convert_period=True, convert_interval=True
1537+
)
15311538

15321539
if inferred_type == "datetime":
15331540
# error: Incompatible types in assignment (expression has type "ExtensionArray",
@@ -1564,7 +1571,6 @@ def try_timedelta(v: np.ndarray) -> np.ndarray:
15641571
FutureWarning,
15651572
stacklevel=find_stack_level(),
15661573
)
1567-
# return v.reshape(shape)
15681574
return value
15691575

15701576

pandas/tests/extension/base/reshaping.py

+13
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
import numpy as np
44
import pytest
55

6+
from pandas.core.dtypes.common import (
7+
is_interval_dtype,
8+
is_period_dtype,
9+
)
10+
611
import pandas as pd
712
from pandas.api.extensions import ExtensionArray
813
from pandas.core.internals import ExtensionBlock
@@ -321,6 +326,14 @@ def test_unstack(self, data, index, obj):
321326
expected = ser.astype(object).unstack(
322327
level=level, fill_value=data.dtype.na_value
323328
)
329+
if obj == "series":
330+
# TODO: special cases belong in dtype-specific tests
331+
if is_period_dtype(data.dtype):
332+
assert expected.dtypes.apply(is_period_dtype).all()
333+
expected = expected.astype(object)
334+
if is_interval_dtype(data.dtype):
335+
assert expected.dtypes.apply(is_interval_dtype).all()
336+
expected = expected.astype(object)
324337
result = result.astype(object)
325338

326339
self.assert_frame_equal(result, expected)

pandas/tests/frame/test_constructors.py

+22
Original file line numberDiff line numberDiff line change
@@ -2494,6 +2494,28 @@ def test_nested_list_columns(self):
24942494
)
24952495
tm.assert_frame_equal(result, expected)
24962496

2497+
def test_from_2d_object_array_of_periods_or_intervals(self):
2498+
# Period analogue to GH#26825
2499+
pi = pd.period_range("2016-04-05", periods=3)
2500+
data = pi._data.astype(object).reshape(1, -1)
2501+
df = DataFrame(data)
2502+
assert df.shape == (1, 3)
2503+
assert (df.dtypes == pi.dtype).all()
2504+
assert (df == pi).all().all()
2505+
2506+
ii = pd.IntervalIndex.from_breaks([3, 4, 5, 6])
2507+
data2 = ii._data.astype(object).reshape(1, -1)
2508+
df2 = DataFrame(data2)
2509+
assert df2.shape == (1, 3)
2510+
assert (df2.dtypes == ii.dtype).all()
2511+
assert (df2 == ii).all().all()
2512+
2513+
# mixed
2514+
data3 = np.r_[data, data2, data, data2].T
2515+
df3 = DataFrame(data3)
2516+
expected = DataFrame({0: pi, 1: ii, 2: pi, 3: ii})
2517+
tm.assert_frame_equal(df3, expected)
2518+
24972519

24982520
class TestDataFrameConstructorWithDtypeCoercion:
24992521
def test_floating_values_integer_dtype(self):

pandas/tests/tools/test_to_timedelta.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,9 @@ def test_to_timedelta_on_missing_values(self):
208208
tm.assert_series_equal(actual, expected)
209209

210210
with tm.assert_produces_warning(FutureWarning, match="Inferring timedelta64"):
211-
actual = to_timedelta(Series(["00:00:01", pd.NaT]))
211+
ser = Series(["00:00:01", pd.NaT])
212+
assert ser.dtype == "m8[ns]"
213+
actual = to_timedelta(ser)
212214
tm.assert_series_equal(actual, expected)
213215

214216
actual = to_timedelta(np.nan)

0 commit comments

Comments
 (0)