Skip to content

Commit e67183f

Browse files
jbrockmendel authored and phofl committed
API: Index(object_dtype_bool_ndarray) retain object dtype (pandas-dev#49594)
* API: Index(object_dtype_bool_ndarray) retain object dtype * GH ref, test
1 parent 1b8104d commit e67183f

File tree

3 files changed

+25
-18
lines changed

3 files changed

+25
-18
lines changed

doc/source/whatsnew/v2.0.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,8 @@ Other API changes
291291
- Passing a sequence containing ``datetime`` objects and ``date`` objects to :class:`Series` constructor will return with ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`)
292292
- Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`)
293293
- Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`)
294-
- Changed behavior of :class:`Index` construct with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`)
294+
- Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`)
295+
- Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values; this will now retain ``object`` dtype, consistent with :class:`Series` behavior (:issue:`49594`)
295296
-
296297

297298
.. ---------------------------------------------------------------------------

pandas/core/indexes/base.py

+5-17
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
find_common_type,
8282
infer_dtype_from,
8383
maybe_cast_pointwise_result,
84+
maybe_infer_to_datetimelike,
8485
np_can_hold_element,
8586
)
8687
from pandas.core.dtypes.common import (
@@ -503,9 +504,8 @@ def __new__(
503504
arr = com.asarray_tuplesafe(data, dtype=_dtype_obj)
504505

505506
if dtype is None:
506-
arr = _maybe_cast_data_without_dtype(
507-
arr, cast_numeric_deprecated=True
508-
)
507+
arr = maybe_infer_to_datetimelike(arr)
508+
arr = ensure_wrapped_if_datetimelike(arr)
509509
dtype = arr.dtype
510510

511511
klass = cls._dtype_to_subclass(arr.dtype)
@@ -534,9 +534,7 @@ def __new__(
534534
subarr = com.asarray_tuplesafe(data, dtype=_dtype_obj)
535535
if dtype is None:
536536
# with e.g. a list [1, 2, 3] casting to numeric is _not_ deprecated
537-
subarr = _maybe_cast_data_without_dtype(
538-
subarr, cast_numeric_deprecated=False
539-
)
537+
subarr = _maybe_cast_data_without_dtype(subarr)
540538
dtype = subarr.dtype
541539
return Index(subarr, dtype=dtype, copy=copy, name=name)
542540

@@ -7058,18 +7056,14 @@ def maybe_extract_name(name, obj, cls) -> Hashable:
70587056
return name
70597057

70607058

7061-
def _maybe_cast_data_without_dtype(
7062-
subarr: np.ndarray, cast_numeric_deprecated: bool = True
7063-
) -> ArrayLike:
7059+
def _maybe_cast_data_without_dtype(subarr: npt.NDArray[np.object_]) -> ArrayLike:
70647060
"""
70657061
If we have an arraylike input but no passed dtype, try to infer
70667062
a supported dtype.
70677063
70687064
Parameters
70697065
----------
70707066
subarr : np.ndarray[object]
7071-
cast_numeric_deprecated : bool, default True
7072-
Whether to issue a FutureWarning when inferring numeric dtypes.
70737067
70747068
Returns
70757069
-------
@@ -7084,12 +7078,6 @@ def _maybe_cast_data_without_dtype(
70847078
convert_interval=True,
70857079
dtype_if_all_nat=np.dtype("datetime64[ns]"),
70867080
)
7087-
if result.dtype.kind in ["i", "u", "f"]:
7088-
if not cast_numeric_deprecated:
7089-
# i.e. we started with a list, not an ndarray[object]
7090-
return result
7091-
return subarr
7092-
70937081
result = ensure_wrapped_if_datetimelike(result)
70947082
return result
70957083

pandas/tests/indexes/test_index_new.py

+18
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,24 @@
3939

4040

4141
class TestIndexConstructorInference:
42+
def test_object_all_bools(self):
43+
# GH#49594 match Series behavior on ndarray[object] of all bools
44+
arr = np.array([True, False], dtype=object)
45+
res = Index(arr)
46+
assert res.dtype == object
47+
48+
# since the point is matching Series behavior, let's double check
49+
assert Series(arr).dtype == object
50+
51+
def test_object_all_complex(self):
52+
# GH#49594 match Series behavior on ndarray[object] of all complex
53+
arr = np.array([complex(1), complex(2)], dtype=object)
54+
res = Index(arr)
55+
assert res.dtype == object
56+
57+
# since the point is matching Series behavior, let's double check
58+
assert Series(arr).dtype == object
59+
4260
@pytest.mark.parametrize("val", [NaT, None, np.nan, float("nan")])
4361
def test_infer_nat(self, val):
4462
# GH#49340 all NaT/None/nan and at least 1 NaT -> datetime64[ns],

0 commit comments

Comments
 (0)