From fbeb80361869c0e963f75dc19e54b2ee2fd03bac Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 8 Nov 2022 15:48:59 -0800 Subject: [PATCH 1/2] API: Index(object_dtype_bool_ndarray) retain object dtype --- doc/source/whatsnew/v2.0.0.rst | 3 ++- pandas/core/indexes/base.py | 22 +++++----------------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index d7ecfa0ca6e38..118505423f51e 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -291,7 +291,8 @@ Other API changes - Passing a sequence containing ``datetime`` objects and ``date`` objects to :class:`Series` constructor will return with ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`) - Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`) - Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`) -- Changed behavior of :class:`Index` construct with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`) +- Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`) +- Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing ``bool`` values, this will now retain object dtype, 
consistent with the :class:`Series` behavior (:issue:`??`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4da8d1a11c607..22719978d1c8b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -81,6 +81,7 @@ find_common_type, infer_dtype_from, maybe_cast_pointwise_result, + maybe_infer_to_datetimelike, np_can_hold_element, ) from pandas.core.dtypes.common import ( @@ -503,9 +504,8 @@ def __new__( arr = com.asarray_tuplesafe(data, dtype=_dtype_obj) if dtype is None: - arr = _maybe_cast_data_without_dtype( - arr, cast_numeric_deprecated=True - ) + arr = maybe_infer_to_datetimelike(arr) + arr = ensure_wrapped_if_datetimelike(arr) dtype = arr.dtype klass = cls._dtype_to_subclass(arr.dtype) @@ -534,9 +534,7 @@ def __new__( subarr = com.asarray_tuplesafe(data, dtype=_dtype_obj) if dtype is None: # with e.g. a list [1, 2, 3] casting to numeric is _not_ deprecated - subarr = _maybe_cast_data_without_dtype( - subarr, cast_numeric_deprecated=False - ) + subarr = _maybe_cast_data_without_dtype(subarr) dtype = subarr.dtype return Index(subarr, dtype=dtype, copy=copy, name=name) @@ -7062,9 +7060,7 @@ def maybe_extract_name(name, obj, cls) -> Hashable: return name -def _maybe_cast_data_without_dtype( - subarr: np.ndarray, cast_numeric_deprecated: bool = True -) -> ArrayLike: +def _maybe_cast_data_without_dtype(subarr: npt.NDArray[np.object_]) -> ArrayLike: """ If we have an arraylike input but no passed dtype, try to infer a supported dtype. @@ -7072,8 +7068,6 @@ def _maybe_cast_data_without_dtype( Parameters ---------- subarr : np.ndarray[object] - cast_numeric_deprecated : bool, default True - Whether to issue a FutureWarning when inferring numeric dtypes. 
Returns ------- @@ -7088,12 +7082,6 @@ def _maybe_cast_data_without_dtype( convert_interval=True, dtype_if_all_nat=np.dtype("datetime64[ns]"), ) - if result.dtype.kind in ["i", "u", "f"]: - if not cast_numeric_deprecated: - # i.e. we started with a list, not an ndarray[object] - return result - return subarr - result = ensure_wrapped_if_datetimelike(result) return result From 2ced0bb3bb78d9f374dab639b128f217d5cee9a1 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 8 Nov 2022 16:37:40 -0800 Subject: [PATCH 2/2] GH ref, test --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/tests/indexes/test_index_new.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 118505423f51e..8297d41dc8a05 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -292,7 +292,7 @@ Other API changes - Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`) - Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`) - Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`) -- Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing ``bool`` values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`??`) +- Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` 
containing all-``bool`` values or all-``complex`` values; this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`) - .. --------------------------------------------------------------------------- diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index b718c33e666d7..4a1333e2b18b4 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -39,6 +39,24 @@ class TestIndexConstructorInference: + def test_object_all_bools(self): + # GH#49594 match Series behavior on ndarray[object] of all bools + arr = np.array([True, False], dtype=object) + res = Index(arr) + assert res.dtype == object + + # since the point is matching Series behavior, let's double check + assert Series(arr).dtype == object + + def test_object_all_complex(self): + # GH#49594 match Series behavior on ndarray[object] of all complex + arr = np.array([complex(1), complex(2)], dtype=object) + res = Index(arr) + assert res.dtype == object + + # since the point is matching Series behavior, let's double check + assert Series(arr).dtype == object + @pytest.mark.parametrize("val", [NaT, None, np.nan, float("nan")]) def test_infer_nat(self, val): # GH#49340 all NaT/None/nan and at least 1 NaT -> datetime64[ns],