diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index d7e15bb2ad197..ba82a7840c4f9 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2029,16 +2029,59 @@ cdef bint is_period_array(ndarray[object] values): return True -cdef class IntervalValidator(Validator): - cdef inline bint is_value_typed(self, object value) except -1: - return is_interval(value) - - cpdef bint is_interval_array(ndarray values): + """ + Is this an ndarray of Interval (or np.nan) with a single dtype? + """ + cdef: - IntervalValidator validator = IntervalValidator(len(values), - skipna=True) - return validator.validate(values) + Py_ssize_t i, n = len(values) + str closed = None + bint numeric = False + bint dt64 = False + bint td64 = False + object val + + if len(values) == 0: + return False + + for val in values: + if is_interval(val): + if closed is None: + closed = val.closed + numeric = ( + util.is_float_object(val.left) + or util.is_integer_object(val.left) + ) + td64 = is_timedelta(val.left) + dt64 = PyDateTime_Check(val.left) + elif val.closed != closed: + # mismatched closedness + return False + elif numeric: + if not ( + util.is_float_object(val.left) + or util.is_integer_object(val.left) + ): + # i.e. datetime64 or timedelta64 + return False + elif td64: + if not is_timedelta(val.left): + return False + elif dt64: + if not PyDateTime_Check(val.left): + return False + else: + raise ValueError(val) + elif util.is_nan(val) or val is None: + pass + else: + return False + + if closed is None: + # we saw all-NAs, no actual Intervals + return False + return True @cython.boundscheck(False) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 0c299056075c1..d34ae6179fe76 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -317,12 +317,7 @@ def array( return PeriodArray._from_sequence(data, copy=copy) elif inferred_dtype == "interval": - try: - return IntervalArray(data, copy=copy) - except ValueError: - # We may have a mixture of `closed` here. - # We choose to return an ndarray, rather than raising. - pass + return IntervalArray(data, copy=copy) elif inferred_dtype.startswith("datetime"): # datetime, datetime64 diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 30215b40593d3..aa8be070df312 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6441,12 +6441,8 @@ def _maybe_cast_data_without_dtype(subarr: np.ndarray) -> ArrayLike: return data elif inferred == "interval": - try: - ia_data = IntervalArray._from_sequence(subarr, copy=False) - return ia_data - except (ValueError, TypeError): - # GH27172: mixed closed Intervals --> object dtype - pass + ia_data = IntervalArray._from_sequence(subarr, copy=False) + return ia_data elif inferred == "boolean": # don't support boolean explicitly ATM pass diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 09efa97871fae..073a1ff28815b 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1458,17 +1458,54 @@ def test_categorical(self): result = lib.infer_dtype(Series(arr), skipna=True) assert result == "categorical" - def test_interval(self): + @pytest.mark.parametrize("asobject", [True, False]) + def test_interval(self, asobject): idx = pd.IntervalIndex.from_breaks(range(5), closed="both") + if asobject: + idx = idx.astype(object) + inferred = lib.infer_dtype(idx, skipna=False) assert inferred == "interval" inferred = lib.infer_dtype(idx._data, skipna=False) assert inferred == "interval" - inferred = lib.infer_dtype(Series(idx), skipna=False) + inferred = lib.infer_dtype(Series(idx, dtype=idx.dtype), skipna=False) assert inferred == "interval" + @pytest.mark.parametrize("value", [Timestamp(0), Timedelta(0), 0, 0.0]) + def test_interval_mismatched_closed(self, value): + + first = Interval(value, value, closed="left") + second = Interval(value, value, closed="right") + + # if closed match, we should infer "interval" + arr = np.array([first, first], dtype=object) + assert lib.infer_dtype(arr, skipna=False) == "interval" + + # if closed dont match, we should _not_ get "interval" + arr2 = np.array([first, second], dtype=object) + assert lib.infer_dtype(arr2, skipna=False) == "mixed" + + def test_interval_mismatched_subtype(self): + first = Interval(0, 1, closed="left") + second = Interval(Timestamp(0), Timestamp(1), closed="left") + third = Interval(Timedelta(0), Timedelta(1), closed="left") + + arr = np.array([first, second]) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array([second, third]) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array([first, third]) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + # float vs int subdtype are compatible + flt_interval = Interval(1.5, 2.5, closed="left") + arr = np.array([first, flt_interval], dtype=object) + assert lib.infer_dtype(arr, skipna=False) == "interval" + @pytest.mark.parametrize("klass", [pd.array, Series]) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("data", [["a", "b", "c"], ["a", "b", pd.NA]])