diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 9be54e95bf36f..0e613760f4927 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -404,6 +404,7 @@ Strings Interval ^^^^^^^^ - Bug in :class:`Interval` ``__repr__`` not displaying UTC offsets for :class:`Timestamp` bounds. Additionally the hour, minute and second components will now be shown. (:issue:`55015`) +- Bug in :meth:`IntervalIndex.factorize` and :meth:`Series.factorize` with :class:`IntervalDtype` with datetime64 or timedelta64 intervals not preserving non-nanosecond units (:issue:`56099`) - Bug in :meth:`IntervalIndex.from_arrays` when passed ``datetime64`` or ``timedelta64`` arrays with mismatched resolutions constructing an invalid ``IntervalArray`` object (:issue:`55714`) - Bug in :meth:`IntervalIndex.get_indexer` with datetime or timedelta intervals incorrectly matching on integer targets (:issue:`47772`) - Bug in :meth:`IntervalIndex.get_indexer` with timezone-aware datetime intervals incorrectly matching on a sequence of timezone-naive targets (:issue:`47772`) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 91960173e7c1e..2f1363f180d08 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -391,12 +391,7 @@ def _from_sequence( @classmethod def _from_factorized(cls, values: np.ndarray, original: IntervalArray) -> Self: - if len(values) == 0: - # An empty array returns object-dtype here. We can't create - # a new IA from an (empty) object-dtype array, so turn it into the - # correct dtype. - values = values.astype(original.dtype.subtype) - return cls(values, closed=original.closed) + return cls._from_sequence(values, dtype=original.dtype) _interval_shared_docs["from_breaks"] = textwrap.dedent( """ diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 97119127b1665..ce8d962fc3115 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -536,6 +536,25 @@ def test_factorize_mixed_values(self, data, expected_codes, expected_uniques): tm.assert_numpy_array_equal(codes, expected_codes) tm.assert_index_equal(uniques, expected_uniques) + def test_factorize_interval_non_nano(self, unit): + # GH#56099 + left = DatetimeIndex(["2016-01-01", np.nan, "2015-10-11"]).as_unit(unit) + right = DatetimeIndex(["2016-01-02", np.nan, "2015-10-15"]).as_unit(unit) + idx = IntervalIndex.from_arrays(left, right) + codes, cats = idx.factorize() + assert cats.dtype == f"interval[datetime64[{unit}], right]" + + ts = Timestamp(0).as_unit(unit) + idx2 = IntervalIndex.from_arrays(left - ts, right - ts) + codes2, cats2 = idx2.factorize() + assert cats2.dtype == f"interval[timedelta64[{unit}], right]" + + idx3 = IntervalIndex.from_arrays( + left.tz_localize("US/Pacific"), right.tz_localize("US/Pacific") + ) + codes3, cats3 = idx3.factorize() + assert cats3.dtype == f"interval[datetime64[{unit}, US/Pacific], right]" + class TestUnique: def test_ints(self):