From 5d417ae7409eaee8ec2e98cc38bc75f8fb30bd47 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 21 Nov 2023 11:11:11 -0800 Subject: [PATCH 1/2] BUG: IntervalIndex.factorize with non-nano --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/core/arrays/interval.py | 7 +------ pandas/tests/test_algos.py | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 9be54e95bf36f..be2d24c10e539 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -408,6 +408,7 @@ Interval - Bug in :meth:`IntervalIndex.get_indexer` with datetime or timedelta intervals incorrectly matching on integer targets (:issue:`47772`) - Bug in :meth:`IntervalIndex.get_indexer` with timezone-aware datetime intervals incorrectly matching on a sequence of timezone-naive targets (:issue:`47772`) - Bug in setting values on a :class:`Series` with an :class:`IntervalIndex` using a slice incorrectly raising (:issue:`54722`) +- Bug in :meth:`IntervalIndex.factorize` and :meth:`Series.factorize` with :class:`IntervalDtype` with datetime64 or timedelta64 intervals not preserving non-nanosecond units (:issue:`?`) - Indexing diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 91960173e7c1e..2f1363f180d08 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -391,12 +391,7 @@ def _from_sequence( @classmethod def _from_factorized(cls, values: np.ndarray, original: IntervalArray) -> Self: - if len(values) == 0: - # An empty array returns object-dtype here. We can't create - # a new IA from an (empty) object-dtype array, so turn it into the - # correct dtype. - values = values.astype(original.dtype.subtype) - return cls(values, closed=original.closed) + return cls._from_sequence(values, dtype=original.dtype) _interval_shared_docs["from_breaks"] = textwrap.dedent( """ diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 97119127b1665..bb8a0b6ebcdf7 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -536,6 +536,24 @@ def test_factorize_mixed_values(self, data, expected_codes, expected_uniques): tm.assert_numpy_array_equal(codes, expected_codes) tm.assert_index_equal(uniques, expected_uniques) + def test_factorize_interval_non_nano(self, unit): + left = DatetimeIndex(["2016-01-01", np.nan, "2015-10-11"]).as_unit(unit) + right = DatetimeIndex(["2016-01-02", np.nan, "2015-10-15"]).as_unit(unit) + idx = IntervalIndex.from_arrays(left, right) + codes, cats = idx.factorize() + assert cats.dtype == f"interval[datetime64[{unit}], right]" + + ts = Timestamp(0).as_unit(unit) + idx2 = IntervalIndex.from_arrays(left - ts, right - ts) + codes2, cats2 = idx2.factorize() + assert cats2.dtype == f"interval[timedelta64[{unit}], right]" + + idx3 = IntervalIndex.from_arrays( + left.tz_localize("US/Pacific"), right.tz_localize("US/Pacific") + ) + codes3, cats3 = idx3.factorize() + assert cats3.dtype == f"interval[datetime64[{unit}, US/Pacific], right]" + class TestUnique: def test_ints(self): From 1359776397d0fb47295c5bff9b41a86b86aba8b5 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 21 Nov 2023 11:12:13 -0800 Subject: [PATCH 2/2] GH ref --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/tests/test_algos.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index be2d24c10e539..0e613760f4927 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -404,11 +404,11 @@ Strings Interval ^^^^^^^^ - Bug in :class:`Interval` ``__repr__`` not displaying UTC offsets for :class:`Timestamp` bounds. Additionally the hour, minute and second components will now be shown. (:issue:`55015`) +- Bug in :meth:`IntervalIndex.factorize` and :meth:`Series.factorize` with :class:`IntervalDtype` with datetime64 or timedelta64 intervals not preserving non-nanosecond units (:issue:`56099`) - Bug in :meth:`IntervalIndex.from_arrays` when passed ``datetime64`` or ``timedelta64`` arrays with mismatched resolutions constructing an invalid ``IntervalArray`` object (:issue:`55714`) - Bug in :meth:`IntervalIndex.get_indexer` with datetime or timedelta intervals incorrectly matching on integer targets (:issue:`47772`) - Bug in :meth:`IntervalIndex.get_indexer` with timezone-aware datetime intervals incorrectly matching on a sequence of timezone-naive targets (:issue:`47772`) - Bug in setting values on a :class:`Series` with an :class:`IntervalIndex` using a slice incorrectly raising (:issue:`54722`) -- Bug in :meth:`IntervalIndex.factorize` and :meth:`Series.factorize` with :class:`IntervalDtype` with datetime64 or timedelta64 intervals not preserving non-nanosecond units (:issue:`?`) - Indexing diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index bb8a0b6ebcdf7..ce8d962fc3115 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -537,6 +537,7 @@ def test_factorize_mixed_values(self, data, expected_codes, expected_uniques): tm.assert_index_equal(uniques, expected_uniques) def test_factorize_interval_non_nano(self, unit): + # GH#56099 left = DatetimeIndex(["2016-01-01", np.nan, "2015-10-11"]).as_unit(unit) right = DatetimeIndex(["2016-01-02", np.nan, "2015-10-15"]).as_unit(unit) idx = IntervalIndex.from_arrays(left, right)