Skip to content

Commit 9138b5b

Browse files
authored
BUG: IntervalIndex.factorize with non-nano (#56099)
* BUG: IntervalIndex.factorize with non-nano
* GH ref
1 parent 772d285 commit 9138b5b

File tree

3 files changed

+21
-6
lines changed

3 files changed

+21
-6
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,7 @@ Strings
404404
Interval
405405
^^^^^^^^
406406
- Bug in :class:`Interval` ``__repr__`` not displaying UTC offsets for :class:`Timestamp` bounds. Additionally the hour, minute and second components will now be shown. (:issue:`55015`)
407+
- Bug in :meth:`IntervalIndex.factorize` and :meth:`Series.factorize` for :class:`IntervalDtype` data with ``datetime64`` or ``timedelta64`` intervals not preserving non-nanosecond units (:issue:`56099`)
407408
- Bug in :meth:`IntervalIndex.from_arrays` when passed ``datetime64`` or ``timedelta64`` arrays with mismatched resolutions constructing an invalid ``IntervalArray`` object (:issue:`55714`)
408409
- Bug in :meth:`IntervalIndex.get_indexer` with datetime or timedelta intervals incorrectly matching on integer targets (:issue:`47772`)
409410
- Bug in :meth:`IntervalIndex.get_indexer` with timezone-aware datetime intervals incorrectly matching on a sequence of timezone-naive targets (:issue:`47772`)

pandas/core/arrays/interval.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -391,12 +391,7 @@ def _from_sequence(
391391

392392
@classmethod
393393
def _from_factorized(cls, values: np.ndarray, original: IntervalArray) -> Self:
394-
if len(values) == 0:
395-
# An empty array returns object-dtype here. We can't create
396-
# a new IA from an (empty) object-dtype array, so turn it into the
397-
# correct dtype.
398-
values = values.astype(original.dtype.subtype)
399-
return cls(values, closed=original.closed)
394+
return cls._from_sequence(values, dtype=original.dtype)
400395

401396
_interval_shared_docs["from_breaks"] = textwrap.dedent(
402397
"""

pandas/tests/test_algos.py

+19
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,25 @@ def test_factorize_mixed_values(self, data, expected_codes, expected_uniques):
536536
tm.assert_numpy_array_equal(codes, expected_codes)
537537
tm.assert_index_equal(uniques, expected_uniques)
538538

539+
def test_factorize_interval_non_nano(self, unit):
540+
# GH#56099
541+
left = DatetimeIndex(["2016-01-01", np.nan, "2015-10-11"]).as_unit(unit)
542+
right = DatetimeIndex(["2016-01-02", np.nan, "2015-10-15"]).as_unit(unit)
543+
idx = IntervalIndex.from_arrays(left, right)
544+
codes, cats = idx.factorize()
545+
assert cats.dtype == f"interval[datetime64[{unit}], right]"
546+
547+
ts = Timestamp(0).as_unit(unit)
548+
idx2 = IntervalIndex.from_arrays(left - ts, right - ts)
549+
codes2, cats2 = idx2.factorize()
550+
assert cats2.dtype == f"interval[timedelta64[{unit}], right]"
551+
552+
idx3 = IntervalIndex.from_arrays(
553+
left.tz_localize("US/Pacific"), right.tz_localize("US/Pacific")
554+
)
555+
codes3, cats3 = idx3.factorize()
556+
assert cats3.dtype == f"interval[datetime64[{unit}, US/Pacific], right]"
557+
539558

540559
class TestUnique:
541560
def test_ints(self):

0 commit comments

Comments
 (0)