Skip to content

Commit f15f678

Browse files
authored
BUG: pd.unique(Index) now returns an Index, matching Index.unique (#57679)
* Add test * Fix * Adjust tests * Add whatsnew * Improve tests * Remove duplicate test
1 parent d6c2586 commit f15f678

File tree

3 files changed

+36
-26
lines changed

3 files changed

+36
-26
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,7 @@ Other
393393
^^^^^
394394
- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
395395
- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
396+
- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
396397
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
397398
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would raise a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
398399
- Bug in DataFrame Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)

pandas/core/algorithms.py

+4
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,10 @@ def unique_with_mask(values, mask: npt.NDArray[np.bool_] | None = None):
439439
# Dispatch to extension dtype's unique.
440440
return values.unique()
441441

442+
if isinstance(values, ABCIndex):
443+
# Dispatch to Index's unique.
444+
return values.unique()
445+
442446
original = values
443447
hashtable, values = _get_hashtable_algo(values)
444448

pandas/tests/test_algos.py

+31-26
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@
1616
is_integer_dtype,
1717
is_object_dtype,
1818
)
19-
from pandas.core.dtypes.dtypes import CategoricalDtype
19+
from pandas.core.dtypes.dtypes import (
20+
CategoricalDtype,
21+
DatetimeTZDtype,
22+
)
2023

2124
import pandas as pd
2225
from pandas import (
@@ -570,19 +573,20 @@ def test_object_refcount_bug(self):
570573
for i in range(1000):
571574
len(algos.unique(lst))
572575

573-
def test_on_index_object(self):
574-
mindex = MultiIndex.from_arrays(
575-
[np.arange(5).repeat(5), np.tile(np.arange(5), 5)]
576-
)
577-
expected = mindex.values
578-
expected.sort()
579-
580-
mindex = mindex.repeat(2)
576+
def test_index_returned(self, index):
577+
# GH#57043
578+
index = index.repeat(2)
579+
result = algos.unique(index)
581580

582-
result = pd.unique(mindex)
583-
result.sort()
584-
585-
tm.assert_almost_equal(result, expected)
581+
# dict.fromkeys preserves the order
582+
unique_values = list(dict.fromkeys(index.values))
583+
if isinstance(index, MultiIndex):
584+
expected = MultiIndex.from_tuples(unique_values, names=index.names)
585+
else:
586+
expected = Index(unique_values, dtype=index.dtype)
587+
if isinstance(index.dtype, DatetimeTZDtype):
588+
expected = expected.normalize()
589+
tm.assert_index_equal(result, expected, exact=True)
586590

587591
def test_dtype_preservation(self, any_numpy_dtype):
588592
# GH 15442
@@ -623,7 +627,7 @@ def test_dtype_preservation(self, any_numpy_dtype):
623627

624628
def test_datetime64_dtype_array_returned(self):
625629
# GH 9431
626-
expected = np.array(
630+
dt_arr = np.array(
627631
[
628632
"2015-01-03T00:00:00.000000000",
629633
"2015-01-01T00:00:00.000000000",
@@ -639,18 +643,18 @@ def test_datetime64_dtype_array_returned(self):
639643
]
640644
)
641645
result = algos.unique(dt_index)
642-
tm.assert_numpy_array_equal(result, expected)
643-
assert result.dtype == expected.dtype
646+
expected = to_datetime(dt_arr)
647+
tm.assert_index_equal(result, expected, exact=True)
644648

645649
s = Series(dt_index)
646650
result = algos.unique(s)
647-
tm.assert_numpy_array_equal(result, expected)
648-
assert result.dtype == expected.dtype
651+
tm.assert_numpy_array_equal(result, dt_arr)
652+
assert result.dtype == dt_arr.dtype
649653

650654
arr = s.values
651655
result = algos.unique(arr)
652-
tm.assert_numpy_array_equal(result, expected)
653-
assert result.dtype == expected.dtype
656+
tm.assert_numpy_array_equal(result, dt_arr)
657+
assert result.dtype == dt_arr.dtype
654658

655659
def test_datetime_non_ns(self):
656660
a = np.array(["2000", "2000", "2001"], dtype="datetime64[s]")
@@ -666,22 +670,23 @@ def test_timedelta_non_ns(self):
666670

667671
def test_timedelta64_dtype_array_returned(self):
668672
# GH 9431
669-
expected = np.array([31200, 45678, 10000], dtype="m8[ns]")
673+
td_arr = np.array([31200, 45678, 10000], dtype="m8[ns]")
670674

671675
td_index = to_timedelta([31200, 45678, 31200, 10000, 45678])
672676
result = algos.unique(td_index)
673-
tm.assert_numpy_array_equal(result, expected)
677+
expected = to_timedelta(td_arr)
678+
tm.assert_index_equal(result, expected)
674679
assert result.dtype == expected.dtype
675680

676681
s = Series(td_index)
677682
result = algos.unique(s)
678-
tm.assert_numpy_array_equal(result, expected)
679-
assert result.dtype == expected.dtype
683+
tm.assert_numpy_array_equal(result, td_arr)
684+
assert result.dtype == td_arr.dtype
680685

681686
arr = s.values
682687
result = algos.unique(arr)
683-
tm.assert_numpy_array_equal(result, expected)
684-
assert result.dtype == expected.dtype
688+
tm.assert_numpy_array_equal(result, td_arr)
689+
assert result.dtype == td_arr.dtype
685690

686691
def test_uint64_overflow(self):
687692
s = Series([1, 2, 2**63, 2**63], dtype=np.uint64)

0 commit comments

Comments
 (0)