Skip to content

Commit 08d9d6f

Browse files
phofl authored and noatamir committed
BUG: MultiIndex.unique losing ea dtype (pandas-dev#48335)
1 parent b7de714 commit 08d9d6f

File tree

4 files changed

+46
-2
lines changed

4 files changed

+46
-2
lines changed

asv_bench/benchmarks/multiindex_object.py

+29
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,11 @@
33
import numpy as np
44

55
from pandas import (
6+
NA,
67
DataFrame,
78
MultiIndex,
89
RangeIndex,
10+
Series,
911
date_range,
1012
)
1113

@@ -255,4 +257,31 @@ def time_operation(self, index_structure, dtype, method):
255257
getattr(self.left, method)(self.right)
256258

257259

260+
class Unique:
    """Benchmark ``MultiIndex.unique`` for ``Int64`` and ``int64`` level dtypes."""

    # Each parameter is a ``(dtype, value)`` pair.  The value is placed twice
    # at the head of every level, so the resulting index always contains a
    # duplicated entry (``NA`` for the masked dtype, ``0`` for the NumPy one).
    params = [
        (("Int64", NA), ("int64", 0)),
    ]
    param_names = ["dtype_val"]

    def setup(self, dtype_val):
        """Build the two indexes that the ``time_*`` methods operate on.

        ``self.midx`` is the four-element head followed by
        ``range(1_000_000)``; ``self.midx_dups`` is the same head followed by
        ``range(500_000)`` repeated twice.  Both use the same Series for
        each of their two levels.
        """
        dtype, val = dtype_val
        head = [1, 2, val, val]

        level = Series(head + list(range(1_000_000)), dtype=dtype)
        self.midx = MultiIndex.from_arrays([level, level])

        level_dups = Series(head + list(range(500_000)) * 2, dtype=dtype)
        self.midx_dups = MultiIndex.from_arrays([level_dups, level_dups])

    def time_unique(self, dtype_val):
        """Time ``unique()`` on the index built from ``range(1_000_000)``."""
        self.midx.unique()

    def time_unique_dups(self, dtype_val):
        """Time ``unique()`` on the index whose range values all occur twice."""
        self.midx_dups.unique()
285+
286+
258287
from .pandas_vb_common import setup # noqa: F401 isort:skip

doc/source/whatsnew/v1.6.0.rst

+2 -1
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ Deprecations
100100

101101
Performance improvements
102102
~~~~~~~~~~~~~~~~~~~~~~~~
103-
-
103+
- Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`)
104104
-
105105

106106
.. ---------------------------------------------------------------------------
@@ -161,6 +161,7 @@ Missing
161161

162162
MultiIndex
163163
^^^^^^^^^^
164+
- Bug in :meth:`MultiIndex.unique` losing extension array dtype (:issue:`48335`)
164165
- Bug in :meth:`MultiIndex.append` not checking names for equality (:issue:`48288`)
165166
-
166167

pandas/core/indexes/multi.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1728,7 +1728,7 @@ def get_level_values(self, level):
17281728
def unique(self, level=None):
    # No level requested: de-duplicate the whole index.
    # This commit replaced ``super().unique()`` with ``drop_duplicates()``
    # here; the commit message describes the old behaviour as losing the
    # extension-array dtype (GH#48335).
    if level is None:
        return self.drop_duplicates()

    # A specific level was requested: resolve it to its number, then
    # return that level's values with ``unique=True``.
    level = self._get_level_number(level)
    return self._get_level_values(level=level, unique=True)

pandas/tests/indexes/multi/test_duplicates.py

+14
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from pandas._libs import hashtable
77

88
from pandas import (
9+
NA,
910
DatetimeIndex,
1011
MultiIndex,
1112
Series,
@@ -337,3 +338,16 @@ def test_multi_drop_duplicates_pos_args_deprecation():
337338
result = idx.drop_duplicates("last")
338339
expected = MultiIndex.from_arrays([[2, 3, 1], [2, 3, 1]])
339340
tm.assert_index_equal(expected, result)
341+
342+
343+
def test_midx_unique_ea_dtype():
    """``MultiIndex.unique`` keeps an ``Int64`` level as ``Int64``."""
    # GH#48335
    vals_a = Series([1, 2, NA, NA], dtype="Int64")
    vals_b = np.array([1, 2, 3, 3])
    midx = MultiIndex.from_arrays([vals_a, vals_b], names=["a", "b"])
    result = midx.unique()

    # The last two rows, (NA, 3) and (NA, 3), are duplicates; the expected
    # index holds a single (NA, 3) row and its first level is built as Int64.
    exp_vals_a = Series([1, 2, NA], dtype="Int64")
    exp_vals_b = np.array([1, 2, 3])
    expected = MultiIndex.from_arrays([exp_vals_a, exp_vals_b], names=["a", "b"])
    tm.assert_index_equal(result, expected)

0 commit comments

Comments
 (0)