Skip to content

Commit 24da5be

Browse files
committed
Add duplicated to ExtensionArray api
1 parent 6b93a0c commit 24da5be

File tree

2 files changed

+34
-1
lines changed

2 files changed

+34
-1
lines changed

pandas/core/arrays/base.py

+28
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
ArrayLike,
3232
AstypeArg,
3333
AxisInt,
34+
DropKeep,
3435
Dtype,
3536
FillnaOptions,
3637
PositionalIndexer,
@@ -78,6 +79,7 @@
7879
roperator,
7980
)
8081
from pandas.core.algorithms import (
82+
duplicated,
8183
factorize_array,
8284
isin,
8385
mode,
@@ -888,6 +890,32 @@ def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray:
888890
b = empty
889891
return self._concat_same_type([a, b])
890892

893+
def duplicated(
    self: ExtensionArrayT,
    keep: DropKeep = "first",
) -> npt.NDArray[np.bool_]:
    """
    Flag the entries of this ExtensionArray that are duplicates.

    Each duplicated entry is reported as ``True`` in the returned array.
    Depending on ``keep``, the flagged entries are every duplicate, every
    duplicate but the first occurrence, or every duplicate but the last
    occurrence.

    This base implementation simply delegates to
    ``pd.core.algorithms.duplicated``.

    Parameters
    ----------
    keep : DropKeep, optional
        Selects which occurrences are flagged; see ``Series.duplicated``.
        By default "first".

    Returns
    -------
    npt.NDArray[np.bool_]
        A numpy boolean array marking duplicates as selected by the
        ``keep`` argument.
    """
    # Inside the method body ``duplicated`` refers to the algorithms-level
    # function imported at module scope, not to this method.
    is_duplicate = duplicated(self, keep=keep)
    return is_duplicate
918+
891919
def unique(self: ExtensionArrayT) -> ExtensionArrayT:
892920
"""
893921
Compute the ExtensionArray of unique values.

pandas/core/base.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1316,7 +1316,12 @@ def drop_duplicates(self, keep: DropKeep = "first"):
13161316

13171317
@final
def _duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
    """
    Return a boolean mask marking duplicated entries of ``self._values``.

    Plain numpy arrays are passed to the module-level ``duplicated``
    function. Any other array type is asked for the mask through its own
    ``duplicated`` method.

    Parameters
    ----------
    keep : DropKeep, optional
        Forwarded unchanged to the underlying ``duplicated`` call.
        By default "first".

    Returns
    -------
    npt.NDArray[np.bool_]
        Whatever the selected ``duplicated`` implementation returns.
    """
    arr = self._values
    if isinstance(arr, np.ndarray):
        return duplicated(arr, keep=keep)

    # Going through EA.duplicated directly can improve performance GH#48424
    return arr.duplicated(keep=keep)
13201325

13211326
def _arith_method(self, other, op):
13221327
res_name = ops.get_op_result_name(self, other)

0 commit comments

Comments
 (0)