Skip to content

Commit 4e16e4f

Browse files
authored
REF: remove Categorical._shallow_copy (pandas-dev#41030)
1 parent 70435eb commit 4e16e4f

File tree

4 files changed

+13
-33
lines changed

4 files changed

+13
-33
lines changed

pandas/core/algorithms.py

+10 -6
Original file line numberDiff line numberDiff line change
@@ -1876,29 +1876,33 @@ def _sort_tuples(values: np.ndarray) -> np.ndarray:
18761876
return values[indexer]
18771877

18781878

1879-
def union_with_duplicates(lvals: np.ndarray, rvals: np.ndarray) -> np.ndarray:
1879+
def union_with_duplicates(lvals: ArrayLike, rvals: ArrayLike) -> ArrayLike:
18801880
"""
18811881
Extracts the union from lvals and rvals with respect to duplicates and nans in
18821882
both arrays.
18831883
18841884
Parameters
18851885
----------
1886-
lvals: np.ndarray
1886+
lvals: np.ndarray or ExtensionArray
18871887
left values, which are ordered in front.
1888-
rvals: np.ndarray
1888+
rvals: np.ndarray or ExtensionArray
18891889
right values ordered after lvals.
18901890
18911891
Returns
18921892
-------
1893-
np.ndarray containing the unsorted union of both arrays
1893+
np.ndarray or ExtensionArray
1894+
Containing the unsorted union of both arrays.
18941895
"""
18951896
indexer = []
18961897
l_count = value_counts(lvals, dropna=False)
18971898
r_count = value_counts(rvals, dropna=False)
18981899
l_count, r_count = l_count.align(r_count, fill_value=0)
18991900
unique_array = unique(np.append(lvals, rvals))
1900-
if is_extension_array_dtype(lvals) or is_extension_array_dtype(rvals):
1901-
unique_array = pd_array(unique_array)
1901+
if not isinstance(lvals, np.ndarray):
1902+
# i.e. ExtensionArray
1903+
# Note: we only get here with lvals.dtype == rvals.dtype
1904+
# TODO: are there any cases where union won't be type/dtype preserving?
1905+
unique_array = type(lvals)._from_sequence(unique_array, dtype=lvals.dtype)
19021906
for i, value in enumerate(unique_array):
19031907
indexer += [i] * int(max(l_count[value], r_count[value]))
19041908
return unique_array.take(indexer)

pandas/core/indexes/base.py

+2 -9
Original file line numberDiff line numberDiff line change
@@ -2982,12 +2982,7 @@ def _union(self, other: Index, sort):
29822982

29832983
elif not other.is_unique:
29842984
# other has duplicates
2985-
2986-
# error: Argument 1 to "union_with_duplicates" has incompatible type
2987-
# "Union[ExtensionArray, ndarray]"; expected "ndarray"
2988-
# error: Argument 2 to "union_with_duplicates" has incompatible type
2989-
# "Union[ExtensionArray, ndarray]"; expected "ndarray"
2990-
result = algos.union_with_duplicates(lvals, rvals) # type: ignore[arg-type]
2985+
result = algos.union_with_duplicates(lvals, rvals)
29912986
return _maybe_try_sort(result, sort)
29922987

29932988
# Self may have duplicates
@@ -3002,9 +2997,7 @@ def _union(self, other: Index, sort):
30022997
other_diff = rvals.take(missing)
30032998
result = concat_compat((lvals, other_diff))
30042999
else:
3005-
# error: Incompatible types in assignment (expression has type
3006-
# "Union[ExtensionArray, ndarray]", variable has type "ndarray")
3007-
result = lvals # type: ignore[assignment]
3000+
result = lvals
30083001

30093002
if not self.is_monotonic or not other.is_monotonic:
30103003
result = _maybe_try_sort(result, sort)

pandas/core/indexes/category.py

-17
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from pandas._config import get_option
1212

1313
from pandas._libs import index as libindex
14-
from pandas._libs.lib import no_default
1514
from pandas._typing import (
1615
ArrayLike,
1716
Dtype,
@@ -234,22 +233,6 @@ def __new__(
234233

235234
# --------------------------------------------------------------------
236235

237-
@doc(Index._shallow_copy)
238-
def _shallow_copy(
239-
self,
240-
values: Categorical,
241-
name: Hashable = no_default,
242-
) -> CategoricalIndex:
243-
name = self._name if name is no_default else name
244-
245-
if values is not None:
246-
# In tests we only get here with Categorical objects that
247-
# have matching .ordered, and values.categories a subset of
248-
# our own. However we do _not_ have a dtype match in general.
249-
values = Categorical(values, dtype=self.dtype)
250-
251-
return super()._shallow_copy(values=values, name=name)
252-
253236
def _is_dtype_compat(self, other) -> Categorical:
254237
"""
255238
*this is an internal non-public method*

pandas/core/indexes/extension.py

+1 -1
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ def _get_unique_index(self):
331331
return self
332332

333333
result = self._data.unique()
334-
return self._shallow_copy(result)
334+
return type(self)._simple_new(result, name=self.name)
335335

336336
@doc(Index.map)
337337
def map(self, mapper, na_action=None):

0 commit comments

Comments
 (0)