From 43275ddb31f0bf5050e57bac62fa699747384f75 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 18 Apr 2021 20:59:25 -0700 Subject: [PATCH] REF: remove CategoricalIndex._shallow_copy --- pandas/core/algorithms.py | 16 ++++++++++------ pandas/core/indexes/base.py | 11 ++--------- pandas/core/indexes/category.py | 17 ----------------- pandas/core/indexes/extension.py | 2 +- 4 files changed, 13 insertions(+), 33 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9e2dd846f0379..6f906cf8879ff 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1876,29 +1876,33 @@ def _sort_tuples(values: np.ndarray) -> np.ndarray: return values[indexer] -def union_with_duplicates(lvals: np.ndarray, rvals: np.ndarray) -> np.ndarray: +def union_with_duplicates(lvals: ArrayLike, rvals: ArrayLike) -> ArrayLike: """ Extracts the union from lvals and rvals with respect to duplicates and nans in both arrays. Parameters ---------- - lvals: np.ndarray + lvals: np.ndarray or ExtensionArray left values which is ordered in front. - rvals: np.ndarray + rvals: np.ndarray or ExtensionArray right values ordered after lvals. Returns ------- - np.ndarray containing the unsorted union of both arrays + np.ndarray or ExtensionArray + Containing the unsorted union of both arrays. """ indexer = [] l_count = value_counts(lvals, dropna=False) r_count = value_counts(rvals, dropna=False) l_count, r_count = l_count.align(r_count, fill_value=0) unique_array = unique(np.append(lvals, rvals)) - if is_extension_array_dtype(lvals) or is_extension_array_dtype(rvals): - unique_array = pd_array(unique_array) + if not isinstance(lvals, np.ndarray): + # i.e. ExtensionArray + # Note: we only get here with lvals.dtype == rvals.dtype + # TODO: are there any cases where union won't be type/dtype preserving? 
+ unique_array = type(lvals)._from_sequence(unique_array, dtype=lvals.dtype) for i, value in enumerate(unique_array): indexer += [i] * int(max(l_count[value], r_count[value])) return unique_array.take(indexer) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 310ee4c3a63e3..6cf6c18dbe350 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2982,12 +2982,7 @@ def _union(self, other: Index, sort): elif not other.is_unique: # other has duplicates - - # error: Argument 1 to "union_with_duplicates" has incompatible type - # "Union[ExtensionArray, ndarray]"; expected "ndarray" - # error: Argument 2 to "union_with_duplicates" has incompatible type - # "Union[ExtensionArray, ndarray]"; expected "ndarray" - result = algos.union_with_duplicates(lvals, rvals) # type: ignore[arg-type] + result = algos.union_with_duplicates(lvals, rvals) return _maybe_try_sort(result, sort) # Self may have duplicates @@ -3002,9 +2997,7 @@ def _union(self, other: Index, sort): other_diff = rvals.take(missing) result = concat_compat((lvals, other_diff)) else: - # error: Incompatible types in assignment (expression has type - # "Union[ExtensionArray, ndarray]", variable has type "ndarray") - result = lvals # type: ignore[assignment] + result = lvals if not self.is_monotonic or not other.is_monotonic: result = _maybe_try_sort(result, sort) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 5b98b956e33e6..8d15b460a79df 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -11,7 +11,6 @@ from pandas._config import get_option from pandas._libs import index as libindex -from pandas._libs.lib import no_default from pandas._typing import ( ArrayLike, Dtype, @@ -234,22 +233,6 @@ def __new__( # -------------------------------------------------------------------- - @doc(Index._shallow_copy) - def _shallow_copy( - self, - values: Categorical, - name: Hashable = no_default, - ) -> 
CategoricalIndex: - name = self._name if name is no_default else name - - if values is not None: - # In tests we only get here with Categorical objects that - # have matching .ordered, and values.categories a subset of - # our own. However we do _not_ have a dtype match in general. - values = Categorical(values, dtype=self.dtype) - - return super()._shallow_copy(values=values, name=name) - def _is_dtype_compat(self, other) -> Categorical: """ *this is an internal non-public method* diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index b11ec06120e0c..d28bcd6c5497a 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -331,7 +331,7 @@ def _get_unique_index(self): return self result = self._data.unique() - return self._shallow_copy(result) + return type(self)._simple_new(result, name=self.name) @doc(Index.map) def map(self, mapper, na_action=None):