REF: remove CategoricalIndex._shallow_copy (pandas-dev#41030)

jbrockmendel · web-flow · commit 4e16e4fbcc59 · 2021-04-19T09:38:44.000-04:00
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -1876,29 +1876,33 @@ def _sort_tuples(values: np.ndarray) -> np.ndarray:
     return values[indexer]
 
 
-def union_with_duplicates(lvals: np.ndarray, rvals: np.ndarray) -> np.ndarray:
+def union_with_duplicates(lvals: ArrayLike, rvals: ArrayLike) -> ArrayLike:
     """
     Extracts the union from lvals and rvals with respect to duplicates and nans in
     both arrays.
 
     Parameters
     ----------
-    lvals: np.ndarray
+    lvals: np.ndarray or ExtensionArray
         left values which is ordered in front.
-    rvals: np.ndarray
+    rvals: np.ndarray or ExtensionArray
         right values ordered after lvals.
 
     Returns
     -------
-    np.ndarray containing the unsorted union of both arrays
+    np.ndarray or ExtensionArray
+        Containing the unsorted union of both arrays.
     """
     indexer = []
     l_count = value_counts(lvals, dropna=False)
     r_count = value_counts(rvals, dropna=False)
     l_count, r_count = l_count.align(r_count, fill_value=0)
     unique_array = unique(np.append(lvals, rvals))
-    if is_extension_array_dtype(lvals) or is_extension_array_dtype(rvals):
-        unique_array = pd_array(unique_array)
+    if not isinstance(lvals, np.ndarray):
+        # i.e. ExtensionArray
+        # Note: we only get here with lvals.dtype == rvals.dtype
+        # TODO: are there any cases where union won't be type/dtype preserving?
+        unique_array = type(lvals)._from_sequence(unique_array, dtype=lvals.dtype)
     for i, value in enumerate(unique_array):
         indexer += [i] * int(max(l_count[value], r_count[value]))
     return unique_array.take(indexer)
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -2982,12 +2982,7 @@ def _union(self, other: Index, sort):
 
         elif not other.is_unique:
             # other has duplicates
-
-            # error: Argument 1 to "union_with_duplicates" has incompatible type
-            # "Union[ExtensionArray, ndarray]"; expected "ndarray"
-            # error: Argument 2 to "union_with_duplicates" has incompatible type
-            # "Union[ExtensionArray, ndarray]"; expected "ndarray"
-            result = algos.union_with_duplicates(lvals, rvals)  # type: ignore[arg-type]
+            result = algos.union_with_duplicates(lvals, rvals)
             return _maybe_try_sort(result, sort)
 
         # Self may have duplicates
@@ -3002,9 +2997,7 @@ def _union(self, other: Index, sort):
             other_diff = rvals.take(missing)
             result = concat_compat((lvals, other_diff))
         else:
-            # error: Incompatible types in assignment (expression has type
-            # "Union[ExtensionArray, ndarray]", variable has type "ndarray")
-            result = lvals  # type: ignore[assignment]
+            result = lvals
 
         if not self.is_monotonic or not other.is_monotonic:
             result = _maybe_try_sort(result, sort)
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -11,7 +11,6 @@
 from pandas._config import get_option
 
 from pandas._libs import index as libindex
-from pandas._libs.lib import no_default
 from pandas._typing import (
     ArrayLike,
     Dtype,
@@ -234,22 +233,6 @@ def __new__(
 
     # --------------------------------------------------------------------
 
-    @doc(Index._shallow_copy)
-    def _shallow_copy(
-        self,
-        values: Categorical,
-        name: Hashable = no_default,
-    ) -> CategoricalIndex:
-        name = self._name if name is no_default else name
-
-        if values is not None:
-            # In tests we only get here with Categorical objects that
-            #  have matching .ordered, and values.categories a subset of
-            #  our own.  However we do _not_ have a dtype match in general.
-            values = Categorical(values, dtype=self.dtype)
-
-        return super()._shallow_copy(values=values, name=name)
-
     def _is_dtype_compat(self, other) -> Categorical:
         """
         *this is an internal non-public method*
diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py
@@ -331,7 +331,7 @@ def _get_unique_index(self):
             return self
 
         result = self._data.unique()
-        return self._shallow_copy(result)
+        return type(self)._simple_new(result, name=self.name)
 
     @doc(Index.map)
     def map(self, mapper, na_action=None):