diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 40a169d03f39c..71a3ccfb64024 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1494,7 +1494,7 @@ def check_for_ordered(self, op): ) def _values_for_argsort(self): - return self._codes.copy() + return self._codes def argsort(self, ascending=True, kind="quicksort", **kwargs): """ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6f44b5abf5b04..1cd772a72058d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3385,6 +3385,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) ------- join_index, (left_indexer, right_indexer) """ + other = ensure_index(other) self_is_mi = isinstance(self, ABCMultiIndex) other_is_mi = isinstance(other, ABCMultiIndex) @@ -3404,8 +3405,6 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) other, level, how=how, return_indexers=return_indexers ) - other = ensure_index(other) - if len(other) == 0 and how in ("left", "outer"): join_index = self._shallow_copy() if return_indexers: @@ -3567,16 +3566,26 @@ def _join_multi(self, other, how, return_indexers=True): def _join_non_unique(self, other, how="left", return_indexers=False): from pandas.core.reshape.merge import _get_join_indexers + # We only get here if dtypes match + assert self.dtype == other.dtype + + if is_extension_array_dtype(self.dtype): + lvalues = self._data._values_for_argsort() + rvalues = other._data._values_for_argsort() + else: + lvalues = self._values + rvalues = other._values + left_idx, right_idx = _get_join_indexers( - [self._ndarray_values], [other._ndarray_values], how=how, sort=True + [lvalues], [rvalues], how=how, sort=True ) left_idx = ensure_platform_int(left_idx) right_idx = ensure_platform_int(right_idx) - join_index = np.asarray(self._ndarray_values.take(left_idx)) + join_index = np.asarray(lvalues.take(left_idx)) mask = left_idx == -1 - np.putmask(join_index, mask, other._ndarray_values.take(right_idx)) + np.putmask(join_index, mask, rvalues.take(right_idx)) join_index = self._wrap_joined_index(join_index, other) @@ -3727,6 +3736,9 @@ def _get_leaf_sorter(labels): return join_index def _join_monotonic(self, other, how="left", return_indexers=False): + # We only get here with matching dtypes + assert other.dtype == self.dtype + if self.equals(other): ret_index = other if how == "right" else self if return_indexers: @@ -3734,8 +3746,12 @@ def _join_monotonic(self, other, how="left", return_indexers=False): else: return ret_index - sv = self._ndarray_values - ov = other._ndarray_values + if is_extension_array_dtype(self.dtype): + sv = self._data._values_for_argsort() + ov = other._data._values_for_argsort() + else: + sv = self._values + ov = other._values if self.is_unique and other.is_unique: # We can perform much better than the general case