|
9 | 9 | )
|
10 | 10 | import datetime
|
11 | 11 | from functools import partial
|
12 |
| -import string |
13 | 12 | from typing import (
|
14 | 13 | TYPE_CHECKING,
|
15 | 14 | Literal,
|
|
90 | 89 | BaseMaskedArray,
|
91 | 90 | ExtensionArray,
|
92 | 91 | )
|
93 |
| -from pandas.core.arrays._mixins import NDArrayBackedExtensionArray |
94 | 92 | from pandas.core.arrays.string_ import StringDtype
|
95 | 93 | import pandas.core.common as com
|
96 | 94 | from pandas.core.construction import (
|
|
99 | 97 | )
|
100 | 98 | from pandas.core.frame import _merge_doc
|
101 | 99 | from pandas.core.indexes.api import default_index
|
102 |
| -from pandas.core.sorting import is_int64_overflow_possible |
| 100 | +from pandas.core.sorting import ( |
| 101 | + get_group_index, |
| 102 | + is_int64_overflow_possible, |
| 103 | +) |
103 | 104 |
|
104 | 105 | if TYPE_CHECKING:
|
105 | 106 | from pandas import DataFrame
|
@@ -2117,34 +2118,6 @@ def _convert_values_for_libjoin(
|
2117 | 2118 | def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
|
2118 | 2119 | """return the join indexers"""
|
2119 | 2120 |
|
2120 |
| - def flip(xs: list[ArrayLike]) -> np.ndarray: |
2121 |
| - """unlike np.transpose, this returns an array of tuples""" |
2122 |
| - |
2123 |
| - def injection(obj: ArrayLike): |
2124 |
| - if not isinstance(obj.dtype, ExtensionDtype): |
2125 |
| - # ndarray |
2126 |
| - return obj |
2127 |
| - obj = extract_array(obj) |
2128 |
| - if isinstance(obj, NDArrayBackedExtensionArray): |
2129 |
| - # fastpath for e.g. dt64tz, categorical |
2130 |
| - return obj._ndarray |
2131 |
| - # FIXME: returning obj._values_for_argsort() here doesn't |
2132 |
| - # break in any existing test cases, but i (@jbrockmendel) |
2133 |
| - # am pretty sure it should! |
2134 |
| - # e.g. |
2135 |
| - # arr = pd.array([0, pd.NA, 255], dtype="UInt8") |
2136 |
| - # will have values_for_argsort (before GH#45434) |
2137 |
| - # np.array([0, 255, 255], dtype=np.uint8) |
2138 |
| - # and the non-injectivity should make a difference somehow |
2139 |
| - # shouldn't it? |
2140 |
| - return np.asarray(obj) |
2141 |
| - |
2142 |
| - xs = [injection(x) for x in xs] |
2143 |
| - labels = list(string.ascii_lowercase[: len(xs)]) |
2144 |
| - dtypes = [x.dtype for x in xs] |
2145 |
| - labeled_dtypes = list(zip(labels, dtypes)) |
2146 |
| - return np.array(list(zip(*xs)), labeled_dtypes) |
2147 |
| - |
2148 | 2121 | # values to compare
|
2149 | 2122 | left_values = (
|
2150 | 2123 | self.left.index._values if self.left_index else self.left_join_keys[-1]
|
@@ -2197,11 +2170,23 @@ def injection(obj: ArrayLike):
|
2197 | 2170 | else:
|
2198 | 2171 | # We get here with non-ndarrays in test_merge_by_col_tz_aware
|
2199 | 2172 | # and test_merge_groupby_multiple_column_with_categorical_column
|
2200 |
| - lbv = flip(left_by_values) |
2201 |
| - rbv = flip(right_by_values) |
2202 |
| - lbv = ensure_object(lbv) |
2203 |
| - rbv = ensure_object(rbv) |
2204 |
| - |
| 2173 | + mapped = [ |
| 2174 | + _factorize_keys( |
| 2175 | + left_by_values[n], |
| 2176 | + right_by_values[n], |
| 2177 | + sort=False, |
| 2178 | + how="left", |
| 2179 | + ) |
| 2180 | + for n in range(len(left_by_values)) |
| 2181 | + ] |
| 2182 | + arrs = [np.concatenate(m[:2]) for m in mapped] |
| 2183 | + shape = tuple(m[2] for m in mapped) |
| 2184 | + group_index = get_group_index( |
| 2185 | + arrs, shape=shape, sort=False, xnull=False |
| 2186 | + ) |
| 2187 | + left_len = len(left_by_values[0]) |
| 2188 | + lbv = group_index[:left_len] |
| 2189 | + rbv = group_index[left_len:] |
2205 | 2190 | # error: Incompatible types in assignment (expression has type
|
2206 | 2191 | # "Union[ndarray[Any, dtype[Any]], ndarray[Any, dtype[object_]]]",
|
2207 | 2192 | # variable has type "List[Union[Union[ExtensionArray,
|
|
0 commit comments