Skip to content

Commit 54bf475

Browse files
authored
REF: separate out cross-merge, make less stateful (#53810)
1 parent 445a76d · commit 54bf475

File tree

1 file changed

+76
-66
lines changed

1 file changed

+76
-66
lines changed

pandas/core/reshape/merge.py

+76 -66
Original file line number | Diff line number | Diff line change
@@ -145,10 +145,79 @@ def merge(
145145
indicator: str | bool = False,
146146
validate: str | None = None,
147147
) -> DataFrame:
148-
op = _MergeOperation(
148+
if how == "cross":
149+
return _cross_merge(
150+
left,
151+
right,
152+
on=on,
153+
left_on=left_on,
154+
right_on=right_on,
155+
left_index=left_index,
156+
right_index=right_index,
157+
sort=sort,
158+
suffixes=suffixes,
159+
indicator=indicator,
160+
validate=validate,
161+
copy=copy,
162+
)
163+
else:
164+
op = _MergeOperation(
165+
left,
166+
right,
167+
how=how,
168+
on=on,
169+
left_on=left_on,
170+
right_on=right_on,
171+
left_index=left_index,
172+
right_index=right_index,
173+
sort=sort,
174+
suffixes=suffixes,
175+
indicator=indicator,
176+
validate=validate,
177+
)
178+
return op.get_result(copy=copy)
179+
180+
181+
def _cross_merge(
182+
left: DataFrame | Series,
183+
right: DataFrame | Series,
184+
on: IndexLabel | None = None,
185+
left_on: IndexLabel | None = None,
186+
right_on: IndexLabel | None = None,
187+
left_index: bool = False,
188+
right_index: bool = False,
189+
sort: bool = False,
190+
suffixes: Suffixes = ("_x", "_y"),
191+
copy: bool | None = None,
192+
indicator: str | bool = False,
193+
validate: str | None = None,
194+
) -> DataFrame:
195+
"""
196+
See merge.__doc__ with how='cross'
197+
"""
198+
199+
if (
200+
left_index
201+
or right_index
202+
or right_on is not None
203+
or left_on is not None
204+
or on is not None
205+
):
206+
raise MergeError(
207+
"Can not pass on, right_on, left_on or set right_index=True or "
208+
"left_index=True"
209+
)
210+
211+
cross_col = f"_cross_{uuid.uuid4()}"
212+
left = left.assign(**{cross_col: 1})
213+
right = right.assign(**{cross_col: 1})
214+
215+
left_on = right_on = [cross_col]
216+
217+
res = merge(
149218
left,
150219
right,
151-
how=how,
220+
how="inner",
152221
on=on,
153222
left_on=left_on,
154223
right_on=right_on,
@@ -158,8 +227,10 @@ def merge(
158227
suffixes=suffixes,
159228
indicator=indicator,
160229
validate=validate,
230+
copy=copy,
161231
)
162-
return op.get_result(copy=copy)
232+
del res[cross_col]
233+
return res
163234

164235

165236
def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces):
@@ -706,17 +777,6 @@ def __init__(
706777

707778
self.left_on, self.right_on = self._validate_left_right_on(left_on, right_on)
708779

709-
cross_col = None
710-
if self.how == "cross":
711-
(
712-
self.left,
713-
self.right,
714-
self.how,
715-
cross_col,
716-
) = self._create_cross_configuration(self.left, self.right)
717-
self.left_on = self.right_on = [cross_col]
718-
self._cross = cross_col
719-
720780
(
721781
self.left_join_keys,
722782
self.right_join_keys,
@@ -829,17 +889,8 @@ def get_result(self, copy: bool | None = True) -> DataFrame:
829889

830890
self._maybe_restore_index_levels(result)
831891

832-
self._maybe_drop_cross_column(result, self._cross)
833-
834892
return result.__finalize__(self, method="merge")
835893

836-
@final
837-
def _maybe_drop_cross_column(
838-
self, result: DataFrame, cross_col: str | None
839-
) -> None:
840-
if cross_col is not None:
841-
del result[cross_col]
842-
843894
@final
844895
@cache_readonly
845896
def _indicator_name(self) -> str | None:
@@ -1448,53 +1499,12 @@ def _maybe_coerce_merge_keys(self) -> None:
14481499
self.right = self.right.copy()
14491500
self.right[name] = self.right[name].astype(typ)
14501501

1451-
@final
1452-
def _create_cross_configuration(
1453-
self, left: DataFrame, right: DataFrame
1454-
) -> tuple[DataFrame, DataFrame, JoinHow, str]:
1455-
"""
1456-
Creates the configuration to dispatch the cross operation to inner join,
1457-
e.g. adding a join column and resetting parameters. Join column is added
1458-
to a new object, no inplace modification
1459-
1460-
Parameters
1461-
----------
1462-
left : DataFrame
1463-
right : DataFrame
1464-
1465-
Returns
1466-
-------
1467-
a tuple (left, right, how, cross_col) representing the adjusted
1468-
DataFrames with cross_col, the merge operation set to inner and the column
1469-
to join over.
1470-
"""
1471-
cross_col = f"_cross_{uuid.uuid4()}"
1472-
how: JoinHow = "inner"
1473-
return (
1474-
left.assign(**{cross_col: 1}),
1475-
right.assign(**{cross_col: 1}),
1476-
how,
1477-
cross_col,
1478-
)
1479-
14801502
def _validate_left_right_on(self, left_on, right_on):
14811503
left_on = com.maybe_make_list(left_on)
14821504
right_on = com.maybe_make_list(right_on)
14831505

1484-
if self.how == "cross":
1485-
if (
1486-
self.left_index
1487-
or self.right_index
1488-
or right_on is not None
1489-
or left_on is not None
1490-
or self.on is not None
1491-
):
1492-
raise MergeError(
1493-
"Can not pass on, right_on, left_on or set right_index=True or "
1494-
"left_index=True"
1495-
)
14961506
# Hm, any way to make this logic less complicated??
1497-
elif self.on is None and left_on is None and right_on is None:
1507+
if self.on is None and left_on is None and right_on is None:
14981508
if self.left_index and self.right_index:
14991509
left_on, right_on = (), ()
15001510
elif self.left_index:
@@ -1562,7 +1572,7 @@ def _validate_left_right_on(self, left_on, right_on):
15621572
'of levels in the index of "left"'
15631573
)
15641574
left_on = [None] * n
1565-
if self.how != "cross" and len(right_on) != len(left_on):
1575+
if len(right_on) != len(left_on):
15661576
raise ValueError("len(right_on) must equal len(left_on)")
15671577

15681578
return left_on, right_on

0 commit comments

Comments
 (0)