From 3985a19c75fb5050a0a50c53a33635f2776d686f Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 22 Jun 2023 16:49:43 -0700 Subject: [PATCH] REF: separate out cross-merge, make less stateful --- pandas/core/reshape/merge.py | 140 +++++++++++++++++++---------------- 1 file changed, 75 insertions(+), 65 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 4d1f8bd6301d0..1d857b6c4509e 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -145,10 +145,79 @@ def merge( indicator: str | bool = False, validate: str | None = None, ) -> DataFrame: + if how == "cross": + return _cross_merge( + left, + right, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + sort=sort, + suffixes=suffixes, + indicator=indicator, + validate=validate, + copy=copy, + ) + else: + op = _MergeOperation( + left, + right, + how=how, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + sort=sort, + suffixes=suffixes, + indicator=indicator, + validate=validate, + ) + return op.get_result(copy=copy) + + +def _cross_merge( + left: DataFrame | Series, + right: DataFrame | Series, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + left_index: bool = False, + right_index: bool = False, + sort: bool = False, + suffixes: Suffixes = ("_x", "_y"), + copy: bool | None = None, + indicator: str | bool = False, + validate: str | None = None, +) -> DataFrame: + """ + See merge.__doc__ with how='cross' + """ + + cross_col = f"_cross_{uuid.uuid4()}" + left = left.assign(**{cross_col: 1}) + right = right.assign(**{cross_col: 1}) + + if ( + left_index + or right_index + or right_on is not None + or left_on is not None + or on is not None + ): + raise MergeError( + "Can not pass on, right_on, left_on or set right_index=True or " + "left_index=True" + ) + + left_on = right_on = [cross_col] + op = _MergeOperation( left, right, - how=how, + how="inner", on=on, left_on=left_on, right_on=right_on, @@ -159,7 +228,9 @@ def merge( indicator=indicator, validate=validate, ) - return op.get_result(copy=copy) + res = op.get_result(copy=copy) + del res[cross_col] + return res def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces): @@ -706,17 +777,6 @@ def __init__( self.left_on, self.right_on = self._validate_left_right_on(left_on, right_on) - cross_col = None - if self.how == "cross": - ( - self.left, - self.right, - self.how, - cross_col, - ) = self._create_cross_configuration(self.left, self.right) - self.left_on = self.right_on = [cross_col] - self._cross = cross_col - # note this function has side effects ( self.left_join_keys, @@ -809,17 +869,8 @@ def get_result(self, copy: bool | None = True) -> DataFrame: self._maybe_restore_index_levels(result) - self._maybe_drop_cross_column(result, self._cross) - return result.__finalize__(self, method="merge") - @final - def _maybe_drop_cross_column( - self, result: DataFrame, cross_col: str | None - ) -> None: - if cross_col is not None: - del result[cross_col] - @final @cache_readonly def _indicator_name(self) -> str | None: @@ -1437,53 +1488,12 @@ def _maybe_coerce_merge_keys(self) -> None: self.right = self.right.copy() self.right[name] = self.right[name].astype(typ) - @final - def _create_cross_configuration( - self, left: DataFrame, right: DataFrame - ) -> tuple[DataFrame, DataFrame, JoinHow, str]: - """ - Creates the configuration to dispatch the cross operation to inner join, - e.g. adding a join column and resetting parameters. Join column is added - to a new object, no inplace modification - - Parameters - ---------- - left : DataFrame - right : DataFrame - - Returns - ------- - a tuple (left, right, how, cross_col) representing the adjusted - DataFrames with cross_col, the merge operation set to inner and the column - to join over. - """ - cross_col = f"_cross_{uuid.uuid4()}" - how: JoinHow = "inner" - return ( - left.assign(**{cross_col: 1}), - right.assign(**{cross_col: 1}), - how, - cross_col, - ) - def _validate_left_right_on(self, left_on, right_on): left_on = com.maybe_make_list(left_on) right_on = com.maybe_make_list(right_on) - if self.how == "cross": - if ( - self.left_index - or self.right_index - or right_on is not None - or left_on is not None - or self.on is not None - ): - raise MergeError( - "Can not pass on, right_on, left_on or set right_index=True or " - "left_index=True" - ) # Hm, any way to make this logic less complicated?? - elif self.on is None and left_on is None and right_on is None: + if self.on is None and left_on is None and right_on is None: if self.left_index and self.right_index: left_on, right_on = (), () elif self.left_index: @@ -1551,7 +1561,7 @@ def _validate_left_right_on(self, left_on, right_on): 'of levels in the index of "left"' ) left_on = [None] * n - if self.how != "cross" and len(right_on) != len(left_on): + if len(right_on) != len(left_on): raise ValueError("len(right_on) must equal len(left_on)") return left_on, right_on