Skip to content

REF: dont alter self in _validate_specification #48051

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 15, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 48 additions & 44 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,8 +639,6 @@ def __init__(
self.axis = 1 - axis if self.left.ndim == 2 else 0

self.on = com.maybe_make_list(on)
self.left_on = com.maybe_make_list(left_on)
self.right_on = com.maybe_make_list(right_on)

self.copy = copy
self.suffixes = suffixes
Expand Down Expand Up @@ -683,7 +681,7 @@ def __init__(
msg, FutureWarning, stacklevel=find_stack_level(inspect.currentframe())
)

self._validate_specification()
self.left_on, self.right_on = self._validate_left_right_on(left_on, right_on)

cross_col = None
if self.how == "cross":
Expand Down Expand Up @@ -1075,7 +1073,7 @@ def _get_merge_keys(self):
# a pd.merge_asof(left_index=True, left_by=...) will result in a
# self.left_on array with a None in the middle of it. This requires
# a work-around as designated in the code below.
# See _validate_specification() for where this happens.
# See _validate_left_right_on() for where this happens.

# ugh, spaghetti re #733
if _any(self.left_on) and _any(self.right_on):
Expand Down Expand Up @@ -1323,25 +1321,27 @@ def _create_cross_configuration(
cross_col,
)

def _validate_specification(self) -> None:
def _validate_left_right_on(self, left_on, right_on):
left_on = com.maybe_make_list(left_on)
right_on = com.maybe_make_list(right_on)

if self.how == "cross":
if (
self.left_index
or self.right_index
or self.right_on is not None
or self.left_on is not None
or right_on is not None
or left_on is not None
or self.on is not None
):
raise MergeError(
"Can not pass on, right_on, left_on or set right_index=True or "
"left_index=True"
)
return
# Hm, any way to make this logic less complicated??
elif self.on is None and self.left_on is None and self.right_on is None:
elif self.on is None and left_on is None and right_on is None:

if self.left_index and self.right_index:
self.left_on, self.right_on = (), ()
left_on, right_on = (), ()
elif self.left_index:
raise MergeError("Must pass right_on or right_index=True")
elif self.right_index:
Expand All @@ -1354,8 +1354,8 @@ def _validate_specification(self) -> None:
if len(common_cols) == 0:
raise MergeError(
"No common columns to perform merge on. "
f"Merge options: left_on={self.left_on}, "
f"right_on={self.right_on}, "
f"Merge options: left_on={left_on}, "
f"right_on={right_on}, "
f"left_index={self.left_index}, "
f"right_index={self.right_index}"
)
Expand All @@ -1364,9 +1364,9 @@ def _validate_specification(self) -> None:
or not right_cols.join(common_cols, how="inner").is_unique
):
raise MergeError(f"Data columns not unique: {repr(common_cols)}")
self.left_on = self.right_on = common_cols
left_on = right_on = common_cols
elif self.on is not None:
if self.left_on is not None or self.right_on is not None:
if left_on is not None or right_on is not None:
raise MergeError(
'Can only pass argument "on" OR "left_on" '
'and "right_on", not a combination of both.'
Expand All @@ -1376,40 +1376,42 @@ def _validate_specification(self) -> None:
'Can only pass argument "on" OR "left_index" '
'and "right_index", not a combination of both.'
)
self.left_on = self.right_on = self.on
elif self.left_on is not None:
left_on = right_on = self.on
elif left_on is not None:
if self.left_index:
raise MergeError(
'Can only pass argument "left_on" OR "left_index" not both.'
)
if not self.right_index and self.right_on is None:
if not self.right_index and right_on is None:
raise MergeError('Must pass "right_on" OR "right_index".')
n = len(self.left_on)
n = len(left_on)
if self.right_index:
if len(self.left_on) != self.right.index.nlevels:
if len(left_on) != self.right.index.nlevels:
raise ValueError(
"len(left_on) must equal the number "
'of levels in the index of "right"'
)
self.right_on = [None] * n
elif self.right_on is not None:
right_on = [None] * n
elif right_on is not None:
if self.right_index:
raise MergeError(
'Can only pass argument "right_on" OR "right_index" not both.'
)
if not self.left_index and self.left_on is None:
if not self.left_index and left_on is None:
raise MergeError('Must pass "left_on" OR "left_index".')
n = len(self.right_on)
n = len(right_on)
if self.left_index:
if len(self.right_on) != self.left.index.nlevels:
if len(right_on) != self.left.index.nlevels:
raise ValueError(
"len(right_on) must equal the number "
'of levels in the index of "left"'
)
self.left_on = [None] * n
if self.how != "cross" and len(self.right_on) != len(self.left_on):
left_on = [None] * n
if self.how != "cross" and len(right_on) != len(left_on):
raise ValueError("len(right_on) must equal len(left_on)")

return left_on, right_on

def _validate(self, validate: str) -> None:

# Check uniqueness of each
Expand Down Expand Up @@ -1767,14 +1769,14 @@ def __init__(
fill_method=fill_method,
)

def _validate_specification(self) -> None:
super()._validate_specification()
def _validate_left_right_on(self, left_on, right_on):
left_on, right_on = super()._validate_left_right_on(left_on, right_on)

# we only allow on to be a single item for on
if len(self.left_on) != 1 and not self.left_index:
if len(left_on) != 1 and not self.left_index:
raise MergeError("can only asof on a key for left")

if len(self.right_on) != 1 and not self.right_index:
if len(right_on) != 1 and not self.right_index:
raise MergeError("can only asof on a key for right")

if self.left_index and isinstance(self.left.index, MultiIndex):
Expand All @@ -1795,27 +1797,27 @@ def _validate_specification(self) -> None:

# GH#29130 Check that merge keys do not have dtype object
if not self.left_index:
left_on = self.left_on[0]
if is_array_like(left_on):
lo_dtype = left_on.dtype
left_on_0 = left_on[0]
if is_array_like(left_on_0):
lo_dtype = left_on_0.dtype
else:
lo_dtype = (
self.left[left_on].dtype
if left_on in self.left.columns
else self.left.index.get_level_values(left_on)
self.left[left_on_0].dtype
if left_on_0 in self.left.columns
else self.left.index.get_level_values(left_on_0)
)
else:
lo_dtype = self.left.index.dtype

if not self.right_index:
right_on = self.right_on[0]
if is_array_like(right_on):
ro_dtype = right_on.dtype
right_on_0 = right_on[0]
if is_array_like(right_on_0):
ro_dtype = right_on_0.dtype
else:
ro_dtype = (
self.right[right_on].dtype
if right_on in self.right.columns
else self.right.index.get_level_values(right_on)
self.right[right_on_0].dtype
if right_on_0 in self.right.columns
else self.right.index.get_level_values(right_on_0)
)
else:
ro_dtype = self.right.index.dtype
Expand All @@ -1837,13 +1839,15 @@ def _validate_specification(self) -> None:
if len(self.left_by) != len(self.right_by):
raise MergeError("left_by and right_by must be same length")

self.left_on = self.left_by + list(self.left_on)
self.right_on = self.right_by + list(self.right_on)
left_on = self.left_by + list(left_on)
right_on = self.right_by + list(right_on)

# check 'direction' is valid
if self.direction not in ["backward", "forward", "nearest"]:
raise MergeError(f"direction invalid: {self.direction}")

return left_on, right_on

def _get_merge_keys(self):

# note this function has side effects
Expand Down