Skip to content

REF: avoid internals in merge code #48082

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 17, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 58 additions & 28 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@
import pandas.core.common as com
from pandas.core.construction import extract_array
from pandas.core.frame import _merge_doc
from pandas.core.internals import concatenate_managers
from pandas.core.sorting import is_int64_overflow_possible

if TYPE_CHECKING:
Expand Down Expand Up @@ -716,28 +715,69 @@ def __init__(
if validate is not None:
self._validate(validate)

def get_result(self, copy: bool = True) -> DataFrame:
if self.indicator:
self.left, self.right = self._indicator_pre_merge(self.left, self.right)

join_index, left_indexer, right_indexer = self._get_join_info()
def _reindex_and_concat(
    self,
    join_index: Index,
    left_indexer: npt.NDArray[np.intp] | None,
    right_indexer: npt.NDArray[np.intp] | None,
    copy: bool,
) -> DataFrame:
    """
    Reindex both operands along the index and concatenate along columns.

    Parameters
    ----------
    join_index : Index
        Index assigned to each operand that gets reindexed.
    left_indexer : np.ndarray[np.intp] or None
        Indexer handed to ``reindex_indexer`` for ``self.left``. When None,
        the view of ``self.left`` is used as-is (no reindexing and its
        index is not replaced).
    right_indexer : np.ndarray[np.intp] or None
        Same as ``left_indexer``, for ``self.right``.
    copy : bool
        Forwarded to ``concat``.

    Returns
    -------
    DataFrame
        Column-wise concatenation of the two (possibly reindexed) operands,
        with columns set to the suffix-resolved left labels followed by the
        suffix-resolved right labels.

    Notes
    -----
    ``__finalize__`` is not called here; the caller is expected to do that
    on the returned frame.
    """
    # Take views so we do not alter the originals
    left = self.left[:]
    right = self.right[:]

    # Resolve overlapping column names using the configured suffixes.
    llabels, rlabels = _items_overlap_with_suffix(
        self.left._info_axis, self.right._info_axis, self.suffixes
    )

    if left_indexer is not None:
        # Pinning the index here (and in the right code just below) is not
        # necessary, but makes the `.take` more performant if we have e.g.
        # a MultiIndex for left.index.
        # NOTE(review): axis=1 on the manager is presumably the row axis in
        # manager orientation -- confirm against BlockManager.reindex_indexer.
        lmgr = left._mgr.reindex_indexer(
            join_index,
            left_indexer,
            axis=1,
            copy=False,
            only_slice=True,
            allow_dups=True,
            use_na_proxy=True,
        )
        left = left._constructor(lmgr)
        left.index = join_index

    if right_indexer is not None:
        rmgr = right._mgr.reindex_indexer(
            join_index,
            right_indexer,
            axis=1,
            copy=False,
            only_slice=True,
            allow_dups=True,
            use_na_proxy=True,
        )
        right = right._constructor(rmgr)
        right.index = join_index

    # Imported locally, as in the original block.
    from pandas import concat

    result = concat([left, right], axis=1, copy=copy)
    # Column labels are assigned after concatenation so that the
    # suffix-resolved names replace whatever labels concat produced.
    result.columns = llabels.append(rlabels)
    return result

def get_result(self, copy: bool = True) -> DataFrame:
if self.indicator:
self.left, self.right = self._indicator_pre_merge(self.left, self.right)

join_index, left_indexer, right_indexer = self._get_join_info()

result = self._reindex_and_concat(
join_index, left_indexer, right_indexer, copy=copy
)
result = result.__finalize__(self, method=self._merge_type)

if self.indicator:
result = self._indicator_post_merge(result)
Expand Down Expand Up @@ -1725,19 +1765,9 @@ def get_result(self, copy: bool = True) -> DataFrame:
left_join_indexer = left_indexer
right_join_indexer = right_indexer

lindexers = {1: left_join_indexer} if left_join_indexer is not None else {}
rindexers = {1: right_join_indexer} if right_join_indexer is not None else {}

result_data = concatenate_managers(
[(self.left._mgr, lindexers), (self.right._mgr, rindexers)],
axes=[llabels.append(rlabels), join_index],
concat_axis=0,
copy=copy,
result = self._reindex_and_concat(
join_index, left_join_indexer, right_join_indexer, copy=copy
)

typ = self.left._constructor
result = typ(result_data)

self._maybe_add_join_keys(result, left_indexer, right_indexer)

return result
Expand Down