Skip to content

Commit 5d115bb

Browse files
authored
REF: avoid internals in merge code (#48082)
* REF: avoid internals in merge code
* fix condition
* use reindex_indexer
1 parent 19a7ba2 commit 5d115bb

File tree

1 file changed

+58
-28
lines changed

1 file changed

+58
-28
lines changed

pandas/core/reshape/merge.py

+58-28
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,6 @@
8383
import pandas.core.common as com
8484
from pandas.core.construction import extract_array
8585
from pandas.core.frame import _merge_doc
86-
from pandas.core.internals import concatenate_managers
8786
from pandas.core.sorting import is_int64_overflow_possible
8887

8988
if TYPE_CHECKING:
@@ -716,28 +715,69 @@ def __init__(
716715
if validate is not None:
717716
self._validate(validate)
718717

719-
def get_result(self, copy: bool = True) -> DataFrame:
720-
if self.indicator:
721-
self.left, self.right = self._indicator_pre_merge(self.left, self.right)
722-
723-
join_index, left_indexer, right_indexer = self._get_join_info()
718+
def _reindex_and_concat(
719+
self,
720+
join_index: Index,
721+
left_indexer: npt.NDArray[np.intp] | None,
722+
right_indexer: npt.NDArray[np.intp] | None,
723+
copy: bool,
724+
) -> DataFrame:
725+
"""
726+
reindex along index and concat along columns.
727+
"""
728+
# Take views so we do not alter the originals
729+
left = self.left[:]
730+
right = self.right[:]
724731

725732
llabels, rlabels = _items_overlap_with_suffix(
726733
self.left._info_axis, self.right._info_axis, self.suffixes
727734
)
728735

729-
lindexers = {1: left_indexer} if left_indexer is not None else {}
730-
rindexers = {1: right_indexer} if right_indexer is not None else {}
736+
if left_indexer is not None:
737+
# Pinning the index here (and in the right code just below) is not
738+
# necessary, but makes the `.take` more performant if we have e.g.
739+
# a MultiIndex for left.index.
740+
lmgr = left._mgr.reindex_indexer(
741+
join_index,
742+
left_indexer,
743+
axis=1,
744+
copy=False,
745+
only_slice=True,
746+
allow_dups=True,
747+
use_na_proxy=True,
748+
)
749+
left = left._constructor(lmgr)
750+
left.index = join_index
751+
752+
if right_indexer is not None:
753+
rmgr = right._mgr.reindex_indexer(
754+
join_index,
755+
right_indexer,
756+
axis=1,
757+
copy=False,
758+
only_slice=True,
759+
allow_dups=True,
760+
use_na_proxy=True,
761+
)
762+
right = right._constructor(rmgr)
763+
right.index = join_index
731764

732-
result_data = concatenate_managers(
733-
[(self.left._mgr, lindexers), (self.right._mgr, rindexers)],
734-
axes=[llabels.append(rlabels), join_index],
735-
concat_axis=0,
736-
copy=copy,
737-
)
765+
from pandas import concat
738766

739-
typ = self.left._constructor
740-
result = typ(result_data).__finalize__(self, method=self._merge_type)
767+
result = concat([left, right], axis=1, copy=copy)
768+
result.columns = llabels.append(rlabels)
769+
return result
770+
771+
def get_result(self, copy: bool = True) -> DataFrame:
772+
if self.indicator:
773+
self.left, self.right = self._indicator_pre_merge(self.left, self.right)
774+
775+
join_index, left_indexer, right_indexer = self._get_join_info()
776+
777+
result = self._reindex_and_concat(
778+
join_index, left_indexer, right_indexer, copy=copy
779+
)
780+
result = result.__finalize__(self, method=self._merge_type)
741781

742782
if self.indicator:
743783
result = self._indicator_post_merge(result)
@@ -1725,19 +1765,9 @@ def get_result(self, copy: bool = True) -> DataFrame:
17251765
left_join_indexer = left_indexer
17261766
right_join_indexer = right_indexer
17271767

1728-
lindexers = {1: left_join_indexer} if left_join_indexer is not None else {}
1729-
rindexers = {1: right_join_indexer} if right_join_indexer is not None else {}
1730-
1731-
result_data = concatenate_managers(
1732-
[(self.left._mgr, lindexers), (self.right._mgr, rindexers)],
1733-
axes=[llabels.append(rlabels), join_index],
1734-
concat_axis=0,
1735-
copy=copy,
1768+
result = self._reindex_and_concat(
1769+
join_index, left_join_indexer, right_join_indexer, copy=copy
17361770
)
1737-
1738-
typ = self.left._constructor
1739-
result = typ(result_data)
1740-
17411771
self._maybe_add_join_keys(result, left_indexer, right_indexer)
17421772

17431773
return result

0 commit comments

Comments
 (0)