Skip to content

Commit e22a6c8

Browse files
Revert "Backport PR pandas-dev#24916: BUG-24212 fix regression in pandas-dev#24897 (pandas-dev#24951)"
This reverts commit 84056c5.
1 parent 84056c5 commit e22a6c8

File tree

3 files changed

+19
-60
lines changed

3 files changed

+19
-60
lines changed

doc/source/whatsnew/v0.24.1.rst

-3
Original file line number | Diff line number | Diff line change
@@ -63,9 +63,6 @@ Bug Fixes
6363
-
6464
-
6565

66-
**Reshaping**
67-
68-
- Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`)
6966

7067
**Other**
7168

pandas/core/reshape/merge.py

+2 -43
Original file line number | Diff line number | Diff line change
@@ -757,21 +757,13 @@ def _get_join_info(self):
757757

758758
if self.right_index:
759759
if len(self.left) > 0:
760-
join_index = self._create_join_index(self.left.index,
761-
self.right.index,
762-
left_indexer,
763-
right_indexer,
764-
how='right')
760+
join_index = self.left.index.take(left_indexer)
765761
else:
766762
join_index = self.right.index.take(right_indexer)
767763
left_indexer = np.array([-1] * len(join_index))
768764
elif self.left_index:
769765
if len(self.right) > 0:
770-
join_index = self._create_join_index(self.right.index,
771-
self.left.index,
772-
right_indexer,
773-
left_indexer,
774-
how='left')
766+
join_index = self.right.index.take(right_indexer)
775767
else:
776768
join_index = self.left.index.take(left_indexer)
777769
right_indexer = np.array([-1] * len(join_index))
@@ -782,39 +774,6 @@ def _get_join_info(self):
782774
join_index = join_index.astype(object)
783775
return join_index, left_indexer, right_indexer
784776

785-
def _create_join_index(self, index, other_index, indexer,
786-
other_indexer, how='left'):
787-
"""
788-
Create a join index by rearranging one index to match another
789-
790-
Parameters
791-
----------
792-
index: Index being rearranged
793-
other_index: Index used to supply values not found in index
794-
indexer: how to rearrange index
795-
how: replacement is only necessary if indexer based on other_index
796-
797-
Returns
798-
-------
799-
join_index
800-
"""
801-
join_index = index.take(indexer)
802-
if (self.how in (how, 'outer') and
803-
not isinstance(other_index, MultiIndex)):
804-
# if final index requires values in other_index but not target
805-
# index, indexer may hold missing (-1) values, causing Index.take
806-
# to take the final value in target index
807-
mask = indexer == -1
808-
if np.any(mask):
809-
# if values missing (-1) from target index,
810-
# take from other_index instead
811-
join_list = join_index.to_numpy()
812-
other_list = other_index.take(other_indexer).to_numpy()
813-
join_list[mask] = other_list[mask]
814-
join_index = Index(join_list, dtype=join_index.dtype,
815-
name=join_index.name)
816-
return join_index
817-
818777
def _get_merge_keys(self):
819778
"""
820779
Note: has side effects (copy/delete key columns)

pandas/tests/reshape/merge/test_merge.py

+17 -14
Original file line number | Diff line number | Diff line change
@@ -939,22 +939,25 @@ def test_merge_two_empty_df_no_division_error(self):
939939
with np.errstate(divide='raise'):
940940
merge(a, a, on=('a', 'b'))
941941

942-
@pytest.mark.parametrize('how', ['right', 'outer'])
942+
@pytest.mark.parametrize('how', ['left', 'outer'])
943+
@pytest.mark.xfail(reason="GH-24897")
943944
def test_merge_on_index_with_more_values(self, how):
944945
# GH 24212
945-
# pd.merge gets [0, 1, 2, -1, -1, -1] as left_indexer, ensure that
946-
# -1 is interpreted as a missing value instead of the last element
947-
df1 = pd.DataFrame({'a': [1, 2, 3], 'key': [0, 2, 2]})
948-
df2 = pd.DataFrame({'b': [1, 2, 3, 4, 5]})
949-
result = df1.merge(df2, left_on='key', right_index=True, how=how)
950-
expected = pd.DataFrame([[1.0, 0, 1],
951-
[2.0, 2, 3],
952-
[3.0, 2, 3],
953-
[np.nan, 1, 2],
954-
[np.nan, 3, 4],
955-
[np.nan, 4, 5]],
956-
columns=['a', 'key', 'b'])
957-
expected.set_index(Int64Index([0, 1, 2, 1, 3, 4]), inplace=True)
946+
# pd.merge gets [-1, -1, 0, 1] as right_indexer, ensure that -1 is
947+
# interpreted as a missing value instead of the last element
948+
df1 = pd.DataFrame([[1, 2], [2, 4], [3, 6], [4, 8]],
949+
columns=['a', 'b'])
950+
df2 = pd.DataFrame([[3, 30], [4, 40]],
951+
columns=['a', 'c'])
952+
df1.set_index('a', drop=False, inplace=True)
953+
df2.set_index('a', inplace=True)
954+
result = pd.merge(df1, df2, left_index=True, right_on='a', how=how)
955+
expected = pd.DataFrame([[1, 2, np.nan],
956+
[2, 4, np.nan],
957+
[3, 6, 30.0],
958+
[4, 8, 40.0]],
959+
columns=['a', 'b', 'c'])
960+
expected.set_index('a', drop=False, inplace=True)
958961
assert_frame_equal(result, expected)
959962

960963
def test_merge_right_index_right(self):

0 commit comments

Comments
 (0)