Skip to content

Commit 0a4665a

Browse files
Revert BUG-24212 fix usage of Index.take in pd.merge (#24904)
* Revert BUG-24212 fix usage of Index.take in pd.merge
  xref #24733
  xref #24897
* test 0.23.4 output
* added note about buggy test
1 parent e2c0b12 commit 0a4665a

File tree

3 files changed

+19
-40
lines changed

3 files changed

+19
-40
lines changed

doc/source/whatsnew/v0.24.0.rst

-1
Original file line number | Diff line number | Diff line change
@@ -1827,7 +1827,6 @@ Reshaping
18271827
- Bug in :func:`DataFrame.unstack` where a ``ValueError`` was raised when unstacking timezone aware values (:issue:`18338`)
18281828
- Bug in :func:`DataFrame.stack` where timezone aware values were converted to timezone naive values (:issue:`19420`)
18291829
- Bug in :func:`merge_asof` where a ``TypeError`` was raised when ``by_col`` were timezone aware values (:issue:`21184`)
1830-
- Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`)
18311830
- Bug showing an incorrect shape when throwing error during ``DataFrame`` construction. (:issue:`20742`)
18321831

18331832
.. _whatsnew_0240.bug_fixes.sparse:

pandas/core/reshape/merge.py

+2-39
Original file line number | Diff line number | Diff line change
@@ -757,19 +757,13 @@ def _get_join_info(self):
757757

758758
if self.right_index:
759759
if len(self.left) > 0:
760-
join_index = self._create_join_index(self.left.index,
761-
self.right.index,
762-
left_indexer,
763-
how='right')
760+
join_index = self.left.index.take(left_indexer)
764761
else:
765762
join_index = self.right.index.take(right_indexer)
766763
left_indexer = np.array([-1] * len(join_index))
767764
elif self.left_index:
768765
if len(self.right) > 0:
769-
join_index = self._create_join_index(self.right.index,
770-
self.left.index,
771-
right_indexer,
772-
how='left')
766+
join_index = self.right.index.take(right_indexer)
773767
else:
774768
join_index = self.left.index.take(left_indexer)
775769
right_indexer = np.array([-1] * len(join_index))
@@ -780,37 +774,6 @@ def _get_join_info(self):
780774
join_index = join_index.astype(object)
781775
return join_index, left_indexer, right_indexer
782776

783-
def _create_join_index(self, index, other_index, indexer, how='left'):
784-
"""
785-
Create a join index by rearranging one index to match another
786-
787-
Parameters
788-
----------
789-
index: Index being rearranged
790-
other_index: Index used to supply values not found in index
791-
indexer: how to rearrange index
792-
how: replacement is only necessary if indexer based on other_index
793-
794-
Returns
795-
-------
796-
join_index
797-
"""
798-
join_index = index.take(indexer)
799-
if (self.how in (how, 'outer') and
800-
not isinstance(other_index, MultiIndex)):
801-
# if final index requires values in other_index but not target
802-
# index, indexer may hold missing (-1) values, causing Index.take
803-
# to take the final value in target index
804-
mask = indexer == -1
805-
if np.any(mask):
806-
# if values missing (-1) from target index,
807-
# take from other_index instead
808-
join_list = join_index.to_numpy()
809-
join_list[mask] = other_index.to_numpy()[mask]
810-
join_index = Index(join_list, dtype=join_index.dtype,
811-
name=join_index.name)
812-
return join_index
813-
814777
def _get_merge_keys(self):
815778
"""
816779
Note: has side effects (copy/delete key columns)

pandas/tests/reshape/merge/test_merge.py

+17
Original file line number | Diff line number | Diff line change
@@ -940,6 +940,7 @@ def test_merge_two_empty_df_no_division_error(self):
940940
merge(a, a, on=('a', 'b'))
941941

942942
@pytest.mark.parametrize('how', ['left', 'outer'])
943+
@pytest.mark.xfail(reason="GH-24897")
943944
def test_merge_on_index_with_more_values(self, how):
944945
# GH 24212
945946
# pd.merge gets [-1, -1, 0, 1] as right_indexer, ensure that -1 is
@@ -959,6 +960,22 @@ def test_merge_on_index_with_more_values(self, how):
959960
expected.set_index('a', drop=False, inplace=True)
960961
assert_frame_equal(result, expected)
961962

963+
def test_merge_right_index_right(self):
964+
# Note: the expected output here is probably incorrect.
965+
# See https://github.com/pandas-dev/pandas/issues/17257 for more.
966+
# We include this as a regression test for GH-24897.
967+
left = pd.DataFrame({'a': [1, 2, 3], 'key': [0, 1, 1]})
968+
right = pd.DataFrame({'b': [1, 2, 3]})
969+
970+
expected = pd.DataFrame({'a': [1, 2, 3, None],
971+
'key': [0, 1, 1, 2],
972+
'b': [1, 2, 2, 3]},
973+
columns=['a', 'key', 'b'],
974+
index=[0, 1, 2, 2])
975+
result = left.merge(right, left_on='key', right_index=True,
976+
how='right')
977+
tm.assert_frame_equal(result, expected)
978+
962979

963980
def _check_merge(x, y):
964981
for how in ['inner', 'left', 'outer']:

0 commit comments

Comments
 (0)