diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2650090a3f61a..b55867b432a78 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9809,7 +9809,7 @@ def merge( right_index: bool = False, sort: bool = False, suffixes: Suffixes = ("_x", "_y"), - copy: bool = True, + copy: bool | None = None, indicator: str | bool = False, validate: str | None = None, ) -> DataFrame: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 37013a5d1fb8f..0413fc865f641 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -25,6 +25,7 @@ join as libjoin, lib, ) +from pandas._libs.lib import is_range_indexer from pandas._typing import ( AnyArrayLike, ArrayLike, @@ -757,7 +758,7 @@ def _reindex_and_concat( self.left._info_axis, self.right._info_axis, self.suffixes ) - if left_indexer is not None: + if left_indexer is not None and not is_range_indexer(left_indexer, len(left)): # Pinning the index here (and in the right code just below) is not # necessary, but makes the `.take` more performant if we have e.g. # a MultiIndex for left.index. @@ -773,7 +774,9 @@ def _reindex_and_concat( left = left._constructor(lmgr) left.index = join_index - if right_indexer is not None: + if right_indexer is not None and not is_range_indexer( + right_indexer, len(right) + ): rmgr = right._mgr.reindex_indexer( join_index, right_indexer, diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py index ffc80d2d11798..b6f2f0543cb2b 100644 --- a/pandas/tests/copy_view/test_functions.py +++ b/pandas/tests/copy_view/test_functions.py @@ -1,6 +1,5 @@ import numpy as np - -import pandas.util._test_decorators as td +import pytest from pandas import ( DataFrame, @@ -182,19 +181,25 @@ def test_concat_mixed_series_frame(using_copy_on_write): tm.assert_frame_equal(result, expected) -@td.skip_copy_on_write_not_yet_implemented # TODO(CoW) -def test_merge_on_key(using_copy_on_write): +@pytest.mark.parametrize( + "func", + [ + lambda df1, df2, **kwargs: df1.merge(df2, **kwargs), + lambda df1, df2, **kwargs: merge(df1, df2, **kwargs), + ], +) +def test_merge_on_key(using_copy_on_write, func): df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]}) df2 = DataFrame({"key": ["a", "b", "c"], "b": [4, 5, 6]}) df1_orig = df1.copy() df2_orig = df2.copy() - result = merge(df1, df2, on="key") + result = func(df1, df2, on="key") if using_copy_on_write: assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) - assert not np.shares_memory(get_array(result, "key"), get_array(df1, "key")) + assert np.shares_memory(get_array(result, "key"), get_array(df1, "key")) assert not np.shares_memory(get_array(result, "key"), get_array(df2, "key")) else: assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) @@ -239,4 +244,39 @@ def test_merge_on_index(using_copy_on_write): tm.assert_frame_equal(df2, df2_orig) -# TODO(CoW) add merge tests where one of left/right isn't copied +@pytest.mark.parametrize( + "func, how", + [ + (lambda df1, df2, **kwargs: merge(df2, df1, on="key", **kwargs), "right"), + (lambda df1, df2, **kwargs: merge(df1, df2, on="key", **kwargs), "left"), + ], +) +def test_merge_on_key_enlarging_one(using_copy_on_write, func, how): + df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]}) + df2 = DataFrame({"key": ["a", "b"], "b": [4, 5]}) + df1_orig = df1.copy() + df2_orig = df2.copy() + + result = func(df1, df2, how=how) + + if using_copy_on_write: + assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + assert df2._mgr._has_no_reference(1) + assert df2._mgr._has_no_reference(0) + assert np.shares_memory(get_array(result, "key"), get_array(df1, "key")) is ( + how == "left" + ) + assert not np.shares_memory(get_array(result, "key"), get_array(df2, "key")) + else: + assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) + + if how == "left": + result.iloc[0, 1] = 0 + else: + result.iloc[0, 2] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) + tm.assert_frame_equal(df1, df1_orig) + tm.assert_frame_equal(df2, df2_orig)