# Patch summary (review notes; these lines precede the first `diff --git`
# header and belong to no hunk).
#
# * pandas/core/generic.py, `_take`: when `indices` is not a slice it is
#   converted with `np.asarray(..., dtype=np.intp)`. If `axis == 0`, the
#   array is 1-D, `using_copy_on_write()` is true and the indices equal
#   `np.arange(0, len(self))`, the method returns `self.copy(deep=None)`
#   before `self._mgr.take(...)` is reached.
# * pandas/core/series.py, `Series.take`: the same early return, placed after
#   `ensure_platform_int(indices)`; there is no `axis` condition here.
# * pandas/tests/copy_view/test_methods.py: adds `test_take` and
#   `test_between_time`. Both check `np.shares_memory` between the result
#   and the source (shared when `using_copy_on_write` is true, not shared
#   otherwise), write to `obj2.iloc[0]`, and compare the source against a
#   copy taken beforehand.
#
# NOTE(review): the `np.arange(0, len(self), ...)` argument is built before
# `array_equal_fast` is called, so it is allocated on every call that reaches
# this condition, even when `indices` is much shorter than `self`. A
# `len(indices) == len(self)` guard ahead of the comparison would avoid the
# allocation -- confirm against `array_equal_fast` before changing.
# NOTE(review): `copy(deep=None)` presumably defers the actual copy under
# copy-on-write; its implementation is not part of this patch -- verify.
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 8980fe0249193..fa1a79c81630a 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -35,6 +35,7 @@
 )
 
 from pandas._libs import lib
+from pandas._libs.lib import array_equal_fast
 from pandas._libs.tslibs import (
     Period,
     Tick,
@@ -3754,6 +3755,18 @@ def _take(
 
         See the docstring of `take` for full explanation of the parameters.
         """
+        if not isinstance(indices, slice):
+            indices = np.asarray(indices, dtype=np.intp)
+            if (
+                axis == 0
+                and indices.ndim == 1
+                and using_copy_on_write()
+                and array_equal_fast(
+                    indices,
+                    np.arange(0, len(self), dtype=np.intp),
+                )
+            ):
+                return self.copy(deep=None)
 
         new_data = self._mgr.take(
             indices,
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 6b82d48f82ce7..c38eb4e7c5d34 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -32,7 +32,10 @@
     properties,
     reshape,
 )
-from pandas._libs.lib import no_default
+from pandas._libs.lib import (
+    array_equal_fast,
+    no_default,
+)
 from pandas._typing import (
     AggFuncType,
     AlignJoin,
@@ -879,6 +882,14 @@ def take(self, indices, axis: Axis = 0, **kwargs) -> Series:
         nv.validate_take((), kwargs)
 
         indices = ensure_platform_int(indices)
+
+        if (
+            indices.ndim == 1
+            and using_copy_on_write()
+            and array_equal_fast(indices, np.arange(0, len(self), dtype=indices.dtype))
+        ):
+            return self.copy(deep=None)
+
         new_index = self.index.take(indices)
         new_values = self._values.take(indices)
 
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index 2bc4202cce5f5..67bf9a117f2a0 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -540,6 +540,40 @@ def test_assign_drop_duplicates(using_copy_on_write, method):
     tm.assert_frame_equal(df, df_orig)
 
 
+@pytest.mark.parametrize("obj", [Series([1, 2]), DataFrame({"a": [1, 2]})])
+def test_take(using_copy_on_write, obj):
+    # Check that no copy is made when we take all rows in original order
+    obj_orig = obj.copy()
+    obj2 = obj.take([0, 1])
+
+    if using_copy_on_write:
+        assert np.shares_memory(obj2.values, obj.values)
+    else:
+        assert not np.shares_memory(obj2.values, obj.values)
+
+    obj2.iloc[0] = 0
+    if using_copy_on_write:
+        assert not np.shares_memory(obj2.values, obj.values)
+    tm.assert_equal(obj, obj_orig)
+
+
+@pytest.mark.parametrize("obj", [Series([1, 2]), DataFrame({"a": [1, 2]})])
+def test_between_time(using_copy_on_write, obj):
+    obj.index = date_range("2018-04-09", periods=2, freq="1D20min")
+    obj_orig = obj.copy()
+    obj2 = obj.between_time("0:00", "1:00")
+
+    if using_copy_on_write:
+        assert np.shares_memory(obj2.values, obj.values)
+    else:
+        assert not np.shares_memory(obj2.values, obj.values)
+
+    obj2.iloc[0] = 0
+    if using_copy_on_write:
+        assert not np.shares_memory(obj2.values, obj.values)
+    tm.assert_equal(obj, obj_orig)
+
+
 def test_reindex_like(using_copy_on_write):
     df = DataFrame({"a": [1, 2], "b": "a"})
     other = DataFrame({"b": "a", "a": [1, 2]})