From a441d73126337fcdb5ba2731f77375042aa7e709 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Wed, 29 Mar 2023 18:31:58 -0400 Subject: [PATCH] Backport PR #52276: API / CoW: Respect CoW for DataFrame(Index) --- doc/source/whatsnew/v2.0.0.rst | 3 ++- pandas/core/internals/construction.py | 4 ++-- pandas/tests/copy_view/test_constructors.py | 22 +++++++++++---------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index d9495d843d939..efb72b862143f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -191,7 +191,8 @@ Copy-on-Write improvements of those Series objects for the columns of the DataFrame (:issue:`50777`) - The :class:`DataFrame` constructor, when constructing a DataFrame from a - :class:`Series` and specifying ``copy=False``, will now respect Copy-on-Write. + :class:`Series` or :class:`Index` and specifying ``copy=False``, will + now respect Copy-on-Write. - The :class:`DataFrame` and :class:`Series` constructors, when constructing from a NumPy array, will now copy the array by default to avoid mutating diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 7aad764aaa8eb..efce472931a20 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -291,7 +291,7 @@ def ndarray_to_mgr( if values.ndim == 1: values = values.reshape(-1, 1) - elif isinstance(values, ABCSeries): + elif isinstance(values, (ABCSeries, Index)): if not copy_on_sanitize and ( dtype is None or astype_is_view(values.dtype, dtype) ): @@ -304,7 +304,7 @@ def ndarray_to_mgr( values = _ensure_2d(values) - elif isinstance(values, (np.ndarray, ExtensionArray, Index)): + elif isinstance(values, (np.ndarray, ExtensionArray)): # drop subclass info _copy = ( copy_on_sanitize diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index 2f2548ad0f238..e8d6f15191ffd 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -260,26 +260,28 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype): assert np.shares_memory(arr_before, arr_after) +@pytest.mark.parametrize("cons", [Series, Index]) @pytest.mark.parametrize( "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)] ) -def test_dataframe_from_series(using_copy_on_write, data, dtype): - ser = Series(data, dtype=dtype) - ser_orig = ser.copy() - df = DataFrame(ser, dtype=dtype) - assert np.shares_memory(get_array(ser), get_array(df, 0)) +def test_dataframe_from_series_or_index(using_copy_on_write, data, dtype, cons): + obj = cons(data, dtype=dtype) + obj_orig = obj.copy() + df = DataFrame(obj, dtype=dtype) + assert np.shares_memory(get_array(obj), get_array(df, 0)) if using_copy_on_write: assert not df._mgr._has_no_reference(0) df.iloc[0, 0] = data[-1] if using_copy_on_write: - tm.assert_series_equal(ser, ser_orig) + tm.assert_equal(obj, obj_orig) -def test_dataframe_from_series_different_dtype(using_copy_on_write): - ser = Series([1, 2], dtype="int64") - df = DataFrame(ser, dtype="int32") - assert not np.shares_memory(get_array(ser), get_array(df, 0)) +@pytest.mark.parametrize("cons", [Series, Index]) +def test_dataframe_from_series_or_index_different_dtype(using_copy_on_write, cons): + obj = cons([1, 2], dtype="int64") + df = DataFrame(obj, dtype="int32") + assert not np.shares_memory(get_array(obj), get_array(df, 0)) if using_copy_on_write: assert df._mgr._has_no_reference(0)