diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index d9cdc8beaebea..47bfee5858f38 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -19,6 +19,7 @@ Enhancements Copy-on-Write improvements ^^^^^^^^^^^^^^^^^^^^^^^^^^ +- :attr:`Index.values` now returns a read-only NumPy array when Copy-on-Write is enabled (:issue:`53704`) - Setting a :class:`Series` into a :class:`DataFrame` now creates a lazy instead of a deep copy (:issue:`53142`) - The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary of Index objects and specifying ``copy=False``, will now use a lazy copy diff --git a/pandas/conftest.py b/pandas/conftest.py index ed05ddd1b2f31..1dcfc88eb1bfd 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -685,7 +685,7 @@ def index_with_missing(request): # GH 35538. Use deep copy to avoid illusive bug on np-dev # GHA pipeline that writes into indices_dict despite copy ind = indices_dict[request.param].copy(deep=True) - vals = ind.values + vals = ind.values.copy() if request.param in ["tuples", "mi-with-dt64tz-level", "multi"]: # For setting missing values in the top level of MultiIndex vals = ind.tolist() diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 30bc6a42a2db5..e03c126d86aff 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5058,6 +5058,12 @@ def values(self) -> ArrayLike: >>> idx.values array([1, 2, 3]) """ + if using_copy_on_write(): + data = self._data + if isinstance(data, np.ndarray): + data = data.view() + data.flags.writeable = False + return data return self._data @cache_readonly diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index d818e1e862c12..2125c1aef6162 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -18,6 +18,8 @@ import numpy as np +from pandas._config import using_copy_on_write + from pandas._libs import ( NaT, Timedelta, @@ 
-451,7 +453,11 @@ def _with_freq(self, freq): @property def values(self) -> np.ndarray: # NB: For Datetime64TZ this is lossy - return self._data._ndarray + data = self._data._ndarray + if using_copy_on_write(): + data = data.view() + data.flags.writeable = False + return data @doc(DatetimeIndexOpsMixin.shift) def shift(self, periods: int = 1, freq=None) -> Self: diff --git a/pandas/tests/copy_view/index/test_datetimeindex.py b/pandas/tests/copy_view/index/test_datetimeindex.py index f691d5589f48c..f54beca4cc414 100644 --- a/pandas/tests/copy_view/index/test_datetimeindex.py +++ b/pandas/tests/copy_view/index/test_datetimeindex.py @@ -54,3 +54,12 @@ def test_datetimeindex_isocalendar(using_copy_on_write): ser.iloc[0] = Timestamp("2020-12-31") if using_copy_on_write: tm.assert_index_equal(df.index, expected) + + +def test_index_values(using_copy_on_write): + idx = date_range("2019-12-31", periods=3, freq="D") + result = idx.values + if using_copy_on_write: + assert result.flags.writeable is False + else: + assert result.flags.writeable is True diff --git a/pandas/tests/copy_view/index/test_index.py b/pandas/tests/copy_view/index/test_index.py index 826505cbaf03f..6411e20a972e7 100644 --- a/pandas/tests/copy_view/index/test_index.py +++ b/pandas/tests/copy_view/index/test_index.py @@ -167,3 +167,12 @@ def test_index_to_frame(using_copy_on_write): df.iloc[0, 0] = 100 tm.assert_index_equal(idx, expected) + + +def test_index_values(using_copy_on_write): + idx = Index([1, 2, 3]) + result = idx.values + if using_copy_on_write: + assert result.flags.writeable is False + else: + assert result.flags.writeable is True diff --git a/pandas/tests/copy_view/test_setitem.py b/pandas/tests/copy_view/test_setitem.py index 2a99a00e249fa..5016b57bdd0b7 100644 --- a/pandas/tests/copy_view/test_setitem.py +++ b/pandas/tests/copy_view/test_setitem.py @@ -58,18 +58,12 @@ def test_set_column_with_index(using_copy_on_write): # the index data is copied assert not 
np.shares_memory(get_array(df, "c"), idx.values) - # and thus modifying the index does not modify the DataFrame - idx.values[0] = 0 - tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c")) - idx = RangeIndex(1, 4) arr = idx.values df["d"] = idx assert not np.shares_memory(get_array(df, "d"), arr) - arr[0] = 0 - tm.assert_series_equal(df["d"], Series([1, 2, 3], name="d")) def test_set_columns_with_dataframe(using_copy_on_write): diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 4080dc7081771..977c7da7d866f 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -341,7 +341,7 @@ def test_constructor(self, dtype): # copy # pass list, coerce fine index = index_cls([-5, 0, 1, 2], dtype=dtype) - arr = index.values + arr = index.values.copy() new_index = index_cls(arr, copy=True) tm.assert_index_equal(new_index, index, exact=True) val = arr[0] + 3000 diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 57ef03b380601..10fce6b5bf43d 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -433,8 +433,12 @@ def test_read_columns(self, engine): df, engine, expected=expected, read_kwargs={"columns": ["string"]} ) - def test_write_index(self, engine): + def test_write_index(self, engine, using_copy_on_write, request): check_names = engine != "fastparquet" + if using_copy_on_write and engine == "fastparquet": + request.node.add_marker( + pytest.mark.xfail(reason="fastparquet write into index") + ) df = pd.DataFrame({"A": [1, 2, 3]}) check_round_trip(df, engine) @@ -1213,12 +1217,14 @@ def test_error_on_using_partition_cols_and_partition_on( partition_cols=partition_cols, ) + @pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index") def test_empty_dataframe(self, fp): # GH #27339 df = pd.DataFrame() expected = df.copy() check_round_trip(df, fp, expected=expected) + 
@pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index") def test_timezone_aware_index(self, fp, timezone_aware_date_list): idx = 5 * [timezone_aware_date_list] @@ -1328,6 +1334,7 @@ def test_invalid_dtype_backend(self, engine): with pytest.raises(ValueError, match=msg): read_parquet(path, dtype_backend="numpy") + @pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index") def test_empty_columns(self, fp): # GH 52034 df = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name"))