Skip to content

Commit ed4ea1a

Browse files
CoW: Return read-only array in Index.values (#53704)
Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent cbfe215 commit ed4ea1a

File tree

9 files changed

+42
-10
lines changed

9 files changed

+42
-10
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Enhancements
1919
Copy-on-Write improvements
2020
^^^^^^^^^^^^^^^^^^^^^^^^^^
2121

22+
- Accessing :attr:`Index.values` will now return a read-only NumPy array (:issue:`53704`)
2223
- Setting a :class:`Series` into a :class:`DataFrame` now creates a lazy instead of a deep copy (:issue:`53142`)
2324
- The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary
2425
of Index objects and specifying ``copy=False``, will now use a lazy copy

pandas/conftest.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -685,7 +685,7 @@ def index_with_missing(request):
685685
# GH 35538. Use deep copy to avoid elusive bug on np-dev
686686
# GHA pipeline that writes into indices_dict despite copy
687687
ind = indices_dict[request.param].copy(deep=True)
688-
vals = ind.values
688+
vals = ind.values.copy()
689689
if request.param in ["tuples", "mi-with-dt64tz-level", "multi"]:
690690
# For setting missing values in the top level of MultiIndex
691691
vals = ind.tolist()

pandas/core/indexes/base.py

+6
Original file line numberDiff line numberDiff line change
@@ -5058,6 +5058,12 @@ def values(self) -> ArrayLike:
50585058
>>> idx.values
50595059
array([1, 2, 3])
50605060
"""
5061+
if using_copy_on_write():
5062+
data = self._data
5063+
if isinstance(data, np.ndarray):
5064+
data = data.view()
5065+
data.flags.writeable = False
5066+
return data
50615067
return self._data
50625068

50635069
@cache_readonly

pandas/core/indexes/datetimelike.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818

1919
import numpy as np
2020

21+
from pandas._config import using_copy_on_write
22+
2123
from pandas._libs import (
2224
NaT,
2325
Timedelta,
@@ -451,7 +453,11 @@ def _with_freq(self, freq):
451453
@property
452454
def values(self) -> np.ndarray:
453455
# NB: For Datetime64TZ this is lossy
454-
return self._data._ndarray
456+
data = self._data._ndarray
457+
if using_copy_on_write():
458+
data = data.view()
459+
data.flags.writeable = False
460+
return data
455461

456462
@doc(DatetimeIndexOpsMixin.shift)
457463
def shift(self, periods: int = 1, freq=None) -> Self:

pandas/tests/copy_view/index/test_datetimeindex.py

+9
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,12 @@ def test_datetimeindex_isocalendar(using_copy_on_write):
5454
ser.iloc[0] = Timestamp("2020-12-31")
5555
if using_copy_on_write:
5656
tm.assert_index_equal(df.index, expected)
57+
58+
59+
def test_index_values(using_copy_on_write):
60+
idx = date_range("2019-12-31", periods=3, freq="D")
61+
result = idx.values
62+
if using_copy_on_write:
63+
assert result.flags.writeable is False
64+
else:
65+
assert result.flags.writeable is True

pandas/tests/copy_view/index/test_index.py

+9
Original file line numberDiff line numberDiff line change
@@ -167,3 +167,12 @@ def test_index_to_frame(using_copy_on_write):
167167

168168
df.iloc[0, 0] = 100
169169
tm.assert_index_equal(idx, expected)
170+
171+
172+
def test_index_values(using_copy_on_write):
173+
idx = Index([1, 2, 3])
174+
result = idx.values
175+
if using_copy_on_write:
176+
assert result.flags.writeable is False
177+
else:
178+
assert result.flags.writeable is True

pandas/tests/copy_view/test_setitem.py

-6
Original file line numberDiff line numberDiff line change
@@ -58,18 +58,12 @@ def test_set_column_with_index(using_copy_on_write):
5858
# the index data is copied
5959
assert not np.shares_memory(get_array(df, "c"), idx.values)
6060

61-
# and thus modifying the index does not modify the DataFrame
62-
idx.values[0] = 0
63-
tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
64-
6561
idx = RangeIndex(1, 4)
6662
arr = idx.values
6763

6864
df["d"] = idx
6965

7066
assert not np.shares_memory(get_array(df, "d"), arr)
71-
arr[0] = 0
72-
tm.assert_series_equal(df["d"], Series([1, 2, 3], name="d"))
7367

7468

7569
def test_set_columns_with_dataframe(using_copy_on_write):

pandas/tests/indexes/numeric/test_numeric.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ def test_constructor(self, dtype):
341341
# copy
342342
# pass list, coerce fine
343343
index = index_cls([-5, 0, 1, 2], dtype=dtype)
344-
arr = index.values
344+
arr = index.values.copy()
345345
new_index = index_cls(arr, copy=True)
346346
tm.assert_index_equal(new_index, index, exact=True)
347347
val = arr[0] + 3000

pandas/tests/io/test_parquet.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -433,8 +433,12 @@ def test_read_columns(self, engine):
433433
df, engine, expected=expected, read_kwargs={"columns": ["string"]}
434434
)
435435

436-
def test_write_index(self, engine):
436+
def test_write_index(self, engine, using_copy_on_write, request):
437437
check_names = engine != "fastparquet"
438+
if using_copy_on_write and engine == "fastparquet":
439+
request.node.add_marker(
440+
pytest.mark.xfail(reason="fastparquet write into index")
441+
)
438442

439443
df = pd.DataFrame({"A": [1, 2, 3]})
440444
check_round_trip(df, engine)
@@ -1213,12 +1217,14 @@ def test_error_on_using_partition_cols_and_partition_on(
12131217
partition_cols=partition_cols,
12141218
)
12151219

1220+
@pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index")
12161221
def test_empty_dataframe(self, fp):
12171222
# GH #27339
12181223
df = pd.DataFrame()
12191224
expected = df.copy()
12201225
check_round_trip(df, fp, expected=expected)
12211226

1227+
@pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index")
12221228
def test_timezone_aware_index(self, fp, timezone_aware_date_list):
12231229
idx = 5 * [timezone_aware_date_list]
12241230

@@ -1328,6 +1334,7 @@ def test_invalid_dtype_backend(self, engine):
13281334
with pytest.raises(ValueError, match=msg):
13291335
read_parquet(path, dtype_backend="numpy")
13301336

1337+
@pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index")
13311338
def test_empty_columns(self, fp):
13321339
# GH 52034
13331340
df = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name"))

0 commit comments

Comments
 (0)