Skip to content

Commit 1c5195c

Browse files
committed
BUG: Fix copy semantics in __array__
This fixes the copy semantics of ``__array__``. While rejecting ``copy=False`` is pretty harmless, ``copy=True`` should never have been ignored; ignoring it is dangerous.
1 parent 2a10e04 commit 1c5195c

File tree

14 files changed

+92
-13
lines changed

14 files changed

+92
-13
lines changed

pandas/core/arrays/arrow/array.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -667,7 +667,15 @@ def __array__(
667667
self, dtype: NpDtype | None = None, copy: bool | None = None
668668
) -> np.ndarray:
669669
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
670-
return self.to_numpy(dtype=dtype)
670+
if copy is False:
671+
# TODO: By using `zero_copy_only` it may be possible to implement this
672+
raise ValueError(
673+
"Unable to avoid copy while creating an array as requested."
674+
)
675+
elif copy is None:
676+
copy = False # The NumPy copy=False meaning is different here.
677+
678+
return self.to_numpy(dtype=dtype, copy=copy)
671679

672680
def __invert__(self) -> Self:
673681
# This is a bit wise op for integer types

pandas/core/arrays/categorical.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -1663,7 +1663,7 @@ def __array__(
16631663
Specifies the dtype for the array.
16641664
16651665
copy : bool or None, optional
1666-
Unused.
1666+
See :func:`numpy.asarray`.
16671667
16681668
Returns
16691669
-------
@@ -1686,13 +1686,21 @@ def __array__(
16861686
>>> np.asarray(cat)
16871687
array(['a', 'b'], dtype=object)
16881688
"""
1689+
if copy is False:
1690+
raise ValueError(
1691+
"Unable to avoid copy while creating an array as requested."
1692+
)
1693+
1694+
# TODO: using asarray_func because NumPy 1.x doesn't support copy=None
1695+
asarray_func = np.asarray if copy is None else np.array
1696+
16891697
ret = take_nd(self.categories._values, self._codes)
16901698
if dtype and np.dtype(dtype) != self.categories.dtype:
1691-
return np.asarray(ret, dtype)
1699+
return asarray_func(ret, dtype)
16921700
# When we're a Categorical[ExtensionArray], like Interval,
16931701
# we need to ensure __array__ gets all the way to an
16941702
# ndarray.
1695-
return np.asarray(ret)
1703+
return asarray_func(ret)
16961704

16971705
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
16981706
# for binary ops, use our custom dunder methods

pandas/core/arrays/datetimelike.py

+7
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,14 @@ def __array__(
359359
) -> np.ndarray:
360360
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
361361
if is_object_dtype(dtype):
362+
if copy is False:
363+
raise ValueError(
364+
"Unable to avoid copy while creating an array as requested."
365+
)
362366
return np.array(list(self), dtype=object)
367+
368+
if copy is True:
369+
return np.array(self._ndarray, dtype=dtype)
363370
return self._ndarray
364371

365372
@overload

pandas/core/arrays/interval.py

+5
Original file line numberDiff line numberDiff line change
@@ -1606,6 +1606,11 @@ def __array__(
16061606
Return the IntervalArray's data as a numpy array of Interval
16071607
objects (with dtype='object')
16081608
"""
1609+
if copy is False:
1610+
raise ValueError(
1611+
"Unable to avoid copy while creating an array as requested."
1612+
)
1613+
16091614
left = self._left
16101615
right = self._right
16111616
mask = self.isna()

pandas/core/arrays/masked.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,14 @@ def __array__(
581581
the array interface, return my values
582582
We return an object array here to preserve our scalar values
583583
"""
584-
return self.to_numpy(dtype=dtype)
584+
if copy is False:
585+
raise ValueError(
586+
"Unable to avoid copy while creating an array as requested."
587+
)
588+
589+
if copy is None:
590+
copy = False # The NumPy copy=False meaning is different here.
591+
return self.to_numpy(dtype=dtype, copy=copy)
585592

586593
_HANDLED_TYPES: tuple[type, ...]
587594

pandas/core/arrays/numpy_.py

+3
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ def dtype(self) -> NumpyEADtype:
150150
def __array__(
151151
self, dtype: NpDtype | None = None, copy: bool | None = None
152152
) -> np.ndarray:
153+
if copy is not None:
154+
# Note: branch avoids `copy=None` for NumPy 1.x support
155+
return np.asarray(self._ndarray, dtype=dtype, copy=copy)
153156
return np.asarray(self._ndarray, dtype=dtype)
154157

155158
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):

pandas/core/arrays/period.py

+5
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,11 @@ def freqstr(self) -> str:
389389
def __array__(
390390
self, dtype: NpDtype | None = None, copy: bool | None = None
391391
) -> np.ndarray:
392+
if copy is False:
393+
raise ValueError(
394+
"Unable to avoid copy while creating an array as requested."
395+
)
396+
392397
if dtype == "i8":
393398
return self.asi8
394399
elif dtype == bool:

pandas/core/arrays/sparse/array.py

+5
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,11 @@ def from_spmatrix(cls, data: spmatrix) -> Self:
547547
def __array__(
548548
self, dtype: NpDtype | None = None, copy: bool | None = None
549549
) -> np.ndarray:
550+
if copy is False:
551+
raise ValueError(
552+
"Unable to avoid copy while creating an array as requested."
553+
)
554+
550555
fill_value = self.fill_value
551556

552557
if self.sp_index.ngaps == 0:

pandas/core/generic.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -2015,8 +2015,17 @@ def __array__(
20152015
self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
20162016
) -> np.ndarray:
20172017
values = self._values
2018-
arr = np.asarray(values, dtype=dtype)
2019-
if astype_is_view(values.dtype, arr.dtype) and self._mgr.is_single_block:
2018+
if copy is None:
2019+
# Note: branch avoids `copy=None` for NumPy 1.x support
2020+
arr = np.asarray(values, dtype=dtype)
2021+
else:
2022+
arr = np.asarray(values, dtype=dtype, copy=copy)
2023+
2024+
if (
2025+
copy is not False
2026+
and astype_is_view(values.dtype, arr.dtype)
2027+
and self._mgr.is_single_block
2028+
):
20202029
# Check if both conversions can be done without a copy
20212030
if astype_is_view(self.dtypes.iloc[0], values.dtype) and astype_is_view(
20222031
values.dtype, arr.dtype

pandas/core/indexes/base.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -908,7 +908,11 @@ def __array__(self, dtype=None, copy=None) -> np.ndarray:
908908
"""
909909
The array interface, return my values.
910910
"""
911-
return np.asarray(self._data, dtype=dtype)
911+
if copy is None:
912+
# Note: this branch avoids passing `copy=None` for NumPy 1.x support
913+
return np.asarray(self._data, dtype=dtype)
914+
915+
return np.asarray(self._data, dtype=dtype, copy=copy)
912916

913917
def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
914918
if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):

pandas/core/indexes/multi.py

+3
Original file line numberDiff line numberDiff line change
@@ -1391,6 +1391,9 @@ def copy( # type: ignore[override]
13911391

13921392
def __array__(self, dtype=None, copy=None) -> np.ndarray:
13931393
"""the array interface, return my values"""
1394+
if copy is True:
1395+
# Note: branch avoids `copy=None` for NumPy 1.x support
1396+
return np.asarray(self.values, dtype=dtype, copy=copy)
13941397
return self.values
13951398

13961399
def view(self, cls=None) -> Self:

pandas/core/internals/construction.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ def ndarray_to_mgr(
258258
# and a subsequent `astype` will not already result in a copy
259259
values = np.array(values, copy=True, order="F")
260260
else:
261-
values = np.array(values, copy=False)
261+
values = np.asarray(values)
262262
values = _ensure_2d(values)
263263

264264
else:

pandas/core/series.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -842,7 +842,7 @@ def __array__(
842842
the dtype is inferred from the data.
843843
844844
copy : bool or None, optional
845-
Unused.
845+
See :func:`numpy.asarray`.
846846
847847
Returns
848848
-------
@@ -879,8 +879,15 @@ def __array__(
879879
dtype='datetime64[ns]')
880880
"""
881881
values = self._values
882-
arr = np.asarray(values, dtype=dtype)
883-
if astype_is_view(values.dtype, arr.dtype):
882+
if copy is None:
883+
# Note: branch avoids `copy=None` for NumPy 1.x support
884+
arr = np.asarray(values, dtype=dtype)
885+
else:
886+
arr = np.asarray(values, dtype=dtype, copy=copy)
887+
888+
if copy is True:
889+
return arr
890+
if copy is False or astype_is_view(values.dtype, arr.dtype):
884891
arr = arr.view()
885892
arr.flags.writeable = False
886893
return arr

pandas/tests/extension/json/array.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -148,12 +148,20 @@ def __ne__(self, other):
148148
return NotImplemented
149149

150150
def __array__(self, dtype=None, copy=None):
151+
if copy is False:
152+
raise ValueError(
153+
"Unable to avoid copy while creating an array as requested."
154+
)
155+
151156
if dtype is None:
152157
dtype = object
153158
if dtype == object:
154159
# on py38 builds it looks like numpy is inferring to a non-1D array
155160
return construct_1d_object_array_from_listlike(list(self))
156-
return np.asarray(self.data, dtype=dtype)
161+
if copy is None:
162+
# Note: branch avoids `copy=None` for NumPy 1.x support
163+
return np.asarray(self.data, dtype=dtype)
164+
return np.asarray(self.data, dtype=dtype, copy=copy)
157165

158166
@property
159167
def nbytes(self) -> int:

0 commit comments

Comments
 (0)