Skip to content

Commit e647fac

Browse files
[backport 2.3.x] BUG: Fix copy semantics in __array__ (#60046) (#60189)
(cherry picked from commit eacf032) Co-authored-by: Joris Van den Bossche <[email protected]> Co-authored-by: Sebastian Berg <[email protected]>
1 parent ce56f2e commit e647fac

File tree

20 files changed

+269
-34
lines changed

20 files changed

+269
-34
lines changed

doc/source/whatsnew/v2.3.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ enhancement1
3232
Other enhancements
3333
^^^^^^^^^^^^^^^^^^
3434

35+
- The semantics for the ``copy`` keyword in ``__array__`` methods (i.e. called
36+
when using ``np.array()`` or ``np.asarray()`` on pandas objects) has been
37+
updated to work correctly with NumPy >= 2 (:issue:`57739`)
3538
- The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
3639
-
3740

pandas/core/arrays/arrow/array.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,16 @@ def __array__(
662662
self, dtype: NpDtype | None = None, copy: bool | None = None
663663
) -> np.ndarray:
664664
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
665-
return self.to_numpy(dtype=dtype)
665+
if copy is False:
666+
# TODO: By using `zero_copy_only` it may be possible to implement this
667+
raise ValueError(
668+
"Unable to avoid copy while creating an array as requested."
669+
)
670+
elif copy is None:
671+
# `to_numpy(copy=False)` has the meaning of NumPy `copy=None`.
672+
copy = False
673+
674+
return self.to_numpy(dtype=dtype, copy=copy)
666675

667676
def __invert__(self) -> Self:
668677
# This is a bit wise op for integer types

pandas/core/arrays/categorical.py

+25-8
Original file line numberDiff line numberDiff line change
@@ -577,11 +577,12 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
577577
raise ValueError("Cannot convert float NaN to integer")
578578

579579
elif len(self.codes) == 0 or len(self.categories) == 0:
580-
result = np.array(
581-
self,
582-
dtype=dtype,
583-
copy=copy,
584-
)
580+
# For NumPy 1.x compatibility we cannot use copy=None. And
581+
# `copy=False` has the meaning of `copy=None` here:
582+
if not copy:
583+
result = np.asarray(self, dtype=dtype)
584+
else:
585+
result = np.array(self, dtype=dtype)
585586

586587
else:
587588
# GH8628 (PERF): astype category codes instead of astyping array
@@ -1642,6 +1643,17 @@ def __array__(
16421643
"""
16431644
The numpy array interface.
16441645
1646+
Users should not call this directly. Rather, it is invoked by
1647+
:func:`numpy.array` and :func:`numpy.asarray`.
1648+
1649+
Parameters
1650+
----------
1651+
dtype : np.dtype or None
1652+
Specifies the dtype for the array.
1653+
1654+
copy : bool or None, optional
1655+
See :func:`numpy.asarray`.
1656+
16451657
Returns
16461658
-------
16471659
numpy.array
@@ -1659,13 +1671,18 @@ def __array__(
16591671
>>> np.asarray(cat)
16601672
array(['a', 'b'], dtype=object)
16611673
"""
1674+
if copy is False:
1675+
raise ValueError(
1676+
"Unable to avoid copy while creating an array as requested."
1677+
)
1678+
16621679
ret = take_nd(self.categories._values, self._codes)
1663-
if dtype and np.dtype(dtype) != self.categories.dtype:
1664-
return np.asarray(ret, dtype)
16651680
# When we're a Categorical[ExtensionArray], like Interval,
16661681
# we need to ensure __array__ gets all the way to an
16671682
# ndarray.
1668-
return np.asarray(ret)
1683+
1684+
# `take_nd` should already make a copy, so don't force again.
1685+
return np.asarray(ret, dtype=dtype)
16691686

16701687
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
16711688
# for binary ops, use our custom dunder methods

pandas/core/arrays/datetimelike.py

+7
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,14 @@ def __array__(
358358
) -> np.ndarray:
359359
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
360360
if is_object_dtype(dtype):
361+
if copy is False:
362+
raise ValueError(
363+
"Unable to avoid copy while creating an array as requested."
364+
)
361365
return np.array(list(self), dtype=object)
366+
367+
if copy is True:
368+
return np.array(self._ndarray, dtype=dtype)
362369
return self._ndarray
363370

364371
@overload

pandas/core/arrays/interval.py

+5
Original file line numberDiff line numberDiff line change
@@ -1574,6 +1574,11 @@ def __array__(
15741574
Return the IntervalArray's data as a numpy array of Interval
15751575
objects (with dtype='object')
15761576
"""
1577+
if copy is False:
1578+
raise ValueError(
1579+
"Unable to avoid copy while creating an array as requested."
1580+
)
1581+
15771582
left = self._left
15781583
right = self._right
15791584
mask = self.isna()

pandas/core/arrays/masked.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,17 @@ def __array__(
600600
the array interface, return my values
601601
We return an object array here to preserve our scalar values
602602
"""
603-
return self.to_numpy(dtype=dtype)
603+
if copy is False:
604+
if not self._hasna:
605+
# special case, here we can simply return the underlying data
606+
return np.array(self._data, dtype=dtype, copy=copy)
607+
raise ValueError(
608+
"Unable to avoid copy while creating an array as requested."
609+
)
610+
611+
if copy is None:
612+
copy = False # The NumPy copy=False meaning is different here.
613+
return self.to_numpy(dtype=dtype, copy=copy)
604614

605615
_HANDLED_TYPES: tuple[type, ...]
606616

pandas/core/arrays/numpy_.py

+3
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ def dtype(self) -> NumpyEADtype:
150150
def __array__(
151151
self, dtype: NpDtype | None = None, copy: bool | None = None
152152
) -> np.ndarray:
153+
if copy is not None:
154+
# Note: branch avoids `copy=None` for NumPy 1.x support
155+
return np.array(self._ndarray, dtype=dtype, copy=copy)
153156
return np.asarray(self._ndarray, dtype=dtype)
154157

155158
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):

pandas/core/arrays/period.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -407,8 +407,19 @@ def __array__(
407407
self, dtype: NpDtype | None = None, copy: bool | None = None
408408
) -> np.ndarray:
409409
if dtype == "i8":
410-
return self.asi8
411-
elif dtype == bool:
410+
# For NumPy 1.x compatibility we cannot use copy=None. And
411+
# `copy=False` has the meaning of `copy=None` here:
412+
if not copy:
413+
return np.asarray(self.asi8, dtype=dtype)
414+
else:
415+
return np.array(self.asi8, dtype=dtype)
416+
417+
if copy is False:
418+
raise ValueError(
419+
"Unable to avoid copy while creating an array as requested."
420+
)
421+
422+
if dtype == bool:
412423
return ~self._isnan
413424

414425
# This will raise TypeError for non-object dtypes

pandas/core/arrays/sparse/array.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -554,11 +554,20 @@ def from_spmatrix(cls, data: spmatrix) -> Self:
554554
def __array__(
555555
self, dtype: NpDtype | None = None, copy: bool | None = None
556556
) -> np.ndarray:
557-
fill_value = self.fill_value
558-
559557
if self.sp_index.ngaps == 0:
560558
# Compat for na dtype and int values.
561-
return self.sp_values
559+
if copy is True:
560+
return np.array(self.sp_values)
561+
else:
562+
return self.sp_values
563+
564+
if copy is False:
565+
raise ValueError(
566+
"Unable to avoid copy while creating an array as requested."
567+
)
568+
569+
fill_value = self.fill_value
570+
562571
if dtype is None:
563572
# Can NumPy represent this type?
564573
# If not, `np.result_type` will raise. We catch that

pandas/core/generic.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -2150,9 +2150,15 @@ def __array__(
21502150
self, dtype: npt.DTypeLike | None = None, copy: bool_t | None = None
21512151
) -> np.ndarray:
21522152
values = self._values
2153-
arr = np.asarray(values, dtype=dtype)
2153+
if copy is None:
2154+
# Note: branch avoids `copy=None` for NumPy 1.x support
2155+
arr = np.asarray(values, dtype=dtype)
2156+
else:
2157+
arr = np.array(values, dtype=dtype, copy=copy)
2158+
21542159
if (
2155-
astype_is_view(values.dtype, arr.dtype)
2160+
copy is not True
2161+
and astype_is_view(values.dtype, arr.dtype)
21562162
and using_copy_on_write()
21572163
and self._mgr.is_single_block
21582164
):

pandas/core/indexes/base.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -917,7 +917,11 @@ def __array__(self, dtype=None, copy=None) -> np.ndarray:
917917
"""
918918
The array interface, return my values.
919919
"""
920-
return np.asarray(self._data, dtype=dtype)
920+
if copy is None:
921+
# Note: this branch avoids `copy=None` for NumPy 1.x support
922+
return np.asarray(self._data, dtype=dtype)
923+
924+
return np.array(self._data, dtype=dtype, copy=copy)
921925

922926
def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
923927
if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):

pandas/core/indexes/multi.py

+9
Original file line numberDiff line numberDiff line change
@@ -1311,6 +1311,15 @@ def copy( # type: ignore[override]
13111311

13121312
def __array__(self, dtype=None, copy=None) -> np.ndarray:
13131313
"""the array interface, return my values"""
1314+
if copy is False:
1315+
# self.values is always a newly constructed array, so raise.
1316+
raise ValueError(
1317+
"Unable to avoid copy while creating an array as requested."
1318+
)
1319+
if copy is True:
1320+
# explicit np.array call to ensure a copy is made and unique objects
1321+
# are returned, because self.values is cached
1322+
return np.array(self.values, dtype=dtype)
13141323
return self.values
13151324

13161325
def view(self, cls=None) -> Self:

pandas/core/internals/construction.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -305,12 +305,12 @@ def ndarray_to_mgr(
305305

306306
elif isinstance(values, (np.ndarray, ExtensionArray)):
307307
# drop subclass info
308-
_copy = (
309-
copy_on_sanitize
310-
if (dtype is None or astype_is_view(values.dtype, dtype))
311-
else False
312-
)
313-
values = np.array(values, copy=_copy)
308+
if copy_on_sanitize and (dtype is None or astype_is_view(values.dtype, dtype)):
309+
# only force a copy now if copy=True was requested
310+
# and a subsequent `astype` will not already result in a copy
311+
values = np.array(values, copy=True, order="F")
312+
else:
313+
values = np.asarray(values)
314314
values = _ensure_2d(values)
315315

316316
else:

pandas/core/series.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -991,7 +991,7 @@ def __array__(
991991
the dtype is inferred from the data.
992992
993993
copy : bool or None, optional
994-
Unused.
994+
See :func:`numpy.asarray`.
995995
996996
Returns
997997
-------
@@ -1028,8 +1028,17 @@ def __array__(
10281028
dtype='datetime64[ns]')
10291029
"""
10301030
values = self._values
1031-
arr = np.asarray(values, dtype=dtype)
1032-
if using_copy_on_write() and astype_is_view(values.dtype, arr.dtype):
1031+
if copy is None:
1032+
# Note: branch avoids `copy=None` for NumPy 1.x support
1033+
arr = np.asarray(values, dtype=dtype)
1034+
else:
1035+
arr = np.array(values, dtype=dtype, copy=copy)
1036+
1037+
if copy is True:
1038+
return arr
1039+
if using_copy_on_write() and (
1040+
copy is False or astype_is_view(values.dtype, arr.dtype)
1041+
):
10331042
arr = arr.view()
10341043
arr.flags.writeable = False
10351044
return arr

pandas/tests/arrays/sparse/test_array.py

+31
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pytest
55

66
from pandas._libs.sparse import IntIndex
7+
from pandas.compat.numpy import np_version_gt2
78

89
import pandas as pd
910
from pandas import (
@@ -478,3 +479,33 @@ def test_zero_sparse_column():
478479

479480
expected = pd.DataFrame({"A": SparseArray([0, 0]), "B": [1, 3]}, index=[0, 2])
480481
tm.assert_frame_equal(result, expected)
482+
483+
484+
def test_array_interface(arr_data, arr):
485+
# https://github.com/pandas-dev/pandas/pull/60046
486+
result = np.asarray(arr)
487+
tm.assert_numpy_array_equal(result, arr_data)
488+
489+
# it always gives a copy by default
490+
result_copy1 = np.asarray(arr)
491+
result_copy2 = np.asarray(arr)
492+
assert not np.may_share_memory(result_copy1, result_copy2)
493+
494+
# or with explicit copy=True
495+
result_copy1 = np.array(arr, copy=True)
496+
result_copy2 = np.array(arr, copy=True)
497+
assert not np.may_share_memory(result_copy1, result_copy2)
498+
499+
if not np_version_gt2:
500+
# copy=False semantics are only supported in NumPy>=2.
501+
return
502+
503+
# for sparse arrays, copy=False is never allowed
504+
with pytest.raises(ValueError, match="Unable to avoid copy while creating"):
505+
np.array(arr, copy=False)
506+
507+
# except when there are actually no sparse filled values
508+
arr2 = SparseArray(np.array([1, 2, 3]))
509+
result_nocopy1 = np.array(arr2, copy=False)
510+
result_nocopy2 = np.array(arr2, copy=False)
511+
assert np.may_share_memory(result_nocopy1, result_nocopy2)

pandas/tests/arrays/test_datetimelike.py

+8
Original file line numberDiff line numberDiff line change
@@ -1148,9 +1148,17 @@ def test_array_interface(self, arr1d):
11481148
result = np.asarray(arr, dtype=object)
11491149
tm.assert_numpy_array_equal(result, expected)
11501150

1151+
# to int64 gives the underlying representation
11511152
result = np.asarray(arr, dtype="int64")
11521153
tm.assert_numpy_array_equal(result, arr.asi8)
11531154

1155+
result2 = np.asarray(arr, dtype="int64")
1156+
assert np.may_share_memory(result, result2)
1157+
1158+
result_copy1 = np.array(arr, dtype="int64", copy=True)
1159+
result_copy2 = np.array(arr, dtype="int64", copy=True)
1160+
assert not np.may_share_memory(result_copy1, result_copy2)
1161+
11541162
# to other dtypes
11551163
msg = r"float\(\) argument must be a string or a( real)? number, not 'Period'"
11561164
with pytest.raises(TypeError, match=msg):

0 commit comments

Comments
 (0)