From 1c5195c04bd20420bb8e6a50c879ed5f06b4eea2 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Tue, 15 Oct 2024 12:57:40 +0200
Subject: [PATCH 01/15] BUG: Fix copy semantics in ``__array__``

This fixes the semantics of ``__array__``.  While rejecting ``copy=False``
is pretty harmless, ``copy=True`` should never have been ignored: silently
returning data that may share memory with the original is dangerous.
---
 pandas/core/arrays/arrow/array.py     | 10 +++++++++-
 pandas/core/arrays/categorical.py     | 14 +++++++++++---
 pandas/core/arrays/datetimelike.py    |  7 +++++++
 pandas/core/arrays/interval.py        |  5 +++++
 pandas/core/arrays/masked.py          |  9 ++++++++-
 pandas/core/arrays/numpy_.py          |  3 +++
 pandas/core/arrays/period.py          |  5 +++++
 pandas/core/arrays/sparse/array.py    |  5 +++++
 pandas/core/generic.py                | 13 +++++++++++--
 pandas/core/indexes/base.py           |  6 +++++-
 pandas/core/indexes/multi.py          |  3 +++
 pandas/core/internals/construction.py |  2 +-
 pandas/core/series.py                 | 13 ++++++++++---
 pandas/tests/extension/json/array.py  | 10 +++++++++-
 14 files changed, 92 insertions(+), 13 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 619e7b3ccfb4f..940d08216a963 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -667,7 +667,15 @@ def __array__(
         self, dtype: NpDtype | None = None, copy: bool | None = None
     ) -> np.ndarray:
         """Correctly construct numpy arrays when passed to `np.asarray()`."""
-        return self.to_numpy(dtype=dtype)
+        if copy is False:
+            # TODO: By using `zero_copy_only` it may be possible to implement this
+            raise ValueError(
+                "Unable to avoid copy while creating an array as requested."
+            )
+        elif copy is None:
+            copy = False  # The NumPy copy=False meaning is different here.
+
+        return self.to_numpy(dtype=dtype, copy=copy)
 
     def __invert__(self) -> Self:
         # This is a bit wise op for integer types
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 7cde4c53cb2f5..21b32b1c6fa89 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1663,7 +1663,7 @@ def __array__(
             Specifies the the dtype for the array.
 
         copy : bool or None, optional
-            Unused.
+            See :func:`numpy.asarray`.
 
         Returns
         -------
@@ -1686,13 +1686,21 @@ def __array__(
         >>> np.asarray(cat)
         array(['a', 'b'], dtype=object)
         """
+        if copy is False:
+            raise ValueError(
+                "Unable to avoid copy while creating an array as requested."
+            )
+
+        # TODO: using asarray_func because NumPy 1.x doesn't support copy=None
+        asarray_func = np.asarray if copy is None else np.array
+
         ret = take_nd(self.categories._values, self._codes)
         if dtype and np.dtype(dtype) != self.categories.dtype:
-            return np.asarray(ret, dtype)
+            return asarray_func(ret, dtype)
         # When we're a Categorical[ExtensionArray], like Interval,
         # we need to ensure __array__ gets all the way to an
         # ndarray.
-        return np.asarray(ret)
+        return asarray_func(ret)
 
     def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
         # for binary ops, use our custom dunder methods
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index a25a698856747..9c821bf0d184e 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -359,7 +359,14 @@ def __array__(
     ) -> np.ndarray:
         # used for Timedelta/DatetimeArray, overwritten by PeriodArray
         if is_object_dtype(dtype):
+            if copy is False:
+                raise ValueError(
+                    "Unable to avoid copy while creating an array as requested."
+                )
             return np.array(list(self), dtype=object)
+
+        if copy is True:
+            return np.array(self._ndarray, dtype=dtype)
         return self._ndarray
 
     @overload
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
index 2ac9c77bef322..f6b815a874c10 100644
--- a/pandas/core/arrays/interval.py
+++ b/pandas/core/arrays/interval.py
@@ -1606,6 +1606,11 @@ def __array__(
         Return the IntervalArray's data as a numpy array of Interval
         objects (with dtype='object')
         """
+        if copy is False:
+            raise ValueError(
+                "Unable to avoid copy while creating an array as requested."
+            )
+
         left = self._left
         right = self._right
         mask = self.isna()
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 92ed690e527c7..1be7787d67c93 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -581,7 +581,14 @@ def __array__(
         the array interface, return my values
         We return an object array here to preserve our scalar values
         """
-        return self.to_numpy(dtype=dtype)
+        if copy is False:
+            raise ValueError(
+                "Unable to avoid copy while creating an array as requested."
+            )
+
+        if copy is None:
+            copy = False  # The NumPy copy=False meaning is different here.
+        return self.to_numpy(dtype=dtype, copy=copy)
 
     _HANDLED_TYPES: tuple[type, ...]
 
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
index aafcd82114b97..e8b33937ca866 100644
--- a/pandas/core/arrays/numpy_.py
+++ b/pandas/core/arrays/numpy_.py
@@ -150,6 +150,9 @@ def dtype(self) -> NumpyEADtype:
     def __array__(
         self, dtype: NpDtype | None = None, copy: bool | None = None
     ) -> np.ndarray:
+        if copy is not None:
+            # Note: branch avoids `copy=None` for NumPy 1.x support
+            return np.asarray(self._ndarray, dtype=dtype, copy=copy)
         return np.asarray(self._ndarray, dtype=dtype)
 
     def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index 7d0ad74f851f0..b2a56b5ca2df3 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -389,6 +389,11 @@ def freqstr(self) -> str:
     def __array__(
         self, dtype: NpDtype | None = None, copy: bool | None = None
     ) -> np.ndarray:
+        if copy is False:
+            raise ValueError(
+                "Unable to avoid copy while creating an array as requested."
+            )
+
         if dtype == "i8":
             return self.asi8
         elif dtype == bool:
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 0c76280e7fdb4..c1b3605c4e5b5 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -547,6 +547,11 @@ def from_spmatrix(cls, data: spmatrix) -> Self:
     def __array__(
         self, dtype: NpDtype | None = None, copy: bool | None = None
     ) -> np.ndarray:
+        if copy is False:
+            raise ValueError(
+                "Unable to avoid copy while creating an array as requested."
+            )
+
         fill_value = self.fill_value
 
         if self.sp_index.ngaps == 0:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 42516f0a85e07..02fbb08a14804 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2015,8 +2015,17 @@ def __array__(
         self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
     ) -> np.ndarray:
         values = self._values
-        arr = np.asarray(values, dtype=dtype)
-        if astype_is_view(values.dtype, arr.dtype) and self._mgr.is_single_block:
+        if copy is None:
+            # Note: branch avoids `copy=None` for NumPy 1.x support
+            arr = np.asarray(values, dtype=dtype)
+        else:
+            arr = np.asarray(values, dtype=dtype, copy=copy)
+
+        if (
+            copy is not False
+            and astype_is_view(values.dtype, arr.dtype)
+            and self._mgr.is_single_block
+        ):
             # Check if both conversions can be done without a copy
             if astype_is_view(self.dtypes.iloc[0], values.dtype) and astype_is_view(
                 values.dtype, arr.dtype
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 749a5fea4d513..5fdf073b9e8c9 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -908,7 +908,11 @@ def __array__(self, dtype=None, copy=None) -> np.ndarray:
         """
         The array interface, return my values.
         """
-        return np.asarray(self._data, dtype=dtype)
+        if copy is None:
+            # Note, that the if branch exists for NumPy 1.x support
+            return np.asarray(self._data, dtype=dtype)
+
+        return np.asarray(self._data, dtype=dtype, copy=copy)
 
     def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
         if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index ae9b272af9fe9..55f96ce777d09 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -1391,6 +1391,9 @@ def copy(  # type: ignore[override]
 
     def __array__(self, dtype=None, copy=None) -> np.ndarray:
         """the array interface, return my values"""
+        if copy is True:
+            # Note: branch avoids `copy=None` for NumPy 1.x support
+            return np.asarray(self.values, dtype=dtype, copy=copy)
         return self.values
 
     def view(self, cls=None) -> Self:
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 959e572b2b35b..0812ba5e6def4 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -258,7 +258,7 @@ def ndarray_to_mgr(
             # and a subsequent `astype` will not already result in a copy
             values = np.array(values, copy=True, order="F")
         else:
-            values = np.array(values, copy=False)
+            values = np.asarray(values)
         values = _ensure_2d(values)
 
     else:
diff --git a/pandas/core/series.py b/pandas/core/series.py
index fe2bb0b5aa5c3..2270e0966a505 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -842,7 +842,7 @@ def __array__(
             the dtype is inferred from the data.
 
         copy : bool or None, optional
-            Unused.
+            See :func:`numpy.asarray`.
 
         Returns
         -------
@@ -879,8 +879,15 @@ def __array__(
               dtype='datetime64[ns]')
         """
         values = self._values
-        arr = np.asarray(values, dtype=dtype)
-        if astype_is_view(values.dtype, arr.dtype):
+        if copy is None:
+            # Note: branch avoids `copy=None` for NumPy 1.x support
+            arr = np.asarray(values, dtype=dtype)
+        else:
+            arr = np.asarray(values, dtype=dtype, copy=copy)
+
+        if copy is True:
+            return arr
+        if copy is False or astype_is_view(values.dtype, arr.dtype):
             arr = arr.view()
             arr.flags.writeable = False
         return arr
diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
index 4fa48023fbc95..a68c8a06e1d18 100644
--- a/pandas/tests/extension/json/array.py
+++ b/pandas/tests/extension/json/array.py
@@ -148,12 +148,20 @@ def __ne__(self, other):
         return NotImplemented
 
     def __array__(self, dtype=None, copy=None):
+        if copy is False:
+            raise ValueError(
+                "Unable to avoid copy while creating an array as requested."
+            )
+
         if dtype is None:
             dtype = object
         if dtype == object:
             # on py38 builds it looks like numpy is inferring to a non-1D array
             return construct_1d_object_array_from_listlike(list(self))
-        return np.asarray(self.data, dtype=dtype)
+        if copy is None:
+            # Note: branch avoids `copy=None` for NumPy 1.x support
+            return np.asarray(self.data, dtype=dtype)
+        return np.asarray(self.data, dtype=dtype, copy=copy)
 
     @property
     def nbytes(self) -> int:

From 2183861e1addba9c883ed5e206db0450aa161d3b Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Tue, 15 Oct 2024 13:34:39 +0200
Subject: [PATCH 02/15] BUG: Fix one more path not translating ``copy=``
 correctly

---
 pandas/core/arrays/categorical.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 21b32b1c6fa89..c29466b13f6ff 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -579,10 +579,12 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
             raise ValueError("Cannot convert float NaN to integer")
 
         elif len(self.codes) == 0 or len(self.categories) == 0:
-            result = np.array(
+            # For NumPy 1.x compatibility we cannot use copy=None.  And
+            # `copy=False` has the meaning of `copy=None` here:
+            asarray_func = np.array if copy else np.asarray
+            result = asarray_func(
                 self,
                 dtype=dtype,
-                copy=copy,
             )
 
         else:

From 404827b81381439626180fd9a690776d5eacc337 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Tue, 15 Oct 2024 14:16:38 +0200
Subject: [PATCH 03/15] BUG: Avoid asarray with copy= (it was added in 2.0)

---
 pandas/core/arrays/numpy_.py | 2 +-
 pandas/core/generic.py       | 2 +-
 pandas/core/indexes/base.py  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
index e8b33937ca866..9f7238a97d808 100644
--- a/pandas/core/arrays/numpy_.py
+++ b/pandas/core/arrays/numpy_.py
@@ -152,7 +152,7 @@ def __array__(
     ) -> np.ndarray:
         if copy is not None:
             # Note: branch avoids `copy=None` for NumPy 1.x support
-            return np.asarray(self._ndarray, dtype=dtype, copy=copy)
+            return np.array(self._ndarray, dtype=dtype, copy=copy)
         return np.asarray(self._ndarray, dtype=dtype)
 
     def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 02fbb08a14804..0a43556496973 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2019,7 +2019,7 @@ def __array__(
             # Note: branch avoids `copy=None` for NumPy 1.x support
             arr = np.asarray(values, dtype=dtype)
         else:
-            arr = np.asarray(values, dtype=dtype, copy=copy)
+            arr = np.array(values, dtype=dtype, copy=copy)
 
         if (
             copy is not False
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 5fdf073b9e8c9..cf3d1e6a2ee2d 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -912,7 +912,7 @@ def __array__(self, dtype=None, copy=None) -> np.ndarray:
             # Note, that the if branch exists for NumPy 1.x support
             return np.asarray(self._data, dtype=dtype)
 
-        return np.asarray(self._data, dtype=dtype, copy=copy)
+        return np.array(self._data, dtype=dtype, copy=copy)
 
     def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
         if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):

From ec08728216a172f518e230ccc637e44cf0a65582 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Tue, 15 Oct 2024 14:44:13 +0200
Subject: [PATCH 04/15] More fixes found by typing checks (or working around
 them)

---
 pandas/core/arrays/categorical.py | 20 +++++++++-----------
 pandas/core/indexes/multi.py      |  2 +-
 pandas/core/series.py             |  2 +-
 3 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index c29466b13f6ff..ac64c57bece9f 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -581,11 +581,10 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
         elif len(self.codes) == 0 or len(self.categories) == 0:
             # For NumPy 1.x compatibility we cannot use copy=None.  And
             # `copy=False` has the meaning of `copy=None` here:
-            asarray_func = np.array if copy else np.asarray
-            result = asarray_func(
-                self,
-                dtype=dtype,
-            )
+            if not copy:
+                result = np.asarray(self, dtype=dtype)
+            else:
+                result = np.array(self, dtype=dtype)
 
         else:
             # GH8628 (PERF): astype category codes instead of astyping array
@@ -1693,16 +1692,15 @@ def __array__(
                 "Unable to avoid copy while creating an array as requested."
             )
 
-        # TODO: using asarray_func because NumPy 1.x doesn't support copy=None
-        asarray_func = np.asarray if copy is None else np.array
-
         ret = take_nd(self.categories._values, self._codes)
-        if dtype and np.dtype(dtype) != self.categories.dtype:
-            return asarray_func(ret, dtype)
         # When we're a Categorical[ExtensionArray], like Interval,
         # we need to ensure __array__ gets all the way to an
         # ndarray.
-        return asarray_func(ret)
+
+        if copy is None:
+            # Branch required since copy=None is not defined on 1.x
+            return np.asarray(ret, dtype=dtype)
+        return np.array(ret, dtype=dtype)
 
     def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
         # for binary ops, use our custom dunder methods
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 55f96ce777d09..f21ab28061e9d 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -1393,7 +1393,7 @@ def __array__(self, dtype=None, copy=None) -> np.ndarray:
         """the array interface, return my values"""
         if copy is True:
             # Note: branch avoids `copy=None` for NumPy 1.x support
-            return np.asarray(self.values, dtype=dtype, copy=copy)
+            return np.array(self.values, dtype=dtype, copy=copy)
         return self.values
 
     def view(self, cls=None) -> Self:
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 2270e0966a505..97193c7b564e9 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -883,7 +883,7 @@ def __array__(
             # Note: branch avoids `copy=None` for NumPy 1.x support
             arr = np.asarray(values, dtype=dtype)
         else:
-            arr = np.asarray(values, dtype=dtype, copy=copy)
+            arr = np.array(values, dtype=dtype, copy=copy)
 
         if copy is True:
             return arr

From 4ac6323be3f04c2613eb2113f28f437c62fff31d Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Wed, 30 Oct 2024 15:50:17 +0100
Subject: [PATCH 05/15] TST: Add test for __array__ copy behavior

---
 pandas/tests/base/test_conversion.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py
index d8af7abe83084..386660ddbb227 100644
--- a/pandas/tests/base/test_conversion.py
+++ b/pandas/tests/base/test_conversion.py
@@ -4,6 +4,7 @@
 from pandas._config import using_string_dtype
 
 from pandas.compat import HAS_PYARROW
+from pandas.compat.numpy import np_version_gt2
 
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 
@@ -362,6 +363,27 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request):
     result = np.asarray(thing)
     tm.assert_numpy_array_equal(result, expected)
 
+    # Additionally, we check the `copy=` semantics for asarray
+    # (these are implemented by us via `__array__`).
+    result_cp1 = np.asarray(thing, copy=True)
+    result_cp2 = np.asarray(thing, copy=True)
+    # When called with `copy=True` NumPy/we should ensure a copy was made
+    assert not np.may_share_memory(result_cp1, result_cp2)
+
+    if not np_version_gt2:
+        # copy=False semantics are only supported in NumPy>=2.
+        return
+
+    try:
+        result_nocopy1 = np.asarray(thing, copy=False)
+    except ValueError:
+        # An error is always acceptable for `copy=False`
+        return
+
+    result_nocopy2 = np.asarray(thing, copy=False)
+    # If copy=False was given, these must share the same data
+    assert np.may_share_memory(result_nocopy1, result_nocopy2)
+
 
 @pytest.mark.xfail(
     using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False

From 9b6c209f91f6c51328cf4492038a7e1efd88ccf2 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Wed, 30 Oct 2024 16:32:31 +0100
Subject: [PATCH 06/15] TST: Fixup test to use array rather than asarray

asarray did not support `copy=` on older versions of NumPy
---
 pandas/tests/base/test_conversion.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py
index 386660ddbb227..90b33bfc408ba 100644
--- a/pandas/tests/base/test_conversion.py
+++ b/pandas/tests/base/test_conversion.py
@@ -363,10 +363,10 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request):
     result = np.asarray(thing)
     tm.assert_numpy_array_equal(result, expected)
 
-    # Additionally, we check the `copy=` semantics for asarray
+    # Additionally, we check the `copy=` semantics for array/asarray
     # (these are implemented by us via `__array__`).
-    result_cp1 = np.asarray(thing, copy=True)
-    result_cp2 = np.asarray(thing, copy=True)
+    result_cp1 = np.array(thing, copy=True)
+    result_cp2 = np.array(thing, copy=True)
     # When called with `copy=True` NumPy/we should ensure a copy was made
     assert not np.may_share_memory(result_cp1, result_cp2)
 
@@ -375,12 +375,12 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request):
         return
 
     try:
-        result_nocopy1 = np.asarray(thing, copy=False)
+        result_nocopy1 = np.array(thing, copy=False)
     except ValueError:
         # An error is always acceptable for `copy=False`
         return
 
-    result_nocopy2 = np.asarray(thing, copy=False)
+    result_nocopy2 = np.array(thing, copy=False)
     # If copy=False was given, these must share the same data
     assert np.may_share_memory(result_nocopy1, result_nocopy2)
 

From 77058df6b7a58fe61c05502047c4b04161ab22e3 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Mon, 4 Nov 2024 12:03:35 +0100
Subject: [PATCH 07/15] BUG: Fixup ``__array__`` copy paths based on review

A few of these paths were just wrong; a few others are enhancements so
that cases which clearly can work without a copy still accept ``copy=False``.

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 pandas/core/arrays/arrow/array.py  |  3 ++-
 pandas/core/arrays/categorical.py  |  6 ++----
 pandas/core/arrays/period.py       | 12 +++++++++---
 pandas/core/arrays/sparse/array.py | 10 +++++++---
 pandas/core/generic.py             |  2 +-
 pandas/core/indexes/multi.py       |  8 +++++---
 6 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 940d08216a963..375d15a627e47 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -673,7 +673,8 @@ def __array__(
                 "Unable to avoid copy while creating an array as requested."
             )
         elif copy is None:
-            copy = False  # The NumPy copy=False meaning is different here.
+            # `to_numpy(copy=False)` has the meaning of NumPy `copy=None`.
+            copy = False
 
         return self.to_numpy(dtype=dtype, copy=copy)
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index ac64c57bece9f..99e4cb0545e2d 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1697,10 +1697,8 @@ def __array__(
         # we need to ensure __array__ gets all the way to an
         # ndarray.
 
-        if copy is None:
-            # Branch required since copy=None is not defined on 1.x
-            return np.asarray(ret, dtype=dtype)
-        return np.array(ret, dtype=dtype)
+        # `take_nd` should already make a copy, so don't force again.
+        return np.asarray(ret, dtype=dtype)
 
     def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
         # for binary ops, use our custom dunder methods
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index b2a56b5ca2df3..af880aa1ed727 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -389,14 +389,20 @@ def freqstr(self) -> str:
     def __array__(
         self, dtype: NpDtype | None = None, copy: bool | None = None
     ) -> np.ndarray:
+        if dtype == "i8":
+            # For NumPy 1.x compatibility we cannot use copy=None.  And
+            # `copy=False` has the meaning of `copy=None` here:
+            if not copy:
+                return np.asarray(self, dtype=dtype)
+            else:
+                return np.array(self, dtype=dtype)
+
         if copy is False:
             raise ValueError(
                 "Unable to avoid copy while creating an array as requested."
             )
 
-        if dtype == "i8":
-            return self.asi8
-        elif dtype == bool:
+        if dtype == bool:
             return ~self._isnan
 
         # This will raise TypeError for non-object dtypes
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index c1b3605c4e5b5..a3db7dc1f93e9 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -547,6 +547,13 @@ def from_spmatrix(cls, data: spmatrix) -> Self:
     def __array__(
         self, dtype: NpDtype | None = None, copy: bool | None = None
     ) -> np.ndarray:
+        if self.sp_index.ngaps == 0:
+            # Compat for na dtype and int values.
+            if copy is True:
+                return np.array(self.sp_values)
+            else:
+                return self.sp_values
+
         if copy is False:
             raise ValueError(
                 "Unable to avoid copy while creating an array as requested."
@@ -554,9 +561,6 @@ def __array__(
 
         fill_value = self.fill_value
 
-        if self.sp_index.ngaps == 0:
-            # Compat for na dtype and int values.
-            return self.sp_values
         if dtype is None:
             # Can NumPy represent this type?
             # If not, `np.result_type` will raise. We catch that
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 0a43556496973..eb45afa210028 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2022,7 +2022,7 @@ def __array__(
             arr = np.array(values, dtype=dtype, copy=copy)
 
         if (
-            copy is not False
+            copy is not True
             and astype_is_view(values.dtype, arr.dtype)
             and self._mgr.is_single_block
         ):
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index f21ab28061e9d..82768ed0b3e99 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -1391,9 +1391,11 @@ def copy(  # type: ignore[override]
 
     def __array__(self, dtype=None, copy=None) -> np.ndarray:
         """the array interface, return my values"""
-        if copy is True:
-            # Note: branch avoids `copy=None` for NumPy 1.x support
-            return np.array(self.values, dtype=dtype, copy=copy)
+        if copy is False:
+            # self.values is always a newly constructed array, so raise.
+            raise ValueError(
+                "Unable to avoid copy while creating an array as requested."
+            )
         return self.values
 
     def view(self, cls=None) -> Self:

From 6799f55d2fa271de19198e6976ee84f9dbe977b4 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 4 Nov 2024 15:39:15 +0100
Subject: [PATCH 08/15] fix period case and add specific test

---
 pandas/core/arrays/period.py             | 4 ++--
 pandas/tests/arrays/test_datetimelike.py | 8 ++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index af880aa1ed727..ae92e17332c76 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -393,9 +393,9 @@ def __array__(
             # For NumPy 1.x compatibility we cannot use copy=None.  And
             # `copy=False` has the meaning of `copy=None` here:
             if not copy:
-                return np.asarray(self, dtype=dtype)
+                return np.asarray(self.asi8, dtype=dtype)
             else:
-                return np.array(self, dtype=dtype)
+                return np.array(self.asi8, dtype=dtype)
 
         if copy is False:
             raise ValueError(
diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
index 0c8eefab95464..d1ef29b0bf8a0 100644
--- a/pandas/tests/arrays/test_datetimelike.py
+++ b/pandas/tests/arrays/test_datetimelike.py
@@ -1152,9 +1152,17 @@ def test_array_interface(self, arr1d):
         result = np.asarray(arr, dtype=object)
         tm.assert_numpy_array_equal(result, expected)
 
+        # to int64 gives the underlying representation
         result = np.asarray(arr, dtype="int64")
         tm.assert_numpy_array_equal(result, arr.asi8)
 
+        result2 = np.asarray(arr, dtype="int64")
+        assert np.may_share_memory(result, result2)
+
+        result_copy1 = np.array(arr, dtype="int64", copy=True)
+        result_copy2 = np.array(arr, dtype="int64", copy=True)
+        assert not np.may_share_memory(result_copy1, result_copy2)
+
         # to other dtypes
         msg = r"float\(\) argument must be a string or a( real)? number, not 'Period'"
         with pytest.raises(TypeError, match=msg):

From 4217bafff3ce6cc90990b8f2981217d5bd2685ec Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 4 Nov 2024 15:58:23 +0100
Subject: [PATCH 09/15] update test to be explicit about copy vs nocopy + allow
 copy=False for masked arrays in case of no NAs

---
 pandas/core/arrays/masked.py         |  4 ++--
 pandas/tests/base/test_conversion.py | 35 +++++++++++++++++-----------
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 1be7787d67c93..e129a0ec2e5fb 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -507,7 +507,7 @@ def to_numpy(
         else:
             with warnings.catch_warnings():
                 warnings.filterwarnings("ignore", category=RuntimeWarning)
-                data = self._data.astype(dtype, copy=copy)
+                data = np.array(self._data, dtype=dtype, copy=copy)
         return data
 
     @doc(ExtensionArray.tolist)
@@ -581,7 +581,7 @@ def __array__(
         the array interface, return my values
         We return an object array here to preserve our scalar values
         """
-        if copy is False:
+        if copy is False and self._hasna:
             raise ValueError(
                 "Unable to avoid copy while creating an array as requested."
             )
diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py
index 90b33bfc408ba..888e8628f8664 100644
--- a/pandas/tests/base/test_conversion.py
+++ b/pandas/tests/base/test_conversion.py
@@ -298,24 +298,27 @@ def test_array_multiindex_raises():
 
 
 @pytest.mark.parametrize(
-    "arr, expected",
+    "arr, expected, zero_copy",
     [
-        (np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64)),
-        (pd.Categorical(["a", "b"]), np.array(["a", "b"], dtype=object)),
+        (np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64), True),
+        (pd.Categorical(["a", "b"]), np.array(["a", "b"], dtype=object), False),
         (
             pd.core.arrays.period_array(["2000", "2001"], freq="D"),
             np.array([pd.Period("2000", freq="D"), pd.Period("2001", freq="D")]),
+            False,
         ),
-        (pd.array([0, np.nan], dtype="Int64"), np.array([0, np.nan])),
+        (pd.array([0, np.nan], dtype="Int64"), np.array([0, np.nan]), False),
         (
             IntervalArray.from_breaks([0, 1, 2]),
             np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object),
+            False,
         ),
-        (SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)),
+        (SparseArray([0, 1]), np.array([0, 1], dtype=np.int64), False),
         # tz-naive datetime
         (
             DatetimeArray._from_sequence(np.array(["2000", "2001"], dtype="M8[ns]")),
             np.array(["2000", "2001"], dtype="M8[ns]"),
+            True,
         ),
         # tz-aware stays tz`-aware
         (
@@ -330,6 +333,7 @@ def test_array_multiindex_raises():
                     Timestamp("2000-01-02", tz="US/Central"),
                 ]
             ),
+            False,
         ),
         # Timedelta
         (
@@ -338,6 +342,7 @@ def test_array_multiindex_raises():
                 dtype=np.dtype("m8[ns]"),
             ),
             np.array([0, 3600000000000], dtype="m8[ns]"),
+            True,
         ),
         # GH#26406 tz is preserved in Categorical[dt64tz]
         (
@@ -348,10 +353,11 @@ def test_array_multiindex_raises():
                     Timestamp("2016-01-02", tz="US/Pacific"),
                 ]
             ),
+            False,
         ),
     ],
 )
-def test_to_numpy(arr, expected, index_or_series_or_array, request):
+def test_to_numpy(arr, expected, zero_copy, index_or_series_or_array):
     box = index_or_series_or_array
 
     with tm.assert_produces_warning(None):
@@ -374,15 +380,16 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request):
         # copy=False semantics are only supported in NumPy>=2.
         return
 
-    try:
-        result_nocopy1 = np.array(thing, copy=False)
-    except ValueError:
-        # An error is always acceptable for `copy=False`
-        return
+    if not zero_copy:
+        with pytest.raises(ValueError, match="Unable to avoid copy while creating"):
+            # No zero-copy conversion exists for this array, so copy=False must raise
+            np.array(thing, copy=False)
 
-    result_nocopy2 = np.array(thing, copy=False)
-    # If copy=False was given, these must share the same data
-    assert np.may_share_memory(result_nocopy1, result_nocopy2)
+    else:
+        result_nocopy1 = np.array(thing, copy=False)
+        result_nocopy2 = np.array(thing, copy=False)
+        # If copy=False was given, these must share the same data
+        assert np.may_share_memory(result_nocopy1, result_nocopy2)
 
 
 @pytest.mark.xfail(

From 5e4cb87d7dcd68ef44135601773a8e2d4a3f0219 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 4 Nov 2024 16:02:10 +0100
Subject: [PATCH 10/15] add similar test to base extension tests

---
 pandas/tests/extension/base/interface.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py
index 6683c87e2b8fc..79eb64b5a654f 100644
--- a/pandas/tests/extension/base/interface.py
+++ b/pandas/tests/extension/base/interface.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas.compat.numpy import np_version_gt2
+
 from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
 from pandas.core.dtypes.common import is_extension_array_dtype
 from pandas.core.dtypes.dtypes import ExtensionDtype
@@ -71,6 +73,25 @@ def test_array_interface(self, data):
             expected = construct_1d_object_array_from_listlike(list(data))
         tm.assert_numpy_array_equal(result, expected)
 
+    def test_array_interface_copy(self, data):
+        result_copy1 = np.array(data, copy=True)
+        result_copy2 = np.array(data, copy=True)
+        assert not np.may_share_memory(result_copy1, result_copy2)
+
+        if not np_version_gt2:
+            # copy=False semantics are only supported in NumPy>=2.
+            return
+
+        try:
+            result_nocopy1 = np.array(data, copy=False)
+        except ValueError:
+            # An error is always acceptable for `copy=False`
+            return
+
+        result_nocopy2 = np.array(data, copy=False)
+        # If copy=False was given and did not raise, these must share the same data
+        assert np.may_share_memory(result_nocopy1, result_nocopy2)
+
     def test_is_extension_array_dtype(self, data):
         assert is_extension_array_dtype(data)
         assert is_extension_array_dtype(data.dtype)

From 357f8a008ce3286bd675dbf43c9c491c652413d8 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 4 Nov 2024 16:09:50 +0100
Subject: [PATCH 11/15] add specific test for sparse corner case

---
 pandas/tests/arrays/sparse/test_array.py | 31 ++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
index c35e8204f3437..1b685100e4931 100644
--- a/pandas/tests/arrays/sparse/test_array.py
+++ b/pandas/tests/arrays/sparse/test_array.py
@@ -4,6 +4,7 @@
 import pytest
 
 from pandas._libs.sparse import IntIndex
+from pandas.compat.numpy import np_version_gt2
 
 import pandas as pd
 from pandas import (
@@ -480,3 +481,33 @@ def test_zero_sparse_column():
 
     expected = pd.DataFrame({"A": SparseArray([0, 0]), "B": [1, 3]}, index=[0, 2])
     tm.assert_frame_equal(result, expected)
+
+
+def test_array_interface(arr_data, arr):
+    # https://github.com/pandas-dev/pandas/pull/60046
+    result = np.asarray(arr)
+    tm.assert_numpy_array_equal(result, arr_data)
+
+    # it always gives a copy by default
+    result_copy1 = np.asarray(arr)
+    result_copy2 = np.asarray(arr)
+    assert not np.may_share_memory(result_copy1, result_copy2)
+
+    # or with explicit copy=True
+    result_copy1 = np.array(arr, copy=True)
+    result_copy2 = np.array(arr, copy=True)
+    assert not np.may_share_memory(result_copy1, result_copy2)
+
+    if not np_version_gt2:
+        # copy=False semantics are only supported in NumPy>=2.
+        return
+
+    # for sparse arrays, copy=False is generally not allowed
+    with pytest.raises(ValueError, match="Unable to avoid copy while creating"):
+        np.array(arr, copy=False)
+
+    # except when there are actually no sparse filled values
+    arr2 = SparseArray(np.array([1, 2, 3]))
+    result_nocopy1 = np.array(arr2, copy=False)
+    result_nocopy2 = np.array(arr2, copy=False)
+    assert np.may_share_memory(result_nocopy1, result_nocopy2)

From 9927903a11a44f09ad5a8ca658fb8d15ac373f7e Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 4 Nov 2024 16:18:29 +0100
Subject: [PATCH 12/15] add specific test for MultiIndex

---
 pandas/tests/indexes/multi/test_conversion.py | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py
index f6b10c989326f..347d6b206e3b9 100644
--- a/pandas/tests/indexes/multi/test_conversion.py
+++ b/pandas/tests/indexes/multi/test_conversion.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas.compat.numpy import np_version_gt2
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -16,6 +18,40 @@ def test_to_numpy(idx):
     tm.assert_numpy_array_equal(result, exp)
 
 
+def test_array_interface(idx):
+    # https://github.com/pandas-dev/pandas/pull/60046
+    result = np.asarray(idx)
+    expected = np.empty((6,), dtype=object)
+    expected[:] = [
+        ("foo", "one"),
+        ("foo", "two"),
+        ("bar", "one"),
+        ("baz", "two"),
+        ("qux", "one"),
+        ("qux", "two"),
+    ]
+    tm.assert_numpy_array_equal(result, expected)
+
+    # the default conversion materializes new values, but they are cached, so
+    # repeated results still share memory
+    result_copy1 = np.asarray(idx)
+    result_copy2 = np.asarray(idx)
+    assert np.may_share_memory(result_copy1, result_copy2)
+
+    # with explicit copy=True, then it is an actual copy
+    result_copy1 = np.array(idx, copy=True)
+    result_copy2 = np.array(idx, copy=True)
+    assert not np.may_share_memory(result_copy1, result_copy2)
+
+    if not np_version_gt2:
+        # copy=False semantics are only supported in NumPy>=2.
+        return
+
+    # for MultiIndex, copy=False is never allowed
+    with pytest.raises(ValueError, match="Unable to avoid copy while creating"):
+        np.array(idx, copy=False)
+
+
 def test_to_frame():
     tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
 

From 3b000be05751f42dc33eda082c583b122dfc7685 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 4 Nov 2024 17:58:32 +0100
Subject: [PATCH 13/15] fix MultiIndex copy=True case for recent numpy

---
 pandas/core/indexes/multi.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 82768ed0b3e99..e6ce00cb714a4 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -1396,6 +1396,10 @@ def __array__(self, dtype=None, copy=None) -> np.ndarray:
             raise ValueError(
                 "Unable to avoid copy while creating an array as requested."
             )
+        if copy is True:
+            # explicit np.array call to ensure a copy is made and unique objects
+            # are returned, because self.values is cached
+            return np.array(self.values, dtype=dtype)
         return self.values
 
     def view(self, cls=None) -> Self:

From 421f9046d8ff10e1c6e6cc52e0f23b83f6213107 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 4 Nov 2024 18:03:24 +0100
Subject: [PATCH 14/15] fix copy=False case for masked array

---
 pandas/core/arrays/masked.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index e129a0ec2e5fb..349d2ec4d3cc9 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -507,7 +507,7 @@ def to_numpy(
         else:
             with warnings.catch_warnings():
                 warnings.filterwarnings("ignore", category=RuntimeWarning)
-                data = np.array(self._data, dtype=dtype, copy=copy)
+                data = self._data.astype(dtype, copy=copy)
         return data
 
     @doc(ExtensionArray.tolist)
@@ -581,7 +581,10 @@ def __array__(
         the array interface, return my values
         We return an object array here to preserve our scalar values
         """
-        if copy is False and self._hasna:
+        if copy is False:
+            if not self._hasna:
+                # special case, here we can simply return the underlying data
+                return np.array(self._data, dtype=dtype, copy=copy)
             raise ValueError(
                 "Unable to avoid copy while creating an array as requested."
             )

From d70405e50ba7d8ed9dda4e6bf2a90e49590c7c33 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 4 Nov 2024 20:40:23 +0100
Subject: [PATCH 15/15] add whatsnew note

---
 doc/source/whatsnew/v2.3.0.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
index 01c2ed3821d7a..7caf9a2cdbeb6 100644
--- a/doc/source/whatsnew/v2.3.0.rst
+++ b/doc/source/whatsnew/v2.3.0.rst
@@ -32,7 +32,9 @@ enhancement1
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
 
--
+- The semantics for the ``copy`` keyword in ``__array__`` methods (i.e. called
+  when using ``np.array()`` or ``np.asarray()`` on pandas objects) have been
+  updated to work correctly with NumPy >= 2 (:issue:`57739`)
 -
 
 .. ---------------------------------------------------------------------------