pandas-dev · lukemanley · Dec 16, 2022 · Nov 30, 2022 · Nov 30, 2022 · Dec 1, 2022
diff --git a/asv_bench/benchmarks/array.py b/asv_bench/benchmarks/array.py
@@ -92,3 +92,41 @@ def time_setitem_slice(self, multiple_chunks):
 
     def time_tolist(self, multiple_chunks):
         self.array.tolist()
+
+
+class ArrowExtensionArray:
+    """Benchmark ``to_numpy`` on pyarrow-backed arrays, with and without NA."""
+    params = [
+        [
+            "boolean[pyarrow]",
+            "float64[pyarrow]",
+            "int64[pyarrow]",
+            "string[pyarrow]",
+            "timestamp[ns][pyarrow]",
+        ],
+        [False, True],
+    ]
+    param_names = ["dtype", "hasna"]  # names for the two lists in ``params``
+
+    def setup(self, dtype, hasna):
+        N = 100_000  # length of every generated array
+        if dtype == "boolean[pyarrow]":
+            data = np.random.choice([True, False], N, replace=True)
+        elif dtype == "float64[pyarrow]":
+            data = np.random.randn(N)
+        elif dtype == "int64[pyarrow]":
+            data = np.arange(N)
+        elif dtype == "string[pyarrow]":
+            data = tm.rands_array(10, N)  # presumably N random 10-char strings
+        elif dtype == "timestamp[ns][pyarrow]":
+            data = pd.date_range("2000-01-01", freq="s", periods=N)
+        else:
+            raise NotImplementedError  # dtype has no data generator above
+
+        arr = pd.array(data, dtype=dtype)
+        if hasna:
+            arr[::2] = pd.NA  # overwrite every other element with NA
+        self.arr = arr  # kept on the instance for the timed method below
+
+    def time_to_numpy(self, dtype, hasna):
+        self.arr.to_numpy()  # result discarded; only the call is exercised
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -748,6 +748,7 @@ Performance improvements
 - Reduce memory usage of :meth:`DataFrame.to_pickle`/:meth:`Series.to_pickle` when using BZ2 or LZMA (:issue:`49068`)
 - Performance improvement for :class:`~arrays.StringArray` constructor passing a numpy array with type ``np.str_`` (:issue:`49109`)
 - Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`)
+- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`)
 - Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`)
 - Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)
 - Performance improvement in ``var`` for nullable dtypes (:issue:`48379`).

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -9,11 +9,13 @@
 
 import numpy as np
 
+from pandas._libs import lib
 from pandas._typing import (
     ArrayLike,
     Dtype,
     FillnaOptions,
     Iterator,
+    NpDtype,
     PositionalIndexer,
     SortKind,
     TakeIndexer,
@@ -31,6 +33,7 @@
     is_bool_dtype,
     is_integer,
     is_integer_dtype,
+    is_object_dtype,
     is_scalar,
 )
 from pandas.core.dtypes.missing import isna
@@ -351,6 +354,10 @@ def __arrow_array__(self, type=None):
         """Convert myself to a pyarrow ChunkedArray."""
         return self._data
 
+    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
+        """Support ``np.asarray(self)`` by delegating to ``to_numpy(dtype=dtype)``."""
+        return self.to_numpy(dtype=dtype)
+
     def __invert__(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
         return type(self)(pc.invert(self._data))
 
@@ -749,6 +756,33 @@ def take(
                 indices_array[indices_array < 0] += len(self._data)
             return type(self)(self._data.take(indices_array))
 
+    @doc(ExtensionArray.to_numpy)
+    def to_numpy(
+        self,
+        dtype: npt.DTypeLike | None = None,
+        copy: bool = False,
+        na_value: object = lib.no_default,
+    ) -> np.ndarray:
+        if dtype is None and self._hasna:
+            dtype = object  # no dtype requested and _hasna: use object
+        if na_value is lib.no_default:
+            na_value = self.dtype.na_value  # default to the dtype's own NA marker
+        # Pick the conversion path from the requested dtype and the arrow type.
+        pa_type = self._data.type
+        if (
+            is_object_dtype(dtype)
+            or pa.types.is_timestamp(pa_type)
+            or pa.types.is_duration(pa_type)
+        ):
+            result = np.array(list(self), dtype=dtype)  # element-by-element path
+        else:
+            result = np.asarray(self._data, dtype=dtype)
+            if copy or self._hasna:  # also copy before the NA fill below mutates it
+                result = result.copy()
+        if self._hasna:
+            result[self.isna()] = na_value  # write na_value where isna() is True
+        return result
+
     def unique(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
         """
         Compute the ArrowExtensionArray of unique values.

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
@@ -12,7 +12,6 @@
 )
 from pandas._typing import (
     Dtype,
-    NpDtype,
     Scalar,
     npt,
 )
@@ -151,31 +150,6 @@ def dtype(self) -> StringDtype:  # type: ignore[override]
         """
         return self._dtype
 
-    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
-        """Correctly construct numpy arrays when passed to `np.asarray()`."""
-        return self.to_numpy(dtype=dtype)
-
-    def to_numpy(
-        self,
-        dtype: npt.DTypeLike | None = None,
-        copy: bool = False,
-        na_value=lib.no_default,
-    ) -> np.ndarray:
-        """
-        Convert to a NumPy ndarray.
-        """
-        # TODO: copy argument is ignored
-
-        result = np.array(self._data, dtype=dtype)
-        if self._data.null_count > 0:
-            if na_value is lib.no_default:
-                if dtype and np.issubdtype(dtype, np.floating):
-                    return result
-                na_value = self._dtype.na_value
-            mask = self.isna()
-            result[mask] = na_value
-        return result
-
     def insert(self, loc: int, item) -> ArrowStringArray:
         if not isinstance(item, str) and item is not libmissing.NA:
             raise TypeError("Scalar must be NA or str")
@@ -219,10 +193,11 @@ def astype(self, dtype, copy: bool = True):
             if copy:
                 return self.copy()
             return self
-
         elif isinstance(dtype, NumericDtype):
             data = self._data.cast(pa.from_numpy_dtype(dtype.numpy_dtype))
             return dtype.__from_arrow__(data)
+        elif isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.floating):
+            return self.to_numpy(dtype=dtype, na_value=np.nan)
 
         return super().astype(dtype, copy=copy)
 

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -1421,3 +1421,20 @@ def test_astype_from_non_pyarrow(data):
     assert not isinstance(pd_array.dtype, ArrowDtype)
     assert isinstance(result.dtype, ArrowDtype)
     tm.assert_extension_array_equal(result, data)
+
+
+def test_to_numpy_with_defaults(data):
+    # GH49973
+    result = data.to_numpy()  # all arguments left at their defaults
+    # Build the expected ndarray without calling ``to_numpy``.
+    pa_type = data._data.type
+    if pa.types.is_duration(pa_type) or pa.types.is_timestamp(pa_type):
+        expected = np.array(list(data))  # temporal types: collect the scalars
+    else:
+        expected = np.array(data._data)
+    # When ``_hasna`` is set, expect object dtype with pd.NA at the NA positions.
+    if data._hasna:
+        expected = expected.astype(object)
+        expected[pd.isna(data)] = pd.NA
+
+    tm.assert_numpy_array_equal(result, expected)