diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 0ea18d69f0dbb..9fd9faf057a8a 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1078,7 +1078,7 @@ Performance improvements - Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`) - Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` (:issue:`50248`, :issue:`50632`) - Performance improvement in :class:`~arrays.ArrowExtensionArray` comparison methods when array contains NA (:issue:`50524`) -- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`, :issue:`51227`) +- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`) - Performance improvement when parsing strings to :class:`BooleanDtype` (:issue:`50613`) - Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`) - Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 95a63a1f860ae..075beca106e6a 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -853,12 +853,12 @@ def to_numpy( na_value = self.dtype.na_value pa_type = self._data.type - if pa.types.is_timestamp(pa_type) or pa.types.is_duration(pa_type): + if ( + is_object_dtype(dtype) + or pa.types.is_timestamp(pa_type) + or pa.types.is_duration(pa_type) + ): result = np.array(list(self), dtype=dtype) - elif is_object_dtype(dtype) and self._hasna: - result = np.empty(len(self), dtype=object) - mask = ~self.isna() - result[mask] = np.asarray(self[mask]._data) else: result = np.asarray(self._data, dtype=dtype) if copy or self._hasna: diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 6a0d3c7f6e381..1dac8faa3a9e2 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1518,16 +1518,6 @@ def test_to_numpy_with_defaults(data): tm.assert_numpy_array_equal(result, expected) -def test_to_numpy_int_with_na(): - # GH51227: ensure to_numpy does not convert int to float - data = [1, None] - arr = pd.array(data, dtype="int64[pyarrow]") - result = arr.to_numpy() - expected = np.array([1, pd.NA], dtype=object) - assert isinstance(result[0], int) - tm.assert_numpy_array_equal(result, expected) - - def test_setitem_null_slice(data): # GH50248 orig = data.copy()