Skip to content

Commit 7ffc0ad

Browse files
authored
PERF: ArrowExtensionArray.to_numpy(dtype=object) (#51227)
* PERF: ArrowExtensionArray.to_numpy(dtype=object)
* gh refs
1 parent 12faa2e commit 7ffc0ad

File tree

3 files changed: +16 -6 lines changed

doc/source/whatsnew/v2.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1073,7 +1073,7 @@ Performance improvements
10731073
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`)
10741074
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` (:issue:`50248`, :issue:`50632`)
10751075
- Performance improvement in :class:`~arrays.ArrowExtensionArray` comparison methods when array contains NA (:issue:`50524`)
1076-
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`)
1076+
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`, :issue:`51227`)
10771077
- Performance improvement when parsing strings to :class:`BooleanDtype` (:issue:`50613`)
10781078
- Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`)
10791079
- Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)

pandas/core/arrays/arrow/array.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -842,12 +842,12 @@ def to_numpy(
842842
na_value = self.dtype.na_value
843843

844844
pa_type = self._data.type
845-
if (
846-
is_object_dtype(dtype)
847-
or pa.types.is_timestamp(pa_type)
848-
or pa.types.is_duration(pa_type)
849-
):
845+
if pa.types.is_timestamp(pa_type) or pa.types.is_duration(pa_type):
850846
result = np.array(list(self), dtype=dtype)
847+
elif is_object_dtype(dtype) and self._hasna:
848+
result = np.empty(len(self), dtype=object)
849+
mask = ~self.isna()
850+
result[mask] = np.asarray(self[mask]._data)
851851
else:
852852
result = np.asarray(self._data, dtype=dtype)
853853
if copy or self._hasna:

pandas/tests/extension/test_arrow.py

+10
Original file line numberDiff line numberDiff line change
@@ -1525,6 +1525,16 @@ def test_to_numpy_with_defaults(data):
15251525
tm.assert_numpy_array_equal(result, expected)
15261526

15271527

1528+
def test_to_numpy_int_with_na():
    """GH51227: ``to_numpy`` must not convert int to float when NA is present.

    Builds an ``int64[pyarrow]`` array holding one integer and one missing
    value, converts it with the default ``to_numpy()`` arguments, and checks
    that the integer comes back as a Python ``int`` in an object-dtype result
    with ``pd.NA`` in the missing slot.
    """
    values = [1, None]
    arrow_ints = pd.array(values, dtype="int64[pyarrow]")

    converted = arrow_ints.to_numpy()
    wanted = np.array([1, pd.NA], dtype=object)

    # The non-missing element has to stay an integer, not be upcast to float.
    first = converted[0]
    assert isinstance(first, int)
    tm.assert_numpy_array_equal(converted, wanted)
1536+
1537+
15281538
def test_setitem_null_slice(data):
15291539
# GH50248
15301540
orig = data.copy()

0 commit comments

Comments
 (0)