Skip to content

Commit a108ffa

Browse files
authored
PERF: ArrowExtensionArray.to_numpy(dtype=object) (#51347)
* PERF: ArrowExtensionArray.to_numpy(dtype=object)
* gh refs
* fix
* cleanup / add comment
1 parent dba9ddd commit a108ffa

File tree

3 files changed

+19
-6
lines changed

3 files changed

+19
-6
lines changed

doc/source/whatsnew/v2.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1126,7 +1126,7 @@ Performance improvements
11261126
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`)
11271127
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` (:issue:`50248`, :issue:`50632`)
11281128
- Performance improvement in :class:`~arrays.ArrowExtensionArray` comparison methods when array contains NA (:issue:`50524`)
1129-
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`)
1129+
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`, :issue:`51227`)
11301130
- Performance improvement when parsing strings to :class:`BooleanDtype` (:issue:`50613`)
11311131
- Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`)
11321132
- Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)

pandas/core/arrays/arrow/array.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -880,12 +880,15 @@ def to_numpy(
880880
na_value = self.dtype.na_value
881881

882882
pa_type = self._data.type
883-
if (
884-
is_object_dtype(dtype)
885-
or pa.types.is_timestamp(pa_type)
886-
or pa.types.is_duration(pa_type)
887-
):
883+
if pa.types.is_temporal(pa_type) and not pa.types.is_date(pa_type):
884+
# temporal types with units and/or timezones currently
885+
# require pandas/python scalars to pass all tests
886+
# TODO: improve performance (this is slow)
888887
result = np.array(list(self), dtype=dtype)
888+
elif is_object_dtype(dtype) and self._hasna:
889+
result = np.empty(len(self), dtype=object)
890+
mask = ~self.isna()
891+
result[mask] = np.asarray(self[mask]._data)
889892
else:
890893
result = np.asarray(self._data, dtype=dtype)
891894
if copy or self._hasna:

pandas/tests/extension/test_arrow.py

+10
Original file line numberDiff line numberDiff line change
@@ -1487,6 +1487,16 @@ def test_to_numpy_with_defaults(data):
14871487
tm.assert_numpy_array_equal(result, expected)
14881488

14891489

1490+
def test_to_numpy_int_with_na():
1491+
# GH51227: ensure to_numpy does not convert int to float
1492+
data = [1, None]
1493+
arr = pd.array(data, dtype="int64[pyarrow]")
1494+
result = arr.to_numpy()
1495+
expected = np.array([1, pd.NA], dtype=object)
1496+
assert isinstance(result[0], int)
1497+
tm.assert_numpy_array_equal(result, expected)
1498+
1499+
14901500
def test_setitem_null_slice(data):
14911501
# GH50248
14921502
orig = data.copy()

0 commit comments

Comments
 (0)