From f021bb4c017a87c0845d735481eccba8aa884fc9 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 6 Apr 2023 14:43:13 +0200 Subject: [PATCH 1/3] BUG: segfault for null dtype in to_numpy (#52466) (cherry picked from commit 7974ad0ba191e0a6b6bda5e0beb9669d0391b5af) --- doc/source/whatsnew/v2.0.1.rst | 2 ++ pandas/core/arrays/arrow/array.py | 5 +++++ pandas/tests/extension/test_arrow.py | 18 ++++++++++++++++++ 3 files changed, 25 insertions(+) diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index 4fda2cd11ce12..9071d242e25b5 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -21,6 +21,8 @@ Fixed regressions Bug fixes ~~~~~~~~~ - Fixed bug in :func:`merge` when merging with ``ArrowDtype`` one one and a NumPy dtype on the other side (:issue:`52406`) +- Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`) +- Fixed segfault in :meth:`Series.to_numpy` with ``null[pyarrow]`` dtype (:issue:`52443`) .. --------------------------------------------------------------------------- .. _whatsnew_201.other: diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index a0245be8e14c1..508d3e087413f 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1024,6 +1024,11 @@ def to_numpy( result = np.empty(len(self), dtype=object) mask = ~self.isna() result[mask] = np.asarray(self[mask]._data) + elif pa.types.is_null(self._pa_array.type): + result = np.asarray(self._pa_array, dtype=dtype) + if not isna(na_value): + result[:] = na_value + return result elif self._hasna: data = self.copy() data[self.isna()] = na_value diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 28919f496384c..acb63c8cbbb50 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -27,6 +27,7 @@ import numpy as np import pytest +from pandas._libs import lib from pandas.compat import ( PY311, is_ci_environment, @@ -1675,6 +1676,23 @@ def test_to_numpy_int_with_na(): tm.assert_numpy_array_equal(result, expected) +@pytest.mark.parametrize("na_val, exp", [(lib.no_default, np.nan), (1, 1)]) +def test_to_numpy_null_array(na_val, exp): + # GH#52443 + arr = pd.array([pd.NA, pd.NA], dtype="null[pyarrow]") + result = arr.to_numpy(dtype="float64", na_value=na_val) + expected = np.array([exp] * 2, dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + +def test_to_numpy_null_array_no_dtype(): + # GH#52443 + arr = pd.array([pd.NA, pd.NA], dtype="null[pyarrow]") + result = arr.to_numpy(dtype=None) + expected = np.array([pd.NA] * 2, dtype="object") + tm.assert_numpy_array_equal(result, expected) + + def test_setitem_null_slice(data): # GH50248 orig = data.copy() From 0933a856d18a21829aa67516dc41aa6863cbd276 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 6 Apr 2023 14:19:40 -0700 Subject: [PATCH 2/3] Update pandas/core/arrays/arrow/array.py --- pandas/core/arrays/arrow/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 508d3e087413f..3cf33ccba2b1b 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1024,7 +1024,7 @@ def to_numpy( result = np.empty(len(self), dtype=object) mask = ~self.isna() result[mask] = np.asarray(self[mask]._data) - elif pa.types.is_null(self._pa_array.type): + elif pa.types.is_null(self._data.type): result = np.asarray(self._pa_array, dtype=dtype) if not isna(na_value): result[:] = na_value From 3f05be0ef8953185e1b9df9cd9d56e3a6ac36540 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 6 Apr 2023 14:19:45 -0700 Subject: [PATCH 3/3] Update pandas/core/arrays/arrow/array.py --- pandas/core/arrays/arrow/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 3cf33ccba2b1b..891b25f47000d 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1025,7 +1025,7 @@ def to_numpy( mask = ~self.isna() result[mask] = np.asarray(self[mask]._data) elif pa.types.is_null(self._data.type): - result = np.asarray(self._pa_array, dtype=dtype) + result = np.asarray(self._data, dtype=dtype) if not isna(na_value): result[:] = na_value return result