From 7f8bdbe7f9d2b1d130e52b00b17c8290fbabc8b1 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 16 Feb 2023 20:53:34 +0100 Subject: [PATCH 1/4] PERF: Improve performance for pyarrow to_numpy with na and na_value --- pandas/core/arrays/arrow/array.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 5e67a2c92cccb..ebfbbfce4828d 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -889,9 +889,14 @@ def to_numpy( result = np.empty(len(self), dtype=object) mask = ~self.isna() result[mask] = np.asarray(self[mask]._data) + elif self._hasna: + mask = self.isna() + result = np.ones((len(self),), dtype=dtype) * na_value + result[~mask] = np.asarray(self[~mask]._data, dtype=dtype) + return result else: result = np.asarray(self._data, dtype=dtype) - if copy or self._hasna: + if copy: result = result.copy() if self._hasna: result[self.isna()] = na_value From 8426f085bc38e7f718b7dcd6a908bbf757eeded3 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 16 Feb 2023 20:58:53 +0100 Subject: [PATCH 2/4] PERF: Improve performance for pyarrow to_numpy with na and na_value --- pandas/core/arrays/arrow/array.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index ebfbbfce4828d..3d1b49c69bcd9 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -891,9 +891,9 @@ def to_numpy( result[mask] = np.asarray(self[mask]._data) elif self._hasna: mask = self.isna() - result = np.ones((len(self),), dtype=dtype) * na_value - result[~mask] = np.asarray(self[~mask]._data, dtype=dtype) - return result + data = self.copy() + data[mask] = na_value + return np.asarray(data._data, dtype=dtype) else: result = np.asarray(self._data, dtype=dtype) if copy: From 405f74974cd2ce18c25589247461f06b23d62ed0 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 16 Feb 2023 21:08:32 +0100 Subject: [PATCH 3/4] Refactor --- pandas/core/arrays/arrow/array.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 3d1b49c69bcd9..83b259e2e0c96 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -890,9 +890,8 @@ def to_numpy( mask = ~self.isna() result[mask] = np.asarray(self[mask]._data) elif self._hasna: - mask = self.isna() data = self.copy() - data[mask] = na_value + data[self.isna()] = na_value return np.asarray(data._data, dtype=dtype) else: result = np.asarray(self._data, dtype=dtype) From 0d2c66475ba3b7b754beebea1ecef1fdb915da7f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 17 Feb 2023 01:06:33 +0100 Subject: [PATCH 4/4] Fix merge conflict --- pandas/core/arrays/arrow/array.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 0ff34938ba7a4..83b259e2e0c96 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -11,7 +11,6 @@ TypeVar, cast, ) -import warnings import numpy as np @@ -895,10 +894,7 @@ def to_numpy( data[self.isna()] = na_value return np.asarray(data._data, dtype=dtype) else: - with warnings.catch_warnings(): - # int dtype with NA raises Warning - warnings.filterwarnings("ignore", category=RuntimeWarning) - result = np.asarray(self._data, dtype=dtype) + result = np.asarray(self._data, dtype=dtype) if copy: result = result.copy() if self._hasna: