Skip to content

Commit 3ff1303

Browse files
lukemanley authored and
topper-123 committed
PERF: Series.to_numpy with float dtype and na_value=np.nan (pandas-dev#52430)
* PERF: Series.to_numpy with dtype=float and na_value=np.nan
* whatsnew
* add space
1 parent f05b191 commit 3ff1303

File tree

3 files changed

+13
-8
lines changed

3 files changed

+13
-8
lines changed

asv_bench/benchmarks/series_methods.py

+3
Original file line number | Diff line number | Diff line change
@@ -385,6 +385,9 @@ def time_to_numpy_double_copy(self):
385385
def time_to_numpy_copy(self):
386386
self.ser.to_numpy(copy=True)
387387

388+
def time_to_numpy_float_with_nan(self):
389+
self.ser.to_numpy(dtype="float64", na_value=np.nan)
390+
388391

389392
class Replace:
390393
param_names = ["num_to_replace"]

doc/source/whatsnew/v2.1.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -195,6 +195,7 @@ Performance improvements
195195
- Performance improvement in :meth:`MultiIndex.set_levels` and :meth:`MultiIndex.set_codes` when ``verify_integrity=True`` (:issue:`51873`)
196196
- Performance improvement in :func:`factorize` for object columns not containing strings (:issue:`51921`)
197197
- Performance improvement in :class:`Series` reductions (:issue:`52341`)
198+
- Performance improvement in :meth:`Series.to_numpy` when dtype is a numpy float dtype and ``na_value`` is ``np.nan`` (:issue:`52430`)
198199
-
199200

200201
.. ---------------------------------------------------------------------------

pandas/core/base.py

+9-8
Original file line number | Diff line number | Diff line change
@@ -573,25 +573,26 @@ def to_numpy(
573573
f"to_numpy() got an unexpected keyword argument '{bad_keys}'"
574574
)
575575

576-
if na_value is not lib.no_default:
577-
values = self._values
576+
fillna = (
577+
na_value is not lib.no_default
578+
# no need to fillna with np.nan if we already have a float dtype
579+
and not (na_value is np.nan and np.issubdtype(self.dtype, np.floating))
580+
)
581+
582+
values = self._values
583+
if fillna:
578584
if not can_hold_element(values, na_value):
579585
# if we can't hold the na_value asarray either makes a copy or we
580586
# error before modifying values. The asarray later on thus won't make
581587
# another copy
582588
values = np.asarray(values, dtype=dtype)
583589
else:
584590
values = values.copy()
585-
586591
values[np.asanyarray(self.isna())] = na_value
587-
else:
588-
values = self._values
589592

590593
result = np.asarray(values, dtype=dtype)
591594

592-
if (copy and na_value is lib.no_default) or (
593-
not copy and using_copy_on_write()
594-
):
595+
if (copy and not fillna) or (not copy and using_copy_on_write()):
595596
if np.shares_memory(self._values[:2], result[:2]):
596597
# Take slices to improve performance of check
597598
if using_copy_on_write() and not copy:

0 commit comments

Comments
 (0)