Skip to content

Commit a3c0e7b

Browse files
authored
PERF/CLN: avoid potential copies in ravel (#45002)
1 parent c0e8044 commit a3c0e7b

File tree

5 files changed

+29
-19
lines changed

5 files changed

+29
-19
lines changed

pandas/_libs/missing.pyi

+1
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,5 @@ def isposinf_scalar(val: object) -> bool: ...
1212
def isneginf_scalar(val: object) -> bool: ...
1313
def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
1414
def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
15+
def isnaobj2d(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
1516
def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...

pandas/core/dtypes/missing.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""
22
missing types & inference
33
"""
4+
from __future__ import annotations
5+
46
from decimal import Decimal
57
from functools import partial
68

@@ -17,6 +19,7 @@
1719
from pandas._typing import (
1820
ArrayLike,
1921
DtypeObj,
22+
npt,
2023
)
2124

2225
from pandas.core.dtypes.common import (
@@ -259,18 +262,22 @@ def _isna_array(values: ArrayLike, inf_as_na: bool = False):
259262
return result
260263

261264

262-
def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> np.ndarray:
265+
def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bool_]:
263266
# Working around NumPy ticket 1542
264267
dtype = values.dtype
265-
shape = values.shape
266268

267269
if dtype.kind in ("S", "U"):
268270
result = np.zeros(values.shape, dtype=bool)
269271
else:
270-
result = np.empty(shape, dtype=bool)
271-
vec = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na)
272272

273-
result[...] = vec.reshape(shape)
273+
if values.ndim == 1:
274+
result = libmissing.isnaobj(values, inf_as_na=inf_as_na)
275+
elif values.ndim == 2:
276+
result = libmissing.isnaobj2d(values, inf_as_na=inf_as_na)
277+
else:
278+
# 0-D, reached via e.g. mask_missing
279+
result = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na)
280+
result = result.reshape(values.shape)
274281

275282
return result
276283

pandas/core/internals/blocks.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1862,8 +1862,14 @@ def convert(
18621862
attempt to cast any object types to better types return a copy of
18631863
the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
18641864
"""
1865+
values = self.values
1866+
if values.ndim == 2:
1867+
# maybe_split ensures we only get here with values.shape[0] == 1,
1868+
# avoid doing .ravel as that might make a copy
1869+
values = values[0]
1870+
18651871
res_values = soft_convert_objects(
1866-
self.values.ravel(),
1872+
values,
18671873
datetime=datetime,
18681874
numeric=numeric,
18691875
timedelta=timedelta,

pandas/core/nanops.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -757,13 +757,10 @@ def get_median(x):
757757
if mask is not None:
758758
values[mask] = np.nan
759759

760-
if axis is None:
761-
values = values.ravel("K")
762-
763760
notempty = values.size
764761

765762
# an array from a frame
766-
if values.ndim > 1:
763+
if values.ndim > 1 and axis is not None:
767764

768765
# there's a non-empty array to apply over otherwise numpy raises
769766
if notempty:

pandas/core/ops/missing.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def _fill_zeros(result, x, y):
4747
if is_float_dtype(result.dtype):
4848
return result
4949

50-
is_variable_type = hasattr(y, "dtype") or hasattr(y, "type")
50+
is_variable_type = hasattr(y, "dtype")
5151
is_scalar_type = is_scalar(y)
5252

5353
if not is_variable_type and not is_scalar_type:
@@ -58,19 +58,18 @@ def _fill_zeros(result, x, y):
5858

5959
if is_integer_dtype(y.dtype):
6060

61-
if (y == 0).any():
61+
ymask = y == 0
62+
if ymask.any():
6263

63-
# GH#7325, mask and nans must be broadcastable (also: GH#9308)
64-
# Raveling and then reshaping makes np.putmask faster
65-
mask = ((y == 0) & ~np.isnan(result)).ravel()
64+
# GH#7325, mask and nans must be broadcastable
65+
mask = ymask & ~np.isnan(result)
6666

67-
shape = result.shape
68-
result = result.astype("float64", copy=False).ravel()
67+
# GH#9308 doing ravel on result and mask can improve putmask perf,
68+
# but can also make unwanted copies.
69+
result = result.astype("float64", copy=False)
6970

7071
np.putmask(result, mask, np.nan)
7172

72-
result = result.reshape(shape)
73-
7473
return result
7574

7675

0 commit comments

Comments
 (0)