Skip to content

Commit 96b6a6f

Browse files
topper-123 authored and Yi Wei committed
PERF: faster access to the dtype for masked numeric arrays (pandas-dev#52998)
1 parent 564c0fa commit 96b6a6f

File tree

7 files changed

+35
-34
lines changed

7 files changed

+35
-34
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ Performance improvements
280280
- Performance improvement when parsing strings to ``boolean[pyarrow]`` dtype (:issue:`51730`)
281281
- Performance improvement when searching an :class:`Index` sliced from other indexes (:issue:`51738`)
282282
- Performance improvement in :func:`concat` (:issue:`52291`, :issue:`52290`)
283+
- Performance improvement when accessing :attr:`arrays.IntegerArray.dtype` & :attr:`arrays.FloatingArray.dtype` (:issue:`52998`)
283284
- Performance improvement in :class:`Series` reductions (:issue:`52341`)
284285
- Performance improvement in :func:`concat` when ``axis=1`` and objects have different indexes (:issue:`52541`)
285286
- Performance improvement in :meth:`Series.corr` and :meth:`Series.cov` for extension dtypes (:issue:`52502`)

pandas/core/arrays/floating.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ def construct_array_type(cls) -> type[FloatingArray]:
3636
return FloatingArray
3737

3838
@classmethod
39-
def _str_to_dtype_mapping(cls):
40-
return FLOAT_STR_TO_DTYPE
39+
def _get_dtype_mapping(cls) -> dict[np.dtype, FloatingDtype]:
40+
return NUMPY_FLOAT_TO_DTYPE
4141

4242
@classmethod
4343
def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
@@ -153,7 +153,7 @@ class Float64Dtype(FloatingDtype):
153153
__doc__ = _dtype_docstring.format(dtype="float64")
154154

155155

156-
FLOAT_STR_TO_DTYPE = {
157-
"float32": Float32Dtype(),
158-
"float64": Float64Dtype(),
156+
NUMPY_FLOAT_TO_DTYPE: dict[np.dtype, FloatingDtype] = {
157+
np.dtype(np.float32): Float32Dtype(),
158+
np.dtype(np.float64): Float64Dtype(),
159159
}

pandas/core/arrays/integer.py

+11-11
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ def construct_array_type(cls) -> type[IntegerArray]:
3636
return IntegerArray
3737

3838
@classmethod
39-
def _str_to_dtype_mapping(cls):
40-
return INT_STR_TO_DTYPE
39+
def _get_dtype_mapping(cls) -> dict[np.dtype, IntegerDtype]:
40+
return NUMPY_INT_TO_DTYPE
4141

4242
@classmethod
4343
def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
@@ -208,13 +208,13 @@ class UInt64Dtype(IntegerDtype):
208208
__doc__ = _dtype_docstring.format(dtype="uint64")
209209

210210

211-
INT_STR_TO_DTYPE: dict[str, IntegerDtype] = {
212-
"int8": Int8Dtype(),
213-
"int16": Int16Dtype(),
214-
"int32": Int32Dtype(),
215-
"int64": Int64Dtype(),
216-
"uint8": UInt8Dtype(),
217-
"uint16": UInt16Dtype(),
218-
"uint32": UInt32Dtype(),
219-
"uint64": UInt64Dtype(),
211+
NUMPY_INT_TO_DTYPE: dict[np.dtype, IntegerDtype] = {
212+
np.dtype(np.int8): Int8Dtype(),
213+
np.dtype(np.int16): Int16Dtype(),
214+
np.dtype(np.int32): Int32Dtype(),
215+
np.dtype(np.int64): Int64Dtype(),
216+
np.dtype(np.uint8): UInt8Dtype(),
217+
np.dtype(np.uint16): UInt16Dtype(),
218+
np.dtype(np.uint32): UInt32Dtype(),
219+
np.dtype(np.uint64): UInt64Dtype(),
220220
}

pandas/core/arrays/numeric.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def __from_arrow__(
100100
return array_class(data.copy(), ~mask, copy=False)
101101

102102
@classmethod
103-
def _str_to_dtype_mapping(cls) -> Mapping[str, NumericDtype]:
103+
def _get_dtype_mapping(cls) -> Mapping[np.dtype, NumericDtype]:
104104
raise AbstractMethodError(cls)
105105

106106
@classmethod
@@ -114,9 +114,9 @@ def _standardize_dtype(cls, dtype: NumericDtype | str | np.dtype) -> NumericDtyp
114114
dtype = dtype.lower()
115115

116116
if not isinstance(dtype, NumericDtype):
117-
mapping = cls._str_to_dtype_mapping()
117+
mapping = cls._get_dtype_mapping()
118118
try:
119-
dtype = mapping[str(np.dtype(dtype))]
119+
dtype = mapping[np.dtype(dtype)]
120120
except KeyError as err:
121121
raise ValueError(f"invalid dtype specified {dtype}") from err
122122
return dtype
@@ -250,8 +250,8 @@ def __init__(
250250

251251
@cache_readonly
252252
def dtype(self) -> NumericDtype:
253-
mapping = self._dtype_cls._str_to_dtype_mapping()
254-
return mapping[str(self._data.dtype)]
253+
mapping = self._dtype_cls._get_dtype_mapping()
254+
return mapping[self._data.dtype]
255255

256256
@classmethod
257257
def _coerce_to_array(

pandas/core/dtypes/cast.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -1038,10 +1038,10 @@ def convert_dtypes(
10381038
target_int_dtype = pandas_dtype_func("Int64")
10391039

10401040
if input_array.dtype.kind in "iu":
1041-
from pandas.core.arrays.integer import INT_STR_TO_DTYPE
1041+
from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE
10421042

1043-
inferred_dtype = INT_STR_TO_DTYPE.get(
1044-
input_array.dtype.name, target_int_dtype
1043+
inferred_dtype = NUMPY_INT_TO_DTYPE.get(
1044+
input_array.dtype, target_int_dtype
10451045
)
10461046
elif input_array.dtype.kind in "fcb":
10471047
# TODO: de-dup with maybe_cast_to_integer_array?
@@ -1060,10 +1060,10 @@ def convert_dtypes(
10601060
if convert_floating:
10611061
if input_array.dtype.kind in "fcb":
10621062
# i.e. numeric but not integer
1063-
from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE
1063+
from pandas.core.arrays.floating import NUMPY_FLOAT_TO_DTYPE
10641064

1065-
inferred_float_dtype: DtypeObj = FLOAT_STR_TO_DTYPE.get(
1066-
input_array.dtype.name, pandas_dtype_func("Float64")
1065+
inferred_float_dtype: DtypeObj = NUMPY_FLOAT_TO_DTYPE.get(
1066+
input_array.dtype, pandas_dtype_func("Float64")
10671067
)
10681068
# if we could also convert to integer, check if all floats
10691069
# are actually integers

pandas/core/dtypes/dtypes.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1471,13 +1471,13 @@ def from_numpy_dtype(cls, dtype: np.dtype) -> BaseMaskedDtype:
14711471

14721472
return BooleanDtype()
14731473
elif dtype.kind in "iu":
1474-
from pandas.core.arrays.integer import INT_STR_TO_DTYPE
1474+
from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE
14751475

1476-
return INT_STR_TO_DTYPE[dtype.name]
1476+
return NUMPY_INT_TO_DTYPE[dtype]
14771477
elif dtype.kind == "f":
1478-
from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE
1478+
from pandas.core.arrays.floating import NUMPY_FLOAT_TO_DTYPE
14791479

1480-
return FLOAT_STR_TO_DTYPE[dtype.name]
1480+
return NUMPY_FLOAT_TO_DTYPE[dtype]
14811481
else:
14821482
raise NotImplementedError(dtype)
14831483

pandas/tests/extension/base/dim2.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
)
1313

1414
import pandas as pd
15-
from pandas.core.arrays.integer import INT_STR_TO_DTYPE
15+
from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE
1616
from pandas.tests.extension.base.base import BaseExtensionTests
1717

1818

@@ -215,10 +215,10 @@ def get_reduction_result_dtype(dtype):
215215
if dtype.itemsize == 8:
216216
return dtype
217217
elif dtype.kind in "ib":
218-
return INT_STR_TO_DTYPE[np.dtype(int).name]
218+
return NUMPY_INT_TO_DTYPE[np.dtype(int)]
219219
else:
220220
# i.e. dtype.kind == "u"
221-
return INT_STR_TO_DTYPE[np.dtype(np.uint).name]
221+
return NUMPY_INT_TO_DTYPE[np.dtype(np.uint)]
222222

223223
if method in ["median", "sum", "prod"]:
224224
# std and var are not dtype-preserving

0 commit comments

Comments
 (0)