Skip to content

Commit 0d761ef

Browse files
authored
PERF: Use fused types for map_infer_mask (#55736)
* PERF: Use fused types for map_infer_mask
* Simplify
* Refactor and docstrings
* Rework
* Remove docstring
* Cleanup, whatsnew
1 parent 6d662b8 commit 0d761ef

File tree

4 files changed

+40
-12
lines changed

4 files changed

+40
-12
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ Performance improvements
326326
- Performance improvement in :meth:`Index.difference` (:issue:`55108`)
327327
- Performance improvement in :meth:`MultiIndex.get_indexer` when ``method`` is not ``None`` (:issue:`55839`)
328328
- Performance improvement in :meth:`Series.duplicated` for pyarrow dtypes (:issue:`55255`)
329+
- Performance improvement in :meth:`Series.str` methods (:issue:`55736`)
329330
- Performance improvement in :meth:`SeriesGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`DataFrameGroupBy.idxmin` (:issue:`54234`)
330331
- Performance improvement when indexing into a non-unique index (:issue:`55816`)
331332
- Performance improvement when indexing with more than 4 keys (:issue:`54550`)

pandas/_libs/dtypes.pxd

+5
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,8 @@ ctypedef fused numeric_t:
3434
ctypedef fused numeric_object_t:
3535
numeric_t
3636
object
37+
38+
ctypedef fused uint8_int64_object_t:
39+
uint8_t
40+
int64_t
41+
object

pandas/_libs/lib.pyx

+32-12
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ cdef extern from "pandas/parser/pd_parser.h":
102102
PandasParser_IMPORT
103103

104104
from pandas._libs cimport util
105+
from pandas._libs.dtypes cimport uint8_int64_object_t
105106
from pandas._libs.util cimport (
106107
INT64_MAX,
107108
INT64_MIN,
@@ -2856,8 +2857,6 @@ no_default = _NoDefault.no_default # Sentinel indicating the default value.
28562857
NoDefault = Literal[_NoDefault.no_default]
28572858

28582859

2859-
@cython.boundscheck(False)
2860-
@cython.wraparound(False)
28612860
def map_infer_mask(
28622861
ndarray[object] arr,
28632862
object f,
@@ -2876,24 +2875,50 @@ def map_infer_mask(
28762875
mask : ndarray
28772876
uint8 dtype ndarray indicating values not to apply `f` to.
28782877
convert : bool, default True
2879-
Whether to call `maybe_convert_objects` on the resulting ndarray
2878+
Whether to call `maybe_convert_objects` on the resulting ndarray.
28802879
na_value : Any, optional
28812880
The result value to use for masked values. By default, the
2882-
input value is used
2881+
input value is used.
28832882
dtype : numpy.dtype
28842883
The numpy dtype to use for the result ndarray.
28852884

28862885
Returns
28872886
-------
28882887
np.ndarray
28892888
"""
2889+
cdef Py_ssize_t n = len(arr)
2890+
result = np.empty(n, dtype=dtype)
2891+
2892+
_map_infer_mask(
2893+
result,
2894+
arr,
2895+
f,
2896+
mask,
2897+
na_value,
2898+
)
2899+
if convert:
2900+
return maybe_convert_objects(result)
2901+
else:
2902+
return result
2903+
2904+
2905+
@cython.boundscheck(False)
2906+
@cython.wraparound(False)
2907+
def _map_infer_mask(
2908+
ndarray[uint8_int64_object_t] out,
2909+
ndarray[object] arr,
2910+
object f,
2911+
const uint8_t[:] mask,
2912+
object na_value=no_default,
2913+
) -> None:
2914+
"""
2915+
Helper for map_infer_mask, split off to use fused types based on the result.
2916+
"""
28902917
cdef:
28912918
Py_ssize_t i, n
2892-
ndarray result
28932919
object val
28942920

28952921
n = len(arr)
2896-
result = np.empty(n, dtype=dtype)
28972922
for i in range(n):
28982923
if mask[i]:
28992924
if na_value is no_default:
@@ -2907,12 +2932,7 @@ def map_infer_mask(
29072932
# unbox 0-dim arrays, GH#690
29082933
val = val.item()
29092934

2910-
result[i] = val
2911-
2912-
if convert:
2913-
return maybe_convert_objects(result)
2914-
else:
2915-
return result
2935+
out[i] = val
29162936

29172937

29182938
@cython.boundscheck(False)

pandas/core/arrays/string_.py

+2
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,8 @@ def _str_map(
624624
na_value_is_na = isna(na_value)
625625
if na_value_is_na:
626626
na_value = 1
627+
elif dtype == np.dtype("bool"):
628+
na_value = bool(na_value)
627629
result = lib.map_infer_mask(
628630
arr,
629631
f,

0 commit comments

Comments
 (0)