Skip to content

Commit db022e2

Browse files
groutr authored and jreback committed
PERF: Fix performance regression in infer_dtype (#30202)
1 parent f505b74 commit db022e2

File tree

3 files changed

+26
-3
lines changed

3 files changed

+26
-3
lines changed

asv_bench/benchmarks/dtypes.py

+22
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from .pandas_vb_common import (
66
datetime_dtypes,
77
extension_dtypes,
8+
lib,
89
numeric_dtypes,
910
string_dtypes,
1011
)
@@ -40,4 +41,25 @@ def time_pandas_dtype_invalid(self, dtype):
4041
pass
4142

4243

44+
class InferDtypes:
    """Benchmark ``lib.infer_dtype`` over several kinds of input.

    The benchmark is parameterized by the keys of ``data_dict``; each key
    names one prepared input (NumPy arrays and plain Python lists, with and
    without missing values, plus an empty list and a list of bytes).
    """

    param_names = ["dtype"]

    # Inputs are built once, when the class body is executed, so that the
    # timed methods below measure only the ``infer_dtype`` call itself.
    data_dict = {
        "np-object": np.array([1] * 100000, dtype="O"),
        "py-object": [1] * 100000,
        # Half ones, half NaN; the array dtype is left for NumPy to choose.
        "np-null": np.array([1] * 50000 + [np.nan] * 50000),
        "py-null": [1] * 50000 + [None] * 50000,
        "np-int": np.array([1] * 100000, dtype=int),
        "np-floating": np.array([1.0] * 100000, dtype=float),
        "empty": [],
        "bytes": [b"a"] * 100000,
    }

    # One benchmark run per prepared input, in insertion order.
    params = list(data_dict)

    def time_infer_skipna(self, dtype):
        """Time ``lib.infer_dtype`` on the input named ``dtype``, skipna=True."""
        lib.infer_dtype(self.data_dict[dtype], skipna=True)

    def time_infer(self, dtype):
        """Time ``lib.infer_dtype`` on the input named ``dtype``, skipna=False."""
        lib.infer_dtype(self.data_dict[dtype], skipna=False)
63+
64+
4365
from .pandas_vb_common import setup # noqa: F401 isort:skip

doc/source/whatsnew/v1.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -667,6 +667,7 @@ Performance improvements
667667
- Performance improvement when checking if values in a :class:`Categorical` are equal to, greater than or equal to, or greater than a given scalar.
668668
The improvement is not present when checking whether the :class:`Categorical` is less than, or less than or equal to, the scalar (:issue:`29820`)
669669
- Performance improvement in :meth:`Index.equals` and :meth:`MultiIndex.equals` (:issue:`29134`)
670+
- Performance improvement in :func:`~pandas.api.types.infer_dtype` when ``skipna`` is ``True`` (:issue:`28814`)
670671

671672
.. _whatsnew_1000.bug_fixes:
672673

pandas/_libs/lib.pyx

+3-3
Original file line number | Diff line number | Diff line change
@@ -1259,16 +1259,16 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
12591259
# make contiguous
12601260
values = values.ravel()
12611261

1262-
if skipna:
1263-
values = values[~isnaobj(values)]
1264-
12651262
val = _try_infer_map(values)
12661263
if val is not None:
12671264
return val
12681265

12691266
if values.dtype != np.object_:
12701267
values = values.astype('O')
12711268

1269+
if skipna:
1270+
values = values[~isnaobj(values)]
1271+
12721272
n = len(values)
12731273
if n == 0:
12741274
return 'empty'

0 commit comments

Comments
 (0)