Skip to content

Commit 12faa2e

Browse files
authored
PERF: improve performance of infer_dtype (#51054)
* PERF: improve performance of infer_dtype
* adds GH-number
* version without ABCs
1 parent 016063e commit 12faa2e

File tree

3 files changed

+10
-22
lines changed

3 files changed

+10
-22
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1066,6 +1066,7 @@ Performance improvements
10661066
- Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`)
10671067
- Performance improvement for indexing operations with nullable dtypes (:issue:`49420`)
10681068
- Performance improvement for :func:`concat` with extension array backed indexes (:issue:`49128`, :issue:`49178`)
1069+
- Performance improvement for :func:`api.types.infer_dtype` (:issue:`51054`)
10691070
- Reduce memory usage of :meth:`DataFrame.to_pickle`/:meth:`Series.to_pickle` when using BZ2 or LZMA (:issue:`49068`)
10701071
- Performance improvement for :class:`~arrays.StringArray` constructor passing a numpy array with type ``np.str_`` (:issue:`49109`)
10711072
- Performance improvement in :meth:`~arrays.IntervalArray.from_tuples` (:issue:`50620`)

pandas/_libs/lib.pyx

+9 -15
Original file line number | Diff line number | Diff line change
@@ -1367,7 +1367,7 @@ cdef object _try_infer_map(object dtype):
13671367
cdef:
13681368
object val
13691369
str attr
1370-
for attr in ["name", "kind", "base", "type"]:
1370+
for attr in ["kind", "name", "base", "type"]:
13711371
val = getattr(dtype, attr, None)
13721372
if val in _TYPE_MAP:
13731373
return _TYPE_MAP[val]
@@ -1486,23 +1486,17 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
14861486

14871487
if util.is_array(value):
14881488
values = value
1489-
elif hasattr(value, "inferred_type") and skipna is False:
1489+
elif hasattr(type(value), "inferred_type") and skipna is False:
14901490
# Index, use the cached attribute if possible, populate the cache otherwise
14911491
return value.inferred_type
14921492
elif hasattr(value, "dtype"):
1493-
# this will handle ndarray-like
1494-
# e.g. categoricals
1495-
dtype = value.dtype
1496-
if not cnp.PyArray_DescrCheck(dtype):
1497-
# i.e. not isinstance(dtype, np.dtype)
1498-
inferred = _try_infer_map(value.dtype)
1499-
if inferred is not None:
1500-
return inferred
1493+
inferred = _try_infer_map(value.dtype)
1494+
if inferred is not None:
1495+
return inferred
1496+
elif not cnp.PyArray_DescrCheck(value.dtype):
15011497
return "unknown-array"
1502-
15031498
# Unwrap Series/Index
15041499
values = np.asarray(value)
1505-
15061500
else:
15071501
if not isinstance(value, list):
15081502
value = list(value)
@@ -1512,10 +1506,10 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
15121506
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
15131507
values = construct_1d_object_array_from_listlike(value)
15141508

1515-
val = _try_infer_map(values.dtype)
1516-
if val is not None:
1509+
inferred = _try_infer_map(values.dtype)
1510+
if inferred is not None:
15171511
# Anything other than object-dtype should return here.
1518-
return val
1512+
return inferred
15191513

15201514
if values.descr.type_num != NPY_OBJECT:
15211515
# i.e. values.dtype != np.object_

pandas/core/indexes/base.py

-7
Original file line number | Diff line number | Diff line change
@@ -2607,13 +2607,6 @@ def inferred_type(self) -> str_t:
26072607
"""
26082608
Return a string of the type inferred from the values.
26092609
"""
2610-
if isinstance(self.dtype, np.dtype) and self.dtype.kind in "iufc": # fastpath
2611-
return {
2612-
"i": "integer",
2613-
"u": "integer",
2614-
"f": "floating",
2615-
"c": "complex",
2616-
}[self.dtype.kind]
26172610
return lib.infer_dtype(self._values, skipna=False)
26182611

26192612
@cache_readonly

0 commit comments

Comments
 (0)