File tree 3 files changed +26
-3
lines changed
3 files changed +26
-3
lines changed Original file line number Diff line number Diff line change 5
5
from .pandas_vb_common import (
6
6
datetime_dtypes ,
7
7
extension_dtypes ,
8
+ lib ,
8
9
numeric_dtypes ,
9
10
string_dtypes ,
10
11
)
@@ -40,4 +41,25 @@ def time_pandas_dtype_invalid(self, dtype):
40
41
pass
41
42
42
43
44
+ class InferDtypes :
45
+ param_names = ["dtype" ]
46
+ data_dict = {
47
+ "np-object" : np .array ([1 ] * 100000 , dtype = "O" ),
48
+ "py-object" : [1 ] * 100000 ,
49
+ "np-null" : np .array ([1 ] * 50000 + [np .nan ] * 50000 ),
50
+ "py-null" : [1 ] * 50000 + [None ] * 50000 ,
51
+ "np-int" : np .array ([1 ] * 100000 , dtype = int ),
52
+ "np-floating" : np .array ([1.0 ] * 100000 , dtype = float ),
53
+ "empty" : [],
54
+ "bytes" : [b"a" ] * 100000 ,
55
+ }
56
+ params = list (data_dict .keys ())
57
+
58
+ def time_infer_skipna (self , dtype ):
59
+ lib .infer_dtype (self .data_dict [dtype ], skipna = True )
60
+
61
+ def time_infer (self , dtype ):
62
+ lib .infer_dtype (self .data_dict [dtype ], skipna = False )
63
+
64
+
43
65
from .pandas_vb_common import setup # noqa: F401 isort:skip
Original file line number Diff line number Diff line change @@ -667,6 +667,7 @@ Performance improvements
667
667
- Performance improvement when checking if values in a :class:`Categorical` are equal to, greater than or equal to, or greater than a given scalar.
668
668
The improvement is not present when checking whether the :class:`Categorical` is less than, or less than or equal to, the scalar (:issue:`29820`)
669
669
- Performance improvement in :meth:`Index.equals` and :meth:`MultiIndex.equals` (:issue:`29134`)
670
+ - Performance improvement in :func:`~pandas.api.types.infer_dtype` when ``skipna`` is ``True`` (:issue:`28814`)
670
671
671
672
.. _whatsnew_1000.bug_fixes:
672
673
Original file line number Diff line number Diff line change @@ -1259,16 +1259,16 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
1259
1259
# make contiguous
1260
1260
values = values.ravel()
1261
1261
1262
- if skipna:
1263
- values = values[~ isnaobj(values)]
1264
-
1265
1262
val = _try_infer_map(values)
1266
1263
if val is not None :
1267
1264
return val
1268
1265
1269
1266
if values.dtype != np.object_:
1270
1267
values = values.astype(' O' )
1271
1268
1269
+ if skipna:
1270
+ values = values[~ isnaobj(values)]
1271
+
1272
1272
n = len (values)
1273
1273
if n == 0 :
1274
1274
return ' empty'
You can’t perform that action at this time.
0 commit comments