Skip to content

Commit e629612

Browse files
authored
PERF: dtype checks (#52213)
1 parent 898ab21 commit e629612

File tree

13 files changed

+96
-93
lines changed

13 files changed

+96
-93
lines changed

pandas/_libs/parsers.pyx

+14-20
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import sys
1010
import time
1111
import warnings
1212

13-
from pandas.errors import ParserError
1413
from pandas.util._exceptions import find_stack_level
1514

1615
from pandas import StringDtype
@@ -106,15 +105,10 @@ from pandas.errors import (
106105
ParserWarning,
107106
)
108107

109-
from pandas.core.dtypes.common import (
110-
is_bool_dtype,
111-
is_datetime64_dtype,
112-
is_extension_array_dtype,
113-
is_float_dtype,
114-
is_integer_dtype,
115-
is_object_dtype,
108+
from pandas.core.dtypes.dtypes import (
109+
CategoricalDtype,
110+
ExtensionDtype,
116111
)
117-
from pandas.core.dtypes.dtypes import CategoricalDtype
118112
from pandas.core.dtypes.inference import is_dict_like
119113

120114
cdef:
@@ -1077,7 +1071,7 @@ cdef class TextReader:
10771071

10781072
# don't try to upcast EAs
10791073
if (
1080-
na_count > 0 and not is_extension_array_dtype(col_dtype)
1074+
na_count > 0 and not isinstance(col_dtype, ExtensionDtype)
10811075
or self.dtype_backend != "numpy"
10821076
):
10831077
use_dtype_backend = self.dtype_backend != "numpy" and col_dtype is None
@@ -1142,14 +1136,14 @@ cdef class TextReader:
11421136
# (see _try_bool_flex()). Usually this would be taken care of using
11431137
# _maybe_upcast(), but if col_dtype is a floating type we should just
11441138
# take care of that cast here.
1145-
if col_res.dtype == np.bool_ and is_float_dtype(col_dtype):
1139+
if col_res.dtype == np.bool_ and col_dtype.kind == "f":
11461140
mask = col_res.view(np.uint8) == na_values[np.uint8]
11471141
col_res = col_res.astype(col_dtype)
11481142
np.putmask(col_res, mask, np.nan)
11491143
return col_res, na_count
11501144

11511145
# NaNs are already cast to True here, so we cannot use astype
1152-
if col_res.dtype == np.bool_ and is_integer_dtype(col_dtype):
1146+
if col_res.dtype == np.bool_ and col_dtype.kind in "iu":
11531147
if na_count > 0:
11541148
raise ValueError(
11551149
f"cannot safely convert passed user dtype of "
@@ -1193,14 +1187,14 @@ cdef class TextReader:
11931187
cats, codes, dtype, true_values=true_values)
11941188
return cat, na_count
11951189

1196-
elif is_extension_array_dtype(dtype):
1190+
elif isinstance(dtype, ExtensionDtype):
11971191
result, na_count = self._string_convert(i, start, end, na_filter,
11981192
na_hashset)
11991193

12001194
array_type = dtype.construct_array_type()
12011195
try:
12021196
# use _from_sequence_of_strings if the class defines it
1203-
if is_bool_dtype(dtype):
1197+
if dtype.kind == "b":
12041198
true_values = [x.decode() for x in self.true_values]
12051199
false_values = [x.decode() for x in self.false_values]
12061200
result = array_type._from_sequence_of_strings(
@@ -1216,7 +1210,7 @@ cdef class TextReader:
12161210

12171211
return result, na_count
12181212

1219-
elif is_integer_dtype(dtype):
1213+
elif dtype.kind in "iu":
12201214
try:
12211215
result, na_count = _try_int64(self.parser, i, start,
12221216
end, na_filter, na_hashset)
@@ -1233,14 +1227,14 @@ cdef class TextReader:
12331227

12341228
return result, na_count
12351229

1236-
elif is_float_dtype(dtype):
1230+
elif dtype.kind == "f":
12371231
result, na_count = _try_double(self.parser, i, start, end,
12381232
na_filter, na_hashset, na_flist)
12391233

12401234
if result is not None and dtype != "float64":
12411235
result = result.astype(dtype)
12421236
return result, na_count
1243-
elif is_bool_dtype(dtype):
1237+
elif dtype.kind == "b":
12441238
result, na_count = _try_bool_flex(self.parser, i, start, end,
12451239
na_filter, na_hashset,
12461240
self.true_set, self.false_set)
@@ -1267,10 +1261,10 @@ cdef class TextReader:
12671261
# unicode variable width
12681262
return self._string_convert(i, start, end, na_filter,
12691263
na_hashset)
1270-
elif is_object_dtype(dtype):
1264+
elif dtype == object:
12711265
return self._string_convert(i, start, end, na_filter,
12721266
na_hashset)
1273-
elif is_datetime64_dtype(dtype):
1267+
elif dtype.kind == "M":
12741268
raise TypeError(f"the dtype {dtype} is not supported "
12751269
f"for parsing, pass this column "
12761270
f"using parse_dates instead")
@@ -1438,7 +1432,7 @@ def _maybe_upcast(
14381432
-------
14391433
The casted array.
14401434
"""
1441-
if is_extension_array_dtype(arr.dtype):
1435+
if isinstance(arr.dtype, ExtensionDtype):
14421436
# TODO: the docstring says arr is an ndarray, in which case this cannot
14431437
# be reached. Is that incorrect?
14441438
return arr

pandas/_testing/asserters.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,8 @@
1313

1414
from pandas.core.dtypes.common import (
1515
is_bool,
16-
is_categorical_dtype,
1716
is_extension_array_dtype,
1817
is_integer_dtype,
19-
is_interval_dtype,
2018
is_number,
2119
is_numeric_dtype,
2220
needs_i8_conversion,
@@ -33,6 +31,7 @@
3331
DataFrame,
3432
DatetimeIndex,
3533
Index,
34+
IntervalDtype,
3635
IntervalIndex,
3736
MultiIndex,
3837
PeriodIndex,
@@ -238,7 +237,9 @@ def _check_types(left, right, obj: str = "Index") -> None:
238237
assert_attr_equal("inferred_type", left, right, obj=obj)
239238

240239
# Skip exact dtype checking when `check_categorical` is False
241-
if is_categorical_dtype(left.dtype) and is_categorical_dtype(right.dtype):
240+
if isinstance(left.dtype, CategoricalDtype) and isinstance(
241+
right.dtype, CategoricalDtype
242+
):
242243
if check_categorical:
243244
assert_attr_equal("dtype", left, right, obj=obj)
244245
assert_index_equal(left.categories, right.categories, exact=exact)
@@ -335,7 +336,9 @@ def _get_ilevel_values(index, level):
335336
assert_interval_array_equal(left._values, right._values)
336337

337338
if check_categorical:
338-
if is_categorical_dtype(left.dtype) or is_categorical_dtype(right.dtype):
339+
if isinstance(left.dtype, CategoricalDtype) or isinstance(
340+
right.dtype, CategoricalDtype
341+
):
339342
assert_categorical_equal(left._values, right._values, obj=f"{obj} category")
340343

341344

@@ -946,7 +949,9 @@ def assert_series_equal(
946949
f"is not equal to {right._values}."
947950
)
948951
raise AssertionError(msg)
949-
elif is_interval_dtype(left.dtype) and is_interval_dtype(right.dtype):
952+
elif isinstance(left.dtype, IntervalDtype) and isinstance(
953+
right.dtype, IntervalDtype
954+
):
950955
assert_interval_array_equal(left.array, right.array)
951956
elif isinstance(left.dtype, CategoricalDtype) or isinstance(
952957
right.dtype, CategoricalDtype

pandas/core/algorithms.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
ensure_platform_int,
4343
is_array_like,
4444
is_bool_dtype,
45-
is_categorical_dtype,
4645
is_complex_dtype,
4746
is_dict_like,
4847
is_extension_array_dtype,
@@ -59,6 +58,7 @@
5958
from pandas.core.dtypes.concat import concat_compat
6059
from pandas.core.dtypes.dtypes import (
6160
BaseMaskedDtype,
61+
CategoricalDtype,
6262
ExtensionDtype,
6363
PandasDtype,
6464
)
@@ -141,7 +141,7 @@ def _ensure_data(values: ArrayLike) -> np.ndarray:
141141
return _ensure_data(values._data)
142142
return np.asarray(values)
143143

144-
elif is_categorical_dtype(values.dtype):
144+
elif isinstance(values.dtype, CategoricalDtype):
145145
# NB: cases that go through here should NOT be using _reconstruct_data
146146
# on the back-end.
147147
values = cast("Categorical", values)
@@ -417,7 +417,7 @@ def unique_with_mask(values, mask: npt.NDArray[np.bool_] | None = None):
417417
"""See algorithms.unique for docs. Takes a mask for masked arrays."""
418418
values = _ensure_arraylike(values)
419419

420-
if is_extension_array_dtype(values.dtype):
420+
if isinstance(values.dtype, ExtensionDtype):
421421
# Dispatch to extension dtype's unique.
422422
return values.unique()
423423

@@ -1534,7 +1534,7 @@ def safe_sort(
15341534
ordered: AnyArrayLike
15351535

15361536
if (
1537-
not is_extension_array_dtype(values)
1537+
not isinstance(values.dtype, ExtensionDtype)
15381538
and lib.infer_dtype(values, skipna=False) == "mixed-integer"
15391539
):
15401540
ordered = _sort_mixed(values)

0 commit comments

Comments
 (0)