Skip to content

Commit afe7f1d

Browse files
authored
PERF: use is_foo_dtype fastpaths (pandas-dev#34111)
1 parent 6be51cb commit afe7f1d

21 files changed

+86
-75
lines changed

pandas/core/arrays/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def func(self, other):
134134
"use 'np.asarray(cat) <op> other'."
135135
)
136136

137-
if isinstance(other, ExtensionArray) and needs_i8_conversion(other):
137+
if isinstance(other, ExtensionArray) and needs_i8_conversion(other.dtype):
138138
# We would return NotImplemented here, but that messes up
139139
# ExtensionIndex's wrapped methods
140140
return op(other, self)

pandas/core/arrays/datetimelike.py

+16-13
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ class TimelikeOps:
363363

364364
def _round(self, freq, mode, ambiguous, nonexistent):
365365
# round the local times
366-
if is_datetime64tz_dtype(self):
366+
if is_datetime64tz_dtype(self.dtype):
367367
# operate on naive timestamps, then convert back to aware
368368
naive = self.tz_localize(None)
369369
result = naive._round(freq, mode, ambiguous, nonexistent)
@@ -1032,7 +1032,7 @@ def fillna(self, value=None, method=None, limit=None):
10321032
values = values.copy()
10331033

10341034
new_values = func(values, limit=limit, mask=mask)
1035-
if is_datetime64tz_dtype(self):
1035+
if is_datetime64tz_dtype(self.dtype):
10361036
# we need to pass int64 values to the constructor to avoid
10371037
# re-localizing incorrectly
10381038
new_values = new_values.view("i8")
@@ -1379,6 +1379,7 @@ def _time_shift(self, periods, freq=None):
13791379

13801380
@unpack_zerodim_and_defer("__add__")
13811381
def __add__(self, other):
1382+
other_dtype = getattr(other, "dtype", None)
13821383

13831384
# scalar others
13841385
if other is NaT:
@@ -1398,16 +1399,16 @@ def __add__(self, other):
13981399
result = self._time_shift(other)
13991400

14001401
# array-like others
1401-
elif is_timedelta64_dtype(other):
1402+
elif is_timedelta64_dtype(other_dtype):
14021403
# TimedeltaIndex, ndarray[timedelta64]
14031404
result = self._add_timedelta_arraylike(other)
1404-
elif is_object_dtype(other):
1405+
elif is_object_dtype(other_dtype):
14051406
# e.g. Array/Index of DateOffset objects
14061407
result = self._addsub_object_array(other, operator.add)
1407-
elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
1408+
elif is_datetime64_dtype(other_dtype) or is_datetime64tz_dtype(other_dtype):
14081409
# DatetimeIndex, ndarray[datetime64]
14091410
return self._add_datetime_arraylike(other)
1410-
elif is_integer_dtype(other):
1411+
elif is_integer_dtype(other_dtype):
14111412
if not is_period_dtype(self.dtype):
14121413
raise integer_op_not_supported(self)
14131414
result = self._addsub_int_array(other, operator.add)
@@ -1419,7 +1420,7 @@ def __add__(self, other):
14191420
# In remaining cases, this will end up raising TypeError.
14201421
return NotImplemented
14211422

1422-
if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
1423+
if isinstance(result, np.ndarray) and is_timedelta64_dtype(result.dtype):
14231424
from pandas.core.arrays import TimedeltaArray
14241425

14251426
return TimedeltaArray(result)
@@ -1455,13 +1456,13 @@ def __sub__(self, other):
14551456
result = self._sub_period(other)
14561457

14571458
# array-like others
1458-
elif is_timedelta64_dtype(other):
1459+
elif is_timedelta64_dtype(other_dtype):
14591460
# TimedeltaIndex, ndarray[timedelta64]
14601461
result = self._add_timedelta_arraylike(-other)
1461-
elif is_object_dtype(other):
1462+
elif is_object_dtype(other_dtype):
14621463
# e.g. Array/Index of DateOffset objects
14631464
result = self._addsub_object_array(other, operator.sub)
1464-
elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
1465+
elif is_datetime64_dtype(other_dtype) or is_datetime64tz_dtype(other_dtype):
14651466
# DatetimeIndex, ndarray[datetime64]
14661467
result = self._sub_datetime_arraylike(other)
14671468
elif is_period_dtype(other_dtype):
@@ -1475,14 +1476,16 @@ def __sub__(self, other):
14751476
# Includes ExtensionArrays, float_dtype
14761477
return NotImplemented
14771478

1478-
if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
1479+
if isinstance(result, np.ndarray) and is_timedelta64_dtype(result.dtype):
14791480
from pandas.core.arrays import TimedeltaArray
14801481

14811482
return TimedeltaArray(result)
14821483
return result
14831484

14841485
def __rsub__(self, other):
1485-
if is_datetime64_any_dtype(other) and is_timedelta64_dtype(self.dtype):
1486+
other_dtype = getattr(other, "dtype", None)
1487+
1488+
if is_datetime64_any_dtype(other_dtype) and is_timedelta64_dtype(self.dtype):
14861489
# ndarray[datetime64] cannot be subtracted from self, so
14871490
# we need to wrap in DatetimeArray/Index and flip the operation
14881491
if lib.is_scalar(other):
@@ -1504,7 +1507,7 @@ def __rsub__(self, other):
15041507
raise TypeError(
15051508
f"cannot subtract {type(self).__name__} from {type(other).__name__}"
15061509
)
1507-
elif is_period_dtype(self.dtype) and is_timedelta64_dtype(other):
1510+
elif is_period_dtype(self.dtype) and is_timedelta64_dtype(other_dtype):
15081511
# TODO: Can we simplify/generalize these cases at all?
15091512
raise TypeError(f"cannot subtract {type(self).__name__} from {other.dtype}")
15101513
elif is_timedelta64_dtype(self.dtype):

pandas/core/arrays/datetimes.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -631,7 +631,9 @@ def _has_same_tz(self, other):
631631
def _assert_tzawareness_compat(self, other):
632632
# adapted from _Timestamp._assert_tzawareness_compat
633633
other_tz = getattr(other, "tzinfo", None)
634-
if is_datetime64tz_dtype(other):
634+
other_dtype = getattr(other, "dtype", None)
635+
636+
if is_datetime64tz_dtype(other_dtype):
635637
# Get tzinfo from Series dtype
636638
other_tz = other.dtype.tz
637639
if other is NaT:
@@ -1913,8 +1915,9 @@ def sequence_to_dt64ns(
19131915

19141916
# By this point we are assured to have either a numpy array or Index
19151917
data, copy = maybe_convert_dtype(data, copy)
1918+
data_dtype = getattr(data, "dtype", None)
19161919

1917-
if is_object_dtype(data) or is_string_dtype(data):
1920+
if is_object_dtype(data_dtype) or is_string_dtype(data_dtype):
19181921
# TODO: We do not have tests specific to string-dtypes,
19191922
# also complex or categorical or other extension
19201923
copy = False
@@ -1927,15 +1930,16 @@ def sequence_to_dt64ns(
19271930
data, dayfirst=dayfirst, yearfirst=yearfirst
19281931
)
19291932
tz = maybe_infer_tz(tz, inferred_tz)
1933+
data_dtype = data.dtype
19301934

19311935
# `data` may have originally been a Categorical[datetime64[ns, tz]],
19321936
# so we need to handle these types.
1933-
if is_datetime64tz_dtype(data):
1937+
if is_datetime64tz_dtype(data_dtype):
19341938
# DatetimeArray -> ndarray
19351939
tz = maybe_infer_tz(tz, data.tz)
19361940
result = data._data
19371941

1938-
elif is_datetime64_dtype(data):
1942+
elif is_datetime64_dtype(data_dtype):
19391943
# tz-naive DatetimeArray or ndarray[datetime64]
19401944
data = getattr(data, "_data", data)
19411945
if data.dtype != DT64NS_DTYPE:

pandas/core/arrays/sparse/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1517,7 +1517,7 @@ def make_sparse(arr: np.ndarray, kind="block", fill_value=None, dtype=None, copy
15171517
mask = notna(arr)
15181518
else:
15191519
# cast to object comparison to be safe
1520-
if is_string_dtype(arr):
1520+
if is_string_dtype(arr.dtype):
15211521
arr = arr.astype(object)
15221522

15231523
if is_object_dtype(arr.dtype):

pandas/core/dtypes/cast.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1366,7 +1366,7 @@ def maybe_cast_to_datetime(value, dtype, errors: str = "raise"):
13661366
# is solved. String data that is passed with a
13671367
# datetime64tz is assumed to be naive which should
13681368
# be localized to the timezone.
1369-
is_dt_string = is_string_dtype(value)
1369+
is_dt_string = is_string_dtype(value.dtype)
13701370
value = to_datetime(value, errors=errors).array
13711371
if is_dt_string:
13721372
# Strings here are naive, so directly localize

pandas/core/dtypes/concat.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,14 @@ def get_dtype_kinds(l):
4040
dtype = arr.dtype
4141
if is_categorical_dtype(dtype):
4242
typ = "category"
43-
elif is_sparse(arr):
43+
elif is_sparse(dtype):
4444
typ = "sparse"
4545
elif isinstance(arr, ABCRangeIndex):
4646
typ = "range"
47-
elif is_datetime64tz_dtype(arr):
47+
elif is_datetime64tz_dtype(dtype):
4848
# if to_concat contains different tz,
4949
# the result must be object dtype
50-
typ = str(arr.dtype)
50+
typ = str(dtype)
5151
elif is_datetime64_dtype(dtype):
5252
typ = "datetime"
5353
elif is_timedelta64_dtype(dtype):
@@ -57,7 +57,7 @@ def get_dtype_kinds(l):
5757
elif is_bool_dtype(dtype):
5858
typ = "bool"
5959
elif is_extension_array_dtype(dtype):
60-
typ = str(arr.dtype)
60+
typ = str(dtype)
6161
else:
6262
typ = dtype.kind
6363
typs.add(typ)

pandas/core/dtypes/missing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ def array_equivalent(left, right, strict_nan: bool = False) -> bool:
393393

394394
# Object arrays can contain None, NaN and NaT.
395395
# string dtypes must come to this path for NumPy 1.7.1 compat
396-
if is_string_dtype(left) or is_string_dtype(right):
396+
if is_string_dtype(left.dtype) or is_string_dtype(right.dtype):
397397

398398
if not strict_nan:
399399
# isna considers NaN and None to be equivalent.

pandas/core/generic.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -6907,9 +6907,9 @@ def interpolate(
69076907
index = df.index
69086908
methods = {"index", "values", "nearest", "time"}
69096909
is_numeric_or_datetime = (
6910-
is_numeric_dtype(index)
6911-
or is_datetime64_any_dtype(index)
6912-
or is_timedelta64_dtype(index)
6910+
is_numeric_dtype(index.dtype)
6911+
or is_datetime64_any_dtype(index.dtype)
6912+
or is_timedelta64_dtype(index.dtype)
69136913
)
69146914
if method not in methods and not is_numeric_or_datetime:
69156915
raise ValueError(
@@ -8588,7 +8588,7 @@ def _align_frame(
85888588
right = right.fillna(method=method, axis=fill_axis, limit=limit)
85898589

85908590
# if the DatetimeIndexes have different tz, convert to UTC
8591-
if is_datetime64tz_dtype(left.index):
8591+
if is_datetime64tz_dtype(left.index.dtype):
85928592
if left.index.tz != right.index.tz:
85938593
if join_index is not None:
85948594
left.index = join_index
@@ -8675,7 +8675,7 @@ def _align_series(
86758675

86768676
# if the DatetimeIndexes have different tz, convert to UTC
86778677
if is_series or (not is_series and axis == 0):
8678-
if is_datetime64tz_dtype(left.index):
8678+
if is_datetime64tz_dtype(left.index.dtype):
86798679
if left.index.tz != right.index.tz:
86808680
if join_index is not None:
86818681
left.index = join_index
@@ -9957,13 +9957,13 @@ def describe_timestamp_1d(data):
99579957
return pd.Series(d, index=stat_index, name=data.name)
99589958

99599959
def describe_1d(data):
9960-
if is_bool_dtype(data):
9960+
if is_bool_dtype(data.dtype):
99619961
return describe_categorical_1d(data)
99629962
elif is_numeric_dtype(data):
99639963
return describe_numeric_1d(data)
9964-
elif is_datetime64_any_dtype(data):
9964+
elif is_datetime64_any_dtype(data.dtype):
99659965
return describe_timestamp_1d(data)
9966-
elif is_timedelta64_dtype(data):
9966+
elif is_timedelta64_dtype(data.dtype):
99679967
return describe_numeric_1d(data)
99689968
else:
99699969
return describe_categorical_1d(data)

pandas/core/groupby/generic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -690,7 +690,7 @@ def value_counts(
690690
lab = lev.take(lab.cat.codes)
691691
llab = lambda lab, inc: lab[inc]._multiindex.codes[-1]
692692

693-
if is_interval_dtype(lab):
693+
if is_interval_dtype(lab.dtype):
694694
# TODO: should we do this inside II?
695695
sorter = np.lexsort((lab.left, lab.right, ids))
696696
else:

pandas/core/groupby/ops.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -461,12 +461,12 @@ def _cython_operation(
461461
# are not set up for dim transforming
462462
if is_categorical_dtype(values.dtype) or is_sparse(values.dtype):
463463
raise NotImplementedError(f"{values.dtype} dtype not supported")
464-
elif is_datetime64_any_dtype(values):
464+
elif is_datetime64_any_dtype(values.dtype):
465465
if how in ["add", "prod", "cumsum", "cumprod"]:
466466
raise NotImplementedError(
467467
f"datetime64 type does not support {how} operations"
468468
)
469-
elif is_timedelta64_dtype(values):
469+
elif is_timedelta64_dtype(values.dtype):
470470
if how in ["prod", "cumprod"]:
471471
raise NotImplementedError(
472472
f"timedelta64 type does not support {how} operations"

pandas/core/indexes/category.py

+4
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
is_interval_dtype,
1818
is_list_like,
1919
is_scalar,
20+
pandas_dtype,
2021
)
2122
from pandas.core.dtypes.dtypes import CategoricalDtype
2223
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
@@ -372,6 +373,9 @@ def __contains__(self, key: Any) -> bool:
372373

373374
@doc(Index.astype)
374375
def astype(self, dtype, copy=True):
376+
if dtype is not None:
377+
dtype = pandas_dtype(dtype)
378+
375379
if is_interval_dtype(dtype):
376380
from pandas import IntervalIndex
377381

pandas/core/internals/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2714,7 +2714,7 @@ def make_block(values, placement, klass=None, ndim=None, dtype=None):
27142714
dtype = dtype or values.dtype
27152715
klass = get_block_type(values, dtype)
27162716

2717-
elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values):
2717+
elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
27182718
# TODO: This is no longer hit internally; does it need to be retained
27192719
# for e.g. pyarrow?
27202720
values = DatetimeArray._simple_new(values, dtype=dtype)

pandas/core/internals/concat.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ def _get_empty_dtype_and_na(join_units):
339339
if len(join_units) == 1:
340340
blk = join_units[0].block
341341
if blk is None:
342-
return np.float64, np.nan
342+
return np.dtype(np.float64), np.nan
343343

344344
if _is_uniform_reindex(join_units):
345345
# FIXME: integrate property
@@ -424,7 +424,7 @@ def _get_empty_dtype_and_na(join_units):
424424
return g, g.type(np.nan)
425425
elif is_numeric_dtype(g):
426426
if has_none_blocks:
427-
return np.float64, np.nan
427+
return np.dtype(np.float64), np.nan
428428
else:
429429
return g, None
430430

pandas/core/nanops.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -759,12 +759,12 @@ def nanvar(values, axis=None, skipna=True, ddof=1, mask=None):
759759
values = extract_array(values, extract_numpy=True)
760760
dtype = values.dtype
761761
mask = _maybe_get_mask(values, skipna, mask)
762-
if is_any_int_dtype(values):
762+
if is_any_int_dtype(dtype):
763763
values = values.astype("f8")
764764
if mask is not None:
765765
values[mask] = np.nan
766766

767-
if is_float_dtype(values):
767+
if is_float_dtype(values.dtype):
768768
count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
769769
else:
770770
count, d = _get_counts_nanvar(values.shape, mask, axis, ddof)

0 commit comments

Comments
 (0)