From 85eb8294f094b758ee11eda46172e0731e8ef764 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 10 Apr 2023 15:44:11 -0700 Subject: [PATCH] PERF: numpy dtype checks --- pandas/_libs/lib.pyi | 1 + pandas/_libs/lib.pyx | 27 ++++++++++++++++++ pandas/core/arrays/base.py | 6 ++-- pandas/core/arrays/categorical.py | 6 ++-- pandas/core/arrays/datetimelike.py | 28 +++++++++---------- pandas/core/arrays/datetimes.py | 13 ++++----- pandas/core/arrays/timedeltas.py | 12 ++++---- pandas/core/dtypes/astype.py | 6 ++-- pandas/core/dtypes/cast.py | 8 ++---- pandas/core/generic.py | 3 +- pandas/core/indexes/accessors.py | 11 ++++---- pandas/core/indexes/range.py | 3 +- pandas/core/indexes/timedeltas.py | 3 +- pandas/core/methods/describe.py | 4 +-- pandas/core/reshape/tile.py | 10 +++---- pandas/io/formats/format.py | 12 +++----- pandas/io/json/_table_schema.py | 7 ++--- pandas/io/pytables.py | 5 ++-- .../tests/io/json/test_json_table_schema.py | 3 +- pandas/tseries/frequencies.py | 10 +++---- 20 files changed, 91 insertions(+), 87 deletions(-) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 2e425f5797c62..05d569f0e58eb 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -36,6 +36,7 @@ NoDefault = Literal[_NoDefault.no_default] i8max: int u8max: int +def is_np_dtype(dtype: object, kinds: str | None = ...) -> bool: ... def item_from_zerodim(val: object) -> object: ... def infer_dtype(value: object, skipna: bool = ...) -> str: ... def is_iterator(obj: object) -> bool: ... diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 5bf99301d9261..92f1dc2d4ea3b 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -3070,3 +3070,30 @@ def dtypes_all_equal(list types not None) -> bool: return False else: return True + + +def is_np_dtype(object dtype, str kinds=None) -> bool: + """ + Optimized check for `isinstance(dtype, np.dtype)` with + optional `and dtype.kind in kinds`. + + dtype = np.dtype("m8[ns]") + + In [7]: %timeit isinstance(dtype, np.dtype) + 117 ns ± 1.91 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each) + + In [8]: %timeit is_np_dtype(dtype) + 64 ns ± 1.51 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each) + + In [9]: %timeit is_timedelta64_dtype(dtype) + 209 ns ± 6.96 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each) + + In [10]: %timeit is_np_dtype(dtype, "m") + 93.4 ns ± 1.11 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each) + """ + if not cnp.PyArray_DescrCheck(dtype): + # i.e. not isinstance(dtype, np.dtype) + return False + if kinds is None: + return True + return dtype.kind in kinds diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 9342d3b68679c..e0c0f0e045ba5 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -40,11 +40,9 @@ from pandas.core.dtypes.cast import maybe_cast_to_extension_array from pandas.core.dtypes.common import ( - is_datetime64_dtype, is_dtype_equal, is_list_like, is_scalar, - is_timedelta64_dtype, pandas_dtype, ) from pandas.core.dtypes.dtypes import ExtensionDtype @@ -582,12 +580,12 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: cls = dtype.construct_array_type() return cls._from_sequence(self, dtype=dtype, copy=copy) - elif is_datetime64_dtype(dtype): + elif lib.is_np_dtype(dtype, "M"): from pandas.core.arrays import DatetimeArray return DatetimeArray._from_sequence(self, dtype=dtype, copy=copy) - elif is_timedelta64_dtype(dtype): + elif lib.is_np_dtype(dtype, "m"): from pandas.core.arrays import TimedeltaArray return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f41327a4a16ca..9a656eaa485c9 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -39,7 +39,6 @@ is_any_real_numeric_dtype, is_bool_dtype, is_categorical_dtype, - is_datetime64_dtype, is_dict_like, is_dtype_equal, is_extension_array_dtype, @@ -47,7 +46,6 @@ is_integer_dtype, is_list_like, is_scalar, - is_timedelta64_dtype, needs_i8_conversion, pandas_dtype, ) @@ -622,9 +620,9 @@ def _from_inferred_categories( # Convert to a specialized type with `dtype` if specified. if is_any_real_numeric_dtype(dtype.categories): cats = to_numeric(inferred_categories, errors="coerce") - elif is_datetime64_dtype(dtype.categories): + elif lib.is_np_dtype(dtype.categories.dtype, "M"): cats = to_datetime(inferred_categories, errors="coerce") - elif is_timedelta64_dtype(dtype.categories): + elif lib.is_np_dtype(dtype.categories.dtype, "m"): cats = to_timedelta(inferred_categories, errors="coerce") elif is_bool_dtype(dtype.categories): if true_values is None: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index d428f003f6a68..c4238c09bc45b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -84,7 +84,6 @@ from pandas.core.dtypes.common import ( is_all_strings, is_datetime64_any_dtype, - is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal, is_float_dtype, @@ -92,7 +91,6 @@ is_list_like, is_object_dtype, is_string_dtype, - is_timedelta64_dtype, pandas_dtype, ) from pandas.core.dtypes.dtypes import ( @@ -1000,7 +998,7 @@ def _get_arithmetic_result_freq(self, other) -> BaseOffset | None: @final def _add_datetimelike_scalar(self, other) -> DatetimeArray: - if not is_timedelta64_dtype(self.dtype): + if not lib.is_np_dtype(self.dtype, "m"): raise TypeError( f"cannot add {type(self).__name__} and {type(other).__name__}" ) @@ -1036,7 +1034,7 @@ def _add_datetimelike_scalar(self, other) -> DatetimeArray: @final def _add_datetime_arraylike(self, other: DatetimeArray) -> DatetimeArray: - if not is_timedelta64_dtype(self.dtype): + if not lib.is_np_dtype(self.dtype, "m"): raise TypeError( f"cannot add {type(self).__name__} and {type(other).__name__}" ) @@ -1100,7 +1098,7 @@ def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray: @final def _add_period(self, other: Period) -> PeriodArray: - if not is_timedelta64_dtype(self.dtype): + if not lib.is_np_dtype(self.dtype, "m"): raise TypeError(f"cannot add Period to a {type(self).__name__}") # We will wrap in a PeriodArray and defer to the reversed operation @@ -1301,7 +1299,7 @@ def __add__(self, other): result = self._add_offset(other) elif isinstance(other, (datetime, np.datetime64)): result = self._add_datetimelike_scalar(other) - elif isinstance(other, Period) and is_timedelta64_dtype(self.dtype): + elif isinstance(other, Period) and lib.is_np_dtype(self.dtype, "m"): result = self._add_period(other) elif lib.is_integer(other): # This check must come after the check for np.timedelta64 @@ -1312,13 +1310,13 @@ def __add__(self, other): result = obj._addsub_int_array_or_scalar(other * obj.dtype._n, operator.add) # array-like others - elif is_timedelta64_dtype(other_dtype): + elif lib.is_np_dtype(other_dtype, "m"): # TimedeltaIndex, ndarray[timedelta64] result = self._add_timedelta_arraylike(other) elif is_object_dtype(other_dtype): # e.g. Array/Index of DateOffset objects result = self._addsub_object_array(other, operator.add) - elif is_datetime64_dtype(other_dtype) or isinstance( + elif lib.is_np_dtype(other_dtype, "M") or isinstance( other_dtype, DatetimeTZDtype ): # DatetimeIndex, ndarray[datetime64] @@ -1336,7 +1334,7 @@ def __add__(self, other): # In remaining cases, this will end up raising TypeError. return NotImplemented - if isinstance(result, np.ndarray) and is_timedelta64_dtype(result.dtype): + if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"): from pandas.core.arrays import TimedeltaArray return TimedeltaArray(result) @@ -1373,13 +1371,13 @@ def __sub__(self, other): result = self._sub_periodlike(other) # array-like others - elif is_timedelta64_dtype(other_dtype): + elif lib.is_np_dtype(other_dtype, "m"): # TimedeltaIndex, ndarray[timedelta64] result = self._add_timedelta_arraylike(-other) elif is_object_dtype(other_dtype): # e.g. Array/Index of DateOffset objects result = self._addsub_object_array(other, operator.sub) - elif is_datetime64_dtype(other_dtype) or isinstance( + elif lib.is_np_dtype(other_dtype, "M") or isinstance( other_dtype, DatetimeTZDtype ): # DatetimeIndex, ndarray[datetime64] @@ -1396,7 +1394,7 @@ def __sub__(self, other): # Includes ExtensionArrays, float_dtype return NotImplemented - if isinstance(result, np.ndarray) and is_timedelta64_dtype(result.dtype): + if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"): from pandas.core.arrays import TimedeltaArray return TimedeltaArray(result) @@ -1405,7 +1403,7 @@ def __sub__(self, other): def __rsub__(self, other): other_dtype = getattr(other, "dtype", None) - if is_datetime64_any_dtype(other_dtype) and is_timedelta64_dtype(self.dtype): + if is_datetime64_any_dtype(other_dtype) and lib.is_np_dtype(self.dtype, "m"): # ndarray[datetime64] cannot be subtracted from self, so # we need to wrap in DatetimeArray/Index and flip the operation if lib.is_scalar(other): @@ -1427,10 +1425,10 @@ def __rsub__(self, other): raise TypeError( f"cannot subtract {type(self).__name__} from {type(other).__name__}" ) - elif isinstance(self.dtype, PeriodDtype) and is_timedelta64_dtype(other_dtype): + elif isinstance(self.dtype, PeriodDtype) and lib.is_np_dtype(other_dtype, "m"): # TODO: Can we simplify/generalize these cases at all? raise TypeError(f"cannot subtract {type(self).__name__} from {other.dtype}") - elif is_timedelta64_dtype(self.dtype): + elif lib.is_np_dtype(self.dtype, "m"): self = cast("TimedeltaArray", self) return (-self) + other diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index dcb1c0965cc5b..12245a144ec2a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -57,7 +57,6 @@ is_object_dtype, is_sparse, is_string_dtype, - is_timedelta64_dtype, pandas_dtype, ) from pandas.core.dtypes.dtypes import ( @@ -670,7 +669,7 @@ def astype(self, dtype, copy: bool = True): elif ( self.tz is None - and is_datetime64_dtype(dtype) + and lib.is_np_dtype(dtype, "M") and not is_unitless(dtype) and is_supported_unit(get_unit_from_dtype(dtype)) ): @@ -679,7 +678,7 @@ def astype(self, dtype, copy: bool = True): return type(self)._simple_new(res_values, dtype=res_values.dtype) # TODO: preserve freq? - elif self.tz is not None and is_datetime64_dtype(dtype): + elif self.tz is not None and lib.is_np_dtype(dtype, "M"): # pre-2.0 behavior for DTA/DTI was # values.tz_convert("UTC").tz_localize(None), which did not match # the Series behavior @@ -691,7 +690,7 @@ def astype(self, dtype, copy: bool = True): elif ( self.tz is None - and is_datetime64_dtype(dtype) + and lib.is_np_dtype(dtype, "M") and dtype != self.dtype and is_unitless(dtype) ): @@ -2083,7 +2082,7 @@ def _sequence_to_dt64ns( tz = _maybe_infer_tz(tz, data.tz) result = data._ndarray - elif is_datetime64_dtype(data_dtype): + elif lib.is_np_dtype(data_dtype, "M"): # tz-naive DatetimeArray or ndarray[datetime64] data = getattr(data, "_ndarray", data) new_dtype = data.dtype @@ -2242,7 +2241,7 @@ def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None): data = data.astype(DT64NS_DTYPE).view("i8") copy = False - elif is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype): + elif lib.is_np_dtype(data.dtype, "m") or is_bool_dtype(data.dtype): # GH#29794 enforcing deprecation introduced in GH#23539 raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]") elif isinstance(data.dtype, PeriodDtype): @@ -2391,7 +2390,7 @@ def _validate_tz_from_dtype( raise ValueError("Cannot pass both a timezone-aware dtype and tz=None") tz = dtz - if tz is not None and is_datetime64_dtype(dtype): + if tz is not None and lib.is_np_dtype(dtype, "M"): # We also need to check for the case where the user passed a # tz-naive dtype (i.e. datetime64[ns]) if tz is not None and not timezones.tz_compare(tz, dtz): diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 7731bb499cd21..e182ee08f1d58 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -484,7 +484,7 @@ def __mul__(self, other) -> Self: if not hasattr(other, "dtype"): # list, tuple other = np.array(other) - if len(other) != len(self) and not is_timedelta64_dtype(other.dtype): + if len(other) != len(self) and not lib.is_np_dtype(other.dtype, "m"): # Exclude timedelta64 here so we correctly raise TypeError # for that instead of ValueError raise ValueError("Cannot multiply with unequal lengths") @@ -585,7 +585,7 @@ def __truediv__(self, other): other = self._cast_divlike_op(other) if ( - is_timedelta64_dtype(other.dtype) + lib.is_np_dtype(other.dtype, "m") or is_integer_dtype(other.dtype) or is_float_dtype(other.dtype) ): @@ -613,7 +613,7 @@ def __rtruediv__(self, other): return self._scalar_divlike_op(other, op) other = self._cast_divlike_op(other) - if is_timedelta64_dtype(other.dtype): + if lib.is_np_dtype(other.dtype, "m"): return self._vector_divlike_op(other, op) elif is_object_dtype(other.dtype): @@ -634,7 +634,7 @@ def __floordiv__(self, other): other = self._cast_divlike_op(other) if ( - is_timedelta64_dtype(other.dtype) + lib.is_np_dtype(other.dtype, "m") or is_integer_dtype(other.dtype) or is_float_dtype(other.dtype) ): @@ -662,7 +662,7 @@ def __rfloordiv__(self, other): return self._scalar_divlike_op(other, op) other = self._cast_divlike_op(other) - if is_timedelta64_dtype(other.dtype): + if lib.is_np_dtype(other.dtype, "m"): return self._vector_divlike_op(other, op) elif is_object_dtype(other.dtype): @@ -940,7 +940,7 @@ def sequence_to_td64ns( data[mask] = iNaT copy = False - elif is_timedelta64_dtype(data.dtype): + elif lib.is_np_dtype(data.dtype, "m"): data_unit = get_unit_from_dtype(data.dtype) if not is_supported_unit(data_unit): # cast to closest supported unit, i.e. s or ns diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index f865968328286..a69559493c386 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -18,12 +18,10 @@ from pandas.errors import IntCastingNaNError from pandas.core.dtypes.common import ( - is_datetime64_dtype, is_dtype_equal, is_integer_dtype, is_object_dtype, is_string_dtype, - is_timedelta64_dtype, pandas_dtype, ) from pandas.core.dtypes.dtypes import ( @@ -108,14 +106,14 @@ def _astype_nansafe( # if we have a datetime/timedelta array of objects # then coerce to datetime64[ns] and use DatetimeArray.astype - if is_datetime64_dtype(dtype): + if lib.is_np_dtype(dtype, "M"): from pandas import to_datetime dti = to_datetime(arr.ravel()) dta = dti._data.reshape(arr.shape) return dta.astype(dtype, copy=False)._ndarray - elif is_timedelta64_dtype(dtype): + elif lib.is_np_dtype(dtype, "m"): from pandas.core.construction import ensure_wrapped_if_datetimelike # bc we know arr.dtype == object, this is equivalent to diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f1945a2eb32ab..a2c454709fd2e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -52,7 +52,6 @@ is_bool_dtype, is_complex, is_complex_dtype, - is_datetime64_dtype, is_extension_array_dtype, is_float, is_float_dtype, @@ -63,7 +62,6 @@ is_scalar, is_signed_integer_dtype, is_string_dtype, - is_timedelta64_dtype, is_unsigned_integer_dtype, pandas_dtype as pandas_dtype_func, ) @@ -1203,7 +1201,7 @@ def maybe_cast_to_datetime( # _ensure_nanosecond_dtype raises TypeError _ensure_nanosecond_dtype(dtype) - if is_timedelta64_dtype(dtype): + if lib.is_np_dtype(dtype, "m"): res = TimedeltaArray._from_sequence(value, dtype=dtype) return res else: @@ -1407,9 +1405,9 @@ def find_common_type(types): return np.dtype("object") # take lowest unit - if all(is_datetime64_dtype(t) for t in types): + if all(lib.is_np_dtype(t, "M") for t in types): return np.dtype("datetime64[ns]") - if all(is_timedelta64_dtype(t) for t in types): + if all(lib.is_np_dtype(t, "m") for t in types): return np.dtype("timedelta64[ns]") # don't mix bool / int or float or complex diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 907f7264ad144..4701d928bfc6c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -120,7 +120,6 @@ is_numeric_dtype, is_re_compilable, is_scalar, - is_timedelta64_dtype, pandas_dtype, ) from pandas.core.dtypes.dtypes import ( @@ -7773,7 +7772,7 @@ def interpolate( is_numeric_or_datetime = ( is_numeric_dtype(index.dtype) or is_datetime64_any_dtype(index.dtype) - or is_timedelta64_dtype(index.dtype) + or lib.is_np_dtype(index.dtype, "m") ) if method not in methods and not is_numeric_or_datetime: raise ValueError( diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index bf4da90efc17c..4f529b71c867f 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -11,13 +11,12 @@ import numpy as np +from pandas._libs import lib from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( - is_datetime64_dtype, is_integer_dtype, is_list_like, - is_timedelta64_dtype, ) from pandas.core.dtypes.dtypes import ( CategoricalDtype, @@ -70,13 +69,13 @@ def __init__(self, data: Series, orig) -> None: def _get_values(self): data = self._parent - if is_datetime64_dtype(data.dtype): + if lib.is_np_dtype(data.dtype, "M"): return DatetimeIndex(data, copy=False, name=self.name) elif isinstance(data.dtype, DatetimeTZDtype): return DatetimeIndex(data, copy=False, name=self.name) - elif is_timedelta64_dtype(data.dtype): + elif lib.is_np_dtype(data.dtype, "m"): return TimedeltaIndex(data, copy=False, name=self.name) elif isinstance(data.dtype, PeriodDtype): @@ -593,11 +592,11 @@ def __new__(cls, data: Series): if isinstance(data.dtype, ArrowDtype) and data.dtype.kind == "M": return ArrowTemporalProperties(data, orig) - if is_datetime64_dtype(data.dtype): + if lib.is_np_dtype(data.dtype, "M"): return DatetimeProperties(data, orig) elif isinstance(data.dtype, DatetimeTZDtype): return DatetimeProperties(data, orig) - elif is_timedelta64_dtype(data.dtype): + elif lib.is_np_dtype(data.dtype, "m"): return TimedeltaProperties(data, orig) elif isinstance(data.dtype, PeriodDtype): return PeriodProperties(data, orig) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index c0775a17d76d1..f3752efc206ad 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -34,7 +34,6 @@ is_integer, is_scalar, is_signed_integer_dtype, - is_timedelta64_dtype, ) from pandas.core.dtypes.generic import ABCTimedeltaIndex @@ -978,7 +977,7 @@ def _arith_method(self, other, op): # GH#19333 is_integer evaluated True on timedelta64, # so we need to catch these explicitly return super()._arith_method(other, op) - elif is_timedelta64_dtype(other): + elif lib.is_np_dtype(getattr(other, "dtype", None), "m"): # Must be an np.ndarray; GH#22390 return super()._arith_method(other, op) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index de5e5c61e96f1..cefb876007c97 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -16,7 +16,6 @@ from pandas.core.dtypes.common import ( is_dtype_equal, is_scalar, - is_timedelta64_dtype, ) from pandas.core.dtypes.generic import ABCSeries @@ -185,7 +184,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: """ Can we compare values of the given dtype to our own? """ - return is_timedelta64_dtype(dtype) # aka self._data._is_recognized_dtype + return lib.is_np_dtype(dtype, "m") # aka self._data._is_recognized_dtype # ------------------------------------------------------------------- # Indexing Methods diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 45cf038ebc19e..2fcb0de6b5451 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -19,6 +19,7 @@ import numpy as np +from pandas._libs import lib from pandas._libs.tslibs import Timestamp from pandas._typing import ( DtypeObj, @@ -33,7 +34,6 @@ is_datetime64_any_dtype, is_extension_array_dtype, is_numeric_dtype, - is_timedelta64_dtype, ) from pandas.core.arrays.arrow.dtype import ArrowDtype @@ -363,7 +363,7 @@ def select_describe_func( return describe_numeric_1d elif is_datetime64_any_dtype(data.dtype): return describe_timestamp_1d - elif is_timedelta64_dtype(data.dtype): + elif lib.is_np_dtype(data.dtype, "m"): return describe_numeric_1d else: return describe_categorical_1d diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 4854a0c853761..326fa93dc945e 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -15,6 +15,7 @@ from pandas._libs import ( Timedelta, Timestamp, + lib, ) from pandas._libs.lib import infer_dtype @@ -30,7 +31,6 @@ is_list_like, is_numeric_dtype, is_scalar, - is_timedelta64_dtype, ) from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, @@ -491,10 +491,10 @@ def _coerce_to_type(x): if is_datetime64tz_dtype(x.dtype): dtype = x.dtype - elif is_datetime64_dtype(x.dtype): + elif lib.is_np_dtype(x.dtype, "M"): x = to_datetime(x).astype("datetime64[ns]", copy=False) dtype = np.dtype("datetime64[ns]") - elif is_timedelta64_dtype(x.dtype): + elif lib.is_np_dtype(x.dtype, "m"): x = to_timedelta(x) dtype = np.dtype("timedelta64[ns]") elif is_bool_dtype(x.dtype): @@ -529,7 +529,7 @@ def _convert_bin_to_numeric_type(bins, dtype: DtypeObj | None): ValueError if bins are not of a compat dtype to dtype """ bins_dtype = infer_dtype(bins, skipna=False) - if is_timedelta64_dtype(dtype): + if lib.is_np_dtype(dtype, "m"): if bins_dtype in ["timedelta", "timedelta64"]: bins = to_timedelta(bins).view(np.int64) else: @@ -588,7 +588,7 @@ def _format_labels( elif is_datetime64_dtype(dtype): formatter = Timestamp adjust = lambda x: x - Timedelta("1ns") - elif is_timedelta64_dtype(dtype): + elif lib.is_np_dtype(dtype, "m"): formatter = Timedelta adjust = lambda x: x - Timedelta("1ns") else: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 4e2da746e0803..ae67b05047a98 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -52,15 +52,11 @@ from pandas.core.dtypes.common import ( is_complex_dtype, - is_datetime64_dtype, is_float, - is_float_dtype, is_integer, - is_integer_dtype, is_list_like, is_numeric_dtype, is_scalar, - is_timedelta64_dtype, ) from pandas.core.dtypes.dtypes import ( CategoricalDtype, @@ -1290,17 +1286,17 @@ def format_array( List[str] """ fmt_klass: type[GenericArrayFormatter] - if is_datetime64_dtype(values.dtype): + if lib.is_np_dtype(values.dtype, "M"): fmt_klass = Datetime64Formatter elif isinstance(values.dtype, DatetimeTZDtype): fmt_klass = Datetime64TZFormatter - elif is_timedelta64_dtype(values.dtype): + elif lib.is_np_dtype(values.dtype, "m"): fmt_klass = Timedelta64Formatter elif isinstance(values.dtype, ExtensionDtype): fmt_klass = ExtensionArrayFormatter - elif is_float_dtype(values.dtype) or is_complex_dtype(values.dtype): + elif lib.is_np_dtype(values.dtype, "fc"): fmt_klass = FloatArrayFormatter - elif is_integer_dtype(values.dtype): + elif lib.is_np_dtype(values.dtype, "iu"): fmt_klass = IntArrayFormatter else: fmt_klass = GenericArrayFormatter diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 4448bfbe977d5..7decab539da34 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -12,6 +12,7 @@ ) import warnings +from pandas._libs import lib from pandas._libs.json import loads from pandas._libs.tslibs import timezones from pandas.util._exceptions import find_stack_level @@ -19,11 +20,9 @@ from pandas.core.dtypes.base import _registry as registry from pandas.core.dtypes.common import ( is_bool_dtype, - is_datetime64_dtype, is_integer_dtype, is_numeric_dtype, is_string_dtype, - is_timedelta64_dtype, ) from pandas.core.dtypes.dtypes import ( CategoricalDtype, @@ -84,9 +83,9 @@ def as_json_table_type(x: DtypeObj) -> str: return "boolean" elif is_numeric_dtype(x): return "number" - elif is_datetime64_dtype(x) or isinstance(x, (DatetimeTZDtype, PeriodDtype)): + elif lib.is_np_dtype(x, "M") or isinstance(x, (DatetimeTZDtype, PeriodDtype)): return "datetime" - elif is_timedelta64_dtype(x): + elif lib.is_np_dtype(x, "m"): return "duration" elif isinstance(x, ExtensionDtype): return "any" diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fcaf4d984a4c6..da0ca940791ba 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -61,7 +61,6 @@ is_list_like, is_object_dtype, is_string_dtype, - is_timedelta64_dtype, needs_i8_conversion, ) from pandas.core.dtypes.dtypes import ( @@ -2380,7 +2379,7 @@ def _get_atom(cls, values: ArrayLike) -> Col: atom = cls.get_atom_data(shape, kind=codes.dtype.name) elif is_datetime64_dtype(dtype) or isinstance(dtype, DatetimeTZDtype): atom = cls.get_atom_datetime64(shape) - elif is_timedelta64_dtype(dtype): + elif lib.is_np_dtype(dtype, "m"): atom = cls.get_atom_timedelta64(shape) elif is_complex_dtype(dtype): atom = _tables().ComplexCol(itemsize=itemsize, shape=shape[0]) @@ -3100,7 +3099,7 @@ def write_array( # attribute "tz" node._v_attrs.tz = _get_tz(value.tz) # type: ignore[union-attr] node._v_attrs.value_type = "datetime64" - elif is_timedelta64_dtype(value.dtype): + elif lib.is_np_dtype(value.dtype, "m"): self._handle.create_array(self.group, key, value.view("i8")) getattr(self.group, key)._v_attrs.value_type = "timedelta64" elif empty_array: diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 8cd5b8adb27a5..48ab0f1be8c4a 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -190,7 +190,6 @@ def test_as_json_table_type_bool_dtypes(self, bool_dtype): @pytest.mark.parametrize( "date_dtype", [ - np.datetime64, np.dtype("