diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 6776064342db0..f93ca4e0f903d 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -3,6 +3,7 @@ """ from __future__ import annotations +import inspect from typing import ( Any, Callable, @@ -67,6 +68,12 @@ _is_scipy_sparse = None ensure_float64 = algos.ensure_float64 +ensure_int64 = algos.ensure_int64 +ensure_int32 = algos.ensure_int32 +ensure_int16 = algos.ensure_int16 +ensure_int8 = algos.ensure_int8 +ensure_platform_int = algos.ensure_platform_int +ensure_object = algos.ensure_object def ensure_float(arr): @@ -93,14 +100,6 @@ def ensure_float(arr): return arr -ensure_int64 = algos.ensure_int64 -ensure_int32 = algos.ensure_int32 -ensure_int16 = algos.ensure_int16 -ensure_int8 = algos.ensure_int8 -ensure_platform_int = algos.ensure_platform_int -ensure_object = algos.ensure_object - - def ensure_str(value: bytes | Any) -> str: """ Ensure that bytes and non-strings get converted into ``str`` objects. @@ -158,36 +157,7 @@ def classes_and_not_datetimelike(*klasses) -> Callable: ) -def is_object_dtype(arr_or_dtype) -> bool: - """ - Check whether an array-like or dtype is of the object dtype. - - Parameters - ---------- - arr_or_dtype : array-like or dtype - The array-like or dtype to check. - - Returns - ------- - boolean - Whether or not the array-like or dtype is of the object dtype. - - Examples - -------- - >>> is_object_dtype(object) - True - >>> is_object_dtype(int) - False - >>> is_object_dtype(np.array([], dtype=object)) - True - >>> is_object_dtype(np.array([], dtype=int)) - False - >>> is_object_dtype([1, 2, 3]) - False - """ - return _is_dtype_type(arr_or_dtype, classes(np.object_)) - - +# type checking helpers that accept any array-like arg def is_sparse(arr) -> bool: """ Check whether an array-like is a 1-D pandas sparse array. @@ -312,352 +282,214 @@ def is_categorical(arr) -> bool: return isinstance(arr, ABCCategorical) or is_categorical_dtype(arr) -def is_datetime64_dtype(arr_or_dtype) -> bool: +def is_extension_type(arr) -> bool: """ - Check whether an array-like or dtype is of the datetime64 dtype. + Check whether an array-like is of a pandas extension class instance. + + .. deprecated:: 1.0.0 + Use ``is_extension_array_dtype`` instead. + + Extension classes include categoricals, pandas sparse objects (i.e. + classes represented within the pandas library and not ones external + to it like scipy sparse matrices), and datetime-like arrays. Parameters ---------- - arr_or_dtype : array-like or dtype - The array-like or dtype to check. + arr : array-like, scalar + The array-like to check. Returns ------- boolean - Whether or not the array-like or dtype is of the datetime64 dtype. + Whether or not the array-like is of a pandas extension class instance. Examples -------- - >>> is_datetime64_dtype(object) + >>> is_extension_type([1, 2, 3]) False - >>> is_datetime64_dtype(np.datetime64) - True - >>> is_datetime64_dtype(np.array([], dtype=int)) + >>> is_extension_type(np.array([1, 2, 3])) False - >>> is_datetime64_dtype(np.array([], dtype=np.datetime64)) + >>> + >>> cat = pd.Categorical([1, 2, 3]) + >>> + >>> is_extension_type(cat) True - >>> is_datetime64_dtype([1, 2, 3]) + >>> is_extension_type(pd.Series(cat)) + True + >>> is_extension_type(pd.arrays.SparseArray([1, 2, 3])) + True + >>> from scipy.sparse import bsr_matrix + >>> is_extension_type(bsr_matrix([1, 2, 3])) + False + >>> is_extension_type(pd.DatetimeIndex([1, 2, 3])) False + >>> is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + >>> + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_extension_type(s) + True """ - if isinstance(arr_or_dtype, np.dtype): - # GH#33400 fastpath for dtype object - return arr_or_dtype.kind == "M" - return _is_dtype_type(arr_or_dtype, classes(np.datetime64)) + warnings.warn( + "'is_extension_type' is deprecated and will be removed in a future " + "version. Use 'is_extension_array_dtype' instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if is_categorical_dtype(arr): + return True + elif is_sparse(arr): + return True + elif is_datetime64tz_dtype(arr): + return True + return False -def is_datetime64tz_dtype(arr_or_dtype) -> bool: +def is_1d_only_ea_obj(obj: Any) -> bool: """ - Check whether an array-like or dtype is of a DatetimeTZDtype dtype. + ExtensionArray that does not support 2D, or more specifically that does + not use HybridBlock. + """ + from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + PeriodArray, + TimedeltaArray, + ) - Parameters - ---------- - arr_or_dtype : array-like or dtype - The array-like or dtype to check. + return isinstance(obj, ExtensionArray) and not isinstance( + obj, (DatetimeArray, TimedeltaArray, PeriodArray) + ) - Returns - ------- - boolean - Whether or not the array-like or dtype is of a DatetimeTZDtype dtype. - Examples - -------- - >>> is_datetime64tz_dtype(object) - False - >>> is_datetime64tz_dtype([1, 2, 3]) - False - >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) # tz-naive - False - >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) - True +def is_all_strings(value: ArrayLike) -> bool: + """ + Check if this is an array of strings that we should try parsing. - >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") - >>> s = pd.Series([], dtype=dtype) - >>> is_datetime64tz_dtype(dtype) - True - >>> is_datetime64tz_dtype(s) - True + Includes object-dtype ndarray containing all-strings, StringArray, + and Categorical with all-string categories. + Does not include numpy string dtypes. """ - if isinstance(arr_or_dtype, ExtensionDtype): - # GH#33400 fastpath for dtype object - return arr_or_dtype.kind == "M" + dtype = value.dtype - if arr_or_dtype is None: - return False - return DatetimeTZDtype.is_dtype(arr_or_dtype) + if isinstance(dtype, np.dtype): + return ( + dtype == np.dtype("object") + and lib.infer_dtype(value, skipna=False) == "string" + ) + elif isinstance(dtype, CategoricalDtype): + return dtype.categories.inferred_type == "string" + return dtype == "string" -def is_timedelta64_dtype(arr_or_dtype) -> bool: +# type checking helpers that accept any array-like or dtype arg +def is_numeric_dtype(arr_or_dtype) -> bool: """ - Check whether an array-like or dtype is of the timedelta64 dtype. + Check whether the provided array or dtype is of a numeric dtype. Parameters ---------- arr_or_dtype : array-like or dtype - The array-like or dtype to check. + The array or dtype to check. Returns ------- boolean - Whether or not the array-like or dtype is of the timedelta64 dtype. + Whether or not the array or dtype is of a numeric dtype. Examples -------- - >>> is_timedelta64_dtype(object) + >>> is_numeric_dtype(str) False - >>> is_timedelta64_dtype(np.timedelta64) + >>> is_numeric_dtype(int) True - >>> is_timedelta64_dtype([1, 2, 3]) + >>> is_numeric_dtype(float) + True + >>> is_numeric_dtype(np.uint64) + True + >>> is_numeric_dtype(np.datetime64) False - >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) + >>> is_numeric_dtype(np.timedelta64) + False + >>> is_numeric_dtype(np.array(['a', 'b'])) + False + >>> is_numeric_dtype(pd.Series([1, 2])) True - >>> is_timedelta64_dtype('0 days') + >>> is_numeric_dtype(pd.Index([1, 2.])) + True + >>> is_numeric_dtype(np.array([], dtype=np.timedelta64)) False """ - if isinstance(arr_or_dtype, np.dtype): - # GH#33400 fastpath for dtype object - return arr_or_dtype.kind == "m" - - return _is_dtype_type(arr_or_dtype, classes(np.timedelta64)) + return _is_dtype_type( + arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_) + ) -def is_period_dtype(arr_or_dtype) -> bool: +def is_float_dtype(arr_or_dtype) -> bool: """ - Check whether an array-like or dtype is of the Period dtype. + Check whether the provided array or dtype is of a float dtype. + + This function is internal and should not be exposed in the public API. Parameters ---------- arr_or_dtype : array-like or dtype - The array-like or dtype to check. + The array or dtype to check. Returns ------- boolean - Whether or not the array-like or dtype is of the Period dtype. + Whether or not the array or dtype is of a float dtype. Examples -------- - >>> is_period_dtype(object) + >>> is_float_dtype(str) False - >>> is_period_dtype(PeriodDtype(freq="D")) + >>> is_float_dtype(int) + False + >>> is_float_dtype(float) True - >>> is_period_dtype([1, 2, 3]) + >>> is_float_dtype(np.array(['a', 'b'])) False - >>> is_period_dtype(pd.Period("2017-01-01")) + >>> is_float_dtype(pd.Series([1, 2])) False - >>> is_period_dtype(pd.PeriodIndex([], freq="A")) + >>> is_float_dtype(pd.Index([1, 2.])) True """ - if isinstance(arr_or_dtype, ExtensionDtype): - # GH#33400 fastpath for dtype object - return arr_or_dtype.type is Period - - if arr_or_dtype is None: - return False - return PeriodDtype.is_dtype(arr_or_dtype) + return _is_dtype_type(arr_or_dtype, classes(np.floating)) -def is_interval_dtype(arr_or_dtype) -> bool: +def is_any_int_dtype(arr_or_dtype) -> bool: """ - Check whether an array-like or dtype is of the Interval dtype. + Check whether the provided array or dtype is of an integer dtype. + + In this function, timedelta64 instances are also considered "any-integer" + type objects and will return True. + + This function is internal and should not be exposed in the public API. + + The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered + as integer by this function. Parameters ---------- arr_or_dtype : array-like or dtype - The array-like or dtype to check. + The array or dtype to check. Returns ------- boolean - Whether or not the array-like or dtype is of the Interval dtype. + Whether or not the array or dtype is of an integer dtype. Examples -------- - >>> is_interval_dtype(object) + >>> is_any_int_dtype(str) False - >>> is_interval_dtype(IntervalDtype()) + >>> is_any_int_dtype(int) True - >>> is_interval_dtype([1, 2, 3]) - False - >>> - >>> interval = pd.Interval(1, 2, closed="right") - >>> is_interval_dtype(interval) - False - >>> is_interval_dtype(pd.IntervalIndex([interval])) - True - """ - if isinstance(arr_or_dtype, ExtensionDtype): - # GH#33400 fastpath for dtype object - return arr_or_dtype.type is Interval - - if arr_or_dtype is None: - return False - return IntervalDtype.is_dtype(arr_or_dtype) - - -def is_categorical_dtype(arr_or_dtype) -> bool: - """ - Check whether an array-like or dtype is of the Categorical dtype. - - Parameters - ---------- - arr_or_dtype : array-like or dtype - The array-like or dtype to check. - - Returns - ------- - boolean - Whether or not the array-like or dtype is of the Categorical dtype. - - Examples - -------- - >>> is_categorical_dtype(object) - False - >>> is_categorical_dtype(CategoricalDtype()) - True - >>> is_categorical_dtype([1, 2, 3]) - False - >>> is_categorical_dtype(pd.Categorical([1, 2, 3])) - True - >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) - True - """ - if isinstance(arr_or_dtype, ExtensionDtype): - # GH#33400 fastpath for dtype object - return arr_or_dtype.name == "category" - - if arr_or_dtype is None: - return False - return CategoricalDtype.is_dtype(arr_or_dtype) - - -def is_string_or_object_np_dtype(dtype: np.dtype) -> bool: - """ - Faster alternative to is_string_dtype, assumes we have a np.dtype object. - """ - return dtype == object or dtype.kind in "SU" - - -def is_string_dtype(arr_or_dtype) -> bool: - """ - Check whether the provided array or dtype is of the string dtype. - - Parameters - ---------- - arr_or_dtype : array-like or dtype - The array or dtype to check. - - Returns - ------- - boolean - Whether or not the array or dtype is of the string dtype. - - Examples - -------- - >>> is_string_dtype(str) - True - >>> is_string_dtype(object) - True - >>> is_string_dtype(int) - False - >>> - >>> is_string_dtype(np.array(['a', 'b'])) - True - >>> is_string_dtype(pd.Series([1, 2])) - False - """ - # TODO: gh-15585: consider making the checks stricter. - def condition(dtype) -> bool: - return dtype.kind in ("O", "S", "U") and not is_excluded_dtype(dtype) - - def is_excluded_dtype(dtype) -> bool: - """ - These have kind = "O" but aren't string dtypes so need to be explicitly excluded - """ - return isinstance(dtype, (PeriodDtype, IntervalDtype, CategoricalDtype)) - - return _is_dtype(arr_or_dtype, condition) - - -def is_dtype_equal(source, target) -> bool: - """ - Check if two dtypes are equal. - - Parameters - ---------- - source : The first dtype to compare - target : The second dtype to compare - - Returns - ------- - boolean - Whether or not the two dtypes are equal. - - Examples - -------- - >>> is_dtype_equal(int, float) - False - >>> is_dtype_equal("int", int) - True - >>> is_dtype_equal(object, "category") - False - >>> is_dtype_equal(CategoricalDtype(), "category") - True - >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64") - False - """ - if isinstance(target, str): - if not isinstance(source, str): - # GH#38516 ensure we get the same behavior from - # is_dtype_equal(CDT, "category") and CDT == "category" - try: - src = get_dtype(source) - if isinstance(src, ExtensionDtype): - return src == target - except (TypeError, AttributeError, ImportError): - return False - elif isinstance(source, str): - return is_dtype_equal(target, source) - - try: - source = get_dtype(source) - target = get_dtype(target) - return source == target - except (TypeError, AttributeError, ImportError): - - # invalid comparison - # object == category will hit this - return False - - -def is_any_int_dtype(arr_or_dtype) -> bool: - """ - Check whether the provided array or dtype is of an integer dtype. - - In this function, timedelta64 instances are also considered "any-integer" - type objects and will return True. - - This function is internal and should not be exposed in the public API. - - The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered - as integer by this function. - - Parameters - ---------- - arr_or_dtype : array-like or dtype - The array or dtype to check. - - Returns - ------- - boolean - Whether or not the array or dtype is of an integer dtype. - - Examples - -------- - >>> is_any_int_dtype(str) - False - >>> is_any_int_dtype(int) - True - >>> is_any_int_dtype(float) + >>> is_any_int_dtype(float) False >>> is_any_int_dtype(np.uint64) True @@ -880,9 +712,9 @@ def is_int64_dtype(arr_or_dtype) -> bool: return _is_dtype_type(arr_or_dtype, classes(np.int64)) -def is_datetime64_any_dtype(arr_or_dtype) -> bool: +def is_complex_dtype(arr_or_dtype) -> bool: """ - Check whether the provided array or dtype is of the datetime64 dtype. + Check whether the provided array or dtype is of a complex dtype. Parameters ---------- @@ -891,40 +723,33 @@ def is_datetime64_any_dtype(arr_or_dtype) -> bool: Returns ------- - bool - Whether or not the array or dtype is of the datetime64 dtype. + boolean + Whether or not the array or dtype is of a complex dtype. Examples -------- - >>> is_datetime64_any_dtype(str) + >>> is_complex_dtype(str) False - >>> is_datetime64_any_dtype(int) + >>> is_complex_dtype(int) False - >>> is_datetime64_any_dtype(np.datetime64) # can be tz-naive - True - >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) + >>> is_complex_dtype(np.complex_) True - >>> is_datetime64_any_dtype(np.array(['a', 'b'])) + >>> is_complex_dtype(np.array(['a', 'b'])) False - >>> is_datetime64_any_dtype(np.array([1, 2])) + >>> is_complex_dtype(pd.Series([1, 2])) False - >>> is_datetime64_any_dtype(np.array([], dtype="datetime64[ns]")) - True - >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]")) + >>> is_complex_dtype(np.array([1 + 1j, 5])) True """ - if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)): - # GH#33400 fastpath for dtype object - return arr_or_dtype.kind == "M" - - if arr_or_dtype is None: - return False - return is_datetime64_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype) + return _is_dtype_type(arr_or_dtype, classes(np.complexfloating)) -def is_datetime64_ns_dtype(arr_or_dtype) -> bool: +def needs_i8_conversion(arr_or_dtype) -> bool: """ - Check whether the provided array or dtype is of the datetime64[ns] dtype. + Check whether the array or dtype should be converted to int64. + + An array-like or dtype "needs" such a conversion if the array-like + or dtype is of a datetime-like dtype Parameters ---------- @@ -933,77 +758,78 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool: Returns ------- - bool - Whether or not the array or dtype is of the datetime64[ns] dtype. + boolean + Whether or not the array or dtype should be converted to int64. Examples -------- - >>> is_datetime64_ns_dtype(str) - False - >>> is_datetime64_ns_dtype(int) + >>> needs_i8_conversion(str) False - >>> is_datetime64_ns_dtype(np.datetime64) # no unit + >>> needs_i8_conversion(np.int64) False - >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) + >>> needs_i8_conversion(np.datetime64) True - >>> is_datetime64_ns_dtype(np.array(['a', 'b'])) - False - >>> is_datetime64_ns_dtype(np.array([1, 2])) - False - >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64")) # no unit + >>> needs_i8_conversion(np.array(['a', 'b'])) False - >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]")) # wrong unit + >>> needs_i8_conversion(pd.Series([1, 2])) False - >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]")) + >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) + True + >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) True """ if arr_or_dtype is None: return False - try: - tipo = get_dtype(arr_or_dtype) - except TypeError: - if is_datetime64tz_dtype(arr_or_dtype): - tipo = get_dtype(arr_or_dtype.dtype) - else: - return False - return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE - + if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)): + # fastpath + dtype = arr_or_dtype + return dtype.kind in ["m", "M"] or dtype.type is Period -def is_timedelta64_ns_dtype(arr_or_dtype) -> bool: - """ - Check whether the provided array or dtype is of the timedelta64[ns] dtype. + try: + dtype = get_dtype(arr_or_dtype) + except (TypeError, ValueError): + return False + if isinstance(dtype, np.dtype): + return dtype.kind in ["m", "M"] + return isinstance(dtype, (PeriodDtype, DatetimeTZDtype)) - This is a very specific dtype, so generic ones like `np.timedelta64` - will return False if passed into this function. + +def is_datetime64_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the datetime64 dtype. Parameters ---------- arr_or_dtype : array-like or dtype - The array or dtype to check. + The array-like or dtype to check. Returns ------- boolean - Whether or not the array or dtype is of the timedelta64[ns] dtype. + Whether or not the array-like or dtype is of the datetime64 dtype. Examples -------- - >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]')) + >>> is_datetime64_dtype(object) + False + >>> is_datetime64_dtype(np.datetime64) True - >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]')) # Wrong frequency + >>> is_datetime64_dtype(np.array([], dtype=int)) False - >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) + >>> is_datetime64_dtype(np.array([], dtype=np.datetime64)) True - >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) + >>> is_datetime64_dtype([1, 2, 3]) False """ - return _is_dtype(arr_or_dtype, lambda dtype: dtype == TD64NS_DTYPE) + if isinstance(arr_or_dtype, np.dtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "M" + return _is_dtype_type(arr_or_dtype, classes(np.datetime64)) -def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool: +def is_datetime64_any_dtype(arr_or_dtype) -> bool: """ - Check whether the provided array or dtype is of - a timedelta64 or datetime64 dtype. + Check whether the provided array or dtype is of the datetime64 dtype. Parameters ---------- @@ -1012,145 +838,194 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool: Returns ------- - boolean - Whether or not the array or dtype is of a timedelta64, - or datetime64 dtype. + bool + Whether or not the array or dtype is of the datetime64 dtype. Examples -------- - >>> is_datetime_or_timedelta_dtype(str) + >>> is_datetime64_any_dtype(str) False - >>> is_datetime_or_timedelta_dtype(int) + >>> is_datetime64_any_dtype(int) False - >>> is_datetime_or_timedelta_dtype(np.datetime64) + >>> is_datetime64_any_dtype(np.datetime64) # can be tz-naive True - >>> is_datetime_or_timedelta_dtype(np.timedelta64) + >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) True - >>> is_datetime_or_timedelta_dtype(np.array(['a', 'b'])) + >>> is_datetime64_any_dtype(np.array(['a', 'b'])) False - >>> is_datetime_or_timedelta_dtype(pd.Series([1, 2])) + >>> is_datetime64_any_dtype(np.array([1, 2])) False - >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64)) + >>> is_datetime64_any_dtype(np.array([], dtype="datetime64[ns]")) True - >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) + >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]")) True """ - return _is_dtype_type(arr_or_dtype, classes(np.datetime64, np.timedelta64)) + if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "M" + + if arr_or_dtype is None: + return False + return is_datetime64_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype) -# This exists to silence numpy deprecation warnings, see GH#29553 -def is_numeric_v_string_like(a: ArrayLike, b): +def is_datetime64_ns_dtype(arr_or_dtype) -> bool: """ - Check if we are comparing a string-like object to a numeric ndarray. - NumPy doesn't like to compare such objects, especially numeric arrays - and scalar string-likes. + Check whether the provided array or dtype is of the datetime64[ns] dtype. Parameters ---------- - a : array-like, scalar - The first object to check. - b : array-like, scalar - The second object to check. + arr_or_dtype : array-like or dtype + The array or dtype to check. Returns ------- - boolean - Whether we return a comparing a string-like object to a numeric array. + bool + Whether or not the array or dtype is of the datetime64[ns] dtype. Examples -------- - >>> is_numeric_v_string_like(np.array([1]), "foo") - True - >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) - True - >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + >>> is_datetime64_ns_dtype(str) + False + >>> is_datetime64_ns_dtype(int) + False + >>> is_datetime64_ns_dtype(np.datetime64) # no unit + False + >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) True - >>> is_numeric_v_string_like(np.array([1]), np.array([2])) + >>> is_datetime64_ns_dtype(np.array(['a', 'b'])) False - >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + >>> is_datetime64_ns_dtype(np.array([1, 2])) + False + >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64")) # no unit + False + >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]")) # wrong unit False + >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]")) + True """ - is_a_array = isinstance(a, np.ndarray) - is_b_array = isinstance(b, np.ndarray) - - is_a_numeric_array = is_a_array and a.dtype.kind in ("u", "i", "f", "c", "b") - is_b_numeric_array = is_b_array and b.dtype.kind in ("u", "i", "f", "c", "b") - is_a_string_array = is_a_array and a.dtype.kind in ("S", "U") - is_b_string_array = is_b_array and b.dtype.kind in ("S", "U") - - is_b_scalar_string_like = not is_b_array and isinstance(b, str) - - return ( - (is_a_numeric_array and is_b_scalar_string_like) - or (is_a_numeric_array and is_b_string_array) - or (is_b_numeric_array and is_a_string_array) - ) + if arr_or_dtype is None: + return False + try: + tipo = get_dtype(arr_or_dtype) + except TypeError: + if is_datetime64tz_dtype(arr_or_dtype): + tipo = get_dtype(arr_or_dtype.dtype) + else: + return False + return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE -# This exists to silence numpy deprecation warnings, see GH#29553 -def is_datetimelike_v_numeric(a, b): +def is_datetime64tz_dtype(arr_or_dtype) -> bool: """ - Check if we are comparing a datetime-like object to a numeric object. - By "numeric," we mean an object that is either of an int or float dtype. + Check whether an array-like or dtype is of a DatetimeTZDtype dtype. Parameters ---------- - a : array-like, scalar - The first object to check. - b : array-like, scalar - The second object to check. + arr_or_dtype : array-like or dtype + The array-like or dtype to check. Returns ------- boolean - Whether we return a comparing a datetime-like to a numeric object. + Whether or not the array-like or dtype is of a DatetimeTZDtype dtype. Examples -------- - >>> from datetime import datetime - >>> dt = np.datetime64(datetime(2017, 1, 1)) - >>> - >>> is_datetimelike_v_numeric(1, 1) + >>> is_datetime64tz_dtype(object) False - >>> is_datetimelike_v_numeric(dt, dt) + >>> is_datetime64tz_dtype([1, 2, 3]) False - >>> is_datetimelike_v_numeric(1, dt) - True - >>> is_datetimelike_v_numeric(dt, 1) # symmetric check + >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) # tz-naive + False + >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) True - >>> is_datetimelike_v_numeric(np.array([dt]), 1) + + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_datetime64tz_dtype(dtype) True - >>> is_datetimelike_v_numeric(np.array([1]), dt) + >>> is_datetime64tz_dtype(s) True - >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + """ + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "M" + + if arr_or_dtype is None: + return False + return DatetimeTZDtype.is_dtype(arr_or_dtype) + + +def is_timedelta64_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the timedelta64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the timedelta64 dtype. + + Examples + -------- + >>> is_timedelta64_dtype(object) + False + >>> is_timedelta64_dtype(np.timedelta64) True - >>> is_datetimelike_v_numeric(np.array([1]), np.array([2])) + >>> is_timedelta64_dtype([1, 2, 3]) False - >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) + True + >>> is_timedelta64_dtype('0 days') False """ - if not hasattr(a, "dtype"): - a = np.asarray(a) - if not hasattr(b, "dtype"): - b = np.asarray(b) + if isinstance(arr_or_dtype, np.dtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "m" - def is_numeric(x): - """ - Check if an object has a numeric dtype (i.e. integer or float). - """ - return is_integer_dtype(x) or is_float_dtype(x) + return _is_dtype_type(arr_or_dtype, classes(np.timedelta64)) - return (needs_i8_conversion(a) and is_numeric(b)) or ( - needs_i8_conversion(b) and is_numeric(a) - ) +def is_timedelta64_ns_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the timedelta64[ns] dtype. -def needs_i8_conversion(arr_or_dtype) -> bool: + This is a very specific dtype, so generic ones like `np.timedelta64` + will return False if passed into this function. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of the timedelta64[ns] dtype. + + Examples + -------- + >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]')) + True + >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]')) # Wrong frequency + False + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) + True + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) + False """ - Check whether the array or dtype should be converted to int64. + return _is_dtype(arr_or_dtype, lambda dtype: dtype == TD64NS_DTYPE) - An array-like or dtype "needs" such a conversion if the array-like - or dtype is of a datetime-like dtype + +def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of + a timedelta64 or datetime64 dtype. Parameters ---------- @@ -1160,115 +1035,103 @@ def needs_i8_conversion(arr_or_dtype) -> bool: Returns ------- boolean - Whether or not the array or dtype should be converted to int64. + Whether or not the array or dtype is of a timedelta64, + or datetime64 dtype. Examples -------- - >>> needs_i8_conversion(str) + >>> is_datetime_or_timedelta_dtype(str) False - >>> needs_i8_conversion(np.int64) + >>> is_datetime_or_timedelta_dtype(int) False - >>> needs_i8_conversion(np.datetime64) + >>> is_datetime_or_timedelta_dtype(np.datetime64) True - >>> needs_i8_conversion(np.array(['a', 'b'])) + >>> is_datetime_or_timedelta_dtype(np.timedelta64) + True + >>> is_datetime_or_timedelta_dtype(np.array(['a', 'b'])) False - >>> needs_i8_conversion(pd.Series([1, 2])) + >>> is_datetime_or_timedelta_dtype(pd.Series([1, 2])) False - >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) + >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64)) True - >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) True """ - if arr_or_dtype is None: - return False - if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)): - # fastpath - dtype = arr_or_dtype - return dtype.kind in ["m", "M"] or dtype.type is Period - - try: - dtype = get_dtype(arr_or_dtype) - except (TypeError, ValueError): - return False - if isinstance(dtype, np.dtype): - return dtype.kind in ["m", "M"] - return isinstance(dtype, (PeriodDtype, DatetimeTZDtype)) + return _is_dtype_type(arr_or_dtype, classes(np.datetime64, np.timedelta64)) -def is_numeric_dtype(arr_or_dtype) -> bool: +def is_period_dtype(arr_or_dtype) -> bool: """ - Check whether the provided array or dtype is of a numeric dtype. + Check whether an array-like or dtype is of the Period dtype. Parameters ---------- arr_or_dtype : array-like or dtype - The array or dtype to check. + The array-like or dtype to check. Returns ------- boolean - Whether or not the array or dtype is of a numeric dtype. + Whether or not the array-like or dtype is of the Period dtype. Examples -------- - >>> is_numeric_dtype(str) + >>> is_period_dtype(object) False - >>> is_numeric_dtype(int) - True - >>> is_numeric_dtype(float) - True - >>> is_numeric_dtype(np.uint64) + >>> is_period_dtype(PeriodDtype(freq="D")) True - >>> is_numeric_dtype(np.datetime64) - False - >>> is_numeric_dtype(np.timedelta64) + >>> is_period_dtype([1, 2, 3]) False - >>> is_numeric_dtype(np.array(['a', 'b'])) + >>> is_period_dtype(pd.Period("2017-01-01")) False - >>> is_numeric_dtype(pd.Series([1, 2])) - True - >>> is_numeric_dtype(pd.Index([1, 2.])) + >>> is_period_dtype(pd.PeriodIndex([], freq="A")) True - >>> is_numeric_dtype(np.array([], dtype=np.timedelta64)) - False """ - return _is_dtype_type( - arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_) - ) + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.type is Period + + if arr_or_dtype is None: + return False + return PeriodDtype.is_dtype(arr_or_dtype) -def is_float_dtype(arr_or_dtype) -> bool: +def is_interval_dtype(arr_or_dtype) -> bool: """ - Check whether the provided array or dtype is of a float dtype. - - This function is internal and should not be exposed in the public API. + Check whether an array-like or dtype is of the Interval dtype. Parameters ---------- arr_or_dtype : array-like or dtype - The array or dtype to check. + The array-like or dtype to check. Returns ------- boolean - Whether or not the array or dtype is of a float dtype. + Whether or not the array-like or dtype is of the Interval dtype. Examples -------- - >>> is_float_dtype(str) - False - >>> is_float_dtype(int) + >>> is_interval_dtype(object) False - >>> is_float_dtype(float) + >>> is_interval_dtype(IntervalDtype()) True - >>> is_float_dtype(np.array(['a', 'b'])) + >>> is_interval_dtype([1, 2, 3]) False - >>> is_float_dtype(pd.Series([1, 2])) + >>> + >>> interval = pd.Interval(1, 2, closed="right") + >>> is_interval_dtype(interval) False - >>> is_float_dtype(pd.Index([1, 2.])) + >>> is_interval_dtype(pd.IntervalIndex([interval])) True """ - return _is_dtype_type(arr_or_dtype, classes(np.floating)) + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.type is Interval + + if arr_or_dtype is None: + return False + return IntervalDtype.is_dtype(arr_or_dtype) def is_bool_dtype(arr_or_dtype) -> bool: @@ -1331,99 +1194,111 @@ def is_bool_dtype(arr_or_dtype) -> bool: return issubclass(dtype.type, np.bool_) -def is_extension_type(arr) -> bool: +def is_object_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the object dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the object dtype. + + Examples + -------- + >>> is_object_dtype(object) + True + >>> is_object_dtype(int) + False + >>> is_object_dtype(np.array([], dtype=object)) + True + >>> is_object_dtype(np.array([], dtype=int)) + False + >>> is_object_dtype([1, 2, 3]) + False """ - Check whether an array-like is of a pandas extension class instance. + return _is_dtype_type(arr_or_dtype, classes(np.object_)) - .. deprecated:: 1.0.0 - Use ``is_extension_array_dtype`` instead. - Extension classes include categoricals, pandas sparse objects (i.e. - classes represented within the pandas library and not ones external - to it like scipy sparse matrices), and datetime-like arrays. +def is_categorical_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the Categorical dtype. Parameters ---------- - arr : array-like, scalar - The array-like to check. + arr_or_dtype : array-like or dtype + The array-like or dtype to check. Returns ------- boolean - Whether or not the array-like is of a pandas extension class instance. + Whether or not the array-like or dtype is of the Categorical dtype. Examples -------- - >>> is_extension_type([1, 2, 3]) - False - >>> is_extension_type(np.array([1, 2, 3])) + >>> is_categorical_dtype(object) False - >>> - >>> cat = pd.Categorical([1, 2, 3]) - >>> - >>> is_extension_type(cat) - True - >>> is_extension_type(pd.Series(cat)) - True - >>> is_extension_type(pd.arrays.SparseArray([1, 2, 3])) + >>> is_categorical_dtype(CategoricalDtype()) True - >>> from scipy.sparse import bsr_matrix - >>> is_extension_type(bsr_matrix([1, 2, 3])) - False - >>> is_extension_type(pd.DatetimeIndex([1, 2, 3])) + >>> is_categorical_dtype([1, 2, 3]) False - >>> is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + >>> is_categorical_dtype(pd.Categorical([1, 2, 3])) True - >>> - >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") - >>> s = pd.Series([], dtype=dtype) - >>> is_extension_type(s) + >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) True """ - warnings.warn( - "'is_extension_type' is deprecated and will be removed in a future " - "version. Use 'is_extension_array_dtype' instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.name == "category" - if is_categorical_dtype(arr): - return True - elif is_sparse(arr): - return True - elif is_datetime64tz_dtype(arr): - return True - return False + if arr_or_dtype is None: + return False + return CategoricalDtype.is_dtype(arr_or_dtype) -def is_1d_only_ea_obj(obj: Any) -> bool: - """ - ExtensionArray that does not support 2D, or more specifically that does - not use HybridBlock. +def is_string_dtype(arr_or_dtype) -> bool: """ - from pandas.core.arrays import ( - DatetimeArray, - ExtensionArray, - PeriodArray, - TimedeltaArray, - ) + Check whether the provided array or dtype is of the string dtype. - return isinstance(obj, ExtensionArray) and not isinstance( - obj, (DatetimeArray, TimedeltaArray, PeriodArray) - ) + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + Returns + ------- + boolean + Whether or not the array or dtype is of the string dtype. -def is_1d_only_ea_dtype(dtype: DtypeObj | None) -> bool: - """ - Analogue to is_extension_array_dtype but excluding DatetimeTZDtype. + Examples + -------- + >>> is_string_dtype(str) + True + >>> is_string_dtype(object) + True + >>> is_string_dtype(int) + False + >>> + >>> is_string_dtype(np.array(['a', 'b'])) + True + >>> is_string_dtype(pd.Series([1, 2])) + False """ - # Note: if other EA dtypes are ever held in HybridBlock, exclude those - # here too. - # NB: need to check DatetimeTZDtype and not is_datetime64tz_dtype - # to exclude ArrowTimestampUSDtype - return isinstance(dtype, ExtensionDtype) and not isinstance( - dtype, (DatetimeTZDtype, PeriodDtype) - ) + # TODO: gh-15585: consider making the checks stricter. + def condition(dtype) -> bool: + return dtype.kind in ("O", "S", "U") and not is_excluded_dtype(dtype) + + def is_excluded_dtype(dtype) -> bool: + """ + These have kind = "O" but aren't string dtypes so need to be explicitly excluded + """ + return isinstance(dtype, (PeriodDtype, IntervalDtype, CategoricalDtype)) + + return _is_dtype(arr_or_dtype, condition) def is_extension_array_dtype(arr_or_dtype) -> bool: @@ -1480,51 +1355,7 @@ def is_extension_array_dtype(arr_or_dtype) -> bool: return registry.find(dtype) is not None -def is_ea_or_datetimelike_dtype(dtype: DtypeObj | None) -> bool: - """ - Check for ExtensionDtype, datetime64 dtype, or timedelta64 dtype. - - Notes - ----- - Checks only for dtype objects, not dtype-castable strings or types. - """ - return isinstance(dtype, ExtensionDtype) or ( - isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"] - ) - - -def is_complex_dtype(arr_or_dtype) -> bool: - """ - Check whether the provided array or dtype is of a complex dtype. - - Parameters - ---------- - arr_or_dtype : array-like or dtype - The array or dtype to check. - - Returns - ------- - boolean - Whether or not the array or dtype is of a complex dtype. - - Examples - -------- - >>> is_complex_dtype(str) - False - >>> is_complex_dtype(int) - False - >>> is_complex_dtype(np.complex_) - True - >>> is_complex_dtype(np.array(['a', 'b'])) - False - >>> is_complex_dtype(pd.Series([1, 2])) - False - >>> is_complex_dtype(np.array([1 + 1j, 5])) - True - """ - return _is_dtype_type(arr_or_dtype, classes(np.complexfloating)) - - +# type checking helpers that accept an array-like/dtype-like arg and a callable def _is_dtype(arr_or_dtype, condition) -> bool: """ Return true if the condition is satisfied for the arr_or_dtype. @@ -1549,41 +1380,6 @@ def _is_dtype(arr_or_dtype, condition) -> bool: return condition(dtype) -def get_dtype(arr_or_dtype) -> DtypeObj: - """ - Get the dtype instance associated with an array - or dtype object. - - Parameters - ---------- - arr_or_dtype : array-like or dtype - The array-like or dtype object whose dtype we want to extract. - - Returns - ------- - obj_dtype : The extract dtype instance from the - passed in array or dtype object. - - Raises - ------ - TypeError : The passed in object is None. - """ - if arr_or_dtype is None: - raise TypeError("Cannot deduce dtype from null object") - - # fastpath - elif isinstance(arr_or_dtype, np.dtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, type): - return np.dtype(arr_or_dtype) - - # if we have an array-like - elif hasattr(arr_or_dtype, "dtype"): - arr_or_dtype = arr_or_dtype.dtype - - return pandas_dtype(arr_or_dtype) - - def _is_dtype_type(arr_or_dtype, condition) -> bool: """ Return true if the condition is satisfied for the arr_or_dtype. @@ -1628,6 +1424,67 @@ def _is_dtype_type(arr_or_dtype, condition) -> bool: return condition(tipo) +# type checking helpers that accept any dtype-like arg +def is_string_or_object_np_dtype(dtype: np.dtype) -> bool: + """ + Faster alternative to is_string_dtype, assumes we have a np.dtype object. + """ + return dtype == object or dtype.kind in "SU" + + +def is_1d_only_ea_dtype(dtype: DtypeObj | None) -> bool: + """ + Analogue to is_extension_array_dtype but excluding DatetimeTZDtype. + """ + # Note: if other EA dtypes are ever held in HybridBlock, exclude those + # here too. + # NB: need to check DatetimeTZDtype and not is_datetime64tz_dtype + # to exclude ArrowTimestampUSDtype + return isinstance(dtype, ExtensionDtype) and not isinstance( + dtype, (DatetimeTZDtype, PeriodDtype) + ) + + +def is_ea_or_datetimelike_dtype(dtype: DtypeObj | None) -> bool: + """ + Check for ExtensionDtype, datetime64 dtype, or timedelta64 dtype. + + Notes + ----- + Checks only for dtype objects, not dtype-castable strings or types. + """ + return isinstance(dtype, ExtensionDtype) or ( + isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"] + ) + + +def _validate_date_like_dtype(dtype) -> None: + """ + Check whether the dtype is a date-like dtype. Raises an error if invalid. + + Parameters + ---------- + dtype : dtype, type + The dtype to check. + + Raises + ------ + TypeError : The dtype could not be casted to a date-like dtype. + ValueError : The dtype is an illegal date-like dtype (e.g. the + frequency provided is too specific) + """ + try: + typ = np.datetime_data(dtype)[0] + except ValueError as e: + raise TypeError(e) from e + if typ not in ["generic", "ns"]: + raise ValueError( + f"{repr(dtype.name)} is too specific of a frequency, " + f"try passing {repr(dtype.type.__name__)}" + ) + + +# type inference/extraction functions def infer_dtype_from_object(dtype) -> type: """ Get a numpy dtype.type-style object for a dtype object. @@ -1692,56 +1549,38 @@ def infer_dtype_from_object(dtype) -> type: return infer_dtype_from_object(np.dtype(dtype)) -def _validate_date_like_dtype(dtype) -> None: +def get_dtype(arr_or_dtype) -> DtypeObj: """ - Check whether the dtype is a date-like dtype. Raises an error if invalid. + Get the dtype instance associated with an array + or dtype object. Parameters ---------- - dtype : dtype, type - The dtype to check. + arr_or_dtype : array-like or dtype + The array-like or dtype object whose dtype we want to extract. + + Returns + ------- + obj_dtype : The extract dtype instance from the + passed in array or dtype object. Raises ------ - TypeError : The dtype could not be casted to a date-like dtype. - ValueError : The dtype is an illegal date-like dtype (e.g. the - frequency provided is too specific) - """ - try: - typ = np.datetime_data(dtype)[0] - except ValueError as e: - raise TypeError(e) from e - if typ not in ["generic", "ns"]: - raise ValueError( - f"{repr(dtype.name)} is too specific of a frequency, " - f"try passing {repr(dtype.type.__name__)}" - ) - - -def validate_all_hashable(*args, error_name: str | None = None) -> None: + TypeError : The passed in object is None. """ - Return None if all args are hashable, else raise a TypeError. - - Parameters - ---------- - *args - Arguments to validate. - error_name : str, optional - The name to use if error + if arr_or_dtype is None: + raise TypeError("Cannot deduce dtype from null object") - Raises - ------ - TypeError : If an argument is not hashable + # fastpath + elif isinstance(arr_or_dtype, np.dtype): + return arr_or_dtype + elif isinstance(arr_or_dtype, type): + return np.dtype(arr_or_dtype) + # if we have an array-like + elif hasattr(arr_or_dtype, "dtype"): + arr_or_dtype = arr_or_dtype.dtype - Returns - ------- - None - """ - if not all(is_hashable(arg) for arg in args): - if error_name: - raise TypeError(f"{error_name} must be a hashable type") - else: - raise TypeError("All elements must be hashable") + return pandas_dtype(arr_or_dtype) def pandas_dtype(dtype) -> DtypeObj: @@ -1760,6 +1599,10 @@ def pandas_dtype(dtype) -> DtypeObj: ------ TypeError if not a dtype """ + if inspect.isclass(dtype) and issubclass(dtype, (np.dtype, ExtensionDtype)): + msg = "Must pass dtype instance, not dtype class" + raise TypeError(msg) + # short-circuit if isinstance(dtype, np.ndarray): return dtype.dtype @@ -1794,21 +1637,187 @@ def pandas_dtype(dtype) -> DtypeObj: return npdtype -def is_all_strings(value: ArrayLike) -> bool: +# type comparison functions +def is_dtype_equal(source, target) -> bool: """ - Check if this is an array of strings that we should try parsing. + Check if two dtypes are equal. - Includes object-dtype ndarray containing all-strings, StringArray, - and Categorical with all-string categories. - Does not include numpy string dtypes. + Parameters + ---------- + source : The first dtype to compare + target : The second dtype to compare + + Returns + ------- + boolean + Whether or not the two dtypes are equal. + + Examples + -------- + >>> is_dtype_equal(int, float) + False + >>> is_dtype_equal("int", int) + True + >>> is_dtype_equal(object, "category") + False + >>> is_dtype_equal(CategoricalDtype(), "category") + True + >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64") + False """ - dtype = value.dtype + if isinstance(target, str): + if not isinstance(source, str): + # GH#38516 ensure we get the same behavior from + # is_dtype_equal(CDT, "category") and CDT == "category" + try: + src = get_dtype(source) + if isinstance(src, ExtensionDtype): + return src == target + except (TypeError, AttributeError, ImportError): + return False + elif isinstance(source, str): + return is_dtype_equal(target, source) - if isinstance(dtype, np.dtype): - return ( - dtype == np.dtype("object") - and lib.infer_dtype(value, skipna=False) == "string" - ) - elif isinstance(dtype, CategoricalDtype): - return dtype.categories.inferred_type == "string" - return dtype == "string" + try: + source = get_dtype(source) + target = get_dtype(target) + return source == target + except (TypeError, AttributeError, ImportError): + + # invalid comparison + # object == category will hit this + return False + + +# This exists to silence numpy deprecation warnings, see GH#29553 +def is_numeric_v_string_like(a: ArrayLike, b): + """ + Check if we are comparing a string-like object to a numeric ndarray. + NumPy doesn't like to compare such objects, especially numeric arrays + and scalar string-likes. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean + Whether we return a comparing a string-like object to a numeric array. + + Examples + -------- + >>> is_numeric_v_string_like(np.array([1]), "foo") + True + >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + True + >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + True + >>> is_numeric_v_string_like(np.array([1]), np.array([2])) + False + >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + False + """ + is_a_array = isinstance(a, np.ndarray) + is_b_array = isinstance(b, np.ndarray) + + is_a_numeric_array = is_a_array and a.dtype.kind in ("u", "i", "f", "c", "b") + is_b_numeric_array = is_b_array and b.dtype.kind in ("u", "i", "f", "c", "b") + is_a_string_array = is_a_array and a.dtype.kind in ("S", "U") + is_b_string_array = is_b_array and b.dtype.kind in ("S", "U") + + is_b_scalar_string_like = not is_b_array and isinstance(b, str) + + return ( + (is_a_numeric_array and is_b_scalar_string_like) + or (is_a_numeric_array and is_b_string_array) + or (is_b_numeric_array and is_a_string_array) + ) + + +# This exists to silence numpy deprecation warnings, see GH#29553 +def is_datetimelike_v_numeric(a, b): + """ + Check if we are comparing a datetime-like object to a numeric object. + By "numeric," we mean an object that is either of an int or float dtype. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean + Whether we return a comparing a datetime-like to a numeric object. + + Examples + -------- + >>> from datetime import datetime + >>> dt = np.datetime64(datetime(2017, 1, 1)) + >>> + >>> is_datetimelike_v_numeric(1, 1) + False + >>> is_datetimelike_v_numeric(dt, dt) + False + >>> is_datetimelike_v_numeric(1, dt) + True + >>> is_datetimelike_v_numeric(dt, 1) # symmetric check + True + >>> is_datetimelike_v_numeric(np.array([dt]), 1) + True + >>> is_datetimelike_v_numeric(np.array([1]), dt) + True + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + True + >>> is_datetimelike_v_numeric(np.array([1]), np.array([2])) + False + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + False + """ + if not hasattr(a, "dtype"): + a = np.asarray(a) + if not hasattr(b, "dtype"): + b = np.asarray(b) + + def is_numeric(x): + """ + Check if an object has a numeric dtype (i.e. integer or float). + """ + return is_integer_dtype(x) or is_float_dtype(x) + + return (needs_i8_conversion(a) and is_numeric(b)) or ( + needs_i8_conversion(b) and is_numeric(a) + ) + + +# misc. +def validate_all_hashable(*args, error_name: str | None = None) -> None: + """ + Return None if all args are hashable, else raise a TypeError. + + Parameters + ---------- + *args + Arguments to validate. + error_name : str, optional + The name to use if error + + Raises + ------ + TypeError : If an argument is not hashable + + Returns + ------- + None + """ + if not all(is_hashable(arg) for arg in args): + if error_name: + raise TypeError(f"{error_name} must be a hashable type") + else: + raise TypeError("All elements must be hashable") diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 713d80c26ef7a..8cc4e50cc8d1b 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -648,7 +648,7 @@ def _mask_datetimelike_result( return result -@disallow(PeriodDtype) +@disallow(PeriodDtype()) @bottleneck_switch() @_datetimelike_compat def nanmean( diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index a32b37fbdd71b..da9fbeeec94a0 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -1,6 +1,7 @@ from __future__ import annotations from datetime import datetime +import re import numpy as np import pytest @@ -8,6 +9,10 @@ import pandas.util._test_decorators as td from pandas.core.dtypes.astype import astype_nansafe +from pandas.core.dtypes.base import ( + ExtensionDtype, + _registry, +) import pandas.core.dtypes.common as com from pandas.core.dtypes.dtypes import ( CategoricalDtype, @@ -24,6 +29,28 @@ from pandas.arrays import SparseArray +@pytest.fixture(name="ea_dtype", params=_registry.dtypes, scope="module") +def fixture_ea_dtype(request) -> type[ExtensionDtype]: + """ + All registered ExtensionDtype subclasses. + """ + return request.param + + +@pytest.fixture( + name="is_dtype_func", + params=(f for f in dir(com) if re.fullmatch(r"^is_\w+_dtype$", f)), + scope="module", +) +def fixture_is_dtype_func(request): + """ + All functions of the form 'is_*_dtype' in pandas.core.dtypes.common, e.g. + 'is_interval_dtype'. + """ + fname = request.param + return getattr(com, fname) + + # EA & Actual Dtypes def to_ea_dtypes(dtypes): """convert list of string dtypes to EA dtype""" @@ -36,25 +63,38 @@ def to_numpy_dtypes(dtypes): class TestPandasDtype: - # Passing invalid dtype, both as a string or object, must raise TypeError # Per issue GH15520 @pytest.mark.parametrize("box", [pd.Timestamp, "pd.Timestamp", list]) def test_invalid_dtype_error(self, box): - with pytest.raises(TypeError, match="not understood"): + msg = "|".join( + ( + "Must pass dtype instance, not dtype class", + "not understood", + ) + ) + with pytest.raises(TypeError, match=msg): com.pandas_dtype(box) + def test_raises_if_passed_dtype_class(self, ea_dtype: type[ExtensionDtype]): + """ + GH 47108 + """ + msg = "Must pass dtype instance, not dtype class" + with pytest.raises(TypeError, match=msg): + com.pandas_dtype(ea_dtype) + @pytest.mark.parametrize( "dtype", [ object, - "float64", np.object_, np.dtype("object"), "O", - np.float64, float, + np.float64, np.dtype("float64"), + "float64", ], ) def test_pandas_dtype_valid(self, dtype): @@ -107,6 +147,37 @@ def test_period_dtype(self, dtype): assert com.pandas_dtype(dtype) == dtype +@pytest.mark.xfail(reason="not yet implemented") +def test_is_dtype_func_raises_if_passed_dtype_class( + is_dtype_func, + ea_dtype: type[ExtensionDtype], +): + """ + GH 47108 + + These should raise, like com.pandas_dtype, if passed an ExtensionDtype subclass. + """ + msg = "Must pass dtype instance, not dtype class" + with pytest.raises(TypeError, match=msg): + is_dtype_func(ea_dtype) + + +def test_is_dtype_func_returns_false_if_passed_none(is_dtype_func, request): + """ + GH 15941 + + is_*_dtype functions all return False if passed None (and don't raise). + """ + if is_dtype_func is com.is_string_or_object_np_dtype: + xfail = pytest.mark.xfail( + reason="fastpath requires np.dtype obj", + raises=AttributeError, + ) + request.node.add_marker(xfail) + + assert is_dtype_func(None) is False + + dtypes = { "datetime_tz": com.pandas_dtype("datetime64[ns, US/Eastern]"), "datetime": com.pandas_dtype("datetime64[ns]"), @@ -155,26 +226,6 @@ def test_dtype_equal_strict(dtype1, dtype2): assert not com.is_dtype_equal(dtype1, dtype2) -def get_is_dtype_funcs(): - """ - Get all functions in pandas.core.dtypes.common that - begin with 'is_' and end with 'dtype' - - """ - fnames = [f for f in dir(com) if (f.startswith("is_") and f.endswith("dtype"))] - fnames.remove("is_string_or_object_np_dtype") # fastpath requires np.dtype obj - return [getattr(com, fname) for fname in fnames] - - -@pytest.mark.parametrize("func", get_is_dtype_funcs(), ids=lambda x: x.__name__) -def test_get_dtype_error_catch(func): - # see gh-15941 - # - # No exception should be raised. - - assert not func(None) - - def test_is_object(): assert com.is_object_dtype(object) assert com.is_object_dtype(np.array([], dtype=object)) @@ -558,29 +609,43 @@ def test_is_float_dtype(): assert com.is_float_dtype(pd.Index([1, 2.0])) -def test_is_bool_dtype(): - assert not com.is_bool_dtype(int) - assert not com.is_bool_dtype(str) - assert not com.is_bool_dtype(pd.Series([1, 2])) - assert not com.is_bool_dtype(pd.Series(["a", "b"], dtype="category")) - assert not com.is_bool_dtype(np.array(["a", "b"])) - assert not com.is_bool_dtype(pd.Index(["a", "b"])) - assert not com.is_bool_dtype("Int64") - - assert com.is_bool_dtype(bool) - assert com.is_bool_dtype(np.bool_) - assert com.is_bool_dtype(pd.Series([True, False], dtype="category")) - assert com.is_bool_dtype(np.array([True, False])) - assert com.is_bool_dtype(pd.Index([True, False])) - - assert com.is_bool_dtype(pd.BooleanDtype()) - assert com.is_bool_dtype(pd.array([True, False, None], dtype="boolean")) - assert com.is_bool_dtype("boolean") +@pytest.mark.parametrize( + "value", + ( + True, + False, + int, + str, + "Int64", + "0 - Name", # GH39010 + pd.array(("a", "b")), + pd.Index(("a", "b")), + pd.Series(("a", "b"), dtype="category"), + pd.Series((1, 2)), + ), +) +def test_is_bool_dtype_returns_false(value): + assert com.is_bool_dtype(value) is False -def test_is_bool_dtype_numpy_error(): - # GH39010 - assert not com.is_bool_dtype("0 - Name") +@pytest.mark.parametrize( + "value", + ( + bool, + np.bool_, + np.dtype(np.bool_), + pd.BooleanDtype(), + "bool", + "boolean", + pd.array((True, False)), + pd.Index((True, False)), + pd.Series((True, False)), + pd.Series((True, False), dtype="category"), + pd.Series((True, False, None), dtype="boolean"), + ), +) +def test_is_bool_dtype_returns_true(value): + assert com.is_bool_dtype(value) is True @pytest.mark.filterwarnings("ignore:'is_extension_type' is deprecated:FutureWarning") @@ -674,6 +739,7 @@ def test_is_complex_dtype(): (PeriodDtype(freq="D"), PeriodDtype(freq="D")), ("period[D]", PeriodDtype(freq="D")), (IntervalDtype(), IntervalDtype()), + (pd.BooleanDtype(), pd.BooleanDtype()), ], ) def test_get_dtype(input_param, result):