From c835dd8876ec6a8b1edfb43dd36163ae25b5a2c0 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 24 May 2022 14:24:45 -0700 Subject: [PATCH 1/7] add failing case reported in #45417 And parameterize while we're at it. --- pandas/tests/dtypes/test_common.py | 61 +++++++++++++++++++----------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index a32b37fbdd71b..cbd2ce2d4e4aa 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -558,29 +558,44 @@ def test_is_float_dtype(): assert com.is_float_dtype(pd.Index([1, 2.0])) -def test_is_bool_dtype(): - assert not com.is_bool_dtype(int) - assert not com.is_bool_dtype(str) - assert not com.is_bool_dtype(pd.Series([1, 2])) - assert not com.is_bool_dtype(pd.Series(["a", "b"], dtype="category")) - assert not com.is_bool_dtype(np.array(["a", "b"])) - assert not com.is_bool_dtype(pd.Index(["a", "b"])) - assert not com.is_bool_dtype("Int64") - - assert com.is_bool_dtype(bool) - assert com.is_bool_dtype(np.bool_) - assert com.is_bool_dtype(pd.Series([True, False], dtype="category")) - assert com.is_bool_dtype(np.array([True, False])) - assert com.is_bool_dtype(pd.Index([True, False])) - - assert com.is_bool_dtype(pd.BooleanDtype()) - assert com.is_bool_dtype(pd.array([True, False, None], dtype="boolean")) - assert com.is_bool_dtype("boolean") - - -def test_is_bool_dtype_numpy_error(): - # GH39010 - assert not com.is_bool_dtype("0 - Name") +@pytest.mark.parametrize( + "value", + ( + True, + False, + int, + str, + "Int64", + "0 - Name", # GH39010 + pd.array(("a", "b")), + pd.Index(("a", "b")), + pd.Series(("a", "b"), dtype="category"), + pd.Series((1, 2)), + ), +) +def test_is_bool_dtype_returns_false(value): + assert com.is_bool_dtype(value) is False + + +@pytest.mark.parametrize( + "value", + ( + bool, + np.bool_, + np.dtype(np.bool_), + pd.BooleanDtype, + pd.BooleanDtype(), + "bool", + "boolean", + pd.array((True, False)), + pd.Index((True, False)), + pd.Series((True, False)), + pd.Series((True, False), dtype="category"), + pd.Series((True, False, None), dtype="boolean"), + ), +) +def test_is_bool_dtype_returns_true(value): + assert com.is_bool_dtype(value) is True @pytest.mark.filterwarnings("ignore:'is_extension_type' is deprecated:FutureWarning") From 6f40d3de2e41debe4887f9cfd37acba8ee8aba71 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 24 May 2022 16:11:52 -0700 Subject: [PATCH 2/7] make pandas_dtype more consistent --- pandas/core/dtypes/common.py | 4 +++ pandas/tests/dtypes/test_common.py | 44 ++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 6776064342db0..429b41f8205fd 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -2,6 +2,7 @@ Common type operations. """ from __future__ import annotations +import inspect from typing import ( Any, @@ -30,6 +31,7 @@ DatetimeTZDtype, ExtensionDtype, IntervalDtype, + PandasExtensionDtype, PeriodDtype, ) from pandas.core.dtypes.generic import ( @@ -1765,6 +1767,8 @@ def pandas_dtype(dtype) -> DtypeObj: return dtype.dtype elif isinstance(dtype, (np.dtype, ExtensionDtype)): return dtype + elif inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype): + return dtype() # registered extension types result = registry.find(dtype) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index cbd2ce2d4e4aa..db1509b004102 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -106,6 +106,48 @@ def test_period_dtype(self, dtype): assert com.pandas_dtype(dtype) == PeriodDtype(dtype) assert com.pandas_dtype(dtype) == dtype + @pytest.mark.parametrize( + "cls", + ( + pd.BooleanDtype, + pd.Int8Dtype, + pd.Int16Dtype, + pd.Int32Dtype, + pd.Int64Dtype, + pd.UInt8Dtype, + pd.UInt16Dtype, + pd.UInt32Dtype, + pd.UInt64Dtype, + pd.Float32Dtype, + pd.Float64Dtype, + pd.SparseDtype, + pd.StringDtype, + IntervalDtype, + CategoricalDtype, + pytest.param( + DatetimeTZDtype, + marks=pytest.mark.xfail(reason="must specify TZ", raises=TypeError), + ), + pytest.param( + PeriodDtype, + marks=pytest.mark.xfail( + reason="must specify frequency", raises=AttributeError + ), + ), + ), + ) + def test_pd_extension_dtype(self, cls): + """ + TODO: desired behavior? + + For extension dtypes that admit no options OR can be initialized with no args + passed, convert the extension dtype class to an instance of that class. + """ + expected = cls() + result = com.pandas_dtype(cls) + + assert result == expected + dtypes = { "datetime_tz": com.pandas_dtype("datetime64[ns, US/Eastern]"), @@ -689,6 +731,8 @@ def test_is_complex_dtype(): (PeriodDtype(freq="D"), PeriodDtype(freq="D")), ("period[D]", PeriodDtype(freq="D")), (IntervalDtype(), IntervalDtype()), + (pd.BooleanDtype, pd.BooleanDtype()), + (pd.BooleanDtype(), pd.BooleanDtype()), ], ) def test_get_dtype(input_param, result): From 2404f3daee1ee19c75fc782347081187d5c60151 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 24 May 2022 16:26:40 -0700 Subject: [PATCH 3/7] narrow the check, want only np types --- pandas/core/dtypes/common.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 429b41f8205fd..8bfd664fce9b0 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -2,8 +2,8 @@ Common type operations. """ from __future__ import annotations -import inspect +import inspect from typing import ( Any, Callable, @@ -31,7 +31,6 @@ DatetimeTZDtype, ExtensionDtype, IntervalDtype, - PandasExtensionDtype, PeriodDtype, ) from pandas.core.dtypes.generic import ( @@ -1576,9 +1575,8 @@ def get_dtype(arr_or_dtype) -> DtypeObj: # fastpath elif isinstance(arr_or_dtype, np.dtype): return arr_or_dtype - elif isinstance(arr_or_dtype, type): + elif inspect.isclass(arr_or_dtype) and issubclass(arr_or_dtype, np.generic): return np.dtype(arr_or_dtype) - # if we have an array-like elif hasattr(arr_or_dtype, "dtype"): arr_or_dtype = arr_or_dtype.dtype From f1abd834e3dd3b1a440bebadaa58b531ee782a12 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 25 May 2022 12:27:33 -0700 Subject: [PATCH 4/7] raise if dtype class obj passed --- pandas/core/dtypes/common.py | 6 ++- pandas/core/nanops.py | 2 +- pandas/tests/dtypes/test_common.py | 69 +++++++++--------------------- 3 files changed, 26 insertions(+), 51 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 8bfd664fce9b0..233be10370345 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1760,13 +1760,15 @@ def pandas_dtype(dtype) -> DtypeObj: ------ TypeError if not a dtype """ + if inspect.isclass(dtype) and issubclass(dtype, (np.dtype, ExtensionDtype)): + msg = "Must pass dtype instance, not dtype class" + raise TypeError(msg) + # short-circuit if isinstance(dtype, np.ndarray): return dtype.dtype elif isinstance(dtype, (np.dtype, ExtensionDtype)): return dtype - elif inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype): - return dtype() # registered extension types result = registry.find(dtype) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 713d80c26ef7a..8cc4e50cc8d1b 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -648,7 +648,7 @@ def _mask_datetimelike_result( return result -@disallow(PeriodDtype) +@disallow(PeriodDtype()) @bottleneck_switch() @_datetimelike_compat def nanmean( diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index db1509b004102..60ff79a58a46a 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -8,6 +8,10 @@ import pandas.util._test_decorators as td from pandas.core.dtypes.astype import astype_nansafe +from pandas.core.dtypes.base import ( + ExtensionDtype, + _registry, +) import pandas.core.dtypes.common as com from pandas.core.dtypes.dtypes import ( CategoricalDtype, @@ -23,6 +27,8 @@ from pandas.api.types import pandas_dtype from pandas.arrays import SparseArray +ALL_EA_DTYPES = _registry.dtypes + # EA & Actual Dtypes def to_ea_dtypes(dtypes): @@ -36,25 +42,36 @@ def to_numpy_dtypes(dtypes): class TestPandasDtype: - # Passing invalid dtype, both as a string or object, must raise TypeError # Per issue GH15520 @pytest.mark.parametrize("box", [pd.Timestamp, "pd.Timestamp", list]) def test_invalid_dtype_error(self, box): - with pytest.raises(TypeError, match="not understood"): + msg = "|".join( + ( + "Must pass dtype instance, not dtype class", + "not understood", + ) + ) + with pytest.raises(TypeError, match=msg): com.pandas_dtype(box) + @pytest.mark.parametrize("cls", ALL_EA_DTYPES) + def test_raises_for_dtype_class(self, cls: type[ExtensionDtype]): + msg = "Must pass dtype instance, not dtype class" + with pytest.raises(TypeError, match=msg): + com.pandas_dtype(cls) + @pytest.mark.parametrize( "dtype", [ object, - "float64", np.object_, np.dtype("object"), "O", - np.float64, float, + np.float64, np.dtype("float64"), + "float64", ], ) def test_pandas_dtype_valid(self, dtype): @@ -106,48 +123,6 @@ def test_period_dtype(self, dtype): assert com.pandas_dtype(dtype) == PeriodDtype(dtype) assert com.pandas_dtype(dtype) == dtype - @pytest.mark.parametrize( - "cls", - ( - pd.BooleanDtype, - pd.Int8Dtype, - pd.Int16Dtype, - pd.Int32Dtype, - pd.Int64Dtype, - pd.UInt8Dtype, - pd.UInt16Dtype, - pd.UInt32Dtype, - pd.UInt64Dtype, - pd.Float32Dtype, - pd.Float64Dtype, - pd.SparseDtype, - pd.StringDtype, - IntervalDtype, - CategoricalDtype, - pytest.param( - DatetimeTZDtype, - marks=pytest.mark.xfail(reason="must specify TZ", raises=TypeError), - ), - pytest.param( - PeriodDtype, - marks=pytest.mark.xfail( - reason="must specify frequency", raises=AttributeError - ), - ), - ), - ) - def test_pd_extension_dtype(self, cls): - """ - TODO: desired behavior? - - For extension dtypes that admit no options OR can be initialized with no args - passed, convert the extension dtype class to an instance of that class. - """ - expected = cls() - result = com.pandas_dtype(cls) - - assert result == expected - dtypes = { "datetime_tz": com.pandas_dtype("datetime64[ns, US/Eastern]"), @@ -625,7 +600,6 @@ def test_is_bool_dtype_returns_false(value): bool, np.bool_, np.dtype(np.bool_), - pd.BooleanDtype, pd.BooleanDtype(), "bool", "boolean", @@ -731,7 +705,6 @@ def test_is_complex_dtype(): (PeriodDtype(freq="D"), PeriodDtype(freq="D")), ("period[D]", PeriodDtype(freq="D")), (IntervalDtype(), IntervalDtype()), - (pd.BooleanDtype, pd.BooleanDtype()), (pd.BooleanDtype(), pd.BooleanDtype()), ], ) From 2f50ea0e1807d7e11c5a839052070211923088cd Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 25 May 2022 14:04:33 -0700 Subject: [PATCH 5/7] add tests for is__dtype functions --- pandas/tests/dtypes/test_common.py | 81 +++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 24 deletions(-) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 60ff79a58a46a..dc4afbcd010f3 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -1,6 +1,7 @@ from __future__ import annotations from datetime import datetime +import re import numpy as np import pytest @@ -27,7 +28,27 @@ from pandas.api.types import pandas_dtype from pandas.arrays import SparseArray -ALL_EA_DTYPES = _registry.dtypes + +@pytest.fixture(name="ea_dtype", params=_registry.dtypes, scope="module") +def fixture_ea_dtype(request) -> type[ExtensionDtype]: + """ + All registered ExtensionDtype subclasses. + """ + return request.param + + +@pytest.fixture( + name="is_dtype_func", + params=(f for f in dir(com) if re.fullmatch(r"^is_\w+_dtype$", f)), + scope="module", +) +def fixture_is_dtype_func(request): + """ + All functions of the form 'is_*_dtype' in pandas.core.dtypes.common, e.g. + 'is_interval_dtype'. + """ + fname = request.param + return getattr(com, fname) # EA & Actual Dtypes @@ -55,11 +76,13 @@ def test_invalid_dtype_error(self, box): with pytest.raises(TypeError, match=msg): com.pandas_dtype(box) - @pytest.mark.parametrize("cls", ALL_EA_DTYPES) - def test_raises_for_dtype_class(self, cls: type[ExtensionDtype]): + def test_raises_if_passed_dtype_class(self, ea_dtype: type[ExtensionDtype]): + """ + GH 47108 + """ msg = "Must pass dtype instance, not dtype class" with pytest.raises(TypeError, match=msg): - com.pandas_dtype(cls) + com.pandas_dtype(ea_dtype) @pytest.mark.parametrize( "dtype", @@ -124,6 +147,36 @@ def test_period_dtype(self, dtype): assert com.pandas_dtype(dtype) == dtype +def test_is_dtype_func_raises_if_passed_dtype_class( + is_dtype_func, + ea_dtype: type[ExtensionDtype], +): + """ + GH 47108 + + These should raise, like com.pandas_dtype, if passed an ExtensionDtype subclass. + """ + msg = "Must pass dtype instance, not dtype class" + with pytest.raises(TypeError, match=msg): + is_dtype_func(ea_dtype) + + +def test_is_dtype_func_returns_false_if_passed_none(is_dtype_func, request): + """ + GH 15941 + + is_*_dtype functions all return False if passed None (and don't raise). + """ + if is_dtype_func is com.is_string_or_object_np_dtype: + xfail = pytest.mark.xfail( + reason="fastpath requires np.dtype obj", + raises=AttributeError, + ) + request.node.add_marker(xfail) + + assert is_dtype_func(None) is False + + dtypes = { "datetime_tz": com.pandas_dtype("datetime64[ns, US/Eastern]"), "datetime": com.pandas_dtype("datetime64[ns]"), @@ -172,26 +225,6 @@ def test_dtype_equal_strict(dtype1, dtype2): assert not com.is_dtype_equal(dtype1, dtype2) -def get_is_dtype_funcs(): - """ - Get all functions in pandas.core.dtypes.common that - begin with 'is_' and end with 'dtype' - - """ - fnames = [f for f in dir(com) if (f.startswith("is_") and f.endswith("dtype"))] - fnames.remove("is_string_or_object_np_dtype") # fastpath requires np.dtype obj - return [getattr(com, fname) for fname in fnames] - - -@pytest.mark.parametrize("func", get_is_dtype_funcs(), ids=lambda x: x.__name__) -def test_get_dtype_error_catch(func): - # see gh-15941 - # - # No exception should be raised. - - assert not func(None) - - def test_is_object(): assert com.is_object_dtype(object) assert com.is_object_dtype(np.array([], dtype=object)) From 7d331f47f6808badc250563f61c303085994f74a Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 25 May 2022 16:36:01 -0700 Subject: [PATCH 6/7] group like functions but keep current behavior This just rearranges lines, for easier visual grepping. --- pandas/core/dtypes/common.py | 1413 +++++++++++++++++----------------- 1 file changed, 709 insertions(+), 704 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 233be10370345..f93ca4e0f903d 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -68,6 +68,12 @@ _is_scipy_sparse = None ensure_float64 = algos.ensure_float64 +ensure_int64 = algos.ensure_int64 +ensure_int32 = algos.ensure_int32 +ensure_int16 = algos.ensure_int16 +ensure_int8 = algos.ensure_int8 +ensure_platform_int = algos.ensure_platform_int +ensure_object = algos.ensure_object def ensure_float(arr): @@ -94,14 +100,6 @@ def ensure_float(arr): return arr -ensure_int64 = algos.ensure_int64 -ensure_int32 = algos.ensure_int32 -ensure_int16 = algos.ensure_int16 -ensure_int8 = algos.ensure_int8 -ensure_platform_int = algos.ensure_platform_int -ensure_object = algos.ensure_object - - def ensure_str(value: bytes | Any) -> str: """ Ensure that bytes and non-strings get converted into ``str`` objects. @@ -159,36 +157,7 @@ def classes_and_not_datetimelike(*klasses) -> Callable: ) -def is_object_dtype(arr_or_dtype) -> bool: - """ - Check whether an array-like or dtype is of the object dtype. - - Parameters - ---------- - arr_or_dtype : array-like or dtype - The array-like or dtype to check. - - Returns - ------- - boolean - Whether or not the array-like or dtype is of the object dtype. - - Examples - -------- - >>> is_object_dtype(object) - True - >>> is_object_dtype(int) - False - >>> is_object_dtype(np.array([], dtype=object)) - True - >>> is_object_dtype(np.array([], dtype=int)) - False - >>> is_object_dtype([1, 2, 3]) - False - """ - return _is_dtype_type(arr_or_dtype, classes(np.object_)) - - +# type checking helpers that accept any array-like arg def is_sparse(arr) -> bool: """ Check whether an array-like is a 1-D pandas sparse array. @@ -313,352 +282,214 @@ def is_categorical(arr) -> bool: return isinstance(arr, ABCCategorical) or is_categorical_dtype(arr) -def is_datetime64_dtype(arr_or_dtype) -> bool: +def is_extension_type(arr) -> bool: """ - Check whether an array-like or dtype is of the datetime64 dtype. + Check whether an array-like is of a pandas extension class instance. + + .. deprecated:: 1.0.0 + Use ``is_extension_array_dtype`` instead. + + Extension classes include categoricals, pandas sparse objects (i.e. + classes represented within the pandas library and not ones external + to it like scipy sparse matrices), and datetime-like arrays. Parameters ---------- - arr_or_dtype : array-like or dtype - The array-like or dtype to check. + arr : array-like, scalar + The array-like to check. Returns ------- boolean - Whether or not the array-like or dtype is of the datetime64 dtype. + Whether or not the array-like is of a pandas extension class instance. Examples -------- - >>> is_datetime64_dtype(object) + >>> is_extension_type([1, 2, 3]) False - >>> is_datetime64_dtype(np.datetime64) - True - >>> is_datetime64_dtype(np.array([], dtype=int)) + >>> is_extension_type(np.array([1, 2, 3])) False - >>> is_datetime64_dtype(np.array([], dtype=np.datetime64)) + >>> + >>> cat = pd.Categorical([1, 2, 3]) + >>> + >>> is_extension_type(cat) True - >>> is_datetime64_dtype([1, 2, 3]) + >>> is_extension_type(pd.Series(cat)) + True + >>> is_extension_type(pd.arrays.SparseArray([1, 2, 3])) + True + >>> from scipy.sparse import bsr_matrix + >>> is_extension_type(bsr_matrix([1, 2, 3])) + False + >>> is_extension_type(pd.DatetimeIndex([1, 2, 3])) False + >>> is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + >>> + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_extension_type(s) + True """ - if isinstance(arr_or_dtype, np.dtype): - # GH#33400 fastpath for dtype object - return arr_or_dtype.kind == "M" - return _is_dtype_type(arr_or_dtype, classes(np.datetime64)) + warnings.warn( + "'is_extension_type' is deprecated and will be removed in a future " + "version. Use 'is_extension_array_dtype' instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if is_categorical_dtype(arr): + return True + elif is_sparse(arr): + return True + elif is_datetime64tz_dtype(arr): + return True + return False -def is_datetime64tz_dtype(arr_or_dtype) -> bool: +def is_1d_only_ea_obj(obj: Any) -> bool: """ - Check whether an array-like or dtype is of a DatetimeTZDtype dtype. + ExtensionArray that does not support 2D, or more specifically that does + not use HybridBlock. + """ + from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + PeriodArray, + TimedeltaArray, + ) - Parameters - ---------- - arr_or_dtype : array-like or dtype - The array-like or dtype to check. + return isinstance(obj, ExtensionArray) and not isinstance( + obj, (DatetimeArray, TimedeltaArray, PeriodArray) + ) - Returns - ------- - boolean - Whether or not the array-like or dtype is of a DatetimeTZDtype dtype. - Examples - -------- - >>> is_datetime64tz_dtype(object) - False - >>> is_datetime64tz_dtype([1, 2, 3]) - False - >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) # tz-naive - False - >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) - True +def is_all_strings(value: ArrayLike) -> bool: + """ + Check if this is an array of strings that we should try parsing. - >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") - >>> s = pd.Series([], dtype=dtype) - >>> is_datetime64tz_dtype(dtype) - True - >>> is_datetime64tz_dtype(s) - True + Includes object-dtype ndarray containing all-strings, StringArray, + and Categorical with all-string categories. + Does not include numpy string dtypes. """ - if isinstance(arr_or_dtype, ExtensionDtype): - # GH#33400 fastpath for dtype object - return arr_or_dtype.kind == "M" + dtype = value.dtype - if arr_or_dtype is None: - return False - return DatetimeTZDtype.is_dtype(arr_or_dtype) + if isinstance(dtype, np.dtype): + return ( + dtype == np.dtype("object") + and lib.infer_dtype(value, skipna=False) == "string" + ) + elif isinstance(dtype, CategoricalDtype): + return dtype.categories.inferred_type == "string" + return dtype == "string" -def is_timedelta64_dtype(arr_or_dtype) -> bool: +# type checking helpers that accept any array-like or dtype arg +def is_numeric_dtype(arr_or_dtype) -> bool: """ - Check whether an array-like or dtype is of the timedelta64 dtype. + Check whether the provided array or dtype is of a numeric dtype. Parameters ---------- arr_or_dtype : array-like or dtype - The array-like or dtype to check. + The array or dtype to check. Returns ------- boolean - Whether or not the array-like or dtype is of the timedelta64 dtype. + Whether or not the array or dtype is of a numeric dtype. Examples -------- - >>> is_timedelta64_dtype(object) + >>> is_numeric_dtype(str) False - >>> is_timedelta64_dtype(np.timedelta64) + >>> is_numeric_dtype(int) True - >>> is_timedelta64_dtype([1, 2, 3]) + >>> is_numeric_dtype(float) + True + >>> is_numeric_dtype(np.uint64) + True + >>> is_numeric_dtype(np.datetime64) False - >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) + >>> is_numeric_dtype(np.timedelta64) + False + >>> is_numeric_dtype(np.array(['a', 'b'])) + False + >>> is_numeric_dtype(pd.Series([1, 2])) True - >>> is_timedelta64_dtype('0 days') + >>> is_numeric_dtype(pd.Index([1, 2.])) + True + >>> is_numeric_dtype(np.array([], dtype=np.timedelta64)) False """ - if isinstance(arr_or_dtype, np.dtype): - # GH#33400 fastpath for dtype object - return arr_or_dtype.kind == "m" - - return _is_dtype_type(arr_or_dtype, classes(np.timedelta64)) + return _is_dtype_type( + arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_) + ) -def is_period_dtype(arr_or_dtype) -> bool: +def is_float_dtype(arr_or_dtype) -> bool: """ - Check whether an array-like or dtype is of the Period dtype. + Check whether the provided array or dtype is of a float dtype. + + This function is internal and should not be exposed in the public API. Parameters ---------- arr_or_dtype : array-like or dtype - The array-like or dtype to check. + The array or dtype to check. Returns ------- boolean - Whether or not the array-like or dtype is of the Period dtype. + Whether or not the array or dtype is of a float dtype. Examples -------- - >>> is_period_dtype(object) + >>> is_float_dtype(str) False - >>> is_period_dtype(PeriodDtype(freq="D")) + >>> is_float_dtype(int) + False + >>> is_float_dtype(float) True - >>> is_period_dtype([1, 2, 3]) + >>> is_float_dtype(np.array(['a', 'b'])) False - >>> is_period_dtype(pd.Period("2017-01-01")) + >>> is_float_dtype(pd.Series([1, 2])) False - >>> is_period_dtype(pd.PeriodIndex([], freq="A")) + >>> is_float_dtype(pd.Index([1, 2.])) True """ - if isinstance(arr_or_dtype, ExtensionDtype): - # GH#33400 fastpath for dtype object - return arr_or_dtype.type is Period - - if arr_or_dtype is None: - return False - return PeriodDtype.is_dtype(arr_or_dtype) + return _is_dtype_type(arr_or_dtype, classes(np.floating)) -def is_interval_dtype(arr_or_dtype) -> bool: +def is_any_int_dtype(arr_or_dtype) -> bool: """ - Check whether an array-like or dtype is of the Interval dtype. + Check whether the provided array or dtype is of an integer dtype. + + In this function, timedelta64 instances are also considered "any-integer" + type objects and will return True. + + This function is internal and should not be exposed in the public API. + + The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered + as integer by this function. Parameters ---------- arr_or_dtype : array-like or dtype - The array-like or dtype to check. + The array or dtype to check. Returns ------- boolean - Whether or not the array-like or dtype is of the Interval dtype. + Whether or not the array or dtype is of an integer dtype. Examples -------- - >>> is_interval_dtype(object) + >>> is_any_int_dtype(str) False - >>> is_interval_dtype(IntervalDtype()) + >>> is_any_int_dtype(int) True - >>> is_interval_dtype([1, 2, 3]) - False - >>> - >>> interval = pd.Interval(1, 2, closed="right") - >>> is_interval_dtype(interval) - False - >>> is_interval_dtype(pd.IntervalIndex([interval])) - True - """ - if isinstance(arr_or_dtype, ExtensionDtype): - # GH#33400 fastpath for dtype object - return arr_or_dtype.type is Interval - - if arr_or_dtype is None: - return False - return IntervalDtype.is_dtype(arr_or_dtype) - - -def is_categorical_dtype(arr_or_dtype) -> bool: - """ - Check whether an array-like or dtype is of the Categorical dtype. - - Parameters - ---------- - arr_or_dtype : array-like or dtype - The array-like or dtype to check. - - Returns - ------- - boolean - Whether or not the array-like or dtype is of the Categorical dtype. - - Examples - -------- - >>> is_categorical_dtype(object) - False - >>> is_categorical_dtype(CategoricalDtype()) - True - >>> is_categorical_dtype([1, 2, 3]) - False - >>> is_categorical_dtype(pd.Categorical([1, 2, 3])) - True - >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) - True - """ - if isinstance(arr_or_dtype, ExtensionDtype): - # GH#33400 fastpath for dtype object - return arr_or_dtype.name == "category" - - if arr_or_dtype is None: - return False - return CategoricalDtype.is_dtype(arr_or_dtype) - - -def is_string_or_object_np_dtype(dtype: np.dtype) -> bool: - """ - Faster alternative to is_string_dtype, assumes we have a np.dtype object. - """ - return dtype == object or dtype.kind in "SU" - - -def is_string_dtype(arr_or_dtype) -> bool: - """ - Check whether the provided array or dtype is of the string dtype. - - Parameters - ---------- - arr_or_dtype : array-like or dtype - The array or dtype to check. - - Returns - ------- - boolean - Whether or not the array or dtype is of the string dtype. - - Examples - -------- - >>> is_string_dtype(str) - True - >>> is_string_dtype(object) - True - >>> is_string_dtype(int) - False - >>> - >>> is_string_dtype(np.array(['a', 'b'])) - True - >>> is_string_dtype(pd.Series([1, 2])) - False - """ - # TODO: gh-15585: consider making the checks stricter. - def condition(dtype) -> bool: - return dtype.kind in ("O", "S", "U") and not is_excluded_dtype(dtype) - - def is_excluded_dtype(dtype) -> bool: - """ - These have kind = "O" but aren't string dtypes so need to be explicitly excluded - """ - return isinstance(dtype, (PeriodDtype, IntervalDtype, CategoricalDtype)) - - return _is_dtype(arr_or_dtype, condition) - - -def is_dtype_equal(source, target) -> bool: - """ - Check if two dtypes are equal. - - Parameters - ---------- - source : The first dtype to compare - target : The second dtype to compare - - Returns - ------- - boolean - Whether or not the two dtypes are equal. - - Examples - -------- - >>> is_dtype_equal(int, float) - False - >>> is_dtype_equal("int", int) - True - >>> is_dtype_equal(object, "category") - False - >>> is_dtype_equal(CategoricalDtype(), "category") - True - >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64") - False - """ - if isinstance(target, str): - if not isinstance(source, str): - # GH#38516 ensure we get the same behavior from - # is_dtype_equal(CDT, "category") and CDT == "category" - try: - src = get_dtype(source) - if isinstance(src, ExtensionDtype): - return src == target - except (TypeError, AttributeError, ImportError): - return False - elif isinstance(source, str): - return is_dtype_equal(target, source) - - try: - source = get_dtype(source) - target = get_dtype(target) - return source == target - except (TypeError, AttributeError, ImportError): - - # invalid comparison - # object == category will hit this - return False - - -def is_any_int_dtype(arr_or_dtype) -> bool: - """ - Check whether the provided array or dtype is of an integer dtype. - - In this function, timedelta64 instances are also considered "any-integer" - type objects and will return True. - - This function is internal and should not be exposed in the public API. - - The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered - as integer by this function. - - Parameters - ---------- - arr_or_dtype : array-like or dtype - The array or dtype to check. - - Returns - ------- - boolean - Whether or not the array or dtype is of an integer dtype. - - Examples - -------- - >>> is_any_int_dtype(str) - False - >>> is_any_int_dtype(int) - True - >>> is_any_int_dtype(float) + >>> is_any_int_dtype(float) False >>> is_any_int_dtype(np.uint64) True @@ -881,9 +712,9 @@ def is_int64_dtype(arr_or_dtype) -> bool: return _is_dtype_type(arr_or_dtype, classes(np.int64)) -def is_datetime64_any_dtype(arr_or_dtype) -> bool: +def is_complex_dtype(arr_or_dtype) -> bool: """ - Check whether the provided array or dtype is of the datetime64 dtype. + Check whether the provided array or dtype is of a complex dtype. Parameters ---------- @@ -892,40 +723,33 @@ def is_datetime64_any_dtype(arr_or_dtype) -> bool: Returns ------- - bool - Whether or not the array or dtype is of the datetime64 dtype. + boolean + Whether or not the array or dtype is of a complex dtype. Examples -------- - >>> is_datetime64_any_dtype(str) + >>> is_complex_dtype(str) False - >>> is_datetime64_any_dtype(int) + >>> is_complex_dtype(int) False - >>> is_datetime64_any_dtype(np.datetime64) # can be tz-naive - True - >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) + >>> is_complex_dtype(np.complex_) True - >>> is_datetime64_any_dtype(np.array(['a', 'b'])) + >>> is_complex_dtype(np.array(['a', 'b'])) False - >>> is_datetime64_any_dtype(np.array([1, 2])) + >>> is_complex_dtype(pd.Series([1, 2])) False - >>> is_datetime64_any_dtype(np.array([], dtype="datetime64[ns]")) - True - >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]")) + >>> is_complex_dtype(np.array([1 + 1j, 5])) True """ - if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)): - # GH#33400 fastpath for dtype object - return arr_or_dtype.kind == "M" - - if arr_or_dtype is None: - return False - return is_datetime64_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype) + return _is_dtype_type(arr_or_dtype, classes(np.complexfloating)) -def is_datetime64_ns_dtype(arr_or_dtype) -> bool: +def needs_i8_conversion(arr_or_dtype) -> bool: """ - Check whether the provided array or dtype is of the datetime64[ns] dtype. + Check whether the array or dtype should be converted to int64. + + An array-like or dtype "needs" such a conversion if the array-like + or dtype is of a datetime-like dtype Parameters ---------- @@ -934,77 +758,78 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool: Returns ------- - bool - Whether or not the array or dtype is of the datetime64[ns] dtype. + boolean + Whether or not the array or dtype should be converted to int64. Examples -------- - >>> is_datetime64_ns_dtype(str) - False - >>> is_datetime64_ns_dtype(int) + >>> needs_i8_conversion(str) False - >>> is_datetime64_ns_dtype(np.datetime64) # no unit + >>> needs_i8_conversion(np.int64) False - >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) + >>> needs_i8_conversion(np.datetime64) True - >>> is_datetime64_ns_dtype(np.array(['a', 'b'])) - False - >>> is_datetime64_ns_dtype(np.array([1, 2])) - False - >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64")) # no unit + >>> needs_i8_conversion(np.array(['a', 'b'])) False - >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]")) # wrong unit + >>> needs_i8_conversion(pd.Series([1, 2])) False - >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]")) + >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) + True + >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) True """ if arr_or_dtype is None: return False - try: - tipo = get_dtype(arr_or_dtype) - except TypeError: - if is_datetime64tz_dtype(arr_or_dtype): - tipo = get_dtype(arr_or_dtype.dtype) - else: - return False - return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE - + if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)): + # fastpath + dtype = arr_or_dtype + return dtype.kind in ["m", "M"] or dtype.type is Period -def is_timedelta64_ns_dtype(arr_or_dtype) -> bool: - """ - Check whether the provided array or dtype is of the timedelta64[ns] dtype. + try: + dtype = get_dtype(arr_or_dtype) + except (TypeError, ValueError): + return False + if isinstance(dtype, np.dtype): + return dtype.kind in ["m", "M"] + return isinstance(dtype, (PeriodDtype, DatetimeTZDtype)) - This is a very specific dtype, so generic ones like `np.timedelta64` - will return False if passed into this function. + +def is_datetime64_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the datetime64 dtype. Parameters ---------- arr_or_dtype : array-like or dtype - The array or dtype to check. + The array-like or dtype to check. Returns ------- boolean - Whether or not the array or dtype is of the timedelta64[ns] dtype. + Whether or not the array-like or dtype is of the datetime64 dtype. Examples -------- - >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]')) + >>> is_datetime64_dtype(object) + False + >>> is_datetime64_dtype(np.datetime64) True - >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]')) # Wrong frequency + >>> is_datetime64_dtype(np.array([], dtype=int)) False - >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) + >>> is_datetime64_dtype(np.array([], dtype=np.datetime64)) True - >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) + >>> is_datetime64_dtype([1, 2, 3]) False """ - return _is_dtype(arr_or_dtype, lambda dtype: dtype == TD64NS_DTYPE) + if isinstance(arr_or_dtype, np.dtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "M" + return _is_dtype_type(arr_or_dtype, classes(np.datetime64)) -def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool: +def is_datetime64_any_dtype(arr_or_dtype) -> bool: """ - Check whether the provided array or dtype is of - a timedelta64 or datetime64 dtype. + Check whether the provided array or dtype is of the datetime64 dtype. Parameters ---------- @@ -1013,145 +838,194 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool: Returns ------- - boolean - Whether or not the array or dtype is of a timedelta64, - or datetime64 dtype. + bool + Whether or not the array or dtype is of the datetime64 dtype. Examples -------- - >>> is_datetime_or_timedelta_dtype(str) + >>> is_datetime64_any_dtype(str) False - >>> is_datetime_or_timedelta_dtype(int) + >>> is_datetime64_any_dtype(int) False - >>> is_datetime_or_timedelta_dtype(np.datetime64) + >>> is_datetime64_any_dtype(np.datetime64) # can be tz-naive True - >>> is_datetime_or_timedelta_dtype(np.timedelta64) + >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) True - >>> is_datetime_or_timedelta_dtype(np.array(['a', 'b'])) + >>> is_datetime64_any_dtype(np.array(['a', 'b'])) False - >>> is_datetime_or_timedelta_dtype(pd.Series([1, 2])) + >>> is_datetime64_any_dtype(np.array([1, 2])) False - >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64)) + >>> is_datetime64_any_dtype(np.array([], dtype="datetime64[ns]")) True - >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) + >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]")) True """ - return _is_dtype_type(arr_or_dtype, classes(np.datetime64, np.timedelta64)) + if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "M" + + if arr_or_dtype is None: + return False + return is_datetime64_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype) -# This exists to silence numpy deprecation warnings, see GH#29553 -def is_numeric_v_string_like(a: ArrayLike, b): +def is_datetime64_ns_dtype(arr_or_dtype) -> bool: """ - Check if we are comparing a string-like object to a numeric ndarray. - NumPy doesn't like to compare such objects, especially numeric arrays - and scalar string-likes. + Check whether the provided array or dtype is of the datetime64[ns] dtype. Parameters ---------- - a : array-like, scalar - The first object to check. - b : array-like, scalar - The second object to check. + arr_or_dtype : array-like or dtype + The array or dtype to check. Returns ------- - boolean - Whether we return a comparing a string-like object to a numeric array. + bool + Whether or not the array or dtype is of the datetime64[ns] dtype. Examples -------- - >>> is_numeric_v_string_like(np.array([1]), "foo") - True - >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) - True - >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + >>> is_datetime64_ns_dtype(str) + False + >>> is_datetime64_ns_dtype(int) + False + >>> is_datetime64_ns_dtype(np.datetime64) # no unit + False + >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) True - >>> is_numeric_v_string_like(np.array([1]), np.array([2])) + >>> is_datetime64_ns_dtype(np.array(['a', 'b'])) False - >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + >>> is_datetime64_ns_dtype(np.array([1, 2])) + False + >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64")) # no unit + False + >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]")) # wrong unit False + >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]")) + True """ - is_a_array = isinstance(a, np.ndarray) - is_b_array = isinstance(b, np.ndarray) - - is_a_numeric_array = is_a_array and a.dtype.kind in ("u", "i", "f", "c", "b") - is_b_numeric_array = is_b_array and b.dtype.kind in ("u", "i", "f", "c", "b") - is_a_string_array = is_a_array and a.dtype.kind in ("S", "U") - is_b_string_array = is_b_array and b.dtype.kind in ("S", "U") - - is_b_scalar_string_like = not is_b_array and isinstance(b, str) - - return ( - (is_a_numeric_array and is_b_scalar_string_like) - or (is_a_numeric_array and is_b_string_array) - or (is_b_numeric_array and is_a_string_array) - ) + if arr_or_dtype is None: + return False + try: + tipo = get_dtype(arr_or_dtype) + except TypeError: + if is_datetime64tz_dtype(arr_or_dtype): + tipo = get_dtype(arr_or_dtype.dtype) + else: + return False + return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE -# This exists to silence numpy deprecation warnings, see GH#29553 -def is_datetimelike_v_numeric(a, b): +def is_datetime64tz_dtype(arr_or_dtype) -> bool: """ - Check if we are comparing a datetime-like object to a numeric object. - By "numeric," we mean an object that is either of an int or float dtype. + Check whether an array-like or dtype is of a DatetimeTZDtype dtype. Parameters ---------- - a : array-like, scalar - The first object to check. - b : array-like, scalar - The second object to check. + arr_or_dtype : array-like or dtype + The array-like or dtype to check. Returns ------- boolean - Whether we return a comparing a datetime-like to a numeric object. + Whether or not the array-like or dtype is of a DatetimeTZDtype dtype. Examples -------- - >>> from datetime import datetime - >>> dt = np.datetime64(datetime(2017, 1, 1)) - >>> - >>> is_datetimelike_v_numeric(1, 1) + >>> is_datetime64tz_dtype(object) False - >>> is_datetimelike_v_numeric(dt, dt) + >>> is_datetime64tz_dtype([1, 2, 3]) False - >>> is_datetimelike_v_numeric(1, dt) - True - >>> is_datetimelike_v_numeric(dt, 1) # symmetric check + >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) # tz-naive + False + >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) True - >>> is_datetimelike_v_numeric(np.array([dt]), 1) + + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_datetime64tz_dtype(dtype) True - >>> is_datetimelike_v_numeric(np.array([1]), dt) + >>> is_datetime64tz_dtype(s) True - >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + """ + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "M" + + if arr_or_dtype is None: + return False + return DatetimeTZDtype.is_dtype(arr_or_dtype) + + +def is_timedelta64_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the timedelta64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the timedelta64 dtype. + + Examples + -------- + >>> is_timedelta64_dtype(object) + False + >>> is_timedelta64_dtype(np.timedelta64) True - >>> is_datetimelike_v_numeric(np.array([1]), np.array([2])) + >>> is_timedelta64_dtype([1, 2, 3]) False - >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) + True + >>> is_timedelta64_dtype('0 days') False """ - if not hasattr(a, "dtype"): - a = np.asarray(a) - if not hasattr(b, "dtype"): - b = np.asarray(b) + if isinstance(arr_or_dtype, np.dtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "m" - def is_numeric(x): - """ - Check if an object has a numeric dtype (i.e. integer or float). - """ - return is_integer_dtype(x) or is_float_dtype(x) + return _is_dtype_type(arr_or_dtype, classes(np.timedelta64)) - return (needs_i8_conversion(a) and is_numeric(b)) or ( - needs_i8_conversion(b) and is_numeric(a) - ) +def is_timedelta64_ns_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the timedelta64[ns] dtype. + + This is a very specific dtype, so generic ones like `np.timedelta64` + will return False if passed into this function. -def needs_i8_conversion(arr_or_dtype) -> bool: + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of the timedelta64[ns] dtype. + + Examples + -------- + >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]')) + True + >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]')) # Wrong frequency + False + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) + True + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) + False """ - Check whether the array or dtype should be converted to int64. + return _is_dtype(arr_or_dtype, lambda dtype: dtype == TD64NS_DTYPE) - An array-like or dtype "needs" such a conversion if the array-like - or dtype is of a datetime-like dtype + +def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of + a timedelta64 or datetime64 dtype. Parameters ---------- @@ -1161,115 +1035,103 @@ def needs_i8_conversion(arr_or_dtype) -> bool: Returns ------- boolean - Whether or not the array or dtype should be converted to int64. + Whether or not the array or dtype is of a timedelta64, + or datetime64 dtype. Examples -------- - >>> needs_i8_conversion(str) + >>> is_datetime_or_timedelta_dtype(str) False - >>> needs_i8_conversion(np.int64) + >>> is_datetime_or_timedelta_dtype(int) False - >>> needs_i8_conversion(np.datetime64) + >>> is_datetime_or_timedelta_dtype(np.datetime64) True - >>> needs_i8_conversion(np.array(['a', 'b'])) + >>> is_datetime_or_timedelta_dtype(np.timedelta64) + True + >>> is_datetime_or_timedelta_dtype(np.array(['a', 'b'])) False - >>> needs_i8_conversion(pd.Series([1, 2])) + >>> is_datetime_or_timedelta_dtype(pd.Series([1, 2])) False - >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) + >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64)) True - >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) True """ - if arr_or_dtype is None: - return False - if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)): - # fastpath - dtype = arr_or_dtype - return dtype.kind in ["m", "M"] or dtype.type is Period - - try: - dtype = get_dtype(arr_or_dtype) - except (TypeError, ValueError): - return False - if isinstance(dtype, np.dtype): - return dtype.kind in ["m", "M"] - return isinstance(dtype, (PeriodDtype, DatetimeTZDtype)) + return _is_dtype_type(arr_or_dtype, classes(np.datetime64, np.timedelta64)) -def is_numeric_dtype(arr_or_dtype) -> bool: +def is_period_dtype(arr_or_dtype) -> bool: """ - Check whether the provided array or dtype is of a numeric dtype. + Check whether an array-like or dtype is of the Period dtype. Parameters ---------- arr_or_dtype : array-like or dtype - The array or dtype to check. + The array-like or dtype to check. Returns ------- boolean - Whether or not the array or dtype is of a numeric dtype. + Whether or not the array-like or dtype is of the Period dtype. Examples -------- - >>> is_numeric_dtype(str) + >>> is_period_dtype(object) False - >>> is_numeric_dtype(int) - True - >>> is_numeric_dtype(float) - True - >>> is_numeric_dtype(np.uint64) + >>> is_period_dtype(PeriodDtype(freq="D")) True - >>> is_numeric_dtype(np.datetime64) - False - >>> is_numeric_dtype(np.timedelta64) + >>> is_period_dtype([1, 2, 3]) False - >>> is_numeric_dtype(np.array(['a', 'b'])) + >>> is_period_dtype(pd.Period("2017-01-01")) False - >>> is_numeric_dtype(pd.Series([1, 2])) - True - >>> is_numeric_dtype(pd.Index([1, 2.])) + >>> is_period_dtype(pd.PeriodIndex([], freq="A")) True - >>> is_numeric_dtype(np.array([], dtype=np.timedelta64)) - False """ - return _is_dtype_type( - arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_) - ) + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.type is Period + if arr_or_dtype is None: + return False + return PeriodDtype.is_dtype(arr_or_dtype) -def is_float_dtype(arr_or_dtype) -> bool: - """ - Check whether the provided array or dtype is of a float dtype. - This function is internal and should not be exposed in the public API. +def is_interval_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the Interval dtype. Parameters ---------- arr_or_dtype : array-like or dtype - The array or dtype to check. + The array-like or dtype to check. Returns ------- boolean - Whether or not the array or dtype is of a float dtype. + Whether or not the array-like or dtype is of the Interval dtype. Examples -------- - >>> is_float_dtype(str) - False - >>> is_float_dtype(int) + >>> is_interval_dtype(object) False - >>> is_float_dtype(float) + >>> is_interval_dtype(IntervalDtype()) True - >>> is_float_dtype(np.array(['a', 'b'])) + >>> is_interval_dtype([1, 2, 3]) False - >>> is_float_dtype(pd.Series([1, 2])) + >>> + >>> interval = pd.Interval(1, 2, closed="right") + >>> is_interval_dtype(interval) False - >>> is_float_dtype(pd.Index([1, 2.])) + >>> is_interval_dtype(pd.IntervalIndex([interval])) True """ - return _is_dtype_type(arr_or_dtype, classes(np.floating)) + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.type is Interval + + if arr_or_dtype is None: + return False + return IntervalDtype.is_dtype(arr_or_dtype) def is_bool_dtype(arr_or_dtype) -> bool: @@ -1332,99 +1194,111 @@ def is_bool_dtype(arr_or_dtype) -> bool: return issubclass(dtype.type, np.bool_) -def is_extension_type(arr) -> bool: +def is_object_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the object dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the object dtype. + + Examples + -------- + >>> is_object_dtype(object) + True + >>> is_object_dtype(int) + False + >>> is_object_dtype(np.array([], dtype=object)) + True + >>> is_object_dtype(np.array([], dtype=int)) + False + >>> is_object_dtype([1, 2, 3]) + False """ - Check whether an array-like is of a pandas extension class instance. + return _is_dtype_type(arr_or_dtype, classes(np.object_)) - .. deprecated:: 1.0.0 - Use ``is_extension_array_dtype`` instead. - Extension classes include categoricals, pandas sparse objects (i.e. - classes represented within the pandas library and not ones external - to it like scipy sparse matrices), and datetime-like arrays. +def is_categorical_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the Categorical dtype. Parameters ---------- - arr : array-like, scalar - The array-like to check. + arr_or_dtype : array-like or dtype + The array-like or dtype to check. Returns ------- boolean - Whether or not the array-like is of a pandas extension class instance. + Whether or not the array-like or dtype is of the Categorical dtype. Examples -------- - >>> is_extension_type([1, 2, 3]) - False - >>> is_extension_type(np.array([1, 2, 3])) + >>> is_categorical_dtype(object) False - >>> - >>> cat = pd.Categorical([1, 2, 3]) - >>> - >>> is_extension_type(cat) - True - >>> is_extension_type(pd.Series(cat)) - True - >>> is_extension_type(pd.arrays.SparseArray([1, 2, 3])) + >>> is_categorical_dtype(CategoricalDtype()) True - >>> from scipy.sparse import bsr_matrix - >>> is_extension_type(bsr_matrix([1, 2, 3])) - False - >>> is_extension_type(pd.DatetimeIndex([1, 2, 3])) + >>> is_categorical_dtype([1, 2, 3]) False - >>> is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + >>> is_categorical_dtype(pd.Categorical([1, 2, 3])) True - >>> - >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") - >>> s = pd.Series([], dtype=dtype) - >>> is_extension_type(s) + >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) True """ - warnings.warn( - "'is_extension_type' is deprecated and will be removed in a future " - "version. Use 'is_extension_array_dtype' instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.name == "category" - if is_categorical_dtype(arr): - return True - elif is_sparse(arr): - return True - elif is_datetime64tz_dtype(arr): - return True - return False + if arr_or_dtype is None: + return False + return CategoricalDtype.is_dtype(arr_or_dtype) -def is_1d_only_ea_obj(obj: Any) -> bool: - """ - ExtensionArray that does not support 2D, or more specifically that does - not use HybridBlock. +def is_string_dtype(arr_or_dtype) -> bool: """ - from pandas.core.arrays import ( - DatetimeArray, - ExtensionArray, - PeriodArray, - TimedeltaArray, - ) + Check whether the provided array or dtype is of the string dtype. - return isinstance(obj, ExtensionArray) and not isinstance( - obj, (DatetimeArray, TimedeltaArray, PeriodArray) - ) + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + Returns + ------- + boolean + Whether or not the array or dtype is of the string dtype. -def is_1d_only_ea_dtype(dtype: DtypeObj | None) -> bool: - """ - Analogue to is_extension_array_dtype but excluding DatetimeTZDtype. + Examples + -------- + >>> is_string_dtype(str) + True + >>> is_string_dtype(object) + True + >>> is_string_dtype(int) + False + >>> + >>> is_string_dtype(np.array(['a', 'b'])) + True + >>> is_string_dtype(pd.Series([1, 2])) + False """ - # Note: if other EA dtypes are ever held in HybridBlock, exclude those - # here too. - # NB: need to check DatetimeTZDtype and not is_datetime64tz_dtype - # to exclude ArrowTimestampUSDtype - return isinstance(dtype, ExtensionDtype) and not isinstance( - dtype, (DatetimeTZDtype, PeriodDtype) - ) + # TODO: gh-15585: consider making the checks stricter. + def condition(dtype) -> bool: + return dtype.kind in ("O", "S", "U") and not is_excluded_dtype(dtype) + + def is_excluded_dtype(dtype) -> bool: + """ + These have kind = "O" but aren't string dtypes so need to be explicitly excluded + """ + return isinstance(dtype, (PeriodDtype, IntervalDtype, CategoricalDtype)) + + return _is_dtype(arr_or_dtype, condition) def is_extension_array_dtype(arr_or_dtype) -> bool: @@ -1481,51 +1355,7 @@ def is_extension_array_dtype(arr_or_dtype) -> bool: return registry.find(dtype) is not None -def is_ea_or_datetimelike_dtype(dtype: DtypeObj | None) -> bool: - """ - Check for ExtensionDtype, datetime64 dtype, or timedelta64 dtype. - - Notes - ----- - Checks only for dtype objects, not dtype-castable strings or types. - """ - return isinstance(dtype, ExtensionDtype) or ( - isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"] - ) - - -def is_complex_dtype(arr_or_dtype) -> bool: - """ - Check whether the provided array or dtype is of a complex dtype. - - Parameters - ---------- - arr_or_dtype : array-like or dtype - The array or dtype to check. - - Returns - ------- - boolean - Whether or not the array or dtype is of a complex dtype. - - Examples - -------- - >>> is_complex_dtype(str) - False - >>> is_complex_dtype(int) - False - >>> is_complex_dtype(np.complex_) - True - >>> is_complex_dtype(np.array(['a', 'b'])) - False - >>> is_complex_dtype(pd.Series([1, 2])) - False - >>> is_complex_dtype(np.array([1 + 1j, 5])) - True - """ - return _is_dtype_type(arr_or_dtype, classes(np.complexfloating)) - - +# type checking helpers that accept an array-like/dtype-like arg and a callable def _is_dtype(arr_or_dtype, condition) -> bool: """ Return true if the condition is satisfied for the arr_or_dtype. @@ -1550,40 +1380,6 @@ def _is_dtype(arr_or_dtype, condition) -> bool: return condition(dtype) -def get_dtype(arr_or_dtype) -> DtypeObj: - """ - Get the dtype instance associated with an array - or dtype object. - - Parameters - ---------- - arr_or_dtype : array-like or dtype - The array-like or dtype object whose dtype we want to extract. - - Returns - ------- - obj_dtype : The extract dtype instance from the - passed in array or dtype object. - - Raises - ------ - TypeError : The passed in object is None. - """ - if arr_or_dtype is None: - raise TypeError("Cannot deduce dtype from null object") - - # fastpath - elif isinstance(arr_or_dtype, np.dtype): - return arr_or_dtype - elif inspect.isclass(arr_or_dtype) and issubclass(arr_or_dtype, np.generic): - return np.dtype(arr_or_dtype) - # if we have an array-like - elif hasattr(arr_or_dtype, "dtype"): - arr_or_dtype = arr_or_dtype.dtype - - return pandas_dtype(arr_or_dtype) - - def _is_dtype_type(arr_or_dtype, condition) -> bool: """ Return true if the condition is satisfied for the arr_or_dtype. @@ -1628,6 +1424,67 @@ def _is_dtype_type(arr_or_dtype, condition) -> bool: return condition(tipo) +# type checking helpers that accept any dtype-like arg +def is_string_or_object_np_dtype(dtype: np.dtype) -> bool: + """ + Faster alternative to is_string_dtype, assumes we have a np.dtype object. + """ + return dtype == object or dtype.kind in "SU" + + +def is_1d_only_ea_dtype(dtype: DtypeObj | None) -> bool: + """ + Analogue to is_extension_array_dtype but excluding DatetimeTZDtype. + """ + # Note: if other EA dtypes are ever held in HybridBlock, exclude those + # here too. + # NB: need to check DatetimeTZDtype and not is_datetime64tz_dtype + # to exclude ArrowTimestampUSDtype + return isinstance(dtype, ExtensionDtype) and not isinstance( + dtype, (DatetimeTZDtype, PeriodDtype) + ) + + +def is_ea_or_datetimelike_dtype(dtype: DtypeObj | None) -> bool: + """ + Check for ExtensionDtype, datetime64 dtype, or timedelta64 dtype. + + Notes + ----- + Checks only for dtype objects, not dtype-castable strings or types. + """ + return isinstance(dtype, ExtensionDtype) or ( + isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"] + ) + + +def _validate_date_like_dtype(dtype) -> None: + """ + Check whether the dtype is a date-like dtype. Raises an error if invalid. + + Parameters + ---------- + dtype : dtype, type + The dtype to check. + + Raises + ------ + TypeError : The dtype could not be casted to a date-like dtype. + ValueError : The dtype is an illegal date-like dtype (e.g. the + frequency provided is too specific) + """ + try: + typ = np.datetime_data(dtype)[0] + except ValueError as e: + raise TypeError(e) from e + if typ not in ["generic", "ns"]: + raise ValueError( + f"{repr(dtype.name)} is too specific of a frequency, " + f"try passing {repr(dtype.type.__name__)}" + ) + + +# type inference/extraction functions def infer_dtype_from_object(dtype) -> type: """ Get a numpy dtype.type-style object for a dtype object. @@ -1692,56 +1549,38 @@ def infer_dtype_from_object(dtype) -> type: return infer_dtype_from_object(np.dtype(dtype)) -def _validate_date_like_dtype(dtype) -> None: +def get_dtype(arr_or_dtype) -> DtypeObj: """ - Check whether the dtype is a date-like dtype. Raises an error if invalid. + Get the dtype instance associated with an array + or dtype object. Parameters ---------- - dtype : dtype, type - The dtype to check. + arr_or_dtype : array-like or dtype + The array-like or dtype object whose dtype we want to extract. + + Returns + ------- + obj_dtype : The extract dtype instance from the + passed in array or dtype object. Raises ------ - TypeError : The dtype could not be casted to a date-like dtype. - ValueError : The dtype is an illegal date-like dtype (e.g. the - frequency provided is too specific) - """ - try: - typ = np.datetime_data(dtype)[0] - except ValueError as e: - raise TypeError(e) from e - if typ not in ["generic", "ns"]: - raise ValueError( - f"{repr(dtype.name)} is too specific of a frequency, " - f"try passing {repr(dtype.type.__name__)}" - ) - - -def validate_all_hashable(*args, error_name: str | None = None) -> None: + TypeError : The passed in object is None. """ - Return None if all args are hashable, else raise a TypeError. - - Parameters - ---------- - *args - Arguments to validate. - error_name : str, optional - The name to use if error + if arr_or_dtype is None: + raise TypeError("Cannot deduce dtype from null object") - Raises - ------ - TypeError : If an argument is not hashable + # fastpath + elif isinstance(arr_or_dtype, np.dtype): + return arr_or_dtype + elif isinstance(arr_or_dtype, type): + return np.dtype(arr_or_dtype) + # if we have an array-like + elif hasattr(arr_or_dtype, "dtype"): + arr_or_dtype = arr_or_dtype.dtype - Returns - ------- - None - """ - if not all(is_hashable(arg) for arg in args): - if error_name: - raise TypeError(f"{error_name} must be a hashable type") - else: - raise TypeError("All elements must be hashable") + return pandas_dtype(arr_or_dtype) def pandas_dtype(dtype) -> DtypeObj: @@ -1798,21 +1637,187 @@ def pandas_dtype(dtype) -> DtypeObj: return npdtype -def is_all_strings(value: ArrayLike) -> bool: +# type comparison functions +def is_dtype_equal(source, target) -> bool: """ - Check if this is an array of strings that we should try parsing. + Check if two dtypes are equal. - Includes object-dtype ndarray containing all-strings, StringArray, - and Categorical with all-string categories. - Does not include numpy string dtypes. + Parameters + ---------- + source : The first dtype to compare + target : The second dtype to compare + + Returns + ------- + boolean + Whether or not the two dtypes are equal. + + Examples + -------- + >>> is_dtype_equal(int, float) + False + >>> is_dtype_equal("int", int) + True + >>> is_dtype_equal(object, "category") + False + >>> is_dtype_equal(CategoricalDtype(), "category") + True + >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64") + False """ - dtype = value.dtype + if isinstance(target, str): + if not isinstance(source, str): + # GH#38516 ensure we get the same behavior from + # is_dtype_equal(CDT, "category") and CDT == "category" + try: + src = get_dtype(source) + if isinstance(src, ExtensionDtype): + return src == target + except (TypeError, AttributeError, ImportError): + return False + elif isinstance(source, str): + return is_dtype_equal(target, source) - if isinstance(dtype, np.dtype): - return ( - dtype == np.dtype("object") - and lib.infer_dtype(value, skipna=False) == "string" - ) - elif isinstance(dtype, CategoricalDtype): - return dtype.categories.inferred_type == "string" - return dtype == "string" + try: + source = get_dtype(source) + target = get_dtype(target) + return source == target + except (TypeError, AttributeError, ImportError): + + # invalid comparison + # object == category will hit this + return False + + +# This exists to silence numpy deprecation warnings, see GH#29553 +def is_numeric_v_string_like(a: ArrayLike, b): + """ + Check if we are comparing a string-like object to a numeric ndarray. + NumPy doesn't like to compare such objects, especially numeric arrays + and scalar string-likes. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean + Whether we return a comparing a string-like object to a numeric array. + + Examples + -------- + >>> is_numeric_v_string_like(np.array([1]), "foo") + True + >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + True + >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + True + >>> is_numeric_v_string_like(np.array([1]), np.array([2])) + False + >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + False + """ + is_a_array = isinstance(a, np.ndarray) + is_b_array = isinstance(b, np.ndarray) + + is_a_numeric_array = is_a_array and a.dtype.kind in ("u", "i", "f", "c", "b") + is_b_numeric_array = is_b_array and b.dtype.kind in ("u", "i", "f", "c", "b") + is_a_string_array = is_a_array and a.dtype.kind in ("S", "U") + is_b_string_array = is_b_array and b.dtype.kind in ("S", "U") + + is_b_scalar_string_like = not is_b_array and isinstance(b, str) + + return ( + (is_a_numeric_array and is_b_scalar_string_like) + or (is_a_numeric_array and is_b_string_array) + or (is_b_numeric_array and is_a_string_array) + ) + + +# This exists to silence numpy deprecation warnings, see GH#29553 +def is_datetimelike_v_numeric(a, b): + """ + Check if we are comparing a datetime-like object to a numeric object. + By "numeric," we mean an object that is either of an int or float dtype. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean + Whether we return a comparing a datetime-like to a numeric object. + + Examples + -------- + >>> from datetime import datetime + >>> dt = np.datetime64(datetime(2017, 1, 1)) + >>> + >>> is_datetimelike_v_numeric(1, 1) + False + >>> is_datetimelike_v_numeric(dt, dt) + False + >>> is_datetimelike_v_numeric(1, dt) + True + >>> is_datetimelike_v_numeric(dt, 1) # symmetric check + True + >>> is_datetimelike_v_numeric(np.array([dt]), 1) + True + >>> is_datetimelike_v_numeric(np.array([1]), dt) + True + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + True + >>> is_datetimelike_v_numeric(np.array([1]), np.array([2])) + False + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + False + """ + if not hasattr(a, "dtype"): + a = np.asarray(a) + if not hasattr(b, "dtype"): + b = np.asarray(b) + + def is_numeric(x): + """ + Check if an object has a numeric dtype (i.e. integer or float). + """ + return is_integer_dtype(x) or is_float_dtype(x) + + return (needs_i8_conversion(a) and is_numeric(b)) or ( + needs_i8_conversion(b) and is_numeric(a) + ) + + +# misc. +def validate_all_hashable(*args, error_name: str | None = None) -> None: + """ + Return None if all args are hashable, else raise a TypeError. + + Parameters + ---------- + *args + Arguments to validate. + error_name : str, optional + The name to use if error + + Raises + ------ + TypeError : If an argument is not hashable + + Returns + ------- + None + """ + if not all(is_hashable(arg) for arg in args): + if error_name: + raise TypeError(f"{error_name} must be a hashable type") + else: + raise TypeError("All elements must be hashable") From 7994adb3af41d87b4906f3add5a839f2d17b3d05 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 25 May 2022 17:53:37 -0700 Subject: [PATCH 7/7] match current behavior for now --- pandas/tests/dtypes/test_common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index dc4afbcd010f3..da9fbeeec94a0 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -147,6 +147,7 @@ def test_period_dtype(self, dtype): assert com.pandas_dtype(dtype) == dtype +@pytest.mark.xfail(reason="not yet implemented") def test_is_dtype_func_raises_if_passed_dtype_class( is_dtype_func, ea_dtype: type[ExtensionDtype],