From d9acf462781fada3c3ada1f20924a174390ec716 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 2 May 2017 13:07:27 -0400 Subject: [PATCH 1/2] DOC: Document pandas.core.dtypes.common Closes gh-15895. --- pandas/core/dtypes/common.py | 1209 ++++++++++++++++++++++++++++++++-- 1 file changed, 1156 insertions(+), 53 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index ba822071a3b72..6c2bbe330eeee 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -37,7 +37,7 @@ def _ensure_float(arr): Parameters ---------- - arr : ndarray, Series + arr : array-like The array whose data type we want to enforce as float. Returns @@ -82,46 +82,243 @@ def _ensure_categorical(arr): def is_object_dtype(arr_or_dtype): + """ + Check whether an array-like or dtype is of the object dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype to check. + + Returns + ------- + boolean : Whether or not the array-like or dtype is of the object dtype. + + Examples + -------- + >>> is_object_dtype(object) + True + >>> is_object_dtype(int) + False + >>> is_object_dtype(np.array([], dtype=object)) + True + >>> is_object_dtype(np.array([], dtype=int)) + False + >>> is_object_dtype([1, 2, 3]) + False + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) return issubclass(tipo, np.object_) -def is_sparse(array): - """ return if we are a sparse array """ - return isinstance(array, (ABCSparseArray, ABCSparseSeries)) +def is_sparse(arr): + """ + Check whether an array-like is a pandas sparse array. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is a pandas sparse array. 
+ + Examples + -------- + >>> is_sparse(np.array([1, 2, 3])) + False + >>> is_sparse(pd.SparseArray([1, 2, 3])) + True + >>> is_sparse(pd.SparseSeries([1, 2, 3])) + True + + This function checks only for pandas sparse array instances, so + sparse arrays from other libraries will return False. + + >>> from scipy.sparse import bsr_matrix + >>> is_sparse(bsr_matrix([1, 2, 3])) + False + """ + + return isinstance(arr, (ABCSparseArray, ABCSparseSeries)) + + +def is_scipy_sparse(arr): + """ + Check whether an array-like is a scipy.sparse.spmatrix instance. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is a + scipy.sparse.spmatrix instance. + + Notes + ----- + If scipy is not installed, this function will always return False. + Examples + -------- + >>> from scipy.sparse import bsr_matrix + >>> is_scipy_sparse(bsr_matrix([1, 2, 3])) + True + >>> is_scipy_sparse(pd.SparseArray([1, 2, 3])) + False + >>> is_scipy_sparse(pd.SparseSeries([1, 2, 3])) + False + """ -def is_scipy_sparse(array): - """ return if we are a scipy.sparse.spmatrix """ global _is_scipy_sparse + if _is_scipy_sparse is None: try: from scipy.sparse import issparse as _is_scipy_sparse except ImportError: _is_scipy_sparse = lambda _: False - return _is_scipy_sparse(array) + return _is_scipy_sparse(arr) -def is_categorical(array): - """ return if we are a categorical possibility """ - return isinstance(array, ABCCategorical) or is_categorical_dtype(array) +def is_categorical(arr): + """ + Check whether an array-like is a Categorical instance. -def is_datetimetz(array): - """ return if we are a datetime with tz array """ - return ((isinstance(array, ABCDatetimeIndex) and - getattr(array, 'tz', None) is not None) or - is_datetime64tz_dtype(array)) + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is of a Categorical instance. 
+ + Examples + -------- + >>> is_categorical([1, 2, 3]) + False + + Categoricals and Series Categoricals will return True. + + >>> cat = pd.Categorical([1, 2, 3]) + >>> is_categorical(cat) + True + >>> is_categorical(pd.Series(cat)) + True + """ + + return isinstance(arr, ABCCategorical) or is_categorical_dtype(arr) + + +def is_datetimetz(arr): + """ + Check whether an array-like is a datetime array-like with a timezone + component in its dtype. + + Parameters + ---------- + arr : array-like + The array-like to check. + Returns + ------- + boolean : Whether or not the array-like is a datetime array-like with + a timezone component in its dtype. + + Examples + -------- + >>> is_datetimetz([1, 2, 3]) + False + + Although the following examples are both DatetimeIndex objects, + the first one returns False because it has no timezone component + unlike the second one, which returns True. + + >>> is_datetimetz(pd.DatetimeIndex([1, 2, 3])) + False + >>> is_datetimetz(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + + The object need not be a DatetimeIndex object. It just needs to have + a dtype which has a timezone component. + + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_datetimetz(s) + True + """ + + # TODO: do we need this function? + # It seems like a repeat of is_datetime64tz_dtype. + + return ((isinstance(arr, ABCDatetimeIndex) and + getattr(arr, 'tz', None) is not None) or + is_datetime64tz_dtype(arr)) + + +def is_period(arr): + """ + Check whether an array-like is a periodical index. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is a periodical index. 
+ + Examples + -------- + >>> is_period([1, 2, 3]) + False + >>> is_period(pd.Index([1, 2, 3])) + False + >>> is_period(pd.PeriodIndex(["2017-01-01"], freq="D")) + True + """ -def is_period(array): - """ return if we are a period array """ - return isinstance(array, ABCPeriodIndex) or is_period_arraylike(array) + # TODO: do we need this function? + # It seems like a repeat of is_period_arraylike. + return isinstance(arr, ABCPeriodIndex) or is_period_arraylike(arr) def is_datetime64_dtype(arr_or_dtype): + """ + Check whether an array-like or dtype is of the datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype to check. + + Returns + ------- + boolean : Whether or not the array-like or dtype is of + the datetime64 dtype. + + Examples + -------- + >>> is_datetime64_dtype(object) + False + >>> is_datetime64_dtype(np.datetime64) + True + >>> is_datetime64_dtype(np.array([], dtype=int)) + False + >>> is_datetime64_dtype(np.array([], dtype=np.datetime64)) + True + >>> is_datetime64_dtype([1, 2, 3]) + False + """ + if arr_or_dtype is None: return False try: @@ -132,12 +329,69 @@ def is_datetime64_dtype(arr_or_dtype): def is_datetime64tz_dtype(arr_or_dtype): + """ + Check whether an array-like or dtype is of a DatetimeTZDtype dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype to check. + + Returns + ------- + boolean : Whether or not the array-like or dtype is of + a DatetimeTZDtype dtype. 
+ + Examples + -------- + >>> is_datetime64tz_dtype(object) + False + >>> is_datetime64tz_dtype([1, 2, 3]) + False + >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) # tz-naive + False + >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_datetime64tz_dtype(dtype) + True + >>> is_datetime64tz_dtype(s) + True + """ + if arr_or_dtype is None: return False return DatetimeTZDtype.is_dtype(arr_or_dtype) def is_timedelta64_dtype(arr_or_dtype): + """ + Check whether an array-like or dtype is of the timedelta64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype to check. + + Returns + ------- + boolean : Whether or not the array-like or dtype is + of the timedelta64 dtype. + + Examples + -------- + >>> is_timedelta64_dtype(object) + False + >>> is_timedelta64_dtype(np.timedelta64) + True + >>> is_timedelta64_dtype([1, 2, 3]) + False + >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) + True + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -145,18 +399,102 @@ def is_timedelta64_dtype(arr_or_dtype): def is_period_dtype(arr_or_dtype): + """ + Check whether an array-like or dtype is of the Period dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype to check. + + Returns + ------- + boolean : Whether or not the array-like or dtype is of the Period dtype. 
+ + Examples + -------- + >>> is_period_dtype(object) + False + >>> is_period_dtype(PeriodDtype(freq="D")) + True + >>> is_period_dtype([1, 2, 3]) + False + >>> is_period_dtype(pd.Period("2017-01-01")) + False + >>> is_period_dtype(pd.PeriodIndex([], freq="A")) + True + """ + + # TODO: Consider making Period an instance of PeriodDtype if arr_or_dtype is None: return False return PeriodDtype.is_dtype(arr_or_dtype) def is_interval_dtype(arr_or_dtype): + """ + Check whether an array-like or dtype is of the Interval dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype to check. + + Returns + ------- + boolean : Whether or not the array-like or dtype is + of the Interval dtype. + + Examples + -------- + >>> is_interval_dtype(object) + False + >>> is_interval_dtype(IntervalDtype()) + True + >>> is_interval_dtype([1, 2, 3]) + False + >>> + >>> interval = pd.Interval(1, 2, closed="right") + >>> is_interval_dtype(interval) + False + >>> is_interval_dtype(pd.IntervalIndex([interval])) + True + """ + + # TODO: Consider making Interval an instance of IntervalDtype if arr_or_dtype is None: return False return IntervalDtype.is_dtype(arr_or_dtype) def is_categorical_dtype(arr_or_dtype): + """ + Check whether an array-like or dtype is of the Categorical dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype to check. + + Returns + ------- + boolean : Whether or not the array-like or dtype is + of the Categorical dtype. 
+ + Examples + -------- + >>> is_categorical_dtype(object) + False + >>> is_categorical_dtype(CategoricalDtype()) + True + >>> is_categorical_dtype([1, 2, 3]) + False + >>> is_categorical_dtype(pd.Categorical([1, 2, 3])) + True + >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) + True + """ + if arr_or_dtype is None: return False return CategoricalDtype.is_dtype(arr_or_dtype) @@ -168,7 +506,7 @@ def is_string_dtype(arr_or_dtype): Parameters ---------- - arr_or_dtype : ndarray, dtype, type + arr_or_dtype : array-like The array or dtype to check. Returns @@ -186,7 +524,7 @@ def is_string_dtype(arr_or_dtype): >>> >>> is_string_dtype(np.array(['a', 'b'])) True - >>> is_string_dtype(np.array([1, 2])) + >>> is_string_dtype(pd.Series([1, 2])) False """ @@ -202,7 +540,29 @@ def is_string_dtype(arr_or_dtype): def is_period_arraylike(arr): - """ return if we are period arraylike / PeriodIndex """ + """ + Check whether an array-like is a periodical array-like or PeriodIndex. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is a periodical + array-like or PeriodIndex instance. + + Examples + -------- + >>> is_period_arraylike([1, 2, 3]) + False + >>> is_period_arraylike(pd.Index([1, 2, 3])) + False + >>> is_period_arraylike(pd.PeriodIndex(["2017-01-01"], freq="D")) + True + """ + if isinstance(arr, ABCPeriodIndex): return True elif isinstance(arr, (np.ndarray, ABCSeries)): @@ -211,7 +571,29 @@ def is_period_arraylike(arr): def is_datetime_arraylike(arr): - """ return if we are datetime arraylike / DatetimeIndex """ + """ + Check whether an array-like is a datetime array-like or DatetimeIndex. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is a datetime + array-like or DatetimeIndex. 
+ + Examples + -------- + >>> is_datetime_arraylike([1, 2, 3]) + False + >>> is_datetime_arraylike(pd.Index([1, 2, 3])) + False + >>> is_datetime_arraylike(pd.DatetimeIndex([1, 2, 3])) + True + """ + if isinstance(arr, ABCDatetimeIndex): return True elif isinstance(arr, (np.ndarray, ABCSeries)): @@ -220,6 +602,44 @@ def is_datetime_arraylike(arr): def is_datetimelike(arr): + """ + Check whether an array-like is a datetime-like array-like. + + Acceptable datetime-like objects are (but not limited to) datetime + indices, periodic indices, and timedelta indices. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is a datetime-like array-like. + + Examples + -------- + >>> is_datetimelike([1, 2, 3]) + False + >>> is_datetimelike(pd.Index([1, 2, 3])) + False + >>> is_datetimelike(pd.DatetimeIndex([1, 2, 3])) + True + >>> is_datetimelike(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + >>> is_datetimelike(pd.PeriodIndex([], freq="A")) + True + >>> is_datetimelike(np.array([], dtype=np.datetime64)) + True + >>> is_datetimelike(pd.Series([], dtype="timedelta64[ns]")) + True + >>> + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_datetimelike(s) + True + """ + return (is_datetime64_dtype(arr) or is_datetime64tz_dtype(arr) or is_timedelta64_dtype(arr) or isinstance(arr, ABCPeriodIndex) or @@ -227,7 +647,32 @@ def is_datetimelike(arr): def is_dtype_equal(source, target): - """ return a boolean if the dtypes are equal """ + """ + Check if two dtypes are equal. + + Parameters + ---------- + source : The first dtype to compare + target : The second dtype to compare + + Returns + ---------- + boolean : Whether or not the two dtypes are equal. 
+ + Examples + -------- + >>> is_dtype_equal(int, float) + False + >>> is_dtype_equal("int", int) + True + >>> is_dtype_equal(object, "category") + False + >>> is_dtype_equal(CategoricalDtype(), "category") + True + >>> is_dtype_equal(DatetimeTZDtype(), "datetime64") + False + """ + try: source = _get_dtype(source) target = _get_dtype(target) @@ -240,6 +685,47 @@ def is_dtype_equal(source, target): def is_any_int_dtype(arr_or_dtype): + """ + DEPRECATED: This function will be removed in a future version. + + Check whether the provided array or dtype is of an integer dtype. + + In this function, timedelta64 instances are also considered "any-integer" + type objects and will return True. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of an integer dtype. + + Examples + -------- + >>> is_any_int_dtype(str) + False + >>> is_any_int_dtype(int) + True + >>> is_any_int_dtype(float) + False + >>> is_any_int_dtype(np.uint64) + True + >>> is_any_int_dtype(np.datetime64) + False + >>> is_any_int_dtype(np.timedelta64) + True + >>> is_any_int_dtype(np.array(['a', 'b'])) + False + >>> is_any_int_dtype(pd.Series([1, 2])) + True + >>> is_any_int_dtype(np.array([], dtype=np.timedelta64)) + True + >>> is_any_int_dtype(pd.Index([1, 2.])) # float + False + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -247,6 +733,45 @@ def is_any_int_dtype(arr_or_dtype): def is_integer_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of an integer dtype. + + Unlike in `is_any_int_dtype`, timedelta64 instances will return False. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of an integer dtype + and not an instance of timedelta64. 
+ + Examples + -------- + >>> is_integer_dtype(str) + False + >>> is_integer_dtype(int) + True + >>> is_integer_dtype(float) + False + >>> is_integer_dtype(np.uint64) + True + >>> is_integer_dtype(np.datetime64) + False + >>> is_integer_dtype(np.timedelta64) + False + >>> is_integer_dtype(np.array(['a', 'b'])) + False + >>> is_integer_dtype(pd.Series([1, 2])) + True + >>> is_integer_dtype(np.array([], dtype=np.timedelta64)) + False + >>> is_integer_dtype(pd.Index([1, 2.])) # float + False + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -255,6 +780,47 @@ def is_integer_dtype(arr_or_dtype): def is_signed_integer_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of a signed integer dtype. + + Unlike in `is_any_int_dtype`, timedelta64 instances will return False. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of a signed integer dtype + and not an instance of timedelta64. + + Examples + -------- + >>> is_signed_integer_dtype(str) + False + >>> is_signed_integer_dtype(int) + True + >>> is_signed_integer_dtype(float) + False + >>> is_signed_integer_dtype(np.uint64) # unsigned + False + >>> is_signed_integer_dtype(np.datetime64) + False + >>> is_signed_integer_dtype(np.timedelta64) + False + >>> is_signed_integer_dtype(np.array(['a', 'b'])) + False + >>> is_signed_integer_dtype(pd.Series([1, 2])) + True + >>> is_signed_integer_dtype(np.array([], dtype=np.timedelta64)) + False + >>> is_signed_integer_dtype(pd.Index([1, 2.])) # float + False + >>> is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned + False + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -263,6 +829,39 @@ def is_signed_integer_dtype(arr_or_dtype): def is_unsigned_integer_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of an unsigned integer dtype. 
+ + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of an + unsigned integer dtype. + + Examples + -------- + >>> is_unsigned_integer_dtype(str) + False + >>> is_unsigned_integer_dtype(int) # signed + False + >>> is_unsigned_integer_dtype(float) + False + >>> is_unsigned_integer_dtype(np.uint64) + True + >>> is_unsigned_integer_dtype(np.array(['a', 'b'])) + False + >>> is_unsigned_integer_dtype(pd.Series([1, 2])) # signed + False + >>> is_unsigned_integer_dtype(pd.Index([1, 2.])) # float + False + >>> is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32)) + True + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -271,6 +870,46 @@ def is_unsigned_integer_dtype(arr_or_dtype): def is_int64_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of the int64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of the int64 dtype. + + Notes + ----- + Depending on system architecture, the return value of `is_int64_dtype( + int)` will be True if the OS uses 64-bit integers and False if the OS + uses 32-bit integers. 
+ + Examples + -------- + >>> is_int64_dtype(str) + False + >>> is_int64_dtype(np.int32) + False + >>> is_int64_dtype(np.int64) + True + >>> is_int64_dtype(float) + False + >>> is_int64_dtype(np.uint64) # unsigned + False + >>> is_int64_dtype(np.array(['a', 'b'])) + False + >>> is_int64_dtype(np.array([1, 2], dtype=np.int64)) + True + >>> is_int64_dtype(pd.Index([1, 2.])) # float + False + >>> is_int64_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned + False + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -278,6 +917,46 @@ def is_int64_dtype(arr_or_dtype): def is_int_or_datetime_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of an + integer, timedelta64, or datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of an + integer, timedelta64, or datetime64 dtype. + + Examples + -------- + >>> is_int_or_datetime_dtype(str) + False + >>> is_int_or_datetime_dtype(int) + True + >>> is_int_or_datetime_dtype(float) + False + >>> is_int_or_datetime_dtype(np.uint64) + True + >>> is_int_or_datetime_dtype(np.datetime64) + True + >>> is_int_or_datetime_dtype(np.timedelta64) + True + >>> is_int_or_datetime_dtype(np.array(['a', 'b'])) + False + >>> is_int_or_datetime_dtype(pd.Series([1, 2])) + True + >>> is_int_or_datetime_dtype(np.array([], dtype=np.timedelta64)) + True + >>> is_int_or_datetime_dtype(np.array([], dtype=np.datetime64)) + True + >>> is_int_or_datetime_dtype(pd.Index([1, 2.])) # float + False + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -285,7 +964,40 @@ def is_int_or_datetime_dtype(arr_or_dtype): issubclass(tipo, (np.datetime64, np.timedelta64))) -def is_datetime64_any_dtype(arr_or_dtype): +def is_datetime64_any_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of the datetime64 dtype. 
+ + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of the datetime64 dtype. + + Examples + -------- + >>> is_datetime64_any_dtype(str) + False + >>> is_datetime64_any_dtype(int) + False + >>> is_datetime64_any_dtype(np.datetime64) # can be tz-naive + True + >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) + True + >>> is_datetime64_any_dtype(np.array(['a', 'b'])) + False + >>> is_datetime64_any_dtype(np.array([1, 2])) + False + >>> is_datetime64_any_dtype(np.array([], dtype=np.datetime64)) + True + >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], + ... dtype=np.datetime64)) + True + """ + if arr_or_dtype is None: return False return (is_datetime64_dtype(arr_or_dtype) or @@ -293,6 +1005,42 @@ def is_datetime64_any_dtype(arr_or_dtype): def is_datetime64_ns_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of the datetime64[ns] dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of the datetime64[ns] dtype. 
+ + Examples + -------- + >>> is_datetime64_ns_dtype(str) + False + >>> is_datetime64_ns_dtype(int) + False + >>> is_datetime64_ns_dtype(np.datetime64) # no unit + False + >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) + True + >>> is_datetime64_ns_dtype(np.array(['a', 'b'])) + False + >>> is_datetime64_ns_dtype(np.array([1, 2])) + False + >>> is_datetime64_ns_dtype(np.array([], dtype=np.datetime64)) # no unit + False + >>> is_datetime64_ns_dtype(np.array([], + ... dtype="datetime64[ps]")) # wrong unit + False + >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], + ... dtype=np.datetime64)) # has 'ns' unit + True + """ + if arr_or_dtype is None: return False try: @@ -314,21 +1062,20 @@ def is_timedelta64_ns_dtype(arr_or_dtype): Parameters ---------- - arr_or_dtype : ndarray, dtype, type + arr_or_dtype : array-like The array or dtype to check. Returns ------- - boolean : Whether or not the array or dtype - is of the timedelta64[ns] dtype. + boolean : Whether or not the array or dtype is of the + timedelta64[ns] dtype. Examples -------- - >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]') + >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]')) True - >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]') # Wrong frequency + >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]')) # Wrong frequency False - >>> >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) True >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) @@ -345,6 +1092,40 @@ def is_timedelta64_ns_dtype(arr_or_dtype): def is_datetime_or_timedelta_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of + a timedelta64 or datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of a + timedelta64 or datetime64 dtype. 
+ + Examples + -------- + >>> is_datetime_or_timedelta_dtype(str) + False + >>> is_datetime_or_timedelta_dtype(int) + False + >>> is_datetime_or_timedelta_dtype(np.datetime64) + True + >>> is_datetime_or_timedelta_dtype(np.timedelta64) + True + >>> is_datetime_or_timedelta_dtype(np.array(['a', 'b'])) + False + >>> is_datetime_or_timedelta_dtype(pd.Series([1, 2])) + False + >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64)) + True + >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) + True + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -378,11 +1159,45 @@ def _is_unorderable_exception(e): def is_numeric_v_string_like(a, b): """ - numpy doesn't like to compare numeric arrays vs scalar string-likes + Check if we are comparing a string-like object to a numeric ndarray. + + NumPy doesn't like to compare such objects, especially numeric arrays + and scalar string-likes. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. - return a boolean result if this is the case for a,b or b,a + Returns + ------- + boolean : Whether we are comparing a string-like + object to a numeric array. 
+ Examples + -------- + >>> is_numeric_v_string_like(1, 1) + False + >>> is_numeric_v_string_like("foo", "foo") + False + >>> is_numeric_v_string_like(1, "foo") # non-array numeric + False + >>> is_numeric_v_string_like(np.array([1]), "foo") + True + >>> is_numeric_v_string_like("foo", np.array([1])) # symmetric check + True + >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + True + >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + True + >>> is_numeric_v_string_like(np.array([1]), np.array([2])) + False + >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + False """ + is_a_array = isinstance(a, np.ndarray) is_b_array = isinstance(b, np.ndarray) @@ -401,13 +1216,56 @@ def is_numeric_v_string_like(a, b): def is_datetimelike_v_numeric(a, b): - # return if we have an i8 convertible and numeric comparison + """ + Check if we are comparing a datetime-like object to a numeric object. + + By "numeric," we mean an object that is either of an int or float dtype. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean : Whether we are comparing a datetime-like + to a numeric object. 
+ + Examples + -------- + >>> dt = np.datetime64(pd.datetime(2017, 1, 1)) + >>> + >>> is_datetimelike_v_numeric(1, 1) + False + >>> is_datetimelike_v_numeric(dt, dt) + False + >>> is_datetimelike_v_numeric(1, dt) + True + >>> is_datetimelike_v_numeric(dt, 1) # symmetric check + True + >>> is_datetimelike_v_numeric(np.array([dt]), 1) + True + >>> is_datetimelike_v_numeric(np.array([1]), dt) + True + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + True + >>> is_datetimelike_v_numeric(np.array([1]), np.array([2])) + False + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + False + """ + if not hasattr(a, 'dtype'): a = np.asarray(a) if not hasattr(b, 'dtype'): b = np.asarray(b) def is_numeric(x): + """ + Check if an object has a numeric dtype (i.e. integer or float). + """ return is_integer_dtype(x) or is_float_dtype(x) is_datetimelike = needs_i8_conversion @@ -416,24 +1274,92 @@ def is_numeric(x): def is_datetimelike_v_object(a, b): - # return if we have an i8 convertible and object comparsion + """ + Check if we are comparing a datetime-like object to an object instance. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean : Whether we are comparing a datetime-like + to an object instance. 
+ + Examples + -------- + >>> obj = object() + >>> dt = np.datetime64(pd.datetime(2017, 1, 1)) + >>> + >>> is_datetimelike_v_object(obj, obj) + False + >>> is_datetimelike_v_object(dt, dt) + False + >>> is_datetimelike_v_object(obj, dt) + True + >>> is_datetimelike_v_object(dt, obj) # symmetric check + True + >>> is_datetimelike_v_object(np.array([dt]), obj) + True + >>> is_datetimelike_v_object(np.array([obj]), dt) + True + >>> is_datetimelike_v_object(np.array([dt]), np.array([obj])) + True + >>> is_datetimelike_v_object(np.array([obj]), np.array([obj])) + False + >>> is_datetimelike_v_object(np.array([dt]), np.array([1])) + False + >>> is_datetimelike_v_object(np.array([dt]), np.array([dt])) + False + """ + if not hasattr(a, 'dtype'): a = np.asarray(a) if not hasattr(b, 'dtype'): b = np.asarray(b) - def f(x): - return is_object_dtype(x) - - def is_object(x): - return is_integer_dtype(x) or is_float_dtype(x) - is_datetimelike = needs_i8_conversion - return ((is_datetimelike(a) and is_object(b)) or - (is_datetimelike(b) and is_object(a))) + return ((is_datetimelike(a) and is_object_dtype(b)) or + (is_datetimelike(b) and is_object_dtype(a))) def needs_i8_conversion(arr_or_dtype): + """ + Check whether the array or dtype should be converted to int64. + + An array-like or dtype "needs" such a conversion if the array-like + or dtype is of a datetime-like dtype + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype should be converted to int64. 
+ + Examples + -------- + >>> needs_i8_conversion(str) + False + >>> needs_i8_conversion(np.int64) + False + >>> needs_i8_conversion(np.datetime64) + True + >>> needs_i8_conversion(np.array(['a', 'b'])) + False + >>> needs_i8_conversion(pd.Series([1, 2])) + False + >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) + True + >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + """ + if arr_or_dtype is None: return False return (is_datetime_or_timedelta_dtype(arr_or_dtype) or @@ -442,6 +1368,42 @@ def needs_i8_conversion(arr_or_dtype): def is_numeric_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of a numeric dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of a numeric dtype. + + Examples + -------- + >>> is_numeric_dtype(str) + False + >>> is_numeric_dtype(int) + True + >>> is_numeric_dtype(float) + True + >>> is_numeric_dtype(np.uint64) + True + >>> is_numeric_dtype(np.datetime64) + False + >>> is_numeric_dtype(np.timedelta64) + False + >>> is_numeric_dtype(np.array(['a', 'b'])) + False + >>> is_numeric_dtype(pd.Series([1, 2])) + True + >>> is_numeric_dtype(pd.Index([1, 2.])) + True + >>> is_numeric_dtype(np.array([], dtype=np.timedelta64)) + False + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -458,7 +1420,7 @@ def is_string_like_dtype(arr_or_dtype): Parameters ---------- - arr_or_dtype : ndarray, dtype, type + arr_or_dtype : array-like The array or dtype to check. 
Returns @@ -471,10 +1433,9 @@ def is_string_like_dtype(arr_or_dtype): True >>> is_string_like_dtype(object) False - >>> >>> is_string_like_dtype(np.array(['a', 'b'])) True - >>> is_string_like_dtype(np.array([1, 2])) + >>> is_string_like_dtype(pd.Series([1, 2])) False """ @@ -488,6 +1449,34 @@ def is_string_like_dtype(arr_or_dtype): def is_float_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of a float dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of a float dtype. + + Examples + -------- + >>> is_float_dtype(str) + False + >>> is_float_dtype(int) + False + >>> is_float_dtype(float) + True + >>> is_float_dtype(np.array(['a', 'b'])) + False + >>> is_float_dtype(pd.Series([1, 2])) + False + >>> is_float_dtype(pd.Index([1, 2.])) + True + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -495,6 +1484,16 @@ def is_float_dtype(arr_or_dtype): def is_floating_dtype(arr_or_dtype): + """ + DEPRECATED: This function will be removed in a future version. + + Check whether the provided array or dtype is an instance of + numpy's float dtype. + + Unlike, `is_float_dtype`, this check is a lot stricter, as it requires + `isinstance` of `np.floating` and not `issubclass`. + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -502,6 +1501,36 @@ def is_floating_dtype(arr_or_dtype): def is_bool_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of a boolean dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of a boolean dtype. 
+ + Examples + -------- + >>> is_bool_dtype(str) + False + >>> is_bool_dtype(int) + False + >>> is_bool_dtype(bool) + True + >>> is_bool_dtype(np.bool) + True + >>> is_bool_dtype(np.array(['a', 'b'])) + False + >>> is_bool_dtype(pd.Series([1, 2])) + False + >>> is_bool_dtype(np.array([True, False])) + True + """ + if arr_or_dtype is None: return False try: @@ -512,21 +1541,94 @@ def is_bool_dtype(arr_or_dtype): return issubclass(tipo, np.bool_) -def is_extension_type(value): +def is_extension_type(arr): """ - if we are a klass that is preserved by the internals - these are internal klasses that we represent (and don't use a np.array) + Check whether an array-like is of a pandas extension class instance. + + Extension classes include categoricals, pandas sparse objects (i.e. + classes represented within the pandas library and not ones external + to it like scipy sparse matrices), and datetime-like arrays. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is of a pandas + extension class instance. 
+ + Examples + -------- + >>> is_extension_type([1, 2, 3]) + False + >>> is_extension_type(np.array([1, 2, 3])) + False + >>> + >>> cat = pd.Categorical([1, 2, 3]) + >>> + >>> is_extension_type(cat) + True + >>> is_extension_type(pd.Series(cat)) + True + >>> is_extension_type(pd.SparseArray([1, 2, 3])) + True + >>> is_extension_type(pd.SparseSeries([1, 2, 3])) + True + >>> + >>> from scipy.sparse import bsr_matrix + >>> is_extension_type(bsr_matrix([1, 2, 3])) + False + >>> is_extension_type(pd.DatetimeIndex([1, 2, 3])) + False + >>> is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + >>> + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_extension_type(s) + True """ - if is_categorical(value): + + if is_categorical(arr): return True - elif is_sparse(value): + elif is_sparse(arr): return True - elif is_datetimetz(value): + elif is_datetimetz(arr): return True return False def is_complex_dtype(arr_or_dtype): + """ + Check whether the provided array or dtype is of a complex dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of a complex dtype. + + Examples + -------- + >>> is_complex_dtype(str) + False + >>> is_complex_dtype(int) + False + >>> is_complex_dtype(np.complex) + True + >>> is_complex_dtype(np.array(['a', 'b'])) + False + >>> is_complex_dtype(pd.Series([1, 2])) + False + >>> is_complex_dtype(np.array([1 + 1j, 5])) + True + """ + if arr_or_dtype is None: return False tipo = _get_dtype_type(arr_or_dtype) @@ -570,7 +1672,7 @@ def _get_dtype(arr_or_dtype): Parameters ---------- - arr_or_dtype : ndarray, Series, dtype, type + arr_or_dtype : array-like The array-like or dtype object whose dtype we want to extract. 
Returns @@ -619,7 +1721,7 @@ def _get_dtype_type(arr_or_dtype): Parameters ---------- - arr_or_dtype : ndarray, Series, dtype, type + arr_or_dtype : array-like The array-like or dtype object whose type we want to extract. Returns @@ -754,6 +1856,7 @@ def pandas_dtype(dtype): ------- np.dtype or a pandas dtype """ + if isinstance(dtype, DatetimeTZDtype): return dtype elif isinstance(dtype, PeriodDtype): From a5521101a30ff56bdd87b2ddfe0232860cebd5e1 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 4 May 2017 11:48:59 -0400 Subject: [PATCH 2/2] TST: Add tests for pandas.core.dtypes.common The testing of this module was especially lacking with the exception of is_dtype_equal and pandas_dtype. --- pandas/tests/dtypes/test_common.py | 459 +++++++++++++++++++++++++++-- 1 file changed, 428 insertions(+), 31 deletions(-) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 68518e235d417..5b74397b1e770 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -4,11 +4,10 @@ import numpy as np import pandas as pd -from pandas.core.dtypes.dtypes import ( - DatetimeTZDtype, PeriodDtype, CategoricalDtype) -from pandas.core.dtypes.common import ( - pandas_dtype, is_dtype_equal) +from pandas.core.dtypes.dtypes import (DatetimeTZDtype, PeriodDtype, + CategoricalDtype, IntervalDtype) +import pandas.core.dtypes.common as com import pandas.util.testing as tm @@ -21,49 +20,49 @@ def test_invalid_dtype_error(self): invalid_list = [pd.Timestamp, 'pd.Timestamp', list] for dtype in invalid_list: with tm.assert_raises_regex(TypeError, msg): - pandas_dtype(dtype) + com.pandas_dtype(dtype) valid_list = [object, 'float64', np.object_, np.dtype('object'), 'O', np.float64, float, np.dtype('float64')] for dtype in valid_list: - pandas_dtype(dtype) + com.pandas_dtype(dtype) def test_numpy_dtype(self): for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']: - assert pandas_dtype(dtype) == np.dtype(dtype) + assert 
com.pandas_dtype(dtype) == np.dtype(dtype) def test_numpy_string_dtype(self): # do not parse freq-like string as period dtype - assert pandas_dtype('U') == np.dtype('U') - assert pandas_dtype('S') == np.dtype('S') + assert com.pandas_dtype('U') == np.dtype('U') + assert com.pandas_dtype('S') == np.dtype('S') def test_datetimetz_dtype(self): for dtype in ['datetime64[ns, US/Eastern]', 'datetime64[ns, Asia/Tokyo]', 'datetime64[ns, UTC]']: - assert pandas_dtype(dtype) is DatetimeTZDtype(dtype) - assert pandas_dtype(dtype) == DatetimeTZDtype(dtype) - assert pandas_dtype(dtype) == dtype + assert com.pandas_dtype(dtype) is DatetimeTZDtype(dtype) + assert com.pandas_dtype(dtype) == DatetimeTZDtype(dtype) + assert com.pandas_dtype(dtype) == dtype def test_categorical_dtype(self): - assert pandas_dtype('category') == CategoricalDtype() + assert com.pandas_dtype('category') == CategoricalDtype() def test_period_dtype(self): for dtype in ['period[D]', 'period[3M]', 'period[U]', 'Period[D]', 'Period[3M]', 'Period[U]']: - assert pandas_dtype(dtype) is PeriodDtype(dtype) - assert pandas_dtype(dtype) == PeriodDtype(dtype) - assert pandas_dtype(dtype) == dtype + assert com.pandas_dtype(dtype) is PeriodDtype(dtype) + assert com.pandas_dtype(dtype) == PeriodDtype(dtype) + assert com.pandas_dtype(dtype) == dtype -dtypes = dict(datetime_tz=pandas_dtype('datetime64[ns, US/Eastern]'), - datetime=pandas_dtype('datetime64[ns]'), - timedelta=pandas_dtype('timedelta64[ns]'), +dtypes = dict(datetime_tz=com.pandas_dtype('datetime64[ns, US/Eastern]'), + datetime=com.pandas_dtype('datetime64[ns]'), + timedelta=com.pandas_dtype('timedelta64[ns]'), period=PeriodDtype('D'), integer=np.dtype(np.int64), float=np.dtype(np.float64), object=np.dtype(np.object), - category=pandas_dtype('category')) + category=com.pandas_dtype('category')) @pytest.mark.parametrize('name1,dtype1', @@ -75,31 +74,30 @@ def test_period_dtype(self): def test_dtype_equal(name1, dtype1, name2, dtype2): # match equal to self, 
but not equal to other - assert is_dtype_equal(dtype1, dtype1) + assert com.is_dtype_equal(dtype1, dtype1) if name1 != name2: - assert not is_dtype_equal(dtype1, dtype2) + assert not com.is_dtype_equal(dtype1, dtype2) def test_dtype_equal_strict(): # we are strict on kind equality for dtype in [np.int8, np.int16, np.int32]: - assert not is_dtype_equal(np.int64, dtype) + assert not com.is_dtype_equal(np.int64, dtype) for dtype in [np.float32]: - assert not is_dtype_equal(np.float64, dtype) + assert not com.is_dtype_equal(np.float64, dtype) # strict w.r.t. PeriodDtype - assert not is_dtype_equal(PeriodDtype('D'), - PeriodDtype('2D')) + assert not com.is_dtype_equal(PeriodDtype('D'), PeriodDtype('2D')) # strict w.r.t. datetime64 - assert not is_dtype_equal( - pandas_dtype('datetime64[ns, US/Eastern]'), - pandas_dtype('datetime64[ns, CET]')) + assert not com.is_dtype_equal( + com.pandas_dtype('datetime64[ns, US/Eastern]'), + com.pandas_dtype('datetime64[ns, CET]')) # see gh-15941: no exception should be raised - assert not is_dtype_equal(None, None) + assert not com.is_dtype_equal(None, None) def get_is_dtype_funcs(): @@ -108,7 +106,6 @@ def get_is_dtype_funcs(): begin with 'is_' and end with 'dtype' """ - import pandas.core.dtypes.common as com fnames = [f for f in dir(com) if (f.startswith('is_') and f.endswith('dtype'))] @@ -124,3 +121,403 @@ def test_get_dtype_error_catch(func): # No exception should be raised. assert not func(None) + + +def test_is_object(): + assert com.is_object_dtype(object) + assert com.is_object_dtype(np.array([], dtype=object)) + + assert not com.is_object_dtype(int) + assert not com.is_object_dtype(np.array([], dtype=int)) + assert not com.is_object_dtype([1, 2, 3]) + + +def test_is_sparse(): + assert com.is_sparse(pd.SparseArray([1, 2, 3])) + assert com.is_sparse(pd.SparseSeries([1, 2, 3])) + + assert not com.is_sparse(np.array([1, 2, 3])) + + # This test will only skip if the previous assertions + # pass AND scipy is not installed. 
+ sparse = pytest.importorskip("scipy.sparse") + assert not com.is_sparse(sparse.bsr_matrix([1, 2, 3])) + + +def test_is_scipy_sparse(): + tm._skip_if_no_scipy() + + from scipy.sparse import bsr_matrix + assert com.is_scipy_sparse(bsr_matrix([1, 2, 3])) + + assert not com.is_scipy_sparse(pd.SparseArray([1, 2, 3])) + assert not com.is_scipy_sparse(pd.SparseSeries([1, 2, 3])) + + +def test_is_categorical(): + cat = pd.Categorical([1, 2, 3]) + assert com.is_categorical(cat) + assert com.is_categorical(pd.Series(cat)) + + assert not com.is_categorical([1, 2, 3]) + + +def test_is_datetimetz(): + assert not com.is_datetimetz([1, 2, 3]) + assert not com.is_datetimetz(pd.DatetimeIndex([1, 2, 3])) + + assert com.is_datetimetz(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + + dtype = DatetimeTZDtype("ns", tz="US/Eastern") + s = pd.Series([], dtype=dtype) + assert com.is_datetimetz(s) + + +def test_is_period(): + assert not com.is_period([1, 2, 3]) + assert not com.is_period(pd.Index([1, 2, 3])) + assert com.is_period(pd.PeriodIndex(["2017-01-01"], freq="D")) + + +def test_is_datetime64_dtype(): + assert not com.is_datetime64_dtype(object) + assert not com.is_datetime64_dtype([1, 2, 3]) + assert not com.is_datetime64_dtype(np.array([], dtype=int)) + + assert com.is_datetime64_dtype(np.datetime64) + assert com.is_datetime64_dtype(np.array([], dtype=np.datetime64)) + + +def test_is_datetime64tz_dtype(): + assert not com.is_datetime64tz_dtype(object) + assert not com.is_datetime64tz_dtype([1, 2, 3]) + assert not com.is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) + assert com.is_datetime64tz_dtype(pd.DatetimeIndex( + [1, 2, 3], tz="US/Eastern")) + + +def test_is_timedelta64_dtype(): + assert not com.is_timedelta64_dtype(object) + assert not com.is_timedelta64_dtype([1, 2, 3]) + + assert com.is_timedelta64_dtype(np.timedelta64) + assert com.is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) + + +def test_is_period_dtype(): + assert not com.is_period_dtype(object) + 
assert not com.is_period_dtype([1, 2, 3]) + assert not com.is_period_dtype(pd.Period("2017-01-01")) + + assert com.is_period_dtype(PeriodDtype(freq="D")) + assert com.is_period_dtype(pd.PeriodIndex([], freq="A")) + + +def test_is_interval_dtype(): + assert not com.is_interval_dtype(object) + assert not com.is_interval_dtype([1, 2, 3]) + + assert com.is_interval_dtype(IntervalDtype()) + + interval = pd.Interval(1, 2, closed="right") + assert not com.is_interval_dtype(interval) + assert com.is_interval_dtype(pd.IntervalIndex([interval])) + + +def test_is_categorical_dtype(): + assert not com.is_categorical_dtype(object) + assert not com.is_categorical_dtype([1, 2, 3]) + + assert com.is_categorical_dtype(CategoricalDtype()) + assert com.is_categorical_dtype(pd.Categorical([1, 2, 3])) + assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) + + +def test_is_string_dtype(): + assert not com.is_string_dtype(int) + assert not com.is_string_dtype(pd.Series([1, 2])) + + assert com.is_string_dtype(str) + assert com.is_string_dtype(object) + assert com.is_string_dtype(np.array(['a', 'b'])) + + +def test_is_period_arraylike(): + assert not com.is_period_arraylike([1, 2, 3]) + assert not com.is_period_arraylike(pd.Index([1, 2, 3])) + assert com.is_period_arraylike(pd.PeriodIndex(["2017-01-01"], freq="D")) + + +def test_is_datetime_arraylike(): + assert not com.is_datetime_arraylike([1, 2, 3]) + assert not com.is_datetime_arraylike(pd.Index([1, 2, 3])) + assert com.is_datetime_arraylike(pd.DatetimeIndex([1, 2, 3])) + + +def test_is_datetimelike(): + assert not com.is_datetimelike([1, 2, 3]) + assert not com.is_datetimelike(pd.Index([1, 2, 3])) + + assert com.is_datetimelike(pd.DatetimeIndex([1, 2, 3])) + assert com.is_datetimelike(pd.PeriodIndex([], freq="A")) + assert com.is_datetimelike(np.array([], dtype=np.datetime64)) + assert com.is_datetimelike(pd.Series([], dtype="timedelta64[ns]")) + assert com.is_datetimelike(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + + 
dtype = DatetimeTZDtype("ns", tz="US/Eastern") + s = pd.Series([], dtype=dtype) + assert com.is_datetimelike(s) + + +def test_is_integer_dtype(): + assert not com.is_integer_dtype(str) + assert not com.is_integer_dtype(float) + assert not com.is_integer_dtype(np.datetime64) + assert not com.is_integer_dtype(np.timedelta64) + assert not com.is_integer_dtype(pd.Index([1, 2.])) + assert not com.is_integer_dtype(np.array(['a', 'b'])) + assert not com.is_integer_dtype(np.array([], dtype=np.timedelta64)) + + assert com.is_integer_dtype(int) + assert com.is_integer_dtype(np.uint64) + assert com.is_integer_dtype(pd.Series([1, 2])) + + +def test_is_signed_integer_dtype(): + assert not com.is_signed_integer_dtype(str) + assert not com.is_signed_integer_dtype(float) + assert not com.is_signed_integer_dtype(np.uint64) + assert not com.is_signed_integer_dtype(np.datetime64) + assert not com.is_signed_integer_dtype(np.timedelta64) + assert not com.is_signed_integer_dtype(pd.Index([1, 2.])) + assert not com.is_signed_integer_dtype(np.array(['a', 'b'])) + assert not com.is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32)) + assert not com.is_signed_integer_dtype(np.array([], dtype=np.timedelta64)) + + assert com.is_signed_integer_dtype(int) + assert com.is_signed_integer_dtype(pd.Series([1, 2])) + + +def test_is_unsigned_integer_dtype(): + assert not com.is_unsigned_integer_dtype(str) + assert not com.is_unsigned_integer_dtype(int) + assert not com.is_unsigned_integer_dtype(float) + assert not com.is_unsigned_integer_dtype(pd.Series([1, 2])) + assert not com.is_unsigned_integer_dtype(pd.Index([1, 2.])) + assert not com.is_unsigned_integer_dtype(np.array(['a', 'b'])) + + assert com.is_unsigned_integer_dtype(np.uint64) + assert com.is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32)) + + +def test_is_int64_dtype(): + assert not com.is_int64_dtype(str) + assert not com.is_int64_dtype(float) + assert not com.is_int64_dtype(np.int32) + assert not 
com.is_int64_dtype(np.uint64) + assert not com.is_int64_dtype(pd.Index([1, 2.])) + assert not com.is_int64_dtype(np.array(['a', 'b'])) + assert not com.is_int64_dtype(np.array([1, 2], dtype=np.uint32)) + + assert com.is_int64_dtype(np.int64) + assert com.is_int64_dtype(np.array([1, 2], dtype=np.int64)) + + +def test_is_int_or_datetime_dtype(): + assert not com.is_int_or_datetime_dtype(str) + assert not com.is_int_or_datetime_dtype(float) + assert not com.is_int_or_datetime_dtype(pd.Index([1, 2.])) + assert not com.is_int_or_datetime_dtype(np.array(['a', 'b'])) + + assert com.is_int_or_datetime_dtype(int) + assert com.is_int_or_datetime_dtype(np.uint64) + assert com.is_int_or_datetime_dtype(np.datetime64) + assert com.is_int_or_datetime_dtype(np.timedelta64) + assert com.is_int_or_datetime_dtype(pd.Series([1, 2])) + assert com.is_int_or_datetime_dtype(np.array([], dtype=np.datetime64)) + assert com.is_int_or_datetime_dtype(np.array([], dtype=np.timedelta64)) + + +def test_is_datetime64_any_dtype(): + assert not com.is_datetime64_any_dtype(int) + assert not com.is_datetime64_any_dtype(str) + assert not com.is_datetime64_any_dtype(np.array([1, 2])) + assert not com.is_datetime64_any_dtype(np.array(['a', 'b'])) + + assert com.is_datetime64_any_dtype(np.datetime64) + assert com.is_datetime64_any_dtype(np.array([], dtype=np.datetime64)) + assert com.is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) + assert com.is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], + dtype=np.datetime64)) + + +def test_is_datetime64_ns_dtype(): + assert not com.is_datetime64_ns_dtype(int) + assert not com.is_datetime64_ns_dtype(str) + assert not com.is_datetime64_ns_dtype(np.datetime64) + assert not com.is_datetime64_ns_dtype(np.array([1, 2])) + assert not com.is_datetime64_ns_dtype(np.array(['a', 'b'])) + assert not com.is_datetime64_ns_dtype(np.array([], dtype=np.datetime64)) + + # This datetime array has the wrong unit (ps instead of ns) + assert not 
com.is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]")) + + assert com.is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) + assert com.is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], + dtype=np.datetime64)) + + +def test_is_timedelta64_ns_dtype(): + assert not com.is_timedelta64_ns_dtype(np.dtype('m8[ps]')) + assert not com.is_timedelta64_ns_dtype( + np.array([1, 2], dtype=np.timedelta64)) + + assert com.is_timedelta64_ns_dtype(np.dtype('m8[ns]')) + assert com.is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) + + +def test_is_datetime_or_timedelta_dtype(): + assert not com.is_datetime_or_timedelta_dtype(int) + assert not com.is_datetime_or_timedelta_dtype(str) + assert not com.is_datetime_or_timedelta_dtype(pd.Series([1, 2])) + assert not com.is_datetime_or_timedelta_dtype(np.array(['a', 'b'])) + + assert com.is_datetime_or_timedelta_dtype(np.datetime64) + assert com.is_datetime_or_timedelta_dtype(np.timedelta64) + assert com.is_datetime_or_timedelta_dtype( + np.array([], dtype=np.timedelta64)) + assert com.is_datetime_or_timedelta_dtype( + np.array([], dtype=np.datetime64)) + + +def test_is_numeric_v_string_like(): + assert not com.is_numeric_v_string_like(1, 1) + assert not com.is_numeric_v_string_like(1, "foo") + assert not com.is_numeric_v_string_like("foo", "foo") + assert not com.is_numeric_v_string_like(np.array([1]), np.array([2])) + assert not com.is_numeric_v_string_like( + np.array(["foo"]), np.array(["foo"])) + + assert com.is_numeric_v_string_like(np.array([1]), "foo") + assert com.is_numeric_v_string_like("foo", np.array([1])) + assert com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + + +def test_is_datetimelike_v_numeric(): + dt = np.datetime64(pd.datetime(2017, 1, 1)) + + assert not com.is_datetimelike_v_numeric(1, 1) + assert not com.is_datetimelike_v_numeric(dt, dt) + assert not com.is_datetimelike_v_numeric(np.array([1]), 
np.array([2])) + assert not com.is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + + assert com.is_datetimelike_v_numeric(1, dt) + assert com.is_datetimelike_v_numeric(1, dt) + assert com.is_datetimelike_v_numeric(np.array([dt]), 1) + assert com.is_datetimelike_v_numeric(np.array([1]), dt) + assert com.is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + + +def test_is_datetimelike_v_object(): + obj = object() + dt = np.datetime64(pd.datetime(2017, 1, 1)) + + assert not com.is_datetimelike_v_object(dt, dt) + assert not com.is_datetimelike_v_object(obj, obj) + assert not com.is_datetimelike_v_object(np.array([dt]), np.array([1])) + assert not com.is_datetimelike_v_object(np.array([dt]), np.array([dt])) + assert not com.is_datetimelike_v_object(np.array([obj]), np.array([obj])) + + assert com.is_datetimelike_v_object(dt, obj) + assert com.is_datetimelike_v_object(obj, dt) + assert com.is_datetimelike_v_object(np.array([dt]), obj) + assert com.is_datetimelike_v_object(np.array([obj]), dt) + assert com.is_datetimelike_v_object(np.array([dt]), np.array([obj])) + + +def test_needs_i8_conversion(): + assert not com.needs_i8_conversion(str) + assert not com.needs_i8_conversion(np.int64) + assert not com.needs_i8_conversion(pd.Series([1, 2])) + assert not com.needs_i8_conversion(np.array(['a', 'b'])) + + assert com.needs_i8_conversion(np.datetime64) + assert com.needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) + assert com.needs_i8_conversion(pd.DatetimeIndex( + [1, 2, 3], tz="US/Eastern")) + + +def test_is_numeric_dtype(): + assert not com.is_numeric_dtype(str) + assert not com.is_numeric_dtype(np.datetime64) + assert not com.is_numeric_dtype(np.timedelta64) + assert not com.is_numeric_dtype(np.array(['a', 'b'])) + assert not com.is_numeric_dtype(np.array([], dtype=np.timedelta64)) + + assert com.is_numeric_dtype(int) + assert com.is_numeric_dtype(float) + assert com.is_numeric_dtype(np.uint64) + assert com.is_numeric_dtype(pd.Series([1, 2])) + 
assert com.is_numeric_dtype(pd.Index([1, 2.])) + + +def test_is_string_like_dtype(): + assert not com.is_string_like_dtype(object) + assert not com.is_string_like_dtype(pd.Series([1, 2])) + + assert com.is_string_like_dtype(str) + assert com.is_string_like_dtype(np.array(['a', 'b'])) + + +def test_is_float_dtype(): + assert not com.is_float_dtype(str) + assert not com.is_float_dtype(int) + assert not com.is_float_dtype(pd.Series([1, 2])) + assert not com.is_float_dtype(np.array(['a', 'b'])) + + assert com.is_float_dtype(float) + assert com.is_float_dtype(pd.Index([1, 2.])) + + +def test_is_bool_dtype(): + assert not com.is_bool_dtype(int) + assert not com.is_bool_dtype(str) + assert not com.is_bool_dtype(pd.Series([1, 2])) + assert not com.is_bool_dtype(np.array(['a', 'b'])) + + assert com.is_bool_dtype(bool) + assert com.is_bool_dtype(np.bool) + assert com.is_bool_dtype(np.array([True, False])) + + +def test_is_extension_type(): + assert not com.is_extension_type([1, 2, 3]) + assert not com.is_extension_type(np.array([1, 2, 3])) + assert not com.is_extension_type(pd.DatetimeIndex([1, 2, 3])) + + cat = pd.Categorical([1, 2, 3]) + assert com.is_extension_type(cat) + assert com.is_extension_type(pd.Series(cat)) + assert com.is_extension_type(pd.SparseArray([1, 2, 3])) + assert com.is_extension_type(pd.SparseSeries([1, 2, 3])) + assert com.is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + + dtype = DatetimeTZDtype("ns", tz="US/Eastern") + s = pd.Series([], dtype=dtype) + assert com.is_extension_type(s) + + # This test will only skip if the previous assertions + # pass AND scipy is not installed. 
+ sparse = pytest.importorskip("scipy.sparse") + assert not com.is_extension_type(sparse.bsr_matrix([1, 2, 3])) + + +def test_is_complex_dtype(): + assert not com.is_complex_dtype(int) + assert not com.is_complex_dtype(str) + assert not com.is_complex_dtype(pd.Series([1, 2])) + assert not com.is_complex_dtype(np.array(['a', 'b'])) + + assert com.is_complex_dtype(np.complex) + assert com.is_complex_dtype(np.array([1 + 1j, 5]))