From a3cc121b5cacbf9e06e438dc86dcef8facfeb11e Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 7 Apr 2017 10:45:28 -0400 Subject: [PATCH 1/3] DOC: document internal methods in types/common.py Partially addresses gh-15895. --- pandas/types/common.py | 146 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 131 insertions(+), 15 deletions(-) diff --git a/pandas/types/common.py b/pandas/types/common.py index 017805673defe..5ecc95a05e8ce 100644 --- a/pandas/types/common.py +++ b/pandas/types/common.py @@ -31,6 +31,20 @@ def _ensure_float(arr): + """ + Ensure that an array object has a float dtype if possible. + + Parameters + ---------- + arr : ndarray, Series + The array whose data type we want to enforce as float. + + Returns + ------- + float_arr : The original array cast to the float dtype if + possible. Otherwise, the original array is returned. + """ + if issubclass(arr.dtype.type, (np.integer, np.bool_)): arr = arr.astype(float) return arr @@ -46,6 +60,20 @@ def _ensure_float(arr): def _ensure_categorical(arr): + """ + Ensure that an array-like object is a Categorical (if not already). + + Parameters + ---------- + arr : array-like + The array that we want to convert into a Categorical. + + Returns + ------- + cat_arr : The original array cast as a Categorical. If it already + is a Categorical, we return as is. + """ + if not is_categorical(arr): from pandas import Categorical arr = Categorical(arr) @@ -220,10 +248,22 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype): def _is_unorderable_exception(e): """ - return a boolean if we an unorderable exception error message + Check if the exception raised is an unorderable exception. + + The error message differs for 3 <= PY <= 3.5 and PY >= 3.6, so + we need to condition based on Python version. + + Parameters + ---------- + e : Exception or sub-class + The exception object to check. - These are different error message for PY>=3<=3.5 and PY>=3.6 + Returns + ------- + is_orderable_exc : Whether or not the exception raised is an + unorderable exception. """ + if PY36: return "'>' not supported between instances of" in str(e) @@ -346,7 +386,22 @@ def is_complex_dtype(arr_or_dtype): def _coerce_to_dtype(dtype): - """ coerce a string / np.dtype to a dtype """ + """ + Coerce a string or np.dtype to a pandas or numpy + dtype if possible. + + If we cannot convert to a pandas dtype initially, + we convert to a numpy dtype. + + Parameters + ---------- + dtype : The dtype that we want to coerce. + + Returns + ------- + pd_or_np_dtype : The coerced dtype. + """ + if is_categorical_dtype(dtype): dtype = CategoricalDtype() elif is_datetime64tz_dtype(dtype): @@ -359,8 +414,27 @@ def _coerce_to_dtype(dtype): def _get_dtype(arr_or_dtype): + """ + Get the dtype instance associated with an array + or dtype object. + + Parameters + ---------- + arr_or_dtype : ndarray, Series, dtype, type + The array-like or dtype object whose dtype we want to extract. + + Returns + ------- + obj_dtype : The extract dtype instance from the + passed in array or dtype object. + + Raises + ------ + TypeError : The passed in object is None. + """ + if arr_or_dtype is None: - raise TypeError + raise TypeError("Cannot deduce dtype from null object") if isinstance(arr_or_dtype, np.dtype): return arr_or_dtype elif isinstance(arr_or_dtype, type): @@ -385,6 +459,21 @@ def _get_dtype(arr_or_dtype): def _get_dtype_type(arr_or_dtype): + """ + Get the type (NOT dtype) instance associated with + an array or dtype object. + + Parameters + ---------- + arr_or_dtype : ndarray, Series, dtype, type + The array-like or dtype object whose type we want to extract. + + Returns + ------- + obj_type : The extract type instance from the + passed in array or dtype object. + """ + if isinstance(arr_or_dtype, np.dtype): return arr_or_dtype.type elif isinstance(arr_or_dtype, type): @@ -410,16 +499,27 @@ def _get_dtype_type(arr_or_dtype): def _get_dtype_from_object(dtype): - """Get a numpy dtype.type-style object. This handles the datetime64[ns] - and datetime64[ns, TZ] compat + """ + Get a numpy dtype.type-style object for a dtype object. + + This methods also includes handling of the datetime64[ns] and + datetime64[ns, TZ] objects. + + If no dtype can be found, we return ``object``. + + Parameters + ---------- + dtype : dtype, type + The dtype object whose numpy dtype.type-style + object we want to extract. - Notes - ----- - If nothing can be found, returns ``object``. + Returns + ------- + dtype_object : The extracted numpy dtype.type-style object. """ - # type object from a dtype if isinstance(dtype, type) and issubclass(dtype, np.generic): + # Type object from a dtype return dtype elif is_categorical(dtype): return CategoricalDtype().type @@ -429,7 +529,7 @@ def _get_dtype_from_object(dtype): try: _validate_date_like_dtype(dtype) except TypeError: - # should still pass if we don't have a datelike + # Should still pass if we don't have a date-like pass return dtype.type elif isinstance(dtype, string_types): @@ -444,10 +544,11 @@ def _get_dtype_from_object(dtype): try: return _get_dtype_from_object(getattr(np, dtype)) except (AttributeError, TypeError): - # handles cases like _get_dtype(int) - # i.e., python objects that are valid dtypes (unlike user-defined - # types, in general) - # TypeError handles the float16 typecode of 'e' + # Handles cases like _get_dtype(int) i.e., + # Python objects that are valid dtypes + # (unlike user-defined types, in general) + # + # TypeError handles the float16 type code of 'e' # further handle internal types pass @@ -455,6 +556,21 @@ def _get_dtype_from_object(dtype): def _validate_date_like_dtype(dtype): + """ + Check whether the dtype is a date-like dtype. Raises an error if invalid. + + Parameters + ---------- + dtype : dtype, type + The dtype to check. + + Raises + ------ + TypeError : The dtype could not be casted to a date-like dtype. + ValueError : The dtype is an illegal date-like dtype (e.g. the + the frequency provided is too specific) + """ + try: typ = np.datetime_data(dtype)[0] except ValueError as e: From 327687c96defc0c539b2aed62eae98c0f8a22c9b Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 7 Apr 2017 13:06:33 -0400 Subject: [PATCH 2/3] BUG: Catch TypeError when calling _get_dtype The following functions were not catching the TypeError raised by _get_dtype: 1) is_string_dtype 2) is_string_like_dtype 3) is_timedelta64_ns_dtype Thus, when "None" was passed in, an Exception was raised instead of returning False, as other functions did. --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/types/test_common.py | 28 ++++++++ pandas/types/common.py | 111 +++++++++++++++++++++++++++--- 3 files changed, 131 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 0b98e57c606a3..436d51da6e873 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1145,6 +1145,7 @@ Conversion - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) - Bug in ``DataFrame`` construction with nulls and datetimes in a list-like (:issue:`15869`) - Bug in ``DataFrame.fillna()`` with tz-aware datetimes (:issue:`15855`) +- Bug in ``is_string_dtype``, ``is_timedelta64_ns_dtype``, and ``is_string_like_dtype`` in which an error was raised when ``None`` was passed in (:issue:`15941`) Indexing ^^^^^^^^ diff --git a/pandas/tests/types/test_common.py b/pandas/tests/types/test_common.py index c15f219c8fad6..600aa5bf3634d 100644 --- a/pandas/tests/types/test_common.py +++ b/pandas/tests/types/test_common.py @@ -6,6 +6,7 @@ from pandas.types.dtypes import DatetimeTZDtype, PeriodDtype, CategoricalDtype from pandas.types.common import pandas_dtype, is_dtype_equal +import pandas.types.common as com import pandas.util.testing as tm @@ -80,3 +81,30 @@ def test_dtype_equal_strict(): assert not is_dtype_equal( pandas_dtype('datetime64[ns, US/Eastern]'), pandas_dtype('datetime64[ns, CET]')) + + # see gh-15941: no exception should be raised + assert not is_dtype_equal(None, None) + + +def get_is_dtype_funcs(): + """ + Get all functions in pandas.types.common that + begin with 'is_' and end with 'dtype' + + Assumes that we have imported the module + "pandas.types.common" as com. + """ + + fnames = [f for f in dir(com) if (f.startswith('is_') and + f.endswith('dtype'))] + return [getattr(com, fname) for fname in fnames] + + +@pytest.mark.parametrize('func', + get_is_dtype_funcs()) +def test_get_dtype_error_catch(func): + # see gh-15941 + # + # No exception should be raised. + + assert not func(None) diff --git a/pandas/types/common.py b/pandas/types/common.py index 5ecc95a05e8ce..7ab2e068ac69f 100644 --- a/pandas/types/common.py +++ b/pandas/types/common.py @@ -144,8 +144,40 @@ def is_categorical_dtype(arr_or_dtype): def is_string_dtype(arr_or_dtype): - dtype = _get_dtype(arr_or_dtype) - return dtype.kind in ('O', 'S', 'U') and not is_period_dtype(dtype) + """ + Check whether the provided array or dtype is of the string dtype. + + Parameters + ---------- + arr_or_dtype : ndarray, dtype, type + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of the string dtype. + + Examples + -------- + >>> is_string_dtype(str) + True + >>> is_string_dtype(object) + True + >>> is_string_dtype(int) + False + >>> + >>> is_string_dtype(np.array(['a', 'b'])) + True + >>> is_string_dtype(np.array([1, 2])) + False + """ + + # TODO: gh-15585: consider making the checks stricter. + + try: + dtype = _get_dtype(arr_or_dtype) + return dtype.kind in ('O', 'S', 'U') and not is_period_dtype(dtype) + except TypeError: + return False def is_period_arraylike(arr): @@ -237,8 +269,40 @@ def is_datetime64_ns_dtype(arr_or_dtype): def is_timedelta64_ns_dtype(arr_or_dtype): - tipo = _get_dtype(arr_or_dtype) - return tipo == _TD_DTYPE + """ + Check whether the provided array or dtype is of the timedelta64[ns] dtype. + + This is a very specific dtype, so generic ones like `np.timedelta64` + will return False if passed into this function. + + Parameters + ---------- + arr_or_dtype : ndarray, dtype, type + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype + is of the timedelta64[ns] dtype. + + Examples + -------- + >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]') + True + >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]') # Wrong frequency + False + >>> + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) + True + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) + False + """ + + try: + tipo = _get_dtype(arr_or_dtype) + return tipo == _TD_DTYPE + except TypeError: + return False def is_datetime_or_timedelta_dtype(arr_or_dtype): @@ -260,8 +324,7 @@ def _is_unorderable_exception(e): Returns ------- - is_orderable_exc : Whether or not the exception raised is an - unorderable exception. + boolean : Whether or not the exception raised is an unorderable exception. """ if PY36: @@ -342,9 +405,39 @@ def is_numeric_dtype(arr_or_dtype): def is_string_like_dtype(arr_or_dtype): - # exclude object as its a mixed dtype - dtype = _get_dtype(arr_or_dtype) - return dtype.kind in ('S', 'U') + """ + Check whether the provided array or dtype is of a string-like dtype. + + Unlike `is_string_dtype`, the object dtype is excluded because it + is a mixed dtype. + + Parameters + ---------- + arr_or_dtype : ndarray, dtype, type + The array or dtype to check. + + Returns + ------- + boolean : Whether or not the array or dtype is of the string dtype. + + Examples + -------- + >>> is_string_like_dtype(str) + True + >>> is_string_like_dtype(object) + False + >>> + >>> is_string_like_dtype(np.array(['a', 'b'])) + True + >>> is_string_like_dtype(np.array([1, 2])) + False + """ + + try: + dtype = _get_dtype(arr_or_dtype) + return dtype.kind in ('S', 'U') + except TypeError: + return False def is_float_dtype(arr_or_dtype): From aa9ec1bc67cfbd033ff9056b39815e3b8e530cfd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 7 Apr 2017 18:41:12 -0400 Subject: [PATCH 3/3] TST: use ids to have nice parameterized function names --- pandas/tests/types/test_common.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/types/test_common.py b/pandas/tests/types/test_common.py index 600aa5bf3634d..21772bab44d01 100644 --- a/pandas/tests/types/test_common.py +++ b/pandas/tests/types/test_common.py @@ -6,7 +6,6 @@ from pandas.types.dtypes import DatetimeTZDtype, PeriodDtype, CategoricalDtype from pandas.types.common import pandas_dtype, is_dtype_equal -import pandas.types.common as com import pandas.util.testing as tm @@ -91,9 +90,8 @@ def get_is_dtype_funcs(): Get all functions in pandas.types.common that begin with 'is_' and end with 'dtype' - Assumes that we have imported the module - "pandas.types.common" as com. """ + import pandas.types.common as com fnames = [f for f in dir(com) if (f.startswith('is_') and f.endswith('dtype'))] @@ -101,7 +99,8 @@ def get_is_dtype_funcs(): @pytest.mark.parametrize('func', - get_is_dtype_funcs()) + get_is_dtype_funcs(), + ids=lambda x: x.__name__) def test_get_dtype_error_catch(func): # see gh-15941 #