Skip to content

Commit bc2fa16

Browse files
gfyoung authored and jreback committed
BUG/DOC: Add documentation in types/common.py (pandas-dev#15941)
* DOC: document internal methods in types/common.py Partially addresses pandas-devgh-15895. * BUG: Catch TypeError when calling _get_dtype The following functions were not catching the TypeError raised by _get_dtype: 1) is_string_dtype 2) is_string_like_dtype 3) is_timedelta64_ns_dtype Thus, when "None" was passed in, an Exception was raised instead of returning False, as other functions did. * TST: use ids to have nice parameterized function names
1 parent 6d90a43 commit bc2fa16

File tree

3 files changed

+259
-22
lines changed

3 files changed

+259
-22
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1145,6 +1145,7 @@ Conversion
11451145
- Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`)
11461146
- Bug in ``DataFrame`` construction with nulls and datetimes in a list-like (:issue:`15869`)
11471147
- Bug in ``DataFrame.fillna()`` with tz-aware datetimes (:issue:`15855`)
1148+
- Bug in ``is_string_dtype``, ``is_timedelta64_ns_dtype``, and ``is_string_like_dtype`` in which an error was raised when ``None`` was passed in (:issue:`15941`)
11481149

11491150
Indexing
11501151
^^^^^^^^

pandas/tests/types/test_common.py

+27
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,30 @@ def test_dtype_equal_strict():
8080
assert not is_dtype_equal(
8181
pandas_dtype('datetime64[ns, US/Eastern]'),
8282
pandas_dtype('datetime64[ns, CET]'))
83+
84+
# see gh-15941: no exception should be raised
85+
assert not is_dtype_equal(None, None)
86+
87+
88+
def get_is_dtype_funcs():
89+
"""
90+
Get all functions in pandas.types.common that
91+
begin with 'is_' and end with 'dtype'
92+
93+
"""
94+
import pandas.types.common as com
95+
96+
fnames = [f for f in dir(com) if (f.startswith('is_') and
97+
f.endswith('dtype'))]
98+
return [getattr(com, fname) for fname in fnames]
99+
100+
101+
@pytest.mark.parametrize('func',
102+
get_is_dtype_funcs(),
103+
ids=lambda x: x.__name__)
104+
def test_get_dtype_error_catch(func):
105+
# see gh-15941
106+
#
107+
# No exception should be raised.
108+
109+
assert not func(None)

pandas/types/common.py

+231-22
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,20 @@
3131

3232

3333
def _ensure_float(arr):
34+
"""
35+
Ensure that an array object has a float dtype if possible.
36+
37+
Parameters
38+
----------
39+
arr : ndarray, Series
40+
The array whose data type we want to enforce as float.
41+
42+
Returns
43+
-------
44+
float_arr : The original array cast to the float dtype if
45+
possible. Otherwise, the original array is returned.
46+
"""
47+
3448
if issubclass(arr.dtype.type, (np.integer, np.bool_)):
3549
arr = arr.astype(float)
3650
return arr
@@ -46,6 +60,20 @@ def _ensure_float(arr):
4660

4761

4862
def _ensure_categorical(arr):
63+
"""
64+
Ensure that an array-like object is a Categorical (if not already).
65+
66+
Parameters
67+
----------
68+
arr : array-like
69+
The array that we want to convert into a Categorical.
70+
71+
Returns
72+
-------
73+
cat_arr : The original array cast as a Categorical. If it already
74+
is a Categorical, we return as is.
75+
"""
76+
4977
if not is_categorical(arr):
5078
from pandas import Categorical
5179
arr = Categorical(arr)
@@ -116,8 +144,40 @@ def is_categorical_dtype(arr_or_dtype):
116144

117145

118146
def is_string_dtype(arr_or_dtype):
119-
dtype = _get_dtype(arr_or_dtype)
120-
return dtype.kind in ('O', 'S', 'U') and not is_period_dtype(dtype)
147+
"""
148+
Check whether the provided array or dtype is of the string dtype.
149+
150+
Parameters
151+
----------
152+
arr_or_dtype : ndarray, dtype, type
153+
The array or dtype to check.
154+
155+
Returns
156+
-------
157+
boolean : Whether or not the array or dtype is of the string dtype.
158+
159+
Examples
160+
--------
161+
>>> is_string_dtype(str)
162+
True
163+
>>> is_string_dtype(object)
164+
True
165+
>>> is_string_dtype(int)
166+
False
167+
>>>
168+
>>> is_string_dtype(np.array(['a', 'b']))
169+
True
170+
>>> is_string_dtype(np.array([1, 2]))
171+
False
172+
"""
173+
174+
# TODO: gh-15585: consider making the checks stricter.
175+
176+
try:
177+
dtype = _get_dtype(arr_or_dtype)
178+
return dtype.kind in ('O', 'S', 'U') and not is_period_dtype(dtype)
179+
except TypeError:
180+
return False
121181

122182

123183
def is_period_arraylike(arr):
@@ -209,8 +269,40 @@ def is_datetime64_ns_dtype(arr_or_dtype):
209269

210270

211271
def is_timedelta64_ns_dtype(arr_or_dtype):
212-
tipo = _get_dtype(arr_or_dtype)
213-
return tipo == _TD_DTYPE
272+
"""
273+
Check whether the provided array or dtype is of the timedelta64[ns] dtype.
274+
275+
This is a very specific dtype, so generic ones like `np.timedelta64`
276+
will return False if passed into this function.
277+
278+
Parameters
279+
----------
280+
arr_or_dtype : ndarray, dtype, type
281+
The array or dtype to check.
282+
283+
Returns
284+
-------
285+
boolean : Whether or not the array or dtype
286+
is of the timedelta64[ns] dtype.
287+
288+
Examples
289+
--------
290+
>>> is_timedelta64_ns_dtype(np.dtype('m8[ns]'))
291+
True
292+
>>> is_timedelta64_ns_dtype(np.dtype('m8[ps]'))  # Wrong frequency
293+
False
294+
>>>
295+
>>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]'))
296+
True
297+
>>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64))
298+
False
299+
"""
300+
301+
try:
302+
tipo = _get_dtype(arr_or_dtype)
303+
return tipo == _TD_DTYPE
304+
except TypeError:
305+
return False
214306

215307

216308
def is_datetime_or_timedelta_dtype(arr_or_dtype):
@@ -220,10 +312,21 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype):
220312

221313
def _is_unorderable_exception(e):
222314
"""
223-
return a boolean if we an unorderable exception error message
315+
Check if the exception raised is an unorderable exception.
224316
225-
These are different error message for PY>=3<=3.5 and PY>=3.6
317+
The error message differs for 3 <= PY <= 3.5 and PY >= 3.6, so
318+
we need to condition based on Python version.
319+
320+
Parameters
321+
----------
322+
e : Exception or sub-class
323+
The exception object to check.
324+
325+
Returns
326+
-------
327+
boolean : Whether or not the exception raised is an unorderable exception.
226328
"""
329+
227330
if PY36:
228331
return "'>' not supported between instances of" in str(e)
229332

@@ -302,9 +405,39 @@ def is_numeric_dtype(arr_or_dtype):
302405

303406

304407
def is_string_like_dtype(arr_or_dtype):
305-
# exclude object as its a mixed dtype
306-
dtype = _get_dtype(arr_or_dtype)
307-
return dtype.kind in ('S', 'U')
408+
"""
409+
Check whether the provided array or dtype is of a string-like dtype.
410+
411+
Unlike `is_string_dtype`, the object dtype is excluded because it
412+
is a mixed dtype.
413+
414+
Parameters
415+
----------
416+
arr_or_dtype : ndarray, dtype, type
417+
The array or dtype to check.
418+
419+
Returns
420+
-------
421+
boolean : Whether or not the array or dtype is of a string-like dtype.
422+
423+
Examples
424+
--------
425+
>>> is_string_like_dtype(str)
426+
True
427+
>>> is_string_like_dtype(object)
428+
False
429+
>>>
430+
>>> is_string_like_dtype(np.array(['a', 'b']))
431+
True
432+
>>> is_string_like_dtype(np.array([1, 2]))
433+
False
434+
"""
435+
436+
try:
437+
dtype = _get_dtype(arr_or_dtype)
438+
return dtype.kind in ('S', 'U')
439+
except TypeError:
440+
return False
308441

309442

310443
def is_float_dtype(arr_or_dtype):
@@ -346,7 +479,22 @@ def is_complex_dtype(arr_or_dtype):
346479

347480

348481
def _coerce_to_dtype(dtype):
349-
""" coerce a string / np.dtype to a dtype """
482+
"""
483+
Coerce a string or np.dtype to a pandas or numpy
484+
dtype if possible.
485+
486+
If we cannot convert to a pandas dtype initially,
487+
we convert to a numpy dtype.
488+
489+
Parameters
490+
----------
491+
dtype : The dtype that we want to coerce.
492+
493+
Returns
494+
-------
495+
pd_or_np_dtype : The coerced dtype.
496+
"""
497+
350498
if is_categorical_dtype(dtype):
351499
dtype = CategoricalDtype()
352500
elif is_datetime64tz_dtype(dtype):
@@ -359,8 +507,27 @@ def _coerce_to_dtype(dtype):
359507

360508

361509
def _get_dtype(arr_or_dtype):
510+
"""
511+
Get the dtype instance associated with an array
512+
or dtype object.
513+
514+
Parameters
515+
----------
516+
arr_or_dtype : ndarray, Series, dtype, type
517+
The array-like or dtype object whose dtype we want to extract.
518+
519+
Returns
520+
-------
521+
obj_dtype : The extracted dtype instance from the
522+
passed in array or dtype object.
523+
524+
Raises
525+
------
526+
TypeError : The passed in object is None.
527+
"""
528+
362529
if arr_or_dtype is None:
363-
raise TypeError
530+
raise TypeError("Cannot deduce dtype from null object")
364531
if isinstance(arr_or_dtype, np.dtype):
365532
return arr_or_dtype
366533
elif isinstance(arr_or_dtype, type):
@@ -385,6 +552,21 @@ def _get_dtype(arr_or_dtype):
385552

386553

387554
def _get_dtype_type(arr_or_dtype):
555+
"""
556+
Get the type (NOT dtype) instance associated with
557+
an array or dtype object.
558+
559+
Parameters
560+
----------
561+
arr_or_dtype : ndarray, Series, dtype, type
562+
The array-like or dtype object whose type we want to extract.
563+
564+
Returns
565+
-------
566+
obj_type : The extracted type instance from the
567+
passed in array or dtype object.
568+
"""
569+
388570
if isinstance(arr_or_dtype, np.dtype):
389571
return arr_or_dtype.type
390572
elif isinstance(arr_or_dtype, type):
@@ -410,16 +592,27 @@ def _get_dtype_type(arr_or_dtype):
410592

411593

412594
def _get_dtype_from_object(dtype):
413-
"""Get a numpy dtype.type-style object. This handles the datetime64[ns]
414-
and datetime64[ns, TZ] compat
595+
"""
596+
Get a numpy dtype.type-style object for a dtype object.
415597
416-
Notes
417-
-----
418-
If nothing can be found, returns ``object``.
598+
This method also includes handling of the datetime64[ns] and
599+
datetime64[ns, TZ] objects.
600+
601+
If no dtype can be found, we return ``object``.
602+
603+
Parameters
604+
----------
605+
dtype : dtype, type
606+
The dtype object whose numpy dtype.type-style
607+
object we want to extract.
608+
609+
Returns
610+
-------
611+
dtype_object : The extracted numpy dtype.type-style object.
419612
"""
420613

421-
# type object from a dtype
422614
if isinstance(dtype, type) and issubclass(dtype, np.generic):
615+
# Type object from a dtype
423616
return dtype
424617
elif is_categorical(dtype):
425618
return CategoricalDtype().type
@@ -429,7 +622,7 @@ def _get_dtype_from_object(dtype):
429622
try:
430623
_validate_date_like_dtype(dtype)
431624
except TypeError:
432-
# should still pass if we don't have a datelike
625+
# Should still pass if we don't have a date-like
433626
pass
434627
return dtype.type
435628
elif isinstance(dtype, string_types):
@@ -444,17 +637,33 @@ def _get_dtype_from_object(dtype):
444637
try:
445638
return _get_dtype_from_object(getattr(np, dtype))
446639
except (AttributeError, TypeError):
447-
# handles cases like _get_dtype(int)
448-
# i.e., python objects that are valid dtypes (unlike user-defined
449-
# types, in general)
450-
# TypeError handles the float16 typecode of 'e'
640+
# Handles cases like _get_dtype(int) i.e.,
641+
# Python objects that are valid dtypes
642+
# (unlike user-defined types, in general)
643+
#
644+
# TypeError handles the float16 type code of 'e'
451645
# further handle internal types
452646
pass
453647

454648
return _get_dtype_from_object(np.dtype(dtype))
455649

456650

457651
def _validate_date_like_dtype(dtype):
652+
"""
653+
Check whether the dtype is a date-like dtype. Raises an error if invalid.
654+
655+
Parameters
656+
----------
657+
dtype : dtype, type
658+
The dtype to check.
659+
660+
Raises
661+
------
662+
TypeError : The dtype could not be cast to a date-like dtype.
663+
ValueError : The dtype is an illegal date-like dtype (e.g. the
664+
frequency provided is too specific)
665+
"""
666+
458667
try:
459668
typ = np.datetime_data(dtype)[0]
460669
except ValueError as e:

0 commit comments

Comments
 (0)