Skip to content

Commit cba2916

Browse files
committed
CLN: Add 'is_hashable' predicate to core/common
Some types will pass a test against collections.Hashable but fail when they are actually hashed with hash(). Introduce a new predicate to help handle these types. Will use this in subsequent commit. Test Plan: In addition to running tests, run the doctest $ nosetests pandas/core/common.py:is_hashable --with-doc -v
1 parent 2063c1f commit cba2916

File tree

2 files changed

+83
-0
lines changed

2 files changed

+83
-0
lines changed

pandas/core/common.py

+32
Original file line numberDiff line numberDiff line change
@@ -2504,6 +2504,38 @@ def is_list_like(arg):
25042504
not isinstance(arg, compat.string_and_binary_types))
25052505

25062506

2507+
def is_hashable(arg):
    """Return True if hash(arg) will succeed, False otherwise.

    Some types will pass an isinstance test against the ``Hashable`` abstract
    base class but fail when they are actually hashed with hash(). For
    example, a tuple is always an instance of ``Hashable`` because ``tuple``
    defines ``__hash__``, yet hashing a tuple that contains a list raises
    TypeError.

    Distinguish between these and other types by trying the call to hash() and
    seeing if they raise TypeError.

    Parameters
    ----------
    arg : object
        Any object.

    Returns
    -------
    bool
        True only if ``arg`` is an instance of the ``Hashable`` ABC *and*
        ``hash(arg)`` does not raise TypeError.

    Examples
    --------
    >>> a = ([],)
    >>> is_hashable(a)
    False
    >>> is_hashable((1,))
    True
    """
    # The ABC lives in collections.abc since Python 3.3 and the old
    # collections.Hashable alias was removed in Python 3.10. Fall back to the
    # old location only where collections.abc does not exist (Python 2).
    try:
        from collections.abc import Hashable
    except ImportError:
        from collections import Hashable

    # don't consider anything that is not an instance of the Hashable ABC, so
    # as not to broaden the definition of hashable beyond that. For example,
    # old-style classes (Python 2) are not Hashable instances but they won't
    # fail hash().
    if not isinstance(arg, Hashable):
        return False

    # narrow the definition of hashable if hash(arg) fails in practice
    try:
        hash(arg)
    except TypeError:
        return False
    else:
        return True
2537+
2538+
25072539
def is_sequence(x):
25082540
try:
25092541
iter(x)

pandas/tests/test_common.py

+51
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import collections
12
from datetime import datetime
23
import re
4+
import sys
35

46
import nose
57
from nose.tools import assert_equal
@@ -398,6 +400,55 @@ def test_is_list_like():
398400
assert not com.is_list_like(f)
399401

400402

403+
def test_is_hashable():
    """Check com.is_hashable() against the Hashable ABC and against hash().

    Three groups of objects are exercised: ones that are hashable by both
    measures, ones that are unhashable by both, and ones that are instances
    of the Hashable ABC but raise TypeError when actually hashed.
    """
    # The ABC lives in collections.abc since Python 3.3 and the old
    # collections.Hashable alias was removed in Python 3.10. Fall back to the
    # old location only where collections.abc does not exist (Python 2).
    try:
        from collections.abc import Hashable
    except ImportError:
        from collections import Hashable

    # all new-style classes are hashable by default
    class HashableClass(object):
        pass

    # setting __hash__ to None makes the ABC check itself fail
    class UnhashableClass1(object):
        __hash__ = None

    # defines __hash__, so it passes the ABC check, but hash() raises
    class UnhashableClass2(object):
        def __hash__(self):
            raise TypeError("Not hashable")

    hashable = (
        1, 'a', tuple(), (1,), HashableClass(),
    )
    not_hashable = (
        [], UnhashableClass1(),
    )
    abc_hashable_not_really_hashable = (
        ([],), UnhashableClass2(),
    )

    for i in hashable:
        assert isinstance(i, Hashable)
        assert com.is_hashable(i)
    for i in not_hashable:
        assert not isinstance(i, Hashable)
        assert not com.is_hashable(i)
    for i in abc_hashable_not_really_hashable:
        assert isinstance(i, Hashable)
        assert not com.is_hashable(i)

    # numpy.array is no longer collections.Hashable as of
    # https://github.com/numpy/numpy/pull/5326, just test
    # pandas.common.is_hashable()
    assert not com.is_hashable(np.array([]))

    # old-style classes in Python 2 don't appear hashable to
    # collections.Hashable but also seem to support hash() by default
    if sys.version_info[0] == 2:
        class OldStyleClass():
            pass
        c = OldStyleClass()
        assert not isinstance(c, Hashable)
        assert not com.is_hashable(c)
        hash(c)  # this will not raise
450+
451+
401452
def test_ensure_int32():
402453
values = np.arange(10, dtype=np.int32)
403454
result = com._ensure_int32(values)

0 commit comments

Comments
 (0)