Skip to content

Commit 48a2402

Browse files
committed
support for mixed type arrays
1 parent 58f682d commit 48a2402

File tree

2 files changed

+14
-10
lines changed

2 files changed

+14
-10
lines changed

pandas/tools/hashing.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,8 @@ def hash_tuples(vals, encoding='utf8', hash_key=None):
149149
"""
150150
Hash a MultiIndex / list-of-tuples efficiently
151151
152+
.. versionadded:: 0.20.0
153+
152154
Parameters
153155
----------
154156
vals : MultiIndex, list-of-tuples, or single tuple
@@ -265,7 +267,13 @@ def hash_array(vals, encoding='utf8', hash_key=None, categorize=True):
265267
ordered=False, fastpath=True)
266268
return _hash_categorical(cat, encoding, hash_key)
267269

268-
vals = _hash.hash_object_array(vals, hash_key, encoding)
270+
try:
271+
vals = _hash.hash_object_array(vals, hash_key, encoding)
272+
except TypeError:
273+
274+
# we have mixed types: fall back to hashing the values' string representations
275+
vals = _hash.hash_object_array(vals.astype(str).astype(object),
276+
hash_key, encoding)
269277

270278
# Then, redistribute these 64-bit ints within the space of 64-bit ints
271279
vals ^= vals >> 30

pandas/tools/tests/test_hashing.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ def test_hash_array(self):
3636
a = s.values
3737
tm.assert_numpy_array_equal(hash_array(a), hash_array(a))
3838

39+
def test_hash_array_mixed(self):
40+
for data in [np.array([3, 4, 'All']),
41+
np.array([3, 4, 'All'], dtype=object)]:
42+
tm.assert_numpy_array_equal(hash_array(data), hash_array(data))
43+
3944
def check_equal(self, obj, **kwargs):
4045
a = hash_pandas_object(obj, **kwargs)
4146
b = hash_pandas_object(obj, **kwargs)
@@ -159,15 +164,6 @@ def f():
159164
hash_pandas_object(Series(list('abc')), hash_key='foo')
160165
self.assertRaises(ValueError, f)
161166

162-
def test_unsupported_objects(self):
163-
164-
# mixed objects are not supported
165-
obj = Series(['1', 2, 3])
166-
167-
def f():
168-
hash_pandas_object(obj)
169-
self.assertRaises(TypeError, f)
170-
171167
def test_alread_encoded(self):
172168
# if already encoded then ok
173169

0 commit comments

Comments
 (0)