Skip to content

Commit e88b658

Browse files
PERF: specialized hash function for single tuples
1 parent aca4453 commit e88b658

File tree

3 files changed

+34
-4
lines changed

3 files changed

+34
-4
lines changed

pandas/core/indexes/multi.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -748,7 +748,7 @@ def _hashed_indexing_key(self, key):
748748
we need to stringify if we have mixed levels
749749
750750
"""
751-
from pandas.core.util.hashing import hash_tuples
751+
from pandas.core.util.hashing import hash_tuples, hash_tuple
752752

753753
if not isinstance(key, tuple):
754754
return hash_tuples(key)
@@ -762,7 +762,7 @@ def f(k, stringify):
762762
return k
763763
key = tuple([f(k, stringify)
764764
for k, stringify in zip(key, self._have_mixed_levels)])
765-
return hash_tuples(key)
765+
return hash_tuple(key)
766766

767767
@Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
768768
def duplicated(self, keep='first'):

pandas/core/util/hashing.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,29 @@ def hash_tuples(vals, encoding='utf8', hash_key=None):
164164
return h
165165

166166

167+
def hash_tuple(val, encoding='utf8', hash_key=None):
    """
    Hash one tuple by hashing each of its elements and combining the results

    Every element is wrapped in a one-element array and passed to
    ``hash_array`` (with ``categorize=False``); the per-element hashes are
    then merged by ``_combine_hash_arrays`` and the first entry of the
    merged result is returned.

    Parameters
    ----------
    val : single tuple
    encoding : string, default 'utf8'
        forwarded to ``hash_array``
    hash_key : string key to encode, default to _default_hash_key
        forwarded to ``hash_array``

    Returns
    -------
    hash

    """
    def _element_hashes():
        # yield lazily so the combiner still receives an iterator,
        # not a materialized list
        for item in val:
            yield hash_array(np.array([item]), encoding=encoding,
                             hash_key=hash_key, categorize=False)

    combined = _combine_hash_arrays(_element_hashes(), len(val))

    # only one tuple was hashed, so take the first entry of the result
    return combined[0]
188+
189+
167190
def _hash_categorical(c, encoding, hash_key):
168191
"""
169192
Hash a Categorical by hashing its categories, and then mapping the codes
@@ -264,7 +287,7 @@ def hash_array(vals, encoding='utf8', hash_key=None, categorize=True):
264287

265288
try:
266289
vals = hashing.hash_object_array(vals, hash_key, encoding)
267-
except TypeError:
290+
except (TypeError, ValueError):
268291
# we have mixed types
269292
vals = hashing.hash_object_array(vals.astype(str).astype(object),
270293
hash_key, encoding)

pandas/tests/util/test_hashing.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from pandas import DataFrame, Series, Index, MultiIndex
88
from pandas.util import hash_array, hash_pandas_object
9-
from pandas.core.util.hashing import hash_tuples
9+
from pandas.core.util.hashing import hash_tuples, hash_tuple
1010
import pandas.util.testing as tm
1111

1212

@@ -79,6 +79,13 @@ def test_hash_tuples(self):
7979
result = hash_tuples(tups[0])
8080
assert result == expected[0]
8181

82+
def test_hash_tuple(self):
    # hashing one tuple with hash_tuple must give the same value as
    # hashing a one-element list of that tuple with hash_tuples
    tup = (1, 'one')
    expected = hash_tuples([tup])[0]
    assert hash_tuple(tup) == expected
88+
8289
def test_hash_tuples_err(self):
8390

8491
for val in [5, 'foo', pd.Timestamp('20130101')]:

0 commit comments

Comments
 (0)