Skip to content

Commit 7a9249d

Browse files
committed
memory optimization
1 parent 64dd638 commit 7a9249d

File tree

2 files changed

+12
-4
lines changed

2 files changed

+12
-4
lines changed

pandas/tools/hashing.py

+7-4
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,8 @@
99
from pandas.lib import is_bool_array
1010
from pandas.types.generic import ABCIndexClass, ABCSeries, ABCDataFrame
1111
from pandas.types.common import (is_categorical_dtype, is_numeric_dtype,
12-
is_datetime64_dtype, is_timedelta64_dtype)
12+
is_datetime64_dtype, is_timedelta64_dtype,
13+
is_list_like)
1314

1415
# 16 byte long hashing key
1516
_default_hash_key = '0123456789123456'
@@ -20,7 +21,7 @@ def _combine_hash_arrays(arrays, num_items):
2021
first = next(arrays)
2122
arrays = itertools.chain([first], arrays)
2223

23-
mult = np.zeros_like(first) + np.uint64(1000003)
24+
mult = np.uint64(1000003)
2425
out = np.zeros_like(first) + np.uint64(0x345678)
2526
for i, a in enumerate(arrays):
2627
inverse_i = num_items - i
@@ -135,11 +136,11 @@ def _hash_lists(vals, encoding='utf8', hash_key=None):
135136

136137
def hash_tuples(vals, encoding='utf8', hash_key=None):
137138
"""
138-
Hash an MultiIndex / array_of_tuples efficiently
139+
Hash a MultiIndex / list-of-tuples efficiently
139140
140141
Parameters
141142
----------
142-
vals : MultiIndex, ndarray of tuples, or single tuple
143+
vals : MultiIndex, list-of-tuples, or single tuple
143144
encoding : string, default 'utf8'
144145
hash_key : string key to encode, default to _default_hash_key
145146
@@ -152,6 +153,8 @@ def hash_tuples(vals, encoding='utf8', hash_key=None):
152153
if isinstance(vals, tuple):
153154
vals = [vals]
154155
is_tuple = True
156+
elif not is_list_like(vals):
157+
raise TypeError("must be convertible to a list-of-tuples")
155158

156159
if not isinstance(vals, MultiIndex):
157160
vals = MultiIndex.from_tuples(vals)

pandas/tools/tests/test_hashing.py

+5
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,11 @@ def test_hash_tuples(self):
6464
result = hash_tuples(tups[0])
6565
self.assertEqual(result, expected[0])
6666

67+
def test_hash_tuples_err(self):
68+
69+
for val in [5, 'foo', pd.Timestamp('20130101')]:
70+
self.assertRaises(TypeError, hash_tuples, val)
71+
6772
def test_multiindex_unique(self):
6873
mi = MultiIndex.from_tuples([(118, 472), (236, 118),
6974
(51, 204), (102, 51)])

0 commit comments

Comments
 (0)