|
16 | 16 |
|
17 | 17 |
|
18 | 18 | def _combine_hash_arrays(arrays, num_items):
|
| 19 | + "Should be the same as CPython's tupleobject.c" |
19 | 20 | first = next(arrays)
|
20 | 21 | arrays = itertools.chain([first], arrays)
|
21 | 22 |
|
22 | 23 | mult = np.zeros_like(first) + np.uint64(1000003)
|
23 | 24 | out = np.zeros_like(first) + np.uint64(0x345678)
|
24 | 25 | for i, a in enumerate(arrays):
|
25 | 26 | inverse_i = num_items - i
|
26 |
| - out = (out ^ a) * mult |
| 27 | + out ^= a |
| 28 | + out *= mult |
27 | 29 | mult += np.uint64(82520 + inverse_i + inverse_i)
|
28 | 30 | assert i + 1 == num_items, 'Fed in wrong num_items'
|
29 | 31 | out += np.uint64(97531)
|
@@ -70,15 +72,17 @@ def hash_pandas_object(obj, index=True, encoding='utf8', hash_key=None,
|
70 | 72 | h = hash_array(obj.values, encoding, hash_key,
|
71 | 73 | categorize).astype('uint64', copy=False)
|
72 | 74 | if index:
|
73 |
| - h = _combine_hash_arrays(iter([ |
74 |
| - h, |
75 |
| - hash_pandas_object(obj.index, |
76 |
| - index=False, |
77 |
| - encoding=encoding, |
78 |
| - hash_key=hash_key, |
79 |
| - categorize=categorize).values]), |
80 |
| - 2) |
| 75 | + index_iter = (hash_pandas_object(obj.index, |
| 76 | + index=False, |
| 77 | + encoding=encoding, |
| 78 | + hash_key=hash_key, |
| 79 | + categorize=categorize).values |
| 80 | + for _ in [None]) |
| 81 | + arrays = itertools.chain([h], index_iter) |
| 82 | + h = _combine_hash_arrays(arrays, 2) |
| 83 | + |
81 | 84 | h = Series(h, index=obj.index, dtype='uint64', copy=False)
|
| 85 | + |
82 | 86 | elif isinstance(obj, ABCDataFrame):
|
83 | 87 | hashes = (hash_array(series.values) for _, series in obj.iteritems())
|
84 | 88 | num_items = len(obj.columns)
|
|
0 commit comments