Skip to content

Commit afa8775

Browse files
Simplify to reduce code duplication
1 parent 47cdca1 commit afa8775

File tree

1 file changed

+13
-49
lines changed

1 file changed

+13
-49
lines changed

pandas/core/util/hashing.py

+13 -49
Original file line number | Diff line number | Diff line change
@@ -182,14 +182,6 @@ def hash_tuple(val, encoding='utf8', hash_key=None):
182182
hash
183183
184184
"""
185-
#def to_array(v):
186-
# dtype, arr = infer_dtype_from_array([v])
187-
# return np.asarray(arr, dtype=dtype)
188-
189-
#hashes = (hash_array(to_array(v), encoding=encoding, hash_key=hash_key,
190-
# categorize=False)
191-
# for v in val)
192-
193185
hashes = (_hash_scalar(v, encoding=encoding, hash_key=hash_key)
194186
for v in val)
195187

@@ -298,7 +290,7 @@ def hash_array(vals, encoding='utf8', hash_key=None, categorize=True):
298290

299291
try:
300292
vals = hashing.hash_object_array(vals, hash_key, encoding)
301-
except (TypeError, ValueError):
293+
except TypeError:
302294
# we have mixed types
303295
vals = hashing.hash_object_array(vals.astype(str).astype(object),
304296
hash_key, encoding)
@@ -321,52 +313,24 @@ def _hash_scalar(val, encoding='utf8', hash_key=None):
321313
1d uint64 numpy array of hash value, of length 1
322314
"""
323315

324-
if hash_key is None:
325-
hash_key = _default_hash_key
326-
327316
if isnull(val):
328317
# this is to be consistent with the _hash_categorical implementation
329318
return np.array([np.iinfo(np.uint64).max], dtype='u8')
330319

331320
if isinstance(val, string_and_binary_types + (text_type,)):
332321
vals = np.array([val], dtype=object)
333-
string_like = True
334322
else:
335323
vals = np.array([val])
336-
string_like = False
337-
338-
dtype = vals.dtype
339-
340-
#dtype, vals = infer_dtype_from_array([vals])
341-
#if dtype == np.object_:
342-
# vals = np.asarray(vals, dtype='object')
343-
# dtype = vals.dtype
344324

345-
# we'll be working with everything as 64-bit values, so handle this
346-
# 128-bit value early
347-
if np.issubdtype(dtype, np.complex128):
348-
return hash_array(vals.real) + 23 * hash_array(vals.imag)
349-
350-
# First, turn whatever array this is into unsigned 64-bit ints, if we can
351-
# manage it.
352-
elif isinstance(dtype, np.bool):
353-
vals = vals.astype('u8')
354-
elif issubclass(dtype.type, (np.datetime64, np.timedelta64)):
355-
vals = vals.view('i8').astype('u8', copy=False)
356-
elif issubclass(dtype.type, np.number) and dtype.itemsize <= 8:
357-
vals = vals.view('u{}'.format(vals.dtype.itemsize)).astype('u8')
358-
else:
359-
if not string_like:
360-
from pandas import Index
361-
vals = Index(vals).values
362-
return hash_array(vals, hash_key=hash_key, encoding=encoding,
363-
categorize=False)
364-
vals = hashing.hash_object_array(vals, hash_key, encoding)
365-
366-
# Then, redistribute these 64-bit ints within the space of 64-bit ints
367-
vals ^= vals >> 30
368-
vals *= np.uint64(0xbf58476d1ce4e5b9)
369-
vals ^= vals >> 27
370-
vals *= np.uint64(0x94d049bb133111eb)
371-
vals ^= vals >> 31
372-
return vals
325+
if vals.dtype == np.object_:
326+
from pandas import Timestamp, Timedelta, Period, Interval
327+
if isinstance(val, (Timestamp, Timedelta)):
328+
vals = np.array([val.value])
329+
elif isinstance(val, (Period, Interval)):
330+
pass
331+
else:
332+
from pandas import Index
333+
vals = Index(vals).values
334+
335+
return hash_array(vals, hash_key=hash_key, encoding=encoding,
336+
categorize=False)

0 commit comments

Comments
 (0)