@@ -182,14 +182,6 @@ def hash_tuple(val, encoding='utf8', hash_key=None):
182
182
hash
183
183
184
184
"""
185
- #def to_array(v):
186
- # dtype, arr = infer_dtype_from_array([v])
187
- # return np.asarray(arr, dtype=dtype)
188
-
189
- #hashes = (hash_array(to_array(v), encoding=encoding, hash_key=hash_key,
190
- # categorize=False)
191
- # for v in val)
192
-
193
185
hashes = (_hash_scalar (v , encoding = encoding , hash_key = hash_key )
194
186
for v in val )
195
187
@@ -298,7 +290,7 @@ def hash_array(vals, encoding='utf8', hash_key=None, categorize=True):
298
290
299
291
try :
300
292
vals = hashing .hash_object_array (vals , hash_key , encoding )
301
- except ( TypeError , ValueError ) :
293
+ except TypeError :
302
294
# we have mixed types
303
295
vals = hashing .hash_object_array (vals .astype (str ).astype (object ),
304
296
hash_key , encoding )
@@ -321,52 +313,24 @@ def _hash_scalar(val, encoding='utf8', hash_key=None):
321
313
1d uint64 numpy array of hash value, of length 1
322
314
"""
323
315
324
- if hash_key is None :
325
- hash_key = _default_hash_key
326
-
327
316
if isnull (val ):
328
317
# this is to be consistent with the _hash_categorical implementation
329
318
return np .array ([np .iinfo (np .uint64 ).max ], dtype = 'u8' )
330
319
331
320
if isinstance (val , string_and_binary_types + (text_type ,)):
332
321
vals = np .array ([val ], dtype = object )
333
- string_like = True
334
322
else :
335
323
vals = np .array ([val ])
336
- string_like = False
337
-
338
- dtype = vals .dtype
339
-
340
- #dtype, vals = infer_dtype_from_array([vals])
341
- #if dtype == np.object_:
342
- # vals = np.asarray(vals, dtype='object')
343
- # dtype = vals.dtype
344
324
345
- # we'll be working with everything as 64-bit values, so handle this
346
- # 128-bit value early
347
- if np .issubdtype (dtype , np .complex128 ):
348
- return hash_array (vals .real ) + 23 * hash_array (vals .imag )
349
-
350
- # First, turn whatever array this is into unsigned 64-bit ints, if we can
351
- # manage it.
352
- elif isinstance (dtype , np .bool ):
353
- vals = vals .astype ('u8' )
354
- elif issubclass (dtype .type , (np .datetime64 , np .timedelta64 )):
355
- vals = vals .view ('i8' ).astype ('u8' , copy = False )
356
- elif issubclass (dtype .type , np .number ) and dtype .itemsize <= 8 :
357
- vals = vals .view ('u{}' .format (vals .dtype .itemsize )).astype ('u8' )
358
- else :
359
- if not string_like :
360
- from pandas import Index
361
- vals = Index (vals ).values
362
- return hash_array (vals , hash_key = hash_key , encoding = encoding ,
363
- categorize = False )
364
- vals = hashing .hash_object_array (vals , hash_key , encoding )
365
-
366
- # Then, redistribute these 64-bit ints within the space of 64-bit ints
367
- vals ^= vals >> 30
368
- vals *= np .uint64 (0xbf58476d1ce4e5b9 )
369
- vals ^= vals >> 27
370
- vals *= np .uint64 (0x94d049bb133111eb )
371
- vals ^= vals >> 31
372
- return vals
325
+ if vals .dtype == np .object_ :
326
+ from pandas import Timestamp , Timedelta , Period , Interval
327
+ if isinstance (val , (Timestamp , Timedelta )):
328
+ vals = np .array ([val .value ])
329
+ elif isinstance (val , (Period , Interval )):
330
+ pass
331
+ else :
332
+ from pandas import Index
333
+ vals = Index (vals ).values
334
+
335
+ return hash_array (vals , hash_key = hash_key , encoding = encoding ,
336
+ categorize = False )
0 commit comments