|
5 | 5 |
|
6 | 6 | import numpy as np
|
7 | 7 | from pandas._libs import hashing
|
8 |
| -from pandas._libs.lib import is_bool_array |
9 | 8 | from pandas.core.dtypes.generic import (
|
10 | 9 | ABCMultiIndex,
|
11 | 10 | ABCIndexClass,
|
12 | 11 | ABCSeries,
|
13 | 12 | ABCDataFrame)
|
14 | 13 | from pandas.core.dtypes.common import (
|
15 |
| - is_categorical_dtype, is_numeric_dtype, |
16 |
| - is_datetime64_dtype, is_timedelta64_dtype, |
17 |
| - is_list_like) |
| 14 | + is_categorical_dtype, is_list_like) |
18 | 15 |
|
19 | 16 | # 16 byte long hashing key
|
20 | 17 | _default_hash_key = '0123456789123456'
|
@@ -136,7 +133,6 @@ def hash_tuples(vals, encoding='utf8', hash_key=None):
|
136 | 133 | -------
|
137 | 134 | ndarray of hashed values array
|
138 | 135 | """
|
139 |
| - |
140 | 136 | is_tuple = False
|
141 | 137 | if isinstance(vals, tuple):
|
142 | 138 | vals = [vals]
|
@@ -231,29 +227,29 @@ def hash_array(vals, encoding='utf8', hash_key=None, categorize=True):
|
231 | 227 |
|
232 | 228 | if not hasattr(vals, 'dtype'):
|
233 | 229 | raise TypeError("must pass a ndarray-like")
|
| 230 | + dtype = vals.dtype |
234 | 231 |
|
235 | 232 | if hash_key is None:
|
236 | 233 | hash_key = _default_hash_key
|
237 | 234 |
|
238 | 235 | # For categoricals, we hash the categories, then remap the codes to the
|
239 | 236 | # hash values. (This check is above the complex check so that we don't ask
|
240 | 237 | # numpy if categorical is a subdtype of complex, as it will choke.)
|
241 |
| - if is_categorical_dtype(vals.dtype): |
| 238 | + if is_categorical_dtype(dtype): |
242 | 239 | return _hash_categorical(vals, encoding, hash_key)
|
243 | 240 |
|
244 | 241 | # we'll be working with everything as 64-bit values, so handle this
|
245 | 242 | # 128-bit value early
|
246 |
| - if np.issubdtype(vals.dtype, np.complex128): |
| 243 | + elif np.issubdtype(dtype, np.complex128): |
247 | 244 | return hash_array(vals.real) + 23 * hash_array(vals.imag)
|
248 | 245 |
|
249 | 246 | # First, turn whatever array this is into unsigned 64-bit ints, if we can
|
250 | 247 | # manage it.
|
251 |
| - if is_bool_array(vals): |
| 248 | + elif isinstance(dtype, np.bool): |
252 | 249 | vals = vals.astype('u8')
|
253 |
| - elif (is_datetime64_dtype(vals) or |
254 |
| - is_timedelta64_dtype(vals)): |
| 250 | + elif issubclass(dtype.type, (np.datetime64, np.timedelta64)): |
255 | 251 | vals = vals.view('i8').astype('u8', copy=False)
|
256 |
| - elif (is_numeric_dtype(vals) and vals.dtype.itemsize <= 8): |
| 252 | + elif issubclass(dtype.type, np.number) and dtype.itemsize <= 8: |
257 | 253 | vals = vals.view('u{}'.format(vals.dtype.itemsize)).astype('u8')
|
258 | 254 | else:
|
259 | 255 | # With repeated values, it's MUCH faster to categorize object dtypes,
|
|
0 commit comments