diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index d9c8611c94cdb..3406753a3b5ba 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -6,10 +6,8 @@ import numpy as np -from pandas._libs import Timestamp import pandas._libs.hashing as hashing -from pandas.core.dtypes.cast import infer_dtype_from_scalar from pandas.core.dtypes.common import ( is_categorical_dtype, is_extension_array_dtype, @@ -21,7 +19,6 @@ ABCMultiIndex, ABCSeries, ) -from pandas.core.dtypes.missing import isna # 16 byte long hashing key _default_hash_key = "0123456789123456" @@ -185,28 +182,6 @@ def hash_tuples(vals, encoding="utf8", hash_key: str = _default_hash_key): return h -def hash_tuple(val, encoding: str = "utf8", hash_key: str = _default_hash_key): - """ - Hash a single tuple efficiently - - Parameters - ---------- - val : single tuple - encoding : str, default 'utf8' - hash_key : str, default _default_hash_key - - Returns - ------- - hash - - """ - hashes = (_hash_scalar(v, encoding=encoding, hash_key=hash_key) for v in val) - - h = _combine_hash_arrays(hashes, len(val))[0] - - return h - - def _hash_categorical(c, encoding: str, hash_key: str): """ Hash a Categorical by hashing its categories, and then mapping the codes @@ -321,37 +296,3 @@ def hash_array( vals *= np.uint64(0x94D049BB133111EB) vals ^= vals >> 31 return vals - - -def _hash_scalar( - val, encoding: str = "utf8", hash_key: str = _default_hash_key -) -> np.ndarray: - """ - Hash scalar value. - - Parameters - ---------- - val : scalar - encoding : str, default "utf8" - hash_key : str, default _default_hash_key - - Returns - ------- - 1d uint64 numpy array of hash value, of length 1 - """ - if isna(val): - # this is to be consistent with the _hash_categorical implementation - return np.array([np.iinfo(np.uint64).max], dtype="u8") - - if getattr(val, "tzinfo", None) is not None: - # for tz-aware datetimes, we need the underlying naive UTC value and - # not the tz aware object or pd extension type (as - # infer_dtype_from_scalar would do) - if not isinstance(val, Timestamp): - val = Timestamp(val) - val = val.tz_convert(None) - - dtype, val = infer_dtype_from_scalar(val) - vals = np.array([val], dtype=dtype) - - return hash_array(vals, hash_key=hash_key, encoding=encoding, categorize=False) diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index 6411b9ab654f1..ff29df39e1871 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -1,12 +1,10 @@ -import datetime - import numpy as np import pytest import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series import pandas._testing as tm -from pandas.core.util.hashing import _hash_scalar, hash_tuple, hash_tuples +from pandas.core.util.hashing import hash_tuples from pandas.util import hash_array, hash_pandas_object @@ -111,46 +109,6 @@ def test_hash_tuples(): assert result == expected[0] -@pytest.mark.parametrize( - "tup", - [(1, "one"), (1, np.nan), (1.0, pd.NaT, "A"), ("A", pd.Timestamp("2012-01-01"))], -) -def test_hash_tuple(tup): - # Test equivalence between - # hash_tuples and hash_tuple. - result = hash_tuple(tup) - expected = hash_tuples([tup])[0] - - assert result == expected - - -@pytest.mark.parametrize( - "val", - [ - 1, - 1.4, - "A", - b"A", - pd.Timestamp("2012-01-01"), - pd.Timestamp("2012-01-01", tz="Europe/Brussels"), - datetime.datetime(2012, 1, 1), - pd.Timestamp("2012-01-01", tz="EST").to_pydatetime(), - pd.Timedelta("1 days"), - datetime.timedelta(1), - pd.Period("2012-01-01", freq="D"), - pd.Interval(0, 1), - np.nan, - pd.NaT, - None, - ], -) -def test_hash_scalar(val): - result = _hash_scalar(val) - expected = hash_array(np.array([val], dtype=object), categorize=True) - - assert result[0] == expected[0] - - @pytest.mark.parametrize("val", [5, "foo", pd.Timestamp("20130101")]) def test_hash_tuples_err(val): msg = "must be convertible to a list-of-tuples"