diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index d9c8611c94cdb..ebfba5a1e1ff6 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -90,13 +90,13 @@ def hash_pandas_object( return Series(hash_tuples(obj, encoding, hash_key), dtype="uint64", copy=False) elif isinstance(obj, ABCIndexClass): - h = hash_array(obj.values, encoding, hash_key, categorize).astype( + h = hash_array(obj._values, encoding, hash_key, categorize).astype( "uint64", copy=False ) h = Series(h, index=obj, dtype="uint64", copy=False) elif isinstance(obj, ABCSeries): - h = hash_array(obj.values, encoding, hash_key, categorize).astype( + h = hash_array(obj._values, encoding, hash_key, categorize).astype( "uint64", copy=False ) if index: @@ -107,7 +107,7 @@ def hash_pandas_object( encoding=encoding, hash_key=hash_key, categorize=categorize, - ).values + )._values for _ in [None] ) arrays = itertools.chain([h], index_iter) @@ -116,7 +116,7 @@ def hash_pandas_object( h = Series(h, index=obj.index, dtype="uint64", copy=False) elif isinstance(obj, ABCDataFrame): - hashes = (hash_array(series.values) for _, series in obj.items()) + hashes = (hash_array(series._values) for _, series in obj.items()) num_items = len(obj.columns) if index: index_hash_generator = ( @@ -126,7 +126,7 @@ def hash_pandas_object( encoding=encoding, hash_key=hash_key, categorize=categorize, - ).values # noqa + )._values for _ in [None] ) num_items += 1 @@ -185,28 +185,6 @@ def hash_tuples(vals, encoding="utf8", hash_key: str = _default_hash_key): return h -def hash_tuple(val, encoding: str = "utf8", hash_key: str = _default_hash_key): - """ - Hash a single tuple efficiently - - Parameters - ---------- - val : single tuple - encoding : str, default 'utf8' - hash_key : str, default _default_hash_key - - Returns - ------- - hash - - """ - hashes = (_hash_scalar(v, encoding=encoding, hash_key=hash_key) for v in val) - - h = _combine_hash_arrays(hashes, len(val))[0] - - return h - - def _hash_categorical(c, encoding: str, hash_key: str): """ Hash a Categorical by hashing its categories, and then mapping the codes @@ -223,7 +201,7 @@ def _hash_categorical(c, encoding: str, hash_key: str): ndarray of hashed values array, same size as len(c) """ # Convert ExtensionArrays to ndarrays - values = np.asarray(c.categories.values) + values = np.asarray(c.categories._values) hashed = hash_array(values, encoding, hash_key, categorize=False) # we have uint64, as we don't directly support missing values diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index 6411b9ab654f1..ff29df39e1871 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -1,12 +1,10 @@ -import datetime - import numpy as np import pytest import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series import pandas._testing as tm -from pandas.core.util.hashing import _hash_scalar, hash_tuple, hash_tuples +from pandas.core.util.hashing import hash_tuples from pandas.util import hash_array, hash_pandas_object @@ -111,46 +109,6 @@ def test_hash_tuples(): assert result == expected[0] -@pytest.mark.parametrize( - "tup", - [(1, "one"), (1, np.nan), (1.0, pd.NaT, "A"), ("A", pd.Timestamp("2012-01-01"))], -) -def test_hash_tuple(tup): - # Test equivalence between - # hash_tuples and hash_tuple. - result = hash_tuple(tup) - expected = hash_tuples([tup])[0] - - assert result == expected - - -@pytest.mark.parametrize( - "val", - [ - 1, - 1.4, - "A", - b"A", - pd.Timestamp("2012-01-01"), - pd.Timestamp("2012-01-01", tz="Europe/Brussels"), - datetime.datetime(2012, 1, 1), - pd.Timestamp("2012-01-01", tz="EST").to_pydatetime(), - pd.Timedelta("1 days"), - datetime.timedelta(1), - pd.Period("2012-01-01", freq="D"), - pd.Interval(0, 1), - np.nan, - pd.NaT, - None, - ], -) -def test_hash_scalar(val): - result = _hash_scalar(val) - expected = hash_array(np.array([val], dtype=object), categorize=True) - - assert result[0] == expected[0] - - @pytest.mark.parametrize("val", [5, "foo", pd.Timestamp("20130101")]) def test_hash_tuples_err(val): msg = "must be convertible to a list-of-tuples"