Skip to content

CLN: remove unused util.hashing functions #33511

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 0 additions & 59 deletions pandas/core/util/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@

import numpy as np

from pandas._libs import Timestamp
import pandas._libs.hashing as hashing

from pandas.core.dtypes.cast import infer_dtype_from_scalar
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_extension_array_dtype,
Expand All @@ -21,7 +19,6 @@
ABCMultiIndex,
ABCSeries,
)
from pandas.core.dtypes.missing import isna

# 16 byte long hashing key
_default_hash_key = "0123456789123456"
Expand Down Expand Up @@ -185,28 +182,6 @@ def hash_tuples(vals, encoding="utf8", hash_key: str = _default_hash_key):
return h


def hash_tuple(val, encoding: str = "utf8", hash_key: str = _default_hash_key):
    """
    Compute a single combined hash for one tuple.

    Every element of ``val`` is hashed individually with ``_hash_scalar``;
    the per-element results are then merged by ``_combine_hash_arrays``.

    Parameters
    ----------
    val : single tuple
    encoding : str, default 'utf8'
    hash_key : str, default _default_hash_key

    Returns
    -------
    hash
        Element 0 of the combined hash array.
    """
    # Generator expression: an element is hashed only when the combiner
    # pulls it from the iterator.
    element_hashes = (
        _hash_scalar(item, encoding=encoding, hash_key=hash_key) for item in val
    )
    combined = _combine_hash_arrays(element_hashes, len(val))
    return combined[0]


def _hash_categorical(c, encoding: str, hash_key: str):
"""
Hash a Categorical by hashing its categories, and then mapping the codes
Expand Down Expand Up @@ -321,37 +296,3 @@ def hash_array(
vals *= np.uint64(0x94D049BB133111EB)
vals ^= vals >> 31
return vals


def _hash_scalar(
    val, encoding: str = "utf8", hash_key: str = _default_hash_key
) -> np.ndarray:
    """
    Hash a single scalar by wrapping it in a one-element array.

    Parameters
    ----------
    val : scalar
    encoding : str, default "utf8"
    hash_key : str, default _default_hash_key

    Returns
    -------
    1d uint64 numpy array of hash value, of length 1
    """
    if isna(val):
        # Missing values hash to the largest uint64, which keeps this in
        # line with the _hash_categorical implementation.
        sentinel = np.iinfo(np.uint64).max
        return np.array([sentinel], dtype="u8")

    tz = getattr(val, "tzinfo", None)
    if tz is not None:
        # tz-aware datetimes are hashed via their naive UTC value, not via
        # the tz-aware object or pd extension type that
        # infer_dtype_from_scalar would otherwise produce.
        aware = val if isinstance(val, Timestamp) else Timestamp(val)
        val = aware.tz_convert(None)

    dtype, val = infer_dtype_from_scalar(val)
    boxed = np.array([val], dtype=dtype)

    return hash_array(
        boxed,
        hash_key=hash_key,
        encoding=encoding,
        categorize=False,
    )
44 changes: 1 addition & 43 deletions pandas/tests/util/test_hashing.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import datetime

import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series
import pandas._testing as tm
from pandas.core.util.hashing import _hash_scalar, hash_tuple, hash_tuples
from pandas.core.util.hashing import hash_tuples
from pandas.util import hash_array, hash_pandas_object


Expand Down Expand Up @@ -111,46 +109,6 @@ def test_hash_tuples():
assert result == expected[0]


@pytest.mark.parametrize(
    "tup",
    [
        (1, "one"),
        (1, np.nan),
        (1.0, pd.NaT, "A"),
        ("A", pd.Timestamp("2012-01-01")),
    ],
)
def test_hash_tuple(tup):
    """hash_tuple on one tuple must match hash_tuples on a one-item list."""
    # hash_tuples receives a list holding only ``tup``; its first entry is
    # the value the single-tuple path has to reproduce.
    assert hash_tuple(tup) == hash_tuples([tup])[0]


@pytest.mark.parametrize(
    "val",
    [
        1,
        1.4,
        "A",
        b"A",
        pd.Timestamp("2012-01-01"),
        pd.Timestamp("2012-01-01", tz="Europe/Brussels"),
        datetime.datetime(2012, 1, 1),
        pd.Timestamp("2012-01-01", tz="EST").to_pydatetime(),
        pd.Timedelta("1 days"),
        datetime.timedelta(1),
        pd.Period("2012-01-01", freq="D"),
        pd.Interval(0, 1),
        np.nan,
        pd.NaT,
        None,
    ],
)
def test_hash_scalar(val):
    """_hash_scalar must agree with hash_array on a one-element object array."""
    scalar_hash = _hash_scalar(val)

    # Reference value: the same scalar boxed in an object array and hashed
    # through hash_array with categorize=True.
    boxed = np.array([val], dtype=object)
    array_hash = hash_array(boxed, categorize=True)

    assert scalar_hash[0] == array_hash[0]


@pytest.mark.parametrize("val", [5, "foo", pd.Timestamp("20130101")])
def test_hash_tuples_err(val):
msg = "must be convertible to a list-of-tuples"
Expand Down