diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 43655fa3ea913..3366f10b92604 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -2,6 +2,7 @@ data hash pandas / numpy objects """ import itertools +from typing import Optional import numpy as np @@ -58,7 +59,7 @@ def hash_pandas_object( obj, index: bool = True, encoding: str = "utf8", - hash_key: str = _default_hash_key, + hash_key: Optional[str] = _default_hash_key, categorize: bool = True, ): """ @@ -82,6 +83,9 @@ def hash_pandas_object( """ from pandas import Series + if hash_key is None: + hash_key = _default_hash_key + if isinstance(obj, ABCMultiIndex): return Series(hash_tuples(obj, encoding, hash_key), dtype="uint64", copy=False) diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index c915edad4bb8e..c856585f20138 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -374,3 +374,10 @@ def test_hash_with_tuple(): df3 = pd.DataFrame({"data": [tuple([1, []]), tuple([2, {}])]}) with pytest.raises(TypeError, match="unhashable type: 'list'"): hash_pandas_object(df3) + + +def test_hash_object_none_key(): + # https://github.com/pandas-dev/pandas/issues/30887 + result = pd.util.hash_pandas_object(pd.Series(["a", "b"]), hash_key=None) + expected = pd.Series([4578374827886788867, 17338122309987883691], dtype="uint64") + tm.assert_series_equal(result, expected)