From ce401f0a045224581109f6dc0c525670abaa539a Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Mon, 13 Jan 2020 09:22:03 -0600
Subject: [PATCH] Backport PR #30900: REGR: Fixed hash_key=None for object values

---
 pandas/core/util/hashing.py       | 6 +++++-
 pandas/tests/util/test_hashing.py | 7 +++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py
index 43655fa3ea913..3366f10b92604 100644
--- a/pandas/core/util/hashing.py
+++ b/pandas/core/util/hashing.py
@@ -2,6 +2,7 @@
 data hash pandas / numpy objects
 """
 import itertools
+from typing import Optional
 
 import numpy as np
 
@@ -58,7 +59,7 @@ def hash_pandas_object(
     obj,
     index: bool = True,
     encoding: str = "utf8",
-    hash_key: str = _default_hash_key,
+    hash_key: Optional[str] = _default_hash_key,
     categorize: bool = True,
 ):
     """
@@ -82,6 +83,9 @@ def hash_pandas_object(
     """
     from pandas import Series
 
+    if hash_key is None:
+        hash_key = _default_hash_key
+
     if isinstance(obj, ABCMultiIndex):
         return Series(hash_tuples(obj, encoding, hash_key), dtype="uint64", copy=False)
 
diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py
index c915edad4bb8e..c856585f20138 100644
--- a/pandas/tests/util/test_hashing.py
+++ b/pandas/tests/util/test_hashing.py
@@ -374,3 +374,10 @@ def test_hash_with_tuple():
     df3 = pd.DataFrame({"data": [tuple([1, []]), tuple([2, {}])]})
     with pytest.raises(TypeError, match="unhashable type: 'list'"):
         hash_pandas_object(df3)
+
+
+def test_hash_object_none_key():
+    # https://github.com/pandas-dev/pandas/issues/30887
+    result = pd.util.hash_pandas_object(pd.Series(["a", "b"]), hash_key=None)
+    expected = pd.Series([4578374827886788867, 17338122309987883691], dtype="uint64")
+    tm.assert_series_equal(result, expected)