From 492c61385e25caf76377bf9819e21e7344e7a1bd Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Fri, 10 Jan 2020 14:45:32 -0600
Subject: [PATCH 1/2] REGR: Fixed hash_key=None for object values

Closes https://github.com/pandas-dev/pandas/issues/30887
---
 pandas/core/util/hashing.py       | 6 +++++-
 pandas/tests/util/test_hashing.py | 6 ++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py
index 43655fa3ea913..3366f10b92604 100644
--- a/pandas/core/util/hashing.py
+++ b/pandas/core/util/hashing.py
@@ -2,6 +2,7 @@
 data hash pandas / numpy objects
 """
 import itertools
+from typing import Optional
 
 import numpy as np
 
@@ -58,7 +59,7 @@ def hash_pandas_object(
     obj,
     index: bool = True,
     encoding: str = "utf8",
-    hash_key: str = _default_hash_key,
+    hash_key: Optional[str] = _default_hash_key,
     categorize: bool = True,
 ):
     """
@@ -82,6 +83,9 @@ def hash_pandas_object(
     """
     from pandas import Series
 
+    if hash_key is None:
+        hash_key = _default_hash_key
+
     if isinstance(obj, ABCMultiIndex):
         return Series(hash_tuples(obj, encoding, hash_key), dtype="uint64", copy=False)
 
diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py
index c915edad4bb8e..702f9bc98e7e2 100644
--- a/pandas/tests/util/test_hashing.py
+++ b/pandas/tests/util/test_hashing.py
@@ -374,3 +374,9 @@ def test_hash_with_tuple():
     df3 = pd.DataFrame({"data": [tuple([1, []]), tuple([2, {}])]})
     with pytest.raises(TypeError, match="unhashable type: 'list'"):
         hash_pandas_object(df3)
+
+
+def test_hash_object_none_key():
+    result = pd.util.hash_pandas_object(pd.Series(["a", "b"]), hash_key=None)
+    expected = pd.Series([4578374827886788867, 17338122309987883691], dtype="uint64")
+    tm.assert_series_equal(result, expected)

From 78e59ef35fa422ad845491bea58dbefa8a3c8fee Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Mon, 13 Jan 2020 08:17:14 -0600
Subject: [PATCH 2/2] ref

---
 pandas/tests/util/test_hashing.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py
index 702f9bc98e7e2..c856585f20138 100644
--- a/pandas/tests/util/test_hashing.py
+++ b/pandas/tests/util/test_hashing.py
@@ -377,6 +377,7 @@ def test_hash_with_tuple():
 
 
 def test_hash_object_none_key():
+    # https://github.com/pandas-dev/pandas/issues/30887
     result = pd.util.hash_pandas_object(pd.Series(["a", "b"]), hash_key=None)
     expected = pd.Series([4578374827886788867, 17338122309987883691], dtype="uint64")
     tm.assert_series_equal(result, expected)