From 8056cd1f63b1e8d2c9c7bb8b4ac31b19bffe7a98 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 30 Apr 2024 00:50:11 +0200 Subject: [PATCH 1/2] BUG: hashing read only object categories raises --- pandas/_libs/hashing.pyx | 3 ++- pandas/tests/arrays/categorical/test_algos.py | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index a9bf784d5f973..a1fd70529efa7 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -11,6 +11,7 @@ import numpy as np from numpy cimport ( import_array, + ndarray, uint8_t, uint64_t, ) @@ -22,7 +23,7 @@ from pandas._libs.util cimport is_nan @cython.boundscheck(False) def hash_object_array( - object[:] arr, str key, str encoding="utf8" + ndarray[object, ndim=1] arr, str key, str encoding="utf8" ) -> np.ndarray[np.uint64]: """ Parameters diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index 69c3364c7e98e..e83af813646f6 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -86,3 +86,11 @@ def test_diff(): df = ser.to_frame(name="A") with pytest.raises(TypeError, match=msg): df.diff() + + +def test_hash_read_only_categorical(): + # GH#58481 + idx = pd.Index(pd.Index(["a", "b", "c"], dtype="object").values) + cat = pd.CategoricalDtype(idx) + arr = pd.Series(["a", "b"], dtype=cat).values + assert hash(arr.dtype) == 1532899084736511412 From f7c409dcdf5017eb51f9ad5f5059dd6c8638474d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 30 Apr 2024 21:19:43 +0200 Subject: [PATCH 2/2] Fixup --- pandas/tests/arrays/categorical/test_algos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index e83af813646f6..a7d0becc30dd9 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -93,4 +93,4 @@ def test_hash_read_only_categorical(): idx = pd.Index(pd.Index(["a", "b", "c"], dtype="object").values) cat = pd.CategoricalDtype(idx) arr = pd.Series(["a", "b"], dtype=cat).values - assert hash(arr.dtype) == 1532899084736511412 + assert hash(arr.dtype) == hash(arr.dtype)