From 1e7315e314fecd3d5517fb2c06b0f87439ea55b3 Mon Sep 17 00:00:00 2001 From: Harald Husum Date: Mon, 10 Jul 2023 13:07:57 +0200 Subject: [PATCH 1/3] Add test for timedelta hash equality invariance --- .../tests/scalar/timedelta/test_timedelta.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index a83c6a8596575..38f26ad94ba13 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -1,5 +1,7 @@ """ test the scalar Timedelta """ from datetime import timedelta +import sys +from typing import Hashable from hypothesis import ( given, @@ -918,6 +920,30 @@ def test_timedelta_hash_equality(self): ns_td = Timedelta(1, "ns") assert hash(ns_td) != hash(ns_td.to_pytimedelta()) + @pytest.mark.xfail( + reason="pd.Timedelta violates the Python hash invariant (GH#44504).", + raises=AssertionError, + strict=True, + ) + @given(st.integers(min_value=(-sys.maxsize - 1) / 500, max_value=sys.maxsize / 500)) + def test_hash_equality_invariance(self, half_microseconds: int) -> None: + # GH#44504 + + def _upholds_hash_equality_invariance(v1: Hashable, v2: Hashable, /) -> bool: + if v1 != v2: + return True + + # See: https://docs.python.org/3/glossary.html#term-hashable + # Hashable objects which compare equal must have the same hash value. 
+ return hash(v1) == hash(v2) + + nanoseconds = half_microseconds * 500 + + pandas_timedelta = Timedelta(nanoseconds) + numpy_timedelta = np.timedelta64(nanoseconds) + + assert _upholds_hash_equality_invariance(pandas_timedelta, numpy_timedelta) + def test_implementation_limits(self): min_td = Timedelta(Timedelta.min) max_td = Timedelta(Timedelta.max) From 1d349af9ad539e27fdfd74ef50fe48b6b42bbd87 Mon Sep 17 00:00:00 2001 From: Harald Husum Date: Mon, 10 Jul 2023 13:48:23 +0200 Subject: [PATCH 2/3] Ensure int bounds --- pandas/tests/scalar/timedelta/test_timedelta.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 38f26ad94ba13..0d11125fa569f 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -925,7 +925,12 @@ def test_timedelta_hash_equality(self): raises=AssertionError, strict=True, ) - @given(st.integers(min_value=(-sys.maxsize - 1) / 500, max_value=sys.maxsize / 500)) + @given( + st.integers( + min_value=(-sys.maxsize - 1) // 500, + max_value=sys.maxsize // 500, + ) + ) def test_hash_equality_invariance(self, half_microseconds: int) -> None: # GH#44504 From 9e4ad76c51f45c58487d1d51ac85eb965ec0145e Mon Sep 17 00:00:00 2001 From: Harald Husum Date: Wed, 12 Jul 2023 10:38:54 +0200 Subject: [PATCH 3/3] PR feedback --- pandas/tests/scalar/timedelta/test_timedelta.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 0d11125fa569f..701cfdf157d26 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -1,7 +1,6 @@ """ test the scalar Timedelta """ from datetime import timedelta import sys -from typing import Hashable from hypothesis import ( given, @@ -923,7 +922,6 @@ def 
test_timedelta_hash_equality(self): @pytest.mark.xfail( reason="pd.Timedelta violates the Python hash invariant (GH#44504).", raises=AssertionError, - strict=True, ) @given( st.integers( @@ -934,20 +932,16 @@ def test_timedelta_hash_equality(self): def test_hash_equality_invariance(self, half_microseconds: int) -> None: # GH#44504 - def _upholds_hash_equality_invariance(v1: Hashable, v2: Hashable, /) -> bool: - if v1 != v2: - return True - - # See: https://docs.python.org/3/glossary.html#term-hashable - # Hashable objects which compare equal must have the same hash value. - return hash(v1) == hash(v2) - nanoseconds = half_microseconds * 500 pandas_timedelta = Timedelta(nanoseconds) numpy_timedelta = np.timedelta64(nanoseconds) - assert _upholds_hash_equality_invariance(pandas_timedelta, numpy_timedelta) + # See: https://docs.python.org/3/glossary.html#term-hashable + # Hashable objects which compare equal must have the same hash value. + assert pandas_timedelta != numpy_timedelta or hash(pandas_timedelta) == hash( + numpy_timedelta + ) def test_implementation_limits(self): min_td = Timedelta(Timedelta.min)