Skip to content

Commit d4e3963

Browse files
authored
BUG: DataFrame from dict with non-nano Timedelta (#48901)
* BUG: DataFrame from dict with non-nano Timedelta
* fix __hash__
* test for both hash cases
1 parent a99c1ad commit d4e3963

File tree

4 files changed

+71
-20
lines changed

4 files changed

+71
-20
lines changed

pandas/_libs/tslibs/timedeltas.pxd

+1
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ cdef class _Timedelta(timedelta):
2222

2323
cpdef timedelta to_pytimedelta(_Timedelta self)
2424
cdef bint _has_ns(self)
25+
cdef bint _is_in_pytimedelta_bounds(self)
2526
cdef _ensure_components(_Timedelta self)
2627
cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op)
2728
cdef _Timedelta _as_creso(self, NPY_DATETIMEUNIT reso, bint round_ok=*)

pandas/_libs/tslibs/timedeltas.pyx

+27-1
Original file line number | Diff line number | Diff line change
@@ -1093,8 +1093,27 @@ cdef class _Timedelta(timedelta):
10931093
# non-invariant behavior.
10941094
# see GH#44504
10951095
return hash(self.value)
1096-
else:
1096+
elif self._is_in_pytimedelta_bounds() and (
1097+
self._creso == NPY_FR_ns or self._creso == NPY_DATETIMEUNIT.NPY_FR_us
1098+
):
1099+
# If we can defer to timedelta.__hash__, do so, as that
1100+
# ensures the hash is invariant to our _creso.
1101+
# We can only defer for ns and us, as for these two resos we
1102+
# call _Timedelta.__new__ with the correct input in
1103+
# _timedelta_from_value_and_reso; so timedelta.__hash__
1104+
# will be correct
10971105
return timedelta.__hash__(self)
1106+
else:
1107+
# We want to ensure that two equivalent Timedelta objects
1108+
# have the same hash. So we try downcasting to the next-lowest
1109+
# resolution.
1110+
try:
1111+
obj = (<_Timedelta>self)._as_creso(<NPY_DATETIMEUNIT>(self._creso + 1))
1112+
except OverflowError:
1113+
# Doesn't fit, so we're off the hook
1114+
return hash(self.value)
1115+
else:
1116+
return hash(obj)
10981117

10991118
def __richcmp__(_Timedelta self, object other, int op):
11001119
cdef:
@@ -1152,6 +1171,13 @@ cdef class _Timedelta(timedelta):
11521171
else:
11531172
raise NotImplementedError(self._creso)
11541173

1174+
cdef bint _is_in_pytimedelta_bounds(self):
1175+
"""
1176+
Check if we are within the bounds of datetime.timedelta.
1177+
"""
1178+
self._ensure_components()
1179+
return -999999999 <= self._d and self._d <= 999999999
1180+
11551181
cdef _ensure_components(_Timedelta self):
11561182
"""
11571183
compute the components

pandas/tests/frame/test_constructors.py

+14-19
Original file line number | Diff line number | Diff line change
@@ -846,30 +846,19 @@ def create_data(constructor):
846846
tm.assert_frame_equal(result_Timestamp, expected)
847847

848848
@pytest.mark.parametrize(
849-
"klass",
849+
"klass,name",
850850
[
851-
pytest.param(
852-
np.timedelta64,
853-
marks=pytest.mark.xfail(
854-
reason="hash mismatch (GH#44504) causes lib.fast_multiget "
855-
"to mess up on dict lookups with equal Timedeltas with "
856-
"mismatched resos"
857-
),
858-
),
859-
timedelta,
860-
Timedelta,
851+
(lambda x: np.timedelta64(x, "D"), "timedelta64"),
852+
(lambda x: timedelta(days=x), "pytimedelta"),
853+
(lambda x: Timedelta(x, "D"), "Timedelta[ns]"),
854+
(lambda x: Timedelta(x, "D").as_unit("s"), "Timedelta[s]"),
861855
],
862856
)
863-
def test_constructor_dict_timedelta64_index(self, klass):
857+
def test_constructor_dict_timedelta64_index(self, klass, name):
864858
# GH 10160
865859
td_as_int = [1, 2, 3, 4]
866860

867-
if klass is timedelta:
868-
constructor = lambda x: timedelta(days=x)
869-
else:
870-
constructor = lambda x: klass(x, "D")
871-
872-
data = {i: {constructor(s): 2 * i} for i, s in enumerate(td_as_int)}
861+
data = {i: {klass(s): 2 * i} for i, s in enumerate(td_as_int)}
873862

874863
expected = DataFrame(
875864
[
@@ -881,7 +870,13 @@ def test_constructor_dict_timedelta64_index(self, klass):
881870
index=[Timedelta(td, "D") for td in td_as_int],
882871
)
883872

884-
result = DataFrame(data)
873+
if name == "Timedelta[s]":
874+
# TODO(2.0): passing index here shouldn't be necessary, but it is for now;
875+
# otherwise we raise in _extract_index
876+
result = DataFrame(data, index=expected.index)
877+
else:
878+
result = DataFrame(data)
879+
885880
tm.assert_frame_equal(result, expected)
886881

887882
def test_constructor_period_dict(self):

pandas/tests/libs/test_lib.py

+29
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
import pytest
33

44
from pandas._libs import (
5+
Timedelta,
56
lib,
67
writers as libwriters,
78
)
@@ -42,6 +43,34 @@ def test_fast_unique_multiple_list_gen_sort(self):
4243
out = lib.fast_unique_multiple_list_gen(gen, sort=False)
4344
tm.assert_numpy_array_equal(np.array(out), expected)
4445

46+
def test_fast_multiget_timedelta_resos(self):
47+
# This will become relevant for test_constructor_dict_timedelta64_index
48+
# once Timedelta constructor preserves reso when passed a
49+
# np.timedelta64 object
50+
td = Timedelta(days=1)
51+
52+
mapping1 = {td: 1}
53+
mapping2 = {td.as_unit("s"): 1}
54+
55+
oindex = Index([td * n for n in range(3)])._values.astype(object)
56+
57+
expected = lib.fast_multiget(mapping1, oindex)
58+
result = lib.fast_multiget(mapping2, oindex)
59+
tm.assert_numpy_array_equal(result, expected)
60+
61+
# case that can't be cast to td64ns
62+
td = Timedelta(np.timedelta64(400, "Y"))
63+
assert hash(td) == hash(td.as_unit("ms"))
64+
assert hash(td) == hash(td.as_unit("us"))
65+
mapping1 = {td: 1}
66+
mapping2 = {td.as_unit("ms"): 1}
67+
68+
oindex = Index([td * n for n in range(3)])._values.astype(object)
69+
70+
expected = lib.fast_multiget(mapping1, oindex)
71+
result = lib.fast_multiget(mapping2, oindex)
72+
tm.assert_numpy_array_equal(result, expected)
73+
4574

4675
class TestIndexing:
4776
def test_maybe_indices_to_slice_left_edge(self):

0 commit comments

Comments
 (0)