Skip to content

Commit 0e8331f

Browse files
authored
BUG: can't round-trip non-nano Timestamp (#51087)
* add test
* default to ns
* wip
* fixup tests
* fixup tests
* 🚚 out_reso -> reso; explicit None check
* keep test mixed
* fixup
* wip
* can undo change now
* fixup
* remove debug stmt
* fix logic
* fix precision calculation
* noop
* exhaust match
* use libc log10, reso -> out_reso, periods_per_second
* uncomment on:
* Revert "uncomment on:" (this reverts commit 27fd951)
* set unit to ns if None

---------

Co-authored-by: MarcoGorelli <>
1 parent 13db83a commit 0e8331f

File tree

5 files changed

+48
-35
lines changed

5 files changed

+48
-35
lines changed

pandas/_libs/tslibs/conversion.pxd

+2-2
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit,
4242
cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1
4343

4444
cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)
45-
cdef int64_t cast_from_unit(object ts, str unit) except? -1
46-
cpdef (int64_t, int) precision_from_unit(str unit)
45+
cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT out_reso=*) except? -1
46+
cpdef (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso=*)
4747

4848
cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)
4949

pandas/_libs/tslibs/conversion.pyx

+33-26
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22

33
cimport numpy as cnp
4+
from libc.math cimport log10
45
from numpy cimport (
56
int32_t,
67
int64_t,
@@ -81,7 +82,11 @@ TD64NS_DTYPE = np.dtype("m8[ns]")
8182
# ----------------------------------------------------------------------
8283
# Unit Conversion Helpers
8384

84-
cdef int64_t cast_from_unit(object ts, str unit) except? -1:
85+
cdef int64_t cast_from_unit(
86+
object ts,
87+
str unit,
88+
NPY_DATETIMEUNIT out_reso=NPY_FR_ns
89+
) except? -1:
8590
"""
8691
Return a casting of the unit represented to out_reso (nanoseconds by default)
8792
round the fractional part of a float to our precision, p.
@@ -99,7 +104,7 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1:
99104
int64_t m
100105
int p
101106

102-
m, p = precision_from_unit(unit)
107+
m, p = precision_from_unit(unit, out_reso)
103108

104109
# just give me the unit back
105110
if ts is None:
@@ -119,7 +124,7 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1:
119124
if is_float_object(ts):
120125
ts = int(ts)
121126
dt64obj = np.datetime64(ts, unit)
122-
return get_datetime64_nanos(dt64obj, NPY_FR_ns)
127+
return get_datetime64_nanos(dt64obj, out_reso)
123128

124129
# cast the unit, multiply base/frac separately
125130
# to avoid precision issues from float -> int
@@ -142,7 +147,10 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1:
142147
) from err
143148

144149

145-
cpdef inline (int64_t, int) precision_from_unit(str unit):
150+
cpdef inline (int64_t, int) precision_from_unit(
151+
str unit,
152+
NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns,
153+
):
146154
"""
147155
Return a casting of the unit represented to out_reso (nanoseconds by default) + the precision
148156
to round the fractional part.
@@ -154,45 +162,39 @@ cpdef inline (int64_t, int) precision_from_unit(str unit):
154162
"""
155163
cdef:
156164
int64_t m
165+
int64_t multiplier
157166
int p
158167
NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit)
159168

169+
multiplier = periods_per_second(out_reso)
170+
160171
if reso == NPY_DATETIMEUNIT.NPY_FR_Y:
161172
# each 400 years we have 97 leap years, for an average of 97/400=.2425
162173
# extra days each year. We get 31556952 by writing
163174
# 3600*24*365.2425=31556952
164-
m = 1_000_000_000 * 31556952
165-
p = 9
175+
m = multiplier * 31556952
166176
elif reso == NPY_DATETIMEUNIT.NPY_FR_M:
167177
# 2629746 comes from dividing the "Y" case by 12.
168-
m = 1_000_000_000 * 2629746
169-
p = 9
178+
m = multiplier * 2629746
170179
elif reso == NPY_DATETIMEUNIT.NPY_FR_W:
171-
m = 1_000_000_000 * 3600 * 24 * 7
172-
p = 9
180+
m = multiplier * 3600 * 24 * 7
173181
elif reso == NPY_DATETIMEUNIT.NPY_FR_D:
174-
m = 1_000_000_000 * 3600 * 24
175-
p = 9
182+
m = multiplier * 3600 * 24
176183
elif reso == NPY_DATETIMEUNIT.NPY_FR_h:
177-
m = 1_000_000_000 * 3600
178-
p = 9
184+
m = multiplier * 3600
179185
elif reso == NPY_DATETIMEUNIT.NPY_FR_m:
180-
m = 1_000_000_000 * 60
181-
p = 9
186+
m = multiplier * 60
182187
elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
183-
m = 1_000_000_000
184-
p = 9
188+
m = multiplier
185189
elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
186-
m = 1_000_000
187-
p = 6
190+
m = multiplier // 1_000
188191
elif reso == NPY_DATETIMEUNIT.NPY_FR_us:
189-
m = 1000
190-
p = 3
192+
m = multiplier // 1_000_000
191193
elif reso == NPY_DATETIMEUNIT.NPY_FR_ns or reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
192-
m = 1
193-
p = 0
194+
m = multiplier // 1_000_000_000
194195
else:
195196
raise ValueError(f"cannot cast unit {unit}")
197+
p = <int>log10(m) # number of digits in 'm' minus 1
196198
return m, p
197199

198200

@@ -294,9 +296,14 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
294296
if ts == NPY_NAT:
295297
obj.value = NPY_NAT
296298
else:
297-
ts = cast_from_unit(ts, unit)
299+
if unit is None:
300+
unit = "ns"
301+
in_reso = abbrev_to_npy_unit(unit)
302+
reso = get_supported_reso(in_reso)
303+
ts = cast_from_unit(ts, unit, reso)
298304
obj.value = ts
299-
pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts)
305+
obj.creso = reso
306+
pandas_datetime_to_datetimestruct(ts, reso, &obj.dts)
300307
elif is_float_object(ts):
301308
if ts != ts or ts == NPY_NAT:
302309
obj.value = NPY_NAT

pandas/tests/scalar/timestamp/test_constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def test_constructor_from_date_second_reso(self):
7676
@pytest.mark.parametrize("typ", [int, float])
7777
def test_construct_from_int_float_with_unit_out_of_bound_raises(self, typ):
7878
# GH#50870 make sure we get an OutOfBoundsDatetime instead of OverflowError
79-
val = typ(150000000)
79+
val = typ(150000000000000)
8080

8181
msg = f"cannot convert input {val} with the unit 'D'"
8282
with pytest.raises(OutOfBoundsDatetime, match=msg):

pandas/tests/scalar/timestamp/test_timestamp.py

+6
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,12 @@ def test_resolution(self, ts):
997997
assert result == expected
998998
assert result._creso == expected._creso
999999

1000+
def test_out_of_ns_bounds(self):
1001+
# https://github.com/pandas-dev/pandas/issues/51060
1002+
result = Timestamp(-52700112000, unit="s")
1003+
assert result == Timestamp("0300-01-01")
1004+
assert result.to_numpy() == np.datetime64("0300-01-01T00:00:00", "s")
1005+
10001006

10011007
def test_timestamp_class_min_max_resolution():
10021008
# when accessed on the class (as opposed to an instance), we default

pandas/tests/tools/test_to_datetime.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -1767,11 +1767,11 @@ def test_unit(self, cache):
17671767
to_datetime([1], unit="D", format="%Y%m%d", cache=cache)
17681768

17691769
def test_unit_array_mixed_nans(self, cache):
1770-
values = [11111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""]
1770+
values = [11111111111111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""]
17711771
result = to_datetime(values, unit="D", errors="ignore", cache=cache)
17721772
expected = Index(
17731773
[
1774-
11111111,
1774+
11111111111111111,
17751775
Timestamp("1970-01-02"),
17761776
Timestamp("1970-01-02"),
17771777
NaT,
@@ -1790,22 +1790,22 @@ def test_unit_array_mixed_nans(self, cache):
17901790
)
17911791
tm.assert_index_equal(result, expected)
17921792

1793-
msg = "cannot convert input 11111111 with the unit 'D'"
1793+
msg = "cannot convert input 11111111111111111 with the unit 'D'"
17941794
with pytest.raises(OutOfBoundsDatetime, match=msg):
17951795
to_datetime(values, unit="D", errors="raise", cache=cache)
17961796

17971797
def test_unit_array_mixed_nans_large_int(self, cache):
1798-
values = [1420043460000, iNaT, NaT, np.nan, "NaT"]
1798+
values = [1420043460000000000000000, iNaT, NaT, np.nan, "NaT"]
17991799

18001800
result = to_datetime(values, errors="ignore", unit="s", cache=cache)
1801-
expected = Index([1420043460000, NaT, NaT, NaT, NaT], dtype=object)
1801+
expected = Index([1420043460000000000000000, NaT, NaT, NaT, NaT], dtype=object)
18021802
tm.assert_index_equal(result, expected)
18031803

18041804
result = to_datetime(values, errors="coerce", unit="s", cache=cache)
18051805
expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"])
18061806
tm.assert_index_equal(result, expected)
18071807

1808-
msg = "cannot convert input 1420043460000 with the unit 's'"
1808+
msg = "cannot convert input 1420043460000000000000000 with the unit 's'"
18091809
with pytest.raises(OutOfBoundsDatetime, match=msg):
18101810
to_datetime(values, errors="raise", unit="s", cache=cache)
18111811

0 commit comments

Comments
 (0)