Skip to content

BUG: can't round-trip non-nano Timestamp #51087

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Feb 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit,
cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1

cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)
cdef int64_t cast_from_unit(object ts, str unit) except? -1
cpdef (int64_t, int) precision_from_unit(str unit)
cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT out_reso=*) except? -1
cpdef (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso=*)

cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)

Expand Down
59 changes: 33 additions & 26 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np

cimport numpy as cnp
from libc.math cimport log10
from numpy cimport (
int32_t,
int64_t,
Expand Down Expand Up @@ -81,7 +82,11 @@ TD64NS_DTYPE = np.dtype("m8[ns]")
# ----------------------------------------------------------------------
# Unit Conversion Helpers

cdef int64_t cast_from_unit(object ts, str unit) except? -1:
cdef int64_t cast_from_unit(
object ts,
str unit,
NPY_DATETIMEUNIT out_reso=NPY_FR_ns
) except? -1:
"""
Return a casting of the unit represented to nanoseconds
round the fractional part of a float to our precision, p.
Expand All @@ -99,7 +104,7 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1:
int64_t m
int p

m, p = precision_from_unit(unit)
m, p = precision_from_unit(unit, out_reso)

# just give me the unit back
if ts is None:
Expand All @@ -119,7 +124,7 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1:
if is_float_object(ts):
ts = int(ts)
dt64obj = np.datetime64(ts, unit)
return get_datetime64_nanos(dt64obj, NPY_FR_ns)
return get_datetime64_nanos(dt64obj, out_reso)

# cast the unit, multiply base/frac separately
# to avoid precision issues from float -> int
Expand All @@ -142,7 +147,10 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1:
) from err


cpdef inline (int64_t, int) precision_from_unit(str unit):
cpdef inline (int64_t, int) precision_from_unit(
str unit,
NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns,
):
"""
Return a casting of the unit represented to nanoseconds + the precision
to round the fractional part.
Expand All @@ -154,45 +162,39 @@ cpdef inline (int64_t, int) precision_from_unit(str unit):
"""
cdef:
int64_t m
int64_t multiplier
int p
NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit)

multiplier = periods_per_second(out_reso)

if reso == NPY_DATETIMEUNIT.NPY_FR_Y:
# each 400 years we have 97 leap years, for an average of 97/400=.2425
# extra days each year. We get 31556952 by writing
# 3600*24*365.2425=31556952
m = 1_000_000_000 * 31556952
p = 9
m = multiplier * 31556952
elif reso == NPY_DATETIMEUNIT.NPY_FR_M:
# 2629746 comes from dividing the "Y" case by 12.
m = 1_000_000_000 * 2629746
p = 9
m = multiplier * 2629746
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm pretty sure the M and Y cases in precision_from_unit are no longer reached, in which case I think the entire computation of m in this function can be replaced with get_conversion_factor (OK to consider this out of scope).

elif reso == NPY_DATETIMEUNIT.NPY_FR_W:
m = 1_000_000_000 * 3600 * 24 * 7
p = 9
m = multiplier * 3600 * 24 * 7
elif reso == NPY_DATETIMEUNIT.NPY_FR_D:
m = 1_000_000_000 * 3600 * 24
p = 9
m = multiplier * 3600 * 24
elif reso == NPY_DATETIMEUNIT.NPY_FR_h:
m = 1_000_000_000 * 3600
p = 9
m = multiplier * 3600
elif reso == NPY_DATETIMEUNIT.NPY_FR_m:
m = 1_000_000_000 * 60
p = 9
m = multiplier * 60
elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
m = 1_000_000_000
p = 9
m = multiplier
elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
m = 1_000_000
p = 6
m = multiplier // 1_000
elif reso == NPY_DATETIMEUNIT.NPY_FR_us:
m = 1000
p = 3
m = multiplier // 1_000_000
elif reso == NPY_DATETIMEUNIT.NPY_FR_ns or reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
m = 1
p = 0
m = multiplier // 1_000_000_000
else:
raise ValueError(f"cannot cast unit {unit}")
p = <int>log10(m) # number of digits in 'm' minus 1
return m, p


Expand Down Expand Up @@ -294,9 +296,14 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
if ts == NPY_NAT:
obj.value = NPY_NAT
else:
ts = cast_from_unit(ts, unit)
if unit is None:
unit = "ns"
in_reso = abbrev_to_npy_unit(unit)
reso = get_supported_reso(in_reso)
ts = cast_from_unit(ts, unit, reso)
obj.value = ts
pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts)
obj.creso = reso
pandas_datetime_to_datetimestruct(ts, reso, &obj.dts)
elif is_float_object(ts):
if ts != ts or ts == NPY_NAT:
obj.value = NPY_NAT
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/scalar/timestamp/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def test_constructor_from_date_second_reso(self):
@pytest.mark.parametrize("typ", [int, float])
def test_construct_from_int_float_with_unit_out_of_bound_raises(self, typ):
# GH#50870 make sure we get a OutOfBoundsDatetime instead of OverflowError
val = typ(150000000)
val = typ(150000000000000)

msg = f"cannot convert input {val} with the unit 'D'"
with pytest.raises(OutOfBoundsDatetime, match=msg):
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/scalar/timestamp/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,12 @@ def test_resolution(self, ts):
assert result == expected
assert result._creso == expected._creso

def test_out_of_ns_bounds(self):
# https://github.com/pandas-dev/pandas/issues/51060
result = Timestamp(-52700112000, unit="s")
assert result == Timestamp("0300-01-01")
assert result.to_numpy() == np.datetime64("0300-01-01T00:00:00", "s")


def test_timestamp_class_min_max_resolution():
# when accessed on the class (as opposed to an instance), we default
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1767,11 +1767,11 @@ def test_unit(self, cache):
to_datetime([1], unit="D", format="%Y%m%d", cache=cache)

def test_unit_array_mixed_nans(self, cache):
values = [11111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""]
values = [11111111111111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""]
result = to_datetime(values, unit="D", errors="ignore", cache=cache)
expected = Index(
[
11111111,
11111111111111111,
Timestamp("1970-01-02"),
Timestamp("1970-01-02"),
NaT,
Expand All @@ -1790,22 +1790,22 @@ def test_unit_array_mixed_nans(self, cache):
)
tm.assert_index_equal(result, expected)

msg = "cannot convert input 11111111 with the unit 'D'"
msg = "cannot convert input 11111111111111111 with the unit 'D'"
with pytest.raises(OutOfBoundsDatetime, match=msg):
to_datetime(values, unit="D", errors="raise", cache=cache)

def test_unit_array_mixed_nans_large_int(self, cache):
values = [1420043460000, iNaT, NaT, np.nan, "NaT"]
values = [1420043460000000000000000, iNaT, NaT, np.nan, "NaT"]

result = to_datetime(values, errors="ignore", unit="s", cache=cache)
expected = Index([1420043460000, NaT, NaT, NaT, NaT], dtype=object)
expected = Index([1420043460000000000000000, NaT, NaT, NaT, NaT], dtype=object)
tm.assert_index_equal(result, expected)

result = to_datetime(values, errors="coerce", unit="s", cache=cache)
expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"])
tm.assert_index_equal(result, expected)

msg = "cannot convert input 1420043460000 with the unit 's'"
msg = "cannot convert input 1420043460000000000000000 with the unit 's'"
with pytest.raises(OutOfBoundsDatetime, match=msg):
to_datetime(values, errors="raise", unit="s", cache=cache)

Expand Down