diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index b2948ac0fe54c..1b3214605582a 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -42,8 +42,8 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit, cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1 cpdef datetime localize_pydatetime(datetime dt, tzinfo tz) -cdef int64_t cast_from_unit(object ts, str unit) except? -1 -cpdef (int64_t, int) precision_from_unit(str unit) +cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT out_reso=*) except? -1 +cpdef (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso=*) cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 2c70d1681b051..03a53b1b451e9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -1,6 +1,7 @@ import numpy as np cimport numpy as cnp +from libc.math cimport log10 from numpy cimport ( int32_t, int64_t, @@ -81,7 +82,11 @@ TD64NS_DTYPE = np.dtype("m8[ns]") # ---------------------------------------------------------------------- # Unit Conversion Helpers -cdef int64_t cast_from_unit(object ts, str unit) except? -1: +cdef int64_t cast_from_unit( + object ts, + str unit, + NPY_DATETIMEUNIT out_reso=NPY_FR_ns +) except? -1: """ Return a casting of the unit represented to nanoseconds round the fractional part of a float to our precision, p. @@ -99,7 +104,7 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1: int64_t m int p - m, p = precision_from_unit(unit) + m, p = precision_from_unit(unit, out_reso) # just give me the unit back if ts is None: @@ -119,7 +124,7 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1: if is_float_object(ts): ts = int(ts) dt64obj = np.datetime64(ts, unit) - return get_datetime64_nanos(dt64obj, NPY_FR_ns) + return get_datetime64_nanos(dt64obj, out_reso) # cast the unit, multiply base/frac separately # to avoid precision issues from float -> int @@ -142,7 +147,10 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1: ) from err -cpdef inline (int64_t, int) precision_from_unit(str unit): +cpdef inline (int64_t, int) precision_from_unit( + str unit, + NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns, +): """ Return a casting of the unit represented to nanoseconds + the precision to round the fractional part. @@ -154,45 +162,39 @@ cpdef inline (int64_t, int) precision_from_unit(str unit): """ cdef: int64_t m + int64_t multiplier int p NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit) + multiplier = periods_per_second(out_reso) + if reso == NPY_DATETIMEUNIT.NPY_FR_Y: # each 400 years we have 97 leap years, for an average of 97/400=.2425 # extra days each year. We get 31556952 by writing # 3600*24*365.2425=31556952 - m = 1_000_000_000 * 31556952 - p = 9 + m = multiplier * 31556952 elif reso == NPY_DATETIMEUNIT.NPY_FR_M: # 2629746 comes from dividing the "Y" case by 12. - m = 1_000_000_000 * 2629746 - p = 9 + m = multiplier * 2629746 elif reso == NPY_DATETIMEUNIT.NPY_FR_W: - m = 1_000_000_000 * 3600 * 24 * 7 - p = 9 + m = multiplier * 3600 * 24 * 7 elif reso == NPY_DATETIMEUNIT.NPY_FR_D: - m = 1_000_000_000 * 3600 * 24 - p = 9 + m = multiplier * 3600 * 24 elif reso == NPY_DATETIMEUNIT.NPY_FR_h: - m = 1_000_000_000 * 3600 - p = 9 + m = multiplier * 3600 elif reso == NPY_DATETIMEUNIT.NPY_FR_m: - m = 1_000_000_000 * 60 - p = 9 + m = multiplier * 60 elif reso == NPY_DATETIMEUNIT.NPY_FR_s: - m = 1_000_000_000 - p = 9 + m = multiplier elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: - m = 1_000_000 - p = 6 + m = multiplier // 1_000 elif reso == NPY_DATETIMEUNIT.NPY_FR_us: - m = 1000 - p = 3 + m = multiplier // 1_000_000 elif reso == NPY_DATETIMEUNIT.NPY_FR_ns or reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: - m = 1 - p = 0 + m = multiplier // 1_000_000_000 else: raise ValueError(f"cannot cast unit {unit}") + p = log10(m) # number of digits in 'm' minus 1 return m, p @@ -294,9 +296,14 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, if ts == NPY_NAT: obj.value = NPY_NAT else: - ts = cast_from_unit(ts, unit) + if unit is None: + unit = "ns" + in_reso = abbrev_to_npy_unit(unit) + reso = get_supported_reso(in_reso) + ts = cast_from_unit(ts, unit, reso) obj.value = ts - pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts) + obj.creso = reso + pandas_datetime_to_datetimestruct(ts, reso, &obj.dts) elif is_float_object(ts): if ts != ts or ts == NPY_NAT: obj.value = NPY_NAT diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 5ea8fc53a6bab..7f615a18167ae 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -76,7 +76,7 @@ def test_constructor_from_date_second_reso(self): @pytest.mark.parametrize("typ", [int, float]) def test_construct_from_int_float_with_unit_out_of_bound_raises(self, typ): # GH#50870 make sure we get a OutOfBoundsDatetime instead of OverflowError - val = typ(150000000) + val = typ(150000000000000) msg = f"cannot convert input {val} with the unit 'D'" with pytest.raises(OutOfBoundsDatetime, match=msg): diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 968ec122cde20..1150e5985c181 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -997,6 +997,12 @@ def test_resolution(self, ts): assert result == expected assert result._creso == expected._creso + def test_out_of_ns_bounds(self): + # https://github.com/pandas-dev/pandas/issues/51060 + result = Timestamp(-52700112000, unit="s") + assert result == Timestamp("0300-01-01") + assert result.to_numpy() == np.datetime64("0300-01-01T00:00:00", "s") + def test_timestamp_class_min_max_resolution(): # when accessed on the class (as opposed to an instance), we default diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index f2a6edef979a6..ef5ace2d1f1ed 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1767,11 +1767,11 @@ def test_unit(self, cache): to_datetime([1], unit="D", format="%Y%m%d", cache=cache) def test_unit_array_mixed_nans(self, cache): - values = [11111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""] + values = [11111111111111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""] result = to_datetime(values, unit="D", errors="ignore", cache=cache) expected = Index( [ - 11111111, + 11111111111111111, Timestamp("1970-01-02"), Timestamp("1970-01-02"), NaT, @@ -1790,22 +1790,22 @@ def test_unit_array_mixed_nans(self, cache): ) tm.assert_index_equal(result, expected) - msg = "cannot convert input 11111111 with the unit 'D'" + msg = "cannot convert input 11111111111111111 with the unit 'D'" with pytest.raises(OutOfBoundsDatetime, match=msg): to_datetime(values, unit="D", errors="raise", cache=cache) def test_unit_array_mixed_nans_large_int(self, cache): - values = [1420043460000, iNaT, NaT, np.nan, "NaT"] + values = [1420043460000000000000000, iNaT, NaT, np.nan, "NaT"] result = to_datetime(values, errors="ignore", unit="s", cache=cache) - expected = Index([1420043460000, NaT, NaT, NaT, NaT], dtype=object) + expected = Index([1420043460000000000000000, NaT, NaT, NaT, NaT], dtype=object) tm.assert_index_equal(result, expected) result = to_datetime(values, errors="coerce", unit="s", cache=cache) expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"]) tm.assert_index_equal(result, expected) - msg = "cannot convert input 1420043460000 with the unit 's'" + msg = "cannot convert input 1420043460000000000000000 with the unit 's'" with pytest.raises(OutOfBoundsDatetime, match=msg): to_datetime(values, errors="raise", unit="s", cache=cache)