From d94ef37159ae021c5ad632a0aad8b926cf0e735e Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 31 Jan 2023 15:29:01 +0000 Subject: [PATCH 01/20] add test --- pandas/_libs/tslibs/conversion.pyx | 9 ++++++--- pandas/tests/scalar/timestamp/test_timestamp.py | 6 ++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 5b636ff69a6a6..a72b42229adcc 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -297,9 +297,12 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, if ts == NPY_NAT: obj.value = NPY_NAT else: - ts = cast_from_unit(ts, unit) - obj.value = ts - pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts) + in_reso = abbrev_to_npy_unit(unit) + out_reso = get_supported_reso(in_reso) + value = convert_reso(ts, in_reso, out_reso, False) + obj.value = value + obj.creso = out_reso + pandas_datetime_to_datetimestruct(ts, out_reso, &obj.dts) elif is_float_object(ts): if ts != ts or ts == NPY_NAT: obj.value = NPY_NAT diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 22f5286569c6e..215cc494da112 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -996,6 +996,12 @@ def test_resolution(self, ts): assert result == expected assert result._creso == expected._creso + def test_out_of_ns_bounds(self): + # https://github.com/pandas-dev/pandas/issues/51060 + result = Timestamp(-52700112000, unit="s") + assert result == Timestamp("0300-01-01") + assert result.to_numpy() == np.datetime64("0300-01-01T00:00:00", "s") + def test_timestamp_class_min_max_resolution(): # when accessed on the class (as opposed to an instance), we default From 9a57c9a95c8d9c48a785fad7873687bbc1207b9c Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 31 Jan 2023 16:03:36 +0000 Subject: [PATCH 02/20] default to ns --- pandas/_libs/tslibs/conversion.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index a72b42229adcc..0bc5fd0c65248 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -297,7 +297,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, if ts == NPY_NAT: obj.value = NPY_NAT else: - in_reso = abbrev_to_npy_unit(unit) + in_reso = abbrev_to_npy_unit(unit or "ns") out_reso = get_supported_reso(in_reso) value = convert_reso(ts, in_reso, out_reso, False) obj.value = value From 30bb898821e3c1f95df5da1d96e77b5ed3db0bf4 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 31 Jan 2023 17:07:55 +0000 Subject: [PATCH 03/20] wip --- pandas/_libs/tslibs/conversion.pyx | 11 ++++++++++- pandas/tests/scalar/timestamp/test_constructors.py | 2 +- pandas/tests/scalar/timestamp/test_timestamp.py | 1 + 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 0bc5fd0c65248..b61333812f953 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -299,9 +299,18 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, else: in_reso = abbrev_to_npy_unit(unit or "ns") out_reso = get_supported_reso(in_reso) - value = convert_reso(ts, in_reso, out_reso, False) + try: + value = convert_reso(ts, in_reso, out_reso, False) + except OverflowError: + # GH#26651 re-raise as OutOfBoundsDatetime + raise OutOfBoundsDatetime(f"cannot convert input {ts} with the unit '{unit}'") obj.value = value obj.creso = out_reso + print('ts', ts) + print('value', value) + print('in_reso', in_reso) + print('out_reso', out_reso) + print('unit', unit) pandas_datetime_to_datetimestruct(ts, out_reso, &obj.dts) elif is_float_object(ts): if ts != ts or ts == NPY_NAT: diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index c6ceb2fcb0ebd..34aa266cc9471 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -58,7 +58,7 @@ def test_constructor_from_date_second_reso(self): @pytest.mark.parametrize("typ", [int, float]) def test_construct_from_int_float_with_unit_out_of_bound_raises(self, typ): # GH#50870 make sure we get a OutOfBoundsDatetime instead of OverflowError - val = typ(150000000) + val = typ(150000000000000) msg = f"cannot convert input {val} with the unit 'D'" with pytest.raises(OutOfBoundsDatetime, match=msg): diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 215cc494da112..b86d4083ac076 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -363,6 +363,7 @@ def test_basics_nanos(self): ) def test_unit(self, value, check_kwargs): def check(value, unit=None, h=1, s=1, us=0, ns=0): + breakpoint() stamp = Timestamp(value, unit=unit) assert stamp.year == 2000 assert stamp.month == 1 From 648037ec065681a79cbe2865466ec2ba88faeb1a Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 31 Jan 2023 17:29:09 +0000 Subject: [PATCH 04/20] fixup tests --- pandas/_libs/tslibs/conversion.pyx | 10 +++------- pandas/tests/scalar/timestamp/test_timestamp.py | 1 - pandas/tests/tools/test_to_datetime.py | 6 ++++-- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index b61333812f953..899aa57043d53 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -303,15 +303,11 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, value = convert_reso(ts, in_reso, out_reso, False) except OverflowError: # GH#26651 re-raise as OutOfBoundsDatetime - raise OutOfBoundsDatetime(f"cannot convert input {ts} with the unit '{unit}'") + raise OutOfBoundsDatetime( + f"cannot convert input {ts} with the unit '{unit}'") obj.value = value obj.creso = out_reso - print('ts', ts) - print('value', value) - print('in_reso', in_reso) - print('out_reso', out_reso) - print('unit', unit) - pandas_datetime_to_datetimestruct(ts, out_reso, &obj.dts) + pandas_datetime_to_datetimestruct(value, out_reso, &obj.dts) elif is_float_object(ts): if ts != ts or ts == NPY_NAT: obj.value = NPY_NAT diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index b86d4083ac076..215cc494da112 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -363,7 +363,6 @@ def test_basics_nanos(self): ) def test_unit(self, value, check_kwargs): def check(value, unit=None, h=1, s=1, us=0, ns=0): - breakpoint() stamp = Timestamp(value, unit=unit) assert stamp.year == 2000 assert stamp.month == 1 diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index a1217b268613a..6aa085d1a8d60 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1765,7 +1765,7 @@ def test_unit_array_mixed_nans(self, cache): result = to_datetime(values, unit="D", errors="ignore", cache=cache) expected = Index( [ - 11111111, + Timestamp(11111111, unit="D"), Timestamp("1970-01-02"), Timestamp("1970-01-02"), NaT, @@ -1792,7 +1792,9 @@ def test_unit_array_mixed_nans_large_int(self, cache): values = [1420043460000, iNaT, NaT, np.nan, "NaT"] result = to_datetime(values, errors="ignore", unit="s", cache=cache) - expected = Index([1420043460000, NaT, NaT, NaT, NaT], dtype=object) + expected = Index( + [Timestamp(1420043460000, unit="s"), NaT, NaT, NaT, NaT], dtype=object + ) tm.assert_index_equal(result, expected) result = to_datetime(values, errors="coerce", unit="s", cache=cache) From bada08976382afce960f7fc26a52363ee4b9e206 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 31 Jan 2023 17:29:15 +0000 Subject: [PATCH 05/20] fixup tests --- pandas/_libs/tslibs/conversion.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 899aa57043d53..17d869025bd0f 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -304,7 +304,8 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, except OverflowError: # GH#26651 re-raise as OutOfBoundsDatetime raise OutOfBoundsDatetime( - f"cannot convert input {ts} with the unit '{unit}'") + f"cannot convert input {ts} with the unit '{unit}'" + ) obj.value = value obj.creso = out_reso pandas_datetime_to_datetimestruct(value, out_reso, &obj.dts) From f9a4efcfcb0d03f6c9981e5ad21d2fb9af7b99af Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 2 Feb 2023 09:13:21 +0000 Subject: [PATCH 06/20] :truck: out_reso -> reso; explicit None check --- pandas/_libs/tslibs/conversion.pyx | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 5f13f3fe857cb..faaf35105e854 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -294,18 +294,21 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, if ts == NPY_NAT: obj.value = NPY_NAT else: - in_reso = abbrev_to_npy_unit(unit or "ns") - out_reso = get_supported_reso(in_reso) + if unit is None: + in_reso = abbrev_to_npy_unit("ns") + else: + in_reso = abbrev_to_npy_unit(unit) + reso = get_supported_reso(in_reso) try: - value = convert_reso(ts, in_reso, out_reso, False) + value = convert_reso(ts, in_reso, reso, False) except OverflowError: # GH#26651 re-raise as OutOfBoundsDatetime raise OutOfBoundsDatetime( f"cannot convert input {ts} with the unit '{unit}'" ) obj.value = value - obj.creso = out_reso - pandas_datetime_to_datetimestruct(value, out_reso, &obj.dts) + obj.creso = reso + pandas_datetime_to_datetimestruct(value, reso, &obj.dts) elif is_float_object(ts): if ts != ts or ts == NPY_NAT: obj.value = NPY_NAT From 6a5e5f1aa92d0d755dfa8b8c7ee4770b50d069ac Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 6 Feb 2023 16:58:55 +0000 Subject: [PATCH 07/20] keep test mixed --- pandas/tests/tools/test_to_datetime.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 889571a0b6c5f..a872dd17d5573 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1767,11 +1767,11 @@ def test_unit(self, cache): to_datetime([1], unit="D", format="%Y%m%d", cache=cache) def test_unit_array_mixed_nans(self, cache): - values = [11111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""] + values = [11111111111111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""] result = to_datetime(values, unit="D", errors="ignore", cache=cache) expected = Index( [ - Timestamp(11111111, unit="D"), + 11111111111111111, Timestamp("1970-01-02"), Timestamp("1970-01-02"), NaT, @@ -1790,7 +1790,7 @@ def test_unit_array_mixed_nans(self, cache): ) tm.assert_index_equal(result, expected) - msg = "cannot convert input 11111111 with the unit 'D'" + msg = "cannot convert input 11111111111111111 with the unit 'D'" with pytest.raises(OutOfBoundsDatetime, match=msg): to_datetime(values, unit="D", errors="raise", cache=cache) From f5abfebc0a4662b5e5fdad1713ef89a979e628cd Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 7 Feb 2023 11:40:38 +0000 Subject: [PATCH 08/20] fixup --- pandas/_libs/tslibs/conversion.pxd | 4 +- pandas/_libs/tslibs/conversion.pyx | 74 +++++++++++++++++------------- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index b2948ac0fe54c..e45117085f778 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -42,8 +42,8 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit, cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1 cpdef datetime localize_pydatetime(datetime dt, tzinfo tz) -cdef int64_t cast_from_unit(object ts, str unit) except? -1 -cpdef (int64_t, int) precision_from_unit(str unit) +cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT reso=*) except? -1 +cpdef (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso=*) cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index faaf35105e854..7dfa1b32f7c55 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -81,7 +81,7 @@ TD64NS_DTYPE = np.dtype("m8[ns]") # ---------------------------------------------------------------------- # Unit Conversion Helpers -cdef int64_t cast_from_unit(object ts, str unit) except? -1: +cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT reso=NPY_FR_ns) except? -1: """ Return a casting of the unit represented to nanoseconds round the fractional part of a float to our precision, p. @@ -99,7 +99,7 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1: int64_t m int p - m, p = precision_from_unit(unit) + m, p = precision_from_unit(unit, reso) # just give me the unit back if ts is None: @@ -119,7 +119,7 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1: if is_float_object(ts): ts = int(ts) dt64obj = np.datetime64(ts, unit) - return get_datetime64_nanos(dt64obj, NPY_FR_ns) + return get_datetime64_nanos(dt64obj, reso) # cast the unit, multiply base/frac separately # to avoid precision issues from float -> int @@ -142,7 +142,7 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1: ) from err -cpdef inline (int64_t, int) precision_from_unit(str unit): +cpdef inline (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns): """ Return a casting of the unit represented to nanoseconds + the precision to round the fractional part. @@ -156,41 +156,55 @@ cpdef inline (int64_t, int) precision_from_unit(str unit): int64_t m int p NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit) + int64_t multiplier + + if out_reso == NPY_DATETIMEUNIT.NPY_FR_ns: + multiplier = 1_000_000_000 + p = 0 + elif out_reso == NPY_DATETIMEUNIT.NPY_FR_us: + multiplier = 1_000_000 + p = 3 + elif out_reso == NPY_DATETIMEUNIT.NPY_FR_ms: + multiplier = 1_000 + p = 6 + elif out_reso == NPY_DATETIMEUNIT.NPY_FR_s: + multiplier = 1 + p = 9 if reso == NPY_DATETIMEUNIT.NPY_FR_Y: # each 400 years we have 97 leap years, for an average of 97/400=.2425 # extra days each year. We get 31556952 by writing # 3600*24*365.2425=31556952 - m = 1_000_000_000 * 31556952 - p = 9 + m = multiplier * 31556952 + #p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_M: # 2629746 comes from dividing the "Y" case by 12. - m = 1_000_000_000 * 2629746 - p = 9 + m = multiplier * 2629746 + #p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_W: - m = 1_000_000_000 * 3600 * 24 * 7 - p = 9 + m = multiplier * 3600 * 24 * 7 + #p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_D: - m = 1_000_000_000 * 3600 * 24 - p = 9 + m = multiplier * 3600 * 24 + #p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_h: - m = 1_000_000_000 * 3600 - p = 9 + m = multiplier * 3600 + #p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_m: - m = 1_000_000_000 * 60 - p = 9 + m = multiplier * 60 + #p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_s: - m = 1_000_000_000 - p = 9 + m = multiplier + #p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: - m = 1_000_000 - p = 6 + m = multiplier + #p = 6 elif reso == NPY_DATETIMEUNIT.NPY_FR_us: - m = 1000 - p = 3 + m = multiplier + #p = 3 elif reso == NPY_DATETIMEUNIT.NPY_FR_ns or reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: - m = 1 - p = 0 + m = multiplier + #p = 0 else: raise ValueError(f"cannot cast unit {unit}") return m, p @@ -299,16 +313,10 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, else: in_reso = abbrev_to_npy_unit(unit) reso = get_supported_reso(in_reso) - try: - value = convert_reso(ts, in_reso, reso, False) - except OverflowError: - # GH#26651 re-raise as OutOfBoundsDatetime - raise OutOfBoundsDatetime( - f"cannot convert input {ts} with the unit '{unit}'" - ) - obj.value = value + ts = cast_from_unit(ts, unit, reso) + obj.value = ts obj.creso = reso - pandas_datetime_to_datetimestruct(value, reso, &obj.dts) + pandas_datetime_to_datetimestruct(ts, reso, &obj.dts) elif is_float_object(ts): if ts != ts or ts == NPY_NAT: obj.value = NPY_NAT From 00b881677726e8b543c3a04749592b4e84284bdb Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 7 Feb 2023 13:52:17 +0000 Subject: [PATCH 09/20] wip --- pandas/_libs/tslibs/conversion.pyx | 54 +++++++++++++++++------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7dfa1b32f7c55..8cfe913cd3f51 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -100,6 +100,10 @@ cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT reso=NPY_FR_ns int p m, p = precision_from_unit(unit, reso) + print("unit", unit) + print("reso", reso) + print("ts", ts) + print("m", m) # just give me the unit back if ts is None: @@ -142,7 +146,7 @@ cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT reso=NPY_FR_ns ) from err -cpdef inline (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns): +cpdef inline (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso = NPY_DATETIMEUNIT.NPY_FR_ns): """ Return a casting of the unit represented to nanoseconds + the precision to round the fractional part. @@ -154,57 +158,59 @@ cpdef inline (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_r """ cdef: int64_t m + int64_t multiplier int p NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit) - int64_t multiplier if out_reso == NPY_DATETIMEUNIT.NPY_FR_ns: - multiplier = 1_000_000_000 - p = 0 + multiplier = 1 elif out_reso == NPY_DATETIMEUNIT.NPY_FR_us: - multiplier = 1_000_000 - p = 3 - elif out_reso == NPY_DATETIMEUNIT.NPY_FR_ms: multiplier = 1_000 - p = 6 + elif out_reso == NPY_DATETIMEUNIT.NPY_FR_ms: + multiplier = 1_000_000 elif out_reso == NPY_DATETIMEUNIT.NPY_FR_s: - multiplier = 1 - p = 9 + multiplier = 1_000_000_000 if reso == NPY_DATETIMEUNIT.NPY_FR_Y: # each 400 years we have 97 leap years, for an average of 97/400=.2425 # extra days each year. We get 31556952 by writing # 3600*24*365.2425=31556952 m = multiplier * 31556952 - #p = 9 + p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_M: # 2629746 comes from dividing the "Y" case by 12. m = multiplier * 2629746 - #p = 9 + p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_W: m = multiplier * 3600 * 24 * 7 - #p = 9 + p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_D: m = multiplier * 3600 * 24 - #p = 9 + p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_h: m = multiplier * 3600 - #p = 9 + p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_m: m = multiplier * 60 - #p = 9 + p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_s: - m = multiplier - #p = 9 + m = 1_000_000_000 // multiplier + p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: - m = multiplier - #p = 6 + if out_reso not in [NPY_DATETIMEUNIT.NPY_FR_ns, NPY_DATETIMEUNIT.NPY_FR_us, NPY_DATETIMEUNIT.NPY_FR_ms]: + raise ValueError(f"cannot cast unit {unit} to reso {out_reso}") + m = 1_000_000 // multiplier + p = 6 elif reso == NPY_DATETIMEUNIT.NPY_FR_us: - m = multiplier - #p = 3 + if out_reso not in [NPY_DATETIMEUNIT.NPY_FR_ns, NPY_DATETIMEUNIT.NPY_FR_us]: + raise ValueError(f"cannot cast unit {unit} to reso {out_reso}") + m = 1_000 // multiplier + p = 3 elif reso == NPY_DATETIMEUNIT.NPY_FR_ns or reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: - m = multiplier - #p = 0 + if out_reso != NPY_DATETIMEUNIT.NPY_FR_ns: + raise ValueError(f"cannot cast unit {unit} to reso {out_reso}") + m = 1 + p = 0 else: raise ValueError(f"cannot cast unit {unit}") return m, p From fc11352f997b138c7566772cce42cfbc0d831b72 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 7 Feb 2023 13:55:57 +0000 Subject: [PATCH 10/20] can undo change now --- pandas/tests/tools/test_to_datetime.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index a872dd17d5573..8f34255597c34 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1798,9 +1798,7 @@ def test_unit_array_mixed_nans_large_int(self, cache): values = [1420043460000, iNaT, NaT, np.nan, "NaT"] result = to_datetime(values, errors="ignore", unit="s", cache=cache) - expected = Index( - [Timestamp(1420043460000, unit="s"), NaT, NaT, NaT, NaT], dtype=object - ) + expected = Index([1420043460000, NaT, NaT, NaT, NaT], dtype=object) tm.assert_index_equal(result, expected) result = to_datetime(values, errors="coerce", unit="s", cache=cache) From 65e0f442edea89898322eef47ae0d94228742a7c Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 7 Feb 2023 13:57:31 +0000 Subject: [PATCH 11/20] fixup --- pandas/_libs/tslibs/conversion.pyx | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 8cfe913cd3f51..92a5731b63f8f 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -81,7 +81,11 @@ TD64NS_DTYPE = np.dtype("m8[ns]") # ---------------------------------------------------------------------- # Unit Conversion Helpers -cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT reso=NPY_FR_ns) except? -1: +cdef int64_t cast_from_unit( + object ts, + str unit, + NPY_DATETIMEUNIT reso=NPY_FR_ns +) except? -1: """ Return a casting of the unit represented to nanoseconds round the fractional part of a float to our precision, p. @@ -146,7 +150,10 @@ cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT reso=NPY_FR_ns ) from err -cpdef inline (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso = NPY_DATETIMEUNIT.NPY_FR_ns): +cpdef inline (int64_t, int) precision_from_unit( + str unit, + NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns, +): """ Return a casting of the unit represented to nanoseconds + the precision to round the fractional part. @@ -197,7 +204,11 @@ cpdef inline (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_r m = 1_000_000_000 // multiplier p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: - if out_reso not in [NPY_DATETIMEUNIT.NPY_FR_ns, NPY_DATETIMEUNIT.NPY_FR_us, NPY_DATETIMEUNIT.NPY_FR_ms]: + if out_reso not in [ + NPY_DATETIMEUNIT.NPY_FR_ns, + NPY_DATETIMEUNIT.NPY_FR_us, + NPY_DATETIMEUNIT.NPY_FR_ms, + ]: raise ValueError(f"cannot cast unit {unit} to reso {out_reso}") m = 1_000_000 // multiplier p = 6 From 82a80db7d81bb28e9fdf3cd5c3800a0cf7b6ebe1 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 7 Feb 2023 13:58:43 +0000 Subject: [PATCH 12/20] remove debug stmt --- pandas/_libs/tslibs/conversion.pyx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 92a5731b63f8f..4b9f23fd12dd1 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -104,10 +104,6 @@ cdef int64_t cast_from_unit( int p m, p = precision_from_unit(unit, reso) - print("unit", unit) - print("reso", reso) - print("ts", ts) - print("m", m) # just give me the unit back if ts is None: From b71f629ce09a9518e8afe9ec113573a7aea44e5c Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 7 Feb 2023 14:48:39 +0000 Subject: [PATCH 13/20] fix logic --- pandas/_libs/tslibs/conversion.pyx | 26 ++++++++------------------ pandas/tests/tools/test_to_datetime.py | 6 +++--- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 4b9f23fd12dd1..75f8269f0b63b 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -166,13 +166,13 @@ cpdef inline (int64_t, int) precision_from_unit( NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit) if out_reso == NPY_DATETIMEUNIT.NPY_FR_ns: - multiplier = 1 + multiplier = 1_000_000_000 elif out_reso == NPY_DATETIMEUNIT.NPY_FR_us: - multiplier = 1_000 - elif out_reso == NPY_DATETIMEUNIT.NPY_FR_ms: multiplier = 1_000_000 + elif out_reso == NPY_DATETIMEUNIT.NPY_FR_ms: + multiplier = 1_000 elif out_reso == NPY_DATETIMEUNIT.NPY_FR_s: - multiplier = 1_000_000_000 + multiplier = 1 if reso == NPY_DATETIMEUNIT.NPY_FR_Y: # each 400 years we have 97 leap years, for an average of 97/400=.2425 @@ -197,26 +197,16 @@ cpdef inline (int64_t, int) precision_from_unit( m = multiplier * 60 p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_s: - m = 1_000_000_000 // multiplier + m = multiplier p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: - if out_reso not in [ - NPY_DATETIMEUNIT.NPY_FR_ns, - NPY_DATETIMEUNIT.NPY_FR_us, - NPY_DATETIMEUNIT.NPY_FR_ms, - ]: - raise ValueError(f"cannot cast unit {unit} to reso {out_reso}") - m = 1_000_000 // multiplier + m = multiplier // 1_000 p = 6 elif reso == NPY_DATETIMEUNIT.NPY_FR_us: - if out_reso not in [NPY_DATETIMEUNIT.NPY_FR_ns, NPY_DATETIMEUNIT.NPY_FR_us]: - raise ValueError(f"cannot cast unit {unit} to reso {out_reso}") - m = 1_000 // multiplier + m = multiplier // 1_000_000 p = 3 elif reso == NPY_DATETIMEUNIT.NPY_FR_ns or reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: - if out_reso != NPY_DATETIMEUNIT.NPY_FR_ns: - raise ValueError(f"cannot cast unit {unit} to reso {out_reso}") - m = 1 + m = multiplier // 1_000_000_000 p = 0 else: raise ValueError(f"cannot cast unit {unit}") diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index c11adf84e0f77..87635db949914 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1795,17 +1795,17 @@ def test_unit_array_mixed_nans(self, cache): to_datetime(values, unit="D", errors="raise", cache=cache) def test_unit_array_mixed_nans_large_int(self, cache): - values = [1420043460000, iNaT, NaT, np.nan, "NaT"] + values = [1420043460000000000000000, iNaT, NaT, np.nan, "NaT"] result = to_datetime(values, errors="ignore", unit="s", cache=cache) - expected = Index([1420043460000, NaT, NaT, NaT, NaT], dtype=object) + expected = Index([1420043460000000000000000, NaT, NaT, NaT, NaT], dtype=object) tm.assert_index_equal(result, expected) result = to_datetime(values, errors="coerce", unit="s", cache=cache) expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"]) tm.assert_index_equal(result, expected) - msg = "cannot convert input 1420043460000 with the unit 's'" + msg = "cannot convert input 1420043460000000000000000 with the unit 's'" with pytest.raises(OutOfBoundsDatetime, match=msg): to_datetime(values, errors="raise", unit="s", cache=cache) From f749d4d7a5ae4910ba6ef189ee683b0823d49b5b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 7 Feb 2023 15:01:47 +0000 Subject: [PATCH 14/20] fix precision calculation --- pandas/_libs/tslibs/conversion.pyx | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 75f8269f0b63b..380e6b476798d 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -179,37 +179,28 @@ cpdef inline (int64_t, int) precision_from_unit( # extra days each year. We get 31556952 by writing # 3600*24*365.2425=31556952 m = multiplier * 31556952 - p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_M: # 2629746 comes from dividing the "Y" case by 12. m = multiplier * 2629746 - p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_W: m = multiplier * 3600 * 24 * 7 - p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_D: m = multiplier * 3600 * 24 - p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_h: m = multiplier * 3600 - p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_m: m = multiplier * 60 - p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_s: m = multiplier - p = 9 elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: m = multiplier // 1_000 - p = 6 elif reso == NPY_DATETIMEUNIT.NPY_FR_us: m = multiplier // 1_000_000 - p = 3 elif reso == NPY_DATETIMEUNIT.NPY_FR_ns or reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: m = multiplier // 1_000_000_000 - p = 0 else: raise ValueError(f"cannot cast unit {unit}") + p = min(9, len(str(m))-1) return m, p From 7d27066aaf9600c06db457b58fb4c9fcc703ef8c Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 7 Feb 2023 15:59:59 +0000 Subject: [PATCH 15/20] noop From de66535b5b5f98b2f452a399c309ea6e15f11e0a Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 7 Feb 2023 16:30:19 +0000 Subject: [PATCH 16/20] exhaust match --- pandas/_libs/tslibs/conversion.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 380e6b476798d..5423864b97e30 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -173,6 +173,8 @@ cpdef inline (int64_t, int) precision_from_unit( multiplier = 1_000 elif out_reso == NPY_DATETIMEUNIT.NPY_FR_s: multiplier = 1 + else: + raise ValueError(f"Invalid out_reso: {out_reso}") if reso == NPY_DATETIMEUNIT.NPY_FR_Y: # each 400 years we have 97 leap years, for an average of 97/400=.2425 From f3205b2cda14ee0dd8bc67fb155cbf7a1451fee1 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 8 Feb 2023 15:06:22 +0000 Subject: [PATCH 17/20] use libc log10, reso -> out_reso, periods_per_second --- pandas/_libs/tslibs/conversion.pxd | 2 +- pandas/_libs/tslibs/conversion.pyx | 20 ++++++-------------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index e45117085f778..1b3214605582a 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -42,7 +42,7 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit, cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1 cpdef datetime localize_pydatetime(datetime dt, tzinfo tz) -cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT reso=*) except? -1 +cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT out_reso=*) except? -1 cpdef (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso=*) cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 5423864b97e30..76070239864b8 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -1,6 +1,7 @@ import numpy as np cimport numpy as cnp +from libc.math cimport log10 from numpy cimport ( int32_t, int64_t, @@ -84,7 +85,7 @@ TD64NS_DTYPE = np.dtype("m8[ns]") cdef int64_t cast_from_unit( object ts, str unit, - NPY_DATETIMEUNIT reso=NPY_FR_ns + NPY_DATETIMEUNIT out_reso=NPY_FR_ns ) except? -1: """ Return a casting of the unit represented to nanoseconds @@ -103,7 +104,7 @@ cdef int64_t cast_from_unit( int64_t m int p - m, p = precision_from_unit(unit, reso) + m, p = precision_from_unit(unit, out_reso) # just give me the unit back if ts is None: @@ -123,7 +124,7 @@ cdef int64_t cast_from_unit( if is_float_object(ts): ts = int(ts) dt64obj = np.datetime64(ts, unit) - return get_datetime64_nanos(dt64obj, reso) + return get_datetime64_nanos(dt64obj, out_reso) # cast the unit, multiply base/frac separately # to avoid precision issues from float -> int @@ -165,16 +166,7 @@ cpdef inline (int64_t, int) precision_from_unit( int p NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit) - if out_reso == NPY_DATETIMEUNIT.NPY_FR_ns: - multiplier = 1_000_000_000 - elif out_reso == NPY_DATETIMEUNIT.NPY_FR_us: - multiplier = 1_000_000 - elif out_reso == NPY_DATETIMEUNIT.NPY_FR_ms: - multiplier = 1_000 - elif out_reso == NPY_DATETIMEUNIT.NPY_FR_s: - multiplier = 1 - else: - raise ValueError(f"Invalid out_reso: {out_reso}") + multiplier = periods_per_second(out_reso) if reso == NPY_DATETIMEUNIT.NPY_FR_Y: # each 400 years we have 97 leap years, for an average of 97/400=.2425 @@ -202,7 +194,7 @@ cpdef inline (int64_t, int) precision_from_unit( m = multiplier // 1_000_000_000 else: raise ValueError(f"cannot cast unit {unit}") - p = min(9, len(str(m))-1) + p = log10(m) # number of digits in 'm' minus 1 return m, p From 27fd951e907414cdd7f60b4f38f7060f7cbe7048 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 8 Feb 2023 16:07:39 +0000 Subject: [PATCH 18/20] uncomment on: --- .github/workflows/python-dev.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python-dev.yml b/.github/workflows/python-dev.yml index 4e0a1f98d1c59..2ece92dc50b87 100644 --- a/.github/workflows/python-dev.yml +++ b/.github/workflows/python-dev.yml @@ -23,14 +23,12 @@ name: Python Dev on: push: branches: -# - main -# - 1.5.x - - None + - main + - 1.5.x pull_request: branches: -# - main -# - 1.5.x - - None + - main + - 1.5.x paths-ignore: - "doc/**" From 3b54d5e7ae39dd799314f811b391dd8e1527856f Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 8 Feb 2023 16:08:02 +0000 Subject: [PATCH 19/20] Revert "uncomment on:" This reverts commit 27fd951e907414cdd7f60b4f38f7060f7cbe7048. --- .github/workflows/python-dev.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-dev.yml b/.github/workflows/python-dev.yml index 2ece92dc50b87..4e0a1f98d1c59 100644 --- a/.github/workflows/python-dev.yml +++ b/.github/workflows/python-dev.yml @@ -23,12 +23,14 @@ name: Python Dev on: push: branches: - - main - - 1.5.x +# - main +# - 1.5.x + - None pull_request: branches: - - main - - 1.5.x +# - main +# - 1.5.x + - None paths-ignore: - "doc/**" From 1f5fa7a726745f2b743253943655ba1b6752c131 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 9 Feb 2023 10:17:05 +0000 Subject: [PATCH 20/20] set unit to ns if None --- pandas/_libs/tslibs/conversion.pyx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7ef123d7411aa..03a53b1b451e9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -297,9 +297,8 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, obj.value = NPY_NAT else: if unit is None: - in_reso = abbrev_to_npy_unit("ns") - else: - in_reso = abbrev_to_npy_unit(unit) + unit = "ns" + in_reso = abbrev_to_npy_unit(unit) reso = get_supported_reso(in_reso) ts = cast_from_unit(ts, unit, reso) obj.value = ts