From 12c769d67f87b16a5b50810238ecebc2dc1a4c51 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 18 Jan 2023 13:58:38 -0800 Subject: [PATCH] REF: consolidate cast_from_unit checks --- pandas/_libs/tslib.pyx | 48 ++++++++++-------------------- pandas/_libs/tslibs/conversion.pyx | 16 +++++----- 2 files changed, 24 insertions(+), 40 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index df6204d61f983..5f7fb05876b35 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -220,6 +220,19 @@ def format_array_from_datetime( return result +cdef int64_t _wrapped_cast_from_unit(object val, str unit) except? -1: + """ + Call cast_from_unit and re-raise OverflowError as OutOfBoundsDatetime + """ + # See also timedeltas._maybe_cast_from_unit + try: + return cast_from_unit(val, unit) + except OverflowError as err: + raise OutOfBoundsDatetime( + f"cannot convert input {val} with the unit '{unit}'" + ) from err + + def array_with_unit_to_datetime( ndarray[object] values, str unit, @@ -261,13 +274,10 @@ def array_with_unit_to_datetime( bint is_raise = errors=="raise" ndarray[int64_t] iresult object tz = None - bint is_ym float fval assert is_ignore or is_coerce or is_raise - is_ym = unit in "YM" - if unit == "ns": result, tz = array_to_datetime( values.astype(object, copy=False), @@ -292,19 +302,7 @@ def array_with_unit_to_datetime( if val != val or val == NPY_NAT: iresult[i] = NPY_NAT else: - if is_ym and is_float_object(val) and not val.is_integer(): - # Analogous to GH#47266 for Timestamp - raise ValueError( - f"Conversion of non-round float with unit={unit} " - "is ambiguous" - ) - - try: - iresult[i] = cast_from_unit(val, unit) - except OverflowError: - raise OutOfBoundsDatetime( - f"cannot convert input {val} with the unit '{unit}'" - ) + iresult[i] = _wrapped_cast_from_unit(val, unit) elif isinstance(val, str): if len(val) == 0 or val in nat_strings: @@ -319,23 +317,7 @@ def array_with_unit_to_datetime( f"non convertible value {val} with the unit '{unit}'" ) - if is_ym and not fval.is_integer(): - # Analogous to GH#47266 for Timestamp - raise ValueError( - f"Conversion of non-round float with unit={unit} " - "is ambiguous" - ) - - try: - iresult[i] = cast_from_unit(fval, unit) - except ValueError: - raise ValueError( - f"non convertible value {val} with the unit '{unit}'" - ) - except OverflowError: - raise OutOfBoundsDatetime( - f"cannot convert input {val} with the unit '{unit}'" - ) + iresult[i] = _wrapped_cast_from_unit(fval, unit) else: # TODO: makes more sense as TypeError, but that would be an diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 8cdad5c79bc1b..d862b5bb606cc 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -108,6 +108,15 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1: if ts is None: return m + if unit in ["Y", "M"] and is_float_object(ts) and not ts.is_integer(): + # GH#47267 it is clear that 2 "M" corresponds to 1970-02-01, + # but not clear what 2.5 "M" corresponds to, so we will + # disallow that case. + raise ValueError( + f"Conversion of non-round float with unit={unit} " + "is ambiguous" + ) + # cast the unit, multiply base/frace separately # to avoid precision issues from float -> int base = ts @@ -287,13 +296,6 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, # GH#47266 Avoid cast_from_unit, which would give weird results # e.g. with "Y" and 150.0 we'd get 2120-01-01 09:00:00 return convert_to_tsobject(int(ts), tz, unit, False, False) - else: - # GH#47267 it is clear that 2 "M" corresponds to 1970-02-01, - # but not clear what 2.5 "M" corresponds to, so we will - # disallow that case. - raise ValueError( - f"Conversion of non-round float with unit={unit} is ambiguous." - ) ts = cast_from_unit(ts, unit) obj.value = ts