diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 4989feaf84006..8fdbd9affa58a 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -46,7 +46,10 @@ from pandas._libs.tslibs.np_datetime cimport ( import_pandas_datetime() -from pandas._libs.tslibs.strptime cimport parse_today_now +from pandas._libs.tslibs.strptime cimport ( + DatetimeParseState, + parse_today_now, +) from pandas._libs.util cimport ( is_float_object, is_integer_object, @@ -58,7 +61,6 @@ from pandas._libs.tslibs.conversion cimport ( _TSObject, cast_from_unit, convert_str_to_tsobject, - convert_timezone, get_datetime64_nanos, parse_pydatetime, ) @@ -454,9 +456,9 @@ cpdef array_to_datetime( float tz_offset set out_tzoffset_vals = set() tzinfo tz_out = None - bint found_tz = False, found_naive = False cnp.flatiter it = cnp.PyArray_IterNew(values) NPY_DATETIMEUNIT creso = NPY_FR_ns + DatetimeParseState state = DatetimeParseState() # specify error conditions assert is_raise or is_ignore or is_coerce @@ -474,17 +476,7 @@ cpdef array_to_datetime( iresult[i] = NPY_NAT elif PyDateTime_Check(val): - if val.tzinfo is not None: - found_tz = True - else: - found_naive = True - tz_out = convert_timezone( - val.tzinfo, - tz_out, - found_naive, - found_tz, - utc_convert, - ) + tz_out = state.process_datetime(val, tz_out, utc_convert) iresult[i] = parse_pydatetime(val, &dts, utc_convert, creso=creso) elif PyDate_Check(val): diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 3e5a79e833a25..36c57a8b9ce24 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -47,13 +47,6 @@ cpdef (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso=*) cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso) -cdef tzinfo convert_timezone( - tzinfo tz_in, - tzinfo tz_out, - bint found_naive, - bint found_tz, - bint utc_convert, -) cdef int64_t parse_pydatetime( datetime val, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index a1c59489b3d38..0568576c83870 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -673,59 +673,6 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz): return _localize_pydatetime(dt, tz) -cdef tzinfo convert_timezone( - tzinfo tz_in, - tzinfo tz_out, - bint found_naive, - bint found_tz, - bint utc_convert, -): - """ - Validate that ``tz_in`` can be converted/localized to ``tz_out``. - - Parameters - ---------- - tz_in : tzinfo or None - Timezone info of element being processed. - tz_out : tzinfo or None - Timezone info of output. - found_naive : bool - Whether a timezone-naive element has been found so far. - found_tz : bool - Whether a timezone-aware element has been found so far. - utc_convert : bool - Whether to convert/localize to UTC. - - Returns - ------- - tz_info - Timezone info of output. - - Raises - ------ - ValueError - If ``tz_in`` can't be converted/localized to ``tz_out``. - """ - if tz_in is not None: - if utc_convert: - pass - elif found_naive: - raise ValueError("Tz-aware datetime.datetime " - "cannot be converted to " - "datetime64 unless utc=True") - elif tz_out is not None and not tz_compare(tz_out, tz_in): - raise ValueError("Tz-aware datetime.datetime " - "cannot be converted to " - "datetime64 unless utc=True") - else: - tz_out = tz_in - else: - if found_tz and not utc_convert: - raise ValueError("Cannot mix tz-aware with " - "tz-naive values") - return tz_out - - cdef int64_t parse_pydatetime( datetime val, npy_datetimestruct *dts, diff --git a/pandas/_libs/tslibs/strptime.pxd b/pandas/_libs/tslibs/strptime.pxd index 175195d4362e4..d09612f4fbf7d 100644 --- a/pandas/_libs/tslibs/strptime.pxd +++ b/pandas/_libs/tslibs/strptime.pxd @@ -1,4 +1,16 @@ +from cpython.datetime cimport ( + datetime, + tzinfo, +) from numpy cimport int64_t cdef bint parse_today_now(str val, int64_t* iresult, bint utc) + + +cdef class DatetimeParseState: + cdef: + bint found_tz + bint found_naive + + cdef tzinfo process_datetime(self, datetime dt, tzinfo tz, bint utc_convert) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 717ec41a918d8..d1c6bab180576 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -48,10 +48,7 @@ from numpy cimport ( ) from pandas._libs.missing cimport checknull_with_nat_and_na -from pandas._libs.tslibs.conversion cimport ( - convert_timezone, - get_datetime64_nanos, -) +from pandas._libs.tslibs.conversion cimport get_datetime64_nanos from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_nat_strings as nat_strings, @@ -73,6 +70,7 @@ import_pandas_datetime() from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.timestamps cimport _Timestamp +from pandas._libs.tslibs.timezones cimport tz_compare from pandas._libs.util cimport ( is_float_object, is_integer_object, @@ -156,6 +154,37 @@ cdef dict _parse_code_table = {"y": 0, "u": 22} +cdef class DatetimeParseState: + def __cinit__(self): + self.found_tz = False + self.found_naive = False + + cdef tzinfo process_datetime(self, datetime dt, tzinfo tz, bint utc_convert): + if dt.tzinfo is not None: + self.found_tz = True + else: + self.found_naive = True + + if dt.tzinfo is not None: + if utc_convert: + pass + elif self.found_naive: + raise ValueError("Tz-aware datetime.datetime " + "cannot be converted to " + "datetime64 unless utc=True") + elif tz is not None and not tz_compare(tz, dt.tzinfo): + raise ValueError("Tz-aware datetime.datetime " + "cannot be converted to " + "datetime64 unless utc=True") + else: + tz = dt.tzinfo + else: + if self.found_tz and not utc_convert: + raise ValueError("Cannot mix tz-aware with " + "tz-naive values") + return tz + + def array_strptime( ndarray[object] values, str fmt, @@ -183,13 +212,12 @@ def array_strptime( bint is_raise = errors=="raise" bint is_ignore = errors=="ignore" bint is_coerce = errors=="coerce" - bint found_naive = False - bint found_tz = False tzinfo tz_out = None bint iso_format = format_is_iso(fmt) NPY_DATETIMEUNIT out_bestunit int out_local = 0, out_tzoffset = 0 bint string_to_dts_succeeded = 0 + DatetimeParseState state = DatetimeParseState() assert is_raise or is_ignore or is_coerce @@ -276,17 +304,7 @@ def array_strptime( iresult[i] = NPY_NAT continue elif PyDateTime_Check(val): - if val.tzinfo is not None: - found_tz = True - else: - found_naive = True - tz_out = convert_timezone( - val.tzinfo, - tz_out, - found_naive, - found_tz, - utc, - ) + tz_out = state.process_datetime(val, tz_out, utc) if isinstance(val, _Timestamp): iresult[i] = val.tz_localize(None).as_unit("ns")._value else: