From 71cc65a78aa283b445acea58faa176eab1c05e31 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 19 Oct 2023 09:07:00 -0700 Subject: [PATCH 1/3] REF: implement ParseState --- pandas/_libs/tslib.pyx | 20 ++++++------------ pandas/_libs/tslibs/strptime.pxd | 12 +++++++++++ pandas/_libs/tslibs/strptime.pyx | 35 ++++++++++++++++++++------------ 3 files changed, 40 insertions(+), 27 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 4989feaf84006..1bbb60c4fbbcd 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -46,7 +46,10 @@ from pandas._libs.tslibs.np_datetime cimport ( import_pandas_datetime() -from pandas._libs.tslibs.strptime cimport parse_today_now +from pandas._libs.tslibs.strptime cimport ( + ParseState, + parse_today_now, +) from pandas._libs.util cimport ( is_float_object, is_integer_object, @@ -58,7 +61,6 @@ from pandas._libs.tslibs.conversion cimport ( _TSObject, cast_from_unit, convert_str_to_tsobject, - convert_timezone, get_datetime64_nanos, parse_pydatetime, ) @@ -454,9 +456,9 @@ cpdef array_to_datetime( float tz_offset set out_tzoffset_vals = set() tzinfo tz_out = None - bint found_tz = False, found_naive = False cnp.flatiter it = cnp.PyArray_IterNew(values) NPY_DATETIMEUNIT creso = NPY_FR_ns + ParseState state = ParseState() # specify error conditions assert is_raise or is_ignore or is_coerce @@ -474,17 +476,7 @@ cpdef array_to_datetime( iresult[i] = NPY_NAT elif PyDateTime_Check(val): - if val.tzinfo is not None: - found_tz = True - else: - found_naive = True - tz_out = convert_timezone( - val.tzinfo, - tz_out, - found_naive, - found_tz, - utc_convert, - ) + tz_out = state.process_datetime(val, tz_out, utc_convert) iresult[i] = parse_pydatetime(val, &dts, utc_convert, creso=creso) elif PyDate_Check(val): diff --git a/pandas/_libs/tslibs/strptime.pxd b/pandas/_libs/tslibs/strptime.pxd index 175195d4362e4..aa8fbdb6779f6 100644 --- a/pandas/_libs/tslibs/strptime.pxd +++ b/pandas/_libs/tslibs/strptime.pxd @@ -1,4 +1,16 @@ +from cpython.datetime cimport ( + datetime, + tzinfo, +) from numpy cimport int64_t cdef bint parse_today_now(str val, int64_t* iresult, bint utc) + + +cdef class ParseState: + cdef: + bint found_tz + bint found_naive + + cdef tzinfo process_datetime(self, datetime dt, tzinfo tz, bint utc_convert) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 866181246a284..e6f6599031a08 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -156,6 +156,26 @@ cdef dict _parse_code_table = {"y": 0, "u": 22} +cdef class ParseState: + def __cinit__(self): + self.found_tz = False + self.found_naive = False + + cdef tzinfo process_datetime(self, datetime dt, tzinfo tz, bint utc_convert): + if dt.tzinfo is not None: + self.found_tz = True + else: + self.found_naive = True + tz = convert_timezone( + dt.tzinfo, + tz, + self.found_naive, + self.found_tz, + utc_convert, + ) + return tz + + def array_strptime( ndarray[object] values, str fmt, @@ -187,13 +207,12 @@ def array_strptime( bint is_raise = errors=="raise" bint is_ignore = errors=="ignore" bint is_coerce = errors=="coerce" - bint found_naive = False - bint found_tz = False tzinfo tz_out = None bint iso_format = format_is_iso(fmt) NPY_DATETIMEUNIT out_bestunit int out_local = 0, out_tzoffset = 0 bint string_to_dts_succeeded = 0 + ParseState state = ParseState() assert is_raise or is_ignore or is_coerce @@ -280,17 +299,7 @@ def array_strptime( iresult[i] = NPY_NAT continue elif PyDateTime_Check(val): - if val.tzinfo is not None: - found_tz = True - else: - found_naive = True - tz_out = convert_timezone( - val.tzinfo, - tz_out, - found_naive, - found_tz, - utc, - ) + tz_out = state.process_datetime(val, tz_out, utc) if isinstance(val, _Timestamp): iresult[i] = val.tz_localize(None).as_unit("ns")._value else: From 4898efc6b95ded3d4a812fcb76af95c0ebdf22a0 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 20 Oct 2023 15:34:25 -0700 Subject: [PATCH 2/3] REF: inline convert_timezone --- pandas/_libs/tslibs/conversion.pxd | 7 ---- pandas/_libs/tslibs/conversion.pyx | 53 ------------------------------ pandas/_libs/tslibs/strptime.pyx | 31 ++++++++++------- 3 files changed, 20 insertions(+), 71 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 3e5a79e833a25..36c57a8b9ce24 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -47,13 +47,6 @@ cpdef (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso=*) cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso) -cdef tzinfo convert_timezone( - tzinfo tz_in, - tzinfo tz_out, - bint found_naive, - bint found_tz, - bint utc_convert, -) cdef int64_t parse_pydatetime( datetime val, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index a1c59489b3d38..0568576c83870 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -673,59 +673,6 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz): return _localize_pydatetime(dt, tz) -cdef tzinfo convert_timezone( - tzinfo tz_in, - tzinfo tz_out, - bint found_naive, - bint found_tz, - bint utc_convert, -): - """ - Validate that ``tz_in`` can be converted/localized to ``tz_out``. - - Parameters - ---------- - tz_in : tzinfo or None - Timezone info of element being processed. - tz_out : tzinfo or None - Timezone info of output. - found_naive : bool - Whether a timezone-naive element has been found so far. - found_tz : bool - Whether a timezone-aware element has been found so far. - utc_convert : bool - Whether to convert/localize to UTC. - - Returns - ------- - tz_info - Timezone info of output. - - Raises - ------ - ValueError - If ``tz_in`` can't be converted/localized to ``tz_out``. - """ - if tz_in is not None: - if utc_convert: - pass - elif found_naive: - raise ValueError("Tz-aware datetime.datetime " - "cannot be converted to " - "datetime64 unless utc=True") - elif tz_out is not None and not tz_compare(tz_out, tz_in): - raise ValueError("Tz-aware datetime.datetime " - "cannot be converted to " - "datetime64 unless utc=True") - else: - tz_out = tz_in - else: - if found_tz and not utc_convert: - raise ValueError("Cannot mix tz-aware with " - "tz-naive values") - return tz_out - - cdef int64_t parse_pydatetime( datetime val, npy_datetimestruct *dts, diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index e6f6599031a08..a232d666c86f6 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -48,10 +48,7 @@ from numpy cimport ( ) from pandas._libs.missing cimport checknull_with_nat_and_na -from pandas._libs.tslibs.conversion cimport ( - convert_timezone, - get_datetime64_nanos, -) +from pandas._libs.tslibs.conversion cimport get_datetime64_nanos from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_nat_strings as nat_strings, @@ -73,6 +70,7 @@ import_pandas_datetime() from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.timestamps cimport _Timestamp +from pandas._libs.tslibs.timezones cimport tz_compare from pandas._libs.util cimport ( is_float_object, is_integer_object, @@ -166,13 +164,24 @@ cdef class ParseState: self.found_tz = True else: self.found_naive = True - tz = convert_timezone( - dt.tzinfo, - tz, - self.found_naive, - self.found_tz, - utc_convert, - ) + + if dt.tzinfo is not None: + if utc_convert: + pass + elif self.found_naive: + raise ValueError("Tz-aware datetime.datetime " + "cannot be converted to " + "datetime64 unless utc=True") + elif tz is not None and not tz_compare(tz, dt.tzinfo): + raise ValueError("Tz-aware datetime.datetime " + "cannot be converted to " + "datetime64 unless utc=True") + else: + tz = dt.tzinfo + else: + if self.found_tz and not utc_convert: + raise ValueError("Cannot mix tz-aware with " + "tz-naive values") return tz From 5dbffdc97ff9b78b0538184d5ecef103167a6a1d Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 23 Oct 2023 08:15:55 -0700 Subject: [PATCH 3/3] ParseState->DatetimeParseState --- pandas/_libs/tslib.pyx | 4 ++-- pandas/_libs/tslibs/strptime.pxd | 2 +- pandas/_libs/tslibs/strptime.pyx | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 1bbb60c4fbbcd..8fdbd9affa58a 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -47,7 +47,7 @@ import_pandas_datetime() from pandas._libs.tslibs.strptime cimport ( - ParseState, + DatetimeParseState, parse_today_now, ) from pandas._libs.util cimport ( @@ -458,7 +458,7 @@ cpdef array_to_datetime( tzinfo tz_out = None cnp.flatiter it = cnp.PyArray_IterNew(values) NPY_DATETIMEUNIT creso = NPY_FR_ns - ParseState state = ParseState() + DatetimeParseState state = DatetimeParseState() # specify error conditions assert is_raise or is_ignore or is_coerce diff --git a/pandas/_libs/tslibs/strptime.pxd b/pandas/_libs/tslibs/strptime.pxd index aa8fbdb6779f6..d09612f4fbf7d 100644 --- a/pandas/_libs/tslibs/strptime.pxd +++ b/pandas/_libs/tslibs/strptime.pxd @@ -8,7 +8,7 @@ from numpy cimport int64_t cdef bint parse_today_now(str val, int64_t* iresult, bint utc) -cdef class ParseState: +cdef class DatetimeParseState: cdef: bint found_tz bint found_naive diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 25baac329f442..d1c6bab180576 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -154,7 +154,7 @@ cdef dict _parse_code_table = {"y": 0, "u": 22} -cdef class ParseState: +cdef class DatetimeParseState: def __cinit__(self): self.found_tz = False self.found_naive = False @@ -217,7 +217,7 @@ def array_strptime( NPY_DATETIMEUNIT out_bestunit int out_local = 0, out_tzoffset = 0 bint string_to_dts_succeeded = 0 - ParseState state = ParseState() + DatetimeParseState state = DatetimeParseState() assert is_raise or is_ignore or is_coerce