Skip to content

REF: implement ParseState #55617

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 6 additions & 14 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ from pandas._libs.tslibs.np_datetime cimport (
import_pandas_datetime()


from pandas._libs.tslibs.strptime cimport parse_today_now
from pandas._libs.tslibs.strptime cimport (
DatetimeParseState,
parse_today_now,
)
from pandas._libs.util cimport (
is_float_object,
is_integer_object,
Expand All @@ -58,7 +61,6 @@ from pandas._libs.tslibs.conversion cimport (
_TSObject,
cast_from_unit,
convert_str_to_tsobject,
convert_timezone,
get_datetime64_nanos,
parse_pydatetime,
)
Expand Down Expand Up @@ -454,9 +456,9 @@ cpdef array_to_datetime(
float tz_offset
set out_tzoffset_vals = set()
tzinfo tz_out = None
bint found_tz = False, found_naive = False
cnp.flatiter it = cnp.PyArray_IterNew(values)
NPY_DATETIMEUNIT creso = NPY_FR_ns
DatetimeParseState state = DatetimeParseState()

# specify error conditions
assert is_raise or is_ignore or is_coerce
Expand All @@ -474,17 +476,7 @@ cpdef array_to_datetime(
iresult[i] = NPY_NAT

elif PyDateTime_Check(val):
if val.tzinfo is not None:
found_tz = True
else:
found_naive = True
tz_out = convert_timezone(
val.tzinfo,
tz_out,
found_naive,
found_tz,
utc_convert,
)
tz_out = state.process_datetime(val, tz_out, utc_convert)
iresult[i] = parse_pydatetime(val, &dts, utc_convert, creso=creso)

elif PyDate_Check(val):
Expand Down
7 changes: 0 additions & 7 deletions pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,6 @@ cpdef (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso=*)

cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)

cdef tzinfo convert_timezone(
tzinfo tz_in,
tzinfo tz_out,
bint found_naive,
bint found_tz,
bint utc_convert,
)

cdef int64_t parse_pydatetime(
datetime val,
Expand Down
53 changes: 0 additions & 53 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -673,59 +673,6 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz):
return _localize_pydatetime(dt, tz)


cdef tzinfo convert_timezone(
    tzinfo tz_in,
    tzinfo tz_out,
    bint found_naive,
    bint found_tz,
    bint utc_convert,
):
    """
    Check that an element's timezone is compatible with the output timezone.

    Parameters
    ----------
    tz_in : tzinfo or None
        Timezone of the element currently being processed.
    tz_out : tzinfo or None
        Output timezone established so far.
    found_naive : bool
        True if a timezone-naive element has already been encountered.
    found_tz : bool
        True if a timezone-aware element has already been encountered.
    utc_convert : bool
        True if values are being converted/localized to UTC, in which case
        no compatibility check is applied and ``tz_out`` is returned as-is.

    Returns
    -------
    tzinfo or None
        ``tz_in`` when an aware element passes the checks without
        ``utc_convert``; otherwise the unchanged ``tz_out``.

    Raises
    ------
    ValueError
        If aware and naive elements are mixed, or ``tz_in`` does not compare
        equal to an already-set ``tz_out``, and ``utc_convert`` is False.
    """
    if tz_in is None:
        # Naive element: only a problem if an aware one was already seen.
        if found_tz and not utc_convert:
            raise ValueError("Cannot mix tz-aware with "
                             "tz-naive values")
        return tz_out

    if utc_convert:
        # Everything ends up in UTC, so individual timezones need not agree.
        return tz_out

    if found_naive or (tz_out is not None and not tz_compare(tz_out, tz_in)):
        raise ValueError("Tz-aware datetime.datetime "
                         "cannot be converted to "
                         "datetime64 unless utc=True")
    return tz_in


cdef int64_t parse_pydatetime(
datetime val,
npy_datetimestruct *dts,
Expand Down
12 changes: 12 additions & 0 deletions pandas/_libs/tslibs/strptime.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
from cpython.datetime cimport (
datetime,
tzinfo,
)
from numpy cimport int64_t


cdef bint parse_today_now(str val, int64_t* iresult, bint utc)


# Declaration of the extension type that carries tz-aware/tz-naive bookkeeping
# between successive process_datetime calls.
cdef class DatetimeParseState:
cdef:
# Set to True by process_datetime when it is given a datetime whose
# tzinfo is not None.
bint found_tz
# Set to True by process_datetime when it is given a datetime whose
# tzinfo is None.
bint found_naive

# Updates the two flags for ``dt`` and returns the timezone to use as the
# output timezone; raises ValueError on incompatible input when
# ``utc_convert`` is False.
cdef tzinfo process_datetime(self, datetime dt, tzinfo tz, bint utc_convert)
52 changes: 35 additions & 17 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,7 @@ from numpy cimport (
)

from pandas._libs.missing cimport checknull_with_nat_and_na
from pandas._libs.tslibs.conversion cimport (
convert_timezone,
get_datetime64_nanos,
)
from pandas._libs.tslibs.conversion cimport get_datetime64_nanos
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
c_nat_strings as nat_strings,
Expand All @@ -73,6 +70,7 @@ import_pandas_datetime()
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime

from pandas._libs.tslibs.timestamps cimport _Timestamp
from pandas._libs.tslibs.timezones cimport tz_compare
from pandas._libs.util cimport (
is_float_object,
is_integer_object,
Expand Down Expand Up @@ -156,6 +154,37 @@ cdef dict _parse_code_table = {"y": 0,
"u": 22}


cdef class DatetimeParseState:
    """
    Remember whether tz-aware and/or tz-naive datetimes have been processed.

    Both flags start out False and are only ever switched to True by
    ``process_datetime``.
    """

    def __cinit__(self):
        self.found_naive = False
        self.found_tz = False

    cdef tzinfo process_datetime(self, datetime dt, tzinfo tz, bint utc_convert):
        """
        Record the awareness of ``dt`` and validate it against ``tz``.

        Parameters
        ----------
        dt : datetime
            Element currently being processed.
        tz : tzinfo or None
            Output timezone established so far.
        utc_convert : bool
            True if values are being converted/localized to UTC, in which
            case no compatibility check is applied.

        Returns
        -------
        tzinfo or None
            ``dt.tzinfo`` when ``dt`` is aware and passes the checks without
            ``utc_convert``; otherwise the unchanged ``tz``.

        Raises
        ------
        ValueError
            If aware and naive datetimes are mixed, or ``dt.tzinfo`` does not
            compare equal to an already-set ``tz``, and ``utc_convert`` is
            False.
        """
        elem_tz = dt.tzinfo

        if elem_tz is None:
            self.found_naive = True
            # Naive element: only a problem if an aware one was already seen.
            if self.found_tz and not utc_convert:
                raise ValueError("Cannot mix tz-aware with "
                                 "tz-naive values")
            return tz

        self.found_tz = True
        if utc_convert:
            # Everything ends up in UTC, so individual timezones need not agree.
            return tz

        if self.found_naive or (tz is not None and not tz_compare(tz, elem_tz)):
            raise ValueError("Tz-aware datetime.datetime "
                             "cannot be converted to "
                             "datetime64 unless utc=True")
        return elem_tz


def array_strptime(
ndarray[object] values,
str fmt,
Expand Down Expand Up @@ -183,13 +212,12 @@ def array_strptime(
bint is_raise = errors=="raise"
bint is_ignore = errors=="ignore"
bint is_coerce = errors=="coerce"
bint found_naive = False
bint found_tz = False
tzinfo tz_out = None
bint iso_format = format_is_iso(fmt)
NPY_DATETIMEUNIT out_bestunit
int out_local = 0, out_tzoffset = 0
bint string_to_dts_succeeded = 0
DatetimeParseState state = DatetimeParseState()

assert is_raise or is_ignore or is_coerce

Expand Down Expand Up @@ -276,17 +304,7 @@ def array_strptime(
iresult[i] = NPY_NAT
continue
elif PyDateTime_Check(val):
if val.tzinfo is not None:
found_tz = True
else:
found_naive = True
tz_out = convert_timezone(
val.tzinfo,
tz_out,
found_naive,
found_tz,
utc,
)
tz_out = state.process_datetime(val, tz_out, utc)
if isinstance(val, _Timestamp):
iresult[i] = val.tz_localize(None).as_unit("ns")._value
else:
Expand Down