Skip to content

Commit 0c7d303

Browse files
authored
REF: implement ParseState (#55617)
* REF: implement ParseState * REF: inline convert_timezone * ParseState->DatetimeParseState
1 parent ea65f90 commit 0c7d303

File tree

5 files changed

+53
-91
lines changed

5 files changed

+53
-91
lines changed

pandas/_libs/tslib.pyx

+6-14
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,10 @@ from pandas._libs.tslibs.np_datetime cimport (
4646
import_pandas_datetime()
4747

4848

49-
from pandas._libs.tslibs.strptime cimport parse_today_now
49+
from pandas._libs.tslibs.strptime cimport (
50+
DatetimeParseState,
51+
parse_today_now,
52+
)
5053
from pandas._libs.util cimport (
5154
is_float_object,
5255
is_integer_object,
@@ -58,7 +61,6 @@ from pandas._libs.tslibs.conversion cimport (
5861
_TSObject,
5962
cast_from_unit,
6063
convert_str_to_tsobject,
61-
convert_timezone,
6264
get_datetime64_nanos,
6365
parse_pydatetime,
6466
)
@@ -454,9 +456,9 @@ cpdef array_to_datetime(
454456
float tz_offset
455457
set out_tzoffset_vals = set()
456458
tzinfo tz_out = None
457-
bint found_tz = False, found_naive = False
458459
cnp.flatiter it = cnp.PyArray_IterNew(values)
459460
NPY_DATETIMEUNIT creso = NPY_FR_ns
461+
DatetimeParseState state = DatetimeParseState()
460462

461463
# specify error conditions
462464
assert is_raise or is_ignore or is_coerce
@@ -474,17 +476,7 @@ cpdef array_to_datetime(
474476
iresult[i] = NPY_NAT
475477

476478
elif PyDateTime_Check(val):
477-
if val.tzinfo is not None:
478-
found_tz = True
479-
else:
480-
found_naive = True
481-
tz_out = convert_timezone(
482-
val.tzinfo,
483-
tz_out,
484-
found_naive,
485-
found_tz,
486-
utc_convert,
487-
)
479+
tz_out = state.process_datetime(val, tz_out, utc_convert)
488480
iresult[i] = parse_pydatetime(val, &dts, utc_convert, creso=creso)
489481

490482
elif PyDate_Check(val):

pandas/_libs/tslibs/conversion.pxd

-7
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,6 @@ cpdef (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso=*)
4747

4848
cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)
4949

50-
cdef tzinfo convert_timezone(
51-
tzinfo tz_in,
52-
tzinfo tz_out,
53-
bint found_naive,
54-
bint found_tz,
55-
bint utc_convert,
56-
)
5750

5851
cdef int64_t parse_pydatetime(
5952
datetime val,

pandas/_libs/tslibs/conversion.pyx

-53
Original file line numberDiff line numberDiff line change
@@ -673,59 +673,6 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz):
673673
return _localize_pydatetime(dt, tz)
674674

675675

676-
cdef tzinfo convert_timezone(
677-
tzinfo tz_in,
678-
tzinfo tz_out,
679-
bint found_naive,
680-
bint found_tz,
681-
bint utc_convert,
682-
):
683-
"""
684-
Validate that ``tz_in`` can be converted/localized to ``tz_out``.
685-
686-
Parameters
687-
----------
688-
tz_in : tzinfo or None
689-
Timezone info of element being processed.
690-
tz_out : tzinfo or None
691-
Timezone info of output.
692-
found_naive : bool
693-
Whether a timezone-naive element has been found so far.
694-
found_tz : bool
695-
Whether a timezone-aware element has been found so far.
696-
utc_convert : bool
697-
Whether to convert/localize to UTC.
698-
699-
Returns
700-
-------
701-
tz_info
702-
Timezone info of output.
703-
704-
Raises
705-
------
706-
ValueError
707-
If ``tz_in`` can't be converted/localized to ``tz_out``.
708-
"""
709-
if tz_in is not None:
710-
if utc_convert:
711-
pass
712-
elif found_naive:
713-
raise ValueError("Tz-aware datetime.datetime "
714-
"cannot be converted to "
715-
"datetime64 unless utc=True")
716-
elif tz_out is not None and not tz_compare(tz_out, tz_in):
717-
raise ValueError("Tz-aware datetime.datetime "
718-
"cannot be converted to "
719-
"datetime64 unless utc=True")
720-
else:
721-
tz_out = tz_in
722-
else:
723-
if found_tz and not utc_convert:
724-
raise ValueError("Cannot mix tz-aware with "
725-
"tz-naive values")
726-
return tz_out
727-
728-
729676
cdef int64_t parse_pydatetime(
730677
datetime val,
731678
npy_datetimestruct *dts,

pandas/_libs/tslibs/strptime.pxd

+12
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,16 @@
1+
from cpython.datetime cimport (
2+
datetime,
3+
tzinfo,
4+
)
15
from numpy cimport int64_t
26

37

48
cdef bint parse_today_now(str val, int64_t* iresult, bint utc)
9+
10+
11+
# Declaration of the parse-state helper implemented in strptime.pyx.
cdef class DatetimeParseState:
    # found_tz: set once a tz-aware datetime has been processed.
    # found_naive: set once a tz-naive datetime has been processed.
    cdef bint found_tz, found_naive

    # Records the awareness of ``dt`` and returns the output timezone,
    # raising ValueError on an inconsistent mix (see the .pyx implementation).
    cdef tzinfo process_datetime(self, datetime dt, tzinfo tz, bint utc_convert)

pandas/_libs/tslibs/strptime.pyx

+35-17
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,7 @@ from numpy cimport (
4848
)
4949

5050
from pandas._libs.missing cimport checknull_with_nat_and_na
51-
from pandas._libs.tslibs.conversion cimport (
52-
convert_timezone,
53-
get_datetime64_nanos,
54-
)
51+
from pandas._libs.tslibs.conversion cimport get_datetime64_nanos
5552
from pandas._libs.tslibs.nattype cimport (
5653
NPY_NAT,
5754
c_nat_strings as nat_strings,
@@ -73,6 +70,7 @@ import_pandas_datetime()
7370
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
7471

7572
from pandas._libs.tslibs.timestamps cimport _Timestamp
73+
from pandas._libs.tslibs.timezones cimport tz_compare
7674
from pandas._libs.util cimport (
7775
is_float_object,
7876
is_integer_object,
@@ -156,6 +154,37 @@ cdef dict _parse_code_table = {"y": 0,
156154
"u": 22}
157155

158156

157+
cdef class DatetimeParseState:
    """
    Remember whether tz-aware and/or tz-naive datetimes have been seen.

    One instance is created per parsing call and fed each datetime object
    in turn via ``process_datetime``.
    """

    def __cinit__(self):
        # Nothing has been seen yet.
        self.found_naive = False
        self.found_tz = False

    cdef tzinfo process_datetime(self, datetime dt, tzinfo tz, bint utc_convert):
        """
        Record the awareness of ``dt`` and validate it against ``tz``.

        Parameters
        ----------
        dt : datetime
            Element being processed.
        tz : tzinfo or None
            Timezone info of the output so far.
        utc_convert : bool
            Whether to convert/localize to UTC.

        Returns
        -------
        tzinfo or None
            Timezone info of the output: ``dt.tzinfo`` when ``dt`` is
            tz-aware and accepted without ``utc_convert``, otherwise ``tz``
            unchanged.

        Raises
        ------
        ValueError
            If tz-aware and tz-naive values are mixed, or if the timezone of
            ``dt`` does not match ``tz``, and ``utc_convert`` is False.
        """
        cdef tzinfo dt_tz = dt.tzinfo

        if dt_tz is None:
            # Naive element: only an error if an aware one came earlier.
            self.found_naive = True
            if self.found_tz and not utc_convert:
                raise ValueError("Cannot mix tz-aware with "
                                 "tz-naive values")
            return tz

        # Aware element.
        self.found_tz = True
        if utc_convert:
            # Everything is converted to UTC, so any timezone is acceptable.
            return tz

        if self.found_naive or (tz is not None and not tz_compare(tz, dt_tz)):
            # Either a naive value was seen before, or this timezone differs
            # from the one already chosen for the output.
            raise ValueError("Tz-aware datetime.datetime "
                             "cannot be converted to "
                             "datetime64 unless utc=True")
        return dt_tz
186+
187+
159188
def array_strptime(
160189
ndarray[object] values,
161190
str fmt,
@@ -183,13 +212,12 @@ def array_strptime(
183212
bint is_raise = errors=="raise"
184213
bint is_ignore = errors=="ignore"
185214
bint is_coerce = errors=="coerce"
186-
bint found_naive = False
187-
bint found_tz = False
188215
tzinfo tz_out = None
189216
bint iso_format = format_is_iso(fmt)
190217
NPY_DATETIMEUNIT out_bestunit
191218
int out_local = 0, out_tzoffset = 0
192219
bint string_to_dts_succeeded = 0
220+
DatetimeParseState state = DatetimeParseState()
193221

194222
assert is_raise or is_ignore or is_coerce
195223

@@ -276,17 +304,7 @@ def array_strptime(
276304
iresult[i] = NPY_NAT
277305
continue
278306
elif PyDateTime_Check(val):
279-
if val.tzinfo is not None:
280-
found_tz = True
281-
else:
282-
found_naive = True
283-
tz_out = convert_timezone(
284-
val.tzinfo,
285-
tz_out,
286-
found_naive,
287-
found_tz,
288-
utc,
289-
)
307+
tz_out = state.process_datetime(val, tz_out, utc)
290308
if isinstance(val, _Timestamp):
291309
iresult[i] = val.tz_localize(None).as_unit("ns")._value
292310
else:

0 commit comments

Comments
 (0)