Skip to content

Commit a7a23db

Browse files
jbrockmendel authored and
pooja-subramaniam committed
REF: tighter typing in parsing.pyx (pandas-dev#50851)
1 parent f10ba02 commit a7a23db

File tree

4 files changed

+49
-34
lines changed

4 files changed

+49
-34
lines changed

pandas/_libs/tslib.pyx

+2 -2
Original file line number | Diff line number | Diff line change
@@ -273,7 +273,7 @@ def array_with_unit_to_datetime(
273273
bint is_coerce = errors=="coerce"
274274
bint is_raise = errors=="raise"
275275
ndarray[int64_t] iresult
276-
object tz = None
276+
tzinfo tz = None
277277
float fval
278278

279279
assert is_ignore or is_coerce or is_raise
@@ -346,7 +346,7 @@ cdef _array_with_unit_to_datetime_object_fallback(ndarray[object] values, str un
346346
cdef:
347347
Py_ssize_t i, n = len(values)
348348
ndarray[object] oresult
349-
object tz = None
349+
tzinfo tz = None
350350

351351
# TODO: fix subtle differences between this and no-unit code
352352
oresult = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)

pandas/_libs/tslibs/dtypes.pxd

+1
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1
1111
cpdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso)
1212

1313
cdef dict attrname_to_abbrevs
14+
cdef dict npy_unit_to_attrname
1415

1516
cdef enum c_FreqGroup:
1617
# Mirrors FreqGroup in the .pyx file

pandas/_libs/tslibs/dtypes.pyx

+12
Original file line number | Diff line number | Diff line change
@@ -423,3 +423,15 @@ cdef dict _reso_str_map = {
423423
}
424424

425425
cdef dict _str_reso_map = {v: k for k, v in _reso_str_map.items()}
426+
427+
cdef dict npy_unit_to_attrname = {
428+
NPY_DATETIMEUNIT.NPY_FR_Y: "year",
429+
NPY_DATETIMEUNIT.NPY_FR_M: "month",
430+
NPY_DATETIMEUNIT.NPY_FR_D: "day",
431+
NPY_DATETIMEUNIT.NPY_FR_h: "hour",
432+
NPY_DATETIMEUNIT.NPY_FR_m: "minute",
433+
NPY_DATETIMEUNIT.NPY_FR_s: "second",
434+
NPY_DATETIMEUNIT.NPY_FR_ms: "millisecond",
435+
NPY_DATETIMEUNIT.NPY_FR_us: "microsecond",
436+
NPY_DATETIMEUNIT.NPY_FR_ns: "nanosecond",
437+
}

pandas/_libs/tslibs/parsing.pyx

+34 -32
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,10 @@ from cpython.datetime cimport (
1212
datetime,
1313
datetime_new,
1414
import_datetime,
15+
timedelta,
16+
tzinfo,
1517
)
18+
from datetime import timezone
1619
from cpython.object cimport PyObject_Str
1720
from cython cimport Py_ssize_t
1821
from libc.string cimport strchr
@@ -49,6 +52,7 @@ from dateutil.tz import (
4952
from pandas._config import get_option
5053

5154
from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS
55+
from pandas._libs.tslibs.dtypes cimport npy_unit_to_attrname
5256
from pandas._libs.tslibs.nattype cimport (
5357
c_NaT as NaT,
5458
c_nat_strings as nat_strings,
@@ -120,7 +124,9 @@ cdef int _parse_4digit(const char* s):
120124
return result
121125

122126

123-
cdef object _parse_delimited_date(str date_string, bint dayfirst):
127+
cdef datetime _parse_delimited_date(
128+
str date_string, bint dayfirst, NPY_DATETIMEUNIT* creso
129+
):
124130
"""
125131
Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY.
126132
@@ -138,12 +144,12 @@ cdef object _parse_delimited_date(str date_string, bint dayfirst):
138144
----------
139145
date_string : str
140146
dayfirst : bool
147+
creso : NPY_DATETIMEUNIT*
148+
For specifying identified resolution.
141149
142150
Returns:
143151
--------
144152
datetime or None
145-
str or None
146-
Describing resolution of the parsed string.
147153
"""
148154
cdef:
149155
const char* buf
@@ -157,53 +163,53 @@ cdef object _parse_delimited_date(str date_string, bint dayfirst):
157163
month = _parse_2digit(buf)
158164
day = _parse_2digit(buf + 3)
159165
year = _parse_4digit(buf + 6)
160-
reso = "day"
166+
creso[0] = NPY_DATETIMEUNIT.NPY_FR_D
161167
can_swap = 1
162168
elif length == 9 and _is_delimiter(buf[1]) and _is_delimiter(buf[4]):
163169
# parsing M?DD?YYYY and D?MM?YYYY dates
164170
month = _parse_1digit(buf)
165171
day = _parse_2digit(buf + 2)
166172
year = _parse_4digit(buf + 5)
167-
reso = "day"
173+
creso[0] = NPY_DATETIMEUNIT.NPY_FR_D
168174
can_swap = 1
169175
elif length == 9 and _is_delimiter(buf[2]) and _is_delimiter(buf[4]):
170176
# parsing MM?D?YYYY and DD?M?YYYY dates
171177
month = _parse_2digit(buf)
172178
day = _parse_1digit(buf + 3)
173179
year = _parse_4digit(buf + 5)
174-
reso = "day"
180+
creso[0] = NPY_DATETIMEUNIT.NPY_FR_D
175181
can_swap = 1
176182
elif length == 8 and _is_delimiter(buf[1]) and _is_delimiter(buf[3]):
177183
# parsing M?D?YYYY and D?M?YYYY dates
178184
month = _parse_1digit(buf)
179185
day = _parse_1digit(buf + 2)
180186
year = _parse_4digit(buf + 4)
181-
reso = "day"
187+
creso[0] = NPY_DATETIMEUNIT.NPY_FR_D
182188
can_swap = 1
183189
elif length == 7 and _is_delimiter(buf[2]):
184190
# parsing MM?YYYY dates
185191
if buf[2] == b".":
186192
# we cannot reliably tell whether e.g. 10.2010 is a float
187193
# or a date, thus we refuse to parse it here
188-
return None, None
194+
return None
189195
month = _parse_2digit(buf)
190196
year = _parse_4digit(buf + 3)
191-
reso = "month"
197+
creso[0] = NPY_DATETIMEUNIT.NPY_FR_M
192198
else:
193-
return None, None
199+
return None
194200

195201
if month < 0 or day < 0 or year < 1000:
196202
# some part is not an integer, so
197203
# date_string can't be converted to date, above format
198-
return None, None
204+
return None
199205

200206
if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \
201207
and (month <= MAX_MONTH or day <= MAX_MONTH):
202208
if (month > MAX_MONTH or (day <= MAX_MONTH and dayfirst)) and can_swap:
203209
day, month = month, day
204210
# In Python <= 3.6.0 there is no range checking for invalid dates
205211
# in C api, thus we call faster C version for 3.6.1 or newer
206-
return datetime_new(year, month, day, 0, 0, 0, 0, None), reso
212+
return datetime_new(year, month, day, 0, 0, 0, 0, None)
207213

208214
raise DateParseError(f"Invalid date specified ({month}/{day})")
209215

@@ -264,6 +270,7 @@ def parse_datetime_string(
264270

265271
cdef:
266272
datetime dt
273+
NPY_DATETIMEUNIT creso
267274

268275
if not _does_string_look_like_datetime(date_string):
269276
raise ValueError(f'Given date string "{date_string}" not likely a datetime')
@@ -274,7 +281,7 @@ def parse_datetime_string(
274281
yearfirst=yearfirst)
275282
return dt
276283

277-
dt, _ = _parse_delimited_date(date_string, dayfirst)
284+
dt = _parse_delimited_date(date_string, dayfirst, &creso)
278285
if dt is not None:
279286
return dt
280287

@@ -351,18 +358,19 @@ def parse_datetime_string_with_reso(
351358
bint string_to_dts_failed
352359
npy_datetimestruct dts
353360
NPY_DATETIMEUNIT out_bestunit
354-
int out_local
361+
int out_local = 0
355362
int out_tzoffset
363+
tzinfo tz
356364

357365
if not _does_string_look_like_datetime(date_string):
358366
raise ValueError(f'Given date string "{date_string}" not likely a datetime')
359367

360-
parsed, reso = _parse_delimited_date(date_string, dayfirst)
368+
parsed = _parse_delimited_date(date_string, dayfirst, &out_bestunit)
361369
if parsed is not None:
370+
reso = npy_unit_to_attrname[out_bestunit]
362371
return parsed, reso
363372

364373
# Try iso8601 first, as it handles nanoseconds
365-
# TODO: does this render some/all of parse_delimited_date redundant?
366374
string_to_dts_failed = string_to_dts(
367375
date_string, &dts, &out_bestunit, &out_local,
368376
&out_tzoffset, False
@@ -372,31 +380,25 @@ def parse_datetime_string_with_reso(
372380
NPY_DATETIMEUNIT.NPY_FR_ps,
373381
NPY_DATETIMEUNIT.NPY_FR_fs,
374382
NPY_DATETIMEUNIT.NPY_FR_as}
375-
if out_bestunit in timestamp_units or out_local:
376-
# TODO: the not-out_local case we could do without Timestamp;
377-
# avoid circular import
383+
if out_bestunit in timestamp_units:
384+
# TODO: avoid circular import
378385
from pandas import Timestamp
379386
parsed = Timestamp(date_string)
380387
else:
381-
parsed = datetime(
382-
dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us
388+
if out_local:
389+
tz = timezone(timedelta(minutes=out_tzoffset))
390+
else:
391+
tz = None
392+
parsed = datetime_new(
393+
dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz
383394
)
384395
# Match Timestamp and drop picoseconds, femtoseconds, attoseconds
385396
# The new resolution will just be nano
386397
# GH 50417
387398
if out_bestunit in timestamp_units:
388399
out_bestunit = NPY_DATETIMEUNIT.NPY_FR_ns
389-
reso = {
390-
NPY_DATETIMEUNIT.NPY_FR_Y: "year",
391-
NPY_DATETIMEUNIT.NPY_FR_M: "month",
392-
NPY_DATETIMEUNIT.NPY_FR_D: "day",
393-
NPY_DATETIMEUNIT.NPY_FR_h: "hour",
394-
NPY_DATETIMEUNIT.NPY_FR_m: "minute",
395-
NPY_DATETIMEUNIT.NPY_FR_s: "second",
396-
NPY_DATETIMEUNIT.NPY_FR_ms: "millisecond",
397-
NPY_DATETIMEUNIT.NPY_FR_us: "microsecond",
398-
NPY_DATETIMEUNIT.NPY_FR_ns: "nanosecond",
399-
}[out_bestunit]
400+
401+
reso = npy_unit_to_attrname[out_bestunit]
400402
return parsed, reso
401403

402404
try:

0 commit comments

Comments
 (0)