Skip to content

Commit 2e4e0b9

Browse files
anmyachev authored and jreback committed
PERF: added no exception versions of '_string_to_dts' and 'parse_iso_8601_datetime' functions (pandas-dev#26220)
1 parent a2686c6 commit 2e4e0b9

File tree

6 files changed

+117
-71
lines changed

6 files changed

+117
-71
lines changed

pandas/_libs/tslib.pyx

+10-13
Original file line number | Diff line number | Diff line change
@@ -204,7 +204,7 @@ def _test_parse_iso8601(object ts):
204204
elif ts == 'today':
205205
return Timestamp.now().normalize()
206206

207-
_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset)
207+
_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset, True)
208208
obj.value = dtstruct_to_dt64(&obj.dts)
209209
check_dts_bounds(&obj.dts)
210210
if out_local == 1:
@@ -511,6 +511,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',
511511
int out_local=0, out_tzoffset=0
512512
float offset_seconds, tz_offset
513513
set out_tzoffset_vals = set()
514+
bint string_to_dts_failed
514515

515516
# specify error conditions
516517
assert is_raise or is_ignore or is_coerce
@@ -578,10 +579,12 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',
578579
iresult[i] = NPY_NAT
579580
continue
580581

581-
try:
582-
_string_to_dts(val, &dts, &out_local, &out_tzoffset)
583-
except ValueError:
584-
# A ValueError at this point is a _parsing_ error
582+
string_to_dts_failed = _string_to_dts(
583+
val, &dts, &out_local,
584+
&out_tzoffset, False
585+
)
586+
if string_to_dts_failed:
587+
# An error at this point is a _parsing_ error
585588
# specifically _not_ OutOfBoundsDatetime
586589
if _parse_today_now(val, &iresult[i]):
587590
continue
@@ -623,14 +626,8 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',
623626

624627
_ts = convert_datetime_to_tsobject(py_dt, None)
625628
iresult[i] = _ts.value
626-
except:
627-
# TODO: What exception are we concerned with here?
628-
if is_coerce:
629-
iresult[i] = NPY_NAT
630-
continue
631-
raise
632-
else:
633-
# No error raised by string_to_dts, pick back up
629+
if not string_to_dts_failed:
630+
# No error reported by string_to_dts, pick back up
634631
# where we left off
635632
value = dtstruct_to_dt64(&dts)
636633
if out_local == 1:

pandas/_libs/tslibs/conversion.pyx

+59-30
Original file line number | Diff line number | Diff line change
@@ -392,6 +392,44 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
392392
return obj
393393

394394

395+
cdef _TSObject create_tsobject_tz_using_offset(int64_t value,
396+
int tzoffset, object tz=None):
397+
"""
398+
Convert a numpy datetime64 `value`, along with initial timezone offset
399+
`tzoffset` to a _TSObject (with timezone object `tz` - optional).
400+
401+
Parameters
402+
----------
403+
value: int64_t
404+
numpy dt64
405+
tzoffset: int
406+
tz : tzinfo or None
407+
timezone for the timezone-aware output.
408+
409+
Returns
410+
-------
411+
obj : _TSObject
412+
"""
413+
cdef:
414+
_TSObject obj
415+
datetime dt
416+
417+
tzinfo = pytz.FixedOffset(tzoffset)
418+
value = tz_convert_single(value, tzinfo, UTC)
419+
obj = convert_to_tsobject(value, tzinfo, None, 0, 0)
420+
if tz is None:
421+
check_overflows(obj)
422+
return obj
423+
424+
# Keep the converter same as PyDateTime's
425+
dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day,
426+
obj.dts.hour, obj.dts.min, obj.dts.sec,
427+
obj.dts.us, obj.tzinfo)
428+
obj = convert_datetime_to_tsobject(
429+
dt, tz, nanos=obj.dts.ps // 1000)
430+
return obj
431+
432+
395433
cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
396434
bint dayfirst=False,
397435
bint yearfirst=False):
@@ -420,15 +458,14 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
420458
obj : _TSObject
421459
"""
422460
cdef:
423-
_TSObject obj
461+
npy_datetimestruct dts
462+
int64_t value # numpy dt64
424463
int out_local = 0, out_tzoffset = 0
425-
datetime dt
464+
bint do_parse_datetime_string = False
426465

427466
if tz is not None:
428467
tz = maybe_get_tz(tz)
429468

430-
obj = _TSObject()
431-
432469
assert isinstance(ts, str)
433470

434471
if len(ts) == 0 or ts in nat_strings:
@@ -443,34 +480,23 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
443480
ts = datetime.now(tz)
444481
# equiv: datetime.today().replace(tzinfo=tz)
445482
else:
483+
string_to_dts_failed = _string_to_dts(
484+
ts, &dts, &out_local,
485+
&out_tzoffset, False
486+
)
446487
try:
447-
_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset)
448-
obj.value = dtstruct_to_dt64(&obj.dts)
449-
check_dts_bounds(&obj.dts)
450-
if out_local == 1:
451-
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
452-
obj.value = tz_convert_single(obj.value, obj.tzinfo, UTC)
453-
if tz is None:
454-
check_dts_bounds(&obj.dts)
455-
check_overflows(obj)
456-
return obj
488+
if not string_to_dts_failed:
489+
check_dts_bounds(&dts)
490+
value = dtstruct_to_dt64(&dts)
491+
if out_local == 1:
492+
return create_tsobject_tz_using_offset(value,
493+
out_tzoffset, tz)
457494
else:
458-
# Keep the converter same as PyDateTime's
459-
obj = convert_to_tsobject(obj.value, obj.tzinfo,
460-
None, 0, 0)
461-
dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day,
462-
obj.dts.hour, obj.dts.min, obj.dts.sec,
463-
obj.dts.us, obj.tzinfo)
464-
obj = convert_datetime_to_tsobject(
465-
dt, tz, nanos=obj.dts.ps // 1000)
466-
return obj
467-
468-
else:
469-
ts = obj.value
470-
if tz is not None:
471-
# shift for localize_tso
472-
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
473-
ambiguous='raise')[0]
495+
ts = value
496+
if tz is not None:
497+
# shift for localize_tso
498+
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
499+
ambiguous='raise')[0]
474500

475501
except OutOfBoundsDatetime:
476502
# GH#19382 for just-barely-OutOfBounds falling back to dateutil
@@ -479,6 +505,9 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
479505
raise
480506

481507
except ValueError:
508+
do_parse_datetime_string = True
509+
510+
if string_to_dts_failed or do_parse_datetime_string:
482511
try:
483512
ts = parse_datetime_string(ts, dayfirst=dayfirst,
484513
yearfirst=yearfirst)

pandas/_libs/tslibs/np_datetime.pxd

+2-1
Original file line number | Diff line number | Diff line change
@@ -73,4 +73,5 @@ cdef npy_timedelta get_timedelta64_value(object obj) nogil
7373
cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil
7474

7575
cdef int _string_to_dts(object val, npy_datetimestruct* dts,
76-
int* out_local, int* out_tzoffset) except? -1
76+
int* out_local, int* out_tzoffset,
77+
bint want_exc) except? -1

pandas/_libs/tslibs/np_datetime.pyx

+4-3
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ cdef extern from "src/datetime/np_datetime.h":
3131
npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS
3232

3333
cdef extern from "src/datetime/np_datetime_strings.h":
34-
int parse_iso_8601_datetime(const char *str, int len,
34+
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
3535
npy_datetimestruct *out,
3636
int *out_local, int *out_tzoffset)
3737

@@ -170,11 +170,12 @@ cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts):
170170

171171

172172
cdef inline int _string_to_dts(object val, npy_datetimestruct* dts,
173-
int* out_local, int* out_tzoffset) except? -1:
173+
int* out_local, int* out_tzoffset,
174+
bint want_exc) except? -1:
174175
cdef:
175176
Py_ssize_t length
176177
const char* buf
177178

178179
buf = get_c_string_buf_and_size(val, &length)
179-
return parse_iso_8601_datetime(buf, length,
180+
return parse_iso_8601_datetime(buf, length, want_exc,
180181
dts, out_local, out_tzoffset)

pandas/_libs/tslibs/src/datetime/np_datetime_strings.c

+41-22
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ This file implements string parsing and creation for NumPy datetime.
6666
*
6767
* Returns 0 on success, -1 on failure.
6868
*/
69-
int parse_iso_8601_datetime(const char *str, int len,
69+
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
7070
npy_datetimestruct *out,
7171
int *out_local, int *out_tzoffset) {
7272
int year_leap = 0;
@@ -173,8 +173,10 @@ int parse_iso_8601_datetime(const char *str, int len,
173173
goto parse_error;
174174
}
175175
if (out->month < 1 || out->month > 12) {
176-
PyErr_Format(PyExc_ValueError,
177-
"Month out of range in datetime string \"%s\"", str);
176+
if (want_exc) {
177+
PyErr_Format(PyExc_ValueError,
178+
"Month out of range in datetime string \"%s\"", str);
179+
}
178180
goto error;
179181
}
180182

@@ -217,8 +219,10 @@ int parse_iso_8601_datetime(const char *str, int len,
217219
}
218220
if (out->day < 1 ||
219221
out->day > days_per_month_table[year_leap][out->month - 1]) {
220-
PyErr_Format(PyExc_ValueError,
221-
"Day out of range in datetime string \"%s\"", str);
222+
if (want_exc) {
223+
PyErr_Format(PyExc_ValueError,
224+
"Day out of range in datetime string \"%s\"", str);
225+
}
222226
goto error;
223227
}
224228

@@ -251,8 +255,11 @@ int parse_iso_8601_datetime(const char *str, int len,
251255
++substr;
252256
--sublen;
253257
if (out->hour >= 24) {
254-
PyErr_Format(PyExc_ValueError,
255-
"Hours out of range in datetime string \"%s\"", str);
258+
if (want_exc) {
259+
PyErr_Format(PyExc_ValueError,
260+
"Hours out of range in datetime string \"%s\"",
261+
str);
262+
}
256263
goto error;
257264
}
258265
}
@@ -291,8 +298,11 @@ int parse_iso_8601_datetime(const char *str, int len,
291298
++substr;
292299
--sublen;
293300
if (out->min >= 60) {
294-
PyErr_Format(PyExc_ValueError,
295-
"Minutes out of range in datetime string \"%s\"", str);
301+
if (want_exc) {
302+
PyErr_Format(PyExc_ValueError,
303+
"Minutes out of range in datetime string \"%s\"",
304+
str);
305+
}
296306
goto error;
297307
}
298308
} else if (!has_hms_sep) {
@@ -328,8 +338,11 @@ int parse_iso_8601_datetime(const char *str, int len,
328338
++substr;
329339
--sublen;
330340
if (out->sec >= 60) {
331-
PyErr_Format(PyExc_ValueError,
332-
"Seconds out of range in datetime string \"%s\"", str);
341+
if (want_exc) {
342+
PyErr_Format(PyExc_ValueError,
343+
"Seconds out of range in datetime string \"%s\"",
344+
str);
345+
}
333346
goto error;
334347
}
335348
} else if (!has_hms_sep) {
@@ -438,10 +451,12 @@ int parse_iso_8601_datetime(const char *str, int len,
438451
substr += 2;
439452
sublen -= 2;
440453
if (offset_hour >= 24) {
441-
PyErr_Format(PyExc_ValueError,
442-
"Timezone hours offset out of range "
443-
"in datetime string \"%s\"",
444-
str);
454+
if (want_exc) {
455+
PyErr_Format(PyExc_ValueError,
456+
"Timezone hours offset out of range "
457+
"in datetime string \"%s\"",
458+
str);
459+
}
445460
goto error;
446461
}
447462
} else if (sublen >= 1 && isdigit(substr[0])) {
@@ -466,10 +481,12 @@ int parse_iso_8601_datetime(const char *str, int len,
466481
substr += 2;
467482
sublen -= 2;
468483
if (offset_minute >= 60) {
469-
PyErr_Format(PyExc_ValueError,
470-
"Timezone minutes offset out of range "
471-
"in datetime string \"%s\"",
472-
str);
484+
if (want_exc) {
485+
PyErr_Format(PyExc_ValueError,
486+
"Timezone minutes offset out of range "
487+
"in datetime string \"%s\"",
488+
str);
489+
}
473490
goto error;
474491
}
475492
} else if (sublen >= 1 && isdigit(substr[0])) {
@@ -507,9 +524,11 @@ int parse_iso_8601_datetime(const char *str, int len,
507524
return 0;
508525

509526
parse_error:
510-
PyErr_Format(PyExc_ValueError,
511-
"Error parsing datetime string \"%s\" at position %d", str,
512-
(int)(substr - str));
527+
if (want_exc) {
528+
PyErr_Format(PyExc_ValueError,
529+
"Error parsing datetime string \"%s\" at position %d", str,
530+
(int)(substr - str));
531+
}
513532
return -1;
514533

515534
error:

pandas/_libs/tslibs/src/datetime/np_datetime_strings.h

+1-2
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@ This file implements string parsing and creation for NumPy datetime.
5454
* Returns 0 on success, -1 on failure.
5555
*/
5656
int
57-
parse_iso_8601_datetime(const char *str, int len,
57+
parse_iso_8601_datetime(const char *str, int len, int want_exc,
5858
npy_datetimestruct *out,
5959
int *out_local,
6060
int *out_tzoffset);
@@ -79,5 +79,4 @@ get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base);
7979
int
8080
make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
8181
NPY_DATETIMEUNIT base);
82-
8382
#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_

0 commit comments

Comments
 (0)