Skip to content

Commit b366e31

Browse files
author
MarcoGorelli
committed
fixup
1 parent 0971f55 commit b366e31

File tree

5 files changed

+172
-53
lines changed

5 files changed

+172
-53
lines changed

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ repos:
6363
'--extensions=c,h',
6464
'--headers=h',
6565
--recursive,
66-
'--filter=-readability/casting,-runtime/int,-build/include_subdir'
66+
'--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size'
6767
]
6868
- repo: https://github.com/PyCQA/flake8
6969
rev: 6.0.0

pandas/_libs/tslibs/np_datetime.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ cdef int string_to_dts(
9696
int* out_tzoffset,
9797
bint want_exc,
9898
format: str | None = *,
99-
bint exact = *
99+
int exact = *
100100
) except? -1
101101

102102
cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype)

pandas/_libs/tslibs/np_datetime.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ cdef int string_to_dts(
279279
int* out_tzoffset,
280280
bint want_exc,
281281
format: str | None=None,
282-
bint exact=True,
282+
int exact=1,
283283
) except? -1:
284284
cdef:
285285
Py_ssize_t length
@@ -291,7 +291,7 @@ cdef int string_to_dts(
291291
if format is None:
292292
format_buf = b""
293293
format_length = 0
294-
exact = False
294+
exact = 2
295295
else:
296296
format_buf = get_c_string_buf_and_size(format, &format_length)
297297
return parse_iso_8601_datetime(buf, length, want_exc,

pandas/_libs/tslibs/src/datetime/np_datetime_strings.c

+137-49
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,14 @@ This file implements string parsing and creation for NumPy datetime.
6363
* to 0 otherwise. The values 'now' and 'today' don't get counted
6464
* as local, and neither do UTC +/-#### timezone offsets, because
6565
* they aren't using the computer's local timezone offset.
66+
* 'exact' can be one of three values:
67+
* * 0: don't require an exact match. For example, if the string
68+
* is '2020-01-01 05:00:00' and format is '%Y-%m-%d', then
69+
* parse '2020-01-01';
70+
* * 1: require exact match. If the string is '2020-01-01', then
71+
* the only format which will be able to parse it without
72+
* error is '%Y-%m-%d';
73+
* * 2: don't require any match; the format is not compared against the string at all (used when no format is given).
6674
*
6775
* Returns 0 on success, -1 on failure.
6876
*/
@@ -73,18 +81,8 @@ This file implements string parsing and creation for NumPy datetime.
7381
static int compare_format(const char **format, int *characters_remaining,
7482
const char *compare_to, int n, const int exact) {
7583
if (*characters_remaining < n) {
76-
if (exact) {
77-
// TODO(pandas-dev): in the future we should set a PyErr here
78-
// to be very clear about what went wrong
79-
return -1;
80-
} else if (*characters_remaining) {
81-
// TODO(pandas-dev): same return value in this function as
82-
// above branch, but stub out a future where
83-
// we have a better error message
84-
return -1;
85-
} else {
86-
return 0;
87-
}
84+
// TODO(pandas-dev): PyErr to differentiate what went wrong
85+
return -1;
8886
} else {
8987
if (strncmp(*format, compare_to, n)) {
9088
// TODO(pandas-dev): PyErr to differentiate what went wrong
@@ -139,8 +137,13 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
139137
while (sublen > 0 && isspace(*substr)) {
140138
++substr;
141139
--sublen;
142-
if (compare_format(&format, &format_len, " ", 1, exact)) {
143-
goto parse_error;
140+
if (exact != 2) {
141+
if (exact == 0 && !format_len) {
142+
goto finish;
143+
}
144+
if (compare_format(&format, &format_len, " ", 1, exact)) {
145+
goto parse_error;
146+
}
144147
}
145148
}
146149

@@ -155,8 +158,13 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
155158
}
156159

157160
/* PARSE THE YEAR (4 digits) */
158-
if (compare_format(&format, &format_len, "%Y", 2, exact)) {
159-
goto parse_error;
161+
if (exact != 2) {
162+
if (exact == 0 && !format_len) {
163+
goto finish;
164+
}
165+
if (compare_format(&format, &format_len, "%Y", 2, exact)) {
166+
goto parse_error;
167+
}
160168
}
161169

162170
out->year = 0;
@@ -202,8 +210,13 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
202210
++substr;
203211
--sublen;
204212

205-
if (compare_format(&format, &format_len, &ymd_sep, 1, exact)) {
206-
goto parse_error;
213+
if (exact != 2) {
214+
if (exact == 0 && !format_len) {
215+
goto finish;
216+
}
217+
if (compare_format(&format, &format_len, &ymd_sep, 1, exact)) {
218+
goto parse_error;
219+
}
207220
}
208221
/* Cannot have trailing separator */
209222
if (sublen == 0 || !isdigit(*substr)) {
@@ -212,8 +225,13 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
212225
}
213226

214227
/* PARSE THE MONTH */
215-
if (compare_format(&format, &format_len, "%m", 2, exact)) {
216-
goto parse_error;
228+
if (exact != 2) {
229+
if (exact == 0 && !format_len) {
230+
goto finish;
231+
}
232+
if (compare_format(&format, &format_len, "%m", 2, exact)) {
233+
goto parse_error;
234+
}
217235
}
218236
/* First digit required */
219237
out->month = (*substr - '0');
@@ -258,14 +276,24 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
258276
}
259277
++substr;
260278
--sublen;
261-
if (compare_format(&format, &format_len, &ymd_sep, 1, exact)) {
262-
goto parse_error;
279+
if (exact != 2) {
280+
if (exact == 0 && !format_len) {
281+
goto finish;
282+
}
283+
if (compare_format(&format, &format_len, &ymd_sep, 1, exact)) {
284+
goto parse_error;
285+
}
263286
}
264287
}
265288

266289
/* PARSE THE DAY */
267-
if (compare_format(&format, &format_len, "%d", 2, exact)) {
268-
goto parse_error;
290+
if (exact != 2) {
291+
if (exact == 0 && !format_len) {
292+
goto finish;
293+
}
294+
if (compare_format(&format, &format_len, "%d", 2, exact)) {
295+
goto parse_error;
296+
}
269297
}
270298
/* First digit required */
271299
if (!isdigit(*substr)) {
@@ -306,15 +334,25 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
306334
if ((*substr != 'T' && *substr != ' ') || sublen == 1) {
307335
goto parse_error;
308336
}
309-
if (compare_format(&format, &format_len, substr, 1, exact)) {
310-
goto parse_error;
337+
if (exact != 2) {
338+
if (exact == 0 && !format_len) {
339+
goto finish;
340+
}
341+
if (compare_format(&format, &format_len, substr, 1, exact)) {
342+
goto parse_error;
343+
}
311344
}
312345
++substr;
313346
--sublen;
314347

315348
/* PARSE THE HOURS */
316-
if (compare_format(&format, &format_len, "%H", 2, exact)) {
317-
goto parse_error;
349+
if (exact != 2) {
350+
if (exact == 0 && !format_len) {
351+
goto finish;
352+
}
353+
if (compare_format(&format, &format_len, "%H", 2, exact)) {
354+
goto parse_error;
355+
}
318356
}
319357
/* First digit required */
320358
if (!isdigit(*substr)) {
@@ -359,8 +397,13 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
359397
if (sublen == 0 || !isdigit(*substr)) {
360398
goto parse_error;
361399
}
362-
if (compare_format(&format, &format_len, ":", 1, exact)) {
363-
goto parse_error;
400+
if (exact != 2) {
401+
if (exact == 0 && !format_len) {
402+
goto finish;
403+
}
404+
if (compare_format(&format, &format_len, ":", 1, exact)) {
405+
goto parse_error;
406+
}
364407
}
365408
} else if (!isdigit(*substr)) {
366409
if (!hour_was_2_digits) {
@@ -370,8 +413,13 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
370413
}
371414

372415
/* PARSE THE MINUTES */
373-
if (compare_format(&format, &format_len, "%M", 2, exact)) {
374-
goto parse_error;
416+
if (exact != 2) {
417+
if (exact == 0 && !format_len) {
418+
goto finish;
419+
}
420+
if (compare_format(&format, &format_len, "%M", 2, exact)) {
421+
goto parse_error;
422+
}
375423
}
376424
/* First digit required */
377425
out->min = (*substr - '0');
@@ -405,8 +453,13 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
405453
/* If we make it through this condition block, then the next
406454
* character is a digit. */
407455
if (has_hms_sep && *substr == ':') {
408-
if (compare_format(&format, &format_len, ":", 1, exact)) {
409-
goto parse_error;
456+
if (exact != 2) {
457+
if (exact == 0 && !format_len) {
458+
goto finish;
459+
}
460+
if (compare_format(&format, &format_len, ":", 1, exact)) {
461+
goto parse_error;
462+
}
410463
}
411464
++substr;
412465
--sublen;
@@ -420,9 +473,14 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
420473
}
421474

422475
/* PARSE THE SECONDS */
423-
if (compare_format(&format, &format_len, "%S", 2, exact)) {
424-
goto parse_error;
425-
}
476+
if (exact != 2) {
477+
if (exact == 0 && !format_len) {
478+
goto finish;
479+
}
480+
if (compare_format(&format, &format_len, "%S", 2, exact)) {
481+
goto parse_error;
482+
}
483+
}
426484
/* First digit required */
427485
out->sec = (*substr - '0');
428486
++substr;
@@ -448,17 +506,27 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
448506
if (sublen > 0 && *substr == '.') {
449507
++substr;
450508
--sublen;
451-
if (compare_format(&format, &format_len, ".", 1, exact)) {
452-
goto parse_error;
509+
if (exact != 2) {
510+
if (exact == 0 && !format_len) {
511+
goto finish;
512+
}
513+
if (compare_format(&format, &format_len, ".", 1, exact)) {
514+
goto parse_error;
515+
}
453516
}
454517
} else {
455518
bestunit = NPY_FR_s;
456519
goto parse_timezone;
457520
}
458521

459522
/* PARSE THE MICROSECONDS (0 to 6 digits) */
460-
if (compare_format(&format, &format_len, "%f", 2, exact)) {
461-
goto parse_error;
523+
if (exact != 2) {
524+
if (exact == 0 && !format_len) {
525+
goto finish;
526+
}
527+
if (compare_format(&format, &format_len, "%f", 2, exact)) {
528+
goto parse_error;
529+
}
462530
}
463531
numdigits = 0;
464532
for (i = 0; i < 6; ++i) {
@@ -524,8 +592,13 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
524592
while (sublen > 0 && isspace(*substr)) {
525593
++substr;
526594
--sublen;
527-
if (compare_format(&format, &format_len, " ", 1, exact)) {
528-
goto parse_error;
595+
if (exact != 2) {
596+
if (exact == 0 && !format_len) {
597+
goto finish;
598+
}
599+
if (compare_format(&format, &format_len, " ", 1, exact)) {
600+
goto parse_error;
601+
}
529602
}
530603
}
531604

@@ -539,8 +612,13 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
539612

540613
/* UTC specifier */
541614
if (*substr == 'Z') {
542-
if (compare_format(&format, &format_len, "%z", 2, exact)) {
543-
goto parse_error;
615+
if (exact != 2) {
616+
if (exact == 0 && !format_len) {
617+
goto finish;
618+
}
619+
if (compare_format(&format, &format_len, "%z", 2, exact)) {
620+
goto parse_error;
621+
}
544622
}
545623
/* "Z" should be equivalent to tz offset "+00:00" */
546624
if (out_local != NULL) {
@@ -561,8 +639,13 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
561639
--sublen;
562640
}
563641
} else if (*substr == '-' || *substr == '+') {
564-
if (compare_format(&format, &format_len, "%z", 2, exact)) {
565-
goto parse_error;
642+
if (exact != 2) {
643+
if (exact == 0 && !format_len) {
644+
goto finish;
645+
}
646+
if (compare_format(&format, &format_len, "%z", 2, exact)) {
647+
goto parse_error;
648+
}
566649
}
567650
/* Time zone offset */
568651
int offset_neg = 0, offset_hour = 0, offset_minute = 0;
@@ -647,8 +730,13 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
647730
while (sublen > 0 && isspace(*substr)) {
648731
++substr;
649732
--sublen;
650-
if (compare_format(&format, &format_len, " ", 1, exact)) {
651-
goto parse_error;
733+
if (exact != 2) {
734+
if (exact == 0 && !format_len) {
735+
goto finish;
736+
}
737+
if (compare_format(&format, &format_len, " ", 1, exact)) {
738+
goto parse_error;
739+
}
652740
}
653741
}
654742

pandas/tests/tools/test_to_datetime.py

+31
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,37 @@ def test_to_datetime_with_non_exact(self, cache):
353353
)
354354
tm.assert_series_equal(result, expected)
355355

356+
@pytest.mark.parametrize(
357+
"format, expected",
358+
[
359+
("%Y-%m-%d", Timestamp(2000, 1, 3)),
360+
("%Y-%d-%m", Timestamp(2000, 3, 1)),
361+
("%Y-%m-%d %H", Timestamp(2000, 1, 3, 12)),
362+
("%Y-%d-%m %H", Timestamp(2000, 3, 1, 12)),
363+
("%Y-%m-%d %H:%M", Timestamp(2000, 1, 3, 12, 34)),
364+
("%Y-%d-%m %H:%M", Timestamp(2000, 3, 1, 12, 34)),
365+
("%Y-%m-%d %H:%M:%S", Timestamp(2000, 1, 3, 12, 34, 56)),
366+
("%Y-%d-%m %H:%M:%S", Timestamp(2000, 3, 1, 12, 34, 56)),
367+
("%Y-%m-%d %H:%M:%S.%f", Timestamp(2000, 1, 3, 12, 34, 56, 123456)),
368+
("%Y-%d-%m %H:%M:%S.%f", Timestamp(2000, 3, 1, 12, 34, 56, 123456)),
369+
(
370+
"%Y-%m-%d %H:%M:%S.%f%z",
371+
Timestamp(2000, 1, 3, 12, 34, 56, 123456, tz="UTC+01:00"),
372+
),
373+
(
374+
"%Y-%d-%m %H:%M:%S.%f%z",
375+
Timestamp(2000, 3, 1, 12, 34, 56, 123456, tz="UTC+01:00"),
376+
),
377+
],
378+
)
379+
def test_non_exact_doesnt_parse_whole_string(self, cache, format, expected):
380+
# https://github.com/pandas-dev/pandas/issues/50412
381+
# the formats alternate between ISO8601 and non-ISO8601 to check both paths
382+
result = to_datetime(
383+
"2000-01-03 12:34:56.123456+01:00", format=format, exact=False
384+
)
385+
assert result == expected
386+
356387
@pytest.mark.parametrize(
357388
"arg",
358389
[

0 commit comments

Comments
 (0)