@@ -15,7 +15,9 @@ from cpython.datetime cimport (
15
15
timedelta,
16
16
tzinfo,
17
17
)
18
+
18
19
from datetime import timezone
20
+
19
21
from cpython.object cimport PyObject_Str
20
22
from cython cimport Py_ssize_t
21
23
from libc.string cimport strchr
@@ -52,18 +54,25 @@ from dateutil.tz import (
52
54
from pandas._config import get_option
53
55
54
56
from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS
55
- from pandas._libs.tslibs.dtypes cimport npy_unit_to_attrname
57
+ from pandas._libs.tslibs.dtypes cimport (
58
+ attrname_to_npy_unit,
59
+ npy_unit_to_attrname,
60
+ )
56
61
from pandas._libs.tslibs.nattype cimport (
57
62
c_NaT as NaT,
58
63
c_nat_strings as nat_strings,
59
64
)
65
+
60
66
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
67
+
61
68
from pandas._libs.tslibs.np_datetime cimport (
62
69
NPY_DATETIMEUNIT,
63
70
npy_datetimestruct,
64
71
string_to_dts,
65
72
)
73
+
66
74
from pandas._libs.tslibs.strptime import array_strptime
75
+
67
76
from pandas._libs.tslibs.util cimport (
68
77
get_c_string_buf_and_size,
69
78
is_array,
@@ -92,6 +101,14 @@ _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0,
92
101
cdef:
93
102
set _not_datelike_strings = {" a" , " A" , " m" , " M" , " p" , " P" , " t" , " T" }
94
103
104
+ # _timestamp_units -> numpy units (ns, ps, fs, as) whose reported resolution is collapsed to nanoseconds
105
+ set _timestamp_units = {
106
+ NPY_DATETIMEUNIT.NPY_FR_ns,
107
+ NPY_DATETIMEUNIT.NPY_FR_ps,
108
+ NPY_DATETIMEUNIT.NPY_FR_fs,
109
+ NPY_DATETIMEUNIT.NPY_FR_as,
110
+ }
111
+
95
112
# ----------------------------------------------------------------------
96
113
cdef:
97
114
const char * delimiters = " /-."
@@ -125,7 +142,7 @@ cdef int _parse_4digit(const char* s):
125
142
126
143
127
144
cdef datetime _parse_delimited_date(
128
- str date_string, bint dayfirst, NPY_DATETIMEUNIT* creso
145
+ str date_string, bint dayfirst, NPY_DATETIMEUNIT* out_bestunit
129
146
):
130
147
"""
131
148
Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY.
@@ -144,7 +161,7 @@ cdef datetime _parse_delimited_date(
144
161
----------
145
162
date_string : str
146
163
dayfirst : bool
147
- creso : NPY_DATETIMEUNIT*
164
+ out_bestunit : NPY_DATETIMEUNIT*
148
165
For specifying identified resolution.
149
166
150
167
Returns:
@@ -163,28 +180,28 @@ cdef datetime _parse_delimited_date(
163
180
month = _parse_2digit(buf)
164
181
day = _parse_2digit(buf + 3 )
165
182
year = _parse_4digit(buf + 6 )
166
- creso [0 ] = NPY_DATETIMEUNIT.NPY_FR_D
183
+ out_bestunit [0 ] = NPY_DATETIMEUNIT.NPY_FR_D
167
184
can_swap = 1
168
185
elif length == 9 and _is_delimiter(buf[1 ]) and _is_delimiter(buf[4 ]):
169
186
# parsing M?DD?YYYY and D?MM?YYYY dates
170
187
month = _parse_1digit(buf)
171
188
day = _parse_2digit(buf + 2 )
172
189
year = _parse_4digit(buf + 5 )
173
- creso [0 ] = NPY_DATETIMEUNIT.NPY_FR_D
190
+ out_bestunit [0 ] = NPY_DATETIMEUNIT.NPY_FR_D
174
191
can_swap = 1
175
192
elif length == 9 and _is_delimiter(buf[2 ]) and _is_delimiter(buf[4 ]):
176
193
# parsing MM?D?YYYY and DD?M?YYYY dates
177
194
month = _parse_2digit(buf)
178
195
day = _parse_1digit(buf + 3 )
179
196
year = _parse_4digit(buf + 5 )
180
- creso [0 ] = NPY_DATETIMEUNIT.NPY_FR_D
197
+ out_bestunit [0 ] = NPY_DATETIMEUNIT.NPY_FR_D
181
198
can_swap = 1
182
199
elif length == 8 and _is_delimiter(buf[1 ]) and _is_delimiter(buf[3 ]):
183
200
# parsing M?D?YYYY and D?M?YYYY dates
184
201
month = _parse_1digit(buf)
185
202
day = _parse_1digit(buf + 2 )
186
203
year = _parse_4digit(buf + 4 )
187
- creso [0 ] = NPY_DATETIMEUNIT.NPY_FR_D
204
+ out_bestunit [0 ] = NPY_DATETIMEUNIT.NPY_FR_D
188
205
can_swap = 1
189
206
elif length == 7 and _is_delimiter(buf[2 ]):
190
207
# parsing MM?YYYY dates
@@ -194,7 +211,7 @@ cdef datetime _parse_delimited_date(
194
211
return None
195
212
month = _parse_2digit(buf)
196
213
year = _parse_4digit(buf + 3 )
197
- creso [0 ] = NPY_DATETIMEUNIT.NPY_FR_M
214
+ out_bestunit [0 ] = NPY_DATETIMEUNIT.NPY_FR_M
198
215
else :
199
216
return None
200
217
@@ -270,7 +287,8 @@ def parse_datetime_string(
270
287
271
288
cdef:
272
289
datetime dt
273
- NPY_DATETIMEUNIT creso
290
+ NPY_DATETIMEUNIT out_bestunit
291
+ bint is_quarter = 0
274
292
275
293
if not _does_string_look_like_datetime(date_string ):
276
294
raise ValueError (f' Given date string "{date_string}" not likely a datetime' )
@@ -281,21 +299,23 @@ def parse_datetime_string(
281
299
yearfirst = yearfirst)
282
300
return dt
283
301
284
- dt = _parse_delimited_date(date_string, dayfirst, & creso )
302
+ dt = _parse_delimited_date(date_string, dayfirst, & out_bestunit )
285
303
if dt is not None :
286
304
return dt
287
305
288
306
try :
289
- dt, _ = _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq = None )
307
+ dt = _parse_dateabbr_string(
308
+ date_string, _DEFAULT_DATETIME, None , & out_bestunit, & is_quarter
309
+ )
290
310
return dt
291
311
except DateParseError:
292
312
raise
293
313
except ValueError :
294
314
pass
295
315
296
- dt, _ = dateutil_parse(date_string, default = _DEFAULT_DATETIME,
297
- dayfirst = dayfirst, yearfirst = yearfirst,
298
- ignoretz = False )
316
+ dt = dateutil_parse(date_string, default = _DEFAULT_DATETIME,
317
+ dayfirst = dayfirst, yearfirst = yearfirst,
318
+ ignoretz = False , out_bestunit = & out_bestunit )
299
319
300
320
if dt.tzinfo is not None :
301
321
# dateutil can return a datetime with a tzoffset outside of (-24H, 24H)
@@ -361,26 +381,24 @@ def parse_datetime_string_with_reso(
361
381
int out_local = 0
362
382
int out_tzoffset
363
383
tzinfo tz
384
+ bint is_quarter = 0
364
385
365
386
if not _does_string_look_like_datetime(date_string):
366
387
raise ValueError (f' Given date string "{date_string}" not likely a datetime' )
367
388
368
- parsed = _parse_delimited_date(date_string, dayfirst, & out_bestunit)
369
- if parsed is not None :
370
- reso = npy_unit_to_attrname[out_bestunit]
371
- return parsed, reso
372
-
373
389
# Try iso8601 first, as it handles nanoseconds
374
390
string_to_dts_failed = string_to_dts(
375
391
date_string, & dts, & out_bestunit, & out_local,
376
392
& out_tzoffset, False
377
393
)
378
394
if not string_to_dts_failed:
379
- timestamp_units = {NPY_DATETIMEUNIT.NPY_FR_ns,
380
- NPY_DATETIMEUNIT.NPY_FR_ps,
381
- NPY_DATETIMEUNIT.NPY_FR_fs,
382
- NPY_DATETIMEUNIT.NPY_FR_as}
383
- if out_bestunit in timestamp_units:
395
+ # Match Timestamp and drop picoseconds, femtoseconds, attoseconds
396
+ # The new resolution will just be nano
397
+ # GH#50417
398
+ if out_bestunit in _timestamp_units:
399
+ out_bestunit = NPY_DATETIMEUNIT.NPY_FR_ns
400
+
401
+ if out_bestunit == NPY_DATETIMEUNIT.NPY_FR_ns:
384
402
# TODO: avoid circular import
385
403
from pandas import Timestamp
386
404
parsed = Timestamp(date_string)
@@ -392,25 +410,34 @@ def parse_datetime_string_with_reso(
392
410
parsed = datetime_new(
393
411
dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz
394
412
)
395
- # Match Timestamp and drop picoseconds, femtoseconds, attoseconds
396
- # The new resolution will just be nano
397
- # GH 50417
398
- if out_bestunit in timestamp_units:
399
- out_bestunit = NPY_DATETIMEUNIT.NPY_FR_ns
400
413
401
414
reso = npy_unit_to_attrname[out_bestunit]
402
415
return parsed, reso
403
416
417
+ parsed = _parse_delimited_date(date_string, dayfirst, & out_bestunit)
418
+ if parsed is not None :
419
+ reso = npy_unit_to_attrname[out_bestunit]
420
+ return parsed, reso
421
+
404
422
try :
405
- return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq)
423
+ parsed = _parse_dateabbr_string(
424
+ date_string, _DEFAULT_DATETIME, freq, & out_bestunit, & is_quarter
425
+ )
406
426
except DateParseError:
407
427
raise
408
428
except ValueError :
409
429
pass
430
+ else :
431
+ if is_quarter:
432
+ reso = " quarter"
433
+ else :
434
+ reso = npy_unit_to_attrname[out_bestunit]
435
+ return parsed, reso
410
436
411
- parsed, reso = dateutil_parse(date_string, _DEFAULT_DATETIME,
412
- dayfirst = dayfirst, yearfirst = yearfirst,
413
- ignoretz = False )
437
+ parsed = dateutil_parse(date_string, _DEFAULT_DATETIME,
438
+ dayfirst = dayfirst, yearfirst = yearfirst,
439
+ ignoretz = False , out_bestunit = & out_bestunit)
440
+ reso = npy_unit_to_attrname[out_bestunit]
414
441
return parsed, reso
415
442
416
443
@@ -461,8 +488,9 @@ cpdef bint _does_string_look_like_datetime(str py_string):
461
488
return True
462
489
463
490
464
- cdef object _parse_dateabbr_string(str date_string, datetime default,
465
- str freq = None ):
491
+ cdef datetime _parse_dateabbr_string(str date_string, datetime default,
492
+ str freq, NPY_DATETIMEUNIT* out_bestunit,
493
+ bint* is_quarter):
466
494
# special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1
467
495
cdef:
468
496
datetime ret
@@ -472,7 +500,9 @@ cdef object _parse_dateabbr_string(str date_string, datetime default,
472
500
const char * buf
473
501
474
502
if date_string in nat_strings:
475
- return NaT, " "
503
+ # default to nanos, could also reasonably do NPY_FR_GENERIC
504
+ out_bestunit[0 ] = NPY_DATETIMEUNIT.NPY_FR_ns
505
+ return NaT
476
506
477
507
date_string = date_string.upper()
478
508
date_len = len (date_string)
@@ -481,7 +511,8 @@ cdef object _parse_dateabbr_string(str date_string, datetime default,
481
511
# parse year only like 2000
482
512
try :
483
513
ret = default.replace(year = int (date_string))
484
- return ret, " year"
514
+ out_bestunit[0 ] = NPY_DATETIMEUNIT.NPY_FR_Y
515
+ return ret
485
516
except ValueError :
486
517
pass
487
518
@@ -534,7 +565,10 @@ cdef object _parse_dateabbr_string(str date_string, datetime default,
534
565
f" freq: {freq}" )
535
566
536
567
ret = default.replace(year = year, month = month)
537
- return ret, " quarter"
568
+ # Monthly is as close as we can get to a non-existent NPY_FR_Q
569
+ out_bestunit[0 ] = NPY_DATETIMEUNIT.NPY_FR_M
570
+ is_quarter[0 ] = 1
571
+ return ret
538
572
539
573
except DateParseError:
540
574
raise
@@ -547,15 +581,17 @@ cdef object _parse_dateabbr_string(str date_string, datetime default,
547
581
month = int (date_string[4 :6 ])
548
582
try :
549
583
ret = default.replace(year = year, month = month)
550
- return ret, " month"
584
+ out_bestunit[0 ] = NPY_DATETIMEUNIT.NPY_FR_M
585
+ return ret
551
586
except ValueError as err:
552
587
# We can infer that none of the patterns below will match
553
588
raise ValueError (f" Unable to parse {date_string}" ) from err
554
589
555
590
for pat in [" %Y -%m " , " %b %Y " , " %b -%Y " ]:
556
591
try :
557
592
ret = datetime.strptime(date_string, pat)
558
- return ret, " month"
593
+ out_bestunit[0 ] = NPY_DATETIMEUNIT.NPY_FR_M
594
+ return ret
559
595
except ValueError :
560
596
pass
561
597
@@ -597,12 +633,13 @@ cpdef quarter_to_myear(int year, int quarter, str freq):
597
633
return year, month
598
634
599
635
600
- cdef dateutil_parse(
636
+ cdef datetime dateutil_parse(
601
637
str timestr,
602
638
datetime default,
603
- bint ignoretz = False ,
604
- bint dayfirst = False ,
605
- bint yearfirst = False ,
639
+ bint ignoretz,
640
+ bint dayfirst,
641
+ bint yearfirst,
642
+ NPY_DATETIMEUNIT* out_bestunit
606
643
):
607
644
""" lifted from dateutil to get resolution"""
608
645
@@ -658,7 +695,9 @@ cdef dateutil_parse(
658
695
ret = ret.replace(tzinfo = _dateutil_tzutc())
659
696
elif res.tzoffset:
660
697
ret = ret.replace(tzinfo = tzoffset(res.tzname, res.tzoffset))
661
- return ret, reso
698
+
699
+ out_bestunit[0 ] = attrname_to_npy_unit[reso]
700
+ return ret
662
701
663
702
664
703
# ----------------------------------------------------------------------
0 commit comments