@@ -49,6 +49,10 @@ from numpy cimport (
49
49
50
50
from pandas._libs.missing cimport checknull_with_nat_and_na
51
51
from pandas._libs.tslibs.conversion cimport get_datetime64_nanos
52
+ from pandas._libs.tslibs.dtypes cimport (
53
+ get_supported_reso,
54
+ npy_unit_to_abbrev,
55
+ )
52
56
from pandas._libs.tslibs.nattype cimport (
53
57
NPY_NAT,
54
58
c_nat_strings as nat_strings,
@@ -57,6 +61,7 @@ from pandas._libs.tslibs.np_datetime cimport (
57
61
NPY_DATETIMEUNIT,
58
62
NPY_FR_ns,
59
63
check_dts_bounds,
64
+ get_datetime64_unit,
60
65
import_pandas_datetime,
61
66
npy_datetimestruct,
62
67
npy_datetimestruct_to_datetime,
@@ -232,9 +237,21 @@ cdef _get_format_regex(str fmt):
232
237
233
238
234
239
cdef class DatetimeParseState:
235
- def __cinit__ (self ):
240
def __cinit__(self, NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns):
    # Resolution tracking: begin at the unit supplied by the caller and
    # note that it has not been raised yet (update_creso flips this flag).
    self.creso = creso
    self.creso_ever_changed = False

    # Both "found" flags start out cleared.
    self.found_naive = False
    self.found_tz = False
245
+
246
cdef bint update_creso(self, NPY_DATETIMEUNIT item_reso) noexcept:
    # Fold the resolution of one parsed item into the running state.
    #
    # Returns True only when an already-established resolution had to be
    # raised to a higher one; in that case creso_ever_changed is also set.
    if self.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
        # No resolution established yet: adopt this item's resolution.
        # This first assignment is not reported as a bump.
        self.creso = item_reso
        return False

    if item_reso <= self.creso:
        # The current resolution already covers this item.
        return False

    self.creso = item_reso
    self.creso_ever_changed = True
    return True
238
255
239
256
cdef tzinfo process_datetime(self , datetime dt, tzinfo tz, bint utc_convert):
240
257
if dt.tzinfo is not None :
@@ -268,6 +285,7 @@ def array_strptime(
268
285
bint exact = True ,
269
286
errors = " raise" ,
270
287
bint utc = False ,
288
+ NPY_DATETIMEUNIT creso = NPY_FR_ns,
271
289
):
272
290
"""
273
291
Calculates the datetime structs represented by the passed array of strings
@@ -278,6 +296,8 @@ def array_strptime(
278
296
fmt : string-like regex
279
297
exact : matches must be exact if True, search if False
280
298
errors : string specifying error handling, {'raise', 'ignore', 'coerce'}
299
+ creso : NPY_DATETIMEUNIT, default NPY_FR_ns
300
+ Set to NPY_FR_GENERIC to infer a resolution.
281
301
"""
282
302
283
303
cdef:
@@ -291,17 +311,22 @@ def array_strptime(
291
311
bint is_coerce = errors== " coerce"
292
312
tzinfo tz_out = None
293
313
bint iso_format = format_is_iso(fmt)
294
- NPY_DATETIMEUNIT out_bestunit
314
+ NPY_DATETIMEUNIT out_bestunit, item_reso
295
315
int out_local = 0 , out_tzoffset = 0
296
316
bint string_to_dts_succeeded = 0
297
- DatetimeParseState state = DatetimeParseState()
317
+ bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
318
+ DatetimeParseState state = DatetimeParseState(creso)
298
319
299
320
assert is_raise or is_ignore or is_coerce
300
321
301
322
_validate_fmt(fmt)
302
323
format_regex, locale_time = _get_format_regex(fmt)
303
324
304
- result = np.empty(n, dtype = " M8[ns]" )
325
+ if infer_reso:
326
+ abbrev = " ns"
327
+ else :
328
+ abbrev = npy_unit_to_abbrev(creso)
329
+ result = np.empty(n, dtype = f" M8[{abbrev}]" )
305
330
iresult = result.view(" i8" )
306
331
result_timezone = np.empty(n, dtype = " object" )
307
332
@@ -318,20 +343,32 @@ def array_strptime(
318
343
iresult[i] = NPY_NAT
319
344
continue
320
345
elif PyDateTime_Check(val):
346
+ if isinstance (val, _Timestamp):
347
+ item_reso = val._creso
348
+ else :
349
+ item_reso = NPY_DATETIMEUNIT.NPY_FR_us
350
+ state.update_creso(item_reso)
321
351
tz_out = state.process_datetime(val, tz_out, utc)
322
352
if isinstance (val, _Timestamp):
323
- iresult[i] = val.tz_localize(None ).as_unit(" ns" )._value
353
+ val = (< _Timestamp> val)._as_creso(state.creso)
354
+ iresult[i] = val.tz_localize(None )._value
324
355
else :
325
- iresult[i] = pydatetime_to_dt64(val.replace(tzinfo = None ), & dts)
326
- check_dts_bounds(& dts)
356
+ iresult[i] = pydatetime_to_dt64(
357
+ val.replace(tzinfo = None ), & dts, reso = state.creso
358
+ )
359
+ check_dts_bounds(& dts, state.creso)
327
360
result_timezone[i] = val.tzinfo
328
361
continue
329
362
elif PyDate_Check(val):
330
- iresult[i] = pydate_to_dt64(val, & dts)
331
- check_dts_bounds(& dts)
363
+ item_reso = NPY_DATETIMEUNIT.NPY_FR_s
364
+ state.update_creso(item_reso)
365
+ iresult[i] = pydate_to_dt64(val, & dts, reso = state.creso)
366
+ check_dts_bounds(& dts, state.creso)
332
367
continue
333
368
elif is_datetime64_object(val):
334
- iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
369
+ item_reso = get_supported_reso(get_datetime64_unit(val))
370
+ state.update_creso(item_reso)
371
+ iresult[i] = get_datetime64_nanos(val, state.creso)
335
372
continue
336
373
elif (
337
374
(is_integer_object(val) or is_float_object(val))
@@ -355,7 +392,9 @@ def array_strptime(
355
392
if string_to_dts_succeeded:
356
393
# No error reported by string_to_dts, pick back up
357
394
# where we left off
358
- value = npy_datetimestruct_to_datetime(NPY_FR_ns, & dts)
395
+ item_reso = get_supported_reso(out_bestunit)
396
+ state.update_creso(item_reso)
397
+ value = npy_datetimestruct_to_datetime(state.creso, & dts)
359
398
if out_local == 1 :
360
399
# Store the out_tzoffset in seconds
361
400
# since we store the total_seconds of
@@ -368,7 +407,9 @@ def array_strptime(
368
407
check_dts_bounds(& dts)
369
408
continue
370
409
371
- if parse_today_now(val, & iresult[i], utc, NPY_FR_ns):
410
+ if parse_today_now(val, & iresult[i], utc, state.creso):
411
+ item_reso = NPY_DATETIMEUNIT.NPY_FR_us
412
+ state.update_creso(item_reso)
372
413
continue
373
414
374
415
# Some ISO formats can't be parsed by string_to_dts
@@ -380,9 +421,10 @@ def array_strptime(
380
421
raise ValueError (f" Time data {val} is not ISO8601 format" )
381
422
382
423
tz = _parse_with_format(
383
- val, fmt, exact, format_regex, locale_time, & dts
424
+ val, fmt, exact, format_regex, locale_time, & dts, & item_reso
384
425
)
385
- iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, & dts)
426
+ state.update_creso(item_reso)
427
+ iresult[i] = npy_datetimestruct_to_datetime(state.creso, & dts)
386
428
check_dts_bounds(& dts)
387
429
result_timezone[i] = tz
388
430
@@ -403,11 +445,34 @@ def array_strptime(
403
445
raise
404
446
return values, []
405
447
448
+ if infer_reso:
449
+ if state.creso_ever_changed:
450
+ # We encountered mismatched resolutions, need to re-parse with
451
+ # the correct one.
452
+ return array_strptime(
453
+ values,
454
+ fmt = fmt,
455
+ exact = exact,
456
+ errors = errors,
457
+ utc = utc,
458
+ creso = state.creso,
459
+ )
460
+
461
+ # Otherwise we can use the single reso that we encountered and avoid
462
+ # a second pass.
463
+ abbrev = npy_unit_to_abbrev(state.creso)
464
+ result = iresult.base.view(f" M8[{abbrev}]" )
406
465
return result, result_timezone.base
407
466
408
467
409
468
cdef tzinfo _parse_with_format(
410
- str val, str fmt, bint exact, format_regex, locale_time, npy_datetimestruct* dts
469
+ str val,
470
+ str fmt,
471
+ bint exact,
472
+ format_regex,
473
+ locale_time,
474
+ npy_datetimestruct* dts,
475
+ NPY_DATETIMEUNIT* item_reso,
411
476
):
412
477
# Based on https://github.com/python/cpython/blob/main/Lib/_strptime.py#L293
413
478
cdef:
@@ -441,6 +506,8 @@ cdef tzinfo _parse_with_format(
441
506
f" time data \" {val}\" doesn't match format \" {fmt}\" "
442
507
)
443
508
509
+ item_reso[0 ] = NPY_DATETIMEUNIT.NPY_FR_s
510
+
444
511
iso_year = - 1
445
512
year = 1900
446
513
month = day = 1
@@ -527,6 +594,12 @@ cdef tzinfo _parse_with_format(
527
594
elif parse_code == 10 :
528
595
# e.g. val='10:10:10.100'; fmt='%H:%M:%S.%f'
529
596
s = found_dict[" f" ]
597
+ if len (s) <= 3 :
598
+ item_reso[0 ] = NPY_DATETIMEUNIT.NPY_FR_ms
599
+ elif len (s) <= 6 :
600
+ item_reso[0 ] = NPY_DATETIMEUNIT.NPY_FR_us
601
+ else :
602
+ item_reso[0 ] = NPY_DATETIMEUNIT.NPY_FR_ns
530
603
# Pad to always return nanoseconds
531
604
s += " 0" * (9 - len (s))
532
605
us = long (s)
0 commit comments