Skip to content

Commit 92fa9ca

Browse files
authored
BUG: nanoseconds and reso in dateutil paths (#56051)
* BUG: nanoseconds and reso in dateutil paths * GH ref
1 parent d99c448 commit 92fa9ca

File tree

5 files changed

+71
-15
lines changed

5 files changed

+71
-15
lines changed

doc/source/whatsnew/v2.2.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,8 @@ Datetimelike
364364
- Bug in creating an :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond :class:`DatetimeTZDtype` and inputs that would be out of bounds with nanosecond resolution incorrectly raising ``OutOfBoundsDatetime`` (:issue:`54620`)
365365
- Bug in creating an :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` (or :class:`DatetimeTZDtype`) from mixed-numeric inputs treating those as nanoseconds instead of as multiples of the dtype's unit (which would happen with non-mixed numeric inputs) (:issue:`56004`)
366366
- Bug in creating an :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` dtype and inputs that would be out of bounds for a ``datetime64[ns]`` incorrectly raising ``OutOfBoundsDatetime`` (:issue:`55756`)
367+
- Bug in parsing non-ISO8601 datetime strings with nanosecond resolution incorrectly truncating sub-microsecond components (:issue:`56051`)
368+
- Bug in parsing datetime strings with sub-second resolution and trailing zeros incorrectly inferring second or millisecond resolution (:issue:`55737`)
367369
-
368370

369371
Timedelta

pandas/_libs/tslibs/conversion.pyx

+7-3
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,7 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz,
508508
npy_datetimestruct dts
509509
int out_local = 0, out_tzoffset = 0, string_to_dts_failed
510510
datetime dt
511-
int64_t ival
511+
int64_t ival, nanos = 0
512512
NPY_DATETIMEUNIT out_bestunit, reso
513513
_TSObject obj
514514

@@ -560,10 +560,14 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz,
560560
return obj
561561

562562
dt = parse_datetime_string(
563-
ts, dayfirst=dayfirst, yearfirst=yearfirst, out_bestunit=&out_bestunit
563+
ts,
564+
dayfirst=dayfirst,
565+
yearfirst=yearfirst,
566+
out_bestunit=&out_bestunit,
567+
nanos=&nanos,
564568
)
565569
reso = get_supported_reso(out_bestunit)
566-
return convert_datetime_to_tsobject(dt, tz, nanos=0, reso=reso)
570+
return convert_datetime_to_tsobject(dt, tz, nanos=nanos, reso=reso)
567571

568572
return convert_datetime_to_tsobject(dt, tz)
569573

pandas/_libs/tslibs/parsing.pxd

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from cpython.datetime cimport datetime
2+
from numpy cimport int64_t
23

34
from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT
45

@@ -10,5 +11,6 @@ cdef datetime parse_datetime_string(
1011
str date_string,
1112
bint dayfirst,
1213
bint yearfirst,
13-
NPY_DATETIMEUNIT* out_bestunit
14+
NPY_DATETIMEUNIT* out_bestunit,
15+
int64_t* nanos,
1416
)

pandas/_libs/tslibs/parsing.pyx

+47-11
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ from numpy cimport (
3434
PyArray_IterNew,
3535
flatiter,
3636
float64_t,
37+
int64_t,
3738
)
3839

3940
cnp.import_array()
@@ -272,8 +273,11 @@ def py_parse_datetime_string(
272273
# parse_datetime_string cpdef bc it has a pointer argument)
273274
cdef:
274275
NPY_DATETIMEUNIT out_bestunit
276+
int64_t nanos
275277

276-
return parse_datetime_string(date_string, dayfirst, yearfirst, &out_bestunit)
278+
return parse_datetime_string(
279+
date_string, dayfirst, yearfirst, &out_bestunit, &nanos
280+
)
277281

278282

279283
cdef datetime parse_datetime_string(
@@ -283,7 +287,8 @@ cdef datetime parse_datetime_string(
283287
str date_string,
284288
bint dayfirst,
285289
bint yearfirst,
286-
NPY_DATETIMEUNIT* out_bestunit
290+
NPY_DATETIMEUNIT* out_bestunit,
291+
int64_t* nanos,
287292
):
288293
"""
289294
Parse datetime string, only returns datetime.
@@ -311,7 +316,7 @@ cdef datetime parse_datetime_string(
311316
default = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
312317
dt = dateutil_parse(date_string, default=default,
313318
dayfirst=dayfirst, yearfirst=yearfirst,
314-
ignoretz=False, out_bestunit=out_bestunit)
319+
ignoretz=False, out_bestunit=out_bestunit, nanos=nanos)
315320
return dt
316321

317322
dt = _parse_delimited_date(date_string, dayfirst, out_bestunit)
@@ -330,7 +335,7 @@ cdef datetime parse_datetime_string(
330335

331336
dt = dateutil_parse(date_string, default=_DEFAULT_DATETIME,
332337
dayfirst=dayfirst, yearfirst=yearfirst,
333-
ignoretz=False, out_bestunit=out_bestunit)
338+
ignoretz=False, out_bestunit=out_bestunit, nanos=nanos)
334339
return dt
335340

336341

@@ -436,7 +441,7 @@ def parse_datetime_string_with_reso(
436441

437442
parsed = dateutil_parse(date_string, _DEFAULT_DATETIME,
438443
dayfirst=dayfirst, yearfirst=yearfirst,
439-
ignoretz=False, out_bestunit=&out_bestunit)
444+
ignoretz=False, out_bestunit=&out_bestunit, nanos=NULL)
440445
reso = npy_unit_to_attrname[out_bestunit]
441446
return parsed, reso
442447

@@ -639,7 +644,8 @@ cdef datetime dateutil_parse(
639644
bint ignoretz,
640645
bint dayfirst,
641646
bint yearfirst,
642-
NPY_DATETIMEUNIT* out_bestunit
647+
NPY_DATETIMEUNIT* out_bestunit,
648+
int64_t* nanos,
643649
):
644650
""" lifted from dateutil to get resolution"""
645651

@@ -671,11 +677,8 @@ cdef datetime dateutil_parse(
671677
if reso is None:
672678
raise DateParseError(f"Unable to parse datetime string: {timestr}")
673679

674-
if reso == "microsecond":
675-
if repl["microsecond"] == 0:
676-
reso = "second"
677-
elif repl["microsecond"] % 1000 == 0:
678-
reso = "millisecond"
680+
if reso == "microsecond" and repl["microsecond"] % 1000 == 0:
681+
reso = _find_subsecond_reso(timestr, nanos=nanos)
679682

680683
try:
681684
ret = default.replace(**repl)
@@ -745,6 +748,38 @@ cdef datetime dateutil_parse(
745748
return ret
746749

747750

751+
# Compiled once at import time.  Matches an "H:MM:SS.f" time component: one
# digit, ":", two digits, ":", two digits, a literal ".", and then one or more
# digits, which are captured in the named group "frac".
cdef object _reso_pattern = re.compile(r"\d:\d{2}:\d{2}\.(?P<frac>\d+)")
752+
753+
cdef _find_subsecond_reso(str timestr, int64_t* nanos):
    """
    Infer a resolution name from the fractional-seconds digits in ``timestr``.

    GH#55737: the number of digits written after the decimal point of an
    H:M:S.f pattern decides the resolution, so trailing zeros count.

    Parameters
    ----------
    timestr : str
        The datetime string being parsed.
    nanos : int64_t*
        May be NULL.  When non-NULL and the fraction has a non-zero digit
        past the sixth place, digits seven through nine (right-padded with
        zeros to three places) are written to ``nanos[0]``.  It is left
        untouched in every other case.

    Returns
    -------
    str
        "second" if no H:M:S.f pattern is found; otherwise "millisecond"
        (up to 3 fractional digits), "microsecond" (4 to 6 digits) or
        "nanosecond" (7 or more digits).
    """
    found = _reso_pattern.search(timestr)
    if not found:
        return "second"

    digits = found.group("frac")
    ndigits = len(digits)

    if ndigits <= 3:
        return "millisecond"
    if ndigits <= 6:
        return "microsecond"

    # Seven or more digits were written: report nanosecond resolution.
    tail = digits[6:]
    if tail != "0" * len(tail) and nanos is not NULL:
        # An all-zero tail is the corner case where no data was lost, so
        # nothing is written.  Otherwise keep digits 7-9, padding a short
        # tail out to three places.
        # TODO: should we warn/raise in higher-than-nano cases?  Digits
        # past the ninth place are dropped here.
        nanos[0] = int(tail[:3].ljust(3, "0"))
    return "nanosecond"
781+
782+
748783
# ----------------------------------------------------------------------
749784
# Parsing for type-inference
750785

@@ -916,6 +951,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
916951
yearfirst=False,
917952
ignoretz=False,
918953
out_bestunit=&out_bestunit,
954+
nanos=NULL,
919955
)
920956
except (ValueError, OverflowError, InvalidOperation):
921957
# In case the datetime can't be parsed, its format cannot be guessed

pandas/tests/scalar/timestamp/test_constructors.py

+12
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,18 @@ def test_constructor_str_infer_reso(self):
445445
ts = Timestamp("300 June 1:30:01.300")
446446
assert ts.unit == "ms"
447447

448+
# dateutil path -> don't drop trailing zeros
449+
ts = Timestamp("01-01-2013T00:00:00.000000000+0000")
450+
assert ts.unit == "ns"
451+
452+
ts = Timestamp("2016/01/02 03:04:05.001000 UTC")
453+
assert ts.unit == "us"
454+
455+
# higher-than-nanosecond -> we drop the trailing bits
456+
ts = Timestamp("01-01-2013T00:00:00.000000002100+0000")
457+
assert ts == Timestamp("01-01-2013T00:00:00.000000002+0000")
458+
assert ts.unit == "ns"
459+
448460

449461
class TestTimestampConstructors:
450462
def test_weekday_but_no_day_raises(self):

0 commit comments

Comments
 (0)