Skip to content

Commit ec29dfc

Browse files
authored
BUG: parsing nanoseconds incorrect resolution (#46811)
1 parent d0e3820 commit ec29dfc

File tree

11 files changed

+106
-6
lines changed

11 files changed

+106
-6
lines changed

doc/source/whatsnew/v1.5.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,8 @@ Period
582582
^^^^^^
583583
- Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`)
584584
- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives ``%l`` and ``%u`` were giving wrong results (:issue:`46252`)
585+
- Bug in inferring an incorrect ``freq`` when passing a string with microseconds that are a multiple of 1000 to :class:`Period` (:issue:`46811`)
586+
- Bug in constructing a :class:`Period` from a :class:`Timestamp` or ``np.datetime64`` object with non-zero nanoseconds and ``freq="ns"`` incorrectly truncating the nanoseconds (:issue:`46811`)
585587
-
586588

587589
Plotting

pandas/_libs/tslib.pyx

+5-2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ cnp.import_array()
2727
import pytz
2828

2929
from pandas._libs.tslibs.np_datetime cimport (
30+
NPY_DATETIMEUNIT,
3031
check_dts_bounds,
3132
dt64_to_dtstruct,
3233
dtstruct_to_dt64,
@@ -75,6 +76,7 @@ def _test_parse_iso8601(ts: str):
7576
cdef:
7677
_TSObject obj
7778
int out_local = 0, out_tzoffset = 0
79+
NPY_DATETIMEUNIT out_bestunit
7880

7981
obj = _TSObject()
8082

@@ -83,7 +85,7 @@ def _test_parse_iso8601(ts: str):
8385
elif ts == 'today':
8486
return Timestamp.now().normalize()
8587

86-
string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset, True)
88+
string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True)
8789
obj.value = dtstruct_to_dt64(&obj.dts)
8890
check_dts_bounds(&obj.dts)
8991
if out_local == 1:
@@ -428,6 +430,7 @@ cpdef array_to_datetime(
428430
ndarray[int64_t] iresult
429431
ndarray[object] oresult
430432
npy_datetimestruct dts
433+
NPY_DATETIMEUNIT out_bestunit
431434
bint utc_convert = bool(utc)
432435
bint seen_integer = False
433436
bint seen_string = False
@@ -516,7 +519,7 @@ cpdef array_to_datetime(
516519
continue
517520

518521
string_to_dts_failed = string_to_dts(
519-
val, &dts, &out_local,
522+
val, &dts, &out_bestunit, &out_local,
520523
&out_tzoffset, False
521524
)
522525
if string_to_dts_failed:

pandas/_libs/tslibs/conversion.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
586586
int out_local = 0, out_tzoffset = 0, string_to_dts_failed
587587
datetime dt
588588
int64_t ival
589+
NPY_DATETIMEUNIT out_bestunit
589590

590591
if len(ts) == 0 or ts in nat_strings:
591592
ts = NaT
@@ -604,7 +605,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
604605
# equiv: datetime.today().replace(tzinfo=tz)
605606
else:
606607
string_to_dts_failed = string_to_dts(
607-
ts, &dts, &out_local,
608+
ts, &dts, &out_bestunit, &out_local,
608609
&out_tzoffset, False
609610
)
610611
if not string_to_dts_failed:

pandas/_libs/tslibs/np_datetime.pxd

+1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil
9090
cdef int string_to_dts(
9191
str val,
9292
npy_datetimestruct* dts,
93+
NPY_DATETIMEUNIT* out_bestunit,
9394
int* out_local,
9495
int* out_tzoffset,
9596
bint want_exc,

pandas/_libs/tslibs/np_datetime.pyx

+3-1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ cdef extern from "src/datetime/np_datetime.h":
4646
cdef extern from "src/datetime/np_datetime_strings.h":
4747
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
4848
npy_datetimestruct *out,
49+
NPY_DATETIMEUNIT *out_bestunit,
4950
int *out_local, int *out_tzoffset)
5051

5152

@@ -255,6 +256,7 @@ cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts):
255256
cdef inline int string_to_dts(
256257
str val,
257258
npy_datetimestruct* dts,
259+
NPY_DATETIMEUNIT* out_bestunit,
258260
int* out_local,
259261
int* out_tzoffset,
260262
bint want_exc,
@@ -265,7 +267,7 @@ cdef inline int string_to_dts(
265267

266268
buf = get_c_string_buf_and_size(val, &length)
267269
return parse_iso_8601_datetime(buf, length, want_exc,
268-
dts, out_local, out_tzoffset)
270+
dts, out_bestunit, out_local, out_tzoffset)
269271

270272

271273
cpdef ndarray astype_overflowsafe(

pandas/_libs/tslibs/parsing.pyx

+37
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ from pandas._libs.tslibs.nattype cimport (
5353
c_NaT as NaT,
5454
c_nat_strings as nat_strings,
5555
)
56+
from pandas._libs.tslibs.np_datetime cimport (
57+
NPY_DATETIMEUNIT,
58+
npy_datetimestruct,
59+
string_to_dts,
60+
)
5661
from pandas._libs.tslibs.offsets cimport is_offset_object
5762
from pandas._libs.tslibs.util cimport (
5863
get_c_string_buf_and_size,
@@ -350,6 +355,11 @@ cdef parse_datetime_string_with_reso(
350355
"""
351356
cdef:
352357
object parsed, reso
358+
bint string_to_dts_failed
359+
npy_datetimestruct dts
360+
NPY_DATETIMEUNIT out_bestunit
361+
int out_local
362+
int out_tzoffset
353363

354364
if not _does_string_look_like_datetime(date_string):
355365
raise ValueError('Given date string not likely a datetime.')
@@ -358,6 +368,33 @@ cdef parse_datetime_string_with_reso(
358368
if parsed is not None:
359369
return parsed, reso
360370

371+
# Try iso8601 first, as it handles nanoseconds
372+
# TODO: does this render some/all of parse_delimited_date redundant?
373+
string_to_dts_failed = string_to_dts(
374+
date_string, &dts, &out_bestunit, &out_local,
375+
&out_tzoffset, False
376+
)
377+
if not string_to_dts_failed:
378+
if dts.ps != 0 or out_local:
379+
# TODO: in the not-out_local case we could build the result without Timestamp;
380+
# import locally to avoid a circular import
381+
from pandas import Timestamp
382+
parsed = Timestamp(date_string)
383+
else:
384+
parsed = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us)
385+
reso = {
386+
NPY_DATETIMEUNIT.NPY_FR_Y: "year",
387+
NPY_DATETIMEUNIT.NPY_FR_M: "month",
388+
NPY_DATETIMEUNIT.NPY_FR_D: "day",
389+
NPY_DATETIMEUNIT.NPY_FR_h: "hour",
390+
NPY_DATETIMEUNIT.NPY_FR_m: "minute",
391+
NPY_DATETIMEUNIT.NPY_FR_s: "second",
392+
NPY_DATETIMEUNIT.NPY_FR_ms: "millisecond",
393+
NPY_DATETIMEUNIT.NPY_FR_us: "microsecond",
394+
NPY_DATETIMEUNIT.NPY_FR_ns: "nanosecond",
395+
}[out_bestunit]
396+
return parsed, reso
397+
361398
try:
362399
return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq)
363400
except DateParseError:

pandas/_libs/tslibs/period.pyx

+3
Original file line numberDiff line numberDiff line change
@@ -2584,10 +2584,13 @@ class Period(_Period):
25842584
dt = value
25852585
if freq is None:
25862586
raise ValueError('Must supply freq for datetime value')
2587+
if isinstance(dt, Timestamp):
2588+
nanosecond = dt.nanosecond
25872589
elif util.is_datetime64_object(value):
25882590
dt = Timestamp(value)
25892591
if freq is None:
25902592
raise ValueError('Must supply freq for datetime value')
2593+
nanosecond = dt.nanosecond
25912594
elif PyDate_Check(value):
25922595
dt = datetime(year=value.year, month=value.month, day=value.day)
25932596
if freq is None:

pandas/_libs/tslibs/src/datetime/np_datetime_strings.c

+28-1
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,13 @@ This file implements string parsing and creation for NumPy datetime.
6868
*/
6969
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
7070
npy_datetimestruct *out,
71+
NPY_DATETIMEUNIT *out_bestunit,
7172
int *out_local, int *out_tzoffset) {
7273
int year_leap = 0;
7374
int i, numdigits;
7475
const char *substr;
7576
int sublen;
77+
NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC;
7678

7779
/* If year-month-day are separated by a valid separator,
7880
* months/days without leading zeroes will be parsed
@@ -137,6 +139,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
137139
if (out_local != NULL) {
138140
*out_local = 0;
139141
}
142+
bestunit = NPY_FR_Y;
140143
goto finish;
141144
}
142145

@@ -182,6 +185,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
182185

183186
/* Next character must be the separator, start of day, or end of string */
184187
if (sublen == 0) {
188+
bestunit = NPY_FR_M;
185189
/* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */
186190
if (!has_ymd_sep) {
187191
goto parse_error;
@@ -231,6 +235,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
231235
if (out_local != NULL) {
232236
*out_local = 0;
233237
}
238+
bestunit = NPY_FR_D;
234239
goto finish;
235240
}
236241

@@ -269,6 +274,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
269274
if (!hour_was_2_digits) {
270275
goto parse_error;
271276
}
277+
bestunit = NPY_FR_h;
272278
goto finish;
273279
}
274280

@@ -310,6 +316,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
310316
}
311317

312318
if (sublen == 0) {
319+
bestunit = NPY_FR_m;
313320
goto finish;
314321
}
315322

@@ -354,6 +361,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
354361
++substr;
355362
--sublen;
356363
} else {
364+
bestunit = NPY_FR_s;
357365
goto parse_timezone;
358366
}
359367

@@ -370,6 +378,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
370378
}
371379

372380
if (sublen == 0 || !isdigit(*substr)) {
381+
if (numdigits > 3) {
382+
bestunit = NPY_FR_us;
383+
} else {
384+
bestunit = NPY_FR_ms;
385+
}
373386
goto parse_timezone;
374387
}
375388

@@ -386,6 +399,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
386399
}
387400

388401
if (sublen == 0 || !isdigit(*substr)) {
402+
if (numdigits > 3) {
403+
bestunit = NPY_FR_ps;
404+
} else {
405+
bestunit = NPY_FR_ns;
406+
}
389407
goto parse_timezone;
390408
}
391409

@@ -401,8 +419,14 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
401419
}
402420
}
403421

422+
if (numdigits > 3) {
423+
bestunit = NPY_FR_as;
424+
} else {
425+
bestunit = NPY_FR_fs;
426+
}
427+
404428
parse_timezone:
405-
/* trim any whitespace between time/timeezone */
429+
/* trim any whitespace between time/timezone */
406430
while (sublen > 0 && isspace(*substr)) {
407431
++substr;
408432
--sublen;
@@ -521,6 +545,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
521545
}
522546

523547
finish:
548+
if (out_bestunit != NULL) {
549+
*out_bestunit = bestunit;
550+
}
524551
return 0;
525552

526553
parse_error:

pandas/_libs/tslibs/src/datetime/np_datetime_strings.h

+1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ This file implements string parsing and creation for NumPy datetime.
5656
int
5757
parse_iso_8601_datetime(const char *str, int len, int want_exc,
5858
npy_datetimestruct *out,
59+
NPY_DATETIMEUNIT *out_bestunit,
5960
int *out_local,
6061
int *out_tzoffset);
6162

pandas/tests/scalar/period/test_period.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,21 @@ def test_construction(self):
113113
with pytest.raises(TypeError, match="pass as a string instead"):
114114
Period("1982", freq=("Min", 1))
115115

116+
def test_construction_from_timestamp_nanos(self):
117+
# GH#46811 don't drop nanos from Timestamp
118+
ts = Timestamp("2022-04-20 09:23:24.123456789")
119+
per = Period(ts, freq="ns")
120+
121+
# should losslessly round-trip, not lose the 789
122+
rt = per.to_timestamp()
123+
assert rt == ts
124+
125+
# same thing but from a datetime64 object
126+
dt64 = ts.asm8
127+
per2 = Period(dt64, freq="ns")
128+
rt2 = per2.to_timestamp()
129+
assert rt2.asm8 == dt64
130+
116131
def test_construction_bday(self):
117132

118133
# Biz day construction, roll forward if non-weekday
@@ -324,8 +339,10 @@ def test_constructor_infer_freq(self):
324339
p = Period("2007-01-01 07:10:15.123")
325340
assert p.freq == "L"
326341

342+
# We see that there are 6 digits after the decimal, so get microsecond
343+
# even though the last three digits are all zeros.
327344
p = Period("2007-01-01 07:10:15.123000")
328-
assert p.freq == "L"
345+
assert p.freq == "U"
329346

330347
p = Period("2007-01-01 07:10:15.123400")
331348
assert p.freq == "U"

pandas/tests/tslibs/test_parsing.py

+6
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ def test_parse_time_string():
2323
assert parsed == parsed_lower
2424

2525

26+
def test_parse_time_string_nanosecond_reso():
27+
# GH#46811
28+
parsed, reso = parse_time_string("2022-04-20 09:19:19.123456789")
29+
assert reso == "nanosecond"
30+
31+
2632
def test_parse_time_string_invalid_type():
2733
# Raise on invalid input, don't just return it
2834
msg = "Argument 'arg' has incorrect type (expected str, got tuple)"

0 commit comments

Comments
 (0)