Skip to content

BUG: parsing nanoseconds incorrect resolution #46811

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,8 @@ Period
^^^^^^
- Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`)
- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives ``%l`` and ``%u`` were giving wrong results (:issue:`46252`)
- Bug in inferring an incorrect ``freq`` when passing a string to :class:`Period` with microseconds that are a multiple of 1000 (:issue:`46811`)
- Bug in constructing a :class:`Period` from a :class:`Timestamp` or ``np.datetime64`` object with non-zero nanoseconds and ``freq="ns"`` incorrectly truncating the nanoseconds (:issue:`46811`)
-

Plotting
Expand Down
7 changes: 5 additions & 2 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ cnp.import_array()
import pytz

from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
check_dts_bounds,
dt64_to_dtstruct,
dtstruct_to_dt64,
Expand Down Expand Up @@ -75,6 +76,7 @@ def _test_parse_iso8601(ts: str):
cdef:
_TSObject obj
int out_local = 0, out_tzoffset = 0
NPY_DATETIMEUNIT out_bestunit

obj = _TSObject()

Expand All @@ -83,7 +85,7 @@ def _test_parse_iso8601(ts: str):
elif ts == 'today':
return Timestamp.now().normalize()

string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset, True)
string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True)
obj.value = dtstruct_to_dt64(&obj.dts)
check_dts_bounds(&obj.dts)
if out_local == 1:
Expand Down Expand Up @@ -428,6 +430,7 @@ cpdef array_to_datetime(
ndarray[int64_t] iresult
ndarray[object] oresult
npy_datetimestruct dts
NPY_DATETIMEUNIT out_bestunit
bint utc_convert = bool(utc)
bint seen_integer = False
bint seen_string = False
Expand Down Expand Up @@ -516,7 +519,7 @@ cpdef array_to_datetime(
continue

string_to_dts_failed = string_to_dts(
val, &dts, &out_local,
val, &dts, &out_bestunit, &out_local,
&out_tzoffset, False
)
if string_to_dts_failed:
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
int out_local = 0, out_tzoffset = 0, string_to_dts_failed
datetime dt
int64_t ival
NPY_DATETIMEUNIT out_bestunit

if len(ts) == 0 or ts in nat_strings:
ts = NaT
Expand All @@ -604,7 +605,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
# equiv: datetime.today().replace(tzinfo=tz)
else:
string_to_dts_failed = string_to_dts(
ts, &dts, &out_local,
ts, &dts, &out_bestunit, &out_local,
&out_tzoffset, False
)
if not string_to_dts_failed:
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil
cdef int string_to_dts(
str val,
npy_datetimestruct* dts,
NPY_DATETIMEUNIT* out_bestunit,
int* out_local,
int* out_tzoffset,
bint want_exc,
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ cdef extern from "src/datetime/np_datetime.h":
cdef extern from "src/datetime/np_datetime_strings.h":
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
npy_datetimestruct *out,
NPY_DATETIMEUNIT *out_bestunit,
int *out_local, int *out_tzoffset)


Expand Down Expand Up @@ -255,6 +256,7 @@ cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts):
cdef inline int string_to_dts(
str val,
npy_datetimestruct* dts,
NPY_DATETIMEUNIT* out_bestunit,
int* out_local,
int* out_tzoffset,
bint want_exc,
Expand All @@ -265,7 +267,7 @@ cdef inline int string_to_dts(

buf = get_c_string_buf_and_size(val, &length)
return parse_iso_8601_datetime(buf, length, want_exc,
dts, out_local, out_tzoffset)
dts, out_bestunit, out_local, out_tzoffset)


cpdef ndarray astype_overflowsafe(
Expand Down
37 changes: 37 additions & 0 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ from pandas._libs.tslibs.nattype cimport (
c_NaT as NaT,
c_nat_strings as nat_strings,
)
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
npy_datetimestruct,
string_to_dts,
)
from pandas._libs.tslibs.offsets cimport is_offset_object
from pandas._libs.tslibs.util cimport (
get_c_string_buf_and_size,
Expand Down Expand Up @@ -350,6 +355,11 @@ cdef parse_datetime_string_with_reso(
"""
cdef:
object parsed, reso
bint string_to_dts_failed
npy_datetimestruct dts
NPY_DATETIMEUNIT out_bestunit
int out_local
int out_tzoffset

if not _does_string_look_like_datetime(date_string):
raise ValueError('Given date string not likely a datetime.')
Expand All @@ -358,6 +368,33 @@ cdef parse_datetime_string_with_reso(
if parsed is not None:
return parsed, reso

# Try iso8601 first, as it handles nanoseconds
# TODO: does this render some/all of parse_delimited_date redundant?
string_to_dts_failed = string_to_dts(
date_string, &dts, &out_bestunit, &out_local,
&out_tzoffset, False
)
if not string_to_dts_failed:
if dts.ps != 0 or out_local:
# TODO: the not-out_local case we could do without Timestamp;
# avoid circular import
from pandas import Timestamp
parsed = Timestamp(date_string)
else:
parsed = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us)
reso = {
NPY_DATETIMEUNIT.NPY_FR_Y: "year",
NPY_DATETIMEUNIT.NPY_FR_M: "month",
NPY_DATETIMEUNIT.NPY_FR_D: "day",
NPY_DATETIMEUNIT.NPY_FR_h: "hour",
NPY_DATETIMEUNIT.NPY_FR_m: "minute",
NPY_DATETIMEUNIT.NPY_FR_s: "second",
NPY_DATETIMEUNIT.NPY_FR_ms: "millisecond",
NPY_DATETIMEUNIT.NPY_FR_us: "microsecond",
NPY_DATETIMEUNIT.NPY_FR_ns: "nanosecond",
}[out_bestunit]
return parsed, reso

try:
return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq)
except DateParseError:
Expand Down
3 changes: 3 additions & 0 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2584,10 +2584,13 @@ class Period(_Period):
dt = value
if freq is None:
raise ValueError('Must supply freq for datetime value')
if isinstance(dt, Timestamp):
nanosecond = dt.nanosecond
elif util.is_datetime64_object(value):
dt = Timestamp(value)
if freq is None:
raise ValueError('Must supply freq for datetime value')
nanosecond = dt.nanosecond
elif PyDate_Check(value):
dt = datetime(year=value.year, month=value.month, day=value.day)
if freq is None:
Expand Down
29 changes: 28 additions & 1 deletion pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,13 @@ This file implements string parsing and creation for NumPy datetime.
*/
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
npy_datetimestruct *out,
NPY_DATETIMEUNIT *out_bestunit,
int *out_local, int *out_tzoffset) {
int year_leap = 0;
int i, numdigits;
const char *substr;
int sublen;
NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC;

/* If year-month-day are separated by a valid separator,
* months/days without leading zeroes will be parsed
Expand Down Expand Up @@ -137,6 +139,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
if (out_local != NULL) {
*out_local = 0;
}
bestunit = NPY_FR_Y;
goto finish;
}

Expand Down Expand Up @@ -182,6 +185,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,

/* Next character must be the separator, start of day, or end of string */
if (sublen == 0) {
bestunit = NPY_FR_M;
/* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */
if (!has_ymd_sep) {
goto parse_error;
Expand Down Expand Up @@ -231,6 +235,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
if (out_local != NULL) {
*out_local = 0;
}
bestunit = NPY_FR_D;
goto finish;
}

Expand Down Expand Up @@ -269,6 +274,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
if (!hour_was_2_digits) {
goto parse_error;
}
bestunit = NPY_FR_h;
goto finish;
}

Expand Down Expand Up @@ -310,6 +316,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
}

if (sublen == 0) {
bestunit = NPY_FR_m;
goto finish;
}

Expand Down Expand Up @@ -354,6 +361,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
++substr;
--sublen;
} else {
bestunit = NPY_FR_s;
goto parse_timezone;
}

Expand All @@ -370,6 +378,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
}

if (sublen == 0 || !isdigit(*substr)) {
if (numdigits > 3) {
bestunit = NPY_FR_us;
} else {
bestunit = NPY_FR_ms;
}
goto parse_timezone;
}

Expand All @@ -386,6 +399,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
}

if (sublen == 0 || !isdigit(*substr)) {
if (numdigits > 3) {
bestunit = NPY_FR_ps;
} else {
bestunit = NPY_FR_ns;
}
goto parse_timezone;
}

Expand All @@ -401,8 +419,14 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
}
}

if (numdigits > 3) {
bestunit = NPY_FR_as;
} else {
bestunit = NPY_FR_fs;
}

parse_timezone:
/* trim any whitespace between time/timeezone */
/* trim any whitespace between time/timezone */
while (sublen > 0 && isspace(*substr)) {
++substr;
--sublen;
Expand Down Expand Up @@ -521,6 +545,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
}

finish:
if (out_bestunit != NULL) {
*out_bestunit = bestunit;
}
return 0;

parse_error:
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ This file implements string parsing and creation for NumPy datetime.
int
parse_iso_8601_datetime(const char *str, int len, int want_exc,
npy_datetimestruct *out,
NPY_DATETIMEUNIT *out_bestunit,
int *out_local,
int *out_tzoffset);

Expand Down
19 changes: 18 additions & 1 deletion pandas/tests/scalar/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,21 @@ def test_construction(self):
with pytest.raises(TypeError, match="pass as a string instead"):
Period("1982", freq=("Min", 1))

def test_construction_from_timestamp_nanos(self):
# GH#46811 don't drop nanos from Timestamp
# Regression test: a Period built with freq="ns" must keep every digit of
# the source value, including the trailing 789 below the microsecond.
ts = Timestamp("2022-04-20 09:23:24.123456789")
per = Period(ts, freq="ns")

# should losslessly round-trip, not lose the 789
rt = per.to_timestamp()
assert rt == ts

# same thing but from a datetime64 object
# (ts.asm8 is the value the second Period is constructed from; the
# round-tripped Timestamp is compared back in that same form)
dt64 = ts.asm8
per2 = Period(dt64, freq="ns")
rt2 = per2.to_timestamp()
assert rt2.asm8 == dt64

def test_construction_bday(self):

# Biz day construction, roll forward if non-weekday
Expand Down Expand Up @@ -324,8 +339,10 @@ def test_constructor_infer_freq(self):
p = Period("2007-01-01 07:10:15.123")
assert p.freq == "L"

# We see that there are 6 digits after the decimal, so get microsecond
# even though they are all zeros.
p = Period("2007-01-01 07:10:15.123000")
assert p.freq == "L"
assert p.freq == "U"

p = Period("2007-01-01 07:10:15.123400")
assert p.freq == "U"
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/tslibs/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ def test_parse_time_string():
assert parsed == parsed_lower


def test_parse_time_string_nanosecond_reso():
# GH#46811
# A string with nine fractional-second digits must be reported with
# "nanosecond" resolution; only the resolution is checked here, the
# parsed value itself is not asserted.
parsed, reso = parse_time_string("2022-04-20 09:19:19.123456789")
assert reso == "nanosecond"


def test_parse_time_string_invalid_type():
# Raise on invalid input, don't just return it
msg = "Argument 'arg' has incorrect type (expected str, got tuple)"
Expand Down