-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: can't round-trip non-nano Timestamp #51087
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 24 commits
d94ef37
9a57c9a
30bb898
648037e
bada089
c05cf4e
f9a4efc
72d9045
6a5e5f1
dd37781
f5abfeb
00b8816
fc11352
65e0f44
b26e3c9
82a80db
b71f629
f749d4d
7d27066
de66535
b692b03
f3205b2
27fd951
3b54d5e
19717cb
1f5fa7a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
import numpy as np | ||
|
||
cimport numpy as cnp | ||
from libc.math cimport log10 | ||
from numpy cimport ( | ||
int32_t, | ||
int64_t, | ||
|
@@ -81,7 +82,11 @@ TD64NS_DTYPE = np.dtype("m8[ns]") | |
# ---------------------------------------------------------------------- | ||
# Unit Conversion Helpers | ||
|
||
cdef int64_t cast_from_unit(object ts, str unit) except? -1: | ||
cdef int64_t cast_from_unit( | ||
object ts, | ||
str unit, | ||
NPY_DATETIMEUNIT out_reso=NPY_FR_ns | ||
) except? -1: | ||
""" | ||
Return a casting of the unit represented to nanoseconds | ||
round the fractional part of a float to our precision, p. | ||
|
@@ -99,7 +104,7 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1: | |
int64_t m | ||
int p | ||
|
||
m, p = precision_from_unit(unit) | ||
m, p = precision_from_unit(unit, out_reso) | ||
|
||
# just give me the unit back | ||
if ts is None: | ||
|
@@ -119,7 +124,7 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1: | |
if is_float_object(ts): | ||
ts = int(ts) | ||
dt64obj = np.datetime64(ts, unit) | ||
return get_datetime64_nanos(dt64obj, NPY_FR_ns) | ||
return get_datetime64_nanos(dt64obj, out_reso) | ||
|
||
# cast the unit, multiply base/frac separately | ||
# to avoid precision issues from float -> int | ||
|
@@ -142,7 +147,10 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1: | |
) from err | ||
|
||
|
||
cpdef inline (int64_t, int) precision_from_unit(str unit): | ||
cpdef inline (int64_t, int) precision_from_unit( | ||
str unit, | ||
NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns, | ||
): | ||
""" | ||
Return a casting of the unit represented to nanoseconds + the precision | ||
to round the fractional part. | ||
|
@@ -154,45 +162,39 @@ cpdef inline (int64_t, int) precision_from_unit(str unit): | |
""" | ||
cdef: | ||
int64_t m | ||
int64_t multiplier | ||
int p | ||
NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit) | ||
|
||
multiplier = periods_per_second(out_reso) | ||
|
||
if reso == NPY_DATETIMEUNIT.NPY_FR_Y: | ||
# each 400 years we have 97 leap years, for an average of 97/400=.2425 | ||
# extra days each year. We get 31556952 by writing | ||
# 3600*24*365.2425=31556952 | ||
m = 1_000_000_000 * 31556952 | ||
p = 9 | ||
m = multiplier * 31556952 | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_M: | ||
# 2629746 comes from dividing the "Y" case by 12. | ||
m = 1_000_000_000 * 2629746 | ||
p = 9 | ||
m = multiplier * 2629746 | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_W: | ||
m = 1_000_000_000 * 3600 * 24 * 7 | ||
p = 9 | ||
m = multiplier * 3600 * 24 * 7 | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_D: | ||
m = 1_000_000_000 * 3600 * 24 | ||
p = 9 | ||
m = multiplier * 3600 * 24 | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_h: | ||
m = 1_000_000_000 * 3600 | ||
p = 9 | ||
m = multiplier * 3600 | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_m: | ||
m = 1_000_000_000 * 60 | ||
p = 9 | ||
m = multiplier * 60 | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_s: | ||
m = 1_000_000_000 | ||
p = 9 | ||
m = multiplier | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: | ||
m = 1_000_000 | ||
p = 6 | ||
m = multiplier // 1_000 | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_us: | ||
m = 1000 | ||
p = 3 | ||
m = multiplier // 1_000_000 | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_ns or reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: | ||
m = 1 | ||
p = 0 | ||
m = multiplier // 1_000_000_000 | ||
else: | ||
raise ValueError(f"cannot cast unit {unit}") | ||
p = <int>log10(m) # number of digits in 'm' minus 1 | ||
return m, p | ||
|
||
|
||
|
@@ -294,9 +296,15 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, | |
if ts == NPY_NAT: | ||
obj.value = NPY_NAT | ||
else: | ||
ts = cast_from_unit(ts, unit) | ||
if unit is None: | ||
in_reso = abbrev_to_npy_unit("ns") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we assign unit="ns" here, since we pass it to cast_from_unit below? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK for follow-up. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yup, good one, thanks. |
||
else: | ||
in_reso = abbrev_to_npy_unit(unit) | ||
reso = get_supported_reso(in_reso) | ||
ts = cast_from_unit(ts, unit, reso) | ||
obj.value = ts | ||
pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts) | ||
obj.creso = reso | ||
pandas_datetime_to_datetimestruct(ts, reso, &obj.dts) | ||
elif is_float_object(ts): | ||
if ts != ts or ts == NPY_NAT: | ||
obj.value = NPY_NAT | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm pretty sure the M and Y cases in precision_from_unit are no longer reached, in which case I think the entire `m` part of this function can be replaced with `get_conversion_factor`.
(OK to consider out of scope)