Skip to content

REF: de-duplicate precision_from_unit attempt 2 #51594

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ from pandas._libs.tslibs.conversion cimport (
convert_timezone,
get_datetime64_nanos,
parse_pydatetime,
precision_from_unit,
)
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
Expand Down Expand Up @@ -258,7 +257,6 @@ def array_with_unit_to_datetime(
"""
cdef:
Py_ssize_t i, n=len(values)
int64_t mult
bint is_ignore = errors == "ignore"
bint is_coerce = errors == "coerce"
bint is_raise = errors == "raise"
Expand All @@ -275,8 +273,6 @@ def array_with_unit_to_datetime(
)
return result, tz

mult, _ = precision_from_unit(unit)

result = np.empty(n, dtype="M8[ns]")
iresult = result.view("i8")

Expand Down
44 changes: 14 additions & 30 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ from pandas._libs.tslibs.np_datetime cimport (
NPY_FR_us,
check_dts_bounds,
convert_reso,
get_conversion_factor,
get_datetime64_unit,
get_datetime64_value,
get_implementation_bounds,
Expand Down Expand Up @@ -83,9 +84,9 @@ TD64NS_DTYPE = np.dtype("m8[ns]")
# Unit Conversion Helpers

cdef int64_t cast_from_unit(
object ts,
str unit,
NPY_DATETIMEUNIT out_reso=NPY_FR_ns
object ts,
str unit,
NPY_DATETIMEUNIT out_reso=NPY_FR_ns
) except? -1:
"""
Return a casting of the unit represented to nanoseconds
Expand All @@ -104,12 +105,6 @@ cdef int64_t cast_from_unit(
int64_t m
int p

m, p = precision_from_unit(unit, out_reso)

# just give me the unit back
if ts is None:
return m

if unit in ["Y", "M"]:
if is_float_object(ts) and not ts.is_integer():
# GH#47267 it is clear that 2 "M" corresponds to 1970-02-01,
Expand All @@ -126,6 +121,8 @@ cdef int64_t cast_from_unit(
dt64obj = np.datetime64(ts, unit)
return get_datetime64_nanos(dt64obj, out_reso)

m, p = precision_from_unit(unit, out_reso)

# cast the unit, multiply base/frac separately
# to avoid precision issues from float -> int
try:
Expand All @@ -148,8 +145,8 @@ cdef int64_t cast_from_unit(


cpdef inline (int64_t, int) precision_from_unit(
str unit,
NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns,
str unit,
NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns,
):
"""
Return a casting of the unit represented to nanoseconds + the precision
Expand All @@ -166,34 +163,21 @@ cpdef inline (int64_t, int) precision_from_unit(
int p
NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit)

multiplier = periods_per_second(out_reso)

if reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
reso = NPY_DATETIMEUNIT.NPY_FR_ns
if reso == NPY_DATETIMEUNIT.NPY_FR_Y:
# each 400 years we have 97 leap years, for an average of 97/400=.2425
# extra days each year. We get 31556952 by writing
# 3600*24*365.2425=31556952
multiplier = periods_per_second(out_reso)
m = multiplier * 31556952
elif reso == NPY_DATETIMEUNIT.NPY_FR_M:
# 2629746 comes from dividing the "Y" case by 12.
multiplier = periods_per_second(out_reso)
m = multiplier * 2629746
elif reso == NPY_DATETIMEUNIT.NPY_FR_W:
m = multiplier * 3600 * 24 * 7
elif reso == NPY_DATETIMEUNIT.NPY_FR_D:
m = multiplier * 3600 * 24
elif reso == NPY_DATETIMEUNIT.NPY_FR_h:
m = multiplier * 3600
elif reso == NPY_DATETIMEUNIT.NPY_FR_m:
m = multiplier * 60
elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
m = multiplier
elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
m = multiplier // 1_000
elif reso == NPY_DATETIMEUNIT.NPY_FR_us:
m = multiplier // 1_000_000
elif reso == NPY_DATETIMEUNIT.NPY_FR_ns or reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
m = multiplier // 1_000_000_000
else:
raise ValueError(f"cannot cast unit {unit}")
m = get_conversion_factor(reso, out_reso)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If `get_conversion_factor` raises, `precision_from_unit` won't propagate the error. Perhaps a note about this could be added to the docstring, alongside the current note?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment sounds good

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To keep from clogging the CI, mind if I add this comment in my next "CLN: assorted" branch?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good!


p = <int>log10(m) # number of digits in 'm' minus 1
return m, p

Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,8 @@ cdef int64_t get_conversion_factor(
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs:
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
else:
raise ValueError("Converting from M or Y units is not supported.")


cdef int64_t convert_reso(
Expand Down