Skip to content

REF: get_supported_reso->get_supported_dtype #56439

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,16 @@
"get_unit_from_dtype",
"periods_per_day",
"periods_per_second",
"is_supported_unit",
"npy_unit_to_abbrev",
"get_supported_reso",
"guess_datetime_format",
"add_overflowsafe",
"get_supported_dtype",
"is_supported_dtype",
]

from pandas._libs.tslibs import dtypes # pylint: disable=import-self
from pandas._libs.tslibs.conversion import localize_pydatetime
from pandas._libs.tslibs.dtypes import (
Resolution,
get_supported_reso,
is_supported_unit,
npy_unit_to_abbrev,
periods_per_day,
periods_per_second,
)
Expand All @@ -58,6 +54,8 @@
OutOfBoundsTimedelta,
add_overflowsafe,
astype_overflowsafe,
get_supported_dtype,
is_supported_dtype,
is_unitless,
py_get_unit_from_dtype as get_unit_from_dtype,
)
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/tslibs/dtypes.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ from numpy cimport int64_t
from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT


cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
cpdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev)
cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) noexcept nogil
cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1
cpdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso)
cpdef bint is_supported_unit(NPY_DATETIMEUNIT reso)
cdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso)
cdef bint is_supported_unit(NPY_DATETIMEUNIT reso)

cpdef freq_to_period_freqstr(freq_n, freq_name)
cdef dict c_OFFSET_TO_PERIOD_FREQSTR
Expand Down
3 changes: 0 additions & 3 deletions pandas/_libs/tslibs/dtypes.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@ OFFSET_TO_PERIOD_FREQSTR: dict[str, str]

def periods_per_day(reso: int = ...) -> int: ...
def periods_per_second(reso: int) -> int: ...
def is_supported_unit(reso: int) -> bool: ...
def npy_unit_to_abbrev(unit: int) -> str: ...
def get_supported_reso(reso: int) -> int: ...
def abbrev_to_npy_unit(abbrev: str) -> int: ...
def freq_to_period_freqstr(freq_n: int, freq_name: str) -> str: ...

Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ class NpyDatetimeUnit(Enum):
NPY_FR_GENERIC = NPY_DATETIMEUNIT.NPY_FR_GENERIC


cpdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso):
cdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso):
# If we have an unsupported reso, return the nearest supported reso.
if reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# TODO: or raise ValueError? trying this gives unraisable errors, but
Expand All @@ -572,7 +572,7 @@ cpdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso):
return reso


cpdef bint is_supported_unit(NPY_DATETIMEUNIT reso):
cdef bint is_supported_unit(NPY_DATETIMEUNIT reso):
return (
reso == NPY_DATETIMEUNIT.NPY_FR_ns
or reso == NPY_DATETIMEUNIT.NPY_FR_us
Expand All @@ -581,7 +581,7 @@ cpdef bint is_supported_unit(NPY_DATETIMEUNIT reso):
)


cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# generic -> default to nanoseconds
return "ns"
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,5 @@ def add_overflowsafe(
left: npt.NDArray[np.int64],
right: npt.NDArray[np.int64],
) -> npt.NDArray[np.int64]: ...
def get_supported_dtype(dtype: np.dtype) -> np.dtype: ...
def is_supported_dtype(dtype: np.dtype) -> bool: ...
24 changes: 24 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ from numpy cimport (
)

from pandas._libs.tslibs.dtypes cimport (
get_supported_reso,
is_supported_unit,
npy_unit_to_abbrev,
npy_unit_to_attrname,
)
Expand Down Expand Up @@ -91,6 +93,28 @@ def py_get_unit_from_dtype(dtype):
return get_unit_from_dtype(dtype)


def get_supported_dtype(dtype: cnp.dtype) -> cnp.dtype:
    """
    Return a datetime64/timedelta64 dtype whose unit is the nearest supported one.

    Parameters
    ----------
    dtype : np.dtype
        NOTE(review): presumably must already be a datetime64 or timedelta64
        dtype; nothing here validates that -- confirm against callers.

    Returns
    -------
    np.dtype
        ``M8[unit]`` when ``dtype`` is datetime64, otherwise ``m8[unit]``,
        where ``unit`` is the abbreviation of the resolution chosen by
        ``get_supported_reso``.
    """
    # dtype -> its resolution -> nearest supported resolution -> abbreviation
    new_unit = npy_unit_to_abbrev(get_supported_reso(get_unit_from_dtype(dtype)))

    # Accessing dtype.kind here incorrectly(?) gives "" instead of "m"/"M",
    #  so we check type_num instead
    if dtype.type_num == cnp.NPY_DATETIME:
        return np.dtype(f"M8[{new_unit}]")
    # Everything that is not datetime64 falls through to timedelta64.
    return np.dtype(f"m8[{new_unit}]")


def is_supported_dtype(dtype: cnp.dtype) -> bool:
    """
    Check whether a datetime64 or timedelta64 dtype has a supported unit.

    Parameters
    ----------
    dtype : np.dtype
        Must be a datetime64 or timedelta64 dtype.

    Returns
    -------
    bool
        The result of ``is_supported_unit`` for the dtype's unit.

    Raises
    ------
    ValueError
        If ``dtype`` is neither datetime64 nor timedelta64.
    """
    cdef:
        NPY_DATETIMEUNIT unit

    # Validate before reading the unit, so the unit lookup only ever sees
    # datetime64/timedelta64 dtypes.
    if dtype.type_num not in [cnp.NPY_DATETIME, cnp.NPY_TIMEDELTA]:
        # The message previously named "is_unitless" (copy-paste slip).
        raise ValueError("is_supported_dtype dtype must be datetime64 or timedelta64")

    unit = get_unit_from_dtype(dtype)
    return is_supported_unit(unit)


def is_unitless(dtype: cnp.dtype) -> bool:
"""
Check if a datetime64 or timedelta64 dtype has no attached unit.
Expand Down
13 changes: 3 additions & 10 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@

from pandas._libs import lib
from pandas._libs.arrays import NDArrayBacked
from pandas._libs.tslibs import (
get_unit_from_dtype,
is_supported_unit,
)
from pandas._libs.tslibs import is_supported_dtype
from pandas._typing import (
ArrayLike,
AxisInt,
Expand Down Expand Up @@ -141,16 +138,12 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike:
cls = dtype.construct_array_type() # type: ignore[assignment]
dt64_values = arr.view(f"M8[{dtype.unit}]")
return cls(dt64_values, dtype=dtype)
elif lib.is_np_dtype(dtype, "M") and is_supported_unit(
get_unit_from_dtype(dtype)
):
elif lib.is_np_dtype(dtype, "M") and is_supported_dtype(dtype):
from pandas.core.arrays import DatetimeArray

dt64_values = arr.view(dtype)
return DatetimeArray(dt64_values, dtype=dtype)
elif lib.is_np_dtype(dtype, "m") and is_supported_unit(
get_unit_from_dtype(dtype)
):
elif lib.is_np_dtype(dtype, "m") and is_supported_dtype(dtype):
from pandas.core.arrays import TimedeltaArray

td64_values = arr.view(dtype)
Expand Down
20 changes: 8 additions & 12 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,13 @@
astype_overflowsafe,
fields,
get_resolution,
get_supported_reso,
get_supported_dtype,
get_unit_from_dtype,
ints_to_pydatetime,
is_date_array_normalized,
is_supported_unit,
is_supported_dtype,
is_unitless,
normalize_i8_timestamps,
npy_unit_to_abbrev,
timezones,
to_offset,
tz_convert_from_utc,
Expand Down Expand Up @@ -712,7 +711,7 @@ def astype(self, dtype, copy: bool = True):
self.tz is None
and lib.is_np_dtype(dtype, "M")
and not is_unitless(dtype)
and is_supported_unit(get_unit_from_dtype(dtype))
and is_supported_dtype(dtype)
):
# unit conversion e.g. datetime64[s]
res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
Expand Down Expand Up @@ -2307,7 +2306,7 @@ def _sequence_to_dt64(
assert isinstance(result, np.ndarray), type(result)
assert result.dtype.kind == "M"
assert result.dtype != "M8"
assert is_supported_unit(get_unit_from_dtype(result.dtype))
assert is_supported_dtype(result.dtype)
return result, tz


Expand All @@ -2321,14 +2320,10 @@ def _construct_from_dt64_naive(
# lib.is_np_dtype(data.dtype)

new_dtype = data.dtype
data_unit = get_unit_from_dtype(new_dtype)
if not is_supported_unit(data_unit):
if not is_supported_dtype(new_dtype):
# Cast to the nearest supported unit, generally "s"
new_reso = get_supported_reso(data_unit)
new_unit = npy_unit_to_abbrev(new_reso)
new_dtype = np.dtype(f"M8[{new_unit}]")
new_dtype = get_supported_dtype(new_dtype)
data = astype_overflowsafe(data, dtype=new_dtype, copy=False)
data_unit = get_unit_from_dtype(new_dtype)
copy = False

if data.dtype.byteorder == ">":
Expand All @@ -2346,6 +2341,7 @@ def _construct_from_dt64_naive(
if data.ndim > 1:
data = data.ravel()

data_unit = get_unit_from_dtype(new_dtype)
data = tzconversion.tz_localize_to_utc(
data.view("i8"), tz, ambiguous=ambiguous, creso=data_unit
)
Expand Down Expand Up @@ -2552,7 +2548,7 @@ def _validate_dt64_dtype(dtype):

if (
isinstance(dtype, np.dtype)
and (dtype.kind != "M" or not is_supported_unit(get_unit_from_dtype(dtype)))
and (dtype.kind != "M" or not is_supported_dtype(dtype))
) or not isinstance(dtype, (np.dtype, DatetimeTZDtype)):
raise ValueError(
f"Unexpected value for 'dtype': '{dtype}'. "
Expand Down
9 changes: 2 additions & 7 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,7 @@
lib,
missing as libmissing,
)
from pandas._libs.tslibs import (
get_unit_from_dtype,
is_supported_unit,
)
from pandas._libs.tslibs import is_supported_dtype
from pandas._typing import (
ArrayLike,
AstypeArg,
Expand Down Expand Up @@ -876,9 +873,7 @@ def _maybe_mask_result(

return BooleanArray(result, mask, copy=False)

elif lib.is_np_dtype(result.dtype, "m") and is_supported_unit(
get_unit_from_dtype(result.dtype)
):
elif lib.is_np_dtype(result.dtype, "m") and is_supported_dtype(result.dtype):
# e.g. test_numeric_arr_mul_tdscalar_numexpr_path
from pandas.core.arrays import TimedeltaArray

Expand Down
9 changes: 2 additions & 7 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@
import numpy as np

from pandas._libs import lib
from pandas._libs.tslibs import (
get_unit_from_dtype,
is_supported_unit,
)
from pandas._libs.tslibs import is_supported_dtype
from pandas.compat.numpy import function as nv

from pandas.core.dtypes.astype import astype_array
Expand Down Expand Up @@ -553,9 +550,7 @@ def _cmp_method(self, other, op):
def _wrap_ndarray_result(self, result: np.ndarray):
# If we have timedelta64[ns] result, return a TimedeltaArray instead
# of a NumpyExtensionArray
if result.dtype.kind == "m" and is_supported_unit(
get_unit_from_dtype(result.dtype)
):
if result.dtype.kind == "m" and is_supported_dtype(result.dtype):
from pandas.core.arrays import TimedeltaArray

return TimedeltaArray._simple_new(result, dtype=result.dtype)
Expand Down
17 changes: 6 additions & 11 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,9 @@
Tick,
Timedelta,
astype_overflowsafe,
get_supported_reso,
get_unit_from_dtype,
get_supported_dtype,
iNaT,
is_supported_unit,
npy_unit_to_abbrev,
is_supported_dtype,
periods_per_second,
)
from pandas._libs.tslibs.conversion import cast_from_unit_vectorized
Expand Down Expand Up @@ -352,7 +350,7 @@ def astype(self, dtype, copy: bool = True):
return self.copy()
return self

if is_supported_unit(get_unit_from_dtype(dtype)):
if is_supported_dtype(dtype):
# unit conversion e.g. timedelta64[s]
res_values = astype_overflowsafe(self._ndarray, dtype, copy=False)
return type(self)._simple_new(
Expand Down Expand Up @@ -1064,12 +1062,9 @@ def sequence_to_td64ns(
copy = False

elif lib.is_np_dtype(data.dtype, "m"):
data_unit = get_unit_from_dtype(data.dtype)
if not is_supported_unit(data_unit):
if not is_supported_dtype(data.dtype):
# cast to closest supported unit, i.e. s or ns
new_reso = get_supported_reso(data_unit)
new_unit = npy_unit_to_abbrev(new_reso)
new_dtype = np.dtype(f"m8[{new_unit}]")
new_dtype = get_supported_dtype(data.dtype)
data = astype_overflowsafe(data, dtype=new_dtype, copy=False)
copy = False

Expand Down Expand Up @@ -1173,7 +1168,7 @@ def _validate_td64_dtype(dtype) -> DtypeObj:

if not lib.is_np_dtype(dtype, "m"):
raise ValueError(f"dtype '{dtype}' is invalid, should be np.timedelta64 dtype")
elif not is_supported_unit(get_unit_from_dtype(dtype)):
elif not is_supported_dtype(dtype):
raise ValueError("Supported timedelta64 resolutions are 's', 'ms', 'us', 'ns'")

return dtype
14 changes: 8 additions & 6 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
from pandas._libs import lib
from pandas._libs.tslibs import (
Period,
get_unit_from_dtype,
is_supported_unit,
get_supported_dtype,
is_supported_dtype,
)
from pandas._typing import (
AnyArrayLike,
Expand Down Expand Up @@ -370,9 +370,9 @@ def array(
# 1. datetime64[ns,us,ms,s]
# 2. timedelta64[ns,us,ms,s]
# so that a DatetimeArray is returned.
if lib.is_np_dtype(dtype, "M") and is_supported_unit(get_unit_from_dtype(dtype)):
if lib.is_np_dtype(dtype, "M") and is_supported_dtype(dtype):
return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy)
if lib.is_np_dtype(dtype, "m") and is_supported_unit(get_unit_from_dtype(dtype)):
if lib.is_np_dtype(dtype, "m") and is_supported_dtype(dtype):
return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy)

elif lib.is_np_dtype(dtype, "mM"):
Expand Down Expand Up @@ -490,12 +490,14 @@ def ensure_wrapped_if_datetimelike(arr):
if arr.dtype.kind == "M":
from pandas.core.arrays import DatetimeArray

return DatetimeArray._from_sequence(arr)
dtype = get_supported_dtype(arr.dtype)
return DatetimeArray._from_sequence(arr, dtype=dtype)

elif arr.dtype.kind == "m":
from pandas.core.arrays import TimedeltaArray

return TimedeltaArray._from_sequence(arr)
dtype = get_supported_dtype(arr.dtype)
return TimedeltaArray._from_sequence(arr, dtype=dtype)

return arr

Expand Down
6 changes: 2 additions & 4 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@
OutOfBoundsTimedelta,
Timedelta,
Timestamp,
get_unit_from_dtype,
is_supported_unit,
is_supported_dtype,
)
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
from pandas.errors import (
Expand Down Expand Up @@ -1266,8 +1265,7 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> None:
pass

elif dtype.kind in "mM":
reso = get_unit_from_dtype(dtype)
if not is_supported_unit(reso):
if not is_supported_dtype(dtype):
# pre-2.0 we would silently swap in nanos for lower-resolutions,
# raise for above-nano resolutions
if dtype.name in ["datetime64", "timedelta64"]:
Expand Down
Loading