diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index b626959203295..88a9a259ac8ec 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -30,20 +30,16 @@ "get_unit_from_dtype", "periods_per_day", "periods_per_second", - "is_supported_unit", - "npy_unit_to_abbrev", - "get_supported_reso", "guess_datetime_format", "add_overflowsafe", + "get_supported_dtype", + "is_supported_dtype", ] from pandas._libs.tslibs import dtypes # pylint: disable=import-self from pandas._libs.tslibs.conversion import localize_pydatetime from pandas._libs.tslibs.dtypes import ( Resolution, - get_supported_reso, - is_supported_unit, - npy_unit_to_abbrev, periods_per_day, periods_per_second, ) @@ -58,6 +54,8 @@ OutOfBoundsTimedelta, add_overflowsafe, astype_overflowsafe, + get_supported_dtype, + is_supported_dtype, is_unitless, py_get_unit_from_dtype as get_unit_from_dtype, ) diff --git a/pandas/_libs/tslibs/dtypes.pxd b/pandas/_libs/tslibs/dtypes.pxd index bda4fcf04234b..88cfa6ca60d93 100644 --- a/pandas/_libs/tslibs/dtypes.pxd +++ b/pandas/_libs/tslibs/dtypes.pxd @@ -3,13 +3,13 @@ from numpy cimport int64_t from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT -cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit) +cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit) cpdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev) cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) noexcept nogil cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1 cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? 
-1 -cpdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso) -cpdef bint is_supported_unit(NPY_DATETIMEUNIT reso) +cdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso) +cdef bint is_supported_unit(NPY_DATETIMEUNIT reso) cpdef freq_to_period_freqstr(freq_n, freq_name) cdef dict c_OFFSET_TO_PERIOD_FREQSTR diff --git a/pandas/_libs/tslibs/dtypes.pyi b/pandas/_libs/tslibs/dtypes.pyi index 76649aaaa41bf..72d12ca2d9dc7 100644 --- a/pandas/_libs/tslibs/dtypes.pyi +++ b/pandas/_libs/tslibs/dtypes.pyi @@ -4,9 +4,6 @@ OFFSET_TO_PERIOD_FREQSTR: dict[str, str] def periods_per_day(reso: int = ...) -> int: ... def periods_per_second(reso: int) -> int: ... -def is_supported_unit(reso: int) -> bool: ... -def npy_unit_to_abbrev(unit: int) -> str: ... -def get_supported_reso(reso: int) -> int: ... def abbrev_to_npy_unit(abbrev: str) -> int: ... def freq_to_period_freqstr(freq_n: int, freq_name: str) -> str: ... diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 17f517e5e7264..52e1133e596c5 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -559,7 +559,7 @@ class NpyDatetimeUnit(Enum): NPY_FR_GENERIC = NPY_DATETIMEUNIT.NPY_FR_GENERIC -cpdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso): +cdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso): # If we have an unsupported reso, return the nearest supported reso. if reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: # TODO: or raise ValueError? 
trying this gives unraisable errors, but @@ -572,7 +572,7 @@ cpdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso): return reso -cpdef bint is_supported_unit(NPY_DATETIMEUNIT reso): +cdef bint is_supported_unit(NPY_DATETIMEUNIT reso): return ( reso == NPY_DATETIMEUNIT.NPY_FR_ns or reso == NPY_DATETIMEUNIT.NPY_FR_us @@ -581,7 +581,7 @@ cpdef bint is_supported_unit(NPY_DATETIMEUNIT reso): ) -cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit): +cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit): if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC: # generic -> default to nanoseconds return "ns" diff --git a/pandas/_libs/tslibs/np_datetime.pyi b/pandas/_libs/tslibs/np_datetime.pyi index 5a4ba673dbeff..00ef35c50e532 100644 --- a/pandas/_libs/tslibs/np_datetime.pyi +++ b/pandas/_libs/tslibs/np_datetime.pyi @@ -23,3 +23,5 @@ def add_overflowsafe( left: npt.NDArray[np.int64], right: npt.NDArray[np.int64], ) -> npt.NDArray[np.int64]: ... +def get_supported_dtype(dtype: np.dtype) -> np.dtype: ... +def is_supported_dtype(dtype: np.dtype) -> bool: ... diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 5f5e75b1e64d0..54a5bcf3164ee 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -39,6 +39,8 @@ from numpy cimport ( ) from pandas._libs.tslibs.dtypes cimport ( + get_supported_reso, + is_supported_unit, npy_unit_to_abbrev, npy_unit_to_attrname, ) @@ -91,6 +93,28 @@ def py_get_unit_from_dtype(dtype): return get_unit_from_dtype(dtype) +def get_supported_dtype(dtype: cnp.dtype) -> cnp.dtype: + reso = get_unit_from_dtype(dtype) + new_reso = get_supported_reso(reso) + new_unit = npy_unit_to_abbrev(new_reso) + + # Accessing dtype.kind here incorrectly(?) 
gives "" instead of "m"/"M", + # so we check type_num instead + if dtype.type_num == cnp.NPY_DATETIME: + new_dtype = np.dtype(f"M8[{new_unit}]") + else: + new_dtype = np.dtype(f"m8[{new_unit}]") + return new_dtype + + +def is_supported_dtype(dtype: cnp.dtype) -> bool: + if dtype.type_num not in [cnp.NPY_DATETIME, cnp.NPY_TIMEDELTA]: + raise ValueError("is_supported_dtype dtype must be datetime64 or timedelta64") + cdef: + NPY_DATETIMEUNIT unit = get_unit_from_dtype(dtype) + return is_supported_unit(unit) + + def is_unitless(dtype: cnp.dtype) -> bool: """ Check if a datetime64 or timedelta64 dtype has no attached unit. diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 8d1f5262e7911..cb8f802239146 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -13,10 +13,7 @@ from pandas._libs import lib from pandas._libs.arrays import NDArrayBacked -from pandas._libs.tslibs import ( - get_unit_from_dtype, - is_supported_unit, -) +from pandas._libs.tslibs import is_supported_dtype from pandas._typing import ( ArrayLike, AxisInt, @@ -141,16 +138,12 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike: cls = dtype.construct_array_type() # type: ignore[assignment] dt64_values = arr.view(f"M8[{dtype.unit}]") return cls(dt64_values, dtype=dtype) - elif lib.is_np_dtype(dtype, "M") and is_supported_unit( - get_unit_from_dtype(dtype) - ): + elif lib.is_np_dtype(dtype, "M") and is_supported_dtype(dtype): from pandas.core.arrays import DatetimeArray dt64_values = arr.view(dtype) return DatetimeArray(dt64_values, dtype=dtype) - elif lib.is_np_dtype(dtype, "m") and is_supported_unit( - get_unit_from_dtype(dtype) - ): + elif lib.is_np_dtype(dtype, "m") and is_supported_dtype(dtype): from pandas.core.arrays import TimedeltaArray td64_values = arr.view(dtype) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 64f08adcd48c4..0074645a482b2 100644 --- a/pandas/core/arrays/datetimes.py +++ 
b/pandas/core/arrays/datetimes.py @@ -27,14 +27,13 @@ astype_overflowsafe, fields, get_resolution, - get_supported_reso, + get_supported_dtype, get_unit_from_dtype, ints_to_pydatetime, is_date_array_normalized, - is_supported_unit, + is_supported_dtype, is_unitless, normalize_i8_timestamps, - npy_unit_to_abbrev, timezones, to_offset, tz_convert_from_utc, @@ -712,7 +711,7 @@ def astype(self, dtype, copy: bool = True): self.tz is None and lib.is_np_dtype(dtype, "M") and not is_unitless(dtype) - and is_supported_unit(get_unit_from_dtype(dtype)) + and is_supported_dtype(dtype) ): # unit conversion e.g. datetime64[s] res_values = astype_overflowsafe(self._ndarray, dtype, copy=True) @@ -2307,7 +2306,7 @@ def _sequence_to_dt64( assert isinstance(result, np.ndarray), type(result) assert result.dtype.kind == "M" assert result.dtype != "M8" - assert is_supported_unit(get_unit_from_dtype(result.dtype)) + assert is_supported_dtype(result.dtype) return result, tz @@ -2321,14 +2320,10 @@ def _construct_from_dt64_naive( # lib.is_np_dtype(data.dtype) new_dtype = data.dtype - data_unit = get_unit_from_dtype(new_dtype) - if not is_supported_unit(data_unit): + if not is_supported_dtype(new_dtype): # Cast to the nearest supported unit, generally "s" - new_reso = get_supported_reso(data_unit) - new_unit = npy_unit_to_abbrev(new_reso) - new_dtype = np.dtype(f"M8[{new_unit}]") + new_dtype = get_supported_dtype(new_dtype) data = astype_overflowsafe(data, dtype=new_dtype, copy=False) - data_unit = get_unit_from_dtype(new_dtype) copy = False if data.dtype.byteorder == ">": @@ -2346,6 +2341,7 @@ def _construct_from_dt64_naive( if data.ndim > 1: data = data.ravel() + data_unit = get_unit_from_dtype(new_dtype) data = tzconversion.tz_localize_to_utc( data.view("i8"), tz, ambiguous=ambiguous, creso=data_unit ) @@ -2552,7 +2548,7 @@ def _validate_dt64_dtype(dtype): if ( isinstance(dtype, np.dtype) - and (dtype.kind != "M" or not is_supported_unit(get_unit_from_dtype(dtype))) + and (dtype.kind != 
"M" or not is_supported_dtype(dtype)) ) or not isinstance(dtype, (np.dtype, DatetimeTZDtype)): raise ValueError( f"Unexpected value for 'dtype': '{dtype}'. " diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 2f0cf7a67c1cc..b35c1033df384 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -15,10 +15,7 @@ lib, missing as libmissing, ) -from pandas._libs.tslibs import ( - get_unit_from_dtype, - is_supported_unit, -) +from pandas._libs.tslibs import is_supported_dtype from pandas._typing import ( ArrayLike, AstypeArg, @@ -876,9 +873,7 @@ def _maybe_mask_result( return BooleanArray(result, mask, copy=False) - elif lib.is_np_dtype(result.dtype, "m") and is_supported_unit( - get_unit_from_dtype(result.dtype) - ): + elif lib.is_np_dtype(result.dtype, "m") and is_supported_dtype(result.dtype): # e.g. test_numeric_arr_mul_tdscalar_numexpr_path from pandas.core.arrays import TimedeltaArray diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index efe0c0df45e00..d83a37088daec 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -8,10 +8,7 @@ import numpy as np from pandas._libs import lib -from pandas._libs.tslibs import ( - get_unit_from_dtype, - is_supported_unit, -) +from pandas._libs.tslibs import is_supported_dtype from pandas.compat.numpy import function as nv from pandas.core.dtypes.astype import astype_array @@ -553,9 +550,7 @@ def _cmp_method(self, other, op): def _wrap_ndarray_result(self, result: np.ndarray): # If we have timedelta64[ns] result, return a TimedeltaArray instead # of a NumpyExtensionArray - if result.dtype.kind == "m" and is_supported_unit( - get_unit_from_dtype(result.dtype) - ): + if result.dtype.kind == "m" and is_supported_dtype(result.dtype): from pandas.core.arrays import TimedeltaArray return TimedeltaArray._simple_new(result, dtype=result.dtype) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 
f55d3de8878ad..ccb63d6677b1a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -19,11 +19,9 @@ Tick, Timedelta, astype_overflowsafe, - get_supported_reso, - get_unit_from_dtype, + get_supported_dtype, iNaT, - is_supported_unit, - npy_unit_to_abbrev, + is_supported_dtype, periods_per_second, ) from pandas._libs.tslibs.conversion import cast_from_unit_vectorized @@ -352,7 +350,7 @@ def astype(self, dtype, copy: bool = True): return self.copy() return self - if is_supported_unit(get_unit_from_dtype(dtype)): + if is_supported_dtype(dtype): # unit conversion e.g. timedelta64[s] res_values = astype_overflowsafe(self._ndarray, dtype, copy=False) return type(self)._simple_new( @@ -1064,12 +1062,9 @@ def sequence_to_td64ns( copy = False elif lib.is_np_dtype(data.dtype, "m"): - data_unit = get_unit_from_dtype(data.dtype) - if not is_supported_unit(data_unit): + if not is_supported_dtype(data.dtype): # cast to closest supported unit, i.e. s or ns - new_reso = get_supported_reso(data_unit) - new_unit = npy_unit_to_abbrev(new_reso) - new_dtype = np.dtype(f"m8[{new_unit}]") + new_dtype = get_supported_dtype(data.dtype) data = astype_overflowsafe(data, dtype=new_dtype, copy=False) copy = False @@ -1173,7 +1168,7 @@ def _validate_td64_dtype(dtype) -> DtypeObj: if not lib.is_np_dtype(dtype, "m"): raise ValueError(f"dtype '{dtype}' is invalid, should be np.timedelta64 dtype") - elif not is_supported_unit(get_unit_from_dtype(dtype)): + elif not is_supported_dtype(dtype): raise ValueError("Supported timedelta64 resolutions are 's', 'ms', 'us', 'ns'") return dtype diff --git a/pandas/core/construction.py b/pandas/core/construction.py index a0a92a99abe51..8cb76e57eba7e 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -24,8 +24,8 @@ from pandas._libs import lib from pandas._libs.tslibs import ( Period, - get_unit_from_dtype, - is_supported_unit, + get_supported_dtype, + is_supported_dtype, ) from pandas._typing import ( 
AnyArrayLike, @@ -370,9 +370,9 @@ def array( # 1. datetime64[ns,us,ms,s] # 2. timedelta64[ns,us,ms,s] # so that a DatetimeArray is returned. - if lib.is_np_dtype(dtype, "M") and is_supported_unit(get_unit_from_dtype(dtype)): + if lib.is_np_dtype(dtype, "M") and is_supported_dtype(dtype): return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy) - if lib.is_np_dtype(dtype, "m") and is_supported_unit(get_unit_from_dtype(dtype)): + if lib.is_np_dtype(dtype, "m") and is_supported_dtype(dtype): return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy) elif lib.is_np_dtype(dtype, "mM"): @@ -490,12 +490,14 @@ def ensure_wrapped_if_datetimelike(arr): if arr.dtype.kind == "M": from pandas.core.arrays import DatetimeArray - return DatetimeArray._from_sequence(arr) + dtype = get_supported_dtype(arr.dtype) + return DatetimeArray._from_sequence(arr, dtype=dtype) elif arr.dtype.kind == "m": from pandas.core.arrays import TimedeltaArray - return TimedeltaArray._from_sequence(arr) + dtype = get_supported_dtype(arr.dtype) + return TimedeltaArray._from_sequence(arr, dtype=dtype) return arr diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 320f028f4484c..d5144174d3c71 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -36,8 +36,7 @@ OutOfBoundsTimedelta, Timedelta, Timestamp, - get_unit_from_dtype, - is_supported_unit, + is_supported_dtype, ) from pandas._libs.tslibs.timedeltas import array_to_timedelta64 from pandas.errors import ( @@ -1266,8 +1265,7 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> None: pass elif dtype.kind in "mM": - reso = get_unit_from_dtype(dtype) - if not is_supported_unit(reso): + if not is_supported_dtype(dtype): # pre-2.0 we would silently swap in nanos for lower-resolutions, # raise for above-nano resolutions if dtype.name in ["datetime64", "timedelta64"]: diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index b39930da9f711..d8a772aac6082 100644 --- 
a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -24,11 +24,9 @@ ) from pandas._libs.tslibs import ( BaseOffset, - get_supported_reso, - get_unit_from_dtype, - is_supported_unit, + get_supported_dtype, + is_supported_dtype, is_unitless, - npy_unit_to_abbrev, ) from pandas.util._exceptions import find_stack_level @@ -543,10 +541,9 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape): # GH 52295 if is_unitless(obj.dtype): obj = obj.astype("datetime64[ns]") - elif not is_supported_unit(get_unit_from_dtype(obj.dtype)): - unit = get_unit_from_dtype(obj.dtype) - closest_unit = npy_unit_to_abbrev(get_supported_reso(unit)) - obj = obj.astype(f"datetime64[{closest_unit}]") + elif not is_supported_dtype(obj.dtype): + new_dtype = get_supported_dtype(obj.dtype) + obj = obj.astype(new_dtype) right = np.broadcast_to(obj, shape) return DatetimeArray(right) @@ -562,10 +559,9 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape): # GH 52295 if is_unitless(obj.dtype): obj = obj.astype("timedelta64[ns]") - elif not is_supported_unit(get_unit_from_dtype(obj.dtype)): - unit = get_unit_from_dtype(obj.dtype) - closest_unit = npy_unit_to_abbrev(get_supported_reso(unit)) - obj = obj.astype(f"timedelta64[{closest_unit}]") + elif not is_supported_dtype(obj.dtype): + new_dtype = get_supported_dtype(obj.dtype) + obj = obj.astype(new_dtype) right = np.broadcast_to(obj, shape) return TimedeltaArray(right) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 26cbc77e4e8ae..5ebf1e442733e 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -25,8 +25,7 @@ Timedelta, Timestamp, astype_overflowsafe, - get_unit_from_dtype, - is_supported_unit, + is_supported_dtype, timezones as libtimezones, ) from pandas._libs.tslibs.conversion import cast_from_unit_vectorized @@ -385,7 +384,7 @@ def _convert_listlike_datetimes( return arg elif lib.is_np_dtype(arg_dtype, "M"): - if not is_supported_unit(get_unit_from_dtype(arg_dtype)): 
+ if not is_supported_dtype(arg_dtype): # We go to closest supported reso, i.e. "s" arg = astype_overflowsafe( # TODO: looks like we incorrectly raise with errors=="ignore" diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index e02cea2fef426..42d055326c2a5 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -54,11 +54,10 @@ def test_namespace(): "get_unit_from_dtype", "periods_per_day", "periods_per_second", - "is_supported_unit", - "get_supported_reso", - "npy_unit_to_abbrev", "guess_datetime_format", "add_overflowsafe", + "get_supported_dtype", + "is_supported_dtype", ] expected = set(submodules + api)