From 5154ec9bd837eeaebc0bd6ebfff3f6ed55b734f6 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 6 May 2022 15:14:54 -0700 Subject: [PATCH 1/2] ENH: Timestamp.month_name, day_name support non-nano --- asv_bench/benchmarks/tslibs/fields.py | 4 +-- pandas/_libs/tslibs/fields.pyi | 6 ++-- pandas/_libs/tslibs/fields.pyx | 30 ++++++++++++------- pandas/_libs/tslibs/timestamps.pyx | 12 +++----- pandas/core/arrays/datetimes.py | 18 ++++++++--- .../tests/scalar/timestamp/test_timestamp.py | 8 +++++ pandas/tests/tslibs/test_fields.py | 5 +--- 7 files changed, 53 insertions(+), 30 deletions(-) diff --git a/asv_bench/benchmarks/tslibs/fields.py b/asv_bench/benchmarks/tslibs/fields.py index 203afcdaa7378..23ae73811204c 100644 --- a/asv_bench/benchmarks/tslibs/fields.py +++ b/asv_bench/benchmarks/tslibs/fields.py @@ -66,9 +66,9 @@ class TimeGetStartEndField: def setup(self, size, side, period, freqstr, month_kw): arr = np.random.randint(0, 10, size=size, dtype="i8") - self.dt64data = arr.view("M8[ns]") + self.i8data = arr self.attrname = f"is_{period}_{side}" def time_get_start_end_field(self, size, side, period, freqstr, month_kw): - get_start_end_field(self.dt64data, self.attrname, freqstr, month_kw=month_kw) + get_start_end_field(self.i8data, self.attrname, freqstr, month_kw=month_kw) diff --git a/pandas/_libs/tslibs/fields.pyi b/pandas/_libs/tslibs/fields.pyi index 228f7dbdf5eac..3c503be3f0b66 100644 --- a/pandas/_libs/tslibs/fields.pyi +++ b/pandas/_libs/tslibs/fields.pyi @@ -7,15 +7,17 @@ def build_field_sarray( ) -> np.ndarray: ... def month_position_check(fields, weekdays) -> str | None: ... def get_date_name_field( - dtindex: npt.NDArray[np.int64], # const int64_t[:] + dtindex: npt.NDArray[np.int64], field: str, locale: str | None = ..., + reso: int = ..., # NPY_DATETIMEUNIT ) -> npt.NDArray[np.object_]: ... def get_start_end_field( - dt64values: npt.NDArray[np.datetime64], + dtindex: npt.NDArray[np.int64], field: str, freqstr: str | None = ..., month_kw: int = ..., + reso: int = ..., # NPY_DATETIMEUNIT ) -> npt.NDArray[np.bool_]: ... def get_date_field( dtindex: npt.NDArray[np.int64], # const int64_t[:] diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index e8980dc1a7553..57d4c27b3337d 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -44,6 +44,7 @@ from pandas._libs.tslibs.ccalendar cimport ( from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, + NPY_FR_ns, dt64_to_dtstruct, get_unit_from_dtype, npy_datetimestruct, @@ -139,13 +140,18 @@ def month_position_check(fields, weekdays) -> str | None: @cython.wraparound(False) @cython.boundscheck(False) -def get_date_name_field(const int64_t[:] dtindex, str field, object locale=None): +def get_date_name_field( + const int64_t[:] dtindex, + str field, + object locale=None, + NPY_DATETIMEUNIT reso=NPY_FR_ns, +): """ Given a int64-based datetime index, return array of strings of date name based on requested field (e.g. day_name) """ cdef: - Py_ssize_t i, count = len(dtindex) + Py_ssize_t i, count = dtindex.shape[0] ndarray[object] out, names npy_datetimestruct dts int dow @@ -163,7 +169,7 @@ def get_date_name_field(const int64_t[:] dtindex, str field, object locale=None) out[i] = np.nan continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) dow = dayofweek(dts.year, dts.month, dts.day) out[i] = names[dow].capitalize() @@ -178,7 +184,7 @@ def get_date_name_field(const int64_t[:] dtindex, str field, object locale=None) out[i] = np.nan continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = names[dts.month].capitalize() else: @@ -201,8 +207,13 @@ cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil: @cython.wraparound(False) @cython.boundscheck(False) -def get_start_end_field(ndarray dt64values, str field, - str freqstr=None, int month_kw=12): +def get_start_end_field( + const int64_t[:] dtindex, + str field, + str freqstr=None, + int month_kw=12, + NPY_DATETIMEUNIT reso=NPY_FR_ns, +): """ Given an int64-based datetime index return array of indicators of whether timestamps are at the start/end of the month/quarter/year @@ -210,10 +221,11 @@ def get_start_end_field(ndarray dt64values, str field, Parameters ---------- - dt64values : ndarray[datetime64], any resolution + dtindex : ndarray[int64] field : str frestr : str or None, default None month_kw : int, default 12 + reso : NPY_DATETIMEUNIT, default NPY_FR_ns Returns ------- @@ -221,15 +233,13 @@ def get_start_end_field(ndarray dt64values, str field, """ cdef: Py_ssize_t i - int count = dt64values.size + int count = dtindex.shape[0] bint is_business = 0 int end_month = 12 int start_month = 1 ndarray[int8_t] out npy_datetimestruct dts int compare_month, modby - ndarray dtindex = dt64values.view("i8") - NPY_DATETIMEUNIT reso = get_unit_from_dtype(dt64values.dtype) out = np.zeros(count, dtype='int8') diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e7ac855d6a832..923d1f830e1a9 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -487,7 +487,6 @@ cdef class _Timestamp(ABCTimestamp): dict kwds ndarray[uint8_t, cast=True] out int month_kw - str unit if freq: kwds = freq.kwds @@ -499,9 +498,8 @@ cdef class _Timestamp(ABCTimestamp): val = self._maybe_convert_value_to_local() - unit = npy_unit_to_abbrev(self._reso) - out = get_start_end_field(np.array([val], dtype=f"M8[{unit}]"), - field, freqstr, month_kw) + out = get_start_end_field(np.array([val], dtype=np.int64), + field, freqstr, month_kw, self._reso) return out[0] cdef _warn_on_field_deprecation(self, freq, str field): @@ -661,12 +659,10 @@ cdef class _Timestamp(ABCTimestamp): int64_t val object[::1] out - if self._reso != NPY_FR_ns: - raise NotImplementedError(self._reso) - val = self._maybe_convert_value_to_local() + out = get_date_name_field(np.array([val], dtype=np.int64), - field, locale=locale) + field, locale=locale, reso=self._reso) return out[0] def day_name(self, locale=None) -> str: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 7fef934a85626..6f984727f4f6d 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -38,11 +38,13 @@ tz_convert_from_utc, tzconversion, ) +from pandas._libs.tslibs.np_datetime import py_get_unit_from_dtype from pandas._typing import npt from pandas.errors import ( OutOfBoundsDatetime, PerformanceWarning, ) +from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_inclusive @@ -131,7 +133,7 @@ def f(self): month_kw = kwds.get("startingMonth", kwds.get("month", 12)) result = fields.get_start_end_field( - values.view(self._ndarray.dtype), field, self.freqstr, month_kw + values, field, self.freqstr, month_kw, reso=self._reso ) else: result = fields.get_date_field(values, field) @@ -140,7 +142,7 @@ def f(self): return result if field in self._object_ops: - result = fields.get_date_name_field(values, field) + result = fields.get_date_name_field(values, field, reso=self._reso) result = self._maybe_mask_results(result, fill_value=None) else: @@ -544,6 +546,10 @@ def _check_compatible_with(self, other, setitem: bool = False): # ----------------------------------------------------------------- # Descriptive Properties + @cache_readonly + def _reso(self): + return py_get_unit_from_dtype(self._ndarray.dtype) + def _box_func(self, x: np.datetime64) -> Timestamp | NaTType: # GH#42228 value = x.view("i8") @@ -1270,7 +1276,9 @@ def month_name(self, locale=None): """ values = self._local_timestamps() - result = fields.get_date_name_field(values, "month_name", locale=locale) + result = fields.get_date_name_field( + values, "month_name", locale=locale, reso=self._reso + ) result = self._maybe_mask_results(result, fill_value=None) return result @@ -1313,7 +1321,9 @@ def day_name(self, locale=None): """ values = self._local_timestamps() - result = fields.get_date_name_field(values, "day_name", locale=locale) + result = fields.get_date_name_field( + values, "day_name", locale=locale, reso=self._reso + ) result = self._maybe_mask_results(result, fill_value=None) return result diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index bc9e6c0131646..c892816629462 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -753,6 +753,14 @@ def test_start_end_fields(self, ts): assert not ts.is_month_end assert not ts.is_month_end + def test_day_name(self, dt64, ts): + alt = Timestamp(dt64) + assert ts.day_name() == alt.day_name() + + def test_month_name(self, dt64, ts): + alt = Timestamp(dt64) + assert ts.month_name() == alt.month_name() + def test_repr(self, dt64, ts): alt = Timestamp(dt64) diff --git a/pandas/tests/tslibs/test_fields.py b/pandas/tests/tslibs/test_fields.py index 528d08d7f499b..9e6464f7727bd 100644 --- a/pandas/tests/tslibs/test_fields.py +++ b/pandas/tests/tslibs/test_fields.py @@ -28,10 +28,7 @@ def test_get_date_field_readonly(dtindex): def test_get_start_end_field_readonly(dtindex): - dt64values = dtindex.view("M8[ns]") - dt64values.flags.writeable = False - - result = fields.get_start_end_field(dt64values, "is_month_start", None) + result = fields.get_start_end_field(dtindex, "is_month_start", None) expected = np.array([True, False, False, False, False], dtype=np.bool_) tm.assert_numpy_array_equal(result, expected) From 040478b4d3088de1c97bed52ac3d04436924d15e Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 6 May 2022 15:18:42 -0700 Subject: [PATCH 2/2] restore comment --- pandas/_libs/tslibs/fields.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/fields.pyi b/pandas/_libs/tslibs/fields.pyi index 3c503be3f0b66..e404eadf13657 100644 --- a/pandas/_libs/tslibs/fields.pyi +++ b/pandas/_libs/tslibs/fields.pyi @@ -7,7 +7,7 @@ def build_field_sarray( ) -> np.ndarray: ... def month_position_check(fields, weekdays) -> str | None: ... def get_date_name_field( - dtindex: npt.NDArray[np.int64], + dtindex: npt.NDArray[np.int64], # const int64_t[:] field: str, locale: str | None = ..., reso: int = ..., # NPY_DATETIMEUNIT