diff --git a/pandas/_libs/tslibs/period.pyi b/pandas/_libs/tslibs/period.pyi
index 946ae1215f1e3..690518a9fa88f 100644
--- a/pandas/_libs/tslibs/period.pyi
+++ b/pandas/_libs/tslibs/period.pyi
@@ -42,6 +42,12 @@ def extract_ordinals(
 def extract_freq(
     values: npt.NDArray[np.object_],
 ) -> BaseOffset: ...
+def period_array_strftime(
+    values: npt.NDArray[np.int64],
+    dtype_code: int,
+    na_rep: object,
+    date_format: str | None,
+) -> npt.NDArray[np.object_]: ...
 
 # exposed for tests
 def period_asfreq(ordinal: int, freq1: int, freq2: int, end: bool) -> int: ...
diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
index f44178700503d..7da1cab9af4f9 100644
--- a/pandas/_libs/tslibs/period.pyx
+++ b/pandas/_libs/tslibs/period.pyx
@@ -1268,6 +1268,62 @@ cdef str _period_strftime(int64_t value, int freq, bytes fmt):
     return result
 
 
+def period_array_strftime(
+    ndarray values, int dtype_code, object na_rep, str date_format
+):
+    """
+    Vectorized Period.strftime used for PeriodArray._format_native_types.
+
+    Parameters
+    ----------
+    values : ndarray[int64_t], ndim unrestricted
+        Period ordinals; entries equal to NPY_NAT are treated as missing.
+    dtype_code : int
+        Corresponds to PeriodDtype._dtype_code
+    na_rep : any
+        Object stored in the output for missing (NPY_NAT) entries.
+    date_format : str or None
+        Passed through to period_format for non-missing entries.
+
+    Returns
+    -------
+    ndarray[object]
+        Same shape as ``values``.
+    """
+    cdef:
+        Py_ssize_t i, n = values.size
+        int64_t ordinal
+        object item_repr
+        ndarray out = cnp.PyArray_EMPTY(
+            values.ndim, values.shape, cnp.NPY_OBJECT, 0
+        )
+        object[::1] out_flat = out.ravel()
+        cnp.broadcast mi = cnp.PyArray_MultiIterNew2(out, values)
+
+    for i in range(n):
+        # Analogous to: ordinal = values[i]
+        ordinal = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
+
+        if ordinal == NPY_NAT:
+            item_repr = na_rep
+        else:
+            # This is equivalent to
+            # freq = frequency_corresponding_to_dtype_code(dtype_code)
+            # per = Period(ordinal, freq=freq)
+            # if date_format:
+            #     item_repr = per.strftime(date_format)
+            # else:
+            #     item_repr = str(per)
+            item_repr = period_format(ordinal, dtype_code, date_format)
+
+        # Analogous to: out[i] = item_repr
+        out_flat[i] = item_repr
+
+        cnp.PyArray_MultiIter_NEXT(mi)
+
+    return out
+
+
 # ----------------------------------------------------------------------
 # period accessors
 
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index f9404fbf57382..980bb46470ae2 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -616,31 +616,15 @@ def _formatter(self, boxed: bool = False):
             return str
         return "'{}'".format
 
-    @dtl.ravel_compat
     def _format_native_types(
         self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
     ) -> npt.NDArray[np.object_]:
         """
         actually format my specific types
         """
-        values = self.astype(object)
-
-        # Create the formatter function
-        if date_format:
-            formatter = lambda per: per.strftime(date_format)
-        else:
-            # Uses `_Period.str` which in turn uses `format_period`
-            formatter = lambda per: str(per)
-
-        # Apply the formatter to all values in the array, possibly with a mask
-        if self._hasna:
-            mask = self._isnan
-            values[mask] = na_rep
-            imask = ~mask
-            values[imask] = np.array([formatter(per) for per in values[imask]])
-        else:
-            values = np.array([formatter(per) for per in values])
-        return values
+        return libperiod.period_array_strftime(
+            self.asi8, self.dtype._dtype_code, na_rep, date_format
+        )
 
     # ------------------------------------------------------------------
 
diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py
index bfd83f1360671..87bbb96377a79 100644
--- a/pandas/tests/indexes/period/test_formats.py
+++ b/pandas/tests/indexes/period/test_formats.py
@@ -13,7 +13,7 @@ def test_to_native_types():
     index = PeriodIndex(["2017-01-01", "2017-01-02", "2017-01-03"], freq="D")
 
     # First, with no arguments.
-    expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype="=U10")
+    expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object)
 
     result = index._format_native_types()
     tm.assert_numpy_array_equal(result, expected)
@@ -23,7 +23,7 @@ def test_to_native_types():
     tm.assert_numpy_array_equal(result, expected)
 
     # Make sure date formatting works
-    expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype="=U10")
+    expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object)
 
     result = index._format_native_types(date_format="%m-%Y-%d")
     tm.assert_numpy_array_equal(result, expected)