Skip to content

Commit 948392b

Browse files
authored
PERF: PeriodArray._format_native_types (#51793)
1 parent f7df8bf commit 948392b

File tree

4 files changed

+59
-21
lines changed

4 files changed

+59
-21
lines changed

pandas/_libs/tslibs/period.pyi

+6
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,12 @@ def extract_ordinals(
4242
def extract_freq(
4343
values: npt.NDArray[np.object_],
4444
) -> BaseOffset: ...
45+
def period_array_strftime(
46+
values: npt.NDArray[np.int64],
47+
dtype_code: int,
48+
na_rep,
49+
date_format: str | None,
50+
) -> npt.NDArray[np.object_]: ...
4551

4652
# exposed for tests
4753
def period_asfreq(ordinal: int, freq1: int, freq2: int, end: bool) -> int: ...

pandas/_libs/tslibs/period.pyx

+48
Original file line numberDiff line numberDiff line change
@@ -1268,6 +1268,54 @@ cdef str _period_strftime(int64_t value, int freq, bytes fmt):
12681268
return result
12691269

12701270

1271+
def period_array_strftime(
1272+
ndarray values, int dtype_code, object na_rep, str date_format
1273+
):
1274+
"""
1275+
Vectorized Period.strftime used for PeriodArray._format_native_types.
1276+
1277+
Parameters
1278+
----------
1279+
values : ndarray[int64_t], ndim unrestricted
1280+
dtype_code : int
1281+
Corresponds to PeriodDtype._dtype_code
1282+
na_rep : any
1283+
date_format : str or None
1284+
"""
1285+
cdef:
1286+
Py_ssize_t i, n = values.size
1287+
int64_t ordinal
1288+
object item_repr
1289+
ndarray out = cnp.PyArray_EMPTY(
1290+
values.ndim, values.shape, cnp.NPY_OBJECT, 0
1291+
)
1292+
object[::1] out_flat = out.ravel()
1293+
cnp.broadcast mi = cnp.PyArray_MultiIterNew2(out, values)
1294+
1295+
for i in range(n):
1296+
# Analogous to: ordinal = values[i]
1297+
ordinal = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
1298+
1299+
if ordinal == NPY_NAT:
1300+
item_repr = na_rep
1301+
else:
1302+
# This is equivalent to
1303+
# freq = frequency_corresponding_to_dtype_code(dtype_code)
1304+
# per = Period(ordinal, freq=freq)
1305+
# if date_format:
1306+
# item_repr = per.strftime(date_format)
1307+
# else:
1308+
# item_repr = str(per)
1309+
item_repr = period_format(ordinal, dtype_code, date_format)
1310+
1311+
# Analogous to: out[i] = item_repr
1312+
out_flat[i] = item_repr
1313+
1314+
cnp.PyArray_MultiIter_NEXT(mi)
1315+
1316+
return out
1317+
1318+
12711319
# ----------------------------------------------------------------------
12721320
# period accessors
12731321

pandas/core/arrays/period.py

+3-19
Original file line numberDiff line numberDiff line change
@@ -616,31 +616,15 @@ def _formatter(self, boxed: bool = False):
616616
return str
617617
return "'{}'".format
618618

619-
@dtl.ravel_compat
620619
def _format_native_types(
621620
self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
622621
) -> npt.NDArray[np.object_]:
623622
"""
624623
actually format my specific types
625624
"""
626-
values = self.astype(object)
627-
628-
# Create the formatter function
629-
if date_format:
630-
formatter = lambda per: per.strftime(date_format)
631-
else:
632-
# Uses `_Period.str` which in turn uses `format_period`
633-
formatter = lambda per: str(per)
634-
635-
# Apply the formatter to all values in the array, possibly with a mask
636-
if self._hasna:
637-
mask = self._isnan
638-
values[mask] = na_rep
639-
imask = ~mask
640-
values[imask] = np.array([formatter(per) for per in values[imask]])
641-
else:
642-
values = np.array([formatter(per) for per in values])
643-
return values
625+
return libperiod.period_array_strftime(
626+
self.asi8, self.dtype._dtype_code, na_rep, date_format
627+
)
644628

645629
# ------------------------------------------------------------------
646630

pandas/tests/indexes/period/test_formats.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def test_to_native_types():
1313
index = PeriodIndex(["2017-01-01", "2017-01-02", "2017-01-03"], freq="D")
1414

1515
# First, with no arguments.
16-
expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype="=U10")
16+
expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object)
1717

1818
result = index._format_native_types()
1919
tm.assert_numpy_array_equal(result, expected)
@@ -23,7 +23,7 @@ def test_to_native_types():
2323
tm.assert_numpy_array_equal(result, expected)
2424

2525
# Make sure date formatting works
26-
expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype="=U10")
26+
expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object)
2727

2828
result = index._format_native_types(date_format="%m-%Y-%d")
2929
tm.assert_numpy_array_equal(result, expected)

0 commit comments

Comments
 (0)