diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 089ba62e461d1..2536a9994399c 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -434,6 +434,7 @@ I/O Period ^^^^^^ - Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`) +- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives ``%l`` and ``%u`` were giving wrong results (:issue:`46252`) - Plotting diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 9c05c6867f71f..182502ba9ad7f 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -9,6 +9,7 @@ from cpython.object cimport ( PyObject_RichCompareBool, ) from numpy cimport ( + int32_t, int64_t, ndarray, ) @@ -1217,10 +1218,14 @@ cdef str _period_strftime(int64_t value, int freq, bytes fmt): char *formatted bytes pat, brepl list found_pat = [False] * len(extra_fmts) - int year, quarter + int quarter + int32_t us, ps str result, repl get_date_info(value, freq, &dts) + + # Find our additional directives in the pattern and replace them with + # placeholders that are not processed by c_strftime for i in range(len(extra_fmts)): pat = extra_fmts[i][0] brepl = extra_fmts[i][1] @@ -1228,28 +1233,41 @@ cdef str _period_strftime(int64_t value, int freq, bytes fmt): fmt = fmt.replace(pat, brepl) found_pat[i] = True + # Execute c_strftime to process the usual datetime directives formatted = c_strftime(&dts, fmt) result = util.char_to_string(formatted) free(formatted) + # Now we will fill the placeholders corresponding to our additional directives + + # First prepare the contents + # Save these to local vars as dts can be modified by get_yq below + us = dts.us + ps = dts.ps + if any(found_pat[0:3]): + # Note: this modifies `dts` in-place so that year becomes fiscal year + # However it loses the us and ps + quarter = get_yq(value, freq, &dts) + else: + quarter = 0 + + # Now do the 
filling per se for i in range(len(extra_fmts)): if found_pat[i]: - quarter = get_yq(value, freq, &dts) - - if i == 0: - repl = str(quarter) - elif i == 1: # %f, 2-digit year + if i == 0: # %q, 1-digit quarter. + repl = f"{quarter}" + elif i == 1: # %f, 2-digit 'Fiscal' year repl = f"{(dts.year % 100):02d}" - elif i == 2: + elif i == 2: # %F, 'Fiscal' year with a century repl = str(dts.year) - elif i == 3: - repl = f"{(value % 1_000):03d}" - elif i == 4: - repl = f"{(value % 1_000_000):06d}" - elif i == 5: - repl = f"{(value % 1_000_000_000):09d}" + elif i == 3: # %l, milliseconds + repl = f"{(us // 1_000):03d}" + elif i == 4: # %u, microseconds + repl = f"{(us):06d}" + elif i == 5: # %n, nanoseconds + repl = f"{((us * 1000) + (ps // 1000)):09d}" result = result.replace(str_extra_fmts[i], repl) @@ -2302,7 +2320,8 @@ cdef class _Period(PeriodMixin): containing one or several directives. The method recognizes the same directives as the :func:`time.strftime` function of the standard Python distribution, as well as the specific additional directives ``%f``, - ``%F``, ``%q``. (formatting & docs originally from scikits.timeries). + ``%F``, ``%q``, ``%l``, ``%u``, ``%n``. + (formatting & docs originally from scikits.timeseries). +-----------+--------------------------------+-------+ | Directive | Meaning | Notes | @@ -2349,11 +2368,20 @@ cdef class _Period(PeriodMixin): | | AM or PM. | | +-----------+--------------------------------+-------+ | ``%q`` | Quarter as a decimal number | | - | | [01,04] | | + | | [1,4] | | +-----------+--------------------------------+-------+ | ``%S`` | Second as a decimal number | \(4) | | | [00,61]. | | +-----------+--------------------------------+-------+ + | ``%l`` | Millisecond as a decimal number| | + | | [000,999]. | | + +-----------+--------------------------------+-------+ + | ``%u`` | Microsecond as a decimal number| | + | | [000000,999999]. 
| | + +-----------+--------------------------------+-------+ + | ``%n`` | Nanosecond as a decimal number | | + | | [000000000,999999999]. | | + +-----------+--------------------------------+-------+ | ``%U`` | Week number of the year | \(5) | | | (Sunday as the first day of | | | | the week) as a decimal number | | diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 487a39976a0e8..8f0516abe8bb3 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1593,6 +1593,14 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: of the string format can be found in `python string format doc <%(URL)s>`__. + Formats supported by the C `strftime` API but not by the python string format + doc (such as `"%%R"`, `"%%r"`) are not officially supported and should be + preferably replaced with their supported equivalents (such as `"%%H:%%M"`, + `"%%I:%%M:%%S %%p"`). + + Note that `PeriodIndex` support additional directives, detailed in + `Period.strftime`. + Parameters ---------- date_format : str @@ -1609,6 +1617,8 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: DatetimeIndex.normalize : Return DatetimeIndex with times to midnight. DatetimeIndex.round : Round the DatetimeIndex to the specified freq. DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq. + Timestamp.strftime : Format a single Timestamp. + Period.strftime : Format a single Period. 
Examples -------- diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index b2a8dfddc6b68..fa543f6773634 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -642,17 +642,17 @@ def _format_native_types( values = self.astype(object) if date_format: - formatter = lambda dt: dt.strftime(date_format) + formatter = lambda per: per.strftime(date_format) else: - formatter = lambda dt: str(dt) + formatter = lambda per: str(per) if self._hasna: mask = self._isnan values[mask] = na_rep imask = ~mask - values[imask] = np.array([formatter(dt) for dt in values[imask]]) + values[imask] = np.array([formatter(per) for per in values[imask]]) else: - values = np.array([formatter(dt) for dt in values]) + values = np.array([formatter(per) for per in values]) return values # ------------------------------------------------------------------ diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index adcaeba5cfd8d..9ab3e4cf6afac 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1,7 +1,6 @@ """ Test output formatting for Series/DataFrame, including to_string & reprs """ - from datetime import datetime from io import StringIO import itertools @@ -3167,6 +3166,65 @@ def test_str(self): assert str(NaT) == "NaT" +class TestPeriodIndexFormat: + def test_period_format_and_strftime_default(self): + per = pd.PeriodIndex([datetime(2003, 1, 1, 12), None], freq="H") + + # Default formatting + formatted = per.format() + assert formatted[0] == "2003-01-01 12:00" # default: minutes not shown + assert formatted[1] == "NaT" + # format is equivalent to strftime(None)... 
+ assert formatted[0] == per.strftime(None)[0] + assert per.strftime(None)[1] is np.nan # ...except for NaTs + + # Same test with nanoseconds freq + per = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="n") + formatted = per.format() + assert (formatted == per.strftime(None)).all() + assert formatted[0] == "2003-01-01 12:01:01.123456789" + assert formatted[1] == "2003-01-01 12:01:01.123456790" + + def test_period_custom(self): + # GH#46252 custom formatting directives %l (ms) and %u (us) + + # 3 digits + per = pd.period_range("2003-01-01 12:01:01.123", periods=2, freq="l") + formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") + assert formatted[0] == "03 12:01:01 (ms=123 us=123000 ns=123000000)" + assert formatted[1] == "03 12:01:01 (ms=124 us=124000 ns=124000000)" + + # 6 digits + per = pd.period_range("2003-01-01 12:01:01.123456", periods=2, freq="u") + formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") + assert formatted[0] == "03 12:01:01 (ms=123 us=123456 ns=123456000)" + assert formatted[1] == "03 12:01:01 (ms=123 us=123457 ns=123457000)" + + # 9 digits + per = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="n") + formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") + assert formatted[0] == "03 12:01:01 (ms=123 us=123456 ns=123456789)" + assert formatted[1] == "03 12:01:01 (ms=123 us=123456 ns=123456790)" + + def test_period_tz(self): + # Formatting periods created from a datetime with timezone. 
+ + # This timestamp is in 2013 in Europe/Paris but is 2012 in UTC + dt = pd.to_datetime(["2013-01-01 00:00:00+01:00"], utc=True) + + # Converting to a period loses the timezone information + # Since tz is currently set as utc, we'll see 2012 + with tm.assert_produces_warning(UserWarning, match="will drop timezone"): + per = dt.to_period(freq="H") + assert per.format()[0] == "2012-12-31 23:00" + + # If tz is currently set to Europe/Paris before conversion, we'll see 2013 + dt = dt.tz_convert("Europe/Paris") + with tm.assert_produces_warning(UserWarning, match="will drop timezone"): + per = dt.to_period(freq="H") + assert per.format()[0] == "2013-01-01 00:00" + + class TestDatetimeIndexFormat: def test_datetime(self): formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format()