Skip to content

Commit 09f7ad5

Browse files
smarie authored and yehoshuadimarsky committed
Fixed Period and PeriodIndex formatting (pandas-dev#46361)
1 parent 98e3101 commit 09f7ad5

File tree

5 files changed

+117
-20
lines changed

5 files changed

+117
-20
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ I/O
440440
Period
441441
^^^^^^
442442
- Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`)
443+
- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime` where directives ``%l`` and ``%u`` were giving wrong results (:issue:`46252`)
443444
-
444445

445446
Plotting

pandas/_libs/tslibs/period.pyx

+43-15
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ from cpython.object cimport (
99
PyObject_RichCompareBool,
1010
)
1111
from numpy cimport (
12+
int32_t,
1213
int64_t,
1314
ndarray,
1415
)
@@ -1217,39 +1218,56 @@ cdef str _period_strftime(int64_t value, int freq, bytes fmt):
12171218
char *formatted
12181219
bytes pat, brepl
12191220
list found_pat = [False] * len(extra_fmts)
1220-
int year, quarter
1221+
int quarter
1222+
int32_t us, ps
12211223
str result, repl
12221224

12231225
get_date_info(value, freq, &dts)
1226+
1227+
# Find our additional directives in the pattern and replace them with
1228+
# placeholders that are not processed by c_strftime
12241229
for i in range(len(extra_fmts)):
12251230
pat = extra_fmts[i][0]
12261231
brepl = extra_fmts[i][1]
12271232
if pat in fmt:
12281233
fmt = fmt.replace(pat, brepl)
12291234
found_pat[i] = True
12301235

1236+
# Execute c_strftime to process the usual datetime directives
12311237
formatted = c_strftime(&dts, <char*>fmt)
12321238

12331239
result = util.char_to_string(formatted)
12341240
free(formatted)
12351241

1242+
# Now we will fill the placeholders corresponding to our additional directives
1243+
1244+
# First prepare the contents
1245+
# Save these to local vars as dts can be modified by get_yq below
1246+
us = dts.us
1247+
ps = dts.ps
1248+
if any(found_pat[0:3]):
1249+
# Note: this modifies `dts` in-place so that year becomes fiscal year
1250+
# However, it loses the us and ps
1251+
quarter = get_yq(value, freq, &dts)
1252+
else:
1253+
quarter = 0
1254+
1255+
# Now do the filling per se
12361256
for i in range(len(extra_fmts)):
12371257
if found_pat[i]:
12381258

1239-
quarter = get_yq(value, freq, &dts)
1240-
1241-
if i == 0:
1242-
repl = str(quarter)
1243-
elif i == 1: # %f, 2-digit year
1259+
if i == 0: # %q, 1-digit quarter.
1260+
repl = f"{quarter}"
1261+
elif i == 1: # %f, 2-digit 'Fiscal' year
12441262
repl = f"{(dts.year % 100):02d}"
1245-
elif i == 2:
1263+
elif i == 2: # %F, 'Fiscal' year with a century
12461264
repl = str(dts.year)
1247-
elif i == 3:
1248-
repl = f"{(value % 1_000):03d}"
1249-
elif i == 4:
1250-
repl = f"{(value % 1_000_000):06d}"
1251-
elif i == 5:
1252-
repl = f"{(value % 1_000_000_000):09d}"
1265+
elif i == 3: # %l, milliseconds
1266+
repl = f"{(us // 1_000):03d}"
1267+
elif i == 4: # %u, microseconds
1268+
repl = f"{(us):06d}"
1269+
elif i == 5: # %n, nanoseconds
1270+
repl = f"{((us * 1000) + (ps // 1000)):09d}"
12531271

12541272
result = result.replace(str_extra_fmts[i], repl)
12551273

@@ -2302,7 +2320,8 @@ cdef class _Period(PeriodMixin):
23022320
containing one or several directives. The method recognizes the same
23032321
directives as the :func:`time.strftime` function of the standard Python
23042322
distribution, as well as the specific additional directives ``%f``,
2305-
``%F``, ``%q``. (formatting & docs originally from scikits.timeries).
2323+
``%F``, ``%q``, ``%l``, ``%u``, ``%n``.
2324+
(formatting & docs originally from scikits.timeseries).
23062325

23072326
+-----------+--------------------------------+-------+
23082327
| Directive | Meaning | Notes |
@@ -2349,11 +2368,20 @@ cdef class _Period(PeriodMixin):
23492368
| | AM or PM. | |
23502369
+-----------+--------------------------------+-------+
23512370
| ``%q`` | Quarter as a decimal number | |
2352-
| | [01,04] | |
2371+
| | [1,4] | |
23532372
+-----------+--------------------------------+-------+
23542373
| ``%S`` | Second as a decimal number | \(4) |
23552374
| | [00,61]. | |
23562375
+-----------+--------------------------------+-------+
2376+
| ``%l`` | Millisecond as a decimal number| |
2377+
| | [000,999]. | |
2378+
+-----------+--------------------------------+-------+
2379+
| ``%u`` | Microsecond as a decimal number| |
2380+
| | [000000,999999]. | |
2381+
+-----------+--------------------------------+-------+
2382+
| ``%n`` | Nanosecond as a decimal number | |
2383+
| | [000000000,999999999]. | |
2384+
+-----------+--------------------------------+-------+
23572385
| ``%U`` | Week number of the year | \(5) |
23582386
| | (Sunday as the first day of | |
23592387
| | the week) as a decimal number | |

pandas/core/arrays/datetimelike.py

+10
Original file line numberDiff line numberDiff line change
@@ -1593,6 +1593,14 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]:
15931593
of the string format can be found in `python string format
15941594
doc <%(URL)s>`__.
15951595
1596+
Formats supported by the C `strftime` API but not by the python string format
1597+
doc (such as `"%%R"`, `"%%r"`) are not officially supported and should be
1598+
preferably replaced with their supported equivalents (such as `"%%H:%%M"`,
1599+
`"%%I:%%M:%%S %%p"`).
1600+
1601+
Note that `PeriodIndex` supports additional directives, detailed in
1602+
`Period.strftime`.
1603+
15961604
Parameters
15971605
----------
15981606
date_format : str
@@ -1609,6 +1617,8 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]:
16091617
DatetimeIndex.normalize : Return DatetimeIndex with times to midnight.
16101618
DatetimeIndex.round : Round the DatetimeIndex to the specified freq.
16111619
DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq.
1620+
Timestamp.strftime : Format a single Timestamp.
1621+
Period.strftime : Format a single Period.
16121622
16131623
Examples
16141624
--------

pandas/core/arrays/period.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -642,17 +642,17 @@ def _format_native_types(
642642
values = self.astype(object)
643643

644644
if date_format:
645-
formatter = lambda dt: dt.strftime(date_format)
645+
formatter = lambda per: per.strftime(date_format)
646646
else:
647-
formatter = lambda dt: str(dt)
647+
formatter = lambda per: str(per)
648648

649649
if self._hasna:
650650
mask = self._isnan
651651
values[mask] = na_rep
652652
imask = ~mask
653-
values[imask] = np.array([formatter(dt) for dt in values[imask]])
653+
values[imask] = np.array([formatter(per) for per in values[imask]])
654654
else:
655-
values = np.array([formatter(dt) for dt in values])
655+
values = np.array([formatter(per) for per in values])
656656
return values
657657

658658
# ------------------------------------------------------------------

pandas/tests/io/formats/test_format.py

+59-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""
22
Test output formatting for Series/DataFrame, including to_string & reprs
33
"""
4-
54
from datetime import datetime
65
from io import StringIO
76
import itertools
@@ -3167,6 +3166,65 @@ def test_str(self):
31673166
assert str(NaT) == "NaT"
31683167

31693168

3169+
class TestPeriodIndexFormat:
3170+
def test_period_format_and_strftime_default(self):
3171+
per = pd.PeriodIndex([datetime(2003, 1, 1, 12), None], freq="H")
3172+
3173+
# Default formatting
3174+
formatted = per.format()
3175+
assert formatted[0] == "2003-01-01 12:00" # default: minutes not shown
3176+
assert formatted[1] == "NaT"
3177+
# format is equivalent to strftime(None)...
3178+
assert formatted[0] == per.strftime(None)[0]
3179+
assert per.strftime(None)[1] is np.nan # ...except for NaTs
3180+
3181+
# Same test with nanoseconds freq
3182+
per = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="n")
3183+
formatted = per.format()
3184+
assert (formatted == per.strftime(None)).all()
3185+
assert formatted[0] == "2003-01-01 12:01:01.123456789"
3186+
assert formatted[1] == "2003-01-01 12:01:01.123456790"
3187+
3188+
def test_period_custom(self):
3189+
# GH#46252 custom formatting directives %l (ms), %u (us) and %n (ns)
3190+
3191+
# 3 digits
3192+
per = pd.period_range("2003-01-01 12:01:01.123", periods=2, freq="l")
3193+
formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)")
3194+
assert formatted[0] == "03 12:01:01 (ms=123 us=123000 ns=123000000)"
3195+
assert formatted[1] == "03 12:01:01 (ms=124 us=124000 ns=124000000)"
3196+
3197+
# 6 digits
3198+
per = pd.period_range("2003-01-01 12:01:01.123456", periods=2, freq="u")
3199+
formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)")
3200+
assert formatted[0] == "03 12:01:01 (ms=123 us=123456 ns=123456000)"
3201+
assert formatted[1] == "03 12:01:01 (ms=123 us=123457 ns=123457000)"
3202+
3203+
# 9 digits
3204+
per = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="n")
3205+
formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)")
3206+
assert formatted[0] == "03 12:01:01 (ms=123 us=123456 ns=123456789)"
3207+
assert formatted[1] == "03 12:01:01 (ms=123 us=123456 ns=123456790)"
3208+
3209+
def test_period_tz(self):
3210+
# Formatting periods created from a datetime with timezone.
3211+
3212+
# This timestamp is in 2013 in Europe/Paris but is 2012 in UTC
3213+
dt = pd.to_datetime(["2013-01-01 00:00:00+01:00"], utc=True)
3214+
3215+
# Converting to a period loses the timezone information
3216+
# Since tz is currently set as utc, we'll see 2012
3217+
with tm.assert_produces_warning(UserWarning, match="will drop timezone"):
3218+
per = dt.to_period(freq="H")
3219+
assert per.format()[0] == "2012-12-31 23:00"
3220+
3221+
# If tz is currently set as paris before conversion, we'll see 2013
3222+
dt = dt.tz_convert("Europe/Paris")
3223+
with tm.assert_produces_warning(UserWarning, match="will drop timezone"):
3224+
per = dt.to_period(freq="H")
3225+
assert per.format()[0] == "2013-01-01 00:00"
3226+
3227+
31703228
class TestDatetimeIndexFormat:
31713229
def test_datetime(self):
31723230
formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format()

0 commit comments

Comments
 (0)