Skip to content

Commit a504276

Browse files
authored
BUG: Parsing week-freq Periods (#50803)
* BUG: Parsing week-freq Periods * GH ref, remove redundant check * handle week as fallback
1 parent fdba1e6 commit a504276

File tree

4 files changed

+79
-28
lines changed

4 files changed

+79
-28
lines changed

doc/source/whatsnew/v2.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -1102,6 +1102,8 @@ Period
11021102
- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, raising ``UnicodeDecodeError`` when a locale-specific directive was passed (:issue:`46319`)
11031103
- Bug in adding a :class:`Period` object to an array of :class:`DateOffset` objects incorrectly raising ``TypeError`` (:issue:`50162`)
11041104
- Bug in :class:`Period` where passing a string with finer resolution than nanosecond would result in a ``KeyError`` instead of dropping the extra precision (:issue:`50417`)
1105+
- Bug in :class:`Period` parsing strings that represent weekly periods, e.g. ``"2017-01-23/2017-01-29"``, as minute-frequency instead of week-frequency (:issue:`50803`)
1106+
-
11051107

11061108
Plotting
11071109
^^^^^^^^

pandas/_libs/tslibs/parsing.pyx

+13-15
Original file line numberDiff line numberDiff line change
@@ -316,21 +316,6 @@ def parse_datetime_string(
316316
dt = dateutil_parse(date_string, default=_DEFAULT_DATETIME,
317317
dayfirst=dayfirst, yearfirst=yearfirst,
318318
ignoretz=False, out_bestunit=&out_bestunit)
319-
320-
if dt.tzinfo is not None:
321-
# dateutil can return a datetime with a tzoffset outside of (-24H, 24H)
322-
# bounds, which is invalid (can be constructed, but raises if we call
323-
# str(dt)). Check that and raise here if necessary.
324-
try:
325-
dt.utcoffset()
326-
except ValueError as err:
327-
# offset must be a timedelta strictly between -timedelta(hours=24)
328-
# and timedelta(hours=24)
329-
raise ValueError(
330-
f'Parsed string "{date_string}" gives an invalid tzoffset, '
331-
"which must be between -timedelta(hours=24) and timedelta(hours=24)"
332-
)
333-
334319
return dt
335320

336321

@@ -696,6 +681,19 @@ cdef datetime dateutil_parse(
696681
elif res.tzoffset:
697682
ret = ret.replace(tzinfo=tzoffset(res.tzname, res.tzoffset))
698683

684+
# dateutil can return a datetime with a tzoffset outside of (-24H, 24H)
685+
# bounds, which is invalid (can be constructed, but raises if we call
686+
# str(ret)). Check that and raise here if necessary.
687+
try:
688+
ret.utcoffset()
689+
except ValueError as err:
690+
# offset must be a timedelta strictly between -timedelta(hours=24)
691+
# and timedelta(hours=24)
692+
raise ValueError(
693+
f'Parsed string "{timestr}" gives an invalid tzoffset, '
694+
"which must be between -timedelta(hours=24) and timedelta(hours=24)"
695+
)
696+
699697
out_bestunit[0] = attrname_to_npy_unit[reso]
700698
return ret
701699

pandas/_libs/tslibs/period.pyx

+51-13
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import re
2+
13
cimport numpy as cnp
24
from cpython.object cimport (
35
Py_EQ,
@@ -2591,20 +2593,31 @@ class Period(_Period):
25912593
value = value.upper()
25922594

25932595
freqstr = freq.rule_code if freq is not None else None
2594-
dt, reso = parse_datetime_string_with_reso(value, freqstr)
2595-
if reso == "nanosecond":
2596-
nanosecond = dt.nanosecond
2597-
if dt is NaT:
2598-
ordinal = NPY_NAT
2596+
try:
2597+
dt, reso = parse_datetime_string_with_reso(value, freqstr)
2598+
except ValueError as err:
2599+
match = re.search(r"^\d{4}-\d{2}-\d{2}/\d{4}-\d{2}-\d{2}", value)
2600+
if match:
2601+
# Case that cannot be parsed (correctly) by our datetime
2602+
# parsing logic
2603+
dt, freq = _parse_weekly_str(value, freq)
2604+
else:
2605+
raise err
25992606

2600-
if freq is None and ordinal != NPY_NAT:
2601-
# Skip NaT, since it doesn't have a resolution
2602-
try:
2603-
freq = attrname_to_abbrevs[reso]
2604-
except KeyError:
2605-
raise ValueError(f"Invalid frequency or could not "
2606-
f"infer: {reso}")
2607-
freq = to_offset(freq)
2607+
else:
2608+
if reso == "nanosecond":
2609+
nanosecond = dt.nanosecond
2610+
if dt is NaT:
2611+
ordinal = NPY_NAT
2612+
2613+
if freq is None and ordinal != NPY_NAT:
2614+
# Skip NaT, since it doesn't have a resolution
2615+
try:
2616+
freq = attrname_to_abbrevs[reso]
2617+
except KeyError:
2618+
raise ValueError(f"Invalid frequency or could not "
2619+
f"infer: {reso}")
2620+
freq = to_offset(freq)
26082621

26092622
elif PyDateTime_Check(value):
26102623
dt = value
@@ -2664,3 +2677,28 @@ def validate_end_alias(how: str) -> str: # Literal["E", "S"]
26642677
if how not in {"S", "E"}:
26652678
raise ValueError("How must be one of S or E")
26662679
return how
2680+
2681+
2682+
cdef _parse_weekly_str(value, BaseOffset freq):
    """
    Parse e.g. "2017-01-23/2017-01-29", which cannot be parsed by the general
    datetime-parsing logic. This ensures that we can round-trip with
    Period.__str__ with weekly freq.

    Parameters
    ----------
    value : str
        String of the form "<start>/<end>", where each side is parsed
        with ``Timestamp``.
    freq : BaseOffset or None
        Frequency supplied by the caller. When None, a weekly frequency
        anchored on the weekday of the end date is inferred.

    Returns
    -------
    tuple
        ``(end, freq)``: the parsed end Timestamp and the frequency (the
        one passed in, or the inferred ``W-<DAY>`` offset).

    Raises
    ------
    ValueError
        If ``value`` does not consist of exactly two "/"-separated parts,
        or if the two dates are not 6 days apart.
    """
    # GH#50803
    parts = value.split("/")
    if len(parts) != 2:
        # A string with extra "/" separators would otherwise fail on tuple
        # unpacking with an unrelated "too many values to unpack" message.
        raise ValueError("Could not parse as weekly-freq Period")

    start = Timestamp(parts[0])
    end = Timestamp(parts[1])

    if (end - start).days != 6:
        # We are interested in cases where this is str(period)
        #  of a Week-freq period
        raise ValueError("Could not parse as weekly-freq Period")

    if freq is None:
        # Anchor the inferred weekly frequency on the end date's weekday,
        # e.g. an end date falling on a Sunday gives "W-SUN".
        day_name = end.day_name()[:3].upper()
        freqstr = f"W-{day_name}"
        freq = to_offset(freqstr)
        # We _should_ have freq.is_on_offset(end)

    return end, freq

pandas/tests/scalar/period/test_period.py

+13
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,19 @@ def test_period_cons_weekly(self, num, day):
399399
assert result == expected
400400
assert isinstance(result, Period)
401401

402+
def test_parse_week_str_roundstrip(self):
    # GH#50803
    # A "<start>/<end>" string spanning 6 days should be parsed as a
    # weekly Period anchored on the end date's weekday, and rendering
    # that Period back with str() should reproduce the input string.
    per = Period("2017-01-23/2017-01-29")
    assert per.freq.freqstr == "W-SUN"
    assert str(per) == "2017-01-23/2017-01-29"

    per = Period("2017-01-24/2017-01-30")
    assert per.freq.freqstr == "W-MON"
    assert str(per) == "2017-01-24/2017-01-30"

    msg = "Could not parse as weekly-freq Period"
    with pytest.raises(ValueError, match=msg):
        # not 6 days apart
        Period("2016-01-23/2017-01-29")
414+
402415
def test_period_from_ordinal(self):
403416
p = Period("2011-01", freq="M")
404417
res = Period._from_ordinal(p.ordinal, freq="M")

0 commit comments

Comments
 (0)