Skip to content

Commit d03603b

Browse files
committed
Implementing iso_week_year support for to_datetime
1 parent f886139 commit d03603b

File tree

4 files changed

+157
-8
lines changed

4 files changed

+157
-8
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ including other versions of pandas.
1919
Other Enhancements
2020
^^^^^^^^^^^^^^^^^^
2121

22+
- Added support for ISO week year format ('%G-%V-%u') when parsing datetimes using :meth:`to_datetime` (:issue:`16607`)
2223
- Indexing of ``DataFrame`` and ``Series`` now accepts zerodim ``np.ndarray`` (:issue:`24919`)
2324
- :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`)
2425
- :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :meth:`datetime.time` objects with timezones (:issue:`24043`)

pandas/_libs/tslibs/strptime.pyx

+97-8
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,10 @@ cdef dict _parse_code_table = {'y': 0,
5454
'W': 16,
5555
'Z': 17,
5656
'p': 18, # an additional key, only with I
57-
'z': 19}
57+
'z': 19,
58+
'G': 20,
59+
'V': 21,
60+
'u': 22}
5861

5962

6063
def array_strptime(object[:] values, object fmt,
@@ -77,6 +80,7 @@ def array_strptime(object[:] values, object fmt,
7780
object[:] result_timezone
7881
int year, month, day, minute, hour, second, weekday, julian
7982
int week_of_year, week_of_year_start, parse_code, ordinal
83+
int iso_week, iso_year
8084
int64_t us, ns
8185
object val, group_key, ampm, found, timezone
8286
dict found_key
@@ -169,13 +173,14 @@ def array_strptime(object[:] values, object fmt,
169173
raise ValueError("time data %r does not match format "
170174
"%r (search)" % (values[i], fmt))
171175

176+
iso_year = -1
172177
year = 1900
173178
month = day = 1
174179
hour = minute = second = ns = us = 0
175180
timezone = None
176181
# Default to -1 to signify that values not known; not critical to have,
177182
# though
178-
week_of_year = -1
183+
iso_week = week_of_year = -1
179184
week_of_year_start = -1
180185
# weekday and julian defaulted to -1 so as to signal need to calculate
181186
# values
@@ -265,13 +270,44 @@ def array_strptime(object[:] values, object fmt,
265270
timezone = pytz.timezone(found_dict['Z'])
266271
elif parse_code == 19:
267272
timezone = parse_timezone_directive(found_dict['z'])
273+
elif parse_code == 20:
274+
iso_year = int(found_dict['G'])
275+
elif parse_code == 21:
276+
iso_week = int(found_dict['V'])
277+
elif parse_code == 22:
278+
weekday = int(found_dict['u'])
279+
weekday -= 1
280+
281+
# don't assume default values for ISO week/year
282+
if iso_year != -1:
283+
if iso_week == -1 or weekday == -1:
284+
raise ValueError("ISO year directive '%G' must be used with "
285+
"the ISO week directive '%V' and a weekday "
286+
"directive '%A', '%a', '%w', or '%u'.")
287+
if julian != -1:
288+
raise ValueError("Day of the year directive '%j' is not "
289+
"compatible with ISO year directive '%G'. "
290+
"Use '%Y' instead.")
291+
elif year != -1 and week_of_year == -1 and iso_week != -1:
292+
if weekday == -1:
293+
raise ValueError("ISO week directive '%V' must be used with "
294+
"the ISO year directive '%G' and a weekday "
295+
"directive '%A', '%a', '%w', or '%u'.")
296+
else:
297+
raise ValueError("ISO week directive '%V' is incompatible with"
298+
" the year directive '%Y'. Use the ISO year "
299+
"'%G' instead.")
268300

269301
# If we know the wk of the year and what day of that wk, we can figure
270302
# out the Julian day of the year.
271-
if julian == -1 and week_of_year != -1 and weekday != -1:
272-
week_starts_Mon = True if week_of_year_start == 0 else False
273-
julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
274-
week_starts_Mon)
303+
if julian == -1 and weekday != -1:
304+
if week_of_year != -1:
305+
week_starts_Mon = week_of_year_start == 0
306+
julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
307+
week_starts_Mon)
308+
elif iso_year != -1 and iso_week != -1:
309+
year, julian = _calc_julian_from_V(iso_year, iso_week,
310+
weekday + 1)
275311
# Cannot pre-calculate datetime_date() since can change in Julian
276312
# calculation and thus could have different value for the day of the wk
277313
# calculation.
@@ -511,14 +547,17 @@ class TimeRE(dict):
511547
# The " \d" part of the regex is to make %c from ANSI C work
512548
'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
513549
'f': r"(?P<f>[0-9]{1,9})",
550+
'G': r"(?P<G>\d\d\d\d)",
514551
'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
515552
'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
516553
'j': (r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|"
517554
r"[1-9]\d|0[1-9]|[1-9])"),
518555
'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
519556
'M': r"(?P<M>[0-5]\d|\d)",
520557
'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
558+
'u': r"(?P<u>[1-7])",
521559
'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
560+
'V': r"(?P<V>5[0-3]|0[1-9]|[1-4]\d|\d)",
522561
'w': r"(?P<w>[0-6])",
523562
# W is set below by using 'U'
524563
'y': r"(?P<y>\d\d)",
@@ -593,11 +632,27 @@ _CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache
593632
_regex_cache = {}
594633

595634

596-
cdef _calc_julian_from_U_or_W(int year, int week_of_year,
635+
cdef int _calc_julian_from_U_or_W(int year, int week_of_year,
597636
int day_of_week, int week_starts_Mon):
598637
"""Calculate the Julian day based on the year, week of the year, and day of
599638
the week, with week_start_day representing whether the week of the year
600-
assumes the week starts on Sunday or Monday (6 or 0)."""
639+
assumes the week starts on Sunday or Monday (6 or 0).
640+
641+
Parameters
642+
----------
643+
year : int
644+
the year
645+
week_of_year : int
646+
week taken from format U or W
647+
week_starts_Mon : int
648+
represents whether the week of the year
649+
assumes the week starts on Sunday or Monday (6 or 0)
650+
651+
Returns
652+
-------
653+
int
654+
converted julian day
655+
"""
601656

602657
cdef:
603658
int first_weekday, week_0_length, days_to_week
@@ -620,6 +675,40 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year,
620675
return 1 + days_to_week + day_of_week
621676

622677

678+
cdef object _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
    """
    Convert an ISO 8601 (year, week, weekday) triple into a calendar year and
    a day of that year.

    ISO weeks begin on Monday, week 01 being the week that contains 4 Jan.
    ISO weekdays run from 1 (Monday) through 7 (Sunday).

    Parameters
    ----------
    iso_year : int
        ISO year, taken from format %G
    iso_week : int
        ISO week number, taken from format %V
    iso_weekday : int
        ISO weekday (1 = Monday ... 7 = Sunday), taken from format %u

    Returns
    -------
    (int, int)
        The calendar year and the 1-based day of that year. The year is
        ``iso_year - 1`` when the date falls before 1 Jan of ``iso_year``;
        otherwise it is ``iso_year``. The day is not capped at the length of
        the year, so a late ISO week can yield a value past 31 Dec.
    """
    cdef:
        int jan4_shift, day_of_year

    # Monday of ISO week 01 is day ``5 - isoweekday(4 Jan)`` of iso_year, so
    # (week w, weekday d) is day ``7 * w + d - (isoweekday(4 Jan) + 3)``.
    jan4_shift = datetime_date(iso_year, 1, 4).isoweekday() + 3
    day_of_year = 7 * iso_week + iso_weekday - jan4_shift

    if day_of_year >= 1:
        return iso_year, day_of_year

    # Zero or negative: the date belongs to the previous calendar year.
    # Re-express it relative to 1 Jan of that year by adding the number of
    # days the previous year contains.
    day_of_year += (datetime_date(iso_year, 1, 1).toordinal()
                    - datetime_date(iso_year - 1, 1, 1).toordinal())
    return iso_year - 1, day_of_year
710+
711+
623712
cdef parse_timezone_directive(object z):
624713
"""
625714
Parse the '%z' directive and return a pytz.FixedOffset

pandas/core/tools/datetimes.py

+2
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
447447
format : string, default None
448448
strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse
449449
all the way up to nanoseconds.
450+
See strftime documentation for more information on choices:
451+
https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
450452
exact : boolean, True by default
451453
452454
- If True, require an exact format match.

pandas/tests/indexes/datetimes/test_tools.py

+57
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,63 @@ def test_to_datetime_parse_timezone_keeps_name(self):
247247

248248

249249
class TestToDatetime(object):
250+
@pytest.mark.parametrize("s, _format, dt", [
    ('2015-1-1', '%G-%V-%u', datetime(2014, 12, 29, 0, 0)),
    ('2015-1-4', '%G-%V-%u', datetime(2015, 1, 1, 0, 0)),
    ('2015-1-7', '%G-%V-%u', datetime(2015, 1, 4, 0, 0)),
])
def test_to_datetime_iso_week_year_format(self, s, _format, dt):
    # GH#16607: ISO year / ISO week / ISO weekday strings must parse to the
    # matching Gregorian date, including a date that falls in the previous
    # calendar year (2015-W01-1 is 2014-12-29).
    parsed = to_datetime(s, format=_format)
    assert parsed == dt
258+
259+
@pytest.mark.parametrize("msg, s, _format", [
    ("ISO week directive '%V' must be used with "
     "the ISO year directive '%G' and a weekday "
     "directive '%A', '%a', '%w', or '%u'.",
     "1999 50", "%Y %V"),
    ("ISO year directive '%G' must be used with "
     "the ISO week directive '%V' and a weekday "
     "directive '%A', '%a', '%w', or '%u'.",
     "1999 51", "%G %V"),
    ("ISO year directive '%G' must be used with "
     "the ISO week directive '%V' and a weekday "
     "directive '%A', '%a', '%w', or '%u'.",
     "1999 Monday", "%G %A"),
    ("ISO year directive '%G' must be used with "
     "the ISO week directive '%V' and a weekday "
     "directive '%A', '%a', '%w', or '%u'.",
     "1999 Mon", "%G %a"),
    ("ISO year directive '%G' must be used with "
     "the ISO week directive '%V' and a weekday "
     "directive '%A', '%a', '%w', or '%u'.",
     "1999 6", "%G %w"),
    ("ISO year directive '%G' must be used with "
     "the ISO week directive '%V' and a weekday "
     "directive '%A', '%a', '%w', or '%u'.",
     "1999 6", "%G %u"),
    ("ISO year directive '%G' must be used with "
     "the ISO week directive '%V' and a weekday "
     "directive '%A', '%a', '%w', or '%u'.",
     "2051", "%G"),
    ("Day of the year directive '%j' is not "
     "compatible with ISO year directive '%G'. "
     "Use '%Y' instead.",
     "1999 51 6 256", "%G %V %u %j"),
    ("ISO week directive '%V' is incompatible with"
     " the year directive '%Y'. Use the ISO year "
     "'%G' instead.",
     "1999 51 Sunday", "%Y %V %A"),
    ("ISO week directive '%V' is incompatible with"
     " the year directive '%Y'. Use the ISO year "
     "'%G' instead.",
     "1999 51 Sun", "%Y %V %a"),
    ("ISO week directive '%V' is incompatible with"
     " the year directive '%Y'. Use the ISO year "
     "'%G' instead.",
     "1999 51 1", "%Y %V %w"),
    ("ISO week directive '%V' is incompatible with"
     " the year directive '%Y'. Use the ISO year "
     "'%G' instead.",
     "1999 51 1", "%Y %V %u"),
    ("ISO week directive '%V' must be used with "
     "the ISO year directive '%G' and a weekday "
     "directive '%A', '%a', '%w', or '%u'.",
     "20", "%V"),
])
def test_error_iso_week_year(self, msg, s, _format):
    # GH#16607: invalid combinations of the ISO directives must raise a
    # ValueError carrying the expected message.
    #
    # As discussed on PR#25541, the day names in the test data are English,
    # so when the locale has been overridden the format no longer matches
    # and a different error is raised. As a workaround the check is only
    # performed when the locale is not one of the overridden ones.
    overridden_locales = (('zh_CN', 'UTF-8'), ('it_IT', 'UTF-8'))
    if locale.getlocale() in overridden_locales:
        return

    with pytest.raises(ValueError, match=msg):
        to_datetime(s, format=_format)
306+
250307
@pytest.mark.parametrize('tz', [None, 'US/Central'])
251308
def test_to_datetime_dtarr(self, tz):
252309
# DatetimeArray

0 commit comments

Comments
 (0)