diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 3dd8bb2ac2de5..38cd3bb56abae 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -24,6 +24,9 @@ New features
`_ on most readers and writers (:issue:`13823`)
- Added `__fspath__` method to :class`:pandas.HDFStore`, :class:`pandas.ExcelFile`,
and :class:`pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`)
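+- Added support for the ISO 8601 week-date directives ``%G`` (ISO year), ``%V`` (ISO week) and ``%u`` (ISO weekday) to :func:`to_datetime`;
+  the new helper ``_calc_julian_from_V`` was added and ``array_strptime`` was amended to support the feature
+  (:issue:`16607`)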
.. _whatsnew_0210.enhancements.other:
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index c471d46262484..0f0ada2d910e5 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -3598,7 +3598,7 @@ def array_strptime(ndarray[object] values, object fmt,
pandas_datetimestruct dts
ndarray[int64_t] iresult
int year, month, day, minute, hour, second, weekday, julian, tz
- int week_of_year, week_of_year_start
+ int week_of_year, week_of_year_start, iso_week, iso_year
int64_t us, ns
object val, group_key, ampm, found
dict found_key
@@ -3658,7 +3658,10 @@ def array_strptime(ndarray[object] values, object fmt,
'U': 15,
'W': 16,
'Z': 17,
- 'p': 18 # just an additional key, works only with I
+ 'p': 18, # just an additional key, works only with I
+ 'G': 19,
+ 'V': 20,
+ 'u': 21
}
cdef int parse_code
@@ -3701,13 +3704,14 @@ def array_strptime(ndarray[object] values, object fmt,
raise ValueError("time data %r does not match format "
"%r (search)" % (values[i], fmt))
+ iso_year = -1
year = 1900
month = day = 1
hour = minute = second = ns = us = 0
tz = -1
# Default to -1 to signify that values not known; not critical to have,
# though
- week_of_year = -1
+ iso_week = week_of_year = -1
week_of_year_start = -1
# weekday and julian defaulted to -1 so as to signal need to calculate
# values
@@ -3809,12 +3813,45 @@ def array_strptime(ndarray[object] values, object fmt,
else:
tz = value
break
+ elif parse_code == 19:
+ iso_year = int(found_dict['G'])
+ elif parse_code == 20:
+ iso_week = int(found_dict['V'])
+ elif parse_code == 21:
+ weekday = int(found_dict['u'])
+ weekday -= 1
+
+
+ # don't assume default values for ISO week/year
+ if iso_year != -1:
+ if iso_week == -1 or weekday == -1:
+ raise ValueError("ISO year directive '%G' must be used with "
+ "the ISO week directive '%V' and a weekday "
+ "directive '%A', '%a', '%w', or '%u'.")
+ if julian != -1:
+ raise ValueError("Day of the year directive '%j' is not "
+ "compatible with ISO year directive '%G'. "
+ "Use '%Y' instead.")
+ elif year != -1 and week_of_year == -1 and iso_week != -1:
+ if weekday == -1:
+ raise ValueError("ISO week directive '%V' must be used with "
+ "the ISO year directive '%G' and a weekday "
+ "directive '%A', '%a', '%w', or '%u'.")
+ else:
+ raise ValueError("ISO week directive '%V' is incompatible with"
+ " the year directive '%Y'. Use the ISO year "
+ "'%G' instead.")
+
# If we know the wk of the year and what day of that wk, we can figure
# out the Julian day of the year.
- if julian == -1 and week_of_year != -1 and weekday != -1:
- week_starts_Mon = True if week_of_year_start == 0 else False
- julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
- week_starts_Mon)
+ if julian == -1 and weekday != -1:
+ if week_of_year != -1:
+ week_starts_Mon = True if week_of_year_start == 0 else False
+ julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
+ week_starts_Mon)
+ elif iso_year != -1 and iso_week != -1:
+ year, julian = _calc_julian_from_V(iso_year, iso_week, weekday + 1)
+
# Cannot pre-calculate datetime_date() since can change in Julian
# calculation and thus could have different value for the day of the wk
# calculation.
@@ -5630,6 +5667,7 @@ class TimeRE(dict):
'f': r"(?P[0-9]{1,9})",
'H': r"(?P2[0-3]|[0-1]\d|\d)",
'I': r"(?P1[0-2]|0[1-9]|[1-9])",
+ 'G': r"(?P\d\d\d\d)",
'j': (r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|"
r"[1-9]\d|0[1-9]|[1-9])"),
'm': r"(?P1[0-2]|0[1-9]|[1-9])",
@@ -5637,6 +5675,8 @@ class TimeRE(dict):
'S': r"(?P6[0-1]|[0-5]\d|\d)",
'U': r"(?P5[0-3]|[0-4]\d|\d)",
'w': r"(?P[0-6])",
+ 'u': r"(?P[1-7])",
+ 'V': r"(?P5[0-3]|0[1-9]|[1-4]\d|\d)",
# W is set below by using 'U'
'y': r"(?P\d\d)",
#XXX: Does 'Y' need to worry about having less or more than
@@ -5736,3 +5776,22 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year,
# def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"):
# return _strptime(data_string, format)[0]
+
+cdef _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
+    """Convert an ISO 8601 week date into a (year, day-of-year) pair.
+
+    ISO weeks start on Monday and week 01 is the week that contains
+    4 January. ISO weekdays run from 1 (Monday) to 7 (Sunday).
+
+    Returns a tuple ``(year, day_of_year)``. ``year`` is ``iso_year``
+    unless the requested day falls before 1 January of ``iso_year``, in
+    which case it is ``iso_year - 1`` and the day-of-year is counted from
+    1 January of that earlier year. Results that run past the end of the
+    year are not wrapped here.
+    """
+
+    cdef:
+        int jan4_shift, day_of_year
+
+    # Offset that aligns "week 1, Monday" with its day-of-year, derived
+    # from the weekday of 4 January (always inside ISO week 01).
+    jan4_shift = 3 + datetime_date(iso_year, 1, 4).isoweekday()
+    day_of_year = iso_weekday + (7 * iso_week) - jan4_shift
+
+    if day_of_year >= 1:
+        return iso_year, day_of_year
+
+    # Zero or negative: the date belongs to the previous calendar year, so
+    # re-base the count on 1 January of that year.
+    day_of_year += datetime_date(iso_year, 1, 1).toordinal()
+    iso_year = iso_year - 1
+    day_of_year -= datetime_date(iso_year, 1, 1).toordinal()
+    return iso_year, day_of_year
+
diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py
index a47db755b44af..f69632ee748d6 100644
--- a/pandas/tests/indexes/datetimes/test_tools.py
+++ b/pandas/tests/indexes/datetimes/test_tools.py
@@ -175,6 +175,53 @@ def test_to_datetime_format_weeks(self):
class TestToDatetime(object):
+ @pytest.mark.parametrize("s, _format, dt", [
+ ['2015-1-1', '%G-%V-%u', datetime(2014, 12, 29, 0, 0)],
+ ['2015-1-4', '%G-%V-%u', datetime(2015, 1, 1, 0, 0)],
+ ['2015-1-7', '%G-%V-%u', datetime(2015, 1, 4, 0, 0)]
+ ])
+ def test_to_datetime_iso_week_year_format(self, s, _format, dt):
+ assert to_datetime(s, format=_format) == dt
+
+ @pytest.mark.parametrize("msg, s, _format", [
+ ["ISO week directive '%V' must be used with the ISO year directive "
+ "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 50",
+ "%Y %V"],
+ ["ISO year directive '%G' must be used with the ISO week directive "
+ "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 51",
+ "%G %V"],
+ ["ISO year directive '%G' must be used with the ISO week directive "
+ "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 "
+ "Monday", "%G %A"],
+ ["ISO year directive '%G' must be used with the ISO week directive "
+ "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 Mon",
+ "%G %a"],
+ ["ISO year directive '%G' must be used with the ISO week directive "
+ "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 6",
+ "%G %w"],
+ ["ISO year directive '%G' must be used with the ISO week directive "
+ "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 6",
+ "%G %u"],
+ ["ISO year directive '%G' must be used with the ISO week directive "
+ "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "2051",
+ "%G"],
+ ["Day of the year directive '%j' is not compatible with ISO year "
+ "directive '%G'. Use '%Y' instead.", "1999 51 6 256", "%G %V %u %j"],
+ ["ISO week directive '%V' is incompatible with the year directive "
+ "'%Y'. Use the ISO year '%G' instead.", "1999 51 Sunday", "%Y %V %A"],
+ ["ISO week directive '%V' is incompatible with the year directive "
+ "'%Y'. Use the ISO year '%G' instead.", "1999 51 Sun", "%Y %V %a"],
+ ["ISO week directive '%V' is incompatible with the year directive "
+ "'%Y'. Use the ISO year '%G' instead.", "1999 51 1", "%Y %V %w"],
+ ["ISO week directive '%V' is incompatible with the year directive "
+ "'%Y'. Use the ISO year '%G' instead.", "1999 51 1", "%Y %V %u"],
+ ["ISO week directive '%V' must be used with the ISO year directive "
+ "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.", "20", "%V"]
+ ])
+ def test_ValueError_iso_week_year(self, msg, s, _format):
+ with tm.assert_raises_regex(ValueError, msg):
+ to_datetime(s, format=_format)
+
def test_to_datetime_dt64s(self):
in_bound_dts = [
np.datetime64('2000-01-01'),