diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 3dd8bb2ac2de5..38cd3bb56abae 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -24,6 +24,9 @@ New features `_ on most readers and writers (:issue:`13823`) - Added `__fspath__` method to :class`:pandas.HDFStore`, :class:`pandas.ExcelFile`, and :class:`pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`) +- Added support for the ISO 8601 year, week, and weekday directives (``%G``, + ``%V``, ``%u``) to :func:`to_datetime`; added ``_calc_julian_from_V`` and + amended ``array_strptime`` to support the feature (:issue:`16607`) .. _whatsnew_0210.enhancements.other: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c471d46262484..0f0ada2d910e5 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3598,7 +3598,7 @@ def array_strptime(ndarray[object] values, object fmt, pandas_datetimestruct dts ndarray[int64_t] iresult int year, month, day, minute, hour, second, weekday, julian, tz - int week_of_year, week_of_year_start + int week_of_year, week_of_year_start, iso_week, iso_year int64_t us, ns object val, group_key, ampm, found dict found_key @@ -3658,7 +3658,10 @@ def array_strptime(ndarray[object] values, object fmt, 'U': 15, 'W': 16, 'Z': 17, - 'p': 18 # just an additional key, works only with I + 'p': 18, # just an additional key, works only with I + 'G': 19, + 'V': 20, + 'u': 21 } cdef int parse_code @@ -3701,13 +3704,14 @@ def array_strptime(ndarray[object] values, object fmt, raise ValueError("time data %r does not match format " "%r (search)" % (values[i], fmt)) + iso_year = -1 year = 1900 month = day = 1 hour = minute = second = ns = us = 0 tz = -1 # Default to -1 to signify that values not known; not critical to have, # though - week_of_year = -1 + iso_week = week_of_year = -1 week_of_year_start = -1 # weekday and julian defaulted to -1 so as to signal need to calculate # values @@ -3809,12 +3813,45 @@ def 
array_strptime(ndarray[object] values, object fmt, else: tz = value break + elif parse_code == 19: + iso_year = int(found_dict['G']) + elif parse_code == 20: + iso_week = int(found_dict['V']) + elif parse_code == 21: + weekday = int(found_dict['u']) + weekday -= 1 + + + # don't assume default values for ISO week/year + if iso_year != -1: + if iso_week == -1 or weekday == -1: + raise ValueError("ISO year directive '%G' must be used with " + "the ISO week directive '%V' and a weekday " + "directive '%A', '%a', '%w', or '%u'.") + if julian != -1: + raise ValueError("Day of the year directive '%j' is not " + "compatible with ISO year directive '%G'. " + "Use '%Y' instead.") + elif year != -1 and week_of_year == -1 and iso_week != -1: + if weekday == -1: + raise ValueError("ISO week directive '%V' must be used with " + "the ISO year directive '%G' and a weekday " + "directive '%A', '%a', '%w', or '%u'.") + else: + raise ValueError("ISO week directive '%V' is incompatible with" + " the year directive '%Y'. Use the ISO year " + "'%G' instead.") + # If we know the wk of the year and what day of that wk, we can figure # out the Julian day of the year. - if julian == -1 and week_of_year != -1 and weekday != -1: - week_starts_Mon = True if week_of_year_start == 0 else False - julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, - week_starts_Mon) + if julian == -1 and weekday != -1: + if week_of_year != -1: + week_starts_Mon = True if week_of_year_start == 0 else False + julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, + week_starts_Mon) + elif iso_year != -1 and iso_week != -1: + year, julian = _calc_julian_from_V(iso_year, iso_week, weekday + 1) + # Cannot pre-calculate datetime_date() since can change in Julian # calculation and thus could have different value for the day of the wk # calculation. 
@@ -5630,6 +5667,7 @@ class TimeRE(dict): 'f': r"(?P[0-9]{1,9})", 'H': r"(?P2[0-3]|[0-1]\d|\d)", 'I': r"(?P1[0-2]|0[1-9]|[1-9])", + 'G': r"(?P\d\d\d\d)", 'j': (r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|" r"[1-9]\d|0[1-9]|[1-9])"), 'm': r"(?P1[0-2]|0[1-9]|[1-9])", @@ -5637,6 +5675,8 @@ class TimeRE(dict): 'S': r"(?P6[0-1]|[0-5]\d|\d)", 'U': r"(?P5[0-3]|[0-4]\d|\d)", 'w': r"(?P[0-6])", + 'u': r"(?P[1-7])", + 'V': r"(?P5[0-3]|0[1-9]|[1-4]\d|\d)", # W is set below by using 'U' 'y': r"(?P\d\d)", #XXX: Does 'Y' need to worry about having less or more than @@ -5736,3 +5776,22 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year, # def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"): # return _strptime(data_string, format)[0] + +cdef _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday): + """Calculate the Julian day based on the ISO 8601 year, week, and weekday. + ISO weeks start on Mondays, with week 01 being the week containing 4 Jan. + ISO week days range from 1 (Monday) to 7 (Sunday).""" + + cdef: + int correction, ordinal + + correction = datetime_date(iso_year, 1, 4).isoweekday() + 3 + ordinal = (iso_week * 7) + iso_weekday - correction + # ordinal may be negative or 0 now, which means the date is in the previous + # calendar year + if ordinal < 1: + ordinal += datetime_date(iso_year, 1, 1).toordinal() + iso_year -= 1 + ordinal -= datetime_date(iso_year, 1, 1).toordinal() + return iso_year, ordinal + diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index a47db755b44af..f69632ee748d6 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -175,6 +175,53 @@ def test_to_datetime_format_weeks(self): class TestToDatetime(object): + @pytest.mark.parametrize("s, _format, dt", [ + ['2015-1-1', '%G-%V-%u', datetime(2014, 12, 29, 0, 0)], + ['2015-1-4', '%G-%V-%u', datetime(2015, 1, 1, 0, 0)], + ['2015-1-7', '%G-%V-%u', 
datetime(2015, 1, 4, 0, 0)] + ]) + def test_to_datetime_iso_week_year_format(self, s, _format, dt): + assert to_datetime(s, format=_format) == dt + + @pytest.mark.parametrize("msg, s, _format", [ + ["ISO week directive '%V' must be used with the ISO year directive " + "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 50", + "%Y %V"], + ["ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 51", + "%G %V"], + ["ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 " + "Monday", "%G %A"], + ["ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 Mon", + "%G %a"], + ["ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 6", + "%G %w"], + ["ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 6", + "%G %u"], + ["ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "2051", + "%G"], + ["Day of the year directive '%j' is not compatible with ISO year " + "directive '%G'. Use '%Y' instead.", "1999 51 6 256", "%G %V %u %j"], + ["ISO week directive '%V' is incompatible with the year directive " + "'%Y'. Use the ISO year '%G' instead.", "1999 51 Sunday", "%Y %V %A"], + ["ISO week directive '%V' is incompatible with the year directive " + "'%Y'. Use the ISO year '%G' instead.", "1999 51 Sun", "%Y %V %a"], + ["ISO week directive '%V' is incompatible with the year directive " + "'%Y'. Use the ISO year '%G' instead.", "1999 51 1", "%Y %V %w"], + ["ISO week directive '%V' is incompatible with the year directive " + "'%Y'. 
Use the ISO year '%G' instead.", "1999 51 1", "%Y %V %u"], + ["ISO week directive '%V' must be used with the ISO year directive " + "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.", "20", "%V"] + ]) + def test_ValueError_iso_week_year(self, msg, s, _format): + with tm.assert_raises_regex(ValueError, msg): + to_datetime(s, format=_format) + def test_to_datetime_dt64s(self): in_bound_dts = [ np.datetime64('2000-01-01'),