From 274a8e068366737369dd6ca22e5a3a0ef6695aac Mon Sep 17 00:00:00 2001 From: rosygupta Date: Sun, 11 Jun 2017 02:49:04 +0530 Subject: [PATCH 01/14] GH16607 ENH: to_datetime support iso week year --- pandas/_libs/tslib.pyx | 59 +++++++++++++++++++- pandas/tests/indexes/datetimes/test_tools.py | 8 +++ 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c471d46262484..b2a251c8e8441 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3658,7 +3658,10 @@ def array_strptime(ndarray[object] values, object fmt, 'U': 15, 'W': 16, 'Z': 17, - 'p': 18 # just an additional key, works only with I + 'p': 18, # just an additional key, works only with I + 'G': 19, + 'V': 20, + 'u': 21 } cdef int parse_code @@ -3678,6 +3681,7 @@ def array_strptime(ndarray[object] values, object fmt, # exact matching if exact: found = format_regex.match(val) + print found if not found: if is_coerce: iresult[i] = NPY_NAT @@ -3701,13 +3705,14 @@ def array_strptime(ndarray[object] values, object fmt, raise ValueError("time data %r does not match format " "%r (search)" % (values[i], fmt)) + iso_year = -1 year = 1900 month = day = 1 hour = minute = second = ns = us = 0 tz = -1 # Default to -1 to signify that values not known; not critical to have, # though - week_of_year = -1 + iso_week = week_of_year = -1 week_of_year_start = -1 # weekday and julian defaulted to -1 so as to signal need to calculate # values @@ -3809,12 +3814,40 @@ def array_strptime(ndarray[object] values, object fmt, else: tz = value break + elif parse_code == 19: + iso_year = int(found_dict['G']) + elif parse_code == 20: + iso_week = int(found_dict['V']) + elif parse_code == 21: + weekday = int(found_dict['u']) + weekday -= 1 + + + # don't assume default values for ISO week/year + if iso_year != -1: + if iso_week == -1 or weekday == -1: + raise ValueError("ISO year directive '%G' must be used with " + "the ISO week directive '%V' and a weekday " + 
"directive ('%w', or '%u').") + if julian != -1: + raise ValueError("Day of the year directive '%j' is not " + "compatible with ISO year directive '%G'. " + "Use '%Y' instead.") + elif week_of_year == -1 and iso_week != -1: + if weekday == -1: + raise ValueError("ISO week directive '%V' must be used with " + "the ISO year directive '%G' and a weekday " + "directive ('%w', or '%u').") + # If we know the wk of the year and what day of that wk, we can figure # out the Julian day of the year. if julian == -1 and week_of_year != -1 and weekday != -1: week_starts_Mon = True if week_of_year_start == 0 else False julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, week_starts_Mon) + elif iso_year != -1 and iso_week != -1 and weekday != -1: + year, julian = _calc_julian_from_V(iso_year, iso_week, weekday + 1) + # Cannot pre-calculate datetime_date() since can change in Julian # calculation and thus could have different value for the day of the wk # calculation. @@ -5630,6 +5663,7 @@ class TimeRE(dict): 'f': r"(?P[0-9]{1,9})", 'H': r"(?P2[0-3]|[0-1]\d|\d)", 'I': r"(?P1[0-2]|0[1-9]|[1-9])", + 'G': r"(?P\d\d\d\d)", 'j': (r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|" r"[1-9]\d|0[1-9]|[1-9])"), 'm': r"(?P1[0-2]|0[1-9]|[1-9])", @@ -5637,6 +5671,8 @@ class TimeRE(dict): 'S': r"(?P6[0-1]|[0-5]\d|\d)", 'U': r"(?P5[0-3]|[0-4]\d|\d)", 'w': r"(?P[0-6])", + 'u': r"(?P[1-7])", + 'V': r"(?P5[0-3]|0[1-9]|[1-4]\d|\d)", # W is set below by using 'U' 'y': r"(?P\d\d)", #XXX: Does 'Y' need to worry about having less or more than @@ -5736,3 +5772,22 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year, # def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"): # return _strptime(data_string, format)[0] + +cdef _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday): + """Calculate the Julian day based on the ISO 8601 year, week, and weekday. + ISO weeks start on Mondays, with week 01 being the week containing 4 Jan. 
+ ISO week days range from 1 (Monday) to 7 (Sunday).""" + + cdef: + int correction, ordinal + + correction = datetime_date(iso_year, 1, 4).isoweekday() + 3 + ordinal = (iso_week * 7) + iso_weekday - correction + # ordinal may be negative or 0 now, which means the date is in the previous + # calendar year + if ordinal < 1: + ordinal += datetime_date(iso_year, 1, 1).toordinal() + iso_year -= 1 + ordinal -= datetime_date(iso_year, 1, 1).toordinal() + return iso_year, ordinal + diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index a47db755b44af..87b2388d6987a 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -175,6 +175,14 @@ def test_to_datetime_format_weeks(self): class TestToDatetime(object): + def test_to_datetime_iso_week_year_format(self): + data = [ + ['2015-53-1', '%G-%V-%u', + datetime(2015, 12, 28, 0, 0)] + ] + for s, format, dt in data: + assert to_datetime(s, format=format) == dt + def test_to_datetime_dt64s(self): in_bound_dts = [ np.datetime64('2000-01-01'), From 7b622ede3a5b5ae873ffca209add09442831791d Mon Sep 17 00:00:00 2001 From: rosygupta Date: Sun, 11 Jun 2017 03:23:18 +0530 Subject: [PATCH 02/14] print removed --- pandas/_libs/tslib.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b2a251c8e8441..647e7c43c7824 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3681,7 +3681,6 @@ def array_strptime(ndarray[object] values, object fmt, # exact matching if exact: found = format_regex.match(val) - print found if not found: if is_coerce: iresult[i] = NPY_NAT From 437dea9555c388be8d2aca5997b0862bb5d8c262 Mon Sep 17 00:00:00 2001 From: rosygupta Date: Sun, 11 Jun 2017 14:25:35 +0530 Subject: [PATCH 03/14] GH16607 ENH: to_datetime support iso week year --- pandas/_libs/tslib.pyx | 58 +++++++++++++++++++- pandas/tests/indexes/datetimes/test_tools.py | 8 +++ 2 files 
changed, 64 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c471d46262484..647e7c43c7824 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3658,7 +3658,10 @@ def array_strptime(ndarray[object] values, object fmt, 'U': 15, 'W': 16, 'Z': 17, - 'p': 18 # just an additional key, works only with I + 'p': 18, # just an additional key, works only with I + 'G': 19, + 'V': 20, + 'u': 21 } cdef int parse_code @@ -3701,13 +3704,14 @@ def array_strptime(ndarray[object] values, object fmt, raise ValueError("time data %r does not match format " "%r (search)" % (values[i], fmt)) + iso_year = -1 year = 1900 month = day = 1 hour = minute = second = ns = us = 0 tz = -1 # Default to -1 to signify that values not known; not critical to have, # though - week_of_year = -1 + iso_week = week_of_year = -1 week_of_year_start = -1 # weekday and julian defaulted to -1 so as to signal need to calculate # values @@ -3809,12 +3813,40 @@ def array_strptime(ndarray[object] values, object fmt, else: tz = value break + elif parse_code == 19: + iso_year = int(found_dict['G']) + elif parse_code == 20: + iso_week = int(found_dict['V']) + elif parse_code == 21: + weekday = int(found_dict['u']) + weekday -= 1 + + + # don't assume default values for ISO week/year + if iso_year != -1: + if iso_week == -1 or weekday == -1: + raise ValueError("ISO year directive '%G' must be used with " + "the ISO week directive '%V' and a weekday " + "directive ('%w', or '%u').") + if julian != -1: + raise ValueError("Day of the year directive '%j' is not " + "compatible with ISO year directive '%G'. " + "Use '%Y' instead.") + elif week_of_year == -1 and iso_week != -1: + if weekday == -1: + raise ValueError("ISO week directive '%V' must be used with " + "the ISO year directive '%G' and a weekday " + "directive ('%w', or '%u').") + # If we know the wk of the year and what day of that wk, we can figure # out the Julian day of the year. 
if julian == -1 and week_of_year != -1 and weekday != -1: week_starts_Mon = True if week_of_year_start == 0 else False julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, week_starts_Mon) + elif iso_year != -1 and iso_week != -1 and weekday != -1: + year, julian = _calc_julian_from_V(iso_year, iso_week, weekday + 1) + # Cannot pre-calculate datetime_date() since can change in Julian # calculation and thus could have different value for the day of the wk # calculation. @@ -5630,6 +5662,7 @@ class TimeRE(dict): 'f': r"(?P[0-9]{1,9})", 'H': r"(?P2[0-3]|[0-1]\d|\d)", 'I': r"(?P1[0-2]|0[1-9]|[1-9])", + 'G': r"(?P\d\d\d\d)", 'j': (r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|" r"[1-9]\d|0[1-9]|[1-9])"), 'm': r"(?P1[0-2]|0[1-9]|[1-9])", @@ -5637,6 +5670,8 @@ class TimeRE(dict): 'S': r"(?P6[0-1]|[0-5]\d|\d)", 'U': r"(?P5[0-3]|[0-4]\d|\d)", 'w': r"(?P[0-6])", + 'u': r"(?P[1-7])", + 'V': r"(?P5[0-3]|0[1-9]|[1-4]\d|\d)", # W is set below by using 'U' 'y': r"(?P\d\d)", #XXX: Does 'Y' need to worry about having less or more than @@ -5736,3 +5771,22 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year, # def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"): # return _strptime(data_string, format)[0] + +cdef _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday): + """Calculate the Julian day based on the ISO 8601 year, week, and weekday. + ISO weeks start on Mondays, with week 01 being the week containing 4 Jan. 
+ ISO week days range from 1 (Monday) to 7 (Sunday).""" + + cdef: + int correction, ordinal + + correction = datetime_date(iso_year, 1, 4).isoweekday() + 3 + ordinal = (iso_week * 7) + iso_weekday - correction + # ordinal may be negative or 0 now, which means the date is in the previous + # calendar year + if ordinal < 1: + ordinal += datetime_date(iso_year, 1, 1).toordinal() + iso_year -= 1 + ordinal -= datetime_date(iso_year, 1, 1).toordinal() + return iso_year, ordinal + diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index a47db755b44af..87b2388d6987a 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -175,6 +175,14 @@ def test_to_datetime_format_weeks(self): class TestToDatetime(object): + def test_to_datetime_iso_week_year_format(self): + data = [ + ['2015-53-1', '%G-%V-%u', + datetime(2015, 12, 28, 0, 0)] + ] + for s, format, dt in data: + assert to_datetime(s, format=format) == dt + def test_to_datetime_dt64s(self): in_bound_dts = [ np.datetime64('2000-01-01'), From 9f53a9948e6ba0dbbf2006094034d60a28b243fa Mon Sep 17 00:00:00 2001 From: rosygupta Date: Mon, 12 Jun 2017 03:27:52 +0530 Subject: [PATCH 04/14] GH16607 ENH: added tests for each of the ValueError, improved checks --- pandas/_libs/tslib.pyx | 23 ++++++++------ pandas/tests/indexes/datetimes/test_tools.py | 32 ++++++++++++++++++-- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 647e7c43c7824..9d564264670f2 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3827,25 +3827,30 @@ def array_strptime(ndarray[object] values, object fmt, if iso_week == -1 or weekday == -1: raise ValueError("ISO year directive '%G' must be used with " "the ISO week directive '%V' and a weekday " - "directive ('%w', or '%u').") + "directive ('%A', '%a', '%w', or '%u').") if julian != -1: raise ValueError("Day of the year 
directive '%j' is not " "compatible with ISO year directive '%G'. " "Use '%Y' instead.") - elif week_of_year == -1 and iso_week != -1: + elif year != -1 and week_of_year == -1 and iso_week != -1: if weekday == -1: raise ValueError("ISO week directive '%V' must be used with " "the ISO year directive '%G' and a weekday " - "directive ('%w', or '%u').") + "directive ('%A', '%a', '%w', or '%u').") + else: + raise ValueError("ISO week directive '%V' is incompatible with " + " the year directive '%Y'. Use the ISO year " + "'%G' instead.") # If we know the wk of the year and what day of that wk, we can figure # out the Julian day of the year. - if julian == -1 and week_of_year != -1 and weekday != -1: - week_starts_Mon = True if week_of_year_start == 0 else False - julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, - week_starts_Mon) - elif iso_year != -1 and iso_week != -1 and weekday != -1: - year, julian = _calc_julian_from_V(iso_year, iso_week, weekday + 1) + if julian == -1 and weekday != -1: + if week_of_year != -1: + week_starts_Mon = True if week_of_year_start == 0 else False + julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, + week_starts_Mon) + elif iso_year != -1 and iso_week != -1: + year, julian = _calc_julian_from_V(iso_year, iso_week, weekday + 1) # Cannot pre-calculate datetime_date() since can change in Julian # calculation and thus could have different value for the day of the wk diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 87b2388d6987a..e21a374223604 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -177,12 +177,40 @@ class TestToDatetime(object): def test_to_datetime_iso_week_year_format(self): data = [ - ['2015-53-1', '%G-%V-%u', - datetime(2015, 12, 28, 0, 0)] + ['2015-1-1', '%G-%V-%u', + datetime(2014, 12, 29, 0, 0)], #negative ordinal (date in previous year) + ['2015-1-4', '%G-%V-%u', + 
datetime(2015, 1, 1, 0, 0)], + ['2015-1-7', '%G-%V-%u', + datetime(2015, 1, 4, 0, 0)] ] for s, format, dt in data: assert to_datetime(s, format=format) == dt + def test_ValueError_iso_week_year(self): + # 1. ISO week (%V) is specified, but the year is specified with %Y + # instead of %G + with pytest.raises(ValueError): + to_datetime("1999 50", format="%Y %V") + # 2. ISO year (%G) and ISO week (%V) are specified, but weekday is not + with pytest.raises(ValueError): + to_datetime("1999 51", format="%G %V") + # 3. ISO year (%G) and weekday are specified, but ISO week (%V) is not + for w in ('A', 'a', 'w', 'u'): + with pytest.raises(ValueError): + to_datetime("1999 51", format="%G %{}".format(w)) + # 4. ISO year is specified alone + with pytest.raises(ValueError): + to_datetime("2015", format="%G") + # 5. Julian/ordinal day (%j) is specified with %G, but not %Y + with pytest.raises(ValueError): + to_datetime("1999 256", format="%G %j") + #6. ISO week (%V) and weekday are specified, but ISO year (%G) is not + for w in ('A', 'a', 'w', 'u'): + with pytest.raises(ValueError): + to_datetime("51 11", format="%V %{}".format(w)) + + def test_to_datetime_dt64s(self): in_bound_dts = [ np.datetime64('2000-01-01'), From 89eaf1ac66b91712f4a54a1fcea8a63d8df266eb Mon Sep 17 00:00:00 2001 From: rosygupta Date: Tue, 13 Jun 2017 02:12:14 +0530 Subject: [PATCH 05/14] tests added with tm.assert_raises_regex --- pandas/_libs/tslib.pyx | 6 +- pandas/tests/indexes/datetimes/test_tools.py | 74 +++++++++++--------- 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 9d564264670f2..597c67b2d006c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3827,7 +3827,7 @@ def array_strptime(ndarray[object] values, object fmt, if iso_week == -1 or weekday == -1: raise ValueError("ISO year directive '%G' must be used with " "the ISO week directive '%V' and a weekday " - "directive ('%A', '%a', '%w', or '%u').") + 
"directive '%A', '%a', '%w', or '%u'.") if julian != -1: raise ValueError("Day of the year directive '%j' is not " "compatible with ISO year directive '%G'. " @@ -3836,9 +3836,9 @@ def array_strptime(ndarray[object] values, object fmt, if weekday == -1: raise ValueError("ISO week directive '%V' must be used with " "the ISO year directive '%G' and a weekday " - "directive ('%A', '%a', '%w', or '%u').") + "directive '%A', '%a', '%w', or '%u'.") else: - raise ValueError("ISO week directive '%V' is incompatible with " + raise ValueError("ISO week directive '%V' is incompatible with" " the year directive '%Y'. Use the ISO year " "'%G' instead.") diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index e21a374223604..a56e9ddfaab29 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -175,41 +175,45 @@ def test_to_datetime_format_weeks(self): class TestToDatetime(object): - def test_to_datetime_iso_week_year_format(self): - data = [ - ['2015-1-1', '%G-%V-%u', - datetime(2014, 12, 29, 0, 0)], #negative ordinal (date in previous year) - ['2015-1-4', '%G-%V-%u', - datetime(2015, 1, 1, 0, 0)], - ['2015-1-7', '%G-%V-%u', - datetime(2015, 1, 4, 0, 0)] - ] - for s, format, dt in data: - assert to_datetime(s, format=format) == dt - - def test_ValueError_iso_week_year(self): - # 1. ISO week (%V) is specified, but the year is specified with %Y - # instead of %G - with pytest.raises(ValueError): - to_datetime("1999 50", format="%Y %V") - # 2. ISO year (%G) and ISO week (%V) are specified, but weekday is not - with pytest.raises(ValueError): - to_datetime("1999 51", format="%G %V") - # 3. ISO year (%G) and weekday are specified, but ISO week (%V) is not - for w in ('A', 'a', 'w', 'u'): - with pytest.raises(ValueError): - to_datetime("1999 51", format="%G %{}".format(w)) - # 4. 
ISO year is specified alone - with pytest.raises(ValueError): - to_datetime("2015", format="%G") - # 5. Julian/ordinal day (%j) is specified with %G, but not %Y - with pytest.raises(ValueError): - to_datetime("1999 256", format="%G %j") - #6. ISO week (%V) and weekday are specified, but ISO year (%G) is not - for w in ('A', 'a', 'w', 'u'): - with pytest.raises(ValueError): - to_datetime("51 11", format="%V %{}".format(w)) - + @pytest.mark.parametrize("s, _format, dt", [ + ['2015-1-1', '%G-%V-%u', datetime(2014, 12, 29, 0, 0)], + ['2015-1-4', '%G-%V-%u', datetime(2015, 1, 1, 0, 0)], + ['2015-1-7', '%G-%V-%u', datetime(2015, 1, 4, 0, 0)] + ]) + def test_to_datetime_iso_week_year_format(self, s, _format, dt): + assert to_datetime(s, format = _format) == dt + + @pytest.mark.parametrize("msg, s, _format", [ + ["ISO week directive '%V' must be used with the ISO year directive '%G' " + "and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 50", "%Y %V"], + ["ISO year directive '%G' must be used with the ISO week directive '%V' " + "and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 51", "%G %V"], + ["ISO year directive '%G' must be used with the ISO week directive '%V' " + "and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 Monday", "%G %A"], + ["ISO year directive '%G' must be used with the ISO week directive '%V' " + "and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 Mon", "%G %a"], + ["ISO year directive '%G' must be used with the ISO week directive '%V' " + "and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 6", "%G %w"], + ["ISO year directive '%G' must be used with the ISO week directive '%V' " + "and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 6", "%G %u"], + ["ISO year directive '%G' must be used with the ISO week directive '%V' " + "and a weekday directive '%A', '%a', '%w', or '%u'.", "2051", "%G"], + ["Day of the year directive '%j' is not compatible with ISO year directive " + "'%G'. 
Use '%Y' instead.", "1999 51 6 256", "%G %V %u %j"], + ["ISO week directive '%V' is incompatible with the year directive '%Y'. " + "Use the ISO year '%G' instead.", "1999 51 Sunday", "%Y %V %A"], + ["ISO week directive '%V' is incompatible with the year directive '%Y'. " + "Use the ISO year '%G' instead.", "1999 51 Sun", "%Y %V %a"], + ["ISO week directive '%V' is incompatible with the year directive '%Y'. " + "Use the ISO year '%G' instead.", "1999 51 1", "%Y %V %w"], + ["ISO week directive '%V' is incompatible with the year directive '%Y'. " + "Use the ISO year '%G' instead.", "1999 51 1", "%Y %V %u"], + ["ISO week directive '%V' must be used with the ISO year directive '%G' " + "and a weekday directive '%A', '%a', '%w', or '%u'.", "20", "%V"] + ]) + def test_ValueError_iso_week_year(self, msg, s, _format): + with tm.assert_raises_regex(ValueError, msg): + to_datetime(s, format = _format) def test_to_datetime_dt64s(self): in_bound_dts = [ From aff309c63115ec56e56e3631027eb91a9238763c Mon Sep 17 00:00:00 2001 From: rosygupta Date: Thu, 15 Jun 2017 00:11:47 +0530 Subject: [PATCH 06/14] GH16607 autoindented Test File --- pandas/tests/indexes/datetimes/test_tools.py | 109 +++++++++++-------- 1 file changed, 66 insertions(+), 43 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index a56e9ddfaab29..59707f121aafa 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -99,8 +99,8 @@ def test_to_datetime_format_integer(self): assert_series_equal(result, expected) s = Series([200001, 200105, 200206]) - expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str) - ]) + expected = Series([Timestamp(x[:4] + '-' + x[4:]) + for x in s.apply(str)]) result = to_datetime(s, format='%Y%m') assert_series_equal(result, expected) @@ -176,44 +176,51 @@ def test_to_datetime_format_weeks(self): class TestToDatetime(object): @pytest.mark.parametrize("s, 
_format, dt", [ - ['2015-1-1', '%G-%V-%u', datetime(2014, 12, 29, 0, 0)], - ['2015-1-4', '%G-%V-%u', datetime(2015, 1, 1, 0, 0)], - ['2015-1-7', '%G-%V-%u', datetime(2015, 1, 4, 0, 0)] - ]) + ['2015-1-1', '%G-%V-%u', datetime(2014, 12, 29, 0, 0)], + ['2015-1-4', '%G-%V-%u', datetime(2015, 1, 1, 0, 0)], + ['2015-1-7', '%G-%V-%u', datetime(2015, 1, 4, 0, 0)] + ]) def test_to_datetime_iso_week_year_format(self, s, _format, dt): - assert to_datetime(s, format = _format) == dt + assert to_datetime(s, format=_format) == dt @pytest.mark.parametrize("msg, s, _format", [ - ["ISO week directive '%V' must be used with the ISO year directive '%G' " - "and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 50", "%Y %V"], - ["ISO year directive '%G' must be used with the ISO week directive '%V' " - "and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 51", "%G %V"], - ["ISO year directive '%G' must be used with the ISO week directive '%V' " - "and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 Monday", "%G %A"], - ["ISO year directive '%G' must be used with the ISO week directive '%V' " - "and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 Mon", "%G %a"], - ["ISO year directive '%G' must be used with the ISO week directive '%V' " - "and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 6", "%G %w"], - ["ISO year directive '%G' must be used with the ISO week directive '%V' " - "and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 6", "%G %u"], - ["ISO year directive '%G' must be used with the ISO week directive '%V' " - "and a weekday directive '%A', '%a', '%w', or '%u'.", "2051", "%G"], - ["Day of the year directive '%j' is not compatible with ISO year directive " - "'%G'. Use '%Y' instead.", "1999 51 6 256", "%G %V %u %j"], - ["ISO week directive '%V' is incompatible with the year directive '%Y'. " - "Use the ISO year '%G' instead.", "1999 51 Sunday", "%Y %V %A"], - ["ISO week directive '%V' is incompatible with the year directive '%Y'. 
" - "Use the ISO year '%G' instead.", "1999 51 Sun", "%Y %V %a"], - ["ISO week directive '%V' is incompatible with the year directive '%Y'. " - "Use the ISO year '%G' instead.", "1999 51 1", "%Y %V %w"], - ["ISO week directive '%V' is incompatible with the year directive '%Y'. " - "Use the ISO year '%G' instead.", "1999 51 1", "%Y %V %u"], - ["ISO week directive '%V' must be used with the ISO year directive '%G' " - "and a weekday directive '%A', '%a', '%w', or '%u'.", "20", "%V"] - ]) + ["ISO week directive '%V' must be used with the ISO year directive " + "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 50", + "%V"], + ["ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 51", + "%G %V"], + ["ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 " + "Monday", "%G %A"], + ["ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 Mon", + "%G %a"], + ["ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 6", + "%G %w"], + ["ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 6", + "%G %u"], + ["ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "2051", + "%G"], + ["Day of the year directive '%j' is not compatible with ISO year " + "directive '%G'. Use '%Y' instead.", "1999 51 6 256", "%G %V %u %j"], + ["ISO week directive '%V' is incompatible with the year directive " + "'%Y'. Use the ISO year '%G' instead.", "1999 51 Sunday", "%Y %V %A"], + ["ISO week directive '%V' is incompatible with the year directive " + "'%Y'. 
Use the ISO year '%G' instead.", "1999 51 Sun", "%Y %V %a"], + ["ISO week directive '%V' is incompatible with the year directive " + "'%Y'. Use the ISO year '%G' instead.", "1999 51 1", "%Y %V %w"], + ["ISO week directive '%V' is incompatible with the year directive " + "'%Y'. Use the ISO year '%G' instead.", "1999 51 1", "%Y %V %u"], + ["ISO week directive '%V' must be used with the ISO year directive " + "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.", "20", "%V"] + ]) def test_ValueError_iso_week_year(self, msg, s, _format): with tm.assert_raises_regex(ValueError, msg): - to_datetime(s, format = _format) + to_datetime(s, format=_format) def test_to_datetime_dt64s(self): in_bound_dts = [ @@ -289,11 +296,23 @@ def test_to_datetime_tz(self): def test_to_datetime_tz_pytz(self): # see gh-8260 us_eastern = pytz.timezone('US/Eastern') - arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, - hour=3, minute=0)), - us_eastern.localize(datetime(year=2000, month=6, day=1, - hour=3, minute=0))], - dtype=object) + arr = np.array( + [ + us_eastern.localize( + datetime( + year=2000, + month=1, + day=1, + hour=3, + minute=0)), + us_eastern.localize( + datetime( + year=2000, + month=6, + day=1, + hour=3, + minute=0))], + dtype=object) result = pd.to_datetime(arr, utc=True) expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', '2000-06-01 07:00:00+00:00'], @@ -996,9 +1015,13 @@ def test_to_datetime_infer_datetime_format_series_with_nans(self): tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), pd.to_datetime(s, infer_datetime_format=True)) - def test_to_datetime_infer_datetime_format_series_starting_with_nans(self): - s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00', - '01/02/2011 00:00:00', '01/03/2011 00:00:00'])) + def test_to_datetime_infer_datetime_format_series_starting_with_nans( + self): + s = pd.Series(np.array([np.nan, + np.nan, + '01/01/2011 00:00:00', + '01/02/2011 00:00:00', + '01/03/2011 00:00:00'])) 
tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), pd.to_datetime(s, infer_datetime_format=True)) From 71a1c138e44536b457b74904ddf51c584680116d Mon Sep 17 00:00:00 2001 From: rosygupta Date: Thu, 15 Jun 2017 01:11:17 +0530 Subject: [PATCH 07/14] GH 16607 build fail fix --- pandas/tests/indexes/datetimes/test_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 59707f121aafa..b518d8c156d57 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -186,7 +186,7 @@ def test_to_datetime_iso_week_year_format(self, s, _format, dt): @pytest.mark.parametrize("msg, s, _format", [ ["ISO week directive '%V' must be used with the ISO year directive " "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 50", - "%V"], + "%Y %V"], ["ISO year directive '%G' must be used with the ISO week directive " "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 51", "%G %V"], From 71bc3485c97624125d5163e669699ed8cff1e786 Mon Sep 17 00:00:00 2001 From: rosygupta Date: Thu, 20 Jul 2017 01:06:30 +0530 Subject: [PATCH 08/14] removed pep styling from unnecessary code --- pandas/tests/indexes/datetimes/test_tools.py | 22 +++++--------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index b518d8c156d57..2396ebf4fe9f1 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -296,23 +296,11 @@ def test_to_datetime_tz(self): def test_to_datetime_tz_pytz(self): # see gh-8260 us_eastern = pytz.timezone('US/Eastern') - arr = np.array( - [ - us_eastern.localize( - datetime( - year=2000, - month=1, - day=1, - hour=3, - minute=0)), - us_eastern.localize( - datetime( - year=2000, - month=6, - day=1, - hour=3, - minute=0))], - 
dtype=object) + arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, + hour=3, minute=0)), + us_eastern.localize(datetime(year=2000, month=6, day=1, + hour=3, minute=0))], + dtype=object) result = pd.to_datetime(arr, utc=True) expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', '2000-06-01 07:00:00+00:00'], From 028e978079db274e70f1a1362fb86feb17dfb7e6 Mon Sep 17 00:00:00 2001 From: rosygupta Date: Thu, 20 Jul 2017 01:08:49 +0530 Subject: [PATCH 09/14] removed pep styling from unnecessary code --- pandas/tests/indexes/datetimes/test_tools.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 2396ebf4fe9f1..70ea9af9033d5 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -297,10 +297,10 @@ def test_to_datetime_tz_pytz(self): # see gh-8260 us_eastern = pytz.timezone('US/Eastern') arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, - hour=3, minute=0)), + hour=3, minute=0)), us_eastern.localize(datetime(year=2000, month=6, day=1, - hour=3, minute=0))], - dtype=object) + hour=3, minute=0))], + dtype=object) result = pd.to_datetime(arr, utc=True) expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', '2000-06-01 07:00:00+00:00'], From cf58798a7c81f00cf00970772689cb9d55574bf6 Mon Sep 17 00:00:00 2001 From: rosygupta Date: Thu, 20 Jul 2017 01:11:49 +0530 Subject: [PATCH 10/14] removed pep styling from unnecessary code --- pandas/tests/indexes/datetimes/test_tools.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 70ea9af9033d5..e5fec14b7a0d7 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -99,8 +99,8 @@ def test_to_datetime_format_integer(self): 
assert_series_equal(result, expected) s = Series([200001, 200105, 200206]) - expected = Series([Timestamp(x[:4] + '-' + x[4:]) - for x in s.apply(str)]) + expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str) + ]) result = to_datetime(s, format='%Y%m') assert_series_equal(result, expected) @@ -300,7 +300,7 @@ def test_to_datetime_tz_pytz(self): hour=3, minute=0)), us_eastern.localize(datetime(year=2000, month=6, day=1, hour=3, minute=0))], - dtype=object) + dtype=object) result = pd.to_datetime(arr, utc=True) expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', '2000-06-01 07:00:00+00:00'], From 137b72469ea6ea7614b6084a2f56dfcc186bc1a8 Mon Sep 17 00:00:00 2001 From: rosygupta Date: Thu, 20 Jul 2017 01:12:55 +0530 Subject: [PATCH 11/14] removed pep styling from unnecessary code --- pandas/tests/indexes/datetimes/test_tools.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index e5fec14b7a0d7..5110bcdbb63e6 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1003,13 +1003,9 @@ def test_to_datetime_infer_datetime_format_series_with_nans(self): tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), pd.to_datetime(s, infer_datetime_format=True)) - def test_to_datetime_infer_datetime_format_series_starting_with_nans( - self): - s = pd.Series(np.array([np.nan, - np.nan, - '01/01/2011 00:00:00', - '01/02/2011 00:00:00', - '01/03/2011 00:00:00'])) + def test_to_datetime_infer_datetime_format_series_starting_with_nans(self): + s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00', + '01/02/2011 00:00:00', '01/03/2011 00:00:00'])) tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), pd.to_datetime(s, infer_datetime_format=True)) From 250e2cc8e0c53a19dd84992bb9d59288f4e80ea2 Mon Sep 17 00:00:00 2001 From: rosygupta Date: Thu, 20 Jul 
2017 01:18:50 +0530 Subject: [PATCH 12/14] removed pep styling from unnecessary code --- pandas/tests/indexes/datetimes/test_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 5110bcdbb63e6..f69632ee748d6 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -100,7 +100,7 @@ def test_to_datetime_format_integer(self): s = Series([200001, 200105, 200206]) expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str) - ]) + ]) result = to_datetime(s, format='%Y%m') assert_series_equal(result, expected) From 652bb90640d1ae720e418b60553402fe3333cf8f Mon Sep 17 00:00:00 2001 From: rosygupta Date: Thu, 20 Jul 2017 01:23:30 +0530 Subject: [PATCH 13/14] added variables in cdef --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 597c67b2d006c..0f0ada2d910e5 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3598,7 +3598,7 @@ def array_strptime(ndarray[object] values, object fmt, pandas_datetimestruct dts ndarray[int64_t] iresult int year, month, day, minute, hour, second, weekday, julian, tz - int week_of_year, week_of_year_start + int week_of_year, week_of_year_start, iso_week, iso_year int64_t us, ns object val, group_key, ampm, found dict found_key From c249f66c74e251084794deea2b1f622ae4e57e2e Mon Sep 17 00:00:00 2001 From: rosygupta Date: Thu, 20 Jul 2017 01:45:22 +0530 Subject: [PATCH 14/14] added in release note --- doc/source/whatsnew/v0.21.0.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 3dd8bb2ac2de5..38cd3bb56abae 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -24,6 +24,9 @@ New features `_ on most readers and writers (:issue:`13823`) - Added 
`__fspath__` method to :class`:pandas.HDFStore`, :class:`pandas.ExcelFile`, and :class:`pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`) +- :func:`to_datetime` now supports parsing ISO 8601 week dates through the ``format`` directives + ``%G`` (ISO year), ``%V`` (ISO week number) and ``%u`` (ISO weekday, where Monday is 1) + (:issue:`16607`) .. _whatsnew_0210.enhancements.other: