Skip to content

ENH: to_datetime support iso week year (16607) #16661

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 15 commits into from
58 changes: 56 additions & 2 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3658,7 +3658,10 @@ def array_strptime(ndarray[object] values, object fmt,
'U': 15,
'W': 16,
'Z': 17,
'p': 18 # just an additional key, works only with I
'p': 18, # just an additional key, works only with I
'G': 19,
'V': 20,
'u': 21
}
cdef int parse_code

Expand Down Expand Up @@ -3701,13 +3704,14 @@ def array_strptime(ndarray[object] values, object fmt,
raise ValueError("time data %r does not match format "
"%r (search)" % (values[i], fmt))

iso_year = -1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These need to be declared above in the cdef block (iso_year/iso_week).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

year = 1900
month = day = 1
hour = minute = second = ns = us = 0
tz = -1
# Default to -1 to signify that values not known; not critical to have,
# though
week_of_year = -1
iso_week = week_of_year = -1
week_of_year_start = -1
# weekday and julian defaulted to -1 so as to signal need to calculate
# values
Expand Down Expand Up @@ -3809,12 +3813,40 @@ def array_strptime(ndarray[object] values, object fmt,
else:
tz = value
break
elif parse_code == 19:
iso_year = int(found_dict['G'])
elif parse_code == 20:
iso_week = int(found_dict['V'])
elif parse_code == 21:
weekday = int(found_dict['u'])
weekday -= 1


# don't assume default values for ISO week/year
if iso_year != -1:
if iso_week == -1 or weekday == -1:
raise ValueError("ISO year directive '%G' must be used with "
"the ISO week directive '%V' and a weekday "
"directive ('%w', or '%u').")
if julian != -1:
raise ValueError("Day of the year directive '%j' is not "
"compatible with ISO year directive '%G'. "
"Use '%Y' instead.")
elif week_of_year == -1 and iso_week != -1:
if weekday == -1:
raise ValueError("ISO week directive '%V' must be used with "
"the ISO year directive '%G' and a weekday "
"directive ('%w', or '%u').")

# If we know the wk of the year and what day of that wk, we can figure
# out the Julian day of the year.
if julian == -1 and week_of_year != -1 and weekday != -1:
week_starts_Mon = True if week_of_year_start == 0 else False
julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
week_starts_Mon)
elif iso_year != -1 and iso_week != -1 and weekday != -1:
year, julian = _calc_julian_from_V(iso_year, iso_week, weekday + 1)

# Cannot pre-calculate datetime_date() since can change in Julian
# calculation and thus could have different value for the day of the wk
# calculation.
Expand Down Expand Up @@ -5630,13 +5662,16 @@ class TimeRE(dict):
'f': r"(?P<f>[0-9]{1,9})",
'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
'G': r"(?P<G>\d\d\d\d)",
'j': (r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|"
r"[1-9]\d|0[1-9]|[1-9])"),
'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
'M': r"(?P<M>[0-5]\d|\d)",
'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
'w': r"(?P<w>[0-6])",
'u': r"(?P<u>[1-7])",
'V': r"(?P<V>5[0-3]|0[1-9]|[1-4]\d|\d)",
# W is set below by using 'U'
'y': r"(?P<y>\d\d)",
#XXX: Does 'Y' need to worry about having less or more than
Expand Down Expand Up @@ -5736,3 +5771,22 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year,

# def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"):
# return _strptime(data_string, format)[0]

cdef _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
"""Calculate the Julian day based on the ISO 8601 year, week, and weekday.
ISO weeks start on Mondays, with week 01 being the week containing 4 Jan.
ISO week days range from 1 (Monday) to 7 (Sunday)."""

cdef:
int correction, ordinal

correction = datetime_date(iso_year, 1, 4).isoweekday() + 3
ordinal = (iso_week * 7) + iso_weekday - correction
# ordinal may be negative or 0 now, which means the date is in the previous
# calendar year
if ordinal < 1:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we want a test case that exercises both of these clauses.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you mark, with a comment, which of the test cases below covers this?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pytest.mark.parametrize("s, _format, dt", [

  •    ['2015-1-1', '%G-%V-%u', datetime(2014, 12, 29, 0, 0)],
    
  •    ['2015-1-4', '%G-%V-%u', datetime(2015, 1, 1, 0, 0)],
    
  •    ['2015-1-7', '%G-%V-%u', datetime(2015, 1, 4, 0, 0)]
    
  • ])

Out of these:

  •    ['2015-1-4', '%G-%V-%u', datetime(2015, 1, 1, 0, 0)],
    

this one tests this case!

ordinal += datetime_date(iso_year, 1, 1).toordinal()
iso_year -= 1
ordinal -= datetime_date(iso_year, 1, 1).toordinal()
return iso_year, ordinal

8 changes: 8 additions & 0 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,14 @@ def test_to_datetime_format_weeks(self):

class TestToDatetime(object):

def test_to_datetime_iso_week_year_format(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • do these cases separately.
  • need tests for each of the ValueErrors that you are raising
  • as I indicated above, need to exercise the paths in the code.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback Can you please clarify this: "as I indicated above, need to exercise the paths in the code"?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#16661 (comment)

I mean exactly this. You have an if clause, so we need a test for both cases.

ordinal may be negative or 0 now, which means the date is in the previous calendar year

data = [
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's parametrize this using pytest:

@pytest.mark.parametrize(...)
def test_to_datetime_iso_week_year_format(s, _format, dt):
...

Also note that I use "_format" instead of "format" because, as a personal preference, I avoid shadowing built-in names when creating local variables.

['2015-53-1', '%G-%V-%u',
datetime(2015, 12, 28, 0, 0)]
]
for s, format, dt in data:
assert to_datetime(s, format=format) == dt

def test_to_datetime_dt64s(self):
in_bound_dts = [
np.datetime64('2000-01-01'),
Expand Down