-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: to_datetime support iso week year (16607) #16661
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
274a8e0
7b622ed
437dea9
e349ccc
9f53a99
89eaf1a
aff309c
71a1c13
71bc348
028e978
cf58798
137b724
250e2cc
652bb90
c249f66
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3658,7 +3658,10 @@ def array_strptime(ndarray[object] values, object fmt, | |
'U': 15, | ||
'W': 16, | ||
'Z': 17, | ||
'p': 18 # just an additional key, works only with I | ||
'p': 18, # just an additional key, works only with I | ||
'G': 19, | ||
'V': 20, | ||
'u': 21 | ||
} | ||
cdef int parse_code | ||
|
||
|
@@ -3701,13 +3704,14 @@ def array_strptime(ndarray[object] values, object fmt, | |
raise ValueError("time data %r does not match format " | ||
"%r (search)" % (values[i], fmt)) | ||
|
||
iso_year = -1 | ||
year = 1900 | ||
month = day = 1 | ||
hour = minute = second = ns = us = 0 | ||
tz = -1 | ||
# Default to -1 to signify that values not known; not critical to have, | ||
# though | ||
week_of_year = -1 | ||
iso_week = week_of_year = -1 | ||
week_of_year_start = -1 | ||
# weekday and julian defaulted to -1 so as to signal need to calculate | ||
# values | ||
|
@@ -3809,12 +3813,40 @@ def array_strptime(ndarray[object] values, object fmt, | |
else: | ||
tz = value | ||
break | ||
elif parse_code == 19: | ||
iso_year = int(found_dict['G']) | ||
elif parse_code == 20: | ||
iso_week = int(found_dict['V']) | ||
elif parse_code == 21: | ||
weekday = int(found_dict['u']) | ||
weekday -= 1 | ||
|
||
|
||
# don't assume default values for ISO week/year | ||
if iso_year != -1: | ||
if iso_week == -1 or weekday == -1: | ||
raise ValueError("ISO year directive '%G' must be used with " | ||
"the ISO week directive '%V' and a weekday " | ||
"directive ('%w', or '%u').") | ||
if julian != -1: | ||
raise ValueError("Day of the year directive '%j' is not " | ||
"compatible with ISO year directive '%G'. " | ||
"Use '%Y' instead.") | ||
elif week_of_year == -1 and iso_week != -1: | ||
if weekday == -1: | ||
raise ValueError("ISO week directive '%V' must be used with " | ||
"the ISO year directive '%G' and a weekday " | ||
"directive ('%w', or '%u').") | ||
|
||
# If we know the wk of the year and what day of that wk, we can figure | ||
# out the Julian day of the year. | ||
if julian == -1 and week_of_year != -1 and weekday != -1: | ||
week_starts_Mon = True if week_of_year_start == 0 else False | ||
julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, | ||
week_starts_Mon) | ||
elif iso_year != -1 and iso_week != -1 and weekday != -1: | ||
year, julian = _calc_julian_from_V(iso_year, iso_week, weekday + 1) | ||
|
||
# Cannot pre-calculate datetime_date() since can change in Julian | ||
# calculation and thus could have different value for the day of the wk | ||
# calculation. | ||
|
@@ -5630,13 +5662,16 @@ class TimeRE(dict): | |
'f': r"(?P<f>[0-9]{1,9})", | ||
'H': r"(?P<H>2[0-3]|[0-1]\d|\d)", | ||
'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])", | ||
'G': r"(?P<G>\d\d\d\d)", | ||
'j': (r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|" | ||
r"[1-9]\d|0[1-9]|[1-9])"), | ||
'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])", | ||
'M': r"(?P<M>[0-5]\d|\d)", | ||
'S': r"(?P<S>6[0-1]|[0-5]\d|\d)", | ||
'U': r"(?P<U>5[0-3]|[0-4]\d|\d)", | ||
'w': r"(?P<w>[0-6])", | ||
'u': r"(?P<u>[1-7])", | ||
'V': r"(?P<V>5[0-3]|0[1-9]|[1-4]\d|\d)", | ||
# W is set below by using 'U' | ||
'y': r"(?P<y>\d\d)", | ||
#XXX: Does 'Y' need to worry about having less or more than | ||
|
@@ -5736,3 +5771,22 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year, | |
|
||
# def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"): | ||
# return _strptime(data_string, format)[0] | ||
|
||
cdef _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday): | ||
"""Calculate the Julian day based on the ISO 8601 year, week, and weekday. | ||
ISO weeks start on Mondays, with week 01 being the week containing 4 Jan. | ||
ISO week days range from 1 (Monday) to 7 (Sunday).""" | ||
|
||
cdef: | ||
int correction, ordinal | ||
|
||
correction = datetime_date(iso_year, 1, 4).isoweekday() + 3 | ||
ordinal = (iso_week * 7) + iso_weekday - correction | ||
# ordinal may be negative or 0 now, which means the date is in the previous | ||
# calendar year | ||
if ordinal < 1: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So we want a test case that exercises both of these clauses. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you mark which test case this is below with a comment? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @pytest.mark.parametrize("s, _format, dt", [
this one tests this case! |
||
ordinal += datetime_date(iso_year, 1, 1).toordinal() | ||
iso_year -= 1 | ||
ordinal -= datetime_date(iso_year, 1, 1).toordinal() | ||
return iso_year, ordinal | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -175,6 +175,14 @@ def test_to_datetime_format_weeks(self): | |
|
||
class TestToDatetime(object): | ||
|
||
def test_to_datetime_iso_week_year_format(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jreback Can you please clarify this: "as I indicated above, need to exercise the paths in the code"? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I mean exactly this: you have an if clause, so we need a test for both cases.
|
||
data = [ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's parametrize this using @pytest.mark.parametrize(...)
def test_to_datetime_iso_week_year_format(s, _format, dt):
... Also note that I use "_format" instead of "format" because, as a personal preference, I avoid shadowing built-in names (such as format) when creating local variables. |
||
['2015-53-1', '%G-%V-%u', | ||
datetime(2015, 12, 28, 0, 0)] | ||
] | ||
for s, format, dt in data: | ||
assert to_datetime(s, format=format) == dt | ||
|
||
def test_to_datetime_dt64s(self): | ||
in_bound_dts = [ | ||
np.datetime64('2000-01-01'), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These need to be declared above in the cdef block (iso_year/iso_week).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done