-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: fix Period.asfreq conversion near datetime(1, 1, 1) #19650
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
ed97203
5faf231
5dbc03a
ffbd22f
785795c
312c633
dfe2542
d749ecc
13295e2
a14bb59
eafa0b0
839e7f4
96108cb
0ad8623
fcc92b0
aefb3c7
e4e453b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -154,12 +154,32 @@ cdef inline int get_freq_group(int freq) nogil: | |
return (freq // 1000) * 1000 | ||
|
||
|
||
@cython.cdivision | ||
# specifically _dont_ use cdvision or else ordinals near -1 are assigned to | ||
# incorrect dates GH#19643 | ||
@cython.cdivision(False) | ||
cdef int64_t get_period_ordinal(int year, int month, int day, | ||
int hour, int minute, int second, | ||
int microseconds, int picoseconds, | ||
int freq) nogil: | ||
"""generate an ordinal in period space""" | ||
""" | ||
Generate an ordinal in period space | ||
|
||
Parameters | ||
---------- | ||
year : int | ||
month : int | ||
day : int | ||
hour : int | ||
minute : int | ||
second : int | ||
microseconds : int | ||
picoseconds : int | ||
freq : int | ||
|
||
Returns | ||
------- | ||
period_ordinal : int64_t | ||
""" | ||
cdef: | ||
int64_t absdays, unix_date, seconds, delta | ||
int64_t weeks | ||
|
@@ -190,7 +210,7 @@ cdef int64_t get_period_ordinal(int year, int month, int day, | |
if month >= fmonth: | ||
mdiff += 12 | ||
|
||
return (year - 1970) * 4 + (mdiff - 1) / 3 | ||
return (year - 1970) * 4 + (mdiff - 1) // 3 | ||
|
||
elif freq == FR_MTH: | ||
return (year - 1970) * 12 + month - 1 | ||
|
@@ -202,14 +222,14 @@ cdef int64_t get_period_ordinal(int year, int month, int day, | |
seconds = unix_date * 86400 + hour * 3600 + minute * 60 + second | ||
|
||
if freq == FR_MS: | ||
return seconds * 1000 + microseconds / 1000 | ||
return seconds * 1000 + microseconds // 1000 | ||
|
||
elif freq == FR_US: | ||
return seconds * 1000000 + microseconds | ||
|
||
elif freq == FR_NS: | ||
return (seconds * 1000000000 + | ||
microseconds * 1000 + picoseconds / 1000) | ||
microseconds * 1000 + picoseconds // 1000) | ||
|
||
else: | ||
return seconds | ||
|
@@ -229,7 +249,7 @@ cdef int64_t get_period_ordinal(int year, int month, int day, | |
elif freq == FR_BUS: | ||
# calculate the current week assuming sunday as last day of a week | ||
# Jan 1 0001 is a Monday, so subtract 1 to get to end-of-week | ||
weeks = (unix_date + ORD_OFFSET - 1) / 7 | ||
weeks = (unix_date + ORD_OFFSET - 1) // 7 | ||
# calculate the current weekday (in range 1 .. 7) | ||
delta = (unix_date + ORD_OFFSET - 1) % 7 + 1 | ||
# return the number of business days in full weeks plus the business | ||
|
@@ -241,7 +261,7 @@ cdef int64_t get_period_ordinal(int year, int month, int day, | |
|
||
elif freq_group == FR_WK: | ||
day_adj = freq - FR_WK | ||
return (unix_date + ORD_OFFSET - (1 + day_adj)) / 7 + 1 - WEEK_OFFSET | ||
return (unix_date + ORD_OFFSET - (1 + day_adj)) // 7 + 1 - WEEK_OFFSET | ||
|
||
# raise ValueError | ||
|
||
|
@@ -272,6 +292,15 @@ cdef int64_t get_python_ordinal(int64_t period_ordinal, int freq) nogil: | |
This corresponds to the number of days since Jan., 1st, 1AD. | ||
When the instance has a frequency less than daily, the proleptic date | ||
is calculated for the last day of the period. | ||
|
||
Parameters | ||
---------- | ||
period_ordinal : int64_t | ||
freq : int | ||
|
||
Returns | ||
------- | ||
absdate : int64_t number of days since datetime(1, 1, 1) | ||
""" | ||
cdef: | ||
asfreq_info af_info | ||
|
@@ -290,6 +319,21 @@ cdef int dInfoCalc_SetFromAbsDateTime(date_info *dinfo, | |
""" | ||
Set the instance's value using the given date and time. | ||
Assumes GREGORIAN_CALENDAR. | ||
|
||
Parameters | ||
---------- | ||
dinfo : date_info* | ||
absdate : int64_t days elapsed since datetime(1, 1, 1) | ||
abstime : double | ||
seconds elapsed since beginning of day described by absdate | ||
|
||
Returns | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should no longer be needed. IOW this is the point of an errors declaration (and raising) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree, will be removing the leftover-from-C bits (and camelCase) in the next pass. For now this PR is focused on just the one bug. |
||
------- | ||
code : int (always 0) | ||
|
||
Notes | ||
----- | ||
Updates dinfo inplace | ||
""" | ||
# Bounds check | ||
# The calling function is responsible for ensuring that | ||
|
@@ -307,6 +351,19 @@ cdef int dInfoCalc_SetFromAbsDate(date_info *dinfo, int64_t absdate) nogil: | |
""" | ||
Sets the date part of the date_info struct | ||
Assumes GREGORIAN_CALENDAR | ||
|
||
Parameters | ||
---------- | ||
dinfo : date_info* | ||
unix_date : int64_t | ||
|
||
Returns | ||
------- | ||
code : int (always 0) | ||
|
||
Notes | ||
----- | ||
Updates dinfo inplace | ||
""" | ||
cdef: | ||
pandas_datetimestruct dts | ||
|
@@ -319,9 +376,23 @@ cdef int dInfoCalc_SetFromAbsDate(date_info *dinfo, int64_t absdate) nogil: | |
|
||
|
||
@cython.cdivision | ||
cdef int dInfoCalc_SetFromAbsTime(date_info *dinfo, double abstime) nogil: | ||
cdef void dInfoCalc_SetFromAbsTime(date_info *dinfo, double abstime) nogil: | ||
""" | ||
Sets the time part of the DateTime object. | ||
|
||
Parameters | ||
---------- | ||
dinfo : date_info* | ||
abstime : double | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you be more specific here , e.g. what is abstime There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The next line |
||
seconds elapsed since beginning of day described by absdate | ||
|
||
Returns | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we don't have Returns for None, remove |
||
------- | ||
None | ||
|
||
Notes | ||
----- | ||
Updates dinfo inplace | ||
""" | ||
cdef: | ||
int inttime | ||
|
@@ -336,7 +407,7 @@ cdef int dInfoCalc_SetFromAbsTime(date_info *dinfo, double abstime) nogil: | |
dinfo.hour = hour | ||
dinfo.minute = minute | ||
dinfo.second = second | ||
return 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do you need a return? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't. Will remove when killing this function off. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just remove the return |
||
return | ||
|
||
|
||
@cython.cdivision | ||
|
@@ -370,7 +441,18 @@ cdef int64_t absdate_from_ymd(int year, int month, int day) nogil: | |
Find the absdate (days elapsed since datetime(1, 1, 1) | ||
for the given year/month/day. | ||
Assumes GREGORIAN_CALENDAR | ||
|
||
Parameters | ||
---------- | ||
year : int | ||
month : int | ||
day : int | ||
|
||
Returns | ||
------- | ||
absdate : int days elapsed since datetime(1, 1, 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. doc-string format, put the expl on the next line |
||
""" | ||
|
||
# /* Calculate the absolute date | ||
cdef: | ||
pandas_datetimestruct dts | ||
|
@@ -385,6 +467,24 @@ cdef int64_t absdate_from_ymd(int year, int month, int day) nogil: | |
|
||
|
||
cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year): | ||
""" | ||
Find the year and quarter of a Period with the given ordinal and frequency | ||
|
||
Parameters | ||
---------- | ||
ordinal : int64_t | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. very odd that these take pointers There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At this point these functions are translated as directly as possible from the C versions. In future passes I'd like to make them more pythonic too, will need to be careful about perf. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that’s fine |
||
freq : int | ||
quarter : *int | ||
year : *int | ||
|
||
Returns | ||
------- | ||
qtr_freq : int describing the implied quarterly frequency | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same |
||
|
||
Notes | ||
----- | ||
Sets quarter and year inplace | ||
""" | ||
cdef: | ||
asfreq_info af_info | ||
int qtr_freq | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,31 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import pytest | ||
|
||
from pandas.errors import OutOfBoundsDatetime | ||
from pandas._libs.tslibs.frequencies import get_freq | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. would not be averse to makeing this a submodule in future |
||
from pandas._libs.tslibs.period import period_ordinal, period_asfreq | ||
from pandas._libs.tslibs.period import period_ordinal, period_asfreq, Period | ||
|
||
|
||
class TestPeriodFreqConversion(object): | ||
@pytest.mark.parametrize('freq', ['A', 'Q', 'M', 'W', 'B', 'D']) | ||
def test_asfreq_near_zero(self, freq): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
# GH#19643, GH#19650 | ||
per = Period('0001-01-01', freq=freq) | ||
tup1 = (per.year, per.hour, per.day) | ||
|
||
prev = per - 1 | ||
assert (per - 1).ordinal == per.ordinal - 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think as a general rule if you are importing Period, then it should go in pandas/tests/scalar/period/* else it can go here (e.g. you are testing a helper function) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seeing as how the recent addition of tests.scalar.period will necessitate a new look at this, I propose we merge this bug fix and the next pass (later today) can attempt to standardize this convention. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can also simply move the tests there now (as travis has to run again anyhow since you just added a commit) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you update for this comment |
||
tup2 = (prev.year, prev.month, prev.day) | ||
assert tup2 < tup1 | ||
|
||
@pytest.mark.xfail | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a reason |
||
def test_to_timestamp_out_of_bounds(self): | ||
# GH#19643, currently gives Timestamp('1754-08-30 22:43:41.128654848') | ||
per = Period('0001-01-01', freq='B') | ||
with pytest.raises(OutOfBoundsDatetime): | ||
per.to_timestamp() | ||
|
||
def test_intraday_conversion_factors(self): | ||
assert period_asfreq(1, get_freq('D'), get_freq('H'), False) == 24 | ||
assert period_asfreq(1, get_freq('D'), get_freq('T'), False) == 1440 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pointer to
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's what the asterisk is for (this notation is used in docstrings elsewhere). Have a preferred syntax?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, you are removing this anyhow IIRC? (in follow on)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes.