-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
PERF: (partial) fix for np_datetime.c performance regression #57988
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 23 commits
aecba4b
6f43de0
47a0d9b
61c5cae
e4c64b2
fa3ff63
9d768d6
c9f75b1
aba6c3a
e7c02e8
6b464a5
b3c0199
5a04d8d
39c77ab
2227b1b
0171a7c
1492bc9
2e670c0
0316442
b61b71c
c3b19e0
80ce283
c9a1efd
322252e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -752,66 +752,56 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, | |
} | ||
|
||
const npy_int64 per_day = sec_per_day * per_sec; | ||
npy_int64 frac; | ||
const int sign = td < 0 ? -1 : 1; | ||
const int is_negative = td < 0 ? 1 : 0; | ||
const int uneven_in_seconds = td % per_sec != 0 ? 1 : 0; | ||
// put frac in seconds | ||
if (td < 0 && td % per_sec != 0) | ||
frac = td / per_sec - 1; | ||
else | ||
frac = td / per_sec; | ||
|
||
const int sign = frac < 0 ? -1 : 1; | ||
if (frac < 0) { | ||
npy_int64 sfrac = td / per_sec - is_negative * uneven_in_seconds; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Am I reading this right that it can just add/subtract 1 when uneven_in_seconds is true? I might be missing the point, but that feels a bit off - I'm not sure we have full test coverage in all precisions. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for scratching your head at this. It's a leftover from a different optimisation route that was not successful in the end. I moved the subtraction into the if body again (same as in the main version), so it should be clearer now. Also, the speedup is now back to where I had it with the previous version that did not pass the tests. |
||
if (sign < 0) { | ||
// even fraction | ||
if ((-frac % sec_per_day) != 0) { | ||
out->days = -frac / sec_per_day + 1; | ||
frac += sec_per_day * out->days; | ||
if ((-sfrac % sec_per_day) != 0) { | ||
out->days = sfrac / sec_per_day - 1; | ||
sfrac -= sec_per_day * out->days; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this exactly the same as before? Previously it looks like out->days would be positive, but this is now negative. Sorry if I'm misreading - again, I just want to be careful, as I'm not sure how well our test cases are hitting all of these branches.
||
} else { | ||
frac = -frac; | ||
if (sfrac <= sec_per_day) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This seems to add more branching than what we had before, so I'm a little hesitant to say this is faster overall, even though it may show up in some of our benchmarks. What kind of difference are you seeing in the current state? |
||
out->days = sfrac / sec_per_day; | ||
sfrac -= out->days * sec_per_day; | ||
} | ||
sfrac = -sfrac; | ||
} | ||
} else if (sfrac >= sec_per_day) { | ||
out->days = sfrac / sec_per_day; | ||
sfrac -= out->days * sec_per_day; | ||
} | ||
|
||
if (frac >= sec_per_day) { | ||
out->days += frac / sec_per_day; | ||
frac -= out->days * sec_per_day; | ||
} | ||
|
||
if (frac >= sec_per_hour) { | ||
out->hrs = (npy_int32)(frac / sec_per_hour); | ||
frac -= out->hrs * sec_per_hour; | ||
if (sfrac >= sec_per_hour) { | ||
out->hrs = (npy_int32)(sfrac / sec_per_hour); | ||
sfrac %= sec_per_hour; | ||
} | ||
|
||
if (frac >= sec_per_min) { | ||
out->min = (npy_int32)(frac / sec_per_min); | ||
frac -= out->min * sec_per_min; | ||
if (sfrac >= sec_per_min) { | ||
out->min = (npy_int32)(sfrac / sec_per_min); | ||
sfrac %= sec_per_min; | ||
} | ||
|
||
if (frac >= 0) { | ||
out->sec = (npy_int32)frac; | ||
frac -= out->sec; | ||
if (sfrac >= 0) { | ||
out->sec = (npy_int32)sfrac; | ||
} | ||
|
||
if (sign < 0) | ||
out->days = -out->days; | ||
|
||
if (base > NPY_FR_s) { | ||
const npy_int64 sfrac = | ||
(out->hrs * sec_per_hour + out->min * sec_per_min + out->sec) * | ||
per_sec; | ||
|
||
npy_int64 ifrac = td - (out->days * per_day + sfrac); | ||
// there will be at most 1 billion nanoseconds left here | ||
npy_int32 ifrac = (npy_int32)((td - out->days * per_day) % per_sec); | ||
|
||
if (base == NPY_FR_ms) { | ||
out->ms = (npy_int32)ifrac; | ||
out->ms = ifrac; | ||
} else if (base == NPY_FR_us) { | ||
out->ms = (npy_int32)(ifrac / 1000LL); | ||
ifrac = ifrac % 1000LL; | ||
out->us = (npy_int32)ifrac; | ||
out->ms = ifrac / 1000LL; | ||
out->us = ifrac % 1000LL; | ||
} else if (base == NPY_FR_ns) { | ||
out->ms = (npy_int32)(ifrac / (1000LL * 1000LL)); | ||
out->ms = ifrac / (1000LL * 1000LL); | ||
ifrac = ifrac % (1000LL * 1000LL); | ||
out->us = (npy_int32)(ifrac / 1000LL); | ||
ifrac = ifrac % 1000LL; | ||
out->ns = (npy_int32)ifrac; | ||
out->us = ifrac / 1000LL; | ||
out->ns = ifrac % 1000LL; | ||
} | ||
} | ||
|
||
|
@@ -822,7 +812,6 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, | |
"invalid base unit"); | ||
break; | ||
} | ||
|
||
out->seconds = | ||
(npy_int32)(out->hrs * sec_per_hour + out->min * sec_per_min + out->sec); | ||
out->microseconds = out->ms * 1000 + out->us; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can use bool here - our minimum supported standard is C11