-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BLD: Fix IntervalTree build warnings #30560
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,12 +6,20 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in | |
|
||
from pandas._libs.algos import is_monotonic | ||
|
||
ctypedef fused scalar_t: | ||
float64_t | ||
float32_t | ||
ctypedef fused int_scalar_t: | ||
int64_t | ||
int32_t | ||
float64_t | ||
float32_t | ||
|
||
ctypedef fused uint_scalar_t: | ||
uint64_t | ||
float64_t | ||
float32_t | ||
|
||
ctypedef fused scalar_t: | ||
int_scalar_t | ||
uint_scalar_t | ||
|
||
# ---------------------------------------------------------------------- | ||
# IntervalTree | ||
|
@@ -128,7 +136,12 @@ cdef class IntervalTree(IntervalMixin): | |
result = Int64Vector() | ||
old_len = 0 | ||
for i in range(len(target)): | ||
self.root.query(result, target[i]) | ||
try: | ||
self.root.query(result, target[i]) | ||
except OverflowError: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Need to add these as operations like |
||
# overflow -> no match, which is already handled below | ||
pass | ||
|
||
if result.data.n == old_len: | ||
result.append(-1) | ||
elif result.data.n > old_len + 1: | ||
|
@@ -150,7 +163,12 @@ cdef class IntervalTree(IntervalMixin): | |
missing = Int64Vector() | ||
old_len = 0 | ||
for i in range(len(target)): | ||
self.root.query(result, target[i]) | ||
try: | ||
self.root.query(result, target[i]) | ||
except OverflowError: | ||
# overflow -> no match, which is already handled below | ||
pass | ||
|
||
if result.data.n == old_len: | ||
result.append(-1) | ||
missing.append(i) | ||
|
@@ -202,19 +220,26 @@ for dtype in ['float32', 'float64', 'int32', 'int64', 'uint64']: | |
('neither', '<', '<')]: | ||
cmp_left_converse = '<' if cmp_left == '<=' else '<=' | ||
cmp_right_converse = '<' if cmp_right == '<=' else '<=' | ||
if dtype.startswith('int'): | ||
fused_prefix = 'int_' | ||
elif dtype.startswith('uint'): | ||
fused_prefix = 'uint_' | ||
elif dtype.startswith('float'): | ||
fused_prefix = '' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. open to a better naming convention for the fused types to make this less gross |
||
nodes.append((dtype, dtype.title(), | ||
closed, closed.title(), | ||
cmp_left, | ||
cmp_right, | ||
cmp_left_converse, | ||
cmp_right_converse)) | ||
cmp_right_converse, | ||
fused_prefix)) | ||
|
||
}} | ||
|
||
NODE_CLASSES = {} | ||
|
||
{{for dtype, dtype_title, closed, closed_title, cmp_left, cmp_right, | ||
cmp_left_converse, cmp_right_converse in nodes}} | ||
cmp_left_converse, cmp_right_converse, fused_prefix in nodes}} | ||
|
||
cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode: | ||
"""Non-terminal node for an IntervalTree | ||
|
@@ -317,7 +342,7 @@ cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode: | |
@cython.wraparound(False) | ||
@cython.boundscheck(False) | ||
@cython.initializedcheck(False) | ||
cpdef query(self, Int64Vector result, scalar_t point): | ||
cpdef query(self, Int64Vector result, {{fused_prefix}}scalar_t point): | ||
"""Recursively query this node and its sub-nodes for intervals that | ||
overlap with the query point. | ||
""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can drop support for
float32
andint32
dtypes here, which should help reduce build time. There is no practical way to actually get anIntervalTree
of these dtypes sinceIntervalIndex
is backed by 2 indexes and we don't have aFloat32Index
orInt32Index
. Will save this for a followup though.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Soounds good.
im confused by the naming here: why are the float dtypes included in int_scalar_t/uint_scalar_t?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I couldn't think of a good way to name those. Idea is that
int_scalar_t
/uint_scalar_t
are things that are comparable to int/uint, and we want to be able to compare against float in both cases to determine things like 0.5 being in the interval (0, 1).I tried it without including floats and got "no matching type signature" errors when trying stuff like
IntervalTree[int].get_indexer([0.5])
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn’t comparisons to float be very inaccurate in the 2 ** 63 plus range where we get unsigned?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, comparisons will be inaccurate above
2**61
, so the issue is also present on the upper/lower ends of the int64 range. Comparisons are still valid below this so probably something we'll have to live with, e.g. the same holds for comparisons againstUInt64Index
but the behavior is still allowed there.