Skip to content

Commit 0ac63bc

Browse files
committed
cythonize boundscheck
1 parent 8e31f87 commit 0ac63bc

File tree

6 files changed

+47
-23
lines changed

6 files changed

+47
-23
lines changed

pandas/algos.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -1302,7 +1302,7 @@ def group_last_bin_object(ndarray[object, ndim=2] out,
13021302
else:
13031303
out[i, j] = resx[i, j]
13041304

1305-
cdef inline float64_t _median_linear(float64_t* a, int n):
1305+
cdef inline float64_t _median_linear(float64_t* a, int n):
13061306
cdef int i, j, na_count = 0
13071307
cdef float64_t result
13081308
cdef float64_t* tmp

pandas/core/indexing.py

+7-18
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
is_categorical_dtype,
1010
is_list_like,
1111
is_sequence,
12-
is_scalar)
12+
is_scalar,
13+
_ensure_int64)
1314
from pandas.types.missing import isnull, _infer_fill_value
1415

1516
from pandas.core.index import Index, MultiIndex
@@ -1851,23 +1852,11 @@ def maybe_convert_indices(indices, n):
18511852
""" if we have negative indices, translate to positive here
18521853
if we have indices that are out-of-bounds, raise an IndexError
18531854
"""
1854-
if isinstance(indices, list):
1855-
indices = np.array(indices)
1856-
if len(indices) == 0:
1857-
# If list is empty, np.array will return float and cause indexing
1858-
# errors.
1859-
return np.empty(0, dtype=np.int_)
1860-
1861-
# cythonize this to one pass?
1862-
mask = indices < 0
1863-
if mask.any():
1864-
# don't mutate original array
1865-
indices = indices.copy()
1866-
indices[mask] += n
1867-
mask = (indices >= n) | (indices < 0)
1868-
if mask.any():
1869-
raise IndexError("indices are out-of-bounds")
1870-
return indices
1855+
from pandas.algos import take_bounds_check
1856+
indices = _ensure_int64(indices)
1857+
out = np.empty(len(indices), dtype='int64')
1858+
take_bounds_check(indices, out, n)
1859+
return out
18711860

18721861

18731862
def maybe_convert_ix(*args):

pandas/indexes/base.py

-2
Original file line numberDiff line numberDiff line change
@@ -1496,8 +1496,6 @@ def _assert_take_fillable(self, values, indices, allow_fill=True,
14961496
taken = algos.take_nd(values, indices, allow_fill=False)
14971497
return taken
14981498

1499-
1500-
15011499
@cache_readonly
15021500
def _isnan(self):
15031501
""" return if each value is nan"""

pandas/indexes/category.py

-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
deprecate_kwarg)
1515
from pandas.core.config import get_option
1616
from pandas.indexes.base import Index, _index_shared_docs
17-
import pandas.core.algorithms as algos
1817
import pandas.core.base as base
1918
import pandas.core.missing as missing
2019
import pandas.indexes.base as ibase

pandas/src/algos_take_helper.pxi

+19
Original file line numberDiff line numberDiff line change
@@ -4947,3 +4947,22 @@ def take_2d_multi_object_object(ndarray[object, ndim=2] values,
49474947
out[i, j] = fv
49484948
else:
49494949
out[i, j] = values[idx, idx1[j]]
4950+
4951+
4952+
@cython.wraparound(False)
@cython.boundscheck(False)
def take_bounds_check(int64_t[:] indexer, int64_t[:] out, int64_t n):
    """
    Normalize positional indices against an axis of length ``n``.

    Every negative value in ``indexer`` is shifted by ``n``; the resulting
    value is stored at the same position in ``out``. ``indexer`` itself is
    never modified.

    Parameters
    ----------
    indexer : int64 memoryview
        Positions to validate; negative values count from the end.
    out : int64 memoryview
        Destination buffer; must be at least as long as ``indexer``.
    n : int64
        Length of the axis being indexed.

    Raises
    ------
    ValueError
        If ``out`` is shorter than ``indexer``.
    IndexError
        If a position is still negative after the shift, or is >= ``n``.
        Entries of ``out`` before the offending position have already
        been written when this is raised.
    """
    cdef:
        Py_ssize_t i, count = indexer.shape[0]
        int64_t label

    # boundscheck is disabled for this function, so an undersized ``out``
    # would be written past its end; reject it up front.
    if out.shape[0] < count:
        raise ValueError("out must be at least as long as indexer")

    with nogil:
        for i in range(count):
            label = indexer[i]
            if label < 0:
                label += n

            if label >= n or label < 0:
                # raising needs the GIL back; the message matches the
                # pure-Python bounds check in maybe_convert_indices
                with gil:
                    raise IndexError("indices are out-of-bounds")
            out[i] = label

pandas/src/algos_take_helper.pxi.in

+20-1
Original file line numberDiff line numberDiff line change
@@ -258,4 +258,23 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
258258
else:
259259
out[i, j] = {{preval}}values[idx, idx1[j]]{{postval}}
260260

261-
{{endfor}}
261+
{{endfor}}
262+
263+
264+
@cython.wraparound(False)
@cython.boundscheck(False)
def take_bounds_check(int64_t[:] indexer, int64_t[:] out, int64_t n):
    """
    Normalize positional indices against an axis of length ``n``.

    Every negative value in ``indexer`` is shifted by ``n``; the resulting
    value is stored at the same position in ``out``. ``indexer`` itself is
    never modified.

    Parameters
    ----------
    indexer : int64 memoryview
        Positions to validate; negative values count from the end.
    out : int64 memoryview
        Destination buffer; must be at least as long as ``indexer``.
    n : int64
        Length of the axis being indexed.

    Raises
    ------
    ValueError
        If ``out`` is shorter than ``indexer``.
    IndexError
        If a position is still negative after the shift, or is >= ``n``.
        Entries of ``out`` before the offending position have already
        been written when this is raised.
    """
    cdef:
        Py_ssize_t i, count = indexer.shape[0]
        int64_t label

    # boundscheck is disabled for this function, so an undersized ``out``
    # would be written past its end; reject it up front.
    if out.shape[0] < count:
        raise ValueError("out must be at least as long as indexer")

    with nogil:
        for i in range(count):
            label = indexer[i]
            if label < 0:
                label += n

            if label >= n or label < 0:
                # raising needs the GIL back; the message matches the
                # pure-Python bounds check in maybe_convert_indices
                with gil:
                    raise IndexError("indices are out-of-bounds")
            out[i] = label

0 commit comments

Comments
 (0)