-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
[CLN] More cython cleanups, with bonus type annotations #22283
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
0410aed
7f77137
b999b53
38cd31f
74df6b9
e0a19ad
5dfb648
d70c6ed
d31be36
20cf63d
71fccd5
72b38cd
e799458
3300de1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -132,6 +132,7 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1, | |
v2[0] = _rotl(v2[0], 32) | ||
|
||
|
||
# TODO: This appears unused; remove? | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Might be unused - it was a part of the hashing at one point, IIRC. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, will remove in the next pass. |
||
cpdef uint64_t siphash(bytes data, bytes key) except? 0: | ||
if len(key) != 16: | ||
raise ValueError("key should be a 16-byte bytestring, " | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -107,7 +107,7 @@ def memory_usage_of_objects(object[:] arr): | |
# ---------------------------------------------------------------------- | ||
|
||
|
||
cpdef bint is_scalar(object val): | ||
def is_scalar(val: object) -> bint: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this never called from Cython? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, easy to confirm via grep. |
||
""" | ||
Return True if given value is scalar. | ||
|
||
|
@@ -137,7 +137,7 @@ cpdef bint is_scalar(object val): | |
or util.is_period_object(val) | ||
or is_decimal(val) | ||
or is_interval(val) | ||
or is_offset(val)) | ||
or util.is_offset_object(val)) | ||
|
||
|
||
def item_from_zerodim(object val): | ||
|
@@ -457,7 +457,7 @@ def maybe_booleans_to_slice(ndarray[uint8_t] mask): | |
|
||
@cython.wraparound(False) | ||
@cython.boundscheck(False) | ||
cpdef bint array_equivalent_object(object[:] left, object[:] right): | ||
def array_equivalent_object(left: object[:], right: object[:]) -> bint: | ||
""" perform an element by element comparison on 1-d object arrays | ||
taking into account nan positions """ | ||
cdef: | ||
|
@@ -497,7 +497,7 @@ def astype_intsafe(ndarray[object] arr, new_dtype): | |
return result | ||
|
||
|
||
cpdef ndarray[object] astype_unicode(ndarray arr): | ||
def astype_unicode(arr: ndarray) -> ndarray[object]: | ||
cdef: | ||
Py_ssize_t i, n = arr.size | ||
ndarray[object] result = np.empty(n, dtype=object) | ||
|
@@ -508,7 +508,7 @@ cpdef ndarray[object] astype_unicode(ndarray arr): | |
return result | ||
|
||
|
||
cpdef ndarray[object] astype_str(ndarray arr): | ||
def astype_str(arr: ndarray) -> ndarray[object]: | ||
cdef: | ||
Py_ssize_t i, n = arr.size | ||
ndarray[object] result = np.empty(n, dtype=object) | ||
|
@@ -791,19 +791,19 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys, | |
|
||
# core.common import for fast inference checks | ||
|
||
cpdef bint is_float(object obj): | ||
def is_float(obj: object) -> bint: | ||
return util.is_float_object(obj) | ||
|
||
|
||
cpdef bint is_integer(object obj): | ||
def is_integer(obj: object) -> bint: | ||
return util.is_integer_object(obj) | ||
|
||
|
||
cpdef bint is_bool(object obj): | ||
def is_bool(obj: object) -> bint: | ||
return util.is_bool_object(obj) | ||
|
||
|
||
cpdef bint is_complex(object obj): | ||
def is_complex(obj: object) -> bint: | ||
return util.is_complex_object(obj) | ||
|
||
|
||
|
@@ -815,15 +815,11 @@ cpdef bint is_interval(object obj): | |
return getattr(obj, '_typ', '_typ') == 'interval' | ||
|
||
|
||
cpdef bint is_period(object val): | ||
def is_period(val: object) -> bint: | ||
""" Return a boolean if this is a Period object """ | ||
return util.is_period_object(val) | ||
|
||
|
||
cdef inline bint is_offset(object val): | ||
return getattr(val, '_typ', '_typ') == 'dateoffset' | ||
|
||
|
||
_TYPE_MAP = { | ||
'categorical': 'categorical', | ||
'category': 'categorical', | ||
|
@@ -1225,7 +1221,7 @@ def infer_dtype(object value, bint skipna=False): | |
if is_bytes_array(values, skipna=skipna): | ||
return 'bytes' | ||
|
||
elif is_period(val): | ||
elif util.is_period_object(val): | ||
if is_period_array(values): | ||
return 'period' | ||
|
||
|
@@ -1243,7 +1239,7 @@ def infer_dtype(object value, bint skipna=False): | |
return 'mixed' | ||
|
||
|
||
cpdef object infer_datetimelike_array(object arr): | ||
def infer_datetimelike_array(arr: object) -> object: | ||
""" | ||
infer if we have a datetime or timedelta array | ||
- date: we have *only* date and maybe strings, nulls | ||
|
@@ -1580,7 +1576,7 @@ cpdef bint is_datetime64_array(ndarray values): | |
return validator.validate(values) | ||
|
||
|
||
cpdef bint is_datetime_with_singletz_array(ndarray values): | ||
def is_datetime_with_singletz_array(values: ndarray) -> bint: | ||
""" | ||
Check values have the same tzinfo attribute. | ||
Doesn't check values are datetime-like types. | ||
|
@@ -1616,7 +1612,8 @@ cdef class TimedeltaValidator(TemporalValidator): | |
return is_null_timedelta64(value) | ||
|
||
|
||
cpdef bint is_timedelta_array(ndarray values): | ||
# TODO: Not used outside of tests; remove? | ||
def is_timedelta_array(values: ndarray) -> bint: | ||
cdef: | ||
TimedeltaValidator validator = TimedeltaValidator(len(values), | ||
skipna=True) | ||
|
@@ -1628,7 +1625,8 @@ cdef class Timedelta64Validator(TimedeltaValidator): | |
return util.is_timedelta64_object(value) | ||
|
||
|
||
cpdef bint is_timedelta64_array(ndarray values): | ||
# TODO: Not used outside of tests; remove? | ||
def is_timedelta64_array(values: ndarray) -> bint: | ||
cdef: | ||
Timedelta64Validator validator = Timedelta64Validator(len(values), | ||
skipna=True) | ||
|
@@ -1672,7 +1670,7 @@ cpdef bint is_time_array(ndarray values, bint skipna=False): | |
|
||
cdef class PeriodValidator(TemporalValidator): | ||
cdef inline bint is_value_typed(self, object value) except -1: | ||
return is_period(value) | ||
return util.is_period_object(value) | ||
|
||
cdef inline bint is_valid_null(self, object value) except -1: | ||
return is_null_period(value) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,8 +3,7 @@ | |
cimport cython | ||
from cython cimport Py_ssize_t | ||
|
||
from cpython cimport (PyString_Check, PyBytes_Check, PyUnicode_Check, | ||
PyBytes_GET_SIZE, PyUnicode_GET_SIZE) | ||
from cpython cimport PyBytes_GET_SIZE, PyUnicode_GET_SIZE | ||
|
||
try: | ||
from cpython cimport PyString_GET_SIZE | ||
|
@@ -124,19 +123,19 @@ def convert_json_to_lines(object arr): | |
# stata, pytables | ||
@cython.boundscheck(False) | ||
@cython.wraparound(False) | ||
cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr): | ||
def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: | ||
""" return the maximum size of elements in a 1-dim string array """ | ||
cdef: | ||
Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] | ||
pandas_string v | ||
|
||
for i in range(length): | ||
v = arr[i] | ||
if PyString_Check(v): | ||
if isinstance(v, str): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do these make any performance difference? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Cython makes this substitution on its own. |
||
l = PyString_GET_SIZE(v) | ||
elif PyBytes_Check(v): | ||
elif isinstance(v, bytes): | ||
l = PyBytes_GET_SIZE(v) | ||
elif PyUnicode_Check(v): | ||
elif isinstance(v, unicode): | ||
l = PyUnicode_GET_SIZE(v) | ||
|
||
if l > m: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are these definitely not called from Cython code?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
non-cimported only