Skip to content

Commit 52b1bf5

Browse files
jbrockmendel authored and aeltanawy committed
[CLN] More cython cleanups, with bonus type annotations (pandas-dev#22283)
1 parent 24501d9 commit 52b1bf5

9 files changed

+36
-36
lines changed

pandas/_libs/algos_common_helper.pxi.in

+4-4
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ def get_dispatch(dtypes):
4545

4646
@cython.wraparound(False)
4747
@cython.boundscheck(False)
48-
cpdef map_indices_{{name}}(ndarray[{{c_type}}] index):
48+
def map_indices_{{name}}(ndarray[{{c_type}}] index):
4949
"""
5050
Produce a dict mapping the values of the input array to their respective
5151
locations.
@@ -542,7 +542,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
542542
cdef int PLATFORM_INT = (<ndarray> np.arange(0, dtype=np.intp)).descr.type_num
543543

544544

545-
cpdef ensure_platform_int(object arr):
545+
def ensure_platform_int(object arr):
546546
# GH3033, GH1392
547547
# platform int is the size of the int pointer, e.g. np.intp
548548
if util.is_array(arr):
@@ -554,7 +554,7 @@ cpdef ensure_platform_int(object arr):
554554
return np.array(arr, dtype=np.intp)
555555

556556

557-
cpdef ensure_object(object arr):
557+
def ensure_object(object arr):
558558
if util.is_array(arr):
559559
if (<ndarray> arr).descr.type_num == NPY_OBJECT:
560560
return arr
@@ -587,7 +587,7 @@ def get_dispatch(dtypes):
587587

588588
{{for name, c_type, dtype in get_dispatch(dtypes)}}
589589

590-
cpdef ensure_{{name}}(object arr, copy=True):
590+
def ensure_{{name}}(object arr, copy=True):
591591
if util.is_array(arr):
592592
if (<ndarray> arr).descr.type_num == NPY_{{c_type}}:
593593
return arr

pandas/_libs/groupby.pyx

+1
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil:
6767
return result
6868

6969

70+
# TODO: Is this redundant with algos.kth_smallest?
7071
cdef inline float64_t kth_smallest_c(float64_t* a,
7172
Py_ssize_t k,
7273
Py_ssize_t n) nogil:

pandas/_libs/hashing.pyx

+1
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,7 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
132132
v2[0] = _rotl(v2[0], 32)
133133

134134

135+
# TODO: This appears unused; remove?
135136
cpdef uint64_t siphash(bytes data, bytes key) except? 0:
136137
if len(key) != 16:
137138
raise ValueError("key should be a 16-byte bytestring, "

pandas/_libs/index.pyx

+1-1
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ cpdef get_value_at(ndarray arr, object loc, object tz=None):
4949
return util.get_value_at(arr, loc)
5050

5151

52-
cpdef object get_value_box(ndarray arr, object loc):
52+
def get_value_box(arr: ndarray, loc: object) -> object:
5353
return get_value_at(arr, loc, tz=None)
5454

5555

pandas/_libs/internals.pyx

+2-2
Original file line number | Diff line number | Diff line change
@@ -184,7 +184,7 @@ cdef class BlockPlacement:
184184
return self._as_slice
185185

186186

187-
cpdef slice_canonize(slice s):
187+
cdef slice_canonize(slice s):
188188
"""
189189
Convert slice to canonical bounded form.
190190
"""
@@ -255,7 +255,7 @@ cpdef Py_ssize_t slice_len(
255255
return length
256256

257257

258-
cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX):
258+
cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX):
259259
"""
260260
Get (start, stop, step, length) tuple for a slice.
261261

pandas/_libs/interval.pyx

+3-2
Original file line number | Diff line number | Diff line change
@@ -362,8 +362,8 @@ cdef class Interval(IntervalMixin):
362362

363363
@cython.wraparound(False)
364364
@cython.boundscheck(False)
365-
cpdef intervals_to_interval_bounds(ndarray intervals,
366-
bint validate_closed=True):
365+
def intervals_to_interval_bounds(ndarray intervals,
366+
bint validate_closed=True):
367367
"""
368368
Parameters
369369
----------
@@ -415,4 +415,5 @@ cpdef intervals_to_interval_bounds(ndarray intervals,
415415

416416
return left, right, closed
417417

418+
418419
include "intervaltree.pxi"

pandas/_libs/lib.pyx

+18-20
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,7 @@ def memory_usage_of_objects(object[:] arr):
107107
# ----------------------------------------------------------------------
108108

109109

110-
cpdef bint is_scalar(object val):
110+
def is_scalar(val: object) -> bint:
111111
"""
112112
Return True if given value is scalar.
113113

@@ -137,7 +137,7 @@ cpdef bint is_scalar(object val):
137137
or util.is_period_object(val)
138138
or is_decimal(val)
139139
or is_interval(val)
140-
or is_offset(val))
140+
or util.is_offset_object(val))
141141

142142

143143
def item_from_zerodim(object val):
@@ -457,7 +457,7 @@ def maybe_booleans_to_slice(ndarray[uint8_t] mask):
457457

458458
@cython.wraparound(False)
459459
@cython.boundscheck(False)
460-
cpdef bint array_equivalent_object(object[:] left, object[:] right):
460+
def array_equivalent_object(left: object[:], right: object[:]) -> bint:
461461
""" perform an element by element comparison on 1-d object arrays
462462
taking into account nan positions """
463463
cdef:
@@ -497,7 +497,7 @@ def astype_intsafe(ndarray[object] arr, new_dtype):
497497
return result
498498

499499

500-
cpdef ndarray[object] astype_unicode(ndarray arr):
500+
def astype_unicode(arr: ndarray) -> ndarray[object]:
501501
cdef:
502502
Py_ssize_t i, n = arr.size
503503
ndarray[object] result = np.empty(n, dtype=object)
@@ -508,7 +508,7 @@ cpdef ndarray[object] astype_unicode(ndarray arr):
508508
return result
509509

510510

511-
cpdef ndarray[object] astype_str(ndarray arr):
511+
def astype_str(arr: ndarray) -> ndarray[object]:
512512
cdef:
513513
Py_ssize_t i, n = arr.size
514514
ndarray[object] result = np.empty(n, dtype=object)
@@ -791,19 +791,19 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys,
791791

792792
# core.common import for fast inference checks
793793

794-
cpdef bint is_float(object obj):
794+
def is_float(obj: object) -> bint:
795795
return util.is_float_object(obj)
796796

797797

798-
cpdef bint is_integer(object obj):
798+
def is_integer(obj: object) -> bint:
799799
return util.is_integer_object(obj)
800800

801801

802-
cpdef bint is_bool(object obj):
802+
def is_bool(obj: object) -> bint:
803803
return util.is_bool_object(obj)
804804

805805

806-
cpdef bint is_complex(object obj):
806+
def is_complex(obj: object) -> bint:
807807
return util.is_complex_object(obj)
808808

809809

@@ -815,15 +815,11 @@ cpdef bint is_interval(object obj):
815815
return getattr(obj, '_typ', '_typ') == 'interval'
816816

817817

818-
cpdef bint is_period(object val):
818+
def is_period(val: object) -> bint:
819819
""" Return a boolean if this is a Period object """
820820
return util.is_period_object(val)
821821

822822

823-
cdef inline bint is_offset(object val):
824-
return getattr(val, '_typ', '_typ') == 'dateoffset'
825-
826-
827823
_TYPE_MAP = {
828824
'categorical': 'categorical',
829825
'category': 'categorical',
@@ -1225,7 +1221,7 @@ def infer_dtype(object value, bint skipna=False):
12251221
if is_bytes_array(values, skipna=skipna):
12261222
return 'bytes'
12271223

1228-
elif is_period(val):
1224+
elif util.is_period_object(val):
12291225
if is_period_array(values):
12301226
return 'period'
12311227

@@ -1243,7 +1239,7 @@ def infer_dtype(object value, bint skipna=False):
12431239
return 'mixed'
12441240

12451241

1246-
cpdef object infer_datetimelike_array(object arr):
1242+
def infer_datetimelike_array(arr: object) -> object:
12471243
"""
12481244
infer if we have a datetime or timedelta array
12491245
- date: we have *only* date and maybe strings, nulls
@@ -1580,7 +1576,7 @@ cpdef bint is_datetime64_array(ndarray values):
15801576
return validator.validate(values)
15811577

15821578

1583-
cpdef bint is_datetime_with_singletz_array(ndarray values):
1579+
def is_datetime_with_singletz_array(values: ndarray) -> bint:
15841580
"""
15851581
Check values have the same tzinfo attribute.
15861582
Doesn't check values are datetime-like types.
@@ -1616,7 +1612,8 @@ cdef class TimedeltaValidator(TemporalValidator):
16161612
return is_null_timedelta64(value)
16171613

16181614

1619-
cpdef bint is_timedelta_array(ndarray values):
1615+
# TODO: Not used outside of tests; remove?
1616+
def is_timedelta_array(values: ndarray) -> bint:
16201617
cdef:
16211618
TimedeltaValidator validator = TimedeltaValidator(len(values),
16221619
skipna=True)
@@ -1628,7 +1625,8 @@ cdef class Timedelta64Validator(TimedeltaValidator):
16281625
return util.is_timedelta64_object(value)
16291626

16301627

1631-
cpdef bint is_timedelta64_array(ndarray values):
1628+
# TODO: Not used outside of tests; remove?
1629+
def is_timedelta64_array(values: ndarray) -> bint:
16321630
cdef:
16331631
Timedelta64Validator validator = Timedelta64Validator(len(values),
16341632
skipna=True)
@@ -1672,7 +1670,7 @@ cpdef bint is_time_array(ndarray values, bint skipna=False):
16721670

16731671
cdef class PeriodValidator(TemporalValidator):
16741672
cdef inline bint is_value_typed(self, object value) except -1:
1675-
return is_period(value)
1673+
return util.is_period_object(value)
16761674

16771675
cdef inline bint is_valid_null(self, object value) except -1:
16781676
return is_null_period(value)

pandas/_libs/tslib.pyx

+1-1
Original file line number | Diff line number | Diff line change
@@ -300,7 +300,7 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None,
300300
return result
301301

302302

303-
cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
303+
def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
304304
"""
305305
convert the ndarray according to the unit
306306
if errors:

pandas/_libs/writers.pyx

+5-6
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,7 @@
33
cimport cython
44
from cython cimport Py_ssize_t
55

6-
from cpython cimport (PyString_Check, PyBytes_Check, PyUnicode_Check,
7-
PyBytes_GET_SIZE, PyUnicode_GET_SIZE)
6+
from cpython cimport PyBytes_GET_SIZE, PyUnicode_GET_SIZE
87

98
try:
109
from cpython cimport PyString_GET_SIZE
@@ -124,19 +123,19 @@ def convert_json_to_lines(object arr):
124123
# stata, pytables
125124
@cython.boundscheck(False)
126125
@cython.wraparound(False)
127-
cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr):
126+
def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t:
128127
""" return the maximum size of elements in a 1-dim string array """
129128
cdef:
130129
Py_ssize_t i, m = 0, l = 0, length = arr.shape[0]
131130
pandas_string v
132131

133132
for i in range(length):
134133
v = arr[i]
135-
if PyString_Check(v):
134+
if isinstance(v, str):
136135
l = PyString_GET_SIZE(v)
137-
elif PyBytes_Check(v):
136+
elif isinstance(v, bytes):
138137
l = PyBytes_GET_SIZE(v)
139-
elif PyUnicode_Check(v):
138+
elif isinstance(v, unicode):
140139
l = PyUnicode_GET_SIZE(v)
141140

142141
if l > m:

0 commit comments

Comments
 (0)