Skip to content

Commit c72b02f

Browse files
jbrockmendel authored and jreback committed
REF: cython cleanup, typing, optimizations (#23456)
1 parent 93aba79 commit c72b02f

19 files changed

+221
-208
lines changed

pandas/_libs/algos_common_helper.pxi.in

-10
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,6 @@
11
"""
22
Template for each `dtype` helper function using 1-d template
33

4-
# 1-d template
5-
- pad
6-
- pad_1d
7-
- pad_2d
8-
- backfill
9-
- backfill_1d
10-
- backfill_2d
11-
- is_monotonic
12-
- arrmap
13-
144
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
155
"""
166

pandas/_libs/groupby_helper.pxi.in

+1-1
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out,
313313
raise AssertionError("len(index) != len(labels)")
314314

315315
nobs = np.zeros((<object> out).shape, dtype=np.int64)
316-
{{if name=='object'}}
316+
{{if name == 'object'}}
317317
resx = np.empty((<object> out).shape, dtype=object)
318318
{{else}}
319319
resx = np.empty_like(out)

pandas/_libs/hashtable.pyx

+3-5
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,8 @@
22

33
cimport cython
44

5-
from cpython cimport (PyObject, Py_INCREF, PyList_Check, PyTuple_Check,
6-
PyMem_Malloc, PyMem_Realloc, PyMem_Free,
7-
PyString_Check, PyBytes_Check,
8-
PyUnicode_Check)
5+
from cpython cimport (PyObject, Py_INCREF,
6+
PyMem_Malloc, PyMem_Realloc, PyMem_Free)
97

108
from libc.stdlib cimport malloc, free
119

@@ -153,7 +151,7 @@ def unique_label_indices(ndarray[int64_t, ndim=1] labels):
153151
cdef:
154152
int ret = 0
155153
Py_ssize_t i, n = len(labels)
156-
kh_int64_t * table = kh_init_int64()
154+
kh_int64_t *table = kh_init_int64()
157155
Int64Vector idx = Int64Vector()
158156
ndarray[int64_t, ndim=1] arr
159157
Int64VectorData *ud = idx.data

pandas/_libs/hashtable_class_helper.pxi.in

+14-15
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
55
"""
66

77

8-
#----------------------------------------------------------------------
8+
# ----------------------------------------------------------------------
99
# VectorData
10-
#----------------------------------------------------------------------
10+
# ----------------------------------------------------------------------
1111

1212
{{py:
1313

@@ -53,9 +53,9 @@ ctypedef fused vector_data:
5353
cdef inline bint needs_resize(vector_data *data) nogil:
5454
return data.n == data.m
5555

56-
#----------------------------------------------------------------------
56+
# ----------------------------------------------------------------------
5757
# Vector
58-
#----------------------------------------------------------------------
58+
# ----------------------------------------------------------------------
5959

6060
{{py:
6161

@@ -134,8 +134,7 @@ cdef class StringVector:
134134
bint external_view_exists
135135

136136
def __cinit__(self):
137-
self.data = <StringVectorData *>PyMem_Malloc(
138-
sizeof(StringVectorData))
137+
self.data = <StringVectorData *>PyMem_Malloc(sizeof(StringVectorData))
139138
if not self.data:
140139
raise MemoryError()
141140
self.external_view_exists = False
@@ -184,7 +183,7 @@ cdef class StringVector:
184183
self.data.m = self.data.n
185184
return ao
186185

187-
cdef inline void append(self, char * x):
186+
cdef inline void append(self, char *x):
188187

189188
if needs_resize(self.data):
190189
self.resize()
@@ -240,9 +239,9 @@ cdef class ObjectVector:
240239
for i in range(len(x)):
241240
self.append(x[i])
242241

243-
#----------------------------------------------------------------------
242+
# ----------------------------------------------------------------------
244243
# HashTable
245-
#----------------------------------------------------------------------
244+
# ----------------------------------------------------------------------
246245

247246

248247
cdef class HashTable:
@@ -283,9 +282,9 @@ cdef class {{name}}HashTable(HashTable):
283282

284283
def sizeof(self, deep=False):
285284
""" return the size of my table in bytes """
286-
return self.table.n_buckets * (sizeof({{dtype}}_t) + # keys
287-
sizeof(Py_ssize_t) + # vals
288-
sizeof(uint32_t)) # flags
285+
return self.table.n_buckets * (sizeof({{dtype}}_t) + # keys
286+
sizeof(Py_ssize_t) + # vals
287+
sizeof(uint32_t)) # flags
289288

290289
cpdef get_item(self, {{dtype}}_t val):
291290
cdef khiter_t k
@@ -679,7 +678,7 @@ cdef class StringHashTable(HashTable):
679678
for i in range(n):
680679
val = values[i]
681680

682-
if PyUnicode_Check(val) or PyString_Check(val):
681+
if isinstance(val, (str, unicode)):
683682
v = util.get_c_string(val)
684683
else:
685684
v = util.get_c_string(self.na_string_sentinel)
@@ -712,7 +711,7 @@ cdef class StringHashTable(HashTable):
712711
for i in range(n):
713712
val = values[i]
714713

715-
if PyUnicode_Check(val) or PyString_Check(val):
714+
if isinstance(val, (str, unicode)):
716715
v = util.get_c_string(val)
717716
else:
718717
v = util.get_c_string(self.na_string_sentinel)
@@ -773,7 +772,7 @@ cdef class StringHashTable(HashTable):
773772
for i in range(n):
774773
val = values[i]
775774

776-
if ((PyUnicode_Check(val) or PyString_Check(val))
775+
if (isinstance(val, (str, unicode))
777776
and not (use_na_value and val == na_value)):
778777
v = util.get_c_string(val)
779778
vecs[i] = v

pandas/_libs/hashtable_func_helper.pxi.in

+10-14
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ Template for each `dtype` helper function for hashtable
44
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
55
"""
66

7-
#----------------------------------------------------------------------
7+
# ----------------------------------------------------------------------
88
# VectorData
9-
#----------------------------------------------------------------------
9+
# ----------------------------------------------------------------------
1010

1111
{{py:
1212

@@ -80,7 +80,7 @@ cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
8080
cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna):
8181
{{endif}}
8282
cdef:
83-
Py_ssize_t i=0
83+
Py_ssize_t i = 0
8484
kh_{{ttype}}_t *table
8585

8686
{{if dtype != 'object'}}
@@ -141,7 +141,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
141141
{{dtype}}_t value
142142
{{endif}}
143143
Py_ssize_t k, i, n = len(values)
144-
kh_{{ttype}}_t * table = kh_init_{{ttype}}()
144+
kh_{{ttype}}_t *table = kh_init_{{ttype}}()
145145
ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool')
146146

147147
kh_resize_{{ttype}}(table, min(n, _SIZE_HINT_LIMIT))
@@ -202,9 +202,9 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
202202
return out
203203

204204

205-
#----------------------------------------------------------------------
205+
# ----------------------------------------------------------------------
206206
# Membership
207-
#----------------------------------------------------------------------
207+
# ----------------------------------------------------------------------
208208

209209

210210
@cython.wraparound(False)
@@ -237,7 +237,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
237237
int ret = 0
238238
ndarray[uint8_t] result
239239
{{scalar}} val
240-
kh_{{ttype}}_t * table = kh_init_{{ttype}}()
240+
kh_{{ttype}}_t *table = kh_init_{{ttype}}()
241241

242242
# construct the table
243243
n = len(values)
@@ -275,9 +275,9 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
275275
{{endfor}}
276276

277277

278-
#----------------------------------------------------------------------
278+
# ----------------------------------------------------------------------
279279
# Mode Computations
280-
#----------------------------------------------------------------------
280+
# ----------------------------------------------------------------------
281281

282282
{{py:
283283

@@ -305,17 +305,13 @@ def mode_{{dtype}}({{ctype}}[:] values, bint dropna):
305305
{{endif}}
306306
cdef:
307307
int count, max_count = 1
308-
int j = -1 # so you can do +=
308+
int j = -1 # so you can do +=
309309
Py_ssize_t k
310310
kh_{{table_type}}_t *table
311311
ndarray[{{ctype}}] modes
312312

313313
table = kh_init_{{table_type}}()
314-
{{if dtype == 'object'}}
315-
build_count_table_{{dtype}}(values, table, dropna)
316-
{{else}}
317314
build_count_table_{{dtype}}(values, table, dropna)
318-
{{endif}}
319315

320316
modes = np.empty(table.n_buckets, dtype=np.{{npy_dtype}})
321317

pandas/_libs/lib.pyx

+9-3
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,8 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bint:
473473
return True
474474

475475

476+
@cython.wraparound(False)
477+
@cython.boundscheck(False)
476478
def astype_intsafe(ndarray[object] arr, new_dtype):
477479
cdef:
478480
Py_ssize_t i, n = len(arr)
@@ -494,6 +496,8 @@ def astype_intsafe(ndarray[object] arr, new_dtype):
494496
return result
495497

496498

499+
@cython.wraparound(False)
500+
@cython.boundscheck(False)
497501
def astype_unicode(arr: ndarray,
498502
skipna: bool=False) -> ndarray[object]:
499503
"""
@@ -528,6 +532,8 @@ def astype_unicode(arr: ndarray,
528532
return result
529533

530534

535+
@cython.wraparound(False)
536+
@cython.boundscheck(False)
531537
def astype_str(arr: ndarray,
532538
skipna: bool=False) -> ndarray[object]:
533539
"""
@@ -562,6 +568,8 @@ def astype_str(arr: ndarray,
562568
return result
563569

564570

571+
@cython.wraparound(False)
572+
@cython.boundscheck(False)
565573
def clean_index_list(list obj):
566574
"""
567575
Utility used in pandas.core.index.ensure_index
@@ -583,11 +591,9 @@ def clean_index_list(list obj):
583591

584592
# don't force numpy coerce with nan's
585593
inferred = infer_dtype(obj)
586-
if inferred in ['string', 'bytes', 'unicode',
587-
'mixed', 'mixed-integer']:
594+
if inferred in ['string', 'bytes', 'unicode', 'mixed', 'mixed-integer']:
588595
return np.asarray(obj, dtype=object), 0
589596
elif inferred in ['integer']:
590-
591597
# TODO: we infer an integer but it *could* be a uint64
592598
try:
593599
return np.asarray(obj, dtype='int64'), 0

pandas/_libs/missing.pxd

-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
# -*- coding: utf-8 -*-
22

3-
from tslibs.nattype cimport is_null_datetimelike
4-
53
cpdef bint checknull(object val)
64
cpdef bint checknull_old(object val)
75

pandas/_libs/reduction.pyx

+1
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,7 @@ cdef inline _extract_result(object res):
438438
res = res[0]
439439
return res
440440

441+
441442
cdef class Slider:
442443
"""
443444
Only handles contiguous data for now

pandas/_libs/sparse.pyx

+14-8
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ cdef class IntIndex(SparseIndex):
275275
ndarray[int32_t, ndim=1] indices):
276276
pass
277277

278+
278279
cpdef get_blocks(ndarray[int32_t, ndim=1] indices):
279280
cdef:
280281
Py_ssize_t init_len, i, npoints, result_indexer = 0
@@ -315,6 +316,7 @@ cpdef get_blocks(ndarray[int32_t, ndim=1] indices):
315316
lens = lens[:result_indexer]
316317
return locs, lens
317318

319+
318320
# -----------------------------------------------------------------------------
319321
# BlockIndex
320322

@@ -670,12 +672,14 @@ cdef class BlockMerge(object):
670672
self.xi = yi
671673
self.yi = xi
672674

675+
673676
cdef class BlockIntersection(BlockMerge):
674677
"""
675678
not done yet
676679
"""
677680
pass
678681

682+
679683
cdef class BlockUnion(BlockMerge):
680684
"""
681685
Object-oriented approach makes sharing state between recursive functions a
@@ -805,10 +809,11 @@ include "sparse_op_helper.pxi"
805809
# Indexing operations
806810

807811
def get_reindexer(ndarray[object, ndim=1] values, dict index_map):
808-
cdef object idx
809-
cdef Py_ssize_t i
810-
cdef Py_ssize_t new_length = len(values)
811-
cdef ndarray[int32_t, ndim=1] indexer
812+
cdef:
813+
object idx
814+
Py_ssize_t i
815+
Py_ssize_t new_length = len(values)
816+
ndarray[int32_t, ndim=1] indexer
812817

813818
indexer = np.empty(new_length, dtype=np.int32)
814819

@@ -861,10 +866,11 @@ def reindex_integer(ndarray[float64_t, ndim=1] values,
861866
# SparseArray mask create operations
862867

863868
def make_mask_object_ndarray(ndarray[object, ndim=1] arr, object fill_value):
864-
cdef object value
865-
cdef Py_ssize_t i
866-
cdef Py_ssize_t new_length = len(arr)
867-
cdef ndarray[int8_t, ndim=1] mask
869+
cdef:
870+
object value
871+
Py_ssize_t i
872+
Py_ssize_t new_length = len(arr)
873+
ndarray[int8_t, ndim=1] mask
868874

869875
mask = np.ones(new_length, dtype=np.int8)
870876

0 commit comments

Comments
 (0)