Commit 386ed58

MAINT: flake8 *.pyx files
Ran flake8 over the *.pyx files and fixed the reported errors. Removed the E226 check because it flags idiomatic Cython pointer syntax (e.g. char*); the check is also not universally accepted in Python code.
1 parent 3110a72 commit 386ed58

17 files changed (+1114 −774 lines)
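
For context, E226 ("missing whitespace around arithmetic operator") clashes with Cython's pointer syntax: the idiomatic spellings char* and <int64_t*>, which this diff standardizes on, can read to pycodestyle as a bare '*' operator. A minimal sketch of the conflict, with a hypothetical file and function name (not part of this commit):

    # example.pyx -- hypothetical snippet, illustrative only
    from numpy cimport ndarray, int64_t

    cdef void demo(ndarray arr):
        # Idiomatic Cython attaches '*' to the type in pointer
        # declarations and casts; a whitespace-around-operator check
        # such as E226 can read that '*' as multiplication and flag it.
        cdef int64_t* vec = <int64_t*> arr.data
        cdef char* buf = <char*> vec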

ci/lint.sh (+1 −9)

@@ -20,15 +20,7 @@ if [ "$LINT" ]; then
     echo "Linting *.py DONE"

     echo "Linting *.pyx"
-    for path in 'window.pyx' "src/join.pyx"
-    do
-        echo "linting -> pandas/$path"
-        flake8 pandas/$path --filename '*.pyx' --select=E501,E302,E203,E226,E111,E114,E221,E303,E128,E231,E126
-        if [ $? -ne "0" ]; then
-            RET=1
-        fi
-
-    done
+    flake8 pandas --filename '*.pyx' --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126
     echo "Linting *.pyx DONE"

     echo "Linting *.pxi.in"

pandas/algos.pyx (+25 −19)

@@ -59,11 +59,11 @@ cdef:
     int TIEBREAK_DENSE = 5

 tiebreakers = {
-    'average' : TIEBREAK_AVERAGE,
-    'min' : TIEBREAK_MIN,
-    'max' : TIEBREAK_MAX,
-    'first' : TIEBREAK_FIRST,
-    'dense' : TIEBREAK_DENSE,
+    'average': TIEBREAK_AVERAGE,
+    'min': TIEBREAK_MIN,
+    'max': TIEBREAK_MAX,
+    'first': TIEBREAK_FIRST,
+    'dense': TIEBREAK_DENSE,
 }


@@ -489,7 +489,6 @@ def rank_1d_generic(object in_arr, bint retry=1, ties_method='average',
         bint keep_na = 0
         float count = 0.0

-
     tiebreak = tiebreakers[ties_method]

     keep_na = na_option == 'keep'
@@ -578,6 +577,7 @@ class Infinity(object):
     __gt__ = lambda self, other: self is not other
     __ge__ = lambda self, other: True

+
 class NegInfinity(object):
     """ provide a negative Infinity comparision method for ranking """

@@ -705,7 +705,6 @@ def rank_2d_generic(object in_arr, axis=0, ties_method='average',
     # return result


-
 cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil except -1:
     cdef numeric t

@@ -747,11 +746,11 @@ cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k):

 cdef inline kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n):
     cdef:
-        Py_ssize_t i,j,l,m
+        Py_ssize_t i, j, l, m
         double_t x, t

     l = 0
-    m = n-1
+    m = n - 1
     while (l<m):
         x = a[k]
         i = l
@@ -793,13 +792,13 @@ cpdef numeric median(numeric[:] arr):

 def max_subseq(ndarray[double_t] arr):
     cdef:
-        Py_ssize_t i=0,s=0,e=0,T,n
+        Py_ssize_t i=0, s=0, e=0, T, n
         double m, S

     n = len(arr)

     if len(arr) == 0:
-        return (-1,-1,None)
+        return (-1, -1, None)

     m = arr[0]
     S = m
@@ -819,6 +818,7 @@ def max_subseq(ndarray[double_t] arr):

     return (s, e, m)

+
 def min_subseq(ndarray[double_t] arr):
     cdef:
         Py_ssize_t s, e
@@ -831,6 +831,7 @@ def min_subseq(ndarray[double_t] arr):
 #----------------------------------------------------------------------
 # Pairwise correlation/covariance

+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def nancorr(ndarray[float64_t, ndim=2] mat, cov=False, minp=None):
@@ -890,6 +891,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, cov=False, minp=None):
 #----------------------------------------------------------------------
 # Pairwise Spearman correlation

+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
@@ -953,6 +955,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
 #----------------------------------------------------------------------
 # group operations

+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def is_lexsorted(list list_of_arrays):
@@ -967,16 +970,14 @@ def is_lexsorted(list list_of_arrays):

     cdef int64_t **vecs = <int64_t**> malloc(nlevels * sizeof(int64_t*))
     for i from 0 <= i < nlevels:
-        # vecs[i] = <int64_t *> (<ndarray> list_of_arrays[i]).data
-
         arr = list_of_arrays[i]
-        vecs[i] = <int64_t *> arr.data
-        # assume uniqueness??
+        vecs[i] = <int64_t*> arr.data

+    # Assume uniqueness??
     for i from 1 <= i < n:
         for k from 0 <= k < nlevels:
             cur = vecs[k][i]
-            pre = vecs[k][i-1]
+            pre = vecs[k][i - 1]
             if cur == pre:
                 continue
             elif cur > pre:
@@ -988,7 +989,8 @@ def is_lexsorted(list list_of_arrays):


 @cython.boundscheck(False)
-def groupby_indices(dict ids, ndarray[int64_t] labels, ndarray[int64_t] counts):
+def groupby_indices(dict ids, ndarray[int64_t] labels,
+                    ndarray[int64_t] counts):
     """
     turn group_labels output into a combined indexer maping the labels to
     indexers
@@ -1020,7 +1022,7 @@ def groupby_indices(dict ids, ndarray[int64_t] labels, ndarray[int64_t] counts):
     for i from 0 <= i < len(counts):
         arr = np.empty(counts[i], dtype=np.int64)
         result[ids[i]] = arr
-        vecs[i] = <int64_t *> arr.data
+        vecs[i] = <int64_t*> arr.data

     for i from 0 <= i < n:
         k = labels[i]
@@ -1036,6 +1038,7 @@ def groupby_indices(dict ids, ndarray[int64_t] labels, ndarray[int64_t] counts):
     free(vecs)
     return result

+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_labels(ndarray[object] values):
@@ -1116,6 +1119,7 @@ def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
 #----------------------------------------------------------------------
 # first, nth, last

+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_nth_object(ndarray[object, ndim=2] out,
@@ -1160,6 +1164,7 @@ def group_nth_object(ndarray[object, ndim=2] out,
         else:
             out[i, j] = resx[i, j]

+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_nth_bin_object(ndarray[object, ndim=2] out,
@@ -1210,6 +1215,7 @@ def group_nth_bin_object(ndarray[object, ndim=2] out,
         else:
             out[i, j] = resx[i, j]

+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_last_object(ndarray[object, ndim=2] out,
@@ -1252,6 +1258,7 @@ def group_last_object(ndarray[object, ndim=2] out,
         else:
             out[i, j] = resx[i, j]

+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_last_bin_object(ndarray[object, ndim=2] out,
@@ -1326,7 +1333,6 @@ cdef inline float64_t _median_linear(float64_t* a, int n):
         a = tmp
         n -= na_count

-
     if n % 2:
         result = kth_smallest_c( a, n / 2, n)
     else:

pandas/hashtable.pyx (+2 −2)

@@ -192,7 +192,7 @@ def mode_object(ndarray[object] values, ndarray[uint8_t, cast=True] mask):

     kh_destroy_pymap(table)

-    return modes[:j+1]
+    return modes[:j + 1]


 @cython.wraparound(False)
@@ -227,7 +227,7 @@ def mode_int64(int64_t[:] values):

     kh_destroy_int64(table)

-    return modes[:j+1]
+    return modes[:j + 1]


 @cython.wraparound(False)

pandas/index.pyx (+20 −17)

@@ -54,7 +54,8 @@ cdef inline is_definitely_invalid_key(object val):

     # we have a _data, means we are a NDFrame
     return (PySlice_Check(val) or cnp.PyArray_Check(val)
-            or PyList_Check(val) or hasattr(val,'_data'))
+            or PyList_Check(val) or hasattr(val, '_data'))
+

 def get_value_at(ndarray arr, object loc):
     if arr.descr.type_num == NPY_DATETIME:
@@ -63,6 +64,7 @@ def get_value_at(ndarray arr, object loc):
         return Timedelta(util.get_value_at(arr, loc))
     return util.get_value_at(arr, loc)

+
 def set_value_at(ndarray arr, object loc, object val):
     return util.set_value_at(arr, loc, val)

@@ -302,7 +304,7 @@ cdef class IndexEngine:
         else:
             n_alloc = n

-        result  = np.empty(n_alloc, dtype=np.int64)
+        result = np.empty(n_alloc, dtype=np.int64)
         missing = np.empty(n_t, dtype=np.int64)

         # form the set of the results (like ismember)
@@ -311,7 +313,7 @@ cdef class IndexEngine:
             val = util.get_value_1d(values, i)
             if val in stargets:
                 if val not in d:
-                    d[val]  = []
+                    d[val] = []
                 d[val].append(i)

         for i in range(n_t):
@@ -322,20 +324,20 @@ cdef class IndexEngine:
             if val in d:
                 for j in d[val]:

-                        # realloc if needed
-                        if count >= n_alloc:
-                            n_alloc += 10000
-                            result = np.resize(result, n_alloc)
+                    # realloc if needed
+                    if count >= n_alloc:
+                        n_alloc += 10000
+                        result = np.resize(result, n_alloc)

-                        result[count] = j
-                        count += 1
+                    result[count] = j
+                    count += 1

             # value not found
             else:

                 if count >= n_alloc:
-                        n_alloc += 10000
-                        result = np.resize(result, n_alloc)
+                    n_alloc += 10000
+                    result = np.resize(result, n_alloc)
                 result[count] = -1
                 count += 1
                 missing[count_missing] = i
@@ -479,9 +481,9 @@ cdef Py_ssize_t _bin_search(ndarray values, object val) except -1:
             return mid + 1

 _pad_functions = {
-    'object' : algos.pad_object,
-    'int64' : algos.pad_int64,
-    'float64' : algos.pad_float64
+    'object': algos.pad_object,
+    'int64': algos.pad_int64,
+    'float64': algos.pad_float64
 }

 _backfill_functions = {
@@ -606,7 +608,7 @@ cdef class TimedeltaEngine(DatetimeEngine):

 cpdef convert_scalar(ndarray arr, object value):
     if arr.descr.type_num == NPY_DATETIME:
-        if isinstance(value,np.ndarray):
+        if isinstance(value, np.ndarray):
             pass
         elif isinstance(value, Timestamp):
             return value.value
@@ -615,7 +617,7 @@ cpdef convert_scalar(ndarray arr, object value):
         else:
             return Timestamp(value).value
     elif arr.descr.type_num == NPY_TIMEDELTA:
-        if isinstance(value,np.ndarray):
+        if isinstance(value, np.ndarray):
             pass
         elif isinstance(value, Timedelta):
             return value.value
@@ -639,7 +641,8 @@ cdef inline _to_i8(object val):
         return get_datetime64_value(val)
     elif PyDateTime_Check(val):
         tzinfo = getattr(val, 'tzinfo', None)
-        ival = _pydatetime_to_dts(val, &dts) # Save the original date value so we can get the utcoffset from it.
+        # Save the original date value so we can get the utcoffset from it.
+        ival = _pydatetime_to_dts(val, &dts)
         if tzinfo is not None and not _is_utc(tzinfo):
             offset = tslib._get_utcoffset(tzinfo, val)
             ival -= tslib._delta_to_nanoseconds(offset)
