Skip to content

Commit f62c85a

Browse files
jbrockmendel authored and jreback committed
a zillion flakes (#18046)
1 parent 881ee30 commit f62c85a

29 files changed

+288
-256
lines changed

ci/lint.sh

+2-2
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ if [ "$LINT" ]; then
2424
echo "Linting setup.py DONE"
2525

2626
echo "Linting *.pyx"
27-
flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126
27+
flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403
2828
if [ $? -ne "0" ]; then
2929
RET=1
3030
fi
@@ -34,7 +34,7 @@ if [ "$LINT" ]; then
3434
for path in 'src'
3535
do
3636
echo "linting -> pandas/$path"
37-
flake8 pandas/$path --filename=*.pxi.in --select=E501,E302,E203,E111,E114,E221,E303,E231,E126
37+
flake8 pandas/$path --filename=*.pxi.in --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
3838
if [ $? -ne "0" ]; then
3939
RET=1
4040
fi

pandas/_libs/algos.pyx

+3-2
Original file line number | Diff line number | Diff line change
@@ -258,7 +258,7 @@ def min_subseq(ndarray[double_t] arr):
258258

259259
return (s, e, -m)
260260

261-
#----------------------------------------------------------------------
261+
# ----------------------------------------------------------------------
262262
# Pairwise correlation/covariance
263263

264264

@@ -322,7 +322,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):
322322

323323
return result
324324

325-
#----------------------------------------------------------------------
325+
# ----------------------------------------------------------------------
326326
# Pairwise Spearman correlation
327327

328328

@@ -386,6 +386,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
386386

387387
return result
388388

389+
389390
# generated from template
390391
include "algos_common_helper.pxi"
391392
include "algos_rank_helper.pxi"

pandas/_libs/groupby.pyx

+1-1
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ cdef double nan = NaN
2525

2626

2727
# TODO: aggregate multiple columns in single pass
28-
#----------------------------------------------------------------------
28+
# ----------------------------------------------------------------------
2929
# first, nth, last
3030

3131

pandas/_libs/hashing.pyx

+7-1
Original file line number | Diff line number | Diff line change
@@ -93,29 +93,34 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
9393
free(lens)
9494
return result
9595

96+
9697
cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil:
9798
return (x << b) | (x >> (64 - b))
9899

100+
99101
cdef inline void u32to8_le(uint8_t* p, uint32_t v) nogil:
100102
p[0] = <uint8_t>(v)
101103
p[1] = <uint8_t>(v >> 8)
102104
p[2] = <uint8_t>(v >> 16)
103105
p[3] = <uint8_t>(v >> 24)
104106

107+
105108
cdef inline void u64to8_le(uint8_t* p, uint64_t v) nogil:
106109
u32to8_le(p, <uint32_t>v)
107110
u32to8_le(p + 4, <uint32_t>(v >> 32))
108111

112+
109113
cdef inline uint64_t u8to64_le(uint8_t* p) nogil:
110114
return (<uint64_t>p[0] |
111-
<uint64_t>p[1] << 8 |
115+
<uint64_t>p[1] << 8 |
112116
<uint64_t>p[2] << 16 |
113117
<uint64_t>p[3] << 24 |
114118
<uint64_t>p[4] << 32 |
115119
<uint64_t>p[5] << 40 |
116120
<uint64_t>p[6] << 48 |
117121
<uint64_t>p[7] << 56)
118122

123+
119124
cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
120125
uint64_t* v2, uint64_t* v3) nogil:
121126
v0[0] += v1[0]
@@ -133,6 +138,7 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
133138
v1[0] ^= v2[0]
134139
v2[0] = _rotl(v2[0], 32)
135140

141+
136142
cpdef uint64_t siphash(bytes data, bytes key) except? 0:
137143
if len(key) != 16:
138144
raise ValueError(

pandas/_libs/index.pyx

+3-4
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,7 @@ cdef class IndexEngine:
122122
if not self.is_unique:
123123
return self._get_loc_duplicates(val)
124124
values = self._get_index_values()
125-
loc = _bin_search(values, val) # .searchsorted(val, side='left')
125+
loc = _bin_search(values, val)  # .searchsorted(val, side='left')
126126
if loc >= len(values):
127127
raise KeyError(val)
128128
if util.get_value_at(values, loc) != val:
@@ -475,15 +475,14 @@ cdef class DatetimeEngine(Int64Engine):
475475
if other.dtype != self._get_box_dtype():
476476
return np.repeat(-1, len(other)).astype('i4')
477477
other = np.asarray(other).view('i8')
478-
return algos.pad_int64(self._get_index_values(), other,
479-
limit=limit)
478+
return algos.pad_int64(self._get_index_values(), other, limit=limit)
480479

481480
def get_backfill_indexer(self, other, limit=None):
482481
if other.dtype != self._get_box_dtype():
483482
return np.repeat(-1, len(other)).astype('i4')
484483
other = np.asarray(other).view('i8')
485484
return algos.backfill_int64(self._get_index_values(), other,
486-
limit=limit)
485+
limit=limit)
487486

488487

489488
cdef class TimedeltaEngine(DatetimeEngine):

pandas/_libs/interval.pyx

+1
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE,
1313
import numbers
1414
_VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither'])
1515

16+
1617
cdef class IntervalMixin:
1718
property closed_left:
1819
def __get__(self):

pandas/_libs/join.pyx

+1-1
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,7 @@ def left_outer_join(ndarray[int64_t] left, ndarray[int64_t] right,
147147

148148

149149
def full_outer_join(ndarray[int64_t] left, ndarray[int64_t] right,
150-
Py_ssize_t max_groups):
150+
Py_ssize_t max_groups):
151151
cdef:
152152
Py_ssize_t i, j, k, count = 0
153153
ndarray[int64_t] left_count, right_count, left_sorter, right_sorter

pandas/_libs/lib.pyx

+13-10
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,7 @@ def values_from_object(object o):
8282

8383
return o
8484

85+
8586
cpdef map_indices_list(list index):
8687
"""
8788
Produce a dict mapping the values of the input array to their respective
@@ -116,7 +117,8 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr):
116117
s += arr[i].__sizeof__()
117118
return s
118119

119-
#----------------------------------------------------------------------
120+
121+
# ----------------------------------------------------------------------
120122
# isnull / notnull related
121123

122124
cdef double INF = <double> np.inf
@@ -125,7 +127,7 @@ cdef double NEGINF = -INF
125127

126128
cpdef bint checknull(object val):
127129
if util.is_float_object(val) or util.is_complex_object(val):
128-
return val != val # and val != INF and val != NEGINF
130+
return val != val  # and val != INF and val != NEGINF
129131
elif util.is_datetime64_object(val):
130132
return get_datetime64_value(val) == NPY_NAT
131133
elif val is NaT:
@@ -990,7 +992,7 @@ def convert_json_to_lines(object arr):
990992
in_quotes = ~in_quotes
991993
if v == backslash or is_escaping:
992994
is_escaping = ~is_escaping
993-
if v == comma: # commas that should be \n
995+
if v == comma:  # commas that should be \n
994996
if num_open_brackets_seen == 0 and not in_quotes:
995997
narr[i] = newline
996998
elif v == left_bracket:
@@ -1015,7 +1017,7 @@ def write_csv_rows(list data, ndarray data_index,
10151017
# In crude testing, N>100 yields little marginal improvement
10161018
N=100
10171019

1018-
# pre-allocate rows
1020+
# pre-allocate rows
10191021
ncols = len(cols)
10201022
rows = [[None] * (nlevels + ncols) for x in range(N)]
10211023

@@ -1047,12 +1049,13 @@ def write_csv_rows(list data, ndarray data_index,
10471049
if j >= N - 1 and j % N == N - 1:
10481050
writer.writerows(rows)
10491051

1050-
if j >= 0 and (j < N - 1 or (j % N) != N - 1):
1052+
if j >= 0 and (j < N - 1 or (j % N) != N - 1):
10511053
writer.writerows(rows[:((j + 1) % N)])
10521054

10531055

1054-
#------------------------------------------------------------------------------
1056+
# ------------------------------------------------------------------------------
10551057
# Groupby-related functions
1058+
10561059
@cython.boundscheck(False)
10571060
def arrmap(ndarray[object] index, object func):
10581061
cdef int length = index.shape[0]
@@ -1136,7 +1139,7 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner,
11361139
bins = np.empty(lenbin - 1, dtype=np.int64)
11371140

11381141
j = 0 # index into values
1139-
bc = 0 # bin count
1142+
bc = 0  # bin count
11401143

11411144
# linear scan
11421145
if right_closed:
@@ -1285,9 +1288,9 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
12851288
cdef class _PandasNull:
12861289

12871290
def __richcmp__(_PandasNull self, object other, int op):
1288-
if op == 2: # ==
1291+
if op == 2:  # ==
12891292
return isinstance(other, _PandasNull)
1290-
elif op == 3: # !=
1293+
elif op == 3:  # !=
12911294
return not isinstance(other, _PandasNull)
12921295
else:
12931296
return False
@@ -1793,7 +1796,7 @@ cdef class BlockPlacement:
17931796
stop += other_int
17941797

17951798
if ((step > 0 and start < 0) or
1796-
(step < 0 and stop < step)):
1799+
(step < 0 and stop < step)):
17971800
raise ValueError("iadd causes length change")
17981801

17991802
if stop < 0:

pandas/_libs/parsers.pyx

+15-7
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,7 @@ cdef extern from "parser/tokenizer.h":
138138

139139
# Store words in (potentially ragged) matrix for now, hmm
140140
char **words
141-
int64_t *word_starts # where we are in the stream
141+
int64_t *word_starts  # where we are in the stream
142142
int64_t words_len
143143
int64_t words_cap
144144

@@ -400,7 +400,7 @@ cdef class TextReader:
400400
raise ValueError('only length-1 separators excluded right now')
401401
self.parser.delimiter = ord(delimiter)
402402

403-
#----------------------------------------
403+
# ----------------------------------------
404404
# parser options
405405

406406
self.parser.doublequote = doublequote
@@ -519,7 +519,7 @@ cdef class TextReader:
519519

520520
self.index_col = index_col
521521

522-
#----------------------------------------
522+
# ----------------------------------------
523523
# header stuff
524524

525525
self.allow_leading_cols = allow_leading_cols
@@ -810,7 +810,7 @@ cdef class TextReader:
810810
if hr == self.header[-1]:
811811
lc = len(this_header)
812812
ic = (len(self.index_col) if self.index_col
813-
is not None else 0)
813+
is not None else 0)
814814
if lc != unnamed_count and lc - ic > unnamed_count:
815815
hr -= 1
816816
self.parser_start -= 1
@@ -848,7 +848,7 @@ cdef class TextReader:
848848
# Corner case, not enough lines in the file
849849
if self.parser.lines < data_line + 1:
850850
field_count = len(header[0])
851-
else: # not self.has_usecols:
851+
else:  # not self.has_usecols:
852852

853853
field_count = self.parser.line_fields[data_line]
854854

@@ -1374,6 +1374,7 @@ def _ensure_encoded(list lst):
13741374
result.append(x)
13751375
return result
13761376

1377+
13771378
cdef asbytes(object o):
13781379
if PY3:
13791380
return str(o).encode('utf-8')
@@ -1417,11 +1418,13 @@ def _maybe_upcast(arr):
14171418

14181419
return arr
14191420

1421+
14201422
cdef enum StringPath:
14211423
CSTRING
14221424
UTF8
14231425
ENCODED
14241426

1427+
14251428
# factored out logic to pick string converter
14261429
cdef inline StringPath _string_path(char *encoding):
14271430
if encoding != NULL and encoding != b"utf-8":
@@ -1430,9 +1433,12 @@ cdef inline StringPath _string_path(char *encoding):
14301433
return UTF8
14311434
else:
14321435
return CSTRING
1436+
1437+
14331438
# ----------------------------------------------------------------------
14341439
# Type conversions / inference support code
14351440

1441+
14361442
cdef _string_box_factorize(parser_t *parser, int64_t col,
14371443
int64_t line_start, int64_t line_end,
14381444
bint na_filter, kh_str_t *na_hashset):
@@ -1782,7 +1788,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
17821788
parser.sci, parser.thousands, 1)
17831789
if errno != 0 or p_end[0] or p_end == word:
17841790
if (strcasecmp(word, cinf) == 0 or
1785-
strcasecmp(word, cposinf) == 0):
1791+
strcasecmp(word, cposinf) == 0):
17861792
data[0] = INF
17871793
elif strcasecmp(word, cneginf) == 0:
17881794
data[0] = NEGINF
@@ -1803,7 +1809,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
18031809
parser.sci, parser.thousands, 1)
18041810
if errno != 0 or p_end[0] or p_end == word:
18051811
if (strcasecmp(word, cinf) == 0 or
1806-
strcasecmp(word, cposinf) == 0):
1812+
strcasecmp(word, cposinf) == 0):
18071813
data[0] = INF
18081814
elif strcasecmp(word, cneginf) == 0:
18091815
data[0] = NEGINF
@@ -2263,6 +2269,7 @@ def _compute_na_values():
22632269
}
22642270
return na_values
22652271

2272+
22662273
na_values = _compute_na_values()
22672274

22682275
for k in list(na_values):
@@ -2362,6 +2369,7 @@ def _to_structured_array(dict columns, object names, object usecols):
23622369

23632370
return recs
23642371

2372+
23652373
cdef _fill_structured_column(char *dst, char* src, int64_t elsize,
23662374
int64_t stride, int64_t length, bint incref):
23672375
cdef:

0 commit comments

Comments
 (0)