a zillion flakes #18046

Merged: 9 commits, Nov 1, 2017
6 changes: 3 additions & 3 deletions ci/lint.sh
@@ -10,21 +10,21 @@ if [ "$LINT" ]; then

# pandas/_libs/src is C code, so no need to search there.
echo "Linting *.py"
-flake8 pandas --filename=*.py --exclude pandas/_libs/src
+flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=W503,E731
if [ $? -ne "0" ]; then
RET=1
fi
echo "Linting *.py DONE"

echo "Linting setup.py"
-flake8 setup.py
+flake8 setup.py --ignore=W503
Review comment (Contributor): you can just add the global ignores to setup.cfg
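
For illustration, a sketch of that suggestion (flake8 reads a [flake8] section from setup.cfg; the codes here are just the ones this PR passes on the command line, not a line from the merged change):

[flake8]
ignore = W503,E731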

if [ $? -ne "0" ]; then
RET=1
fi
echo "Linting setup.py DONE"

echo "Linting *.pyx"
-flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126
+flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123
if [ $? -ne "0" ]; then
RET=1
fi
5 changes: 3 additions & 2 deletions pandas/_libs/algos.pyx
@@ -258,7 +258,7 @@ def min_subseq(ndarray[double_t] arr):

return (s, e, -m)

-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
# Pairwise correlation/covariance


@@ -322,7 +322,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):

return result

-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
# Pairwise Spearman correlation


@@ -386,6 +386,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):

return result


# generated from template
include "algos_common_helper.pxi"
include "algos_rank_helper.pxi"
2 changes: 1 addition & 1 deletion pandas/_libs/groupby.pyx
@@ -25,7 +25,7 @@ cdef double nan = NaN


# TODO: aggregate multiple columns in single pass
-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
# first, nth, last


8 changes: 7 additions & 1 deletion pandas/_libs/hashing.pyx
@@ -93,29 +93,34 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
free(lens)
return result


cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil:
return (x << b) | (x >> (64 - b))


cdef inline void u32to8_le(uint8_t* p, uint32_t v) nogil:
p[0] = <uint8_t>(v)
p[1] = <uint8_t>(v >> 8)
p[2] = <uint8_t>(v >> 16)
p[3] = <uint8_t>(v >> 24)


cdef inline void u64to8_le(uint8_t* p, uint64_t v) nogil:
u32to8_le(p, <uint32_t>v)
u32to8_le(p + 4, <uint32_t>(v >> 32))


cdef inline uint64_t u8to64_le(uint8_t* p) nogil:
return (<uint64_t>p[0] |
-<uint64_t>p[1] << 8 |
+<uint64_t>p[1] << 8 |
<uint64_t>p[2] << 16 |
<uint64_t>p[3] << 24 |
<uint64_t>p[4] << 32 |
<uint64_t>p[5] << 40 |
<uint64_t>p[6] << 48 |
<uint64_t>p[7] << 56)


cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
uint64_t* v2, uint64_t* v3) nogil:
v0[0] += v1[0]
@@ -133,6 +138,7 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
v1[0] ^= v2[0]
v2[0] = _rotl(v2[0], 32)


cpdef uint64_t siphash(bytes data, bytes key) except? 0:
if len(key) != 16:
raise ValueError(
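As an aside, a small Python sketch (mine, not part of the PR) of what two of the helpers above compute: _rotl is a 64-bit rotate-left, and u8to64_le packs eight bytes little-endian, i.e. it agrees with int.from_bytes:

def rotl64(x, b):
    # keep the result in 64 bits, mirroring C uint64_t arithmetic
    return ((x << b) | (x >> (64 - b))) & ((1 << 64) - 1)

def u8to64_le(p):
    # p[0] is the least significant byte, p[7] the most significant
    return sum(p[i] << (8 * i) for i in range(8))

assert u8to64_le(bytes(range(8))) == int.from_bytes(bytes(range(8)), 'little')
assert rotl64(1, 63) == 1 << 63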
7 changes: 3 additions & 4 deletions pandas/_libs/index.pyx
@@ -122,7 +122,7 @@ cdef class IndexEngine:
if not self.is_unique:
return self._get_loc_duplicates(val)
values = self._get_index_values()
-loc = _bin_search(values, val) # .searchsorted(val, side='left')
+loc = _bin_search(values, val)  # .searchsorted(val, side='left')
if loc >= len(values):
raise KeyError(val)
if util.get_value_at(values, loc) != val:
@@ -475,15 +475,14 @@ cdef class DatetimeEngine(Int64Engine):
if other.dtype != self._get_box_dtype():
return np.repeat(-1, len(other)).astype('i4')
other = np.asarray(other).view('i8')
-return algos.pad_int64(self._get_index_values(), other,
-limit=limit)
+return algos.pad_int64(self._get_index_values(), other, limit=limit)

def get_backfill_indexer(self, other, limit=None):
if other.dtype != self._get_box_dtype():
return np.repeat(-1, len(other)).astype('i4')
other = np.asarray(other).view('i8')
return algos.backfill_int64(self._get_index_values(), other,
-limit=limit)
+limit=limit)


cdef class TimedeltaEngine(DatetimeEngine):
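For readers unfamiliar with pad_int64/backfill_int64: a rough NumPy approximation (an assumption on my part, ignoring the limit argument and the C implementation details) of what a "pad" (forward-fill) indexer returns:

import numpy as np

def pad_indexer(index, other):
    # rightmost position in `index` whose value is <= each element of `other`,
    # or -1 when no such position exists
    return (np.searchsorted(index, other, side='right') - 1).astype('i4')

idx = np.array([10, 20, 30], dtype='i8')
assert pad_indexer(idx, np.array([5, 10, 25])).tolist() == [-1, 0, 1]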
1 change: 1 addition & 0 deletions pandas/_libs/interval.pyx
@@ -13,6 +13,7 @@ from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE,
import numbers
_VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither'])


cdef class IntervalMixin:
property closed_left:
def __get__(self):
2 changes: 1 addition & 1 deletion pandas/_libs/join.pyx
@@ -147,7 +147,7 @@ def left_outer_join(ndarray[int64_t] left, ndarray[int64_t] right,


def full_outer_join(ndarray[int64_t] left, ndarray[int64_t] right,
-Py_ssize_t max_groups):
+Py_ssize_t max_groups):
cdef:
Py_ssize_t i, j, k, count = 0
ndarray[int64_t] left_count, right_count, left_sorter, right_sorter
23 changes: 13 additions & 10 deletions pandas/_libs/lib.pyx
@@ -82,6 +82,7 @@ def values_from_object(object o):

return o


cpdef map_indices_list(list index):
"""
Produce a dict mapping the values of the input array to their respective
@@ -116,7 +117,8 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr):
s += arr[i].__sizeof__()
return s

-#----------------------------------------------------------------------
+
+# ----------------------------------------------------------------------
# isnull / notnull related

cdef double INF = <double> np.inf
@@ -125,7 +127,7 @@ cdef double NEGINF = -INF

cpdef bint checknull(object val):
if util.is_float_object(val) or util.is_complex_object(val):
-return val != val # and val != INF and val != NEGINF
+return val != val  # and val != INF and val != NEGINF
elif util.is_datetime64_object(val):
return get_datetime64_value(val) == NPY_NAT
elif val is NaT:
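
Aside: the val != val test above works because NaN is the only IEEE-754 float value that compares unequal to itself. A quick Python check (not PR code):

import math
assert float('nan') != float('nan')
assert 1.0 == 1.0 and not math.isnan(1.0)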
@@ -990,7 +992,7 @@ def convert_json_to_lines(object arr):
in_quotes = ~in_quotes
if v == backslash or is_escaping:
is_escaping = ~is_escaping
-if v == comma: # commas that should be \n
+if v == comma:  # commas that should be \n
if num_open_brackets_seen == 0 and not in_quotes:
narr[i] = newline
elif v == left_bracket:
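
A pure-Python paraphrase of the idea behind convert_json_to_lines (my sketch, not the Cython implementation): commas at bracket depth zero and outside quotes become newlines, so an array of JSON records turns into one record per line:

def json_to_lines(s):
    out, depth, in_quotes, escaping = [], 0, False, False
    for ch in s:
        if ch == '"' and not escaping:
            in_quotes = not in_quotes
        escaping = (ch == '\\') and not escaping
        if not in_quotes:
            if ch in '{[':
                depth += 1
            elif ch in '}]':
                depth -= 1
            elif ch == ',' and depth == 0:
                out.append('\n')   # top-level comma: record boundary
                continue
        out.append(ch)
    return ''.join(out)

assert json_to_lines('{"a": 1},{"b": [1,2]}') == '{"a": 1}\n{"b": [1,2]}'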
@@ -1015,7 +1017,7 @@ def write_csv_rows(list data, ndarray data_index,
# In crude testing, N>100 yields little marginal improvement
N=100

-# pre-allocate rows
+# pre-allocate rows
ncols = len(cols)
rows = [[None] * (nlevels + ncols) for x in range(N)]

@@ -1047,12 +1049,13 @@ def write_csv_rows(list data, ndarray data_index,
if j >= N - 1 and j % N == N - 1:
writer.writerows(rows)

-if j >= 0 and (j < N - 1 or (j % N) != N - 1):
+if j >= 0 and (j < N - 1 or (j % N) != N - 1):
writer.writerows(rows[:((j + 1) % N)])
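
The surrounding logic buffers rows and flushes them N at a time; a simplified Python sketch of the same pattern (an illustration, not the Cython code):

import csv, io

N = 4                                    # pandas uses N = 100
buf = io.StringIO()
writer = csv.writer(buf)
rows = [None] * N
j = -1
for j, rec in enumerate(range(10)):
    rows[j % N] = [rec, rec * rec]
    if j % N == N - 1:                   # buffer full: write N rows at once
        writer.writerows(rows)
if j >= 0 and (j % N) != N - 1:          # write the partial final chunk
    writer.writerows(rows[:(j + 1) % N])
assert len(buf.getvalue().splitlines()) == 10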


-#------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------
# Groupby-related functions

@cython.boundscheck(False)
def arrmap(ndarray[object] index, object func):
cdef int length = index.shape[0]
@@ -1136,7 +1139,7 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner,
bins = np.empty(lenbin - 1, dtype=np.int64)

j = 0 # index into values
-bc = 0 # bin count
+bc = 0  # bin count

# linear scan
if right_closed:
@@ -1285,9 +1288,9 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
cdef class _PandasNull:

def __richcmp__(_PandasNull self, object other, int op):
-if op == 2: # ==
+if op == 2:  # ==
return isinstance(other, _PandasNull)
-elif op == 3: # !=
+elif op == 3:  # !=
return not isinstance(other, _PandasNull)
else:
return False
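
For context, the integer opcodes come from CPython's richcmp protocol (a fact about the C API, not PR code):

# object.h defines Py_LT=0, Py_LE=1, Py_EQ=2, Py_NE=3, Py_GT=4, Py_GE=5,
# which is why op == 2 means "==" and op == 3 means "!=" above.
Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, Py_GE = range(6)
assert Py_EQ == 2 and Py_NE == 3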
@@ -1793,7 +1796,7 @@ cdef class BlockPlacement:
stop += other_int

if ((step > 0 and start < 0) or
-(step < 0 and stop < step)):
+(step < 0 and stop < step)):
raise ValueError("iadd causes length change")

if stop < 0:
22 changes: 15 additions & 7 deletions pandas/_libs/parsers.pyx
@@ -138,7 +138,7 @@ cdef extern from "parser/tokenizer.h":

# Store words in (potentially ragged) matrix for now, hmm
char **words
-int64_t *word_starts # where we are in the stream
+int64_t *word_starts  # where we are in the stream
int64_t words_len
int64_t words_cap

@@ -400,7 +400,7 @@ cdef class TextReader:
raise ValueError('only length-1 separators excluded right now')
self.parser.delimiter = ord(delimiter)

-#----------------------------------------
+# ----------------------------------------
# parser options

self.parser.doublequote = doublequote
@@ -519,7 +519,7 @@ cdef class TextReader:

self.index_col = index_col

-#----------------------------------------
+# ----------------------------------------
# header stuff

self.allow_leading_cols = allow_leading_cols
@@ -810,7 +810,7 @@ cdef class TextReader:
if hr == self.header[-1]:
lc = len(this_header)
ic = (len(self.index_col) if self.index_col
-is not None else 0)
+is not None else 0)
if lc != unnamed_count and lc - ic > unnamed_count:
hr -= 1
self.parser_start -= 1
@@ -848,7 +848,7 @@ cdef class TextReader:
# Corner case, not enough lines in the file
if self.parser.lines < data_line + 1:
field_count = len(header[0])
-else: # not self.has_usecols:
+else:  # not self.has_usecols:

field_count = self.parser.line_fields[data_line]

@@ -1374,6 +1374,7 @@ def _ensure_encoded(list lst):
result.append(x)
return result


cdef asbytes(object o):
if PY3:
return str(o).encode('utf-8')
@@ -1417,11 +1418,13 @@ def _maybe_upcast(arr):

return arr


cdef enum StringPath:
CSTRING
UTF8
ENCODED


# factored out logic to pick string converter
cdef inline StringPath _string_path(char *encoding):
if encoding != NULL and encoding != b"utf-8":
@@ -1430,9 +1433,12 @@ cdef inline StringPath _string_path(char *encoding):
return UTF8
else:
return CSTRING
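
A plain-Python paraphrase of the selection logic above (my sketch; None stands in for the C NULL):

def string_path(encoding):
    if encoding is not None and encoding != b"utf-8":
        return "ENCODED"    # decode through the requested codec
    elif encoding is not None:
        return "UTF8"       # explicit utf-8
    return "CSTRING"        # no encoding given: raw C strings

assert string_path(None) == "CSTRING"
assert string_path(b"utf-8") == "UTF8"
assert string_path(b"latin-1") == "ENCODED"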


# ----------------------------------------------------------------------
# Type conversions / inference support code


cdef _string_box_factorize(parser_t *parser, int64_t col,
int64_t line_start, int64_t line_end,
bint na_filter, kh_str_t *na_hashset):
Expand Down Expand Up @@ -1782,7 +1788,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
parser.sci, parser.thousands, 1)
if errno != 0 or p_end[0] or p_end == word:
if (strcasecmp(word, cinf) == 0 or
-strcasecmp(word, cposinf) == 0):
+strcasecmp(word, cposinf) == 0):
data[0] = INF
elif strcasecmp(word, cneginf) == 0:
data[0] = NEGINF
@@ -1803,7 +1809,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
parser.sci, parser.thousands, 1)
if errno != 0 or p_end[0] or p_end == word:
if (strcasecmp(word, cinf) == 0 or
-strcasecmp(word, cposinf) == 0):
+strcasecmp(word, cposinf) == 0):
data[0] = INF
elif strcasecmp(word, cneginf) == 0:
data[0] = NEGINF
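
Aside: the strcasecmp fallback above accepts the same case-insensitive infinity spellings that Python's float() does (an illustration, not PR code):

assert float('inf') == float('INF') == float('+Inf')
assert float('-inf') < 0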
@@ -2263,6 +2269,7 @@ def _compute_na_values():
}
return na_values


na_values = _compute_na_values()

for k in list(na_values):
@@ -2362,6 +2369,7 @@ def _to_structured_array(dict columns, object names, object usecols):

return recs


cdef _fill_structured_column(char *dst, char* src, int64_t elsize,
int64_t stride, int64_t length, bint incref):
cdef: