diff --git a/ci/code_checks.sh b/ci/code_checks.sh index b829cbefe8f7a..26b7eaca87a04 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -44,6 +44,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 RET=$(($RET + $?)) ; echo $MSG "DONE" + # Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`; + # it doesn't make a difference, but we want to be internally consistent. + # Note: this grep pattern is (intended to be) equivalent to the python + # regex r'(?<![ ->])> ' + MSG='Linting .pyx code for spacing conventions in casting' ; echo $MSG + ! grep -r -E --include '*.pyx' --include '*.pxi.in' '> ' pandas/_libs | grep -v '[ ->]> ' + RET=$(($RET + $?)) ; echo $MSG "DONE" + # readability/casting: Warnings about C casting instead of C++ casting # runtime/int: Warnings about using C number types instead of C++ ones # build/include_subdir: Warnings about prefacing included header files with directory diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 02815dce156fb..24828db64c392 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -32,7 +32,7 @@ import missing cdef float64_t FP_ERR = 1e-13 -cdef double NaN = np.NaN +cdef double NaN = np.NaN cdef double nan = NaN cdef int64_t iNaT = get_nat() @@ -242,7 +242,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None): int64_t nobs = 0 float64_t vx, vy, sumx, sumy, sumxx, sumyy, meanx, meany, divisor - N, K = ( mat).shape + N, K = (mat).shape if minp is None: minpv = 1 @@ -307,7 +307,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1): int64_t nobs = 0 float64_t vx, vy, sumx, sumxx, sumyy, mean, divisor - N, K = ( mat).shape + N, K = (mat).shape result = np.empty((K, K), dtype=np.float64) mask = np.isfinite(mat).view(np.uint8) @@ -531,7 +531,7 @@ def pad_2d_inplace(ndarray[algos_t, ndim=2] values, algos_t val int lim, fill_count = 0 - K, N 
= ( values).shape + K, N = (values).shape # GH#2778 if N == 0: @@ -730,7 +730,7 @@ def backfill_2d_inplace(ndarray[algos_t, ndim=2] values, algos_t val int lim, fill_count = 0 - K, N = ( values).shape + K, N = (values).shape # GH#2778 if N == 0: diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 2835c95c96575..c2b0a4119e6e5 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -34,7 +34,7 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr, cdef: Py_ssize_t i, j, sx, sy - sx, sy = ( arr).shape + sx, sy = (arr).shape if arr.flags.f_contiguous: if axis == 0: if periods >= 0: @@ -88,14 +88,14 @@ def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values, # ensure_dtype #---------------------------------------------------------------------- -cdef int PLATFORM_INT = ( np.arange(0, dtype=np.intp)).descr.type_num +cdef int PLATFORM_INT = (np.arange(0, dtype=np.intp)).descr.type_num def ensure_platform_int(object arr): # GH3033, GH1392 # platform int is the size of the int pointer, e.g. 
np.intp if util.is_array(arr): - if ( arr).descr.type_num == PLATFORM_INT: + if (arr).descr.type_num == PLATFORM_INT: return arr else: return arr.astype(np.intp) @@ -105,7 +105,7 @@ def ensure_platform_int(object arr): def ensure_object(object arr): if util.is_array(arr): - if ( arr).descr.type_num == NPY_OBJECT: + if (arr).descr.type_num == NPY_OBJECT: return arr else: return arr.astype(np.object_) @@ -142,7 +142,7 @@ def get_dispatch(dtypes): def ensure_{{name}}(object arr, copy=True): if util.is_array(arr): - if ( arr).descr.type_num == NPY_{{c_type}}: + if (arr).descr.type_num == NPY_{{c_type}}: return arr else: return arr.astype(np.{{dtype}}, copy=copy) diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index bb4aec75ed567..e13f87d15aace 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -263,7 +263,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', np.putmask(values, mask, nan_value) {{endif}} - n, k = ( values).shape + n, k = (values).shape ranks = np.empty((n, k), dtype='f8') {{if dtype == 'object'}} diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index 358479c837d05..bd5feef1ff2b0 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -278,7 +278,7 @@ cdef _take_2d(ndarray[take_t, ndim=2] values, object idx): ndarray[take_t, ndim=2] result object val - N, K = ( values).shape + N, K = (values).shape if take_t is object: # evaluated at compile-time diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 2894e014b84b8..c72b4001dcb79 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -22,7 +22,7 @@ from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers cdef int64_t iNaT = get_nat() -cdef double NaN = np.NaN +cdef double NaN = np.NaN cdef double nan = NaN @@ -115,7 +115,7 @@ def 
group_median_float64(ndarray[float64_t, ndim=2] out, assert min_count == -1, "'min_count' only used in add and prod" ngroups = len(counts) - N, K = ( values).shape + N, K = (values).shape indexer, _counts = groupsort_indexer(labels, ngroups) counts[:] = _counts[1:] @@ -152,7 +152,7 @@ def group_cumprod_float64(float64_t[:, :] out, float64_t[:, :] accum int64_t lab - N, K = ( values).shape + N, K = (values).shape accum = np.ones_like(values) with nogil: @@ -189,7 +189,7 @@ def group_cumsum(numeric[:, :] out, numeric[:, :] accum int64_t lab - N, K = ( values).shape + N, K = (values).shape accum = np.zeros_like(values) with nogil: @@ -226,7 +226,7 @@ def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, int64_t[:] label_seen = np.zeros(ngroups, dtype=np.int64) int64_t[:, :] label_indexer - N, = ( labels).shape + N, = (labels).shape if periods < 0: periods = -periods diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 315cfea56896e..0917453e3f864 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -48,7 +48,7 @@ def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out, nobs = np.zeros_like(out) sumx = np.zeros_like(out) - N, K = ( values).shape + N, K = (values).shape with nogil: @@ -95,7 +95,7 @@ def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out, nobs = np.zeros_like(out) prodx = np.ones_like(out) - N, K = ( values).shape + N, K = (values).shape with nogil: for i in range(N): @@ -141,7 +141,7 @@ def group_var_{{name}}(ndarray[{{c_type}}, ndim=2] out, nobs = np.zeros_like(out) mean = np.zeros_like(out) - N, K = ( values).shape + N, K = (values).shape out[:, :] = 0.0 @@ -193,7 +193,7 @@ def group_mean_{{name}}(ndarray[{{c_type}}, ndim=2] out, nobs = np.zeros_like(out) sumx = np.zeros_like(out) - N, K = ( values).shape + N, K = (values).shape with nogil: for i in range(N): @@ -238,7 +238,7 @@ def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out, if len(labels) == 0: 
return - N, K = ( values).shape + N, K = (values).shape if out.shape[1] != 4: raise ValueError('Output array must have 4 columns') @@ -312,14 +312,14 @@ def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out, if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") - nobs = np.zeros(( out).shape, dtype=np.int64) + nobs = np.zeros((out).shape, dtype=np.int64) {{if name == 'object'}} - resx = np.empty(( out).shape, dtype=object) + resx = np.empty((out).shape, dtype=object) {{else}} resx = np.empty_like(out) {{endif}} - N, K = ( values).shape + N, K = (values).shape {{if name == "object"}} if True: # make templating happy @@ -369,14 +369,14 @@ def group_nth_{{name}}(ndarray[{{c_type}}, ndim=2] out, if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") - nobs = np.zeros(( out).shape, dtype=np.int64) + nobs = np.zeros((out).shape, dtype=np.int64) {{if name=='object'}} - resx = np.empty(( out).shape, dtype=object) + resx = np.empty((out).shape, dtype=object) {{else}} resx = np.empty_like(out) {{endif}} - N, K = ( values).shape + N, K = (values).shape {{if name == "object"}} if True: # make templating happy @@ -462,7 +462,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, tiebreak = tiebreakers[ties_method] keep_na = na_option == 'keep' - N, K = ( values).shape + N, K = (values).shape grp_sizes = np.ones_like(out) # Copy values into new array in order to fill missing data @@ -635,7 +635,7 @@ def group_max(ndarray[groupby_t, ndim=2] out, maxx.fill(-np.inf) nan_val = NAN - N, K = ( values).shape + N, K = (values).shape with nogil: for i in range(N): @@ -697,7 +697,7 @@ def group_min(ndarray[groupby_t, ndim=2] out, minx.fill(np.inf) nan_val = NAN - N, K = ( values).shape + N, K = (values).shape with nogil: for i in range(N): @@ -744,7 +744,7 @@ def group_cummin(ndarray[groupby_t, ndim=2] out, ndarray[groupby_t, ndim=2] accum int64_t lab - N, K = ( values).shape + N, K = (values).shape accum = 
np.empty_like(values) if groupby_t is int64_t: accum.fill(_int64_max) @@ -792,7 +792,7 @@ def group_cummax(ndarray[groupby_t, ndim=2] out, ndarray[groupby_t, ndim=2] accum int64_t lab - N, K = ( values).shape + N, K = (values).shape accum = np.empty_like(values) if groupby_t is int64_t: accum.fill(-_int64_max) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 1fdd8e3b1987f..affb6a038074a 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -318,7 +318,7 @@ cdef class {{name}}HashTable(HashTable): for i in range(n): key = keys[i] k = kh_put_{{dtype}}(self.table, key, &ret) - self.table.vals[k] = values[i] + self.table.vals[k] = values[i] @cython.boundscheck(False) def map_locations(self, ndarray[{{dtype}}_t, ndim=1] values): diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index c92e0a4a7aa23..04c2f222b14ad 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -11,7 +11,7 @@ from numpy cimport (ndarray, cnp.import_array() -cdef double NaN = np.NaN +cdef double NaN = np.NaN cdef double nan = NaN from pandas._libs.algos import groupsort_indexer, ensure_platform_int diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index c57dd66a33fe0..ad8ae9c4bdb74 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -304,7 +304,7 @@ def fast_zip(list ndarrays): # initialize tuples on first pass arr = ndarrays[0] - it = PyArray_IterNew(arr) + it = PyArray_IterNew(arr) for i in range(n): val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) tup = PyTuple_New(k) @@ -316,7 +316,7 @@ def fast_zip(list ndarrays): for j in range(1, k): arr = ndarrays[j] - it = PyArray_IterNew(arr) + it = PyArray_IterNew(arr) if len(arr) != n: raise ValueError('all arrays must be same length') @@ -1994,8 +1994,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, break elif util.is_integer_object(val): seen.int_ = 1 - floats[i] = val - 
complexes[i] = val + floats[i] = val + complexes[i] = val if not seen.null_: seen.saw_int(int(val)) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 2590a30c57f33..95ea103025b23 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -13,7 +13,7 @@ cimport util from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value from tslibs.nattype import NaT -cdef double INF = np.inf +cdef double INF = np.inf cdef double NEGINF = -INF cdef int64_t NPY_NAT = util.get_nat() @@ -224,7 +224,7 @@ def isnaobj2d(ndarray arr): assert arr.ndim == 2, "'arr' must be 2-D." - n, m = ( arr).shape + n, m = (arr).shape result = np.zeros((n, m), dtype=np.uint8) for i in range(n): for j in range(m): @@ -268,7 +268,7 @@ def isnaobj2d_old(ndarray arr): assert arr.ndim == 2, "'arr' must be 2-D." - n, m = ( arr).shape + n, m = (arr).shape result = np.zeros((n, m), dtype=np.uint8) for i in range(n): for j in range(m): diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index e3df391c5c45d..027a4e36204dc 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -65,7 +65,7 @@ CParserError = ParserError cdef bint PY3 = (sys.version_info[0] >= 3) -cdef double INF = np.inf +cdef double INF = np.inf cdef double NEGINF = -INF @@ -1438,7 +1438,7 @@ cdef _string_box_factorize(parser_t *parser, int64_t col, # in the hash table if k != table.n_buckets: # this increments the refcount, but need to test - pyval = table.vals[k] + pyval = table.vals[k] else: # box it. new ref? pyval = PyBytes_FromString(word) @@ -1492,7 +1492,7 @@ cdef _string_box_utf8(parser_t *parser, int64_t col, # in the hash table if k != table.n_buckets: # this increments the refcount, but need to test - pyval = table.vals[k] + pyval = table.vals[k] else: # box it. new ref? 
pyval = PyUnicode_FromString(word) @@ -1549,7 +1549,7 @@ cdef _string_box_decode(parser_t *parser, int64_t col, # in the hash table if k != table.n_buckets: # this increments the refcount, but need to test - pyval = table.vals[k] + pyval = table.vals[k] else: # box it. new ref? size = strlen(word) @@ -2087,14 +2087,14 @@ cdef raise_parser_error(object base, parser_t *parser): Py_XDECREF(traceback) if value != NULL: - old_exc = value + old_exc = value Py_XDECREF(value) # PyErr_Fetch only returned the error message in *value, # so the Exception class must be extracted from *type. if isinstance(old_exc, compat.string_types): if type != NULL: - exc_type = type + exc_type = type else: exc_type = ParserError diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx index 6e4c0c62b0dd8..d2fbf5aa66fbf 100644 --- a/pandas/_libs/properties.pyx +++ b/pandas/_libs/properties.pyx @@ -31,7 +31,7 @@ cdef class CachedProperty(object): if PyDict_Contains(cache, self.name): # not necessary to Py_INCREF - val = PyDict_GetItem(cache, self.name) + val = PyDict_GetItem(cache, self.name) else: val = self.func(obj) PyDict_SetItem(cache, self.name, val) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 119060bd28a1c..951c163522401 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -153,7 +153,7 @@ cdef class Reducer: result = _get_result_array(res, self.nresults, len(self.dummy)) - it = PyArray_IterNew(result) + it = PyArray_IterNew(result) PyArray_SETITEM(result, PyArray_ITER_DATA(it), res) chunk.data = chunk.data + self.increment @@ -574,7 +574,7 @@ cdef class BlockSlider: self.base_ptrs = malloc(sizeof(char*) * len(self.blocks)) for i, block in enumerate(self.blocks): - self.base_ptrs[i] = ( block).data + self.base_ptrs[i] = (block).data def __dealloc__(self): free(self.base_ptrs) diff --git a/pandas/_libs/skiplist.pyx b/pandas/_libs/skiplist.pyx index eec0457fc4caf..6698fcb767d7c 100644 --- a/pandas/_libs/skiplist.pyx +++ 
b/pandas/_libs/skiplist.pyx @@ -105,7 +105,7 @@ cdef class IndexableSkiplist: steps += steps_at_level[level] for level in range(d, self.maxlevels): - ( chain[level]).width[level] += 1 + (chain[level]).width[level] += 1 self.size += 1 @@ -126,11 +126,11 @@ cdef class IndexableSkiplist: chain[level] = node - if value != ( ( ( chain[0]).next)[0]).value: + if value != (((chain[0]).next)[0]).value: raise KeyError('Not Found') # remove one link at each level - d = len(( ( ( chain[0]).next)[0]).next) + d = len((((chain[0]).next)[0]).next) for level in range(d): prevnode = chain[level] diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 705d93da10ba8..67698f1b4c2ca 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -19,8 +19,8 @@ _np_version_under1p11 = LooseVersion(_np_version) < LooseVersion('1.11') # ----------------------------------------------------------------------------- # Preamble stuff -cdef float64_t NaN = np.NaN -cdef float64_t INF = np.inf +cdef float64_t NaN = np.NaN +cdef float64_t INF = np.inf cdef inline int int_max(int a, int b): return a if a >= b else b cdef inline int int_min(int a, int b): return a if a <= b else b diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index d199997d2e9fe..f55966fd053af 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -95,7 +95,7 @@ def ensure_datetime64ns(arr: ndarray, copy: bool=True): NPY_DATETIMEUNIT unit npy_datetimestruct dts - shape = ( arr).shape + shape = (arr).shape ivalues = arr.view(np.int64).ravel() diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index fa965e2ca7c8c..f0a57c49a98fc 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -373,7 +373,7 @@ cdef inline parse_timedelta_string(object ts): elif current_unit == 'm': current_unit = 's' m = 1000000000L - r = int(''.join(number)) * m + r = int(''.join(number)) * m result 
+= timedelta_as_neg(r, neg) have_hhmmss = 1 else: @@ -393,7 +393,7 @@ cdef inline parse_timedelta_string(object ts): if current_unit != 'm': raise ValueError("expected hh:mm:ss format before .") m = 1000000000L - r = int(''.join(number)) * m + r = int(''.join(number)) * m result += timedelta_as_neg(r, neg) have_value = 1 unit, number, frac = [], [], [] @@ -427,7 +427,7 @@ cdef inline parse_timedelta_string(object ts): else: m = 10**(9 -len(frac)) - r = int(''.join(frac)) * m + r = int(''.join(frac)) * m result += timedelta_as_neg(r, neg) # we have a regular format @@ -436,7 +436,7 @@ cdef inline parse_timedelta_string(object ts): if current_unit != 'm': raise ValueError("expected hh:mm:ss format") m = 1000000000L - r = int(''.join(number)) * m + r = int(''.join(number)) * m result += timedelta_as_neg(r, neg) # we have a last abbreviation diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index c4af4a6b35a37..8de2852942865 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -32,7 +32,7 @@ cdef float64_t MINfloat64 = np.NINF cdef float32_t MAXfloat32 = np.inf cdef float64_t MAXfloat64 = np.inf -cdef double NaN = np.NaN +cdef double NaN = np.NaN cdef inline int int_max(int a, int b): return a if a >= b else b cdef inline int int_min(int a, int b): return a if a <= b else b @@ -1498,7 +1498,7 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, output[i] = skiplist_get(skiplist, 0, &ret) else: idx_with_fraction = quantile * (nobs - 1) - idx = idx_with_fraction + idx = idx_with_fraction if idx_with_fraction == idx: # no need to interpolate @@ -1529,7 +1529,7 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, elif interpolation_type == MIDPOINT: vlow = skiplist_get(skiplist, idx, &ret) vhigh = skiplist_get(skiplist, idx + 1, &ret) - output[i] = (vlow + vhigh) / 2 + output[i] = (vlow + vhigh) / 2 else: output[i] = NaN