MAINT: flake8 *.pyx files #14147

Closed · wants to merge 1 commit
10 changes: 1 addition & 9 deletions ci/lint.sh
@@ -20,15 +20,7 @@ if [ "$LINT" ]; then
     echo "Linting *.py DONE"
 
     echo "Linting *.pyx"
-    for path in 'window.pyx' "src/join.pyx"
-    do
-        echo "linting -> pandas/$path"
-        flake8 pandas/$path --filename '*.pyx' --select=E501,E302,E203,E226,E111,E114,E221,E303,E128,E231,E126
-        if [ $? -ne "0" ]; then
-            RET=1
-        fi
-
-    done
+    flake8 pandas --filename '*.pyx' --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126
     echo "Linting *.pyx DONE"
 
     echo "Linting *.pxi.in"
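The new single invocation replaces the old two-file loop (which covered only pandas/window.pyx and pandas/src/join.pyx) with a lint of every *.pyx file under pandas/, and drops E226 from the selected codes. As a rough local reproduction, assuming flake8 is installed and the current directory is a pandas checkout:

# Mirror of the new CI step; exits non-zero if any selected check fires.
flake8 pandas --filename '*.pyx' --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126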
44 changes: 25 additions & 19 deletions pandas/algos.pyx
@@ -59,11 +59,11 @@ cdef:
     int TIEBREAK_DENSE = 5
 
 tiebreakers = {
-    'average' : TIEBREAK_AVERAGE,
-    'min' : TIEBREAK_MIN,
-    'max' : TIEBREAK_MAX,
-    'first' : TIEBREAK_FIRST,
-    'dense' : TIEBREAK_DENSE,
+    'average': TIEBREAK_AVERAGE,
+    'min': TIEBREAK_MIN,
+    'max': TIEBREAK_MAX,
+    'first': TIEBREAK_FIRST,
+    'dense': TIEBREAK_DENSE,
 }


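The dictionary rewrite above removes the space before each colon, which is what pycodestyle's E203 ("whitespace before ':'") flags. A minimal sketch of the warning firing, assuming flake8 is installed; the /tmp path and file name are throwaway examples:

printf "d = {'average' : 1}\n" > /tmp/e203_example.pyx
flake8 /tmp/e203_example.pyx --filename '*.pyx' --select=E203
# flake8 reports: E203 whitespace before ':'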
@@ -489,7 +489,6 @@ def rank_1d_generic(object in_arr, bint retry=1, ties_method='average',
         bint keep_na = 0
         float count = 0.0
 
-
     tiebreak = tiebreakers[ties_method]
 
     keep_na = na_option == 'keep'
@@ -578,6 +577,7 @@ class Infinity(object):
     __gt__ = lambda self, other: self is not other
     __ge__ = lambda self, other: True
 
+
 class NegInfinity(object):
     """ provide a negative Infinity comparision method for ranking """
@@ -705,7 +705,6 @@ def rank_2d_generic(object in_arr, axis=0, ties_method='average',
     # return result
 
 
-
 cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil except -1:
     cdef numeric t
 
@@ -747,11 +746,11 @@ cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k):
 
 cdef inline kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n):
     cdef:
-        Py_ssize_t i,j,l,m
+        Py_ssize_t i, j, l, m
         double_t x, t
 
     l = 0
-    m = n-1
+    m = n - 1
     while (l<m):
         x = a[k]
         i = l
@@ -793,13 +792,13 @@ cpdef numeric median(numeric[:] arr):
 
 def max_subseq(ndarray[double_t] arr):
     cdef:
-        Py_ssize_t i=0,s=0,e=0,T,n
+        Py_ssize_t i=0, s=0, e=0, T, n
         double m, S
 
     n = len(arr)
 
     if len(arr) == 0:
-        return (-1,-1,None)
+        return (-1, -1, None)
 
     m = arr[0]
     S = m
@@ -819,6 +818,7 @@ def max_subseq(ndarray[double_t] arr):
 
     return (s, e, m)
 
+
 def min_subseq(ndarray[double_t] arr):
     cdef:
         Py_ssize_t s, e
@@ -831,6 +831,7 @@
 #----------------------------------------------------------------------
 # Pairwise correlation/covariance
 
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def nancorr(ndarray[float64_t, ndim=2] mat, cov=False, minp=None):
@@ -890,6 +891,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, cov=False, minp=None):
 #----------------------------------------------------------------------
 # Pairwise Spearman correlation
 
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
@@ -953,6 +955,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
 #----------------------------------------------------------------------
 # group operations
 
+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def is_lexsorted(list list_of_arrays):
@@ -967,16 +970,14 @@ def is_lexsorted(list list_of_arrays):
 
     cdef int64_t **vecs = <int64_t**> malloc(nlevels * sizeof(int64_t*))
     for i from 0 <= i < nlevels:
-        # vecs[i] = <int64_t *> (<ndarray> list_of_arrays[i]).data
-
         arr = list_of_arrays[i]
-        vecs[i] = <int64_t *> arr.data
-    # assume uniqueness??
+        vecs[i] = <int64_t*> arr.data
 
+    # Assume uniqueness??
     for i from 1 <= i < n:
         for k from 0 <= k < nlevels:
             cur = vecs[k][i]
-            pre = vecs[k][i-1]
+            pre = vecs[k][i - 1]
             if cur == pre:
                 continue
             elif cur > pre:
@@ -988,7 +989,8 @@ def is_lexsorted(list list_of_arrays):
 
 
 @cython.boundscheck(False)
-def groupby_indices(dict ids, ndarray[int64_t] labels, ndarray[int64_t] counts):
+def groupby_indices(dict ids, ndarray[int64_t] labels,
+                    ndarray[int64_t] counts):
     """
     turn group_labels output into a combined indexer maping the labels to
     indexers
@@ -1020,7 +1022,7 @@ def groupby_indices(dict ids, ndarray[int64_t] labels, ndarray[int64_t] counts):
     for i from 0 <= i < len(counts):
         arr = np.empty(counts[i], dtype=np.int64)
         result[ids[i]] = arr
-        vecs[i] = <int64_t *> arr.data
+        vecs[i] = <int64_t*> arr.data
 
     for i from 0 <= i < n:
         k = labels[i]
@@ -1036,6 +1038,7 @@ def groupby_indices(dict ids, ndarray[int64_t] labels, ndarray[int64_t] counts):
     free(vecs)
     return result
 
+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_labels(ndarray[object] values):
@@ -1116,6 +1119,7 @@ def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
 #----------------------------------------------------------------------
 # first, nth, last
 
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_nth_object(ndarray[object, ndim=2] out,
@@ -1160,6 +1164,7 @@ def group_nth_object(ndarray[object, ndim=2] out,
             else:
                 out[i, j] = resx[i, j]
 
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_nth_bin_object(ndarray[object, ndim=2] out,
@@ -1210,6 +1215,7 @@ def group_nth_bin_object(ndarray[object, ndim=2] out,
             else:
                 out[i, j] = resx[i, j]
 
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_last_object(ndarray[object, ndim=2] out,
@@ -1252,6 +1258,7 @@ def group_last_object(ndarray[object, ndim=2] out,
             else:
                 out[i, j] = resx[i, j]
 
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_last_bin_object(ndarray[object, ndim=2] out,
@@ -1326,7 +1333,6 @@ cdef inline float64_t _median_linear(float64_t* a, int n):
         a = tmp
         n -= na_count
 
-
     if n % 2:
         result = kth_smallest_c( a, n / 2, n)
     else:
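Most of the blank lines added throughout this file satisfy E302 ("expected 2 blank lines"), which requires two blank lines before a top-level definition or its decorators. A minimal reproduction, under the same assumptions as the sketch above:

printf 'def a():\n    pass\n\ndef b():\n    pass\n' > /tmp/e302_example.pyx
flake8 /tmp/e302_example.pyx --filename '*.pyx' --select=E302
# flake8 reports: E302 expected 2 blank lines, found 1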
4 changes: 2 additions & 2 deletions pandas/hashtable.pyx
@@ -192,7 +192,7 @@ def mode_object(ndarray[object] values, ndarray[uint8_t, cast=True] mask):
 
     kh_destroy_pymap(table)
 
-    return modes[:j+1]
+    return modes[:j + 1]
 
 
 @cython.wraparound(False)
@@ -227,7 +227,7 @@ def mode_int64(int64_t[:] values):
 
     kh_destroy_int64(table)
 
-    return modes[:j+1]
+    return modes[:j + 1]
 
 
 @cython.wraparound(False)
37 changes: 20 additions & 17 deletions pandas/index.pyx
@@ -54,7 +54,8 @@ cdef inline is_definitely_invalid_key(object val):
 
     # we have a _data, means we are a NDFrame
     return (PySlice_Check(val) or cnp.PyArray_Check(val)
-            or PyList_Check(val) or hasattr(val,'_data'))
+            or PyList_Check(val) or hasattr(val, '_data'))
 
+
 def get_value_at(ndarray arr, object loc):
     if arr.descr.type_num == NPY_DATETIME:
@@ -63,6 +64,7 @@ def get_value_at(ndarray arr, object loc):
         return Timedelta(util.get_value_at(arr, loc))
     return util.get_value_at(arr, loc)
 
+
 def set_value_at(ndarray arr, object loc, object val):
     return util.set_value_at(arr, loc, val)
 
@@ -302,7 +304,7 @@ cdef class IndexEngine:
         else:
             n_alloc = n
 
-        result = np.empty(n_alloc, dtype=np.int64)
+        result = np.empty(n_alloc, dtype=np.int64)
         missing = np.empty(n_t, dtype=np.int64)
 
         # form the set of the results (like ismember)
@@ -311,7 +313,7 @@ cdef class IndexEngine:
             val = util.get_value_1d(values, i)
             if val in stargets:
                 if val not in d:
-                    d[val] = []
+                    d[val] = []
                 d[val].append(i)
 
         for i in range(n_t):
@@ -322,20 +324,20 @@ cdef class IndexEngine:
             if val in d:
                 for j in d[val]:
 
-                    # realloc if needed
-                    if count >= n_alloc:
-                        n_alloc += 10000
-                        result = np.resize(result, n_alloc)
+                    # realloc if needed
+                    if count >= n_alloc:
+                        n_alloc += 10000
+                        result = np.resize(result, n_alloc)
 
-                    result[count] = j
-                    count += 1
+                    result[count] = j
+                    count += 1
 
             # value not found
             else:
 
                 if count >= n_alloc:
-                    n_alloc += 10000
-                    result = np.resize(result, n_alloc)
+                    n_alloc += 10000
+                    result = np.resize(result, n_alloc)
                 result[count] = -1
                 count += 1
                 missing[count_missing] = i
@@ -479,9 +481,9 @@ cdef Py_ssize_t _bin_search(ndarray values, object val) except -1:
         return mid + 1
 
 _pad_functions = {
-    'object' : algos.pad_object,
-    'int64' : algos.pad_int64,
-    'float64' : algos.pad_float64
+    'object': algos.pad_object,
+    'int64': algos.pad_int64,
+    'float64': algos.pad_float64
 }
 
 _backfill_functions = {
@@ -606,7 +608,7 @@ cdef class TimedeltaEngine(DatetimeEngine):
 
 cpdef convert_scalar(ndarray arr, object value):
     if arr.descr.type_num == NPY_DATETIME:
-        if isinstance(value,np.ndarray):
+        if isinstance(value, np.ndarray):
             pass
         elif isinstance(value, Timestamp):
             return value.value
@@ -615,7 +617,7 @@ cpdef convert_scalar(ndarray arr, object value):
         else:
             return Timestamp(value).value
     elif arr.descr.type_num == NPY_TIMEDELTA:
-        if isinstance(value,np.ndarray):
+        if isinstance(value, np.ndarray):
             pass
         elif isinstance(value, Timedelta):
             return value.value
@@ -639,7 +641,8 @@ cdef inline _to_i8(object val):
         return get_datetime64_value(val)
     elif PyDateTime_Check(val):
         tzinfo = getattr(val, 'tzinfo', None)
-        ival = _pydatetime_to_dts(val, &dts) # Save the original date value so we can get the utcoffset from it.
+        # Save the original date value so we can get the utcoffset from it.
+        ival = _pydatetime_to_dts(val, &dts)
         if tzinfo is not None and not _is_utc(tzinfo):
             offset = tslib._get_utcoffset(tzinfo, val)
             ival -= tslib._delta_to_nanoseconds(offset)
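The two isinstance fixes above add the space that E231 ("missing whitespace after ','") asks for, and moving the trailing comment off the `ival = ...` line resolves E501 (line longer than 79 characters). A single file can still be spot-checked the way the removed ci/lint.sh loop did; a sketch, assuming a pandas checkout:

flake8 pandas/index.pyx --filename '*.pyx' --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126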