From 18119545976238d8100440357da0227f1cf68665 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Thu, 27 Sep 2018 22:03:23 +0100 Subject: [PATCH 01/15] Refactoring lint.sh, wrong path pandas/src corrected, and made more compact and readable --- ci/lint.sh | 280 ++++++++++++++++++----------------------------------- 1 file changed, 93 insertions(+), 187 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index 533e1d18d8e0e..8264d460a10bd 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -1,196 +1,102 @@ #!/bin/bash echo "inside $0" +[[ $LINT ]] || { echo "NOT Linting"; exit 0; } source activate pandas - RET=0 -if [ "$LINT" ]; then - - # We're ignoring the following codes across the board - #E402, # module level import not at top of file - #E731, # do not assign a lambda expression, use a def - #E741, # do not use variables named 'l', 'O', or 'I' - #W503, # line break before binary operator - #C406, # Unnecessary (list/tuple) literal - rewrite as a dict literal. - #C408, # Unnecessary (dict/list/tuple) call - rewrite as a literal. - #C409, # Unnecessary (list/tuple) passed to tuple() - (remove the outer call to tuple()/rewrite as a tuple literal). - #C410 # Unnecessary (list/tuple) passed to list() - (remove the outer call to list()/rewrite as a list literal). - - # pandas/_libs/src is C code, so no need to search there. - echo "Linting *.py" - flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=C406,C408,C409,E402,E731,E741,W503 - if [ $? -ne "0" ]; then - RET=1 - fi - - flake8 scripts/tests --filename=*.py - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Linting *.py DONE" - - echo "Linting setup.py" - flake8 setup.py --ignore=E402,E731,E741,W503 - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Linting setup.py DONE" - - echo "Linting asv_bench/benchmarks/" - flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/*.py --ignore=F811,C406,C408,C409,C410 - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Linting asv_bench/benchmarks/*.py DONE" - - echo "Linting scripts/*.py" - flake8 scripts --filename=*.py --ignore=C408,E402,E731,E741,W503 - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Linting scripts/*.py DONE" - - echo "Linting doc scripts" - flake8 doc/make.py doc/source/conf.py --ignore=E402,E731,E741,W503 - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Linting doc scripts DONE" - - echo "Linting *.pyx" - flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Linting *.pyx DONE" - - echo "Linting *.pxi.in" - for path in 'src' - do - echo "linting -> pandas/$path" - flake8 pandas/$path --filename=*.pxi.in --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 - if [ $? -ne "0" ]; then - RET=1 - fi - done - echo "Linting *.pxi.in DONE" - - echo "Linting *.pxd" - for path in '_libs' - do - echo "linting -> pandas/$path" - flake8 pandas/$path --filename=*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 - if [ $? -ne "0" ]; then - RET=1 - fi - done - echo "Linting *.pxd DONE" - - # readability/casting: Warnings about C casting instead of C++ casting - # runtime/int: Warnings about using C number types instead of C++ ones - # build/include_subdir: Warnings about prefacing included header files with directory - - # We don't lint all C files because we don't want to lint any that are built - # from Cython files nor do we want to lint C files that we didn't modify for - # this particular codebase (e.g. src/headers, src/klib, src/msgpack). However, - # we can lint all header files since they aren't "generated" like C files are. - echo "Linting *.c and *.h" - for path in '*.h' 'parser' 'ujson' - do - echo "linting -> pandas/_libs/src/$path" - cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/_libs/src/$path - if [ $? -ne "0" ]; then - RET=1 - fi - done - echo "linting -> pandas/_libs/tslibs/src/datetime" - cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/_libs/tslibs/src/datetime - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Linting *.c and *.h DONE" - - echo "Check for invalid testing" - - # Check for the following code in testing: - # - # np.testing - # np.array_equal - grep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/ - - if [ $? = "0" ]; then - RET=1 - fi - - # Check for pytest.warns - grep -r -E --include '*.py' 'pytest\.warns' pandas/tests/ - - if [ $? = "0" ]; then - RET=1 - fi - - # Check for the following code in the extension array base tests - # tm.assert_frame_equal - # tm.assert_series_equal - grep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base - - if [ $? = "0" ]; then - RET=1 - fi - - echo "Check for invalid testing DONE" - - # Check for imports from pandas.core.common instead - # of `import pandas.core.common as com` - echo "Check for non-standard imports" - grep -R --include="*.py*" -E "from pandas.core.common import " pandas - if [ $? = "0" ]; then - RET=1 - fi - echo "Check for non-standard imports DONE" - - echo "Check for incorrect sphinx directives" - SPHINX_DIRECTIVES=$(echo \ - "autosummary|contents|currentmodule|deprecated|function|image|"\ - "important|include|ipython|literalinclude|math|module|note|raw|"\ - "seealso|toctree|versionadded|versionchanged|warning" | tr -d "[:space:]") - for path in './pandas' './doc/source' - do - grep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. ($SPHINX_DIRECTIVES):[^:]" $path - if [ $? = "0" ]; then - RET=1 - fi - done - echo "Check for incorrect sphinx directives DONE" - - echo "Check for deprecated messages without sphinx directive" - grep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas - - if [ $? = "0" ]; then - RET=1 - fi - echo "Check for deprecated messages without sphinx directive DONE" - - echo "Check for old-style classes" - grep -R --include="*.py" -E "class\s\S*[^)]:" pandas scripts - - if [ $? = "0" ]; then - RET=1 - fi - echo "Check for old-style classes DONE" - - echo "Check for backticks incorrectly rendering because of missing spaces" - grep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/ - - if [ $? = "0" ]; then - RET=1 - fi - echo "Check for backticks incorrectly rendering because of missing spaces DONE" - -else - echo "NOT Linting" -fi + +### LINTING ### + +# We're ignoring the following codes across the board +# E402 module level import not at top of file +# E731 do not assign a lambda expression, use a def +# E741 do not use variables named 'l', 'O', or 'I' +# W503 line break before binary operator +# C406 Unnecessary (list/tuple) literal - rewrite as a dict literal. +# C408 Unnecessary (dict/list/tuple) call - rewrite as a literal. +# C409 Unnecessary (list/tuple) passed to tuple() - (remove the outer call to tuple()/rewrite as a tuple literal). +# C410 Unnecessary (list/tuple) passed to list() - (remove the outer call to list()/rewrite as a list literal). + +# pandas/_libs/src is C code, so no need to search there. +MSG='Linting .py code' ; echo $MSG +flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=C406,C408,C409,E402,E731,E741,W503 +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Linting .pyx code' ; echo $MSG +flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Linting .pxd and .pxi.in' ; echo $MSG +flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Linting setup.py' ; echo $MSG +flake8 setup.py --ignore=E402,E731,E741,W503 +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Linting scripts' ; echo $MSG +flake8 scripts --filename=*.py --ignore=C408,E402,E731,E741,W503 +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Linting asv benchmarks' ; echo $MSG +flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/*.py --ignore=F811,C406,C408,C409,C410 +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Linting doc scripts' ; echo $MSG +flake8 doc/make.py doc/source/conf.py --ignore=E402,E731,E741,W503 +RET=$(($RET + $?)) ; echo $MSG "DONE" + +# readability/casting: Warnings about C casting instead of C++ casting +# runtime/int: Warnings about using C number types instead of C++ ones +# build/include_subdir: Warnings about prefacing included header files with directory + +# We don't lint all C files because we don't want to lint any that are built +# from Cython files nor do we want to lint C files that we didn't modify for +# this particular codebase (e.g. src/headers, src/klib, src/msgpack). However, +# we can lint all header files since they aren't "generated" like C files are. +MSG='Linting .c and .h' ; echo $MSG +cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime +RET=$(($RET + $?)) ; echo $MSG "DONE" + + +### CHECKS ### + +# Check for imports from pandas.core.common instead of `import pandas.core.common as com` +MSG='Check for non-standard imports' ; echo $MSG +! grep -R --include="*.py*" -E "from pandas.core.common import " pandas +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Check for pytest warns' ; echo $MSG +! grep -r -E --include '*.py' 'pytest\.warns' pandas/tests/ +RET=$(($RET + $?)) ; echo $MSG "DONE" + +# Check for the following code in testing: `np.testing` and `np.array_equal` +MSG='Check for invalid testing' ; echo $MSG +! grep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/ +RET=$(($RET + $?)) ; echo $MSG "DONE" + +# Check for the following code in the extension array base tests: `tm.assert_frame_equal` and `tm.assert_series_equal` +MSG='Check for invalid EA testing' ; echo $MSG +! grep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Check for deprecated messages without sphinx directive' ; echo $MSG +! grep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Check for old-style classes' ; echo $MSG +! grep -R --include="*.py" -E "class\s\S*[^)]:" pandas scripts +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG +! grep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/ +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Check for incorrect sphinx directives' ; echo $MSG +! grep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source +RET=$(($RET + $?)) ; echo $MSG "DONE" + exit $RET From 6ace7efe1109d3c148b8c57ac20fa15556c95a56 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 28 Sep 2018 11:32:58 +0100 Subject: [PATCH 02/15] fixing linting errors in pxi.in files (linting was broken) --- pandas/_libs/algos_common_helper.pxi.in | 1 + pandas/_libs/groupby_helper.pxi.in | 3 +++ pandas/_libs/hashtable_class_helper.pxi.in | 12 ++++++------ pandas/_libs/hashtable_func_helper.pxi.in | 4 +++- pandas/_libs/join_func_helper.pxi.in | 1 - 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 40b1b1a282670..f22559b6991f7 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -150,6 +150,7 @@ def get_dispatch(dtypes): {{for name, c_type, dtype in get_dispatch(dtypes)}} + def ensure_{{name}}(object arr, copy=True): if util.is_array(arr): if ( arr).descr.type_num == NPY_{{c_type}}: diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 765381d89705d..5b01117381a27 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -351,6 +351,7 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, else: out[i, j] = resx[i, j] + @cython.wraparound(False) @cython.boundscheck(False) def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, @@ -410,6 +411,8 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, {{if name != 'object'}} + + @cython.boundscheck(False) @cython.wraparound(False) def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index f294fd141a9f1..3ff98b7b5a9b5 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -466,12 +466,12 @@ cdef class {{name}}HashTable(HashTable): @cython.boundscheck(False) def unique(self, const {{dtype}}_t[:] values): cdef: - Py_ssize_t i, n = len(values) - int ret = 0 - {{dtype}}_t val - khiter_t k - {{name}}Vector uniques = {{name}}Vector() - {{name}}VectorData *ud + Py_ssize_t i, n = len(values) + int ret = 0 + {{dtype}}_t val + khiter_t k + {{name}}Vector uniques = {{name}}Vector() + {{name}}VectorData *ud ud = uniques.data diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 45a69b613f698..3d35e7014b408 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -128,6 +128,7 @@ cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna): @cython.boundscheck(False) {{if dtype == 'object'}} + def duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'): {{else}} @@ -210,9 +211,11 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'): @cython.boundscheck(False) {{if dtype == 'object'}} + def ismember_{{dtype}}(ndarray[{{scalar}}] arr, ndarray[{{scalar}}] values): {{else}} + def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values): {{endif}} @@ -236,7 +239,6 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values): {{scalar}} val kh_{{ttype}}_t * table = kh_init_{{ttype}}() - # construct the table n = len(values) kh_resize_{{ttype}}(table, min(n, len(values))) diff --git a/pandas/_libs/join_func_helper.pxi.in b/pandas/_libs/join_func_helper.pxi.in index 73d231b8588dc..eca92c2bf305f 100644 --- a/pandas/_libs/join_func_helper.pxi.in +++ b/pandas/_libs/join_func_helper.pxi.in @@ -25,7 +25,6 @@ on_dtypes = ['uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', }} - {{for table_type, by_dtype in by_dtypes}} {{for on_dtype in on_dtypes}} From a28a612d6e62f6897127aef63d2655122a9a3313 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 28 Sep 2018 12:20:56 +0100 Subject: [PATCH 03/15] unifying the flake8 error codes that are being ignored --- ci/lint.sh | 36 ++++++++++++++---------------------- setup.cfg | 9 +++------ 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index 8264d460a10bd..aa811acb97a8e 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -9,43 +9,35 @@ RET=0 ### LINTING ### -# We're ignoring the following codes across the board -# E402 module level import not at top of file -# E731 do not assign a lambda expression, use a def -# E741 do not use variables named 'l', 'O', or 'I' -# W503 line break before binary operator -# C406 Unnecessary (list/tuple) literal - rewrite as a dict literal. -# C408 Unnecessary (dict/list/tuple) call - rewrite as a literal. -# C409 Unnecessary (list/tuple) passed to tuple() - (remove the outer call to tuple()/rewrite as a tuple literal). -# C410 Unnecessary (list/tuple) passed to list() - (remove the outer call to list()/rewrite as a list literal). +# `setup.cfg` contains the list of error codes that are being ignored in flake8 # pandas/_libs/src is C code, so no need to search there. MSG='Linting .py code' ; echo $MSG -flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=C406,C408,C409,E402,E731,E741,W503 -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Linting .pyx code' ; echo $MSG -flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Linting .pxd and .pxi.in' ; echo $MSG -flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 +flake8 pandas --filename=*.py --exclude pandas/_libs/src RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Linting setup.py' ; echo $MSG -flake8 setup.py --ignore=E402,E731,E741,W503 +flake8 setup.py RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Linting scripts' ; echo $MSG -flake8 scripts --filename=*.py --ignore=C408,E402,E731,E741,W503 +flake8 scripts --filename=*.py RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Linting asv benchmarks' ; echo $MSG -flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/*.py --ignore=F811,C406,C408,C409,C410 +flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/*.py RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Linting doc scripts' ; echo $MSG -flake8 doc/make.py doc/source/conf.py --ignore=E402,E731,E741,W503 +flake8 doc/make.py doc/source/conf.py +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Linting .pyx code' ; echo $MSG +flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Linting .pxd and .pxi.in' ; echo $MSG +flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 RET=$(($RET + $?)) ; echo $MSG "DONE" # readability/casting: Warnings about C casting instead of C++ casting diff --git a/setup.cfg b/setup.cfg index e4a2357def474..c4bb965da3413 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,14 +13,11 @@ parentdir_prefix = pandas- [flake8] ignore = + W503, # line break before binary operator E402, # module level import not at top of file + E722, # do not use bare except E731, # do not assign a lambda expression, use a def - W503, # line break before binary operator - C405, # Unnecessary (list/tuple) literal - rewrite as a set literal. - C406, # Unnecessary (list/tuple) literal - rewrite as a dict literal. - C408, # Unnecessary (dict/list/tuple) call - rewrite as a literal. - C409, # Unnecessary (list/tuple) passed to tuple() - (remove the outer call to tuple()/rewrite as a tuple literal). - C410 # Unnecessary (list/tuple) passed to list() - (remove the outer call to list()/rewrite as a list literal). + E741 # ambiguous variable name 'l' max-line-length = 79 [yapf] From af76f111a12d961475bbc46860d4bf06cc45d816 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 28 Sep 2018 13:54:10 +0100 Subject: [PATCH 04/15] Restoring flake errors C406, C408 and C409 --- setup.cfg | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index c4bb965da3413..c17a91116c5ac 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,7 +17,10 @@ ignore = E402, # module level import not at top of file E722, # do not use bare except E731, # do not assign a lambda expression, use a def - E741 # ambiguous variable name 'l' + E741, # ambiguous variable name 'l' + C406, # Unnecessary list literal - rewrite as a dict literal. + C408, # Unnecessary dict call - rewrite as a literal. + C409 # Unnecessary list passed to tuple() - rewrite as a tuple literal. max-line-length = 79 [yapf] From 07ebcd013e251402ca17e492f026389e30afd89d Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 28 Sep 2018 14:51:51 +0100 Subject: [PATCH 05/15] Unpinning flake8, to see if linting works with the latest version --- ci/travis-27.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/travis-27.yaml b/ci/travis-27.yaml index a921bcb46dba4..9d83bb5c5d8dd 100644 --- a/ci/travis-27.yaml +++ b/ci/travis-27.yaml @@ -8,7 +8,7 @@ dependencies: - cython=0.28.2 - fastparquet - feather-format - - flake8=3.4.1 + - flake8 - flake8-comprehensions - gcsfs - html5lib From 34e85f00dba7a201387f40100e92381bb5f7f2c9 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 29 Sep 2018 10:34:50 +0100 Subject: [PATCH 06/15] Synchronizing flake8 config in pep8speaks --- .pep8speaks.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.pep8speaks.yml b/.pep8speaks.yml index cd610907007eb..c3a85d595eb59 100644 --- a/.pep8speaks.yml +++ b/.pep8speaks.yml @@ -3,9 +3,18 @@ scanner: diff_only: True # If True, errors caused by only the patch are shown +# Opened issue in pep8speaks, so we can directly use the config in setup.cfg +# (and avoid having to duplicate it here): +# https://github.com/OrkoHunter/pep8speaks/issues/95 + pycodestyle: max-line-length: 79 - ignore: # Errors and warnings to ignore + ignore: + - W503, # line break before binary operator - E402, # module level import not at top of file + - E722, # do not use bare except - E731, # do not assign a lambda expression, use a def - - W503 # line break before binary operator + - E741, # ambiguous variable name 'l' + - C406, # Unnecessary list literal - rewrite as a dict literal. + - C408, # Unnecessary dict call - rewrite as a literal. + - C409 # Unnecessary list passed to tuple() - rewrite as a tuple literal. From adcda478c3af1ae0edeaf09630ce2c3d6926b36a Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 29 Sep 2018 13:10:34 +0100 Subject: [PATCH 07/15] Unifying all flake8 of .py files in one command, and moving excludes to setup.cfg --- ci/lint.sh | 18 +----------------- ci/print_versions.py | 3 ++- setup.cfg | 9 ++++++++- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index aa811acb97a8e..9532384fcdff8 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -13,23 +13,7 @@ RET=0 # pandas/_libs/src is C code, so no need to search there. MSG='Linting .py code' ; echo $MSG -flake8 pandas --filename=*.py --exclude pandas/_libs/src -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Linting setup.py' ; echo $MSG -flake8 setup.py -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Linting scripts' ; echo $MSG -flake8 scripts --filename=*.py -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Linting asv benchmarks' ; echo $MSG -flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/*.py -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Linting doc scripts' ; echo $MSG -flake8 doc/make.py doc/source/conf.py +flake8 . RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Linting .pyx code' ; echo $MSG diff --git a/ci/print_versions.py b/ci/print_versions.py index 8be795174d76d..a2c93748b0388 100755 --- a/ci/print_versions.py +++ b/ci/print_versions.py @@ -18,7 +18,8 @@ def show_versions(as_json=False): from optparse import OptionParser parser = OptionParser() parser.add_option("-j", "--json", metavar="FILE", nargs=1, - help="Save output as JSON into file, pass in '-' to output to stdout") + help="Save output as JSON into file, " + "pass in '-' to output to stdout") (options, args) = parser.parse_args() diff --git a/setup.cfg b/setup.cfg index c17a91116c5ac..29392d7f15345 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,6 +12,7 @@ tag_prefix = v parentdir_prefix = pandas- [flake8] +max-line-length = 79 ignore = W503, # line break before binary operator E402, # module level import not at top of file @@ -21,7 +22,13 @@ ignore = C406, # Unnecessary list literal - rewrite as a dict literal. C408, # Unnecessary dict call - rewrite as a literal. C409 # Unnecessary list passed to tuple() - rewrite as a tuple literal. -max-line-length = 79 +exclude = + asv_bench/*.py, # TODO we should fix linting in those files instead of excluding + doc/sphinxext/*.py, + doc/build/*.py, + doc/temp/*.py, + .eggs/*.py, + versioneer.py [yapf] based_on_style = pep8 From c12919736316b454a146204f78cdf0e0df4581c9 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 29 Sep 2018 15:28:25 +0100 Subject: [PATCH 08/15] Printing flake8 version, and moving doctests and check_import to lint.sh --- .travis.yml | 4 --- ci/check_imports.py | 37 ---------------------------- ci/doctests.sh | 60 --------------------------------------------- ci/lint.sh | 42 +++++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 101 deletions(-) delete mode 100644 ci/check_imports.py delete mode 100755 ci/doctests.sh diff --git a/.travis.yml b/.travis.yml index 40baee2c03ea0..e40bc3830320a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -115,10 +115,6 @@ script: - ci/script_single.sh - ci/script_multi.sh - ci/lint.sh - - ci/doctests.sh - - echo "checking imports" - - source activate pandas && python ci/check_imports.py - - echo "script done" after_success: - ci/upload_coverage.sh diff --git a/ci/check_imports.py b/ci/check_imports.py deleted file mode 100644 index 19e48b659617f..0000000000000 --- a/ci/check_imports.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Check that certain modules are not loaded by `import pandas` -""" -import sys - -blacklist = { - 'bs4', - 'gcsfs', - 'html5lib', - 'ipython', - 'jinja2' - 'hypothesis', - 'lxml', - 'numexpr', - 'openpyxl', - 'py', - 'pytest', - 's3fs', - 'scipy', - 'tables', - 'xlrd', - 'xlsxwriter', - 'xlwt', -} - - -def main(): - import pandas # noqa - - modules = set(x.split('.')[0] for x in sys.modules) - imported = modules & blacklist - if modules & blacklist: - sys.exit("Imported {}".format(imported)) - - -if __name__ == '__main__': - main() diff --git a/ci/doctests.sh b/ci/doctests.sh deleted file mode 100755 index b3d7f6785815a..0000000000000 --- a/ci/doctests.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -echo "inside $0" - - -source activate pandas -cd "$TRAVIS_BUILD_DIR" - -RET=0 - -if [ "$DOCTEST" ]; then - - echo "Running doctests" - - # running all doctests is not yet working - # pytest --doctest-modules --ignore=pandas/tests -v pandas - - # if [ $? -ne "0" ]; then - # RET=1 - # fi - - # DataFrame / Series docstrings - pytest --doctest-modules -v pandas/core/frame.py \ - -k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" - - if [ $? -ne "0" ]; then - RET=1 - fi - - pytest --doctest-modules -v pandas/core/series.py \ - -k"-nonzero -reindex -searchsorted -to_dict" - - if [ $? -ne "0" ]; then - RET=1 - fi - - pytest --doctest-modules -v pandas/core/generic.py \ - -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -transpose -values -xs" - - if [ $? -ne "0" ]; then - RET=1 - fi - - # top-level reshaping functions - pytest --doctest-modules -v \ - pandas/core/reshape/concat.py \ - pandas/core/reshape/pivot.py \ - pandas/core/reshape/reshape.py \ - pandas/core/reshape/tile.py \ - -k"-crosstab -pivot_table -cut" - - if [ $? -ne "0" ]; then - RET=1 - fi - -else - echo "NOT running doctests" -fi - -exit $RET diff --git a/ci/lint.sh b/ci/lint.sh index 9532384fcdff8..4b090955ab6f8 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -74,5 +74,47 @@ MSG='Check for incorrect sphinx directives' ; echo $MSG ! grep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source RET=$(($RET + $?)) ; echo $MSG "DONE" +MSG='Check for modules that pandas should not import' ; echo $MSG +python -c " +import sys +import pandas + +blacklist = {'bs4', 'gcsfs', 'html5lib', 'ipython', 'jinja2' 'hypothesis', + 'lxml', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy', + 'tables', 'xlrd', 'xlsxwriter', 'xlwt'} +mods = blacklist & set(m.split('.')[0] for m in sys.modules) +if mods: + sys.stderr.write('pandas should not import: {}\n'.format(', '.join(mods))) + sys.exit(len(mods)) +" +RET=$(($RET + $?)) ; echo $MSG "DONE" + + +### DOCTESTS ### + +MSG='Doctests frame.py' ; echo $MSG +pytest --doctest-modules -v pandas/core/frame.py \ + -k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Doctests series.py' ; echo $MSG +pytest --doctest-modules -v pandas/core/series.py \ + -k"-nonzero -reindex -searchsorted -to_dict" +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Doctests generic.py' ; echo $MSG +pytest --doctest-modules -v pandas/core/generic.py \ + -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -transpose -values -xs" +RET=$(($RET + $?)) ; echo $MSG "DONE" + +MSG='Doctests top-level reshaping functions' ; echo $MSG +pytest --doctest-modules -v \ + pandas/core/reshape/concat.py \ + pandas/core/reshape/pivot.py \ + pandas/core/reshape/reshape.py \ + pandas/core/reshape/tile.py \ + -k"-crosstab -pivot_table -cut" +RET=$(($RET + $?)) ; echo $MSG "DONE" + exit $RET From 803b2179a06673caaa585569873e2d0118943792 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 29 Sep 2018 18:19:43 +0100 Subject: [PATCH 09/15] Moving linting from py2.7 to py3.6 --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index e40bc3830320a..263f696e41def 100644 --- a/.travis.yml +++ b/.travis.yml @@ -45,14 +45,14 @@ matrix: - language-pack-zh-hans - dist: trusty env: - - JOB="2.7, lint" ENV_FILE="ci/travis-27.yaml" TEST_ARGS="--skip-slow" LINT=true + - JOB="2.7" ENV_FILE="ci/travis-27.yaml" TEST_ARGS="--skip-slow" addons: apt: packages: - python-gtk2 - dist: trusty env: - - JOB="3.6, coverage" ENV_FILE="ci/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true DOCTEST=true + - JOB="3.6, lint, coverage" ENV_FILE="ci/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true LINT=true # In allow_failures - dist: trusty env: From 62a88136cb2244f83a8a674388f101647a68f5e3 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 29 Sep 2018 19:05:32 +0100 Subject: [PATCH 10/15] Moving flake8/cpplint requirements to 3.6, as this is now where the linting is happening --- ci/lint.sh | 3 +++ ci/travis-27.yaml | 3 --- ci/travis-36.yaml | 3 +++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index 4b090955ab6f8..630ea8684ae57 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -11,6 +11,9 @@ RET=0 # `setup.cfg` contains the list of error codes that are being ignored in flake8 +echo "flake8 --version" +flake8 --version + # pandas/_libs/src is C code, so no need to search there. MSG='Linting .py code' ; echo $MSG flake8 . diff --git a/ci/travis-27.yaml b/ci/travis-27.yaml index 9d83bb5c5d8dd..2a992d6e534b8 100644 --- a/ci/travis-27.yaml +++ b/ci/travis-27.yaml @@ -8,8 +8,6 @@ dependencies: - cython=0.28.2 - fastparquet - feather-format - - flake8 - - flake8-comprehensions - gcsfs - html5lib - ipython @@ -48,6 +46,5 @@ dependencies: - hypothesis>=3.58.0 - pip: - backports.lzma - - cpplint - pandas-gbq - pathlib diff --git a/ci/travis-36.yaml b/ci/travis-36.yaml index 3c9daa5f8b73c..a6260f36617a6 100644 --- a/ci/travis-36.yaml +++ b/ci/travis-36.yaml @@ -8,6 +8,8 @@ dependencies: - dask - fastparquet - feather-format + - flake8 + - flake8-comprehensions - gcsfs - geopandas - html5lib @@ -45,5 +47,6 @@ dependencies: - pip: - brotlipy - coverage + - cpplint - pandas-datareader - python-dateutil From cabe3530f62b04307aca7f25058689fb5079d1b5 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 1 Oct 2018 16:46:01 +0100 Subject: [PATCH 11/15] lint.sh renamed to code_checks.sh, and parameters now supported to call lint, patterns or doctests independently --- ci/code_checks.sh | 131 ++++++++++++++++++++++++++++++++++++++++++++++ ci/lint.sh | 123 ------------------------------------------- 2 files changed, 131 insertions(+), 123 deletions(-) create mode 100755 ci/code_checks.sh delete mode 100755 ci/lint.sh diff --git a/ci/code_checks.sh b/ci/code_checks.sh new file mode 100755 index 0000000000000..ac745eac7cf6d --- /dev/null +++ b/ci/code_checks.sh @@ -0,0 +1,131 @@ +#!/bin/bash + +echo "inside $0" +[[ $LINT ]] || { echo "NOT Linting"; exit 0; } +[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "doctests" ]] || { echo "Unkown command $1. Usage: $0 [lint|patterns|doctests]"; exit 9999; } + +source activate pandas +RET=0 +CHECK=$1 + + +### LINTING ### +if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then + + # `setup.cfg` contains the list of error codes that are being ignored in flake8 + + echo "flake8 --version" + flake8 --version + + # pandas/_libs/src is C code, so no need to search there. + MSG='Linting .py code' ; echo $MSG + flake8 . + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Linting .pyx code' ; echo $MSG + flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Linting .pxd and .pxi.in' ; echo $MSG + flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 + RET=$(($RET + $?)) ; echo $MSG "DONE" + + # readability/casting: Warnings about C casting instead of C++ casting + # runtime/int: Warnings about using C number types instead of C++ ones + # build/include_subdir: Warnings about prefacing included header files with directory + + # We don't lint all C files because we don't want to lint any that are built + # from Cython files nor do we want to lint C files that we didn't modify for + # this particular codebase (e.g. src/headers, src/klib, src/msgpack). However, + # we can lint all header files since they aren't "generated" like C files are. + MSG='Linting .c and .h' ; echo $MSG + cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime + RET=$(($RET + $?)) ; echo $MSG "DONE" + +fi + +### PATTERNS ### +if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then + + # Check for imports from pandas.core.common instead of `import pandas.core.common as com` + MSG='Check for non-standard imports' ; echo $MSG + ! grep -R --include="*.py*" -E "from pandas.core.common import " pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for pytest warns' ; echo $MSG + ! grep -r -E --include '*.py' 'pytest\.warns' pandas/tests/ + RET=$(($RET + $?)) ; echo $MSG "DONE" + + # Check for the following code in testing: `np.testing` and `np.array_equal` + MSG='Check for invalid testing' ; echo $MSG + ! grep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/ + RET=$(($RET + $?)) ; echo $MSG "DONE" + + # Check for the following code in the extension array base tests: `tm.assert_frame_equal` and `tm.assert_series_equal` + MSG='Check for invalid EA testing' ; echo $MSG + ! grep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for deprecated messages without sphinx directive' ; echo $MSG + ! grep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for old-style classes' ; echo $MSG + ! grep -R --include="*.py" -E "class\s\S*[^)]:" pandas scripts + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG + ! grep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/ + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for incorrect sphinx directives' ; echo $MSG + ! grep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for modules that pandas should not import' ; echo $MSG + python -c " +import sys +import pandas + +blacklist = {'bs4', 'gcsfs', 'html5lib', 'ipython', 'jinja2' 'hypothesis', + 'lxml', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy', + 'tables', 'xlrd', 'xlsxwriter', 'xlwt'} +mods = blacklist & set(m.split('.')[0] for m in sys.modules) +if mods: + sys.stderr.write('pandas should not import: {}\n'.format(', '.join(mods))) + sys.exit(len(mods)) + " + RET=$(($RET + $?)) ; echo $MSG "DONE" + +fi + +### DOCTESTS ### +if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then + + MSG='Doctests frame.py' ; echo $MSG + pytest --doctest-modules -v pandas/core/frame.py \ + -k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests series.py' ; echo $MSG + pytest --doctest-modules -v pandas/core/series.py \ + -k"-nonzero -reindex -searchsorted -to_dict" + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests generic.py' ; echo $MSG + pytest --doctest-modules -v pandas/core/generic.py \ + -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -transpose -values -xs" + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests top-level reshaping functions' ; echo $MSG + pytest --doctest-modules -v \ + pandas/core/reshape/concat.py \ + pandas/core/reshape/pivot.py \ + pandas/core/reshape/reshape.py \ + pandas/core/reshape/tile.py \ + -k"-crosstab -pivot_table -cut" + RET=$(($RET + $?)) ; echo $MSG "DONE" + +fi + +exit $RET diff --git a/ci/lint.sh b/ci/lint.sh deleted file mode 100755 index 630ea8684ae57..0000000000000 --- a/ci/lint.sh +++ /dev/null @@ -1,123 +0,0 @@ -#!/bin/bash - -echo "inside $0" -[[ $LINT ]] || { echo "NOT Linting"; exit 0; } - -source activate pandas -RET=0 - - -### LINTING ### - -# `setup.cfg` contains the list of error codes that are being ignored in flake8 - -echo "flake8 --version" -flake8 --version - -# pandas/_libs/src is C code, so no need to search there. -MSG='Linting .py code' ; echo $MSG -flake8 . -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Linting .pyx code' ; echo $MSG -flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Linting .pxd and .pxi.in' ; echo $MSG -flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 -RET=$(($RET + $?)) ; echo $MSG "DONE" - -# readability/casting: Warnings about C casting instead of C++ casting -# runtime/int: Warnings about using C number types instead of C++ ones -# build/include_subdir: Warnings about prefacing included header files with directory - -# We don't lint all C files because we don't want to lint any that are built -# from Cython files nor do we want to lint C files that we didn't modify for -# this particular codebase (e.g. src/headers, src/klib, src/msgpack). However, -# we can lint all header files since they aren't "generated" like C files are. -MSG='Linting .c and .h' ; echo $MSG -cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime -RET=$(($RET + $?)) ; echo $MSG "DONE" - - -### CHECKS ### - -# Check for imports from pandas.core.common instead of `import pandas.core.common as com` -MSG='Check for non-standard imports' ; echo $MSG -! grep -R --include="*.py*" -E "from pandas.core.common import " pandas -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Check for pytest warns' ; echo $MSG -! grep -r -E --include '*.py' 'pytest\.warns' pandas/tests/ -RET=$(($RET + $?)) ; echo $MSG "DONE" - -# Check for the following code in testing: `np.testing` and `np.array_equal` -MSG='Check for invalid testing' ; echo $MSG -! grep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/ -RET=$(($RET + $?)) ; echo $MSG "DONE" - -# Check for the following code in the extension array base tests: `tm.assert_frame_equal` and `tm.assert_series_equal` -MSG='Check for invalid EA testing' ; echo $MSG -! grep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Check for deprecated messages without sphinx directive' ; echo $MSG -! grep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Check for old-style classes' ; echo $MSG -! grep -R --include="*.py" -E "class\s\S*[^)]:" pandas scripts -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG -! grep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/ -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Check for incorrect sphinx directives' ; echo $MSG -! grep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Check for modules that pandas should not import' ; echo $MSG -python -c " -import sys -import pandas - -blacklist = {'bs4', 'gcsfs', 'html5lib', 'ipython', 'jinja2' 'hypothesis', - 'lxml', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy', - 'tables', 'xlrd', 'xlsxwriter', 'xlwt'} -mods = blacklist & set(m.split('.')[0] for m in sys.modules) -if mods: - sys.stderr.write('pandas should not import: {}\n'.format(', '.join(mods))) - sys.exit(len(mods)) -" -RET=$(($RET + $?)) ; echo $MSG "DONE" - - -### DOCTESTS ### - -MSG='Doctests frame.py' ; echo $MSG -pytest --doctest-modules -v pandas/core/frame.py \ - -k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Doctests series.py' ; echo $MSG -pytest --doctest-modules -v pandas/core/series.py \ - -k"-nonzero -reindex -searchsorted -to_dict" -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Doctests generic.py' ; echo $MSG -pytest --doctest-modules -v pandas/core/generic.py \ - -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -transpose -values -xs" -RET=$(($RET + $?)) ; echo $MSG "DONE" - -MSG='Doctests top-level reshaping functions' ; echo $MSG -pytest --doctest-modules -v \ - pandas/core/reshape/concat.py \ - pandas/core/reshape/pivot.py \ - pandas/core/reshape/reshape.py \ - pandas/core/reshape/tile.py \ - -k"-crosstab -pivot_table -cut" -RET=$(($RET + $?)) ; echo $MSG "DONE" - - -exit $RET From 9a76f5841ed8578525d56e6fbe018b9655622341 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 1 Oct 2018 21:53:17 +0100 Subject: [PATCH 12/15] Fixing linting script name in travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 263f696e41def..1f209e5ae25ee 100644 --- a/.travis.yml +++ b/.travis.yml @@ -114,7 +114,7 @@ script: - ci/run_build_docs.sh - ci/script_single.sh - ci/script_multi.sh - - ci/lint.sh + - ci/code_checks.sh after_success: - ci/upload_coverage.sh From 0ec16feca420859e790f4e2dab4891cc9296e253 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 9 Oct 2018 20:15:56 +0530 Subject: [PATCH 13/15] Adding documentation to the new linting script --- ci/code_checks.sh | 14 ++++++++++++++ doc/source/contributing.rst | 11 +++++++++++ 2 files changed, 25 insertions(+) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 28fd9c00b961c..02aa4191edd17 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -1,4 +1,18 @@ #!/bin/bash +# +# Run checks related to code quality. +# +# This script is intended for both the CI and to check locally that code standards are +# respected. We are currently linting (PEP-8 and similar), looking for patterns of +# common mistakes (sphinx directives with missing blank lines, old style classes, +# unwanted imports...), and we also run doctests here (currently some files only). +# In the future we may want to add the validation of docstrings and other checks here. +# +# Usage: +# $ ./ci/code_checks.sh # run all checks +# $ ./ci/code_checks.sh lint # run linting only +# $ ./ci/code_checks.sh patterns # check for patterns that should not exist +# $ ./ci/code_checks.sh doctests # run doctests echo "inside $0" [[ $LINT ]] || { echo "NOT Linting"; exit 0; } diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 445f9a7e5e980..8cbbc236e0323 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -497,6 +497,17 @@ tools will be run to check your code for stylistic errors. Generating any warnings will cause the test to fail. Thus, good style is a requirement for submitting code to *pandas*. +There is a tool in pandas to help contributors verify their changes before +contributing them to the project: + + ./ci/code_checks.sh + +The script verify the linting of code files, it looks for common mistake patterns +(like missing spaces around sphinx directives that make the documentation not +being rendered properly) and it also validates the doctests. It is possible to +run the checks independently by using the parameters ``lint``, ``patterns`` and +``doctests`` (e.g. ``./ci/code_checks.sh lint``). + In addition, because a lot of people use our library, it is important that we do not make sudden changes to the code that could have the potential to break a lot of user code as a result, that is, we need it to be as *backwards compatible* From cd38de3dfd3b2ec71631c4dd53736877faad17cb Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 9 Oct 2018 22:36:57 +0530 Subject: [PATCH 14/15] Fix linting error in the code, and added more information for when LINT environment variable is false. --- ci/code_checks.sh | 2 +- pandas/_libs/join_helper.pxi.in | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 02aa4191edd17..eced3bf34e7c6 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -15,7 +15,7 @@ # $ ./ci/code_checks.sh doctests # run doctests echo "inside $0" -[[ $LINT ]] || { echo "NOT Linting"; exit 0; } +[[ $LINT ]] || { echo "NOT Linting. To lint use: LINT=true $0 $1"; exit 0; } [[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "doctests" ]] || { echo "Unkown command $1. Usage: $0 [lint|patterns|doctests]"; exit 9999; } source activate pandas diff --git a/pandas/_libs/join_helper.pxi.in b/pandas/_libs/join_helper.pxi.in index 6ba587a5b04ea..35dedf90f8ca4 100644 --- a/pandas/_libs/join_helper.pxi.in +++ b/pandas/_libs/join_helper.pxi.in @@ -93,6 +93,7 @@ def get_dispatch(dtypes): {{for name, c_type, dtype in get_dispatch(dtypes)}} + # @cython.wraparound(False) # @cython.boundscheck(False) def left_join_indexer_{{name}}(ndarray[{{c_type}}] left, From 8738e807853a9733496a142c32c2ffe41c75a46a Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 9 Oct 2018 23:12:59 +0530 Subject: [PATCH 15/15] Fixing formatting in the documentation --- doc/source/contributing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 8cbbc236e0323..f898ef54e4101 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -498,7 +498,7 @@ Generating any warnings will cause the test to fail. Thus, good style is a requirement for submitting code to *pandas*. There is a tool in pandas to help contributors verify their changes before -contributing them to the project: +contributing them to the project:: ./ci/code_checks.sh