Skip to content

STYLE: Fixing and refactoring linting #22863

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Oct 9, 2018
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
1811954
Refactoring lint.sh, wrong path pandas/src corrected, and made more c…
datapythonista Sep 27, 2018
6ace7ef
fixing linting errors in pxi.in files (linting was broken)
datapythonista Sep 28, 2018
a28a612
unifying the flake8 error codes that are being ignored
datapythonista Sep 28, 2018
af76f11
Restoring flake errors C406, C408 and C409
datapythonista Sep 28, 2018
07ebcd0
Unpinning flake8, to see if linting works with the latest version
datapythonista Sep 28, 2018
34e85f0
Synchronizing flake8 config in pep8speaks
datapythonista Sep 29, 2018
adcda47
Unifying all flake8 of .py files in one command, and moving excludes …
datapythonista Sep 29, 2018
c129197
Printing flake8 version, and moving doctests and check_import to lint.sh
datapythonista Sep 29, 2018
803b217
Moving linting from py2.7 to py3.6
datapythonista Sep 29, 2018
62a8813
Moving flake8/cpplint requirements to 3.6, as this is now where the l…
datapythonista Sep 29, 2018
720f18c
Merge remote-tracking branch 'upstream/master' into lint_refactor
datapythonista Sep 29, 2018
cabe353
lint.sh renamed to code_checks.sh, and parameters now supported to ca…
datapythonista Oct 1, 2018
9a76f58
Fixing linting script name in travis
datapythonista Oct 1, 2018
f4e17ad
Merging from master
datapythonista Oct 4, 2018
56eca8f
merging from master
datapythonista Oct 9, 2018
879157f
Merge remote-tracking branch 'upstream/master' into lint_refactor
datapythonista Oct 9, 2018
0ec16fe
Adding documentation to the new linting script
datapythonista Oct 9, 2018
cd38de3
Fix linting error in the code, and added more information for when LI…
datapythonista Oct 9, 2018
8738e80
Fixing formatting in the documentation
datapythonista Oct 9, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
272 changes: 85 additions & 187 deletions ci/lint.sh
Original file line number Diff line number Diff line change
@@ -1,196 +1,94 @@
#!/bin/bash
# CI entry point for lint and static checks. Does nothing unless the LINT
# environment variable is set (non-empty) by the CI configuration.

echo "inside $0"
# Fast exit when linting is disabled for this build.
[[ $LINT ]] || { echo "NOT Linting"; exit 0; }

# Activate the conda environment that provides flake8/cpplint.
# NOTE(review): assumes conda's shell functions are already sourced — confirm.
source activate pandas

# Accumulated status; any failing check below makes it non-zero.
RET=0

if [ "$LINT" ]; then

  # Error codes ignored across the board:
  #   E402  module level import not at top of file
  #   E731  do not assign a lambda expression, use a def
  #   E741  do not use variables named 'l', 'O', or 'I'
  #   W503  line break before binary operator
  #   C406  Unnecessary (list/tuple) literal - rewrite as a dict literal.
  #   C408  Unnecessary (dict/list/tuple) call - rewrite as a literal.
  #   C409  Unnecessary (list/tuple) passed to tuple() - (remove the outer call to tuple()/rewrite as a tuple literal).
  #   C410  Unnecessary (list/tuple) passed to list() - (remove the outer call to list()/rewrite as a list literal).

  # pandas/_libs/src is C code, so no need to search there.
  echo "Linting *.py"
  flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=C406,C408,C409,E402,E731,E741,W503 || RET=1
  flake8 scripts/tests --filename=*.py || RET=1
  echo "Linting *.py DONE"

  echo "Linting setup.py"
  flake8 setup.py --ignore=E402,E731,E741,W503 || RET=1
  echo "Linting setup.py DONE"

  echo "Linting asv_bench/benchmarks/"
  flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/*.py --ignore=F811,C406,C408,C409,C410 || RET=1
  echo "Linting asv_bench/benchmarks/*.py DONE"

  echo "Linting scripts/*.py"
  flake8 scripts --filename=*.py --ignore=C408,E402,E731,E741,W503 || RET=1
  echo "Linting scripts/*.py DONE"

  echo "Linting doc scripts"
  flake8 doc/make.py doc/source/conf.py --ignore=E402,E731,E741,W503 || RET=1
  echo "Linting doc scripts DONE"

  # Cython sources only get a restricted --select list of whitespace/import
  # checks, since flake8 cannot fully parse Cython syntax.
  echo "Linting *.pyx"
  flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 || RET=1
  echo "Linting *.pyx DONE"

  echo "Linting *.pxi.in"
  for sub in 'src'; do
    echo "linting -> pandas/$sub"
    flake8 pandas/$sub --filename=*.pxi.in --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 || RET=1
  done
  echo "Linting *.pxi.in DONE"

  echo "Linting *.pxd"
  for sub in '_libs'; do
    echo "linting -> pandas/$sub"
    flake8 pandas/$sub --filename=*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 || RET=1
  done
  echo "Linting *.pxd DONE"

  # readability/casting: Warnings about C casting instead of C++ casting
  # runtime/int: Warnings about using C number types instead of C++ ones
  # build/include_subdir: Warnings about prefacing included header files with directory

  # We don't lint all C files because we don't want to lint any that are built
  # from Cython files nor do we want to lint C files that we didn't modify for
  # this particular codebase (e.g. src/headers, src/klib, src/msgpack). However,
  # we can lint all header files since they aren't "generated" like C files are.
  echo "Linting *.c and *.h"
  for sub in '*.h' 'parser' 'ujson'; do
    echo "linting -> pandas/_libs/src/$sub"
    cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/_libs/src/$sub || RET=1
  done
  echo "linting -> pandas/_libs/tslibs/src/datetime"
  cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/_libs/tslibs/src/datetime || RET=1
  echo "Linting *.c and *.h DONE"

  # For the grep-based checks below, a match (grep exit status 0) means
  # forbidden code was found, so a successful grep marks the run as failed.
  echo "Check for invalid testing"

  # Check for the following code in testing:
  #
  # np.testing
  # np.array_equal
  grep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/ && RET=1

  # Check for pytest.warns
  grep -r -E --include '*.py' 'pytest\.warns' pandas/tests/ && RET=1

  # Check for the following code in the extension array base tests
  # tm.assert_frame_equal
  # tm.assert_series_equal
  grep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base && RET=1

  echo "Check for invalid testing DONE"

  # Check for imports from pandas.core.common instead
  # of `import pandas.core.common as com`
  echo "Check for non-standard imports"
  grep -R --include="*.py*" -E "from pandas.core.common import " pandas && RET=1
  echo "Check for non-standard imports DONE"

  echo "Check for incorrect sphinx directives"
  # Same value the old echo|tr pipeline produced, written out directly.
  SPHINX_DIRECTIVES="autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning"
  for loc in './pandas' './doc/source'; do
    grep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. ($SPHINX_DIRECTIVES):[^:]" $loc && RET=1
  done
  echo "Check for incorrect sphinx directives DONE"

  echo "Check for deprecated messages without sphinx directive"
  grep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas && RET=1
  echo "Check for deprecated messages without sphinx directive DONE"

  echo "Check for old-style classes"
  grep -R --include="*.py" -E "class\s\S*[^)]:" pandas scripts && RET=1
  echo "Check for old-style classes DONE"

  echo "Check for backticks incorrectly rendering because of missing spaces"
  grep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/ && RET=1
  echo "Check for backticks incorrectly rendering because of missing spaces DONE"

else
  echo "NOT Linting"
fi

### LINTING ###
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so I would rather have these as 3 scripts, then a top-level script to call each one. That way we can call these individually (e.g. in the Makefile)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean one script for linting (flake8+cpplint), one for the checks (grep) and one for the doctests, right? I think that makes sense, but considering we may want to move this to azure, I see different options:

  • Having 3 scripts as you say
  • Having one, but with a parameter (e.g. ./ci/lint.sh [lint|checks|doctests]). I think this makes things a bit simpler and more compact (as the 3 scripts will be very similar)
  • Leave it as it is, and move in a separate PR the content of this file to azure-pipelines.yml

You'll know better than me how often these checks are run locally or out of the CI, but as now flake8 is mainly flake8 . and the doctests will soon be flake8 . --doctests I think it can make sense to just have them in azure. May be a script just for the grep checks if you think those are being run locally?


# `setup.cfg` contains the list of error codes that are being ignored in
# flake8, so the flake8 invocations below do not pass --ignore explicitly.

# pandas/_libs/src is C code, so no need to search there.
MSG='Linting .py code' ; echo "$MSG"
# The --filename/--exclude patterns are quoted so the shell can never
# glob-expand them; flake8 must receive the patterns literally.
flake8 pandas --filename='*.py' --exclude pandas/_libs/src
RET=$((RET + $?)) ; echo "$MSG" "DONE"

MSG='Linting setup.py' ; echo "$MSG"
flake8 setup.py
RET=$((RET + $?)) ; echo "$MSG" "DONE"

MSG='Linting scripts' ; echo "$MSG"
flake8 scripts --filename='*.py'
RET=$((RET + $?)) ; echo "$MSG" "DONE"

MSG='Linting asv benchmarks' ; echo "$MSG"
flake8 asv_bench/benchmarks/ --exclude='asv_bench/benchmarks/*.py'
RET=$((RET + $?)) ; echo "$MSG" "DONE"

MSG='Linting doc scripts' ; echo "$MSG"
flake8 doc/make.py doc/source/conf.py
RET=$((RET + $?)) ; echo "$MSG" "DONE"

# Cython files only get a restricted --select list of whitespace/import
# checks, since flake8 cannot fully parse Cython syntax.
MSG='Linting .pyx code' ; echo "$MSG"
flake8 pandas --filename='*.pyx' --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411
RET=$((RET + $?)) ; echo "$MSG" "DONE"

MSG='Linting .pxd and .pxi.in' ; echo "$MSG"
flake8 pandas/_libs --filename='*.pxi.in,*.pxd' --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
RET=$((RET + $?)) ; echo "$MSG" "DONE"

# readability/casting: Warnings about C casting instead of C++ casting
# runtime/int: Warnings about using C number types instead of C++ ones
# build/include_subdir: Warnings about prefacing included header files with directory

# We don't lint all C files because we don't want to lint any that are built
# from Cython files nor do we want to lint C files that we didn't modify for
# this particular codebase (e.g. src/headers, src/klib, src/msgpack). However,
# we can lint all header files since they aren't "generated" like C files are.
MSG='Linting .c and .h' ; echo "$MSG"
# BUGFIX: ujson lives under pandas/_libs/src/ujson (the old script linted it
# as pandas/_libs/src/$path); pandas/_libs/ujson does not exist.
# pandas/_libs/src/*.h is deliberately left unquoted so the shell expands it
# to the individual header files.
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/src/ujson pandas/_libs/tslibs/src/datetime
RET=$((RET + $?)) ; echo "$MSG" "DONE"


### CHECKS ###

# Each check greps for a forbidden pattern; grep exits 0 when the pattern is
# found, so `!` inverts the status and a match counts as a failure.

# Check for imports from pandas.core.common instead of `import pandas.core.common as com`
MSG='Check for non-standard imports' ; echo "$MSG"
! grep -R --include="*.py*" -E "from pandas.core.common import " pandas
RET=$((RET + $?)) ; echo "$MSG" "DONE"

MSG='Check for pytest warns' ; echo "$MSG"
! grep -r -E --include '*.py' 'pytest\.warns' pandas/tests/
RET=$((RET + $?)) ; echo "$MSG" "DONE"

# Check for the following code in testing: `np.testing` and `np.array_equal`
MSG='Check for invalid testing' ; echo "$MSG"
! grep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/
RET=$((RET + $?)) ; echo "$MSG" "DONE"

# Check for the following code in the extension array base tests: `tm.assert_frame_equal` and `tm.assert_series_equal`
MSG='Check for invalid EA testing' ; echo "$MSG"
! grep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base
RET=$((RET + $?)) ; echo "$MSG" "DONE"

MSG='Check for deprecated messages without sphinx directive' ; echo "$MSG"
! grep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas
RET=$((RET + $?)) ; echo "$MSG" "DONE"

MSG='Check for old-style classes' ; echo "$MSG"
! grep -R --include="*.py" -E "class\s\S*[^)]:" pandas scripts
RET=$((RET + $?)) ; echo "$MSG" "DONE"

MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo "$MSG"
! grep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/
RET=$((RET + $?)) ; echo "$MSG" "DONE"

MSG='Check for incorrect sphinx directives' ; echo "$MSG"
! grep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source
RET=$((RET + $?)) ; echo "$MSG" "DONE"


# RET is a sum of exit codes; normalize it so the script's own exit status
# can never wrap past 255 back to 0 when several checks fail.
exit $((RET == 0 ? 0 : 1))
1 change: 1 addition & 0 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ def get_dispatch(dtypes):

{{for name, c_type, dtype in get_dispatch(dtypes)}}


def ensure_{{name}}(object arr, copy=True):
if util.is_array(arr):
if (<ndarray> arr).descr.type_num == NPY_{{c_type}}:
Expand Down
3 changes: 3 additions & 0 deletions pandas/_libs/groupby_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,7 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
else:
out[i, j] = resx[i, j]


@cython.wraparound(False)
@cython.boundscheck(False)
def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
Expand Down Expand Up @@ -410,6 +411,8 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,


{{if name != 'object'}}


@cython.boundscheck(False)
@cython.wraparound(False)
def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
Expand Down
12 changes: 6 additions & 6 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -466,12 +466,12 @@ cdef class {{name}}HashTable(HashTable):
@cython.boundscheck(False)
def unique(self, const {{dtype}}_t[:] values):
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
{{dtype}}_t val
khiter_t k
{{name}}Vector uniques = {{name}}Vector()
{{name}}VectorData *ud
Py_ssize_t i, n = len(values)
int ret = 0
{{dtype}}_t val
khiter_t k
{{name}}Vector uniques = {{name}}Vector()
{{name}}VectorData *ud

ud = uniques.data

Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/hashtable_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna):
@cython.boundscheck(False)
{{if dtype == 'object'}}


def duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'):
{{else}}

Expand Down Expand Up @@ -210,9 +211,11 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
@cython.boundscheck(False)
{{if dtype == 'object'}}


def ismember_{{dtype}}(ndarray[{{scalar}}] arr, ndarray[{{scalar}}] values):
{{else}}


def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
{{endif}}

Expand All @@ -236,7 +239,6 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
{{scalar}} val
kh_{{ttype}}_t * table = kh_init_{{ttype}}()


# construct the table
n = len(values)
kh_resize_{{ttype}}(table, min(n, len(values)))
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/join_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ on_dtypes = ['uint8_t', 'uint16_t', 'uint32_t', 'uint64_t',
}}



{{for table_type, by_dtype in by_dtypes}}
{{for on_dtype in on_dtypes}}

Expand Down
9 changes: 3 additions & 6 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,11 @@ parentdir_prefix = pandas-

[flake8]
ignore =
W503, # line break before binary operator
E402, # module level import not at top of file
E722, # do not use bare except
E731, # do not assign a lambda expression, use a def
W503, # line break before binary operator
C405, # Unnecessary (list/tuple) literal - rewrite as a set literal.
C406, # Unnecessary (list/tuple) literal - rewrite as a dict literal.
C408, # Unnecessary (dict/list/tuple) call - rewrite as a literal.
C409, # Unnecessary (list/tuple) passed to tuple() - (remove the outer call to tuple()/rewrite as a tuple literal).
C410 # Unnecessary (list/tuple) passed to list() - (remove the outer call to list()/rewrite as a list literal).
E741 # ambiguous variable name 'l'
max-line-length = 79

[yapf]
Expand Down