diff --git a/.pep8speaks.yml b/.pep8speaks.yml index cd610907007eb..c3a85d595eb59 100644 --- a/.pep8speaks.yml +++ b/.pep8speaks.yml @@ -3,9 +3,18 @@ scanner: diff_only: True # If True, errors caused by only the patch are shown +# Opened issue in pep8speaks, so we can directly use the config in setup.cfg +# (and avoid having to duplicate it here): +# https://github.com/OrkoHunter/pep8speaks/issues/95 + pycodestyle: max-line-length: 79 - ignore: # Errors and warnings to ignore + ignore: + - W503, # line break before binary operator - E402, # module level import not at top of file + - E722, # do not use bare except - E731, # do not assign a lambda expression, use a def - - W503 # line break before binary operator + - E741, # ambiguous variable name 'l' + - C406, # Unnecessary list literal - rewrite as a dict literal. + - C408, # Unnecessary dict call - rewrite as a literal. + - C409 # Unnecessary list passed to tuple() - rewrite as a tuple literal. diff --git a/.travis.yml b/.travis.yml index c9bdb91283d42..e8f7f3465bfd5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -45,15 +45,14 @@ matrix: - language-pack-zh-hans - dist: trusty env: - - JOB="2.7, lint" ENV_FILE="ci/travis-27.yaml" TEST_ARGS="--skip-slow" LINT=true + - JOB="2.7" ENV_FILE="ci/travis-27.yaml" TEST_ARGS="--skip-slow" addons: apt: packages: - python-gtk2 - dist: trusty env: - - JOB="3.6, coverage" ENV_FILE="ci/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true DOCTEST=true - + - JOB="3.6, lint, coverage" ENV_FILE="ci/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true LINT=true - dist: trusty env: - JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate" @@ -109,11 +108,7 @@ script: - ci/run_build_docs.sh - ci/script_single.sh - ci/script_multi.sh - - ci/lint.sh - - ci/doctests.sh - - echo "checking imports" - - source activate pandas && python ci/check_imports.py - - echo "script done" + - ci/code_checks.sh after_success: - ci/upload_coverage.sh diff --git a/ci/check_imports.py b/ci/check_imports.py deleted file mode 100644 index 19e48b659617f..0000000000000 --- a/ci/check_imports.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Check that certain modules are not loaded by `import pandas` -""" -import sys - -blacklist = { - 'bs4', - 'gcsfs', - 'html5lib', - 'ipython', - 'jinja2' - 'hypothesis', - 'lxml', - 'numexpr', - 'openpyxl', - 'py', - 'pytest', - 's3fs', - 'scipy', - 'tables', - 'xlrd', - 'xlsxwriter', - 'xlwt', -} - - -def main(): - import pandas # noqa - - modules = set(x.split('.')[0] for x in sys.modules) - imported = modules & blacklist - if modules & blacklist: - sys.exit("Imported {}".format(imported)) - - -if __name__ == '__main__': - main() diff --git a/ci/code_checks.sh b/ci/code_checks.sh new file mode 100755 index 0000000000000..eced3bf34e7c6 --- /dev/null +++ b/ci/code_checks.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# +# Run checks related to code quality. +# +# This script is intended for both the CI and to check locally that code standards are +# respected. We are currently linting (PEP-8 and similar), looking for patterns of +# common mistakes (sphinx directives with missing blank lines, old style classes, +# unwanted imports...), and we also run doctests here (currently some files only). +# In the future we may want to add the validation of docstrings and other checks here. +# +# Usage: +# $ ./ci/code_checks.sh # run all checks +# $ ./ci/code_checks.sh lint # run linting only +# $ ./ci/code_checks.sh patterns # check for patterns that should not exist +# $ ./ci/code_checks.sh doctests # run doctests + +echo "inside $0" +[[ $LINT ]] || { echo "NOT Linting. To lint use: LINT=true $0 $1"; exit 0; } +[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "doctests" ]] || { echo "Unkown command $1. Usage: $0 [lint|patterns|doctests]"; exit 9999; } + +source activate pandas +RET=0 +CHECK=$1 + + +### LINTING ### +if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then + + # `setup.cfg` contains the list of error codes that are being ignored in flake8 + + echo "flake8 --version" + flake8 --version + + # pandas/_libs/src is C code, so no need to search there. + MSG='Linting .py code' ; echo $MSG + flake8 . + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Linting .pyx code' ; echo $MSG + flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Linting .pxd and .pxi.in' ; echo $MSG + flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 + RET=$(($RET + $?)) ; echo $MSG "DONE" + + # readability/casting: Warnings about C casting instead of C++ casting + # runtime/int: Warnings about using C number types instead of C++ ones + # build/include_subdir: Warnings about prefacing included header files with directory + + # We don't lint all C files because we don't want to lint any that are built + # from Cython files nor do we want to lint C files that we didn't modify for + # this particular codebase (e.g. src/headers, src/klib, src/msgpack). However, + # we can lint all header files since they aren't "generated" like C files are. + MSG='Linting .c and .h' ; echo $MSG + cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime + RET=$(($RET + $?)) ; echo $MSG "DONE" + +fi + +### PATTERNS ### +if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then + + # Check for imports from pandas.core.common instead of `import pandas.core.common as com` + MSG='Check for non-standard imports' ; echo $MSG + ! grep -R --include="*.py*" -E "from pandas.core.common import " pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for pytest warns' ; echo $MSG + ! grep -r -E --include '*.py' 'pytest\.warns' pandas/tests/ + RET=$(($RET + $?)) ; echo $MSG "DONE" + + # Check for the following code in testing: `np.testing` and `np.array_equal` + MSG='Check for invalid testing' ; echo $MSG + ! grep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/ + RET=$(($RET + $?)) ; echo $MSG "DONE" + + # Check for the following code in the extension array base tests: `tm.assert_frame_equal` and `tm.assert_series_equal` + MSG='Check for invalid EA testing' ; echo $MSG + ! grep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for deprecated messages without sphinx directive' ; echo $MSG + ! grep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for old-style classes' ; echo $MSG + ! grep -R --include="*.py" -E "class\s\S*[^)]:" pandas scripts + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG + ! grep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/ + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for incorrect sphinx directives' ; echo $MSG + ! grep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for modules that pandas should not import' ; echo $MSG + python -c " +import sys +import pandas + +blacklist = {'bs4', 'gcsfs', 'html5lib', 'ipython', 'jinja2' 'hypothesis', + 'lxml', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy', + 'tables', 'xlrd', 'xlsxwriter', 'xlwt'} +mods = blacklist & set(m.split('.')[0] for m in sys.modules) +if mods: + sys.stderr.write('pandas should not import: {}\n'.format(', '.join(mods))) + sys.exit(len(mods)) + " + RET=$(($RET + $?)) ; echo $MSG "DONE" + +fi + +### DOCTESTS ### +if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then + + MSG='Doctests frame.py' ; echo $MSG + pytest --doctest-modules -v pandas/core/frame.py \ + -k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests series.py' ; echo $MSG + pytest --doctest-modules -v pandas/core/series.py \ + -k"-nonzero -reindex -searchsorted -to_dict" + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests generic.py' ; echo $MSG + pytest --doctest-modules -v pandas/core/generic.py \ + -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -to_json -transpose -values -xs" + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests top-level reshaping functions' ; echo $MSG + pytest --doctest-modules -v \ + pandas/core/reshape/concat.py \ + pandas/core/reshape/pivot.py \ + pandas/core/reshape/reshape.py \ + pandas/core/reshape/tile.py \ + -k"-crosstab -pivot_table -cut" + RET=$(($RET + $?)) ; echo $MSG "DONE" + +fi + +exit $RET diff --git a/ci/doctests.sh b/ci/doctests.sh deleted file mode 100755 index 16b3430f1e431..0000000000000 --- a/ci/doctests.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -echo "inside $0" - - -source activate pandas -cd "$TRAVIS_BUILD_DIR" - -RET=0 - -if [ "$DOCTEST" ]; then - - echo "Running doctests" - - # running all doctests is not yet working - # pytest --doctest-modules --ignore=pandas/tests -v pandas - - # if [ $? -ne "0" ]; then - # RET=1 - # fi - - # DataFrame / Series docstrings - pytest --doctest-modules -v pandas/core/frame.py \ - -k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" - - if [ $? -ne "0" ]; then - RET=1 - fi - - pytest --doctest-modules -v pandas/core/series.py \ - -k"-nonzero -reindex -searchsorted -to_dict" - - if [ $? -ne "0" ]; then - RET=1 - fi - - pytest --doctest-modules -v pandas/core/generic.py \ - -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -to_json -transpose -values -xs" - - if [ $? -ne "0" ]; then - RET=1 - fi - - # top-level reshaping functions - pytest --doctest-modules -v \ - pandas/core/reshape/concat.py \ - pandas/core/reshape/pivot.py \ - pandas/core/reshape/reshape.py \ - pandas/core/reshape/tile.py \ - -k"-crosstab -pivot_table -cut" - - if [ $? -ne "0" ]; then - RET=1 - fi - -else - echo "NOT running doctests" -fi - -exit $RET diff --git a/ci/lint.sh b/ci/lint.sh deleted file mode 100755 index 533e1d18d8e0e..0000000000000 --- a/ci/lint.sh +++ /dev/null @@ -1,196 +0,0 @@ -#!/bin/bash - -echo "inside $0" - -source activate pandas - -RET=0 - -if [ "$LINT" ]; then - - # We're ignoring the following codes across the board - #E402, # module level import not at top of file - #E731, # do not assign a lambda expression, use a def - #E741, # do not use variables named 'l', 'O', or 'I' - #W503, # line break before binary operator - #C406, # Unnecessary (list/tuple) literal - rewrite as a dict literal. - #C408, # Unnecessary (dict/list/tuple) call - rewrite as a literal. - #C409, # Unnecessary (list/tuple) passed to tuple() - (remove the outer call to tuple()/rewrite as a tuple literal). - #C410 # Unnecessary (list/tuple) passed to list() - (remove the outer call to list()/rewrite as a list literal). - - # pandas/_libs/src is C code, so no need to search there. - echo "Linting *.py" - flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=C406,C408,C409,E402,E731,E741,W503 - if [ $? -ne "0" ]; then - RET=1 - fi - - flake8 scripts/tests --filename=*.py - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Linting *.py DONE" - - echo "Linting setup.py" - flake8 setup.py --ignore=E402,E731,E741,W503 - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Linting setup.py DONE" - - echo "Linting asv_bench/benchmarks/" - flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/*.py --ignore=F811,C406,C408,C409,C410 - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Linting asv_bench/benchmarks/*.py DONE" - - echo "Linting scripts/*.py" - flake8 scripts --filename=*.py --ignore=C408,E402,E731,E741,W503 - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Linting scripts/*.py DONE" - - echo "Linting doc scripts" - flake8 doc/make.py doc/source/conf.py --ignore=E402,E731,E741,W503 - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Linting doc scripts DONE" - - echo "Linting *.pyx" - flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Linting *.pyx DONE" - - echo "Linting *.pxi.in" - for path in 'src' - do - echo "linting -> pandas/$path" - flake8 pandas/$path --filename=*.pxi.in --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 - if [ $? -ne "0" ]; then - RET=1 - fi - done - echo "Linting *.pxi.in DONE" - - echo "Linting *.pxd" - for path in '_libs' - do - echo "linting -> pandas/$path" - flake8 pandas/$path --filename=*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 - if [ $? -ne "0" ]; then - RET=1 - fi - done - echo "Linting *.pxd DONE" - - # readability/casting: Warnings about C casting instead of C++ casting - # runtime/int: Warnings about using C number types instead of C++ ones - # build/include_subdir: Warnings about prefacing included header files with directory - - # We don't lint all C files because we don't want to lint any that are built - # from Cython files nor do we want to lint C files that we didn't modify for - # this particular codebase (e.g. src/headers, src/klib, src/msgpack). However, - # we can lint all header files since they aren't "generated" like C files are. - echo "Linting *.c and *.h" - for path in '*.h' 'parser' 'ujson' - do - echo "linting -> pandas/_libs/src/$path" - cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/_libs/src/$path - if [ $? -ne "0" ]; then - RET=1 - fi - done - echo "linting -> pandas/_libs/tslibs/src/datetime" - cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/_libs/tslibs/src/datetime - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Linting *.c and *.h DONE" - - echo "Check for invalid testing" - - # Check for the following code in testing: - # - # np.testing - # np.array_equal - grep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/ - - if [ $? = "0" ]; then - RET=1 - fi - - # Check for pytest.warns - grep -r -E --include '*.py' 'pytest\.warns' pandas/tests/ - - if [ $? = "0" ]; then - RET=1 - fi - - # Check for the following code in the extension array base tests - # tm.assert_frame_equal - # tm.assert_series_equal - grep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base - - if [ $? = "0" ]; then - RET=1 - fi - - echo "Check for invalid testing DONE" - - # Check for imports from pandas.core.common instead - # of `import pandas.core.common as com` - echo "Check for non-standard imports" - grep -R --include="*.py*" -E "from pandas.core.common import " pandas - if [ $? = "0" ]; then - RET=1 - fi - echo "Check for non-standard imports DONE" - - echo "Check for incorrect sphinx directives" - SPHINX_DIRECTIVES=$(echo \ - "autosummary|contents|currentmodule|deprecated|function|image|"\ - "important|include|ipython|literalinclude|math|module|note|raw|"\ - "seealso|toctree|versionadded|versionchanged|warning" | tr -d "[:space:]") - for path in './pandas' './doc/source' - do - grep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. ($SPHINX_DIRECTIVES):[^:]" $path - if [ $? = "0" ]; then - RET=1 - fi - done - echo "Check for incorrect sphinx directives DONE" - - echo "Check for deprecated messages without sphinx directive" - grep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas - - if [ $? = "0" ]; then - RET=1 - fi - echo "Check for deprecated messages without sphinx directive DONE" - - echo "Check for old-style classes" - grep -R --include="*.py" -E "class\s\S*[^)]:" pandas scripts - - if [ $? = "0" ]; then - RET=1 - fi - echo "Check for old-style classes DONE" - - echo "Check for backticks incorrectly rendering because of missing spaces" - grep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/ - - if [ $? = "0" ]; then - RET=1 - fi - echo "Check for backticks incorrectly rendering because of missing spaces DONE" - -else - echo "NOT Linting" -fi - -exit $RET diff --git a/ci/print_versions.py b/ci/print_versions.py index 8be795174d76d..a2c93748b0388 100755 --- a/ci/print_versions.py +++ b/ci/print_versions.py @@ -18,7 +18,8 @@ def show_versions(as_json=False): from optparse import OptionParser parser = OptionParser() parser.add_option("-j", "--json", metavar="FILE", nargs=1, - help="Save output as JSON into file, pass in '-' to output to stdout") + help="Save output as JSON into file, " + "pass in '-' to output to stdout") (options, args) = parser.parse_args() diff --git a/ci/travis-27.yaml b/ci/travis-27.yaml index 6955db363ca1f..cc0c5a3192188 100644 --- a/ci/travis-27.yaml +++ b/ci/travis-27.yaml @@ -8,8 +8,6 @@ dependencies: - cython=0.28.2 - fastparquet - feather-format - - flake8=3.4.1 - - flake8-comprehensions - gcsfs - html5lib - ipython @@ -48,6 +46,5 @@ dependencies: - hypothesis>=3.58.0 - pip: - backports.lzma - - cpplint - pandas-gbq - pathlib diff --git a/ci/travis-36.yaml b/ci/travis-36.yaml index 3c9daa5f8b73c..90c892709d9f6 100644 --- a/ci/travis-36.yaml +++ b/ci/travis-36.yaml @@ -8,6 +8,8 @@ dependencies: - dask - fastparquet - feather-format + - flake8>=3.5 + - flake8-comprehensions - gcsfs - geopandas - html5lib @@ -45,5 +47,6 @@ dependencies: - pip: - brotlipy - coverage + - cpplint - pandas-datareader - python-dateutil diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 445f9a7e5e980..f898ef54e4101 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -497,6 +497,17 @@ tools will be run to check your code for stylistic errors. Generating any warnings will cause the test to fail. Thus, good style is a requirement for submitting code to *pandas*. +There is a tool in pandas to help contributors verify their changes before +contributing them to the project:: + + ./ci/code_checks.sh + +The script verify the linting of code files, it looks for common mistake patterns +(like missing spaces around sphinx directives that make the documentation not +being rendered properly) and it also validates the doctests. It is possible to +run the checks independently by using the parameters ``lint``, ``patterns`` and +``doctests`` (e.g. ``./ci/code_checks.sh lint``). + In addition, because a lot of people use our library, it is important that we do not make sudden changes to the code that could have the potential to break a lot of user code as a result, that is, we need it to be as *backwards compatible* diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 9f531f36d1a64..6bcc735656c6b 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -149,6 +149,7 @@ def get_dispatch(dtypes): {{for name, c_type, dtype in get_dispatch(dtypes)}} + def ensure_{{name}}(object arr, copy=True): if util.is_array(arr): if ( arr).descr.type_num == NPY_{{c_type}}: diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 765381d89705d..5b01117381a27 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -351,6 +351,7 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, else: out[i, j] = resx[i, j] + @cython.wraparound(False) @cython.boundscheck(False) def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, @@ -410,6 +411,8 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, {{if name != 'object'}} + + @cython.boundscheck(False) @cython.wraparound(False) def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index f294fd141a9f1..3ff98b7b5a9b5 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -466,12 +466,12 @@ cdef class {{name}}HashTable(HashTable): @cython.boundscheck(False) def unique(self, const {{dtype}}_t[:] values): cdef: - Py_ssize_t i, n = len(values) - int ret = 0 - {{dtype}}_t val - khiter_t k - {{name}}Vector uniques = {{name}}Vector() - {{name}}VectorData *ud + Py_ssize_t i, n = len(values) + int ret = 0 + {{dtype}}_t val + khiter_t k + {{name}}Vector uniques = {{name}}Vector() + {{name}}VectorData *ud ud = uniques.data diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 45a69b613f698..3d35e7014b408 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -128,6 +128,7 @@ cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna): @cython.boundscheck(False) {{if dtype == 'object'}} + def duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'): {{else}} @@ -210,9 +211,11 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'): @cython.boundscheck(False) {{if dtype == 'object'}} + def ismember_{{dtype}}(ndarray[{{scalar}}] arr, ndarray[{{scalar}}] values): {{else}} + def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values): {{endif}} @@ -236,7 +239,6 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values): {{scalar}} val kh_{{ttype}}_t * table = kh_init_{{ttype}}() - # construct the table n = len(values) kh_resize_{{ttype}}(table, min(n, len(values))) diff --git a/pandas/_libs/join_func_helper.pxi.in b/pandas/_libs/join_func_helper.pxi.in index a72b113a6fdb6..72f24762838b4 100644 --- a/pandas/_libs/join_func_helper.pxi.in +++ b/pandas/_libs/join_func_helper.pxi.in @@ -25,7 +25,6 @@ on_dtypes = ['uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', }} - {{for table_type, by_dtype in by_dtypes}} {{for on_dtype in on_dtypes}} diff --git a/pandas/_libs/join_helper.pxi.in b/pandas/_libs/join_helper.pxi.in index 6ba587a5b04ea..35dedf90f8ca4 100644 --- a/pandas/_libs/join_helper.pxi.in +++ b/pandas/_libs/join_helper.pxi.in @@ -93,6 +93,7 @@ def get_dispatch(dtypes): {{for name, c_type, dtype in get_dispatch(dtypes)}} + # @cython.wraparound(False) # @cython.boundscheck(False) def left_join_indexer_{{name}}(ndarray[{{c_type}}] left, diff --git a/setup.cfg b/setup.cfg index e4a2357def474..29392d7f15345 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,16 +12,23 @@ tag_prefix = v parentdir_prefix = pandas- [flake8] +max-line-length = 79 ignore = + W503, # line break before binary operator E402, # module level import not at top of file + E722, # do not use bare except E731, # do not assign a lambda expression, use a def - W503, # line break before binary operator - C405, # Unnecessary (list/tuple) literal - rewrite as a set literal. - C406, # Unnecessary (list/tuple) literal - rewrite as a dict literal. - C408, # Unnecessary (dict/list/tuple) call - rewrite as a literal. - C409, # Unnecessary (list/tuple) passed to tuple() - (remove the outer call to tuple()/rewrite as a tuple literal). - C410 # Unnecessary (list/tuple) passed to list() - (remove the outer call to list()/rewrite as a list literal). -max-line-length = 79 + E741, # ambiguous variable name 'l' + C406, # Unnecessary list literal - rewrite as a dict literal. + C408, # Unnecessary dict call - rewrite as a literal. + C409 # Unnecessary list passed to tuple() - rewrite as a tuple literal. +exclude = + asv_bench/*.py, # TODO we should fix linting in those files instead of excluding + doc/sphinxext/*.py, + doc/build/*.py, + doc/temp/*.py, + .eggs/*.py, + versioneer.py [yapf] based_on_style = pep8