diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e7738fb9a2979..7e9a1ec890fba 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -99,6 +99,28 @@ repos: language: pygrep entry: (\.\. code-block ::|\.\. ipython ::) files: \.(py|pyx|rst)$ + - id: unwanted-patterns-strings-to-concatenate + name: Check for use of not concatenated strings + language: python + entry: ./scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" + files: \.(py|pyx|pxd|pxi)$ + - id: unwanted-patterns-strings-with-wrong-placed-whitespace + name: Check for strings with wrong placed spaces + language: python + entry: ./scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" + files: \.(py|pyx|pxd|pxi)$ + - id: unwanted-patterns-private-import-across-module + name: Check for import of private attributes across modules + language: python + entry: ./scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" + types: [python] + exclude: ^(asv_bench|pandas/_vendored|pandas/tests|doc)/ + - id: unwanted-patterns-private-function-across-module + name: Check for use of private functions across modules + language: python + entry: ./scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" + types: [python] + exclude: ^(asv_bench|pandas/_vendored|pandas/tests|doc)/ - repo: https://github.com/asottile/yesqa rev: v1.2.2 hooks: diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 877e136492518..a9cce9357b531 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -73,38 +73,6 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Check for use of not concatenated strings' ; echo $MSG - if [[ "$GITHUB_ACTIONS" == "true" ]]; then - $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" --format="##[error]{source_path}:{line_number}:{msg}" . - else - $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" . - fi - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Check for strings with wrong placed spaces' ; echo $MSG - if [[ "$GITHUB_ACTIONS" == "true" ]]; then - $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" --format="##[error]{source_path}:{line_number}:{msg}" . - else - $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" . - fi - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Check for import of private attributes across modules' ; echo $MSG - if [[ "$GITHUB_ACTIONS" == "true" ]]; then - $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/ - else - $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/ - fi - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Check for use of private functions across modules' ; echo $MSG - if [[ "$GITHUB_ACTIONS" == "true" ]]; then - $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/ - else - $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/ - fi - RET=$(($RET + $?)) ; echo $MSG "DONE" - fi ### PATTERNS ### diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 119ae1cce2ff0..f824980a6d7e1 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -12,11 +12,10 @@ import argparse import ast -import os import sys import token import tokenize -from typing import IO, Callable, FrozenSet, Iterable, List, Set, Tuple +from typing import IO, Callable, Iterable, List, Set, Tuple PRIVATE_IMPORTS_TO_IGNORE: Set[str] = { "_extension_array_shared_docs", @@ -403,8 +402,6 @@ def main( function: Callable[[IO[str]], Iterable[Tuple[int, str]]], source_path: str, output_format: str, - file_extensions_to_check: str, - excluded_file_paths: str, ) -> bool: """ Main entry point of the script. @@ -432,19 +429,9 @@ def main( ValueError If the `source_path` is not pointing to existing file/directory. """ - if not os.path.exists(source_path): - raise ValueError("Please enter a valid path, pointing to a file/directory.") - is_failed: bool = False - file_path: str = "" - - FILE_EXTENSIONS_TO_CHECK: FrozenSet[str] = frozenset( - file_extensions_to_check.split(",") - ) - PATHS_TO_IGNORE = frozenset(excluded_file_paths.split(",")) - if os.path.isfile(source_path): - file_path = source_path + for file_path in source_path: with open(file_path) as file_obj: for line_number, msg in function(file_obj): is_failed = True @@ -454,25 +441,6 @@ def main( ) ) - for subdir, _, files in os.walk(source_path): - if any(path in subdir for path in PATHS_TO_IGNORE): - continue - for file_name in files: - if not any( - file_name.endswith(extension) for extension in FILE_EXTENSIONS_TO_CHECK - ): - continue - - file_path = os.path.join(subdir, file_name) - with open(file_path) as file_obj: - for line_number, msg in function(file_obj): - is_failed = True - print( - output_format.format( - source_path=file_path, line_number=line_number, msg=msg - ) - ) - return is_failed @@ -487,9 +455,7 @@ def main( parser = argparse.ArgumentParser(description="Unwanted patterns checker.") - parser.add_argument( - "path", nargs="?", default=".", help="Source path of file/directory to check." - ) + parser.add_argument("paths", nargs="*", help="Source paths of files to check.") parser.add_argument( "--format", "-f", @@ -503,25 +469,13 @@ def main( required=True, help="Validation test case to check.", ) - parser.add_argument( - "--included-file-extensions", - default="py,pyx,pxd,pxi", - help="Comma separated file extensions to check.", - ) - parser.add_argument( - "--excluded-file-paths", - default="asv_bench/env", - help="Comma separated file paths to exclude.", - ) args = parser.parse_args() sys.exit( main( function=globals().get(args.validation_type), # type: ignore - source_path=args.path, + source_path=args.paths, output_format=args.format, - file_extensions_to_check=args.included_file_extensions, - excluded_file_paths=args.excluded_file_paths, ) )