@@ -334,4 +334,332 @@ if [[ -z "$CHECK" || "$CHECK" == "typing" ]]; then
334
334
fi
335
335
336
336
337
+ exit $RET
338
+ #! /bin/bash
339
+ #
340
+ # Run checks related to code quality.
341
+ #
342
+ # This script is intended for both the CI and to check locally that code standards are
343
+ # respected. We are currently linting (PEP-8 and similar), looking for patterns of
344
+ # common mistakes (sphinx directives with missing blank lines, old style classes,
345
+ # unwanted imports...), we run doctests here (currently some files only), and we
346
+ # validate formatting errors in docstrings.
347
+ #
348
+ # Usage:
349
+ # $ ./ci/code_checks.sh # run all checks
350
+ # $ ./ci/code_checks.sh lint # run linting only
351
+ # $ ./ci/code_checks.sh patterns # check for patterns that should not exist
352
+ # $ ./ci/code_checks.sh code # checks on imported code
353
+ # $ ./ci/code_checks.sh doctests # run doctests
354
+ # $ ./ci/code_checks.sh docstrings # validate docstring errors
355
+ # $ ./ci/code_checks.sh dependencies # check that dependencies are consistent
356
+ # $ ./ci/code_checks.sh typing # run static type analysis
357
+
358
# Accept either no argument (run every check) or exactly one of the known
# check names; anything else is reported together with the usage string.
case "$1" in
    ""|lint|patterns|code|doctests|docstrings|dependencies|typing)
        ;;
    *)
        echo "Unknown command $1. Usage: $0 [lint|patterns|code|doctests|docstrings|dependencies|typing]"
        # NOTE: exit statuses are truncated to 8 bits, so callers observe 15.
        exit 9999
        ;;
esac

# Repository root, relative to the location of this script.
BASE_DIR="$(dirname "$0")/.."
# Running total of the exit statuses of all executed checks.
RET=0
# Name of the single check to run; empty means "run all checks".
CHECK=$1
364
+
365
function invgrep {
    # grep with inverse exit status and formatting for azure-pipelines
    #
    # Takes exactly the arguments grep takes. Every matching line is printed
    # with its line number (grep -n), prefixed with $INVGREP_PREPEND and
    # suffixed with $INVGREP_APPEND (both optional, read from the caller's
    # environment).
    #
    # Returns:
    # - 0 (success) when no patterns are found
    # - 1 (fail) when the patterns are found
    # - grep's own status (2 or higher) when grep itself failed, so that a
    #   broken invocation is not mistaken for a clean result
    #
    # This is useful for the CI, as we want to fail if one of the patterns
    # that we want to avoid is found by grep.
    local matched_line grep_status

    # The prefix/suffix are added with printf rather than spliced into a sed
    # program, so characters such as "/" or "&" in them are emitted verbatim.
    grep -n "$@" | while IFS= read -r matched_line; do
        printf '%s%s%s\n' "$INVGREP_PREPEND" "$matched_line" "$INVGREP_APPEND"
    done
    # Must be read immediately: any further command overwrites PIPESTATUS.
    grep_status=${PIPESTATUS[0]}

    case "$grep_status" in
        0) return 1 ;;
        1) return 0 ;;
        *) return "$grep_status" ;;
    esac
}
377
+
378
# On GitHub Actions, findings are emitted with the "##[error]" prefix (both by
# flake8 and by invgrep); everywhere else flake8's default format is used.
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
    FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"
    INVGREP_PREPEND="##[error]"
else
    FLAKE8_FORMAT="default"
fi
384
+
385
### LINTING ###
# Runs the formatters/linters (black, flake8, flake8-rst, cpplint, isort) plus
# one grep-based Cython spacing check. The exit status of every tool is added
# to RET; nothing here aborts the script early.
if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then

    echo "black --version"
    black --version

    MSG='Checking black formatting' ; echo "$MSG"
    black . --check
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    # `setup.cfg` contains the list of error codes that are being ignored in flake8

    echo "flake8 --version"
    flake8 --version

    # pandas/_libs/src is C code, so no need to search there.
    MSG='Linting .py code' ; echo "$MSG"
    flake8 --format="$FLAKE8_FORMAT" .
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    # The --filename globs are quoted so that flake8, not the shell, expands them.
    MSG='Linting .pyx code' ; echo "$MSG"
    flake8 --format="$FLAKE8_FORMAT" pandas --filename="*.pyx" --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Linting .pxd and .pxi.in' ; echo "$MSG"
    flake8 --format="$FLAKE8_FORMAT" pandas/_libs --filename="*.pxi.in,*.pxd" --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    echo "flake8-rst --version"
    flake8-rst --version

    MSG='Linting code-blocks in .rst documentation' ; echo "$MSG"
    flake8-rst doc/source --filename="*.rst" --format="$FLAKE8_FORMAT"
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    # Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
    # it doesn't make a difference, but we want to be internally consistent.
    # Note: this grep pattern is (intended to be) equivalent to the python
    # regex r'(?<![ ->])> '
    MSG='Linting .pyx code for spacing conventions in casting' ; echo "$MSG"
    invgrep -r -E --include '*.pyx' --include '*.pxi.in' '[a-zA-Z0-9*]> ' pandas/_libs
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    # readability/casting: Warnings about C casting instead of C++ casting
    # runtime/int: Warnings about using C number types instead of C++ ones
    # build/include_subdir: Warnings about prefacing included header files with directory

    # We don't lint all C files because we don't want to lint any that are built
    # from Cython files nor do we want to lint C files that we didn't modify for
    # this particular codebase (e.g. src/headers, src/klib). However,
    # we can lint all header files since they aren't "generated" like C files are.
    # The path globs below are intentionally left for the shell to expand.
    MSG='Linting .c and .h' ; echo "$MSG"
    cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    echo "isort --version-number"
    isort --version-number

    # Imports - Check formatting using isort see setup.cfg for settings
    MSG='Check import format using isort' ; echo "$MSG"
    # Held as an array so it can be executed directly, without eval.
    ISORT_CMD=(isort --recursive --check-only pandas asv_bench)
    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
        # Prefix each reported line for GitHub; the status that matters is
        # isort's, not awk's, hence PIPESTATUS.
        "${ISORT_CMD[@]}" | awk '{print "##[error]" $0}'
        isort_status=${PIPESTATUS[0]}
    else
        "${ISORT_CMD[@]}"
        isort_status=$?
    fi
    RET=$(( RET + isort_status )) ; echo "$MSG" "DONE"

fi
454
+
455
### PATTERNS ###
# grep-based checks for constructs that must not appear in the code base.
# Each check calls invgrep, which returns non-zero when the unwanted pattern
# is found; that status is added to RET.
if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then

    # Check for imports from pandas.core.common instead of `import pandas.core.common as com`
    # Check for imports from collections.abc instead of `from collections import abc`
    # The status of every invgrep call is accumulated individually; relying on
    # a single `$?` after the group would only record the last call.
    MSG='Check for non-standard imports' ; echo "$MSG"
    invgrep -R --include="*.py*" -E "from pandas.core.common import" pandas
    RET=$(( RET + $? ))
    invgrep -R --include="*.py*" -E "from pandas.core import common" pandas
    RET=$(( RET + $? ))
    invgrep -R --include="*.py*" -E "from collections.abc import" pandas
    RET=$(( RET + $? ))
    invgrep -R --include="*.py*" -E "from numpy import nan" pandas
    RET=$(( RET + $? ))

    # Checks for test suite
    # Check for imports from pandas.util.testing instead of `import pandas.util.testing as tm`
    invgrep -R --include="*.py*" -E "from pandas.util.testing import" pandas/tests
    RET=$(( RET + $? ))
    invgrep -R --include="*.py*" -E "from pandas.util import testing as tm" pandas/tests
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for use of exec' ; echo "$MSG"
    invgrep -R --include="*.py*" -E '[^a-zA-Z0-9_]exec\(' pandas
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for pytest warns' ; echo "$MSG"
    invgrep -r -E --include '*.py' 'pytest\.warns' pandas/tests/
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for pytest raises without context' ; echo "$MSG"
    invgrep -r -E --include '*.py' "[[:space:]] pytest.raises" pandas/tests/
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for python2-style file encodings' ; echo "$MSG"
    invgrep -R --include="*.py" --include="*.pyx" -E '# -\*- coding: utf-8 -\*-' pandas scripts
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for python2-style super usage' ; echo "$MSG"
    invgrep -R --include="*.py" -E 'super\(\w*, (self|cls)\)' pandas
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    # Check for the following code in testing: `np.testing` and `np.array_equal`
    MSG='Check for invalid testing' ; echo "$MSG"
    invgrep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    # Check for the following code in the extension array base tests: `tm.assert_frame_equal` and `tm.assert_series_equal`
    MSG='Check for invalid EA testing' ; echo "$MSG"
    invgrep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for deprecated messages without sphinx directive' ; echo "$MSG"
    invgrep -R --include="*.py" --include="*.pyx" -E '(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)' pandas
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for python2 new-style classes and for empty parentheses' ; echo "$MSG"
    invgrep -R --include="*.py" --include="*.pyx" -E 'class\s\S*\((object)?\):' pandas asv_bench/benchmarks scripts
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo "$MSG"
    invgrep -R --include="*.rst" -E '[a-zA-Z0-9]`` ?[a-zA-Z0-9]' doc/source/
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for incorrect sphinx directives' ; echo "$MSG"
    invgrep -R --include="*.py" --include="*.pyx" --include="*.rst" -E '\.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]' ./pandas ./doc/source
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    # Check for the following code in testing: `unittest.mock`, `mock.Mock()` or `mock.patch`
    MSG='Check that unittest.mock is not used (pytest builtin monkeypatch fixture should be used instead)' ; echo "$MSG"
    invgrep -r -E --include '*.py' '(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)' pandas/tests/
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for wrong space after code-block directive and before colon (".. code-block ::" instead of ".. code-block::")' ; echo "$MSG"
    invgrep -R --include="*.rst" ".. code-block ::" doc/source
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for wrong space after ipython directive and before colon (".. ipython ::" instead of ".. ipython::")' ; echo "$MSG"
    invgrep -R --include="*.rst" ".. ipython ::" doc/source
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    # NOTE(review): grep matches one line at a time, so a pattern containing
    # `\n\n` cannot match across lines; this check looks like it never fires
    # as intended. Left unchanged here - confirm and replace with a
    # multi-line aware check.
    MSG='Check for extra blank lines after the class definition' ; echo "$MSG"
    invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' .
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for use of comment-based annotation syntax' ; echo "$MSG"
    invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for use of foo.__class__ instead of type(foo)' ; echo "$MSG"
    invgrep -R --include="*.py" --include="*.pyx" '\.__class__' pandas
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for use of xrange instead of range' ; echo "$MSG"
    invgrep -R --include="*.py" --include="*.pyx" 'xrange' pandas
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo "$MSG"
    invgrep -R --include="*.py" --include="*.pyx" '!r}' pandas
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Check that no file in the repo contains trailing whitespaces' ; echo "$MSG"
    # Suffix appended by invgrep to every reported line, for this check only.
    INVGREP_APPEND=" <- trailing whitespaces found"
    # The brace list expands to one quoted --exclude=*.ext option per extension;
    # the final `*` is deliberately expanded by the shell to the entries of
    # the current directory.
    invgrep -RI --exclude="*."{svg,c,cpp,html,js} --exclude-dir=env '\s$' *
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"
    unset INVGREP_APPEND
fi
557
+
558
### CODE ###
# Imports pandas with warnings promoted to errors and verifies that the import
# did not pull in any of the listed optional dependencies. The Python snippet
# exits with the number of offending modules, which is added to RET.
if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then

    MSG='Check import. No warnings, and blacklist some optional dependencies' ; echo "$MSG"
    python -W error -c "
import sys
import pandas

blacklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
             'lxml', 'matplotlib', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
             'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}

# GH#28227 for some of these check for top-level modules, while others are
# more specific (e.g. urllib.request)
import_mods = set(m.split('.')[0] for m in sys.modules) | set(sys.modules)
mods = blacklist & import_mods
if mods:
    # sorted() keeps the message stable between runs
    sys.stderr.write('err: pandas should not import: {}\n'.format(', '.join(sorted(mods))))
    sys.exit(len(mods))
    "
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

fi
581
+
582
### DOCTESTS ###
# Runs pytest's doctest collection on a selected set of modules. The -k
# expressions deselect the listed names; each pytest exit status is added
# to RET.
if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then

    MSG='Doctests frame.py' ; echo "$MSG"
    pytest -q --doctest-modules pandas/core/frame.py \
        -k"-itertuples -join -reindex -reindex_axis -round"
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Doctests series.py' ; echo "$MSG"
    pytest -q --doctest-modules pandas/core/series.py \
        -k"-nonzero -reindex -searchsorted -to_dict"
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Doctests generic.py' ; echo "$MSG"
    pytest -q --doctest-modules pandas/core/generic.py \
        -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard"
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Doctests groupby.py' ; echo "$MSG"
    pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Doctests datetimes.py' ; echo "$MSG"
    pytest -q --doctest-modules pandas/core/tools/datetimes.py
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Doctests top-level reshaping functions' ; echo "$MSG"
    pytest -q --doctest-modules \
        pandas/core/reshape/concat.py \
        pandas/core/reshape/pivot.py \
        pandas/core/reshape/reshape.py \
        pandas/core/reshape/tile.py \
        pandas/core/reshape/melt.py \
        -k"-crosstab -pivot_table -cut"
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Doctests interval classes' ; echo "$MSG"
    pytest -q --doctest-modules \
        pandas/core/indexes/interval.py \
        pandas/core/arrays/interval.py \
        -k"-from_arrays -from_breaks -from_intervals -from_tuples -set_closed -to_tuples -interval_range"
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Doctests arrays/string_.py' ; echo "$MSG"
    pytest -q --doctest-modules pandas/core/arrays/string_.py
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

    MSG='Doctests arrays/boolean.py' ; echo "$MSG"
    pytest -q --doctest-modules pandas/core/arrays/boolean.py
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

fi
634
+
635
### DOCSTRINGS ###
# Runs scripts/validate_docstrings.py restricted to the listed error codes and
# adds its exit status to RET.
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then

    MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA01, SA02, SA03, SA05)' ; echo "$MSG"
    # BASE_DIR is quoted so a checkout path containing spaces still resolves.
    "$BASE_DIR"/scripts/validate_docstrings.py --format=azure --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA01,SA02,SA03,SA05
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

fi
643
+
644
### DEPENDENCIES ###
# Runs scripts/generate_pip_deps_from_conda.py in compare mode and adds its
# exit status to RET.
if [[ -z "$CHECK" || "$CHECK" == "dependencies" ]]; then

    MSG='Check that requirements-dev.txt has been generated from environment.yml' ; echo "$MSG"
    # BASE_DIR is quoted so a checkout path containing spaces still resolves.
    "$BASE_DIR"/scripts/generate_pip_deps_from_conda.py --compare --azure
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"

fi
652
+
653
### TYPING ###
# Runs mypy over the pandas package and adds its exit status to RET.
if [[ -z "$CHECK" || "$CHECK" == "typing" ]]; then

    echo "mypy --version"
    mypy --version

    MSG='Performing static analysis using mypy' ; echo "$MSG"
    mypy pandas
    RET=$(( RET + $? )) ; echo "$MSG" "DONE"
fi
663
+
664
+
337
665
# Exit statuses are truncated to 8 bits, so an accumulated failure count that
# is a multiple of 256 would be reported as success. Clamp to 255 to keep any
# failure non-zero; values up to 255 are passed through unchanged.
exit $(( RET > 255 ? 255 : RET ))
0 commit comments