From 253c98433db76e6d3ab87e2f1b397bd653e5e94c Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 23 Jan 2018 11:46:33 +0000 Subject: [PATCH] LINT: Adding scripts directory to lint, and fixing flake issues on them (#18949) --- ci/lint.sh | 7 ++ scripts/announce.py | 0 scripts/api_rst_coverage.py | 22 ++-- scripts/build_dist_for_release.sh | 0 scripts/convert_deps.py | 0 scripts/find_commits_touching_func.py | 161 ++++++++++++++------------ scripts/find_undoc_args.py | 161 ++++++++++++++------------ scripts/merge-pr.py | 4 +- 8 files changed, 197 insertions(+), 158 deletions(-) mode change 100644 => 100755 scripts/announce.py mode change 100644 => 100755 scripts/build_dist_for_release.sh mode change 100644 => 100755 scripts/convert_deps.py diff --git a/ci/lint.sh b/ci/lint.sh index 98b33c0803d90..49bf9a690b990 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -30,6 +30,13 @@ if [ "$LINT" ]; then fi echo "Linting asv_bench/benchmarks/*.py DONE" + echo "Linting scripts/*.py" + flake8 scripts --filename=*.py + if [ $? -ne "0" ]; then + RET=1 + fi + echo "Linting scripts/*.py DONE" + echo "Linting *.pyx" flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403 if [ $? -ne "0" ]; then diff --git a/scripts/announce.py b/scripts/announce.py old mode 100644 new mode 100755 diff --git a/scripts/api_rst_coverage.py b/scripts/api_rst_coverage.py index 28e761ef256d0..4800e80d82891 100755 --- a/scripts/api_rst_coverage.py +++ b/scripts/api_rst_coverage.py @@ -17,9 +17,11 @@ $ PYTHONPATH=.. ./api_rst_coverage.py """ -import pandas as pd -import inspect +import os import re +import inspect +import pandas as pd + def main(): # classes whose members to check @@ -61,13 +63,17 @@ def add_notes(x): # class members class_members = set() for cls in classes: - class_members.update([cls.__name__ + '.' + x[0] for x in inspect.getmembers(cls)]) + for member in inspect.getmembers(cls): + class_members.add('{cls}.{member}'.format(cls=cls.__name__, + member=member[0])) # class members referenced in api.rst api_rst_members = set() - file_name = '../doc/source/api.rst' - with open(file_name, 'r') as f: - pattern = re.compile('({})\.(\w+)'.format('|'.join(cls.__name__ for cls in classes))) + base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + api_rst_fname = os.path.join(base_path, 'doc', 'source', 'api.rst') + class_names = (cls.__name__ for cls in classes) + pattern = re.compile('({})\.(\w+)'.format('|'.join(class_names))) + with open(api_rst_fname, 'r') as f: for line in f: match = pattern.search(line) if match: @@ -75,7 +81,8 @@ def add_notes(x): print() print("Documented members in api.rst that aren't actual class members:") - for x in sorted(api_rst_members.difference(class_members), key=class_name_sort_key): + for x in sorted(api_rst_members.difference(class_members), + key=class_name_sort_key): print(x) print() @@ -86,5 +93,6 @@ def add_notes(x): if '._' not in x: print(add_notes(x)) + if __name__ == "__main__": main() diff --git a/scripts/build_dist_for_release.sh b/scripts/build_dist_for_release.sh old mode 100644 new mode 100755 diff --git a/scripts/convert_deps.py b/scripts/convert_deps.py old mode 100644 new mode 100755 diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py index 0dd609417d7ba..29eb4161718ff 100755 --- a/scripts/find_commits_touching_func.py +++ b/scripts/find_commits_touching_func.py @@ -1,135 +1,148 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- - # copyright 2013, y-p @ github - -from __future__ import print_function -from pandas.compat import range, lrange, map, string_types, text_type - -"""Search the git history for all commits touching a named method +""" +Search the git history for all commits touching a named method You need the sh module to run this -WARNING: this script uses git clean -f, running it on a repo with untracked files -will probably erase them. +WARNING: this script uses git clean -f, running it on a repo with untracked +files will probably erase them. + +Usage:: + $ ./find_commits_touching_func.py (see arguments below) """ +from __future__ import print_function import logging import re import os +import argparse from collections import namedtuple -from pandas.compat import parse_date - +from pandas.compat import lrange, map, string_types, text_type, parse_date try: import sh except ImportError: - raise ImportError("The 'sh' package is required in order to run this script. ") + raise ImportError("The 'sh' package is required to run this script.") -import argparse desc = """ Find all commits touching a specified function across the codebase. """.strip() argparser = argparse.ArgumentParser(description=desc) argparser.add_argument('funcname', metavar='FUNCNAME', - help='Name of function/method to search for changes on.') + help='Name of function/method to search for changes on') argparser.add_argument('-f', '--file-masks', metavar='f_re(,f_re)*', default=["\.py.?$"], - help='comma separated list of regexes to match filenames against\n'+ - 'defaults all .py? files') + help='comma separated list of regexes to match ' + 'filenames against\ndefaults all .py? files') argparser.add_argument('-d', '--dir-masks', metavar='d_re(,d_re)*', default=[], - help='comma separated list of regexes to match base path against') + help='comma separated list of regexes to match base ' + 'path against') argparser.add_argument('-p', '--path-masks', metavar='p_re(,p_re)*', default=[], - help='comma separated list of regexes to match full file path against') + help='comma separated list of regexes to match full ' + 'file path against') argparser.add_argument('-y', '--saw-the-warning', - action='store_true',default=False, - help='must specify this to run, acknowledge you realize this will erase untracked files') + action='store_true', default=False, + help='must specify this to run, acknowledge you ' + 'realize this will erase untracked files') argparser.add_argument('--debug-level', default="CRITICAL", - help='debug level of messages (DEBUG,INFO,etc...)') - + help='debug level of messages (DEBUG, INFO, etc...)') args = argparser.parse_args() lfmt = logging.Formatter(fmt='%(levelname)-8s %(message)s', - datefmt='%m-%d %H:%M:%S' -) - + datefmt='%m-%d %H:%M:%S') shh = logging.StreamHandler() shh.setFormatter(lfmt) - -logger=logging.getLogger("findit") +logger = logging.getLogger("findit") logger.addHandler(shh) +Hit = namedtuple("Hit", "commit path") +HASH_LEN = 8 -Hit=namedtuple("Hit","commit path") -HASH_LEN=8 def clean_checkout(comm): - h,s,d = get_commit_vitals(comm) + h, s, d = get_commit_vitals(comm) if len(s) > 60: s = s[:60] + "..." - s=s.split("\n")[0] - logger.info("CO: %s %s" % (comm,s )) + s = s.split("\n")[0] + logger.info("CO: %s %s" % (comm, s)) - sh.git('checkout', comm ,_tty_out=False) + sh.git('checkout', comm, _tty_out=False) sh.git('clean', '-f') -def get_hits(defname,files=()): - cs=set() + +def get_hits(defname, files=()): + cs = set() for f in files: try: - r=sh.git('blame', '-L', '/def\s*{start}/,/def/'.format(start=defname),f,_tty_out=False) + r = sh.git('blame', + '-L', + '/def\s*{start}/,/def/'.format(start=defname), + f, + _tty_out=False) except sh.ErrorReturnCode_128: logger.debug("no matches in %s" % f) continue lines = r.strip().splitlines()[:-1] # remove comment lines - lines = [x for x in lines if not re.search("^\w+\s*\(.+\)\s*#",x)] - hits = set(map(lambda x: x.split(" ")[0],lines)) - cs.update(set(Hit(commit=c,path=f) for c in hits)) + lines = [x for x in lines if not re.search("^\w+\s*\(.+\)\s*#", x)] + hits = set(map(lambda x: x.split(" ")[0], lines)) + cs.update(set(Hit(commit=c, path=f) for c in hits)) return cs -def get_commit_info(c,fmt,sep='\t'): - r=sh.git('log', "--format={}".format(fmt), '{}^..{}'.format(c,c),"-n","1",_tty_out=False) + +def get_commit_info(c, fmt, sep='\t'): + r = sh.git('log', + "--format={}".format(fmt), + '{}^..{}'.format(c, c), + "-n", + "1", + _tty_out=False) return text_type(r).split(sep) -def get_commit_vitals(c,hlen=HASH_LEN): - h,s,d= get_commit_info(c,'%H\t%s\t%ci',"\t") - return h[:hlen],s,parse_date(d) -def file_filter(state,dirname,fnames): - if args.dir_masks and not any(re.search(x,dirname) for x in args.dir_masks): +def get_commit_vitals(c, hlen=HASH_LEN): + h, s, d = get_commit_info(c, '%H\t%s\t%ci', "\t") + return h[:hlen], s, parse_date(d) + + +def file_filter(state, dirname, fnames): + if (args.dir_masks and + not any(re.search(x, dirname) for x in args.dir_masks)): return for f in fnames: - p = os.path.abspath(os.path.join(os.path.realpath(dirname),f)) - if any(re.search(x,f) for x in args.file_masks)\ - or any(re.search(x,p) for x in args.path_masks): + p = os.path.abspath(os.path.join(os.path.realpath(dirname), f)) + if (any(re.search(x, f) for x in args.file_masks) or + any(re.search(x, p) for x in args.path_masks)): if os.path.isfile(p): state['files'].append(p) -def search(defname,head_commit="HEAD"): - HEAD,s = get_commit_vitals("HEAD")[:2] - logger.info("HEAD at %s: %s" % (HEAD,s)) + +def search(defname, head_commit="HEAD"): + HEAD, s = get_commit_vitals("HEAD")[:2] + logger.info("HEAD at %s: %s" % (HEAD, s)) done_commits = set() # allhits = set() files = [] state = dict(files=files) - os.path.walk('.',file_filter,state) + os.walk('.', file_filter, state) # files now holds a list of paths to files # seed with hits from q - allhits= set(get_hits(defname, files = files)) + allhits = set(get_hits(defname, files=files)) q = set([HEAD]) try: while q: - h=q.pop() + h = q.pop() clean_checkout(h) - hits = get_hits(defname, files = files) + hits = get_hits(defname, files=files) for x in hits: - prevc = get_commit_vitals(x.commit+"^")[0] + prevc = get_commit_vitals(x.commit + "^")[0] if prevc not in done_commits: q.add(prevc) allhits.update(hits) @@ -141,43 +154,46 @@ def search(defname,head_commit="HEAD"): clean_checkout(HEAD) return allhits + def pprint_hits(hits): - SUBJ_LEN=50 + SUBJ_LEN = 50 PATH_LEN = 20 - hits=list(hits) + hits = list(hits) max_p = 0 for hit in hits: - p=hit.path.split(os.path.realpath(os.curdir)+os.path.sep)[-1] - max_p=max(max_p,len(p)) + p = hit.path.split(os.path.realpath(os.curdir) + os.path.sep)[-1] + max_p = max(max_p, len(p)) if max_p < PATH_LEN: SUBJ_LEN += PATH_LEN - max_p PATH_LEN = max_p def sorter(i): - h,s,d=get_commit_vitals(hits[i].commit) - return hits[i].path,d + h, s, d = get_commit_vitals(hits[i].commit) + return hits[i].path, d - print("\nThese commits touched the %s method in these files on these dates:\n" \ - % args.funcname) - for i in sorted(lrange(len(hits)),key=sorter): + print(('\nThese commits touched the %s method in these files ' + 'on these dates:\n') % args.funcname) + for i in sorted(lrange(len(hits)), key=sorter): hit = hits[i] - h,s,d=get_commit_vitals(hit.commit) - p=hit.path.split(os.path.realpath(os.curdir)+os.path.sep)[-1] + h, s, d = get_commit_vitals(hit.commit) + p = hit.path.split(os.path.realpath(os.curdir) + os.path.sep)[-1] fmt = "{:%d} {:10} {:<%d} {:<%d}" % (HASH_LEN, SUBJ_LEN, PATH_LEN) if len(s) > SUBJ_LEN: - s = s[:SUBJ_LEN-5] + " ..." - print(fmt.format(h[:HASH_LEN],d.isoformat()[:10],s,p[-20:]) ) + s = s[:SUBJ_LEN - 5] + " ..." + print(fmt.format(h[:HASH_LEN], d.isoformat()[:10], s, p[-20:])) print("\n") + def main(): if not args.saw_the_warning: argparser.print_help() print(""" !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -WARNING: this script uses git clean -f, running it on a repo with untracked files. +WARNING: +this script uses git clean -f, running it on a repo with untracked files. It's recommended that you make a fresh clone and run from its root directory. You must specify the -y argument to ignore this warning. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -190,12 +206,11 @@ def main(): if isinstance(args.dir_masks, string_types): args.dir_masks = args.dir_masks.split(',') - logger.setLevel(getattr(logging,args.debug_level)) + logger.setLevel(getattr(logging, args.debug_level)) - hits=search(args.funcname) + hits = search(args.funcname) pprint_hits(hits) - pass if __name__ == "__main__": import sys diff --git a/scripts/find_undoc_args.py b/scripts/find_undoc_args.py index 32b23a67b187f..a135c8e5171a1 100755 --- a/scripts/find_undoc_args.py +++ b/scripts/find_undoc_args.py @@ -1,126 +1,135 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +""" +Script that compares the signature arguments with the ones in the docsting +and returns the differences in plain text or GitHub task list format. +Usage:: + $ ./find_undoc_args.py (see arguments below) +""" from __future__ import print_function - +import sys from collections import namedtuple -from itertools import islice import types import os import re import argparse -#http://docs.python.org/2/library/argparse.html -# arg name is positional is not prefixed with - or -- +import inspect + parser = argparse.ArgumentParser(description='Program description.') parser.add_argument('-p', '--path', metavar='PATH', type=str, required=False, - default=None, - help='full path relative to which paths wills be reported',action='store') -parser.add_argument('-m', '--module', metavar='MODULE', type=str,required=True, - help='name of package to import and examine',action='store') -parser.add_argument('-G', '--github_repo', metavar='REPO', type=str,required=False, - help='github project where the code lives, e.g. "pandas-dev/pandas"', - default=None,action='store') - + default=None, action='store', + help='full path relative to which paths wills be reported') +parser.add_argument('-m', '--module', metavar='MODULE', type=str, + required=True, action='store', + help='name of package to import and examine') +parser.add_argument('-G', '--github_repo', metavar='REPO', type=str, + required=False, default=None, action='store', + help='github project where the code lives, ' + 'e.g. "pandas-dev/pandas"') args = parser.parse_args() -Entry=namedtuple("Entry","func path lnum undoc_names missing_args nsig_names ndoc_names") +Entry = namedtuple('Entry', + 'func path lnum undoc_names missing_args ' + 'nsig_names ndoc_names') -def entry_gen(root_ns,module_name): - q=[root_ns] - seen=set() +def entry_gen(root_ns, module_name): + """Walk and yield all methods and functions in the module root_ns and + submodules.""" + q = [root_ns] + seen = set() while q: ns = q.pop() for x in dir(ns): - cand = getattr(ns,x) - if (isinstance(cand,types.ModuleType) - and cand.__name__ not in seen - and cand.__name__.startswith(module_name)): - # print(cand.__name__) + cand = getattr(ns, x) + if (isinstance(cand, types.ModuleType) and + cand.__name__ not in seen and + cand.__name__.startswith(module_name)): seen.add(cand.__name__) - q.insert(0,cand) - elif (isinstance(cand,(types.MethodType,types.FunctionType)) and + q.insert(0, cand) + elif (isinstance(cand, (types.MethodType, types.FunctionType)) and cand not in seen and cand.__doc__): seen.add(cand) yield cand + def cmp_docstring_sig(f): + """Return an `Entry` object describing the differences between the + arguments in the signature and the documented ones.""" def build_loc(f): - path=f.__code__.co_filename.split(args.path,1)[-1][1:] - return dict(path=path,lnum=f.__code__.co_firstlineno) + path = f.__code__.co_filename.split(args.path, 1)[-1][1:] + return dict(path=path, lnum=f.__code__.co_firstlineno) - import inspect - sig_names=set(inspect.getargspec(f).args) + sig_names = set(inspect.getargspec(f).args) + # XXX numpydoc can be used to get the list of parameters doc = f.__doc__.lower() - doc = re.split("^\s*parameters\s*",doc,1,re.M)[-1] - doc = re.split("^\s*returns*",doc,1,re.M)[0] - doc_names={x.split(":")[0].strip() for x in doc.split("\n") - if re.match("\s+[\w_]+\s*:",x)} - sig_names.discard("self") - doc_names.discard("kwds") - doc_names.discard("kwargs") - doc_names.discard("args") - return Entry(func=f,path=build_loc(f)['path'],lnum=build_loc(f)['lnum'], + doc = re.split('^\s*parameters\s*', doc, 1, re.M)[-1] + doc = re.split('^\s*returns*', doc, 1, re.M)[0] + doc_names = {x.split(":")[0].strip() for x in doc.split('\n') + if re.match('\s+[\w_]+\s*:', x)} + sig_names.discard('self') + doc_names.discard('kwds') + doc_names.discard('kwargs') + doc_names.discard('args') + return Entry(func=f, path=build_loc(f)['path'], lnum=build_loc(f)['lnum'], undoc_names=sig_names.difference(doc_names), - missing_args=doc_names.difference(sig_names),nsig_names=len(sig_names), - ndoc_names=len(doc_names)) + missing_args=doc_names.difference(sig_names), + nsig_names=len(sig_names), ndoc_names=len(doc_names)) + def format_id(i): return i -def format_item_as_github_task_list( i,item,repo): - tmpl = "- [ ] {id}) [{file}:{lnum} ({func_name}())]({link}) - __Missing__[{nmissing}/{total_args}]: {undoc_names}" +def format_item_as_github_task_list(i, item, repo): + tmpl = ('- [ ] {id_}) [{fname}:{lnum} ({func_name}())]({link}) - ' + '__Missing__[{nmissing}/{total_args}]: {undoc_names}') link_tmpl = "https://github.com/{repo}/blob/master/{file}#L{lnum}" - - link = link_tmpl.format(repo=repo,file=item.path ,lnum=item.lnum ) - - s = tmpl.format(id=i,file=item.path , - lnum=item.lnum, - func_name=item.func.__name__, - link=link, - nmissing=len(item.undoc_names), - total_args=item.nsig_names, - undoc_names=list(item.undoc_names)) - + link = link_tmpl.format(repo=repo, file=item.path, lnum=item.lnum) + s = tmpl.format(id_=i, fname=item.path, lnum=item.lnum, + func_name=item.func.__name__, link=link, + nmissing=len(item.undoc_names), + total_args=item.nsig_names, + undoc_names=list(item.undoc_names)) if item.missing_args: - s+= " __Extra__(?): {missing_args}".format(missing_args=list(item.missing_args)) - + s += ' __Extra__(?): %s' % list(item.missing_args) return s -def format_item_as_plain(i,item): - tmpl = "+{lnum} {path} {func_name}(): Missing[{nmissing}/{total_args}]={undoc_names}" - - s = tmpl.format(path=item.path , - lnum=item.lnum, - func_name=item.func.__name__, - nmissing=len(item.undoc_names), - total_args=item.nsig_names, - undoc_names=list(item.undoc_names)) +def format_item_as_plain(i, item): + tmpl = ('+{lnum} {path} {func_name}(): ' + 'Missing[{nmissing}/{total_args}]={undoc_names}') + s = tmpl.format(path=item.path, lnum=item.lnum, + func_name=item.func.__name__, + nmissing=len(item.undoc_names), + total_args=item.nsig_names, + undoc_names=list(item.undoc_names)) if item.missing_args: - s+= " Extra(?)={missing_args}".format(missing_args=list(item.missing_args)) - + s += ' Extra(?)=%s' % list(item.missing_args) return s + def main(): module = __import__(args.module) if not args.path: - args.path=os.path.dirname(module.__file__) - collect=[cmp_docstring_sig(e) for e in entry_gen(module,module.__name__)] - # only include if there are missing arguments in the docstring (fewer false positives) - # and there are at least some documented arguments - collect = [e for e in collect if e.undoc_names and len(e.undoc_names) != e.nsig_names] - collect.sort(key=lambda x:x.path) + args.path = os.path.dirname(module.__file__) + collect = [cmp_docstring_sig(e) + for e in entry_gen(module, module.__name__)] + # only include if there are missing arguments in the docstring + # (fewer false positives) and there are at least some documented arguments + collect = [e for e in collect + if e.undoc_names and len(e.undoc_names) != e.nsig_names] + collect.sort(key=lambda x: x.path) if args.github_repo: - for i,item in enumerate(collect,1): - print( format_item_as_github_task_list(i,item,args.github_repo)) + for i, item in enumerate(collect, 1): + print(format_item_as_github_task_list(i, item, args.github_repo)) else: - for i,item in enumerate(collect,1): - print( format_item_as_plain(i, item)) + for i, item in enumerate(collect, 1): + print(format_item_as_plain(i, item)) + -if __name__ == "__main__": - import sys +if __name__ == '__main__': sys.exit(main()) diff --git a/scripts/merge-pr.py b/scripts/merge-pr.py index 5337c37fe5320..31264cad52e4f 100755 --- a/scripts/merge-pr.py +++ b/scripts/merge-pr.py @@ -22,7 +22,6 @@ # usage: ./apache-pr-merge.py (see config env vars below) # # Lightly modified from version of this script in incubator-parquet-format - from __future__ import print_function from subprocess import check_output @@ -223,7 +222,7 @@ def update_pr(pr_num, user_login, base_ref): try: run_cmd( 'git push -f %s %s:%s' % (push_user_remote, pr_branch_name, - base_ref)) + base_ref)) except Exception as e: fail("Exception while pushing: %s" % e) clean_up() @@ -275,6 +274,7 @@ def fix_version_from_branch(branch, versions): branch_ver = branch.replace("branch-", "") return filter(lambda x: x.name.startswith(branch_ver), versions)[-1] + pr_num = input("Which pull request would you like to merge? (e.g. 34): ") pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))