From 534eb70c429391344a3692fb22750d1946119db6 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Mon, 10 Dec 2018 18:45:59 +0100 Subject: [PATCH 1/9] Validate documentation Signed-off-by: Fabian Haase --- doc/make.py | 9 + scripts/tests/test_validate_documentation.py | 49 ++++++ scripts/validate_documentation.py | 174 +++++++++++++++++++ 3 files changed, 232 insertions(+) create mode 100644 scripts/tests/test_validate_documentation.py create mode 100644 scripts/validate_documentation.py diff --git a/doc/make.py b/doc/make.py index 0a3a7483fcc91..33c3f16c1d140 100755 --- a/doc/make.py +++ b/doc/make.py @@ -21,6 +21,14 @@ import webbrowser import jinja2 +sys.path.insert(0, os.path.abspath('../scripts')) +sys.path.extend([ + + # numpy standard doc extensions + os.path.join(os.path.dirname(__file__), '..', 'scripts') + +]) +import validate_documentation DOC_PATH = os.path.dirname(os.path.abspath(__file__)) SOURCE_PATH = os.path.join(DOC_PATH, 'source') @@ -263,6 +271,7 @@ def html(self): os.remove(zip_fname) if self.single_doc is not None: + validate_documentation.main(self.single_doc, None, 'default') self._open_browser() shutil.rmtree(os.path.join(SOURCE_PATH, 'generated_single'), ignore_errors=True) diff --git a/scripts/tests/test_validate_documentation.py b/scripts/tests/test_validate_documentation.py new file mode 100644 index 0000000000000..930f3cbc74f66 --- /dev/null +++ b/scripts/tests/test_validate_documentation.py @@ -0,0 +1,49 @@ +import pytest + +import docutils.nodes +import docutils.utils + +import validate_documentation +DocumentChecker = validate_documentation.DocumentChecker + + +@pytest.fixture() +def doctree(): + return docutils.utils.new_document('test.rst') + +@pytest.mark.parametrize('raw_doc,errs', [ + (' Indented Line\n', {}), + ('\n', {}), + ('\tIndented Line\n', {'WS01': [(1, '\tIndented Line\n')]}), + ('Line \n', {'WS02': [(1, 'Line \n')]}), + ('\t\n', {'WS01': [(1, '\t\n')], 'WS02': [(1, '\t\n')]}), + (' \n', {'WS02': [(1, ' \n')]}), +]) +def test_line_checkers(doctree, raw_doc, errs): + checker = DocumentChecker(raw_doc, doctree) + result = checker.validate() + + assert result == errs + + +def test_doctree_with_list_in_block_quote(doctree): + bullet_list = docutils.nodes.bullet_list() + block_quote = docutils.nodes.block_quote('BlockQuote', bullet_list) + doctree.append(block_quote) + + checker = DocumentChecker('', doctree) + result = checker.validate() + + assert result == {'DT01': [None]} + + +def test_doctree_with_block_quote_in_list(doctree): + bullet_list = docutils.nodes.bullet_list() + block_quote = docutils.nodes.block_quote('BlockQuote', bullet_list) + top_list = docutils.nodes.bullet_list('', block_quote) + doctree.append(top_list) + + checker = DocumentChecker('', doctree) + result = checker.validate() + + assert result == {'DT01': [None]} diff --git a/scripts/validate_documentation.py b/scripts/validate_documentation.py new file mode 100644 index 0000000000000..a2e893ac86144 --- /dev/null +++ b/scripts/validate_documentation.py @@ -0,0 +1,174 @@ +import argparse +import json +import os +import pickle +import re +import sys + +import docutils.nodes + +DOCTREE_PATH = '../doc/build/doctrees/{}.doctree' +RST_PATH = '../doc/source/{}.rst' + +ERROR_MSGS = { + 'DT01': "Found bullet list within block quote", + 'WS01': "Indentation uses tabulation", + 'WS02': "Trailing whitespaces", + 'WS03': "Whitespaces in empty line", +} + +STARTING_WHITESPACE_RE = re.compile(r'^(\s+).*\n$', re.MULTILINE) +TRAILING_WHITESPACE_RE = re.compile(r'^.*([\t ]+)\n$', re.MULTILINE) + + +class DocumentChecker(object): + + def __init__(self, raw_doc, doctree): + self.doctree = doctree + self.raw_doc = raw_doc + + self.issues = None + + def issue(self, code, match=None, **kwargs): + """ + Parameters + ---------- + code : str + Error code. + **kwargs + Values for the variables in the error messages + """ + issue = self.issues.setdefault(code, []) + issue.append((self.find_line(match), kwargs)) + + def find_line(self, match): + if not match: + return None + lines = self.raw_doc[:match.start(0)].splitlines() + return len(lines) + 1, match.group(0) + + def check_bullet_list_in_block_quote(self): + for node in self.doctree.traverse(docutils.nodes.block_quote): + match = node.first_child_matching_class(docutils.nodes.bullet_list) + if match is not None: + self.issue('DT01') + + def check_tabulator_as_indentation(self): + matches = STARTING_WHITESPACE_RE.finditer(self.raw_doc) + for match in matches: + if '\t' in match.group(1): + self.issue('WS01', match) + + def check_line_ends_with_whitespace(self): + matches = TRAILING_WHITESPACE_RE.finditer(self.raw_doc) + for match in matches: + self.issue('WS02', match) + + def validate(self): + self.issues = {} + for func in dir(self): + if func.startswith('check'): + self.__class__.__dict__[func](self) + + return self.issues + + +def report(reports, output_format='default', errors=None): + exit_status = 0 + if output_format == 'json': + output = json.dumps(reports) + else: + if output_format == 'default': + output_format = '{text}\n' + elif output_format == 'azure': + output_format = ('##vso[task.logissue type=error;' + 'sourcepath={path};' + 'linenumber={row};' + 'code={code};' + ']{text}\n') + else: + raise ValueError('Unknown output_format "{}"'.format( + output_format)) + + output = '' + for name, res in reports.items(): + for err_code, issues in res.items(): + # The script would be faster if instead of filtering the + # errors after validating them, it didn't validate them + # initially. But that would complicate the code too much + if errors and err_code not in errors: + continue + for issue, kwargs in issues: + exit_status += 1 + row = issue[0] if issue else None + output += output_format.format( + name=name, + path=RST_PATH.format(name), + row=row, + code=err_code, + text='{}{}:: {}'.format(name, + ':' + row if row else '', + ERROR_MSGS[err_code] + .format(kwargs))) + + sys.stdout.write(output) + return exit_status + + +def validate(*docnames): + result = {} + for docname in docnames: + with open(DOCTREE_PATH.format(docname), 'r+b') as file: + doctree = pickle.load(file) + + with open(RST_PATH.format(docname), 'r') as file: + raw_doc = file.read() + + checker = DocumentChecker(raw_doc, doctree) + result[docname] = checker.validate() + + return result + + +def main(document, errors, output_format): + if document: + reports = validate(document) + else: + documentation_source = os.path.join(os.curdir, '../doc/source') + docnames = [] + for root, dirs, files in os.walk(documentation_source): + _, base_dir = root.split('../doc/source') + for file in files: + docname, ext = os.path.splitext(file) + if not ext == '.rst': + continue + docnames.append(os.path.join(base_dir, docname)) + reports = validate(*docnames) + return report(reports, output_format=output_format, errors=errors) + + +if __name__ == '__main__': + format_opts = 'default', 'json', 'azure' + func_help = ('document to validate (e.g. io) ' + 'if not provided, all documents are validated') + argparser = argparse.ArgumentParser( + description='validate pandas documentation') + add = argparser.add_argument + add('document', + nargs='?', + default=None, + help=func_help) + add('--format', default='default', choices=format_opts, + help='format of the output when validating ' + 'multiple documents (ignored when validating one).' + 'It can be {}'.format(str(format_opts)[1:-1])) + add('--errors', default=None, + help='comma separated ' + 'list of error codes to validate. By default it ' + 'validates all errors (ignored when validating ' + 'a single document)') + + args = argparser.parse_args() + sys.exit(main(args.document, + args.errors.split(',') if args.errors else None, + args.format, )) From 7aa12ad5feaf148e55f1ea5cad9c3708069b884d Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 11 Dec 2018 00:51:21 +0100 Subject: [PATCH 2/9] Match style of validate_docstrings Signed-off-by: Fabian Haase --- doc/make.py | 3 +- scripts/tests/test_validate_documentation.py | 1 + scripts/validate_documentation.py | 103 ++++++++++++++----- 3 files changed, 81 insertions(+), 26 deletions(-) diff --git a/doc/make.py b/doc/make.py index 33c3f16c1d140..3f6bd55db9169 100755 --- a/doc/make.py +++ b/doc/make.py @@ -271,7 +271,8 @@ def html(self): os.remove(zip_fname) if self.single_doc is not None: - validate_documentation.main(self.single_doc, None, 'default') + report = validate_documentation.validate_one(self.single_doc) + validate_documentation.report({self.single_doc: report}) self._open_browser() shutil.rmtree(os.path.join(SOURCE_PATH, 'generated_single'), ignore_errors=True) diff --git a/scripts/tests/test_validate_documentation.py b/scripts/tests/test_validate_documentation.py index 930f3cbc74f66..e5edc4f566f1b 100644 --- a/scripts/tests/test_validate_documentation.py +++ b/scripts/tests/test_validate_documentation.py @@ -11,6 +11,7 @@ def doctree(): return docutils.utils.new_document('test.rst') + @pytest.mark.parametrize('raw_doc,errs', [ (' Indented Line\n', {}), ('\n', {}), diff --git a/scripts/validate_documentation.py b/scripts/validate_documentation.py index a2e893ac86144..1f253861df06f 100644 --- a/scripts/validate_documentation.py +++ b/scripts/validate_documentation.py @@ -1,4 +1,5 @@ import argparse +from fnmatch import fnmatch import json import os import pickle @@ -7,14 +8,15 @@ import docutils.nodes +DOCUMENTATION_SOURCE = os.path.join(os.curdir, '../doc/source') DOCTREE_PATH = '../doc/build/doctrees/{}.doctree' RST_PATH = '../doc/source/{}.rst' ERROR_MSGS = { - 'DT01': "Found bullet list within block quote", + 'DT01': "Found bullet list within block quote. Use 0 spaces for top-level " + "list and 2 spaces for sub-lists", 'WS01': "Indentation uses tabulation", 'WS02': "Trailing whitespaces", - 'WS03': "Whitespaces in empty line", } STARTING_WHITESPACE_RE = re.compile(r'^(\s+).*\n$', re.MULTILINE) @@ -115,35 +117,80 @@ def report(reports, output_format='default', errors=None): return exit_status -def validate(*docnames): - result = {} - for docname in docnames: - with open(DOCTREE_PATH.format(docname), 'r+b') as file: - doctree = pickle.load(file) +def validate_one(page): + """ + Validate the page for the given func_name + + Parameters + ---------- + page : str + Path to page relative to documentation's source folder without file + extension. (e.g. io) + + Returns + ------- + dict + A dictionary containing all the information obtained from validating + the page. + + Notes + ----- + The errors codes are defined as: + - First two characters: Type of errors: + * DT: Error with unwanted node constellations the doctree + * WS: Issues regarding whitespace characters + - Last two characters: Numeric error code + """ + + with open(DOCTREE_PATH.format(page), 'r+b') as file: + doctree = pickle.load(file) + + with open(RST_PATH.format(page), 'r') as file: + raw_doc = file.read() + + checker = DocumentChecker(raw_doc, doctree) + return checker.validate() + - with open(RST_PATH.format(docname), 'r') as file: - raw_doc = file.read() +def validate_all(exclude_patterns): + """ + Execute the validation of all pages, and return a dict with the + results. - checker = DocumentChecker(raw_doc, doctree) - result[docname] = checker.validate() + Parameters + ---------- + exclude_patterns : List[str] or None + If provided, the pages that match with one of these patterns + will be ignored. If None, all pages will be validated. + Returns + ------- + dict + A dictionary with an item for every page containing + all the validation information. + """ + + result = {} + for root, dirs, files in os.walk(DOCUMENTATION_SOURCE): + _, base_dir = root.split('../doc/source') + for file in files: + docname, ext = os.path.splitext(file) + if not ext == '.rst': + continue + page = os.path.join(base_dir, docname) + for pattern in exclude_patterns: + if fnmatch(page, pattern): + continue + + result[page] = validate_one(page) return result -def main(document, errors, output_format): - if document: - reports = validate(document) +def main(page, errors, output_format, exclude_patterns=None): + if page: + reports = {page: validate_one(page)} else: - documentation_source = os.path.join(os.curdir, '../doc/source') - docnames = [] - for root, dirs, files in os.walk(documentation_source): - _, base_dir = root.split('../doc/source') - for file in files: - docname, ext = os.path.splitext(file) - if not ext == '.rst': - continue - docnames.append(os.path.join(base_dir, docname)) - reports = validate(*docnames) + reports = validate_all(exclude_patterns=exclude_patterns) return report(reports, output_format=output_format, errors=errors) @@ -167,8 +214,14 @@ def main(document, errors, output_format): 'list of error codes to validate. By default it ' 'validates all errors (ignored when validating ' 'a single document)') + add('--exclude', default=None, + help='comma separated ' + 'patterns of pages to exclude. By default it ' + 'validates all errors (ignored when validating ' + 'a single document)') args = argparser.parse_args() sys.exit(main(args.document, args.errors.split(',') if args.errors else None, - args.format, )) + args.format, + args.exclude.split(','))) From 93974fb7dce50a9bdc2233d544e17d00f45ed43d Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 11 Dec 2018 00:55:23 +0100 Subject: [PATCH 3/9] Fix flake8 issues Signed-off-by: Fabian Haase --- scripts/validate_documentation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/validate_documentation.py b/scripts/validate_documentation.py index 1f253861df06f..cdb56cc8a2c4a 100644 --- a/scripts/validate_documentation.py +++ b/scripts/validate_documentation.py @@ -130,8 +130,8 @@ def validate_one(page): Returns ------- dict - A dictionary containing all the information obtained from validating - the page. + A dictionary containing all the information obtained from + validating the page. Notes ----- From 5e7bca463ea7a5a4d0e9689747a5cf70689af15b Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 11 Dec 2018 03:03:47 +0100 Subject: [PATCH 4/9] Review of @datapythonista Signed-off-by: Fabian Haase --- ci/build_docs.sh | 12 +- doc/make.py | 9 +- scripts/tests/test_validate_documentation.py | 28 +- scripts/validate_documentation.py | 258 ++++++++++++------- 4 files changed, 195 insertions(+), 112 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index f89c4369dff4a..4ea4a0a9194aa 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -8,6 +8,8 @@ fi cd "$TRAVIS_BUILD_DIR"/doc echo "inside $0" +RET=0 + if [ "$DOC" ]; then echo "Will build docs" @@ -19,6 +21,14 @@ if [ "$DOC" ]; then echo ./make.py ./make.py + echo ############################### + echo # Lint documentation # + echo ############################### + + MSG='Validate documentation' ; echo $MSG + $TRAVIS_BUILD_DIR/scripts/validate_docstrings.py io + RET=$(($RET + $?)) ; echo $MSG "DONE" + echo ######################## echo # Create and send docs # echo ######################## @@ -47,4 +57,4 @@ if [ "$DOC" ]; then git push origin gh-pages -f fi -exit 0 +exit $RET diff --git a/doc/make.py b/doc/make.py index 3f6bd55db9169..1ed9dc1c17417 100755 --- a/doc/make.py +++ b/doc/make.py @@ -21,12 +21,9 @@ import webbrowser import jinja2 -sys.path.insert(0, os.path.abspath('../scripts')) sys.path.extend([ - - # numpy standard doc extensions + # validation for documentation os.path.join(os.path.dirname(__file__), '..', 'scripts') - ]) import validate_documentation @@ -271,8 +268,8 @@ def html(self): os.remove(zip_fname) if self.single_doc is not None: - report = validate_documentation.validate_one(self.single_doc) - validate_documentation.report({self.single_doc: report}) + checker = validate_documentation.validate_one(self.single_doc) + checker.report() self._open_browser() shutil.rmtree(os.path.join(SOURCE_PATH, 'generated_single'), ignore_errors=True) diff --git a/scripts/tests/test_validate_documentation.py b/scripts/tests/test_validate_documentation.py index e5edc4f566f1b..eb4ac493b33dd 100644 --- a/scripts/tests/test_validate_documentation.py +++ b/scripts/tests/test_validate_documentation.py @@ -13,18 +13,18 @@ def doctree(): @pytest.mark.parametrize('raw_doc,errs', [ - (' Indented Line\n', {}), - ('\n', {}), - ('\tIndented Line\n', {'WS01': [(1, '\tIndented Line\n')]}), - ('Line \n', {'WS02': [(1, 'Line \n')]}), - ('\t\n', {'WS01': [(1, '\t\n')], 'WS02': [(1, '\t\n')]}), - (' \n', {'WS02': [(1, ' \n')]}), + ([' Indented Line\n'], {}), + (['\n'], {}), + (['\tIndented Line\n'], {'WS01': [((0, 1), {})]}), + (['Line \n'], {'WS02': [((0, 1), {})]}), + (['\t\n'], {'WS03': [((0, 1), {})]}), + ([' \n'], {'WS03': [((0, 1), {})]}), ]) def test_line_checkers(doctree, raw_doc, errs): - checker = DocumentChecker(raw_doc, doctree) - result = checker.validate() + checker = DocumentChecker('', raw_doc, doctree) + checker.validate() - assert result == errs + assert checker.errs == errs def test_doctree_with_list_in_block_quote(doctree): @@ -32,10 +32,10 @@ def test_doctree_with_list_in_block_quote(doctree): block_quote = docutils.nodes.block_quote('BlockQuote', bullet_list) doctree.append(block_quote) - checker = DocumentChecker('', doctree) - result = checker.validate() + checker = DocumentChecker('', [''], doctree) + checker.validate() - assert result == {'DT01': [None]} + assert checker.errs == {'DT01': [(None, {})]} def test_doctree_with_block_quote_in_list(doctree): @@ -44,7 +44,7 @@ def test_doctree_with_block_quote_in_list(doctree): top_list = docutils.nodes.bullet_list('', block_quote) doctree.append(top_list) - checker = DocumentChecker('', doctree) + checker = DocumentChecker('', [''], doctree) result = checker.validate() - assert result == {'DT01': [None]} + assert result == {'DT01': [(None, {})]} diff --git a/scripts/validate_documentation.py b/scripts/validate_documentation.py index cdb56cc8a2c4a..aa821a622ab75 100644 --- a/scripts/validate_documentation.py +++ b/scripts/validate_documentation.py @@ -1,3 +1,19 @@ +""" +Analyze documentation to detect errors. + +Requires the documentation to be build before usage. + +If no argument is provided, it validates all pages in the source folder for +which a doctree has been generated. + +If a page is provided like "index", "whatsnew/v1.0.0" a list of all errors +in that page is printed. + +Usage:: + $ ./validate_documentation.py + $ ./validate_documentation.py index +""" + import argparse from fnmatch import fnmatch import json @@ -8,148 +24,207 @@ import docutils.nodes -DOCUMENTATION_SOURCE = os.path.join(os.curdir, '../doc/source') -DOCTREE_PATH = '../doc/build/doctrees/{}.doctree' -RST_PATH = '../doc/source/{}.rst' +BASE_DIR = os.path.join(os.path.dirname(__file__), '..', 'doc') +DOCUMENTATION_SOURCE = os.path.join(BASE_DIR, 'build', 'doctrees', '') +DOCTREE_PATH = os.path.join(DOCUMENTATION_SOURCE, '{}.doctree') +RST_PATH = os.path.join(BASE_DIR, 'source', '{}.rst') ERROR_MSGS = { 'DT01': "Found bullet list within block quote. Use 0 spaces for top-level " "list and 2 spaces for sub-lists", 'WS01': "Indentation uses tabulation", 'WS02': "Trailing whitespaces", + 'WS03': "Whitespace in empty line", } -STARTING_WHITESPACE_RE = re.compile(r'^(\s+).*\n$', re.MULTILINE) -TRAILING_WHITESPACE_RE = re.compile(r'^.*([\t ]+)\n$', re.MULTILINE) +STARTING_WHITESPACE_RE = re.compile(r'^(\s+).', re.MULTILINE) +TRAILING_WHITESPACE_RE = re.compile(r'.([\t ]+)$', re.MULTILINE) +EMPTY_LINE_WHITESPACE_RE = re.compile(r'^([\t ]+)$', re.MULTILINE) class DocumentChecker(object): + """ + Checker to validate one page in the documentation - def __init__(self, raw_doc, doctree): + Attributes + ---------- + page : str + Path to page relative to documentation's source folder without file + extension. (e.g. io) + doctree : docutils.nodes.document + Generated doctree for associated the page + raw_lines : list of str + Lines from rst-file for associated page + raw_doc : str + Joined lines from rst-file for associated page + + Notes + ----- + Add a method starting with `check` to add additional checks. + + """ + + def __init__(self, page, raw_lines, doctree): + self.page = page self.doctree = doctree - self.raw_doc = raw_doc + self.raw_lines = raw_lines + self.raw_doc = ''.join(raw_lines) - self.issues = None + self.errs = None - def issue(self, code, match=None, **kwargs): + def error(self, code, line=None, **kwargs): """ Parameters ---------- code : str Error code. + line : Tuple[int, str] **kwargs Values for the variables in the error messages """ - issue = self.issues.setdefault(code, []) - issue.append((self.find_line(match), kwargs)) + errs = self.errs.setdefault(code, []) + errs.append((line, kwargs)) def find_line(self, match): + """ + Find rows in documentation that were matched + + Parameters + ---------- + match : typing.Match + + Returns + ------- + row_start : int + row_end : int + """ if not match: return None - lines = self.raw_doc[:match.start(0)].splitlines() - return len(lines) + 1, match.group(0) + + row_start = self.raw_doc[:match.start(0)].count('\n') + row_end = self.raw_doc[:match.end(0)].count('\n') + return row_start, row_end + 1 def check_bullet_list_in_block_quote(self): for node in self.doctree.traverse(docutils.nodes.block_quote): match = node.first_child_matching_class(docutils.nodes.bullet_list) if match is not None: - self.issue('DT01') + self.error('DT01') def check_tabulator_as_indentation(self): matches = STARTING_WHITESPACE_RE.finditer(self.raw_doc) for match in matches: if '\t' in match.group(1): - self.issue('WS01', match) + self.error('WS01', line=self.find_line(match)) def check_line_ends_with_whitespace(self): matches = TRAILING_WHITESPACE_RE.finditer(self.raw_doc) for match in matches: - self.issue('WS02', match) + self.error('WS02', line=self.find_line(match)) + + def check_empty_line_contains_whitespace(self): + matches = EMPTY_LINE_WHITESPACE_RE.finditer(self.raw_doc) + for match in matches: + self.error('WS03', line=self.find_line(match)) def validate(self): - self.issues = {} + """Execute methods starting with 'check'""" + self.errs = {} for func in dir(self): if func.startswith('check'): self.__class__.__dict__[func](self) - return self.issues + return self.errs + + def report(self, errors=None, output_format='default'): + """ + Output errors to stdout + Parameters + ---------- + errors : list of str, optional + If provided, filter output by these error codes. + output_format : str, optional + One of 'default', 'json', 'azure' -def report(reports, output_format='default', errors=None): - exit_status = 0 - if output_format == 'json': - output = json.dumps(reports) - else: - if output_format == 'default': - output_format = '{text}\n' - elif output_format == 'azure': - output_format = ('##vso[task.logissue type=error;' - 'sourcepath={path};' - 'linenumber={row};' - 'code={code};' - ']{text}\n') + Returns + ------- + int + A integer with number of found issues + """ + n_errs = 0 + if output_format == 'json': + output = json.dumps(self.errs) else: - raise ValueError('Unknown output_format "{}"'.format( - output_format)) - - output = '' - for name, res in reports.items(): - for err_code, issues in res.items(): + if output_format == 'default': + output_format = '{path}:{row}:: {code} {text}\n{source}\n' + elif output_format == 'azure': + output_format = ('##vso[task.logissue type=error;' + 'sourcepath={path};' + 'linenumber={row};' + 'code={code};' + ']{text}\n') + else: + raise ValueError('Unknown output_format "{}"'.format( + output_format)) + + output = '' + + for err_code, errs in self.errs.items(): # The script would be faster if instead of filtering the # errors after validating them, it didn't validate them # initially. But that would complicate the code too much if errors and err_code not in errors: continue - for issue, kwargs in issues: - exit_status += 1 - row = issue[0] if issue else None + for line, kwargs in errs: + n_errs += 1 + row_start, row_end = line if line else (0, 0) output += output_format.format( - name=name, - path=RST_PATH.format(name), - row=row, + name=self.page, + path='doc/source/{}.rst'.format(self.page), + row=row_start + 1, code=err_code, - text='{}{}:: {}'.format(name, - ':' + row if row else '', - ERROR_MSGS[err_code] - .format(kwargs))) + source=''.join(self.raw_lines[row_start:row_end]), + text=ERROR_MSGS[err_code].format(kwargs)) - sys.stdout.write(output) - return exit_status + sys.stdout.write(output) + return n_errs def validate_one(page): """ - Validate the page for the given func_name + Validate the page for the given page - Parameters - ---------- - page : str - Path to page relative to documentation's source folder without file - extension. (e.g. io) + Parameters + ---------- + page : str + Path to page relative to documentation's source folder without file + extension. (e.g. io) - Returns - ------- - dict - A dictionary containing all the information obtained from - validating the page. - - Notes - ----- - The errors codes are defined as: - - First two characters: Type of errors: - * DT: Error with unwanted node constellations the doctree - * WS: Issues regarding whitespace characters - - Last two characters: Numeric error code - """ + Returns + ------- + dict + A dictionary containing all the information obtained from + validating the page. + + Notes + ----- + The errors codes are defined as: + - First two characters: Type of errors: + * DT: Error with unwanted node constellations inside the doctree + * WS: Issues with whitespace characters + - Last two characters: Numeric error code + """ with open(DOCTREE_PATH.format(page), 'r+b') as file: doctree = pickle.load(file) with open(RST_PATH.format(page), 'r') as file: - raw_doc = file.read() + raw_doc = file.readlines() - checker = DocumentChecker(raw_doc, doctree) - return checker.validate() + checker = DocumentChecker(page, raw_doc, doctree) + checker.validate() + return checker def validate_all(exclude_patterns): @@ -172,15 +247,16 @@ def validate_all(exclude_patterns): result = {} for root, dirs, files in os.walk(DOCUMENTATION_SOURCE): - _, base_dir = root.split('../doc/source') + _, base_dir = root.split(DOCUMENTATION_SOURCE) for file in files: docname, ext = os.path.splitext(file) - if not ext == '.rst': + if not ext == '.doctree': continue page = os.path.join(base_dir, docname) - for pattern in exclude_patterns: - if fnmatch(page, pattern): - continue + if exclude_patterns: + for pattern in exclude_patterns: + if fnmatch(page, pattern): + continue result[page] = validate_one(page) return result @@ -188,23 +264,23 @@ def validate_all(exclude_patterns): def main(page, errors, output_format, exclude_patterns=None): if page: - reports = {page: validate_one(page)} + checkers = {page: validate_one(page)} else: - reports = validate_all(exclude_patterns=exclude_patterns) - return report(reports, output_format=output_format, errors=errors) + checkers = validate_all(exclude_patterns=exclude_patterns) + exit_code = 0 + for page, checker in checkers.items(): + exit_code += checker.report(errors=errors, output_format=output_format) + return exit_code if __name__ == '__main__': format_opts = 'default', 'json', 'azure' - func_help = ('document to validate (e.g. io) ' - 'if not provided, all documents are validated') argparser = argparse.ArgumentParser( description='validate pandas documentation') add = argparser.add_argument - add('document', - nargs='?', - default=None, - help=func_help) + add('page', nargs='?', default=None, + help='page to validate (e.g. io) ' + 'if not provided, all pages are validated') add('--format', default='default', choices=format_opts, help='format of the output when validating ' 'multiple documents (ignored when validating one).' @@ -216,12 +292,12 @@ def main(page, errors, output_format, exclude_patterns=None): 'a single document)') add('--exclude', default=None, help='comma separated ' - 'patterns of pages to exclude. By default it ' - 'validates all errors (ignored when validating ' - 'a single document)') + 'patterns of pages to exclude. Utilises ' + '`Unix filename pattern matching`' + '(ignored when validating a single document)') args = argparser.parse_args() - sys.exit(main(args.document, + sys.exit(main(args.page, args.errors.split(',') if args.errors else None, args.format, - args.exclude.split(','))) + args.exclude.split(',') if args.exclude else None)) From 08e754608eb92151745fdd309bf16f86ae60abd9 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 11 Dec 2018 19:02:34 +0100 Subject: [PATCH 5/9] Review of @datapythonista Signed-off-by: Fabian Haase --- scripts/validate_documentation.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/validate_documentation.py b/scripts/validate_documentation.py index aa821a622ab75..c55c61d8e06f2 100644 --- a/scripts/validate_documentation.py +++ b/scripts/validate_documentation.py @@ -15,6 +15,7 @@ """ import argparse +import docutils.nodes from fnmatch import fnmatch import json import os @@ -22,8 +23,6 @@ import re import sys -import docutils.nodes - BASE_DIR = os.path.join(os.path.dirname(__file__), '..', 'doc') DOCUMENTATION_SOURCE = os.path.join(BASE_DIR, 'build', 'doctrees', '') DOCTREE_PATH = os.path.join(DOCUMENTATION_SOURCE, '{}.doctree') @@ -69,7 +68,6 @@ def __init__(self, page, raw_lines, doctree): self.doctree = doctree self.raw_lines = raw_lines self.raw_doc = ''.join(raw_lines) - self.errs = None def error(self, code, line=None, **kwargs): From 3e0c233265215c17948779a0f310678c83a3000d Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 11 Dec 2018 21:10:20 +0100 Subject: [PATCH 6/9] Fix travis build Signed-off-by: Fabian Haase --- ci/build_docs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 4ea4a0a9194aa..55c87d95cc02c 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -26,7 +26,7 @@ if [ "$DOC" ]; then echo ############################### MSG='Validate documentation' ; echo $MSG - $TRAVIS_BUILD_DIR/scripts/validate_docstrings.py io + $TRAVIS_BUILD_DIR/scripts/validate_documentation.py RET=$(($RET + $?)) ; echo $MSG "DONE" echo ######################## From 907c587575c10d28712d1c5fb0622a829d57f092 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 11 Dec 2018 23:52:11 +0100 Subject: [PATCH 7/9] Make validate_documentation executable Signed-off-by: Fabian Haase --- scripts/validate_documentation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) mode change 100644 => 100755 scripts/validate_documentation.py diff --git a/scripts/validate_documentation.py b/scripts/validate_documentation.py old mode 100644 new mode 100755 index c55c61d8e06f2..b5fc7c0dbef6e --- a/scripts/validate_documentation.py +++ b/scripts/validate_documentation.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python """ Analyze documentation to detect errors. @@ -155,7 +156,7 @@ def report(self, errors=None, output_format='default'): output = json.dumps(self.errs) else: if output_format == 'default': - output_format = '{path}:{row}:: {code} {text}\n{source}\n' + output_format = '{path}:{row}:: {code} {text}\n' elif output_format == 'azure': output_format = ('##vso[task.logissue type=error;' 'sourcepath={path};' @@ -180,7 +181,7 @@ def report(self, errors=None, output_format='default'): output += output_format.format( name=self.page, path='doc/source/{}.rst'.format(self.page), - row=row_start + 1, + row=row_start + 1 if line else '?', code=err_code, source=''.join(self.raw_lines[row_start:row_end]), text=ERROR_MSGS[err_code].format(kwargs)) From 1ffa59a10dc19bca0271bc331e3bdd69d52d888c Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Wed, 12 Dec 2018 01:05:47 +0100 Subject: [PATCH 8/9] Skip pages missing corresponding rst-file Signed-off-by: Fabian Haase --- scripts/validate_documentation.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/scripts/validate_documentation.py b/scripts/validate_documentation.py index b5fc7c0dbef6e..163f41de6036a 100755 --- a/scripts/validate_documentation.py +++ b/scripts/validate_documentation.py @@ -214,12 +214,14 @@ def validate_one(page): * WS: Issues with whitespace characters - Last two characters: Numeric error code """ + try: + with open(DOCTREE_PATH.format(page), 'r+b') as file: + doctree = pickle.load(file) - with open(DOCTREE_PATH.format(page), 'r+b') as file: - doctree = pickle.load(file) - - with open(RST_PATH.format(page), 'r') as file: - raw_doc = file.readlines() + with open(RST_PATH.format(page), 'r') as file: + raw_doc = file.readlines() + except FileNotFoundError: + return None checker = DocumentChecker(page, raw_doc, doctree) checker.validate() @@ -244,7 +246,7 @@ def validate_all(exclude_patterns): all the validation information. """ - result = {} + checkers = {} for root, dirs, files in os.walk(DOCUMENTATION_SOURCE): _, base_dir = root.split(DOCUMENTATION_SOURCE) for file in files: @@ -257,8 +259,10 @@ def validate_all(exclude_patterns): if fnmatch(page, pattern): continue - result[page] = validate_one(page) - return result + checker = validate_one(page) + if checker: + checkers[page] = checker + return checkers def main(page, errors, output_format, exclude_patterns=None): From f21a4655bf1bb03c9ce91fe810108b0e3f05784e Mon Sep 17 00:00:00 2001 From: pandas-docs-bot Date: Wed, 12 Dec 2018 02:36:10 +0100 Subject: [PATCH 9/9] Move linting execution to `run_tests.sh` Signed-off-by: pandas-docs-bot --- ci/build_docs.sh | 12 +----------- ci/run_tests.sh | 10 ++++++++-- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 55c87d95cc02c..f89c4369dff4a 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -8,8 +8,6 @@ fi cd "$TRAVIS_BUILD_DIR"/doc echo "inside $0" -RET=0 - if [ "$DOC" ]; then echo "Will build docs" @@ -21,14 +19,6 @@ if [ "$DOC" ]; then echo ./make.py ./make.py - echo ############################### - echo # Lint documentation # - echo ############################### - - MSG='Validate documentation' ; echo $MSG - $TRAVIS_BUILD_DIR/scripts/validate_documentation.py - RET=$(($RET + $?)) ; echo $MSG "DONE" - echo ######################## echo # Create and send docs # echo ######################## @@ -57,4 +47,4 @@ if [ "$DOC" ]; then git push origin gh-pages -f fi -exit $RET +exit 0 diff --git a/ci/run_tests.sh b/ci/run_tests.sh index ee46da9f52eab..27176a1c71789 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -3,8 +3,14 @@ set -e if [ "$DOC" ]; then - echo "We are not running pytest as this is a doc-build" - exit 0 + echo ############################### + echo # Lint documentation # + echo ############################### + + MSG='Validate documentation' ; echo $MSG + $TRAVIS_BUILD_DIR/scripts/validate_documentation.py + RET=$? ; echo $MSG "DONE" + exit $RET fi # Workaround for pytest-xdist flaky collection order