From ea584df34beb4d2fef66ed94c3555e8bc4d1438a Mon Sep 17 00:00:00 2001 From: Hasan Yaman Date: Tue, 6 Apr 2021 22:29:46 +0300 Subject: [PATCH 1/6] STY: Use subprocess to validate flake8 --- scripts/validate_docstrings.py | 40 +++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 98de5b2b1eb84..8611e445039b8 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -19,6 +19,8 @@ import importlib import json import os +import re +import subprocess import sys import tempfile from typing import ( @@ -183,20 +185,27 @@ def validate_pep8(self): ) ) - application = flake8.main.application.Application() - application.initialize(["--quiet"]) - + error_messages = [] with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as file: file.write(content) file.flush() - application.run_checks([file.name]) - - # We need this to avoid flake8 printing the names of the files to - # the standard output - application.formatter.write = lambda line, source: None - application.report() - - yield from application.guide.stats.statistics_for("") + # GH40784 + cmd = ["flake8", "--quiet", "--statistics", file.name] + response = subprocess.run(cmd, capture_output=True, text=True) + stdout = response.stdout + # Remove file name from the start + stdout = stdout.replace(file.name, "") + # Remove the first and last elements since they are always empty str + messages = stdout.split("\n")[1:-1] + error_messages.extend(messages) + + for error_message in error_messages: + # Parse error message + error_items = re.split(r"\s+", error_message) + error_count = int(error_items[0]) + error_code = error_items[1] + message = " ".join(error_items[2:]) + yield error_code, message, error_count def pandas_validate(func_name: str): @@ -240,13 +249,14 @@ def pandas_validate(func_name: str): result["errors"].append( pandas_error("EX02", doctest_log=result["examples_errs"]) ) 
- for err in doc.validate_pep8(): + + for error_code, error_message, error_count in doc.validate_pep8(): result["errors"].append( pandas_error( "EX03", - error_code=err.error_code, - error_message=err.message, - times_happening=f" ({err.count} times)" if err.count > 1 else "", + error_code=error_code, + error_message=error_message, + times_happening=f" ({error_count} times)" if error_count > 1 else "", ) ) examples_source_code = "".join(doc.examples_source_code) From e85b285dd252a0ca11ddb34e600ae6259d5bc1db Mon Sep 17 00:00:00 2001 From: Hasan Yaman Date: Tue, 6 Apr 2021 22:31:43 +0300 Subject: [PATCH 2/6] Reformat scripts/validate_docstrings.py --- scripts/validate_docstrings.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 8611e445039b8..fb8bd73ce38d2 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -256,7 +256,9 @@ def pandas_validate(func_name: str): "EX03", error_code=error_code, error_message=error_message, - times_happening=f" ({error_count} times)" if error_count > 1 else "", + times_happening=f" ({error_count} times)" + if error_count > 1 + else "", ) ) examples_source_code = "".join(doc.examples_source_code) From 6a2a77eb9812bdc86b1ff00c0dd66477e5098fe6 Mon Sep 17 00:00:00 2001 From: Hasan Yaman Date: Tue, 6 Apr 2021 22:46:56 +0300 Subject: [PATCH 3/6] Remove unused flake8 import --- scripts/validate_docstrings.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index fb8bd73ce38d2..947024df98119 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -28,8 +28,6 @@ Optional, ) -import flake8.main.application - try: from io import StringIO except ImportError: From e6f3c9832cf356d86743909be113dd75c1cf7216 Mon Sep 17 00:00:00 2001 From: Hasan Yaman Date: Wed, 7 Apr 2021 00:23:41 +0300 Subject: [PATCH 4/6] Preserve whitespaces for flake8 
error messages --- scripts/validate_docstrings.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 947024df98119..dd524fc335de4 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -197,12 +197,13 @@ def validate_pep8(self): messages = stdout.split("\n")[1:-1] error_messages.extend(messages) + # Parse error message for error_message in error_messages: - # Parse error message - error_items = re.split(r"\s+", error_message) + # Preserve whitespaces with a group + error_items = re.split(r"(\s+)", error_message) error_count = int(error_items[0]) - error_code = error_items[1] - message = " ".join(error_items[2:]) + error_code = error_items[2] + message = "".join(error_items[4:]) yield error_code, message, error_count @@ -249,14 +250,13 @@ def pandas_validate(func_name: str): ) for error_code, error_message, error_count in doc.validate_pep8(): + times_happening = f" ({error_count} times)" if error_count > 1 else "" result["errors"].append( pandas_error( "EX03", error_code=error_code, error_message=error_message, - times_happening=f" ({error_count} times)" - if error_count > 1 - else "", + times_happening=times_happening, ) ) examples_source_code = "".join(doc.examples_source_code) From 6c98cf5bfa93d6f50dde01889d7fb14f63d57be2 Mon Sep 17 00:00:00 2001 From: Hasan Yaman Date: Wed, 7 Apr 2021 15:28:22 +0300 Subject: [PATCH 5/6] Parse flake8 error messages --- scripts/validate_docstrings.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index dd524fc335de4..0434392662405 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -19,7 +19,6 @@ import importlib import json import os -import re import subprocess import sys import tempfile @@ -200,11 +199,8 @@ def validate_pep8(self): # Parse error message for error_message in 
error_messages: # Preserve whitespaces with a group - error_items = re.split(r"(\s+)", error_message) - error_count = int(error_items[0]) - error_code = error_items[2] - message = "".join(error_items[4:]) - yield error_code, message, error_count + error_count, error_code, message = error_message.split(maxsplit=2) + yield error_code, message, int(error_count) def pandas_validate(func_name: str): From c91d7abd47c3e450bf25236ad99bfa8ac733758c Mon Sep 17 00:00:00 2001 From: Hasan Yaman Date: Wed, 7 Apr 2021 21:31:54 +0300 Subject: [PATCH 6/6] Refactor parsing flake8 error messages --- scripts/validate_docstrings.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 0434392662405..d0f32bb554cf9 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -186,19 +186,15 @@ def validate_pep8(self): with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as file: file.write(content) file.flush() - # GH40784 - cmd = ["flake8", "--quiet", "--statistics", file.name] + cmd = ["python", "-m", "flake8", "--quiet", "--statistics", file.name] response = subprocess.run(cmd, capture_output=True, text=True) stdout = response.stdout - # Remove file name from the start stdout = stdout.replace(file.name, "") - # Remove the first and last elements since they are always empty str - messages = stdout.split("\n")[1:-1] - error_messages.extend(messages) + messages = stdout.strip("\n").splitlines() + if messages: + error_messages.extend(messages) - # Parse error message for error_message in error_messages: - # Preserve whitespaces with a group error_count, error_code, message = error_message.split(maxsplit=2) yield error_code, message, int(error_count)