From 886b3db743a0bd0274de521d080e96397e245092 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 6 Apr 2023 11:23:04 +0100 Subject: [PATCH] improve `validate-docstrings` ergonomics --- pandas/core/window/rolling.py | 10 +++---- pandas/io/formats/style.py | 12 ++++----- scripts/tests/test_validate_docstrings.py | 14 +++++++--- scripts/validate_docstrings.py | 32 ++++++++++++++++------- setup.cfg | 2 ++ 5 files changed, 45 insertions(+), 25 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b344b04b30d73..630af2b594940 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1005,11 +1005,11 @@ class Window(BaseWindow): Rolling sum with a window span of 2 seconds. >>> df_time = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, - ... index = [pd.Timestamp('20130101 09:00:00'), - ... pd.Timestamp('20130101 09:00:02'), - ... pd.Timestamp('20130101 09:00:03'), - ... pd.Timestamp('20130101 09:00:05'), - ... pd.Timestamp('20130101 09:00:06')]) + ... index=[pd.Timestamp('20130101 09:00:00'), + ... pd.Timestamp('20130101 09:00:02'), + ... pd.Timestamp('20130101 09:00:03'), + ... pd.Timestamp('20130101 09:00:05'), + ... pd.Timestamp('20130101 09:00:06')]) >>> df_time B diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 26405aac7d1c0..26ec4844d2a75 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1770,16 +1770,16 @@ def apply( Using ``subset`` to restrict application to a single column or multiple columns >>> df.style.apply(highlight_max, color='red', subset="A") - ... # doctest: +SKIP + ... # doctest: +SKIP >>> df.style.apply(highlight_max, color='red', subset=["A", "B"]) - ... # doctest: +SKIP + ... # doctest: +SKIP Using a 2d input to ``subset`` to select rows in addition to columns - >>> df.style.apply(highlight_max, color='red', subset=([0,1,2], slice(None))) - ... # doctest: +SKIP - >>> df.style.apply(highlight_max, color='red', subset=(slice(0,5,2), "A")) - ... # doctest: +SKIP + >>> df.style.apply(highlight_max, color='red', subset=([0, 1, 2], slice(None))) + ... # doctest: +SKIP + >>> df.style.apply(highlight_max, color='red', subset=(slice(0, 5, 2), "A")) + ... # doctest: +SKIP Using a function which returns a Series / DataFrame of unequal length but containing valid index labels diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index c413d98957007..aab29fce89abe 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -157,13 +157,16 @@ def test_bad_class(self, capsys): ( "BadDocstrings", "unused_import", - ("flake8 error: F401 'pandas as pdf' imported but unused",), + ( + "flake8 error: line 1, col 1: F401 'pandas as pdf' " + "imported but unused", + ), ), ( "BadDocstrings", "missing_whitespace_around_arithmetic_operator", ( - "flake8 error: " + "flake8 error: line 1, col 2: " "E226 missing whitespace around arithmetic operator", ), ), @@ -172,12 +175,15 @@ def test_bad_class(self, capsys): "indentation_is_not_a_multiple_of_four", # with flake8 3.9.0, the message ends with four spaces, # whereas in earlier versions, it ended with "four" - ("flake8 error: E111 indentation is not a multiple of 4",), + ( + "flake8 error: line 2, col 3: E111 indentation is not a " + "multiple of 4", + ), ), ( "BadDocstrings", "missing_whitespace_after_comma", - ("flake8 error: E231 missing whitespace after ',' (3 times)",), + ("flake8 error: line 1, col 33: E231 missing whitespace after ','",), ), ( "BadDocstrings", diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 487fe44e4d9bc..c1b8759319a18 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -58,7 +58,8 @@ "SA05": "{reference_name} in `See Also` section does not need `pandas` " "prefix, use {right_reference} instead.", "EX02": "Examples do not pass tests:\n{doctest_log}", - "EX03": "flake8 error: {error_code} {error_message}{times_happening}", + "EX03": "flake8 error: line {line_number}, col {col_number}: {error_code} " + "{error_message}", "EX04": "Do not import {imported_library}, as it is imported " "automatically for the examples (numpy as np, pandas as pd)", } @@ -212,20 +213,31 @@ def validate_pep8(self): try: file.write(content) file.flush() - cmd = ["python", "-m", "flake8", "--quiet", "--statistics", file.name] + cmd = [ + "python", + "-m", + "flake8", + "--format=%(row)d\t%(col)d\t%(code)s\t%(text)s", + file.name, + ] response = subprocess.run(cmd, capture_output=True, check=False, text=True) stdout = response.stdout stdout = stdout.replace(file.name, "") - messages = stdout.strip("\n") + messages = stdout.strip("\n").splitlines() if messages: - error_messages.append(messages) + error_messages.extend(messages) finally: file.close() os.unlink(file.name) for error_message in error_messages: - error_count, error_code, message = error_message.split(maxsplit=2) - yield error_code, message, int(error_count) + line_number, col_number, error_code, message = error_message.split( + "\t", maxsplit=3 + ) + # Note: we subtract 2 from the line number because + # 'import numpy as np\nimport pandas as pd\n' + # is prepended to the docstrings. + yield error_code, message, int(line_number) - 2, int(col_number) def non_hyphenated_array_like(self): return "array_like" in self.raw_doc @@ -276,14 +288,14 @@ def pandas_validate(func_name: str): pandas_error("EX02", doctest_log=result["examples_errs"]) ) - for error_code, error_message, error_count in doc.validate_pep8(): - times_happening = f" ({error_count} times)" if error_count > 1 else "" + for error_code, error_message, line_number, col_number in doc.validate_pep8(): result["errors"].append( pandas_error( "EX03", error_code=error_code, error_message=error_message, - times_happening=times_happening, + line_number=line_number, + col_number=col_number, ) ) examples_source_code = "".join(doc.examples_source_code) @@ -407,7 +419,7 @@ def header(title, width=80, char="#"): sys.stderr.write(header("Validation")) if result["errors"]: - sys.stderr.write(f'{len(result["errors"])} Errors found:\n') + sys.stderr.write(f'{len(result["errors"])} Errors found for `{func_name}`:\n') for err_code, err_desc in result["errors"]: if err_code == "EX02": # Failing examples are printed at the end sys.stderr.write("\tExamples do not pass tests\n") diff --git a/setup.cfg b/setup.cfg index c269237f97211..c4f16dadea825 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,6 +5,8 @@ max-line-length = 88 ignore = # space before : (needed for how black formats slicing) E203, + # expected n blank lines + E3, # line break before binary operator W503, # line break after binary operator