Skip to content

STYLE improve validate-docstrings ergonomics #52482

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,11 +1005,11 @@ class Window(BaseWindow):
Rolling sum with a window span of 2 seconds.

>>> df_time = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]},
... index = [pd.Timestamp('20130101 09:00:00'),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't resist fixing up a docstring with these improved ergonomics.

... pd.Timestamp('20130101 09:00:02'),
... pd.Timestamp('20130101 09:00:03'),
... pd.Timestamp('20130101 09:00:05'),
... pd.Timestamp('20130101 09:00:06')])
... index=[pd.Timestamp('20130101 09:00:00'),
... pd.Timestamp('20130101 09:00:02'),
... pd.Timestamp('20130101 09:00:03'),
... pd.Timestamp('20130101 09:00:05'),
... pd.Timestamp('20130101 09:00:06')])

>>> df_time
B
Expand Down
12 changes: 6 additions & 6 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -1770,16 +1770,16 @@ def apply(
Using ``subset`` to restrict application to a single column or multiple columns

>>> df.style.apply(highlight_max, color='red', subset="A")
... # doctest: +SKIP
... # doctest: +SKIP
>>> df.style.apply(highlight_max, color='red', subset=["A", "B"])
... # doctest: +SKIP
... # doctest: +SKIP

Using a 2d input to ``subset`` to select rows in addition to columns

>>> df.style.apply(highlight_max, color='red', subset=([0,1,2], slice(None)))
... # doctest: +SKIP
>>> df.style.apply(highlight_max, color='red', subset=(slice(0,5,2), "A"))
... # doctest: +SKIP
>>> df.style.apply(highlight_max, color='red', subset=([0, 1, 2], slice(None)))
... # doctest: +SKIP
>>> df.style.apply(highlight_max, color='red', subset=(slice(0, 5, 2), "A"))
... # doctest: +SKIP

Using a function which returns a Series / DataFrame of unequal length but
containing valid index labels
Expand Down
14 changes: 10 additions & 4 deletions scripts/tests/test_validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,13 +157,16 @@ def test_bad_class(self, capsys):
(
"BadDocstrings",
"unused_import",
("flake8 error: F401 'pandas as pdf' imported but unused",),
(
"flake8 error: line 1, col 1: F401 'pandas as pdf' "
"imported but unused",
),
),
(
"BadDocstrings",
"missing_whitespace_around_arithmetic_operator",
(
"flake8 error: "
"flake8 error: line 1, col 2: "
"E226 missing whitespace around arithmetic operator",
),
),
Expand All @@ -172,12 +175,15 @@ def test_bad_class(self, capsys):
"indentation_is_not_a_multiple_of_four",
# with flake8 3.9.0, the message ends with four spaces,
# whereas in earlier versions, it ended with "four"
("flake8 error: E111 indentation is not a multiple of 4",),
(
"flake8 error: line 2, col 3: E111 indentation is not a "
"multiple of 4",
),
),
(
"BadDocstrings",
"missing_whitespace_after_comma",
("flake8 error: E231 missing whitespace after ',' (3 times)",),
("flake8 error: line 1, col 33: E231 missing whitespace after ','",),
),
(
"BadDocstrings",
Expand Down
32 changes: 22 additions & 10 deletions scripts/validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@
"SA05": "{reference_name} in `See Also` section does not need `pandas` "
"prefix, use {right_reference} instead.",
"EX02": "Examples do not pass tests:\n{doctest_log}",
"EX03": "flake8 error: {error_code} {error_message}{times_happening}",
"EX03": "flake8 error: line {line_number}, col {col_number}: {error_code} "
"{error_message}",
"EX04": "Do not import {imported_library}, as it is imported "
"automatically for the examples (numpy as np, pandas as pd)",
}
Expand Down Expand Up @@ -212,20 +213,31 @@ def validate_pep8(self):
try:
file.write(content)
file.flush()
cmd = ["python", "-m", "flake8", "--quiet", "--statistics", file.name]
cmd = [
"python",
"-m",
"flake8",
"--format=%(row)d\t%(col)d\t%(code)s\t%(text)s",
file.name,
]
response = subprocess.run(cmd, capture_output=True, check=False, text=True)
stdout = response.stdout
stdout = stdout.replace(file.name, "")
messages = stdout.strip("\n")
messages = stdout.strip("\n").splitlines()
if messages:
error_messages.append(messages)
error_messages.extend(messages)
finally:
file.close()
os.unlink(file.name)

for error_message in error_messages:
error_count, error_code, message = error_message.split(maxsplit=2)
yield error_code, message, int(error_count)
line_number, col_number, error_code, message = error_message.split(
"\t", maxsplit=3
)
# Note: we subtract 2 from the line number because
# 'import numpy as np\nimport pandas as pd\n'
# is prepended to the docstrings.
yield error_code, message, int(line_number) - 2, int(col_number)

def non_hyphenated_array_like(self):
return "array_like" in self.raw_doc
Expand Down Expand Up @@ -276,14 +288,14 @@ def pandas_validate(func_name: str):
pandas_error("EX02", doctest_log=result["examples_errs"])
)

for error_code, error_message, error_count in doc.validate_pep8():
times_happening = f" ({error_count} times)" if error_count > 1 else ""
for error_code, error_message, line_number, col_number in doc.validate_pep8():
result["errors"].append(
pandas_error(
"EX03",
error_code=error_code,
error_message=error_message,
times_happening=times_happening,
line_number=line_number,
col_number=col_number,
)
)
examples_source_code = "".join(doc.examples_source_code)
Expand Down Expand Up @@ -407,7 +419,7 @@ def header(title, width=80, char="#"):

sys.stderr.write(header("Validation"))
if result["errors"]:
sys.stderr.write(f'{len(result["errors"])} Errors found:\n')
sys.stderr.write(f'{len(result["errors"])} Errors found for `{func_name}`:\n')
for err_code, err_desc in result["errors"]:
if err_code == "EX02": # Failing examples are printed at the end
sys.stderr.write("\tExamples do not pass tests\n")
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ max-line-length = 88
ignore =
# space before : (needed for how black formats slicing)
E203,
# expected n blank lines
E3,
Comment on lines +8 to +9
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

E3 shows things like "expected 2 blank lines, found 0", which I think is a bit irrelevant for docstrings: we put the lines one after the other anyway, and having empty >>> lines in docstrings wouldn't look good.

# line break before binary operator
W503,
# line break after binary operator
Expand Down