Skip to content

Commit 82b793a

Browse files
MarcoGorelli, topper-123
authored and committed
STYLE improve validate-docstrings ergonomics (pandas-dev#52482)
improve `validate-docstrings` ergonomics

Co-authored-by: MarcoGorelli <>
1 parent 7c04e31 commit 82b793a

File tree

5 files changed

+45
-25
lines changed

5 files changed

+45
-25
lines changed

pandas/core/window/rolling.py

+5 -5
Original file line numberDiff line numberDiff line change
@@ -1005,11 +1005,11 @@ class Window(BaseWindow):
10051005
Rolling sum with a window span of 2 seconds.
10061006
10071007
>>> df_time = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]},
1008-
... index = [pd.Timestamp('20130101 09:00:00'),
1009-
... pd.Timestamp('20130101 09:00:02'),
1010-
... pd.Timestamp('20130101 09:00:03'),
1011-
... pd.Timestamp('20130101 09:00:05'),
1012-
... pd.Timestamp('20130101 09:00:06')])
1008+
... index=[pd.Timestamp('20130101 09:00:00'),
1009+
... pd.Timestamp('20130101 09:00:02'),
1010+
... pd.Timestamp('20130101 09:00:03'),
1011+
... pd.Timestamp('20130101 09:00:05'),
1012+
... pd.Timestamp('20130101 09:00:06')])
10131013
10141014
>>> df_time
10151015
B

pandas/io/formats/style.py

+6 -6
Original file line numberDiff line numberDiff line change
@@ -1770,16 +1770,16 @@ def apply(
17701770
Using ``subset`` to restrict application to a single column or multiple columns
17711771
17721772
>>> df.style.apply(highlight_max, color='red', subset="A")
1773-
... # doctest: +SKIP
1773+
... # doctest: +SKIP
17741774
>>> df.style.apply(highlight_max, color='red', subset=["A", "B"])
1775-
... # doctest: +SKIP
1775+
... # doctest: +SKIP
17761776
17771777
Using a 2d input to ``subset`` to select rows in addition to columns
17781778
1779-
>>> df.style.apply(highlight_max, color='red', subset=([0,1,2], slice(None)))
1780-
... # doctest: +SKIP
1781-
>>> df.style.apply(highlight_max, color='red', subset=(slice(0,5,2), "A"))
1782-
... # doctest: +SKIP
1779+
>>> df.style.apply(highlight_max, color='red', subset=([0, 1, 2], slice(None)))
1780+
... # doctest: +SKIP
1781+
>>> df.style.apply(highlight_max, color='red', subset=(slice(0, 5, 2), "A"))
1782+
... # doctest: +SKIP
17831783
17841784
Using a function which returns a Series / DataFrame of unequal length but
17851785
containing valid index labels

scripts/tests/test_validate_docstrings.py

+10 -4
Original file line numberDiff line numberDiff line change
@@ -157,13 +157,16 @@ def test_bad_class(self, capsys):
157157
(
158158
"BadDocstrings",
159159
"unused_import",
160-
("flake8 error: F401 'pandas as pdf' imported but unused",),
160+
(
161+
"flake8 error: line 1, col 1: F401 'pandas as pdf' "
162+
"imported but unused",
163+
),
161164
),
162165
(
163166
"BadDocstrings",
164167
"missing_whitespace_around_arithmetic_operator",
165168
(
166-
"flake8 error: "
169+
"flake8 error: line 1, col 2: "
167170
"E226 missing whitespace around arithmetic operator",
168171
),
169172
),
@@ -172,12 +175,15 @@ def test_bad_class(self, capsys):
172175
"indentation_is_not_a_multiple_of_four",
173176
# with flake8 3.9.0, the message ends with four spaces,
174177
# whereas in earlier versions, it ended with "four"
175-
("flake8 error: E111 indentation is not a multiple of 4",),
178+
(
179+
"flake8 error: line 2, col 3: E111 indentation is not a "
180+
"multiple of 4",
181+
),
176182
),
177183
(
178184
"BadDocstrings",
179185
"missing_whitespace_after_comma",
180-
("flake8 error: E231 missing whitespace after ',' (3 times)",),
186+
("flake8 error: line 1, col 33: E231 missing whitespace after ','",),
181187
),
182188
(
183189
"BadDocstrings",

scripts/validate_docstrings.py

+22 -10
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@
5858
"SA05": "{reference_name} in `See Also` section does not need `pandas` "
5959
"prefix, use {right_reference} instead.",
6060
"EX02": "Examples do not pass tests:\n{doctest_log}",
61-
"EX03": "flake8 error: {error_code} {error_message}{times_happening}",
61+
"EX03": "flake8 error: line {line_number}, col {col_number}: {error_code} "
62+
"{error_message}",
6263
"EX04": "Do not import {imported_library}, as it is imported "
6364
"automatically for the examples (numpy as np, pandas as pd)",
6465
}
@@ -212,20 +213,31 @@ def validate_pep8(self):
212213
try:
213214
file.write(content)
214215
file.flush()
215-
cmd = ["python", "-m", "flake8", "--quiet", "--statistics", file.name]
216+
cmd = [
217+
"python",
218+
"-m",
219+
"flake8",
220+
"--format=%(row)d\t%(col)d\t%(code)s\t%(text)s",
221+
file.name,
222+
]
216223
response = subprocess.run(cmd, capture_output=True, check=False, text=True)
217224
stdout = response.stdout
218225
stdout = stdout.replace(file.name, "")
219-
messages = stdout.strip("\n")
226+
messages = stdout.strip("\n").splitlines()
220227
if messages:
221-
error_messages.append(messages)
228+
error_messages.extend(messages)
222229
finally:
223230
file.close()
224231
os.unlink(file.name)
225232

226233
for error_message in error_messages:
227-
error_count, error_code, message = error_message.split(maxsplit=2)
228-
yield error_code, message, int(error_count)
234+
line_number, col_number, error_code, message = error_message.split(
235+
"\t", maxsplit=3
236+
)
237+
# Note: we subtract 2 from the line number because
238+
# 'import numpy as np\nimport pandas as pd\n'
239+
# is prepended to the docstrings.
240+
yield error_code, message, int(line_number) - 2, int(col_number)
229241

230242
def non_hyphenated_array_like(self):
231243
return "array_like" in self.raw_doc
@@ -276,14 +288,14 @@ def pandas_validate(func_name: str):
276288
pandas_error("EX02", doctest_log=result["examples_errs"])
277289
)
278290

279-
for error_code, error_message, error_count in doc.validate_pep8():
280-
times_happening = f" ({error_count} times)" if error_count > 1 else ""
291+
for error_code, error_message, line_number, col_number in doc.validate_pep8():
281292
result["errors"].append(
282293
pandas_error(
283294
"EX03",
284295
error_code=error_code,
285296
error_message=error_message,
286-
times_happening=times_happening,
297+
line_number=line_number,
298+
col_number=col_number,
287299
)
288300
)
289301
examples_source_code = "".join(doc.examples_source_code)
@@ -407,7 +419,7 @@ def header(title, width=80, char="#"):
407419

408420
sys.stderr.write(header("Validation"))
409421
if result["errors"]:
410-
sys.stderr.write(f'{len(result["errors"])} Errors found:\n')
422+
sys.stderr.write(f'{len(result["errors"])} Errors found for `{func_name}`:\n')
411423
for err_code, err_desc in result["errors"]:
412424
if err_code == "EX02": # Failing examples are printed at the end
413425
sys.stderr.write("\tExamples do not pass tests\n")

setup.cfg

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ max-line-length = 88
55
ignore =
66
# space before : (needed for how black formats slicing)
77
E203,
8+
# expected n blank lines
9+
E3,
810
# line break before binary operator
911
W503,
1012
# line break after binary operator

0 commit comments

Comments
 (0)