-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: Use flake8 to check for PEP8 violations in doctests #23399
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
c87360d
bc009a4
0d730ce
cb477f8
762de5d
0358c21
9bc7f65
6090b24
72f99bd
affd8f4
561f3c3
384c77f
1178cae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -319,8 +319,6 @@ def method(self, foo=None, bar=None): | |
|
||
Examples | ||
-------- | ||
>>> import numpy as np | ||
>>> import pandas as pd | ||
>>> df = pd.DataFrame(np.ones((3, 3)), | ||
... columns=('a', 'b', 'c')) | ||
>>> df.all(1) | ||
|
@@ -584,6 +582,53 @@ def prefix_pandas(self): | |
pass | ||
|
||
|
||
class BadExamples(object): | ||
|
||
def numpy_imported(self): | ||
""" | ||
Examples | ||
-------- | ||
>>> import numpy as np | ||
>>> df = pd.DataFrame(np.ones((3, 3)), columns=('a', 'b', 'c')) | ||
""" | ||
pass | ||
|
||
def pandas_imported(self): | ||
""" | ||
Examples | ||
-------- | ||
>>> import pandas as pd | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't realize before, but there is something tricky here. The convention is to assume numpy and pandas are always imported before the examples. I personally would be explicit, but that's how all the examples are made. This means two things:
And as I said in the previous review, I'd like to have more examples of pep8 failures, like missing spaces around an operator. Also, these examples are too long for what is being tested. |
||
>>> s = pd.Series(['Antelope', 'Lion', 'Zebra', np.nan]) | ||
""" | ||
pass | ||
|
||
def missing_whitespace_around_arithmetic_operator(self): | ||
""" | ||
Examples | ||
-------- | ||
>>> 2+5 | ||
7 | ||
""" | ||
pass | ||
|
||
def indentation_is_not_a_multiple_of_four(self): | ||
""" | ||
Examples | ||
-------- | ||
>>> if 2 + 5: | ||
... pass | ||
""" | ||
pass | ||
|
||
def missing_whitespace_after_comma(self): | ||
""" | ||
Examples | ||
-------- | ||
>>> df = pd.DataFrame(np.ones((3,3)),columns=('a','b', 'c')) | ||
""" | ||
pass | ||
|
||
|
||
class TestValidator(object): | ||
|
||
def _import_path(self, klass=None, func=None): | ||
|
@@ -703,10 +748,23 @@ def test_bad_generic_functions(self, func): | |
# See Also tests | ||
('BadSeeAlso', 'prefix_pandas', | ||
('pandas.Series.rename in `See Also` section ' | ||
'does not need `pandas` prefix',)) | ||
'does not need `pandas` prefix',)), | ||
# Examples tests | ||
('BadExamples', 'numpy_imported', | ||
('F811 It\'s assumed that pandas and numpy' | ||
' are imported as pd or np',)), | ||
('BadExamples', 'pandas_imported', | ||
('F811 It\'s assumed that pandas and numpy' | ||
' are imported as pd or np',)), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these two examples need to be removed |
||
('BadExamples', 'indentation_is_not_a_multiple_of_four', | ||
('E111 indentation is not a multiple of four',)), | ||
('BadExamples', 'missing_whitespace_around_arithmetic_operator', | ||
('E226 missing whitespace around arithmetic operator',)), | ||
('BadExamples', 'missing_whitespace_after_comma', | ||
('3 E231 missing whitespace after \',\'',)), | ||
]) | ||
def test_bad_examples(self, capsys, klass, func, msgs): | ||
result = validate_one(self._import_path(klass=klass, func=func)) # noqa:F821 | ||
result = validate_one(self._import_path(klass=klass, func=func)) | ||
for msg in msgs: | ||
assert msg in ' '.join(result['errors']) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
$ ./validate_docstrings.py | ||
$ ./validate_docstrings.py pandas.DataFrame.head | ||
""" | ||
import contextlib | ||
import os | ||
import sys | ||
import json | ||
|
@@ -24,6 +25,10 @@ | |
import inspect | ||
import importlib | ||
import doctest | ||
import tempfile | ||
|
||
import flake8.main.application | ||
|
||
try: | ||
from io import StringIO | ||
except ImportError: | ||
|
@@ -331,6 +336,33 @@ def parameter_mismatches(self): | |
|
||
return errs | ||
|
||
@contextlib.contextmanager | ||
def _write_examples_code_to_temp_file(self): | ||
""" | ||
Generate file with source code from examples section. | ||
""" | ||
content = ''.join(('import numpy as np; ' | ||
'import pandas as pd # noqa: F401,E702\n', | ||
*self.examples_source_code)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wouldn't it be simpler to join by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The implementation of |
||
with tempfile.NamedTemporaryFile(mode='w') as file: | ||
file.write(content) | ||
file.flush() | ||
yield file | ||
|
||
def validate_pep8(self): | ||
if self.examples: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. small thing, but my preference is having:
this avoids having the rest of the function indented, and in my opinion increases readability. |
||
with self._write_examples_code_to_temp_file() as file: | ||
application = flake8.main.application.Application() | ||
application.initialize(["--quiet"]) | ||
application.run_checks([file.name]) | ||
application.report() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry I wasn't clear in my previous review about this. This is what I'd do:
I think it's a good practice to have inside the context manager, the code that requires to be there. Also, as this file is going quite significantly, I think it's better to keep all this functionality in a single function, so each validation is somehow isolated. I think in another context it makes more sense what you did of splitting, but I don't think we'll reuse the temporary file creation, and I think the improvement in simplicity makes it worth. |
||
|
||
for statistic in application.guide.stats.statistics_for(''): | ||
if statistic.message.endswith('from line 1'): | ||
statistic.message = "It's assumed that pandas and numpy" \ | ||
" are imported as pd or np" | ||
yield statistic | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess with the original So, I'd simply leave the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found |
||
|
||
@property | ||
def correct_parameters(self): | ||
return not bool(self.parameter_mismatches) | ||
|
@@ -490,6 +522,13 @@ def validate_one(func_name): | |
for param_err in param_errs: | ||
errs.append('\t{}'.format(param_err)) | ||
|
||
pep8_errs = list(doc.validate_pep8()) | ||
if pep8_errs: | ||
errs.append('Linting issues in doctests:') | ||
for err in pep8_errs: | ||
errs.append('\t{} {} {}'.format(err.count, err.error_code, | ||
err.message)) | ||
|
||
if doc.is_function_or_method: | ||
if not doc.returns and "return" in doc.method_source: | ||
errs.append('No Returns section found') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry for the misunderstanding.
numpy_imported
andpandas_imported
and the corresponding tests are not needed, as that is already tested in #23161.As mentioned in the previous review, forget about these two imports in the examples in this PR. You are repeating what's implemented in #23161 which will be merged earlier.