Skip to content

DOC: Add ignore_functions option to validate_docstrings.py #50509

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,36 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Partially validate docstrings (RT02)' ; echo $MSG
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=RT02 --ignore_functions \
pandas.Series.align \
pandas.Series.dt.total_seconds \
pandas.Series.cat.rename_categories \
pandas.Series.cat.reorder_categories \
pandas.Series.cat.add_categories \
pandas.Series.cat.remove_categories \
pandas.Series.cat.remove_unused_categories \
pandas.Index.all \
pandas.Index.any \
pandas.CategoricalIndex.rename_categories \
pandas.CategoricalIndex.reorder_categories \
pandas.CategoricalIndex.add_categories \
pandas.CategoricalIndex.remove_categories \
pandas.CategoricalIndex.remove_unused_categories \
pandas.MultiIndex.drop \
pandas.DatetimeIndex.to_pydatetime \
pandas.TimedeltaIndex.to_pytimedelta \
pandas.core.groupby.SeriesGroupBy.apply \
pandas.core.groupby.DataFrameGroupBy.apply \
pandas.io.formats.style.Styler.export \
pandas.api.extensions.ExtensionArray.astype \
pandas.api.extensions.ExtensionArray.dropna \
pandas.api.extensions.ExtensionArray.isna \
pandas.api.extensions.ExtensionArray.repeat \
pandas.api.extensions.ExtensionArray.unique \
pandas.DataFrame.align
RET=$(($RET + $?)) ; echo $MSG "DONE"

fi

### DOCUMENTATION NOTEBOOKS ###
Expand Down
33 changes: 29 additions & 4 deletions scripts/tests/test_validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,25 @@ def test_leftover_files_raises(self):
self._import_path(klass="BadDocstrings", func="leftover_files")
)

def test_validate_all_ignore_functions(self, monkeypatch):
    """
    Check that validate_all returns no result for a function that is
    listed in ``ignore_functions``.
    """
    ignored_name = "pandas.DataFrame.align"
    # The only API item exposed to validate_all is the one being ignored.
    fake_api_items = [
        (ignored_name, "func", "current_section", "current_subsection"),
    ]
    monkeypatch.setattr(
        validate_docstrings,
        "get_all_api_items",
        lambda: fake_api_items,
    )
    result = validate_docstrings.validate_all(
        prefix=None,
        ignore_functions=[ignored_name],
    )
    assert len(result) == 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Besides checking that the ignored function is ignored, I guess it would make sense to also check that a non-ignored function is still present. If we do that, then besides checking the length, an assert on the name of the returned function would be helpful.


def test_validate_all_ignore_deprecated(self, monkeypatch):
monkeypatch.setattr(
validate_docstrings,
Expand Down Expand Up @@ -339,14 +358,15 @@ def test_exit_status_for_main(self, monkeypatch):
errors=[],
output_format="default",
ignore_deprecated=False,
ignore_functions=None,
)
assert exit_status == 0

def test_exit_status_errors_for_validate_all(self, monkeypatch):
monkeypatch.setattr(
validate_docstrings,
"validate_all",
lambda prefix, ignore_deprecated=False: {
lambda prefix, ignore_deprecated=False, ignore_functions=None: {
"docstring1": {
"errors": [
("ER01", "err desc"),
Expand All @@ -369,14 +389,15 @@ def test_exit_status_errors_for_validate_all(self, monkeypatch):
errors=[],
output_format="default",
ignore_deprecated=False,
ignore_functions=None,
)
assert exit_status == 5

def test_no_exit_status_noerrors_for_validate_all(self, monkeypatch):
monkeypatch.setattr(
validate_docstrings,
"validate_all",
lambda prefix, ignore_deprecated=False: {
lambda prefix, ignore_deprecated=False, ignore_functions=None: {
"docstring1": {"errors": [], "warnings": [("WN01", "warn desc")]},
"docstring2": {"errors": []},
},
Expand All @@ -387,6 +408,7 @@ def test_no_exit_status_noerrors_for_validate_all(self, monkeypatch):
errors=[],
output_format="default",
ignore_deprecated=False,
ignore_functions=None,
)
assert exit_status == 0

Expand All @@ -395,7 +417,7 @@ def test_exit_status_for_validate_all_json(self, monkeypatch):
monkeypatch.setattr(
validate_docstrings,
"validate_all",
lambda prefix, ignore_deprecated=False: {
lambda prefix, ignore_deprecated=False, ignore_functions=None: {
"docstring1": {
"errors": [
("ER01", "err desc"),
Expand All @@ -412,14 +434,15 @@ def test_exit_status_for_validate_all_json(self, monkeypatch):
errors=[],
output_format="json",
ignore_deprecated=False,
ignore_functions=None,
)
assert exit_status == 0

def test_errors_param_filters_errors(self, monkeypatch):
monkeypatch.setattr(
validate_docstrings,
"validate_all",
lambda prefix, ignore_deprecated=False: {
lambda prefix, ignore_deprecated=False, ignore_functions=None: {
"Series.foo": {
"errors": [
("ER01", "err desc"),
Expand Down Expand Up @@ -447,6 +470,7 @@ def test_errors_param_filters_errors(self, monkeypatch):
errors=["ER01"],
output_format="default",
ignore_deprecated=False,
ignore_functions=None,
)
assert exit_status == 3

Expand All @@ -456,5 +480,6 @@ def test_errors_param_filters_errors(self, monkeypatch):
errors=["ER03"],
output_format="default",
ignore_deprecated=False,
ignore_functions=None,
)
assert exit_status == 1
44 changes: 34 additions & 10 deletions scripts/validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ def pandas_validate(func_name: str):
return result


def validate_all(prefix, ignore_deprecated=False):
def validate_all(prefix, ignore_deprecated=False, ignore_functions=None):
"""
Execute the validation of all docstrings, and return a dict with the
results.
Expand All @@ -307,6 +307,8 @@ def validate_all(prefix, ignore_deprecated=False):
validated. If None, all docstrings will be validated.
ignore_deprecated: bool, default False
If True, deprecated objects are ignored when validating docstrings.
ignore_functions: list of str or None, default None
If not None, contains a list of function to ignore

Returns
-------
Expand All @@ -317,14 +319,13 @@ def validate_all(prefix, ignore_deprecated=False):
result = {}
seen = {}

base_path = pathlib.Path(__file__).parent.parent
api_doc_fnames = pathlib.Path(base_path, "doc", "source", "reference")
api_items = []
for api_doc_fname in api_doc_fnames.glob("*.rst"):
with open(api_doc_fname) as f:
api_items += list(get_api_items(f))
ignore_functions = set(ignore_functions or [])

api_items = get_all_api_items()

for func_name, _, section, subsection in api_items:
if func_name in ignore_functions:
continue
if prefix and not func_name.startswith(prefix):
continue
doc_info = pandas_validate(func_name)
Expand All @@ -348,16 +349,27 @@ def validate_all(prefix, ignore_deprecated=False):
return result


def get_all_api_items():
    """
    Collect the API items listed in all the reference ``.rst`` files.

    The files are searched in ``doc/source/reference``, relative to the
    parent of the directory that contains this script.

    Returns
    -------
    list
        The items yielded by ``get_api_items`` for every ``*.rst`` file,
        concatenated in the order in which the files are globbed.
    """
    base_path = pathlib.Path(__file__).parent.parent
    api_doc_fnames = pathlib.Path(base_path, "doc", "source", "reference")
    api_items = []
    for api_doc_fname in api_doc_fnames.glob("*.rst"):
        # Explicit encoding so that reading does not depend on the platform's
        # locale default (the reference files are assumed to be UTF-8).
        with open(api_doc_fname, encoding="utf-8") as f:
            # extend() consumes the items while the file is still open.
            api_items.extend(get_api_items(f))
    return api_items


def print_validate_all_results(
prefix: str,
errors: list[str] | None,
output_format: str,
ignore_deprecated: bool,
ignore_functions: list[str] | None,
):
if output_format not in ("default", "json", "actions"):
raise ValueError(f'Unknown output_format "{output_format}"')

result = validate_all(prefix, ignore_deprecated)
result = validate_all(prefix, ignore_deprecated, ignore_functions)

if output_format == "json":
sys.stdout.write(json.dumps(result))
Expand Down Expand Up @@ -408,13 +420,17 @@ def header(title, width=80, char="#"):
sys.stderr.write(result["examples_errs"])


def main(func_name, prefix, errors, output_format, ignore_deprecated):
def main(func_name, prefix, errors, output_format, ignore_deprecated, ignore_functions):
"""
Main entry point. Call the validation for one or for all docstrings.
"""
if func_name is None:
return print_validate_all_results(
prefix, errors, output_format, ignore_deprecated
prefix,
errors,
output_format,
ignore_deprecated,
ignore_functions,
)
else:
print_validate_one_results(func_name)
Expand Down Expand Up @@ -464,6 +480,13 @@ def main(func_name, prefix, errors, output_format, ignore_deprecated):
"deprecated objects are ignored when validating "
"all docstrings",
)
argparser.add_argument(
"--ignore_functions",
nargs="*",
help="function or method to not validate "
"(e.g. pandas.DataFrame.head). "
"Inverse of the `function` argument.",
)

args = argparser.parse_args()
sys.exit(
Expand All @@ -473,5 +496,6 @@ def main(func_name, prefix, errors, output_format, ignore_deprecated):
args.errors.split(",") if args.errors else None,
args.format,
args.ignore_deprecated,
args.ignore_functions,
)
)