From 762c347f361ef070b64bb981491275df12bedfd1 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Fri, 30 Dec 2022 12:57:43 -0500 Subject: [PATCH 1/5] DOC: Add ignore_functions option to validate_docstrings.py --- ci/code_checks.sh | 2 ++ scripts/validate_docstrings.py | 32 +++++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 3c1362b1ac83e..9085644dd509d 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -83,6 +83,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06 RET=$(($RET + $?)) ; echo $MSG "DONE" + $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=RT02 --ignore_functions=pandas.Series.align,pandas.Series.dt.total_seconds,pandas.Series.cat.rename_categories,pandas.Series.cat.reorder_categories,pandas.Series.cat.add_categories,pandas.Series.cat.remove_categories,pandas.Series.cat.remove_unused_categories,pandas.Index.all,pandas.Index.any,pandas.CategoricalIndex.rename_categories,pandas.CategoricalIndex.reorder_categories,pandas.CategoricalIndex.add_categories,pandas.CategoricalIndex.remove_categories,pandas.CategoricalIndex.remove_unused_categories,pandas.MultiIndex.drop,pandas.DatetimeIndex.to_pydatetime,pandas.TimedeltaIndex.to_pytimedelta,pandas.core.groupby.SeriesGroupBy.apply,pandas.core.groupby.DataFrameGroupBy.apply,pandas.io.formats.style.Styler.export,pandas.api.extensions.ExtensionArray.astype,pandas.api.extensions.ExtensionArray.dropna,pandas.api.extensions.ExtensionArray.isna,pandas.api.extensions.ExtensionArray.repeat,pandas.api.extensions.ExtensionArray.unique,pandas.DataFrame.align + fi ### DOCUMENTATION NOTEBOOKS ### diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 
a86630eba7d5d..393b2b8566671 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -295,7 +295,7 @@ def pandas_validate(func_name: str): return result -def validate_all(prefix, ignore_deprecated=False): +def validate_all(prefix, ignore_deprecated=False, ignore_functions=None): """ Execute the validation of all docstrings, and return a dict with the results. @@ -307,6 +307,8 @@ def validate_all(prefix, ignore_deprecated=False): validated. If None, all docstrings will be validated. ignore_deprecated: bool, default False If True, deprecated objects are ignored when validating docstrings. + ignore_functions: list of str or None, default None + If not None, contains a list of functions to ignore Returns ------- @@ -317,6 +319,11 @@ def validate_all(prefix, ignore_deprecated=False): result = {} seen = {} + if ignore_functions is None: + ignore_functions = {} + else: + ignore_functions = set(ignore_functions) + base_path = pathlib.Path(__file__).parent.parent api_doc_fnames = pathlib.Path(base_path, "doc", "source", "reference") api_items = [] @@ -325,7 +332,9 @@ def validate_all(prefix, ignore_deprecated=False): api_items += list(get_api_items(f)) for func_name, _, section, subsection in api_items: - if prefix and not func_name.startswith(prefix): + if ( + prefix and not func_name.startswith(prefix) + ) or func_name in ignore_functions: continue doc_info = pandas_validate(func_name) if ignore_deprecated and doc_info["deprecated"]: @@ -353,11 +362,12 @@ def print_validate_all_results( errors: list[str] | None, output_format: str, ignore_deprecated: bool, + ignore_functions: list[str] | None, ): if output_format not in ("default", "json", "actions"): raise ValueError(f'Unknown output_format "{output_format}"') - result = validate_all(prefix, ignore_deprecated) + result = validate_all(prefix, ignore_deprecated, ignore_functions) if output_format == "json": sys.stdout.write(json.dumps(result)) @@ -408,13 +418,17 @@ def header(title, width=80, 
char="#"): sys.stderr.write(result["examples_errs"]) -def main(func_name, prefix, errors, output_format, ignore_deprecated): +def main(func_name, prefix, errors, output_format, ignore_deprecated, ignore_functions): """ Main entry point. Call the validation for one or for all docstrings. """ if func_name is None: return print_validate_all_results( - prefix, errors, output_format, ignore_deprecated + prefix, + errors, + output_format, + ignore_deprecated, + ignore_functions, ) else: print_validate_one_results(func_name) @@ -464,6 +478,13 @@ def main(func_name, prefix, errors, output_format, ignore_deprecated): "deprecated objects are ignored when validating " "all docstrings", ) + argparser.add_argument( + "--ignore_functions", + default=None, + help="function or method to not validate " + "(e.g. Pandas.DataFrame.head). " + "Inverse of the `function` argument.", + ) args = argparser.parse_args() sys.exit( @@ -473,5 +494,6 @@ def main(func_name, prefix, errors, output_format, ignore_deprecated): args.errors.split(",") if args.errors else None, args.format, args.ignore_deprecated, + args.ignore_functions.split(",") if args.ignore_functions else None, ) ) From ff517c0d7aa648a6f40045644cf8ad25ea47cc14 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Wed, 4 Jan 2023 10:43:02 -0500 Subject: [PATCH 2/5] fixup! 
DOC: Add ignore_functions option to validate_docstrings.py --- ci/code_checks.sh | 2 ++ scripts/validate_docstrings.py | 13 +++++-------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 9085644dd509d..f7a17696cb393 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -83,7 +83,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06 RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Partially validate docstrings (RT02)' ; echo $MSG $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=RT02 --ignore_functions=pandas.Series.align,pandas.Series.dt.total_seconds,pandas.Series.cat.rename_categories,pandas.Series.cat.reorder_categories,pandas.Series.cat.add_categories,pandas.Series.cat.remove_categories,pandas.Series.cat.remove_unused_categories,pandas.Index.all,pandas.Index.any,pandas.CategoricalIndex.rename_categories,pandas.CategoricalIndex.reorder_categories,pandas.CategoricalIndex.add_categories,pandas.CategoricalIndex.remove_categories,pandas.CategoricalIndex.remove_unused_categories,pandas.MultiIndex.drop,pandas.DatetimeIndex.to_pydatetime,pandas.TimedeltaIndex.to_pytimedelta,pandas.core.groupby.SeriesGroupBy.apply,pandas.core.groupby.DataFrameGroupBy.apply,pandas.io.formats.style.Styler.export,pandas.api.extensions.ExtensionArray.astype,pandas.api.extensions.ExtensionArray.dropna,pandas.api.extensions.ExtensionArray.isna,pandas.api.extensions.ExtensionArray.repeat,pandas.api.extensions.ExtensionArray.unique,pandas.DataFrame.align + RET=$(($RET + $?)) ; echo $MSG "DONE" fi diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 393b2b8566671..d69166da207f4 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -319,10 +319,7 @@ 
def validate_all(prefix, ignore_deprecated=False, ignore_functions=None): result = {} seen = {} - if ignore_functions is None: - ignore_functions = {} - else: - ignore_functions = set(ignore_functions) + ignore_functions = set(ignore_functions or []) base_path = pathlib.Path(__file__).parent.parent api_doc_fnames = pathlib.Path(base_path, "doc", "source", "reference") @@ -332,9 +329,9 @@ def validate_all(prefix, ignore_deprecated=False, ignore_functions=None): api_items += list(get_api_items(f)) for func_name, _, section, subsection in api_items: - if ( - prefix and not func_name.startswith(prefix) - ) or func_name in ignore_functions: + if func_name in ignore_functions: + continue + if prefix and not func_name.startswith(prefix): continue doc_info = pandas_validate(func_name) if ignore_deprecated and doc_info["deprecated"]: @@ -482,7 +479,7 @@ def main(func_name, prefix, errors, output_format, ignore_deprecated, ignore_fun "--ignore_functions", default=None, help="function or method to not validate " - "(e.g. Pandas.DataFrame.head). " + "(e.g. pandas.DataFrame.head). " "Inverse of the `function` argument.", ) From 838a373d6a6c1ab625eb28fb5b894308c81ec1d3 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Wed, 4 Jan 2023 14:01:29 -0500 Subject: [PATCH 3/5] fixup! fixup! 
DOC: Add ignore_functions option to validate_docstrings.py --- scripts/tests/test_validate_docstrings.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index b490c2ffdc2e8..4edda823d33ed 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -339,6 +339,7 @@ def test_exit_status_for_main(self, monkeypatch): errors=[], output_format="default", ignore_deprecated=False, + ignore_functions=None, ) assert exit_status == 0 @@ -346,7 +347,7 @@ def test_exit_status_errors_for_validate_all(self, monkeypatch): monkeypatch.setattr( validate_docstrings, "validate_all", - lambda prefix, ignore_deprecated=False: { + lambda prefix, ignore_deprecated=False, ignore_functions=None: { "docstring1": { "errors": [ ("ER01", "err desc"), @@ -369,6 +370,7 @@ def test_exit_status_errors_for_validate_all(self, monkeypatch): errors=[], output_format="default", ignore_deprecated=False, + ignore_functions=None, ) assert exit_status == 5 @@ -376,7 +378,7 @@ def test_no_exit_status_noerrors_for_validate_all(self, monkeypatch): monkeypatch.setattr( validate_docstrings, "validate_all", - lambda prefix, ignore_deprecated=False: { + lambda prefix, ignore_deprecated=False, ignore_functions=None: { "docstring1": {"errors": [], "warnings": [("WN01", "warn desc")]}, "docstring2": {"errors": []}, }, @@ -387,6 +389,7 @@ def test_no_exit_status_noerrors_for_validate_all(self, monkeypatch): errors=[], output_format="default", ignore_deprecated=False, + ignore_functions=None, ) assert exit_status == 0 @@ -395,7 +398,7 @@ def test_exit_status_for_validate_all_json(self, monkeypatch): monkeypatch.setattr( validate_docstrings, "validate_all", - lambda prefix, ignore_deprecated=False: { + lambda prefix, ignore_deprecated=False, ignore_functions=None: { "docstring1": { "errors": [ ("ER01", "err desc"), @@ -412,6 +415,7 @@ def 
test_exit_status_for_validate_all_json(self, monkeypatch): errors=[], output_format="json", ignore_deprecated=False, + ignore_functions=None, ) assert exit_status == 0 @@ -419,7 +423,7 @@ def test_errors_param_filters_errors(self, monkeypatch): monkeypatch.setattr( validate_docstrings, "validate_all", - lambda prefix, ignore_deprecated=False: { + lambda prefix, ignore_deprecated=False, ignore_functions=None: { "Series.foo": { "errors": [ ("ER01", "err desc"), @@ -447,6 +451,7 @@ def test_errors_param_filters_errors(self, monkeypatch): errors=["ER01"], output_format="default", ignore_deprecated=False, + ignore_functions=None, ) assert exit_status == 3 @@ -456,5 +461,6 @@ def test_errors_param_filters_errors(self, monkeypatch): errors=["ER03"], output_format="default", ignore_deprecated=False, + ignore_functions=None, ) assert exit_status == 1 From 2813305ad83260acf628671ed4b729f2367387df Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Fri, 6 Jan 2023 14:58:32 -0500 Subject: [PATCH 4/5] fixup! fixup! fixup! 
DOC: Add ignore_functions option to validate_docstrings.py --- ci/code_checks.sh | 28 ++++++++++++++++++++++- scripts/tests/test_validate_docstrings.py | 19 +++++++++++++++ scripts/validate_docstrings.py | 21 ++++++++++------- 3 files changed, 59 insertions(+), 9 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index f7a17696cb393..5d2f176d6bcd8 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -84,7 +84,33 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Partially validate docstrings (RT02)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=RT02 --ignore_functions=pandas.Series.align,pandas.Series.dt.total_seconds,pandas.Series.cat.rename_categories,pandas.Series.cat.reorder_categories,pandas.Series.cat.add_categories,pandas.Series.cat.remove_categories,pandas.Series.cat.remove_unused_categories,pandas.Index.all,pandas.Index.any,pandas.CategoricalIndex.rename_categories,pandas.CategoricalIndex.reorder_categories,pandas.CategoricalIndex.add_categories,pandas.CategoricalIndex.remove_categories,pandas.CategoricalIndex.remove_unused_categories,pandas.MultiIndex.drop,pandas.DatetimeIndex.to_pydatetime,pandas.TimedeltaIndex.to_pytimedelta,pandas.core.groupby.SeriesGroupBy.apply,pandas.core.groupby.DataFrameGroupBy.apply,pandas.io.formats.style.Styler.export,pandas.api.extensions.ExtensionArray.astype,pandas.api.extensions.ExtensionArray.dropna,pandas.api.extensions.ExtensionArray.isna,pandas.api.extensions.ExtensionArray.repeat,pandas.api.extensions.ExtensionArray.unique,pandas.DataFrame.align + $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=RT02 --ignore_functions \ + pandas.Series.align \ + pandas.Series.dt.total_seconds \ + pandas.Series.cat.rename_categories \ + pandas.Series.cat.reorder_categories \ + pandas.Series.cat.add_categories \ + pandas.Series.cat.remove_categories \ + pandas.Series.cat.remove_unused_categories \ + pandas.Index.all \ 
+ pandas.Index.any \ + pandas.CategoricalIndex.rename_categories \ + pandas.CategoricalIndex.reorder_categories \ + pandas.CategoricalIndex.add_categories \ + pandas.CategoricalIndex.remove_categories \ + pandas.CategoricalIndex.remove_unused_categories \ + pandas.MultiIndex.drop \ + pandas.DatetimeIndex.to_pydatetime \ + pandas.TimedeltaIndex.to_pytimedelta \ + pandas.core.groupby.SeriesGroupBy.apply \ + pandas.core.groupby.DataFrameGroupBy.apply \ + pandas.io.formats.style.Styler.export \ + pandas.api.extensions.ExtensionArray.astype \ + pandas.api.extensions.ExtensionArray.dropna \ + pandas.api.extensions.ExtensionArray.isna \ + pandas.api.extensions.ExtensionArray.repeat \ + pandas.api.extensions.ExtensionArray.unique \ + pandas.DataFrame.align RET=$(($RET + $?)) ; echo $MSG "DONE" fi diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index 4edda823d33ed..cb013bac9625c 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -199,6 +199,25 @@ def test_leftover_files_raises(self): self._import_path(klass="BadDocstrings", func="leftover_files") ) + def test_validate_all_ignore_functions(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, + "get_all_api_items", + lambda: [ + ( + "pandas.DataFrame.align", + "func", + "current_section", + "current_subsection", + ) + ], + ) + result = validate_docstrings.validate_all( + prefix=None, + ignore_functions=["pandas.DataFrame.align"], + ) + assert len(result) == 0 + def test_validate_all_ignore_deprecated(self, monkeypatch): monkeypatch.setattr( validate_docstrings, diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index d69166da207f4..782b29fd54095 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -321,12 +321,7 @@ def validate_all(prefix, ignore_deprecated=False, ignore_functions=None): ignore_functions = set(ignore_functions or []) - base_path = 
pathlib.Path(__file__).parent.parent - api_doc_fnames = pathlib.Path(base_path, "doc", "source", "reference") - api_items = [] - for api_doc_fname in api_doc_fnames.glob("*.rst"): - with open(api_doc_fname) as f: - api_items += list(get_api_items(f)) + api_items = get_all_api_items() for func_name, _, section, subsection in api_items: if func_name in ignore_functions: @@ -354,6 +349,16 @@ def validate_all(prefix, ignore_deprecated=False, ignore_functions=None): return result +def get_all_api_items(): + base_path = pathlib.Path(__file__).parent.parent + api_doc_fnames = pathlib.Path(base_path, "doc", "source", "reference") + api_items = [] + for api_doc_fname in api_doc_fnames.glob("*.rst"): + with open(api_doc_fname) as f: + api_items += list(get_api_items(f)) + return api_items + + def print_validate_all_results( prefix: str, errors: list[str] | None, @@ -477,7 +482,7 @@ def main(func_name, prefix, errors, output_format, ignore_deprecated, ignore_fun ) argparser.add_argument( "--ignore_functions", - default=None, + nargs="*", help="function or method to not validate " "(e.g. pandas.DataFrame.head). 
" "Inverse of the `function` argument.", @@ -491,6 +496,6 @@ def main(func_name, prefix, errors, output_format, ignore_deprecated, ignore_fun args.errors.split(",") if args.errors else None, args.format, args.ignore_deprecated, - args.ignore_functions.split(",") if args.ignore_functions else None, + args.ignore_functions, ) ) From 1720c81ed918125b083508bebb424777b819109e Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Sat, 7 Jan 2023 13:42:42 -0500 Subject: [PATCH 5/5] Make get_all_api_items a generator and improve ignore_functions test --- scripts/tests/test_validate_docstrings.py | 11 +++++++++-- scripts/validate_docstrings.py | 8 ++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index cb013bac9625c..0b7ab145b054a 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -209,14 +209,21 @@ def test_validate_all_ignore_functions(self, monkeypatch): "func", "current_section", "current_subsection", - ) + ), + ( + "pandas.Index.all", + "func", + "current_section", + "current_subsection", + ), ], ) result = validate_docstrings.validate_all( prefix=None, ignore_functions=["pandas.DataFrame.align"], ) - assert len(result) == 0 + assert len(result) == 1 + assert "pandas.Index.all" in result def test_validate_all_ignore_deprecated(self, monkeypatch): monkeypatch.setattr( diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 782b29fd54095..5d0ef6e460486 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -321,9 +321,7 @@ def validate_all(prefix, ignore_deprecated=False, ignore_functions=None): ignore_functions = set(ignore_functions or []) - api_items = get_all_api_items() - - for func_name, _, section, subsection in api_items: + for func_name, _, section, subsection in get_all_api_items(): if func_name in ignore_functions: continue if prefix and not 
func_name.startswith(prefix): @@ -352,11 +350,9 @@ def validate_all(prefix, ignore_deprecated=False, ignore_functions=None): def get_all_api_items(): base_path = pathlib.Path(__file__).parent.parent api_doc_fnames = pathlib.Path(base_path, "doc", "source", "reference") - api_items = [] for api_doc_fname in api_doc_fnames.glob("*.rst"): with open(api_doc_fname) as f: - api_items += list(get_api_items(f)) - return api_items + yield from get_api_items(f) def print_validate_all_results(