diff --git a/pandas/tests/scripts/__init__.py b/pandas/tests/scripts/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/scripts/test_validate_docstrings.py b/pandas/tests/scripts/test_validate_docstrings.py new file mode 100644 index 0000000000000..1d35d5d30bba3 --- /dev/null +++ b/pandas/tests/scripts/test_validate_docstrings.py @@ -0,0 +1,552 @@ +import os +import sys + +import numpy as np +import pytest + + +class GoodDocStrings(object): + """ + Collection of good doc strings. + + This class contains a lot of docstrings that should pass the validation + script without any errors. + """ + + def plot(self, kind, color='blue', **kwargs): + """ + Generate a plot. + + Render the data in the Series as a matplotlib plot of the + specified kind. + + Parameters + ---------- + kind : str + Kind of matplotlib plot. + color : str, default 'blue' + Color name or rgb code. + **kwargs + These parameters will be passed to the matplotlib plotting + function. + """ + pass + + def sample(self): + """ + Generate and return a random number. + + The value is sampled from a continuous uniform distribution between + 0 and 1. + + Returns + ------- + float + Random number generated. + """ + return random.random() # noqa: F821 + + def random_letters(self): + """ + Generate and return a sequence of random letters. + + The length of the returned string is also random, and is also + returned. + + Returns + ------- + length : int + Length of the returned string. + letters : str + String of random letters. + """ + length = random.randint(1, 10) # noqa: F821 + letters = ''.join(random.choice(string.ascii_lowercase) # noqa: F821 + for i in range(length)) + return length, letters + + def sample_values(self): + """ + Generate an infinite sequence of random numbers. + + The values are sampled from a continuous uniform distribution between + 0 and 1. + + Yields + ------ + float + Random number generated. 
+ """ + while True: + yield random.random() # noqa: F821 + + def head(self): + """ + Return the first 5 elements of the Series. + + This function is mainly useful to preview the values of the + Series without displaying the whole of it. + + Returns + ------- + Series + Subset of the original series with the 5 first values. + + See Also + -------- + Series.tail : Return the last 5 elements of the Series. + Series.iloc : Return a slice of the elements in the Series, + which can also be used to return the first or last n. + """ + return self.iloc[:5] + + def head1(self, n=5): + """ + Return the first elements of the Series. + + This function is mainly useful to preview the values of the + Series without displaying the whole of it. + + Parameters + ---------- + n : int + Number of values to return. + + Returns + ------- + Series + Subset of the original series with the n first values. + + See Also + -------- + tail : Return the last n elements of the Series. + + Examples + -------- + >>> s = pd.Series(['Ant', 'Bear', 'Cow', 'Dog', 'Falcon']) + >>> s.head() + 0 Ant + 1 Bear + 2 Cow + 3 Dog + 4 Falcon + dtype: object + + With the `n` parameter, we can change the number of returned rows: + + >>> s.head(n=3) + 0 Ant + 1 Bear + 2 Cow + dtype: object + """ + return self.iloc[:n] + + def contains(self, pat, case=True, na=np.nan): + """ + Return whether each value contains `pat`. + + In this case, we are illustrating how to use sections, even + if the example is simple enough and does not require them. + + Parameters + ---------- + pat : str + Pattern to check for within each element. + case : bool, default True + Whether check should be done with case sensitivity. + na : object, default np.nan + Fill value for missing data. 
+ + Examples + -------- + >>> s = pd.Series(['Antelope', 'Lion', 'Zebra', np.nan]) + >>> s.str.contains(pat='a') + 0 False + 1 False + 2 True + 3 NaN + dtype: object + + **Case sensitivity** + + With `case` set to `False` we can match `a` with both + `a` and `A`: + + >>> s.str.contains(pat='a', case=False) + 0 True + 1 False + 2 True + 3 NaN + dtype: object + + **Missing values** + + We can fill missing values in the output using the `na` parameter: + + >>> s.str.contains(pat='a', na=False) + 0 False + 1 False + 2 True + 3 False + dtype: bool + """ + pass + + +class BadGenericDocStrings(object): + """Everything here has a bad docstring + """ + + def func(self): + + """Some function. + + With several mistakes in the docstring. + + It has a blank line after the signature `def func():`. + + The text 'Some function' should go in the line after the + opening quotes of the docstring, not in the same line. + + There is a blank line between the docstring and the first line + of code `foo = 1`. + + The closing quotes should be in the next line, not in this one.""" + + foo = 1 + bar = 2 + return foo + bar + + def astype(self, dtype): + """ + Casts Series type. + + Verb in third-person of the present simple, should be infinitive. + """ + pass + + def astype1(self, dtype): + """ + Method to cast Series type. + + Does not start with verb. + """ + pass + + def astype2(self, dtype): + """ + Cast Series type + + Missing dot at the end. + """ + pass + + def astype3(self, dtype): + """ + Cast Series type from its current type to the new type defined in + the parameter dtype. + + Summary is too verbose and doesn't fit in a single line. + """ + pass + + def plot(self, kind, **kwargs): + """ + Generate a plot. + + Render the data in the Series as a matplotlib plot of the + specified kind. + + Note the blank line between the parameters title and the first + parameter. Also, note that after the name of the parameter `kind` + and before the colon, a space is missing. 
+ + Also, note that the parameter descriptions do not start with a + capital letter, and do not finish with a dot. + + Finally, the `**kwargs` parameter is missing. + + Parameters + ---------- + + kind: str + kind of matplotlib plot + """ + pass + + def method(self, foo=None, bar=None): + """ + A sample DataFrame method. + + Do not import numpy and pandas. + + Try to use meaningful data, when it makes the example easier + to understand. + + Try to avoid positional arguments like in `df.method(1)`. They + can be alright if previously defined with a meaningful name, + like in `present_value(interest_rate)`, but avoid them otherwise. + + When presenting the behavior with different parameters, do not place + all the calls one next to the other. Instead, add a short sentence + explaining what the example shows. + + Examples + -------- + >>> import numpy as np + >>> import pandas as pd + >>> df = pd.DataFrame(np.ones((3, 3)), + ... columns=('a', 'b', 'c')) + >>> df.all(1) + 0 True + 1 True + 2 True + dtype: bool + >>> df.all(bool_only=True) + Series([], dtype: bool) + """ + pass + + +class BadSummaries(object): + + def wrong_line(self): + """Exists on the wrong line""" + pass + + def no_punctuation(self): + """ + Has the right line but forgets punctuation + """ + pass + + def no_capitalization(self): + """ + provides a lowercase summary. + """ + pass + + def no_infinitive(self): + """ + Started with a verb that is not infinitive. + """ + + def multi_line(self): + """ + Extends beyond one line + which is not correct. + """ + + +class BadParameters(object): + """ + Everything here has a problem with its Parameters section. + """ + + def missing_params(self, kind, **kwargs): + """ + Lacks kwargs in Parameters. + + Parameters + ---------- + kind : str + Foo bar baz. + """ + + def bad_colon_spacing(self, kind): + """ + Has bad spacing in the type line. + + Parameters + ---------- + kind: str + Needs a space after kind. 
+ """ + + def no_description_period(self, kind): + """ + Forgets to add a period to the description. + + Parameters + ---------- + kind : str + Doesn't end with a dot + """ + + def parameter_capitalization(self, kind): + """ + Forgets to capitalize the description. + + Parameters + ---------- + kind : str + this is not capitalized. + """ + + def blank_lines(self, kind): + """ + Adds a blank line after the section header. + + Parameters + ---------- + + kind : str + Foo bar baz. + """ + pass + + +class BadReturns(object): + + def return_not_documented(self): + """ + Lacks section for Returns + """ + return "Hello world!" + + def yield_not_documented(self): + """ + Lacks section for Yields + """ + yield "Hello world!" + + def no_type(self): + """ + Returns documented but without type. + + Returns + ------- + Some value. + """ + return "Hello world!" + + def no_description(self): + """ + Provides type but no description. + + Returns + ------- + str + """ + return "Hello world!" + + def no_punctuation(self): + """ + Provides type and description but no period. + + Returns + ------- + str + A nice greeting + """ + return "Hello world!" + + +class TestValidator(object): + + @pytest.fixture(autouse=True, scope="class") + def import_scripts(self): + """ + Because the scripts directory is above the top level pandas package + we need to hack sys.path to know where to find that directory for + import. The below traverses up the file system to find the scripts + directory, adds the location to sys.path and imports the required + module into the global namespace as part of class setup, + reverting those changes on teardown. 
+ """ + up = os.path.dirname + file_dir = up(os.path.abspath(__file__)) + script_dir = os.path.join(up(up(up(file_dir))), 'scripts') + sys.path.append(script_dir) + from validate_docstrings import validate_one + globals()['validate_one'] = validate_one + yield + sys.path.pop() + del globals()['validate_one'] + + def _import_path(self, klass=None, func=None): + """ + Build the required import path for tests in this module. + + Parameters + ---------- + klass : str + Class name of object in module. + func : str + Function name of object in module. + + Returns + ------- + str + Import path of specified object in this module + """ + base_path = 'pandas.tests.scripts.test_validate_docstrings' + if klass: + base_path = '.'.join([base_path, klass]) + if func: + base_path = '.'.join([base_path, func]) + + return base_path + + def test_good_class(self): + assert validate_one(self._import_path( # noqa: F821 + klass='GoodDocStrings')) == 0 + + @pytest.mark.parametrize("func", [ + 'plot', 'sample', 'random_letters', 'sample_values', 'head', 'head1', + 'contains']) + def test_good_functions(self, func): + assert validate_one(self._import_path( # noqa: F821 + klass='GoodDocStrings', func=func)) == 0 + + def test_bad_class(self): + assert validate_one(self._import_path( # noqa: F821 + klass='BadGenericDocStrings')) > 0 + + @pytest.mark.parametrize("func", [ + 'func', 'astype', 'astype1', 'astype2', 'astype3', 'plot', 'method']) + def test_bad_generic_functions(self, func): + assert validate_one(self._import_path( # noqa:F821 + klass='BadGenericDocStrings', func=func)) > 0 + + @pytest.mark.parametrize("klass,func,msgs", [ + # Summary tests + ('BadSummaries', 'wrong_line', + ('should start in the line immediately after the opening quotes',)), + ('BadSummaries', 'no_punctuation', + ('Summary does not end with a period',)), + ('BadSummaries', 'no_capitalization', + ('Summary does not start with a capital letter',)), + ('BadSummaries', 'no_capitalization', + ('Summary must start with 
infinitive verb',)), + ('BadSummaries', 'multi_line', + ('a short summary in a single line should be present',)), + # Parameters tests + ('BadParameters', 'missing_params', + ('Parameters {**kwargs} not documented',)), + ('BadParameters', 'bad_colon_spacing', + ('Parameters {kind} not documented', + 'Unknown parameters {kind: str}', + 'Parameter "kind: str" has no type')), + ('BadParameters', 'no_description_period', + ('Parameter "kind" description should finish with "."',)), + ('BadParameters', 'parameter_capitalization', + ('Parameter "kind" description should start with a capital letter',)), + pytest.param('BadParameters', 'blank_lines', ('No error yet?',), + marks=pytest.mark.xfail), + # Returns tests + ('BadReturns', 'return_not_documented', ('No Returns section found',)), + ('BadReturns', 'yield_not_documented', ('No Yields section found',)), + pytest.param('BadReturns', 'no_type', ('foo',), + marks=pytest.mark.xfail), + pytest.param('BadReturns', 'no_description', ('foo',), + marks=pytest.mark.xfail), + pytest.param('BadReturns', 'no_punctuation', ('foo',), + marks=pytest.mark.xfail) + ]) + def test_bad_examples(self, capsys, klass, func, msgs): + validate_one(self._import_path(klass=klass, func=func)) # noqa:F821 + err = capsys.readouterr().err + for msg in msgs: + assert msg in err diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index aed3eb2f1226d..cdea2d8b83abd 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -20,7 +20,6 @@ import functools import collections import argparse -import contextlib import pydoc import inspect import importlib @@ -35,9 +34,11 @@ sys.path.insert(0, os.path.join(BASE_PATH)) import pandas +from pandas.compat import signature sys.path.insert(1, os.path.join(BASE_PATH, 'doc', 'sphinxext')) from numpydoc.docscrape import NumpyDocString +from pandas.io.formats.printing import pprint_thing PRIVATE_CLASSES = ['NDFrame', 'IndexOpsMixin'] @@ -46,7 +47,7 @@ def 
_load_obj(obj_name): for maxsplit in range(1, obj_name.count('.') + 1): # TODO when py3 only replace by: module, *func_parts = ... - func_name_split = obj_name.rsplit('.', maxsplit=maxsplit) + func_name_split = obj_name.rsplit('.', maxsplit) module = func_name_split[0] func_parts = func_name_split[1:] try: @@ -107,7 +108,9 @@ def __len__(self): @property def is_function_or_method(self): - return inspect.isfunction(self.method_obj) + # TODO(py27): remove ismethod + return (inspect.isfunction(self.method_obj) + or inspect.ismethod(self.method_obj)) @property def source_file_name(self): @@ -181,18 +184,24 @@ def doc_parameters(self): @property def signature_parameters(self): - if (inspect.isclass(self.method_obj) - and self.method_name.split('.')[-1] in - self.method_obj._accessors): - # accessor classes have a signature, but don't want to show this - return tuple() + if inspect.isclass(self.method_obj): + if hasattr(self.method_obj, '_accessors') and ( + self.method_name.split('.')[-1] in + self.method_obj._accessors): + # accessor classes have a signature but don't want to show this + return tuple() try: - signature = inspect.signature(self.method_obj) + sig = signature(self.method_obj) except (TypeError, ValueError): # Some objects, mainly in C extensions do not support introspection # of the signature return tuple() - params = tuple(signature.parameters.keys()) + params = sig.args + if sig.varargs: + params.append("*" + sig.varargs) + if sig.keywords: + params.append("**" + sig.keywords) + params = tuple(params) if params and params[0] in ('self', 'cls'): return params[1:] return params @@ -204,10 +213,11 @@ def parameter_mismatches(self): doc_params = tuple(self.doc_parameters) missing = set(signature_params) - set(doc_params) if missing: - errs.append('Parameters {!r} not documented'.format(missing)) + errs.append( + 'Parameters {} not documented'.format(pprint_thing(missing))) extra = set(doc_params) - set(signature_params) if extra: - errs.append('Unknown 
parameters {!r}'.format(extra)) + errs.append('Unknown parameters {}'.format(pprint_thing(extra))) if (not missing and not extra and signature_params != doc_params and not (not signature_params and not doc_params)): errs.append('Wrong parameters order. ' + @@ -240,6 +250,14 @@ def examples(self): def returns(self): return self.doc['Returns'] + @property + def yields(self): + return self.doc['Yields'] + + @property + def method_source(self): + return inspect.getsource(self.method_obj) + @property def first_line_ends_in_dot(self): if self.doc: @@ -265,8 +283,7 @@ def examples_errors(self): error_msgs = '' for test in finder.find(self.raw_doc, self.method_name, globs=context): f = StringIO() - with contextlib.redirect_stdout(f): - runner.run(test) + runner.run(test, out=f.write) error_msgs += f.getvalue() return error_msgs @@ -380,6 +397,19 @@ def validate_all(): def validate_one(func_name): + """ + Validate the docstring for the given func_name. + + Parameters + ---------- + func_name : str + Import path of the object whose docstring will be evaluated. + + Returns + ------- + int + The number of errors found in the `func_name` docstring. + """ func_obj = _load_obj(func_name) doc = Docstring(func_name, func_obj) @@ -387,6 +417,7 @@ def validate_one(func_name): sys.stderr.write('{}\n'.format(doc.clean_doc)) errs = [] + wrns = [] if doc.start_blank_lines != 1: errs.append('Docstring text (summary) should start in the line ' 'immediately after the opening quotes (not in the same ' @@ -405,25 +436,26 @@ def validate_one(func_name): 'should be present at the beginning of the docstring)') else: if not doc.summary[0].isupper(): - errs.append('Summary does not start with capital') + errs.append('Summary does not start with a capital letter') if doc.summary[-1] != '.': - errs.append('Summary does not end with dot') + errs.append('Summary does not end with a period') if (doc.is_function_or_method and doc.summary.split(' ')[0][-1] == 's'): errs.append('Summary must start with infinitive verb, 
' 'not third person (e.g. use "Generate" instead of ' '"Generates")') if not doc.extended_summary: - errs.append('No extended summary found') + wrns.append('No extended summary found') param_errs = doc.parameter_mismatches for param in doc.doc_parameters: - if not doc.parameter_type(param): - param_errs.append('Parameter "{}" has no type'.format(param)) - else: - if doc.parameter_type(param)[-1] == '.': - param_errs.append('Parameter "{}" type ' - 'should not finish with "."'.format(param)) + if not param.startswith("*"): # Check can ignore var / kwargs + if not doc.parameter_type(param): + param_errs.append('Parameter "{}" has no type'.format(param)) + else: + if doc.parameter_type(param)[-1] == '.': + param_errs.append('Parameter "{}" type should ' + 'not finish with "."'.format(param)) if not doc.parameter_desc(param): param_errs.append('Parameter "{}" ' @@ -431,7 +463,7 @@ def validate_one(func_name): else: if not doc.parameter_desc(param)[0].isupper(): param_errs.append('Parameter "{}" description ' - 'should start with ' + 'should start with a ' 'capital letter'.format(param)) if doc.parameter_desc(param)[-1] != '.': param_errs.append('Parameter "{}" description ' @@ -441,8 +473,11 @@ def validate_one(func_name): for param_err in param_errs: errs.append('\t{}'.format(param_err)) - if not doc.returns: - errs.append('No returns section found') + if doc.is_function_or_method: + if not doc.returns and "return" in doc.method_source: + errs.append('No Returns section found') + if not doc.yields and "yield" in doc.method_source: + errs.append('No Yields section found') mentioned_errs = doc.mentioned_private_classes if mentioned_errs: @@ -450,7 +485,7 @@ def validate_one(func_name): 'docstring.'.format(mentioned_errs)) if not doc.see_also: - errs.append('See Also section not found') + wrns.append('See Also section not found') else: for rel_name, rel_desc in doc.see_also.items(): if not rel_desc: @@ -464,7 +499,7 @@ def validate_one(func_name): examples_errs = '' if 
not doc.examples: - errs.append('No examples section found') + wrns.append('No examples section found') else: examples_errs = doc.examples_errors if examples_errs: @@ -475,7 +510,12 @@ def validate_one(func_name): sys.stderr.write('Errors found:\n') for err in errs: sys.stderr.write('\t{}\n'.format(err)) - else: + if wrns: + sys.stderr.write('Warnings found:\n') + for wrn in wrns: + sys.stderr.write('\t{}\n'.format(wrn)) + + if not errs: sys.stderr.write('Docstring for "{}" correct. :)\n'.format(func_name)) if examples_errs: