diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 396b092a286c1..7679922ddb21e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5091,7 +5091,7 @@ def get_ftype_counts(self): 1 b 2 2.0 2 c 3 3.0 - >>> df.get_ftype_counts() + >>> df.get_ftype_counts() # doctest: +SKIP float64:dense 1 int64:dense 1 object:dense 1 diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index fcced091b3794..d0f190c82aec7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1875,12 +1875,8 @@ def get_duplicates(self): Works on different Index of types. - >>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates() + >>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates() # doctest: +SKIP [2, 3] - >>> pd.Index([1., 2., 2., 3., 3., 3., 4.]).get_duplicates() - [2.0, 3.0] - >>> pd.Index(['a', 'b', 'b', 'c', 'c', 'c', 'd']).get_duplicates() - ['b', 'c'] Note that for a DatetimeIndex, it does not return a list but a new DatetimeIndex: @@ -1888,22 +1884,22 @@ def get_duplicates(self): >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03', ... '2018-01-03', '2018-01-04', '2018-01-04'], ... format='%Y-%m-%d') - >>> pd.Index(dates).get_duplicates() + >>> pd.Index(dates).get_duplicates() # doctest: +SKIP DatetimeIndex(['2018-01-03', '2018-01-04'], dtype='datetime64[ns]', freq=None) Sorts duplicated elements even when indexes are unordered. - >>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates() + >>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates() # doctest: +SKIP [2, 3] Return empty array-like structure when all elements are unique. - >>> pd.Index([1, 2, 3, 4]).get_duplicates() + >>> pd.Index([1, 2, 3, 4]).get_duplicates() # doctest: +SKIP [] >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03'], ... format='%Y-%m-%d') - >>> pd.Index(dates).get_duplicates() + >>> pd.Index(dates).get_duplicates() # doctest: +SKIP DatetimeIndex([], dtype='datetime64[ns]', freq=None) """ warnings.warn("'get_duplicates' is deprecated and will be removed in " diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 72b014b018735..eb841e6398976 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1013,21 +1013,21 @@ def apply(self, func, axis='major', **kwargs): Returns a Panel with the square root of each element - >>> p = pd.Panel(np.random.rand(4,3,2)) + >>> p = pd.Panel(np.random.rand(4, 3, 2)) # doctest: +SKIP >>> p.apply(np.sqrt) Equivalent to p.sum(1), returning a DataFrame - >>> p.apply(lambda x: x.sum(), axis=1) + >>> p.apply(lambda x: x.sum(), axis=1) # doctest: +SKIP Equivalent to previous: - >>> p.apply(lambda x: x.sum(), axis='major') + >>> p.apply(lambda x: x.sum(), axis='major') # doctest: +SKIP Return the shapes of each DataFrame over axis 2 (i.e the shapes of items x major), as a Series - >>> p.apply(lambda x: x.shape, axis=(0,1)) + >>> p.apply(lambda x: x.shape, axis=(0,1)) # doctest: +SKIP Returns ------- diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 18a83269a2f0f..bf0c93437f4dc 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2156,13 +2156,6 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): `join`-keyword works as in other methods. >>> t = pd.Series(['d', 'a', 'e', 'c'], index=[3, 0, 4, 2]) - >>> s.str.cat(t, join=None, na_rep='-') - 0 ad - 1 ba - 2 -e - 3 dc - dtype: object - >>> >>> s.str.cat(t, join='left', na_rep='-') 0 aa 1 b- diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 147c43b30d45f..b080ab00972c6 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -133,7 +133,7 @@ class ParserWarning(Warning): >>> csv = u'''a;b;c ... 1;1,8 ... 1;2,1''' - >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]') + >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]') # doctest: +SKIP ... # ParserWarning: Falling back to the 'python' engine... Adding `engine='python'` to `pd.read_csv` removes the Warning: diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index e0074e2cf3aef..f889e08b5d348 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -206,7 +206,7 @@ def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): ... 'versicolor', 'setosa', 'virginica', ... 'setosa'] ... }) - >>> rad_viz = pd.plotting.radviz(df, 'Category') + >>> rad_viz = pd.plotting.radviz(df, 'Category') # doctest: +SKIP """ import matplotlib.pyplot as plt import matplotlib.patches as patches @@ -407,7 +407,7 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): :context: close-figs >>> s = pd.Series(np.random.uniform(size=100)) - >>> fig = pd.plotting.bootstrap_plot(s) + >>> fig = pd.plotting.bootstrap_plot(s) # doctest: +SKIP """ import random import matplotlib.pyplot as plt diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index 271c7c3021905..ccd5f56141a6a 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -785,10 +785,10 @@ def test_bad_examples(self, capsys, klass, func, msgs): assert msg in ' '.join(err[1] for err in result['errors']) -class ApiItems(object): +class TestApiItems(object): @property def api_doc(self): - return textwrap.dedent(io.StringIO(''' + return io.StringIO(textwrap.dedent(''' .. currentmodule:: itertools Itertools @@ -861,93 +861,90 @@ def test_item_subsection(self, idx, subsection): assert result[idx][3] == subsection -class MainFunction(object): - def test_num_errors_for_validate_one(self, monkeypatch): +class TestMainFunction(object): + def test_exit_status_for_validate_one(self, monkeypatch): monkeypatch.setattr( - validate_docstrings, 'validate_one', - lambda func_name: {'docstring': 'docstring1', - 'errors': [('ER01', 'err desc'), - ('ER02', 'err desc') - ('ER03', 'err desc')], - 'warnings': [], - 'examples_errors': ''}) - num_errors = validate_docstrings.main(func_name='docstring1', - prefix=None, - errors=[], - output_format='default') - assert num_errors == 3 - - def test_no_num_errors_for_validate_one(self, monkeypatch): - monkeypatch.setattr( - validate_docstrings, 'validate_one', - lambda func_name: {'docstring': 'docstring1', - 'errors': [], - 'warnings': [('WN01', 'warn desc')], - 'examples_errors': ''}) - num_errors = validate_docstrings.main(func_name='docstring1', - prefix=None, - errors=[], - output_format='default') - assert num_errors == 0 - - def test_num_errors_for_validate_all(self, monkeypatch): + validate_docstrings, 'validate_one', lambda func_name: { + 'docstring': 'docstring1', + 'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')], + 'warnings': [], + 'examples_errors': ''}) + exit_status = validate_docstrings.main(func_name='docstring1', + prefix=None, + errors=[], + output_format='default') + assert exit_status == 0 + + def test_exit_status_errors_for_validate_all(self, monkeypatch): monkeypatch.setattr( - validate_docstrings, 'validate_all', - lambda: {'docstring1': {'errors': [('ER01', 'err desc'), - ('ER02', 'err desc'), - ('ER03', 'err desc')]}, - 'docstring2': {'errors': [('ER04', 'err desc'), - ('ER05', 'err desc')]}}) - num_errors = validate_docstrings.main(func_name=None, - prefix=None, - errors=[], - output_format='default') - assert num_errors == 5 - - def test_no_num_errors_for_validate_all(self, monkeypatch): + validate_docstrings, 'validate_all', lambda prefix: { + 'docstring1': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')], + 'file': 'module1.py', + 'file_line': 23}, + 'docstring2': {'errors': [('ER04', 'err desc'), + ('ER05', 'err desc')], + 'file': 'module2.py', + 'file_line': 925}}) + exit_status = validate_docstrings.main(func_name=None, + prefix=None, + errors=[], + output_format='default') + assert exit_status == 5 + + def test_no_exit_status_noerrors_for_validate_all(self, monkeypatch): monkeypatch.setattr( - validate_docstrings, 'validate_all', - lambda: {'docstring1': {'errors': [], - 'warnings': [('WN01', 'warn desc')]}, - 'docstring2': {'errors': []}}) - num_errors = validate_docstrings.main(func_name=None, - prefix=None, - errors=[], - output_format='default') - assert num_errors == 0 - - def test_prefix_param_filters_docstrings(self, monkeypatch): + validate_docstrings, 'validate_all', lambda prefix: { + 'docstring1': {'errors': [], + 'warnings': [('WN01', 'warn desc')]}, + 'docstring2': {'errors': []}}) + exit_status = validate_docstrings.main(func_name=None, + prefix=None, + errors=[], + output_format='default') + assert exit_status == 0 + + def test_exit_status_for_validate_all_json(self, monkeypatch): + print('EXECUTED') monkeypatch.setattr( - validate_docstrings, 'validate_all', - lambda: {'Series.foo': {'errors': [('ER01', 'err desc'), - ('ER02', 'err desc'), - ('ER03', 'err desc')]}, - 'DataFrame.bar': {'errors': [('ER04', 'err desc'), - ('ER05', 'err desc')]}, - 'Series.foobar': {'errors': [('ER06', 'err desc')]}}) - num_errors = validate_docstrings.main(func_name=None, - prefix='Series.', - errors=[], - output_format='default') - assert num_errors == 4 + validate_docstrings, 'validate_all', lambda prefix: { + 'docstring1': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')]}, + 'docstring2': {'errors': [('ER04', 'err desc'), + ('ER05', 'err desc')]}}) + exit_status = validate_docstrings.main(func_name=None, + prefix=None, + errors=[], + output_format='json') + assert exit_status == 0 def test_errors_param_filters_errors(self, monkeypatch): monkeypatch.setattr( - validate_docstrings, 'validate_all', - lambda: {'Series.foo': {'errors': [('ER01', 'err desc'), - ('ER02', 'err desc'), - ('ER03', 'err desc')]}, - 'DataFrame.bar': {'errors': [('ER01', 'err desc'), - ('ER02', 'err desc')]}, - 'Series.foobar': {'errors': [('ER01', 'err desc')]}}) - num_errors = validate_docstrings.main(func_name=None, - prefix=None, - errors=['E01'], - output_format='default') - assert num_errors == 3 - - num_errors = validate_docstrings.main(func_name=None, - prefix=None, - errors=['E03'], - output_format='default') - assert num_errors == 1 + validate_docstrings, 'validate_all', lambda prefix: { + 'Series.foo': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')], + 'file': 'series.py', + 'file_line': 142}, + 'DataFrame.bar': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc')], + 'file': 'frame.py', + 'file_line': 598}, + 'Series.foobar': {'errors': [('ER01', 'err desc')], + 'file': 'series.py', + 'file_line': 279}}) + exit_status = validate_docstrings.main(func_name=None, + prefix=None, + errors=['ER01'], + output_format='default') + assert exit_status == 3 + + exit_status = validate_docstrings.main(func_name=None, + prefix=None, + errors=['ER03'], + output_format='default') + assert exit_status == 1 diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 67ad21ab80b97..ed84e58049cae 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -32,6 +32,15 @@ from io import StringIO except ImportError: from cStringIO import StringIO + +# Template backend makes matplotlib to not plot anything. This is useful +# to avoid that plot windows are open from the doctests while running the +# script. Setting here before matplotlib is loaded. +# We don't warn for the number of open plots, as none is actually being opened +os.environ['MPLBACKEND'] = 'Template' +import matplotlib +matplotlib.rc('figure', max_open_warning=10000) + import numpy BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -505,6 +514,9 @@ def validate_pep8(self): file.flush() application.run_checks([file.name]) + # We need this to avoid flake8 printing the names of the files to + # the standard output + application.formatter.write = lambda line, source: None application.report() yield from application.guide.stats.statistics_for('') @@ -733,6 +745,7 @@ def header(title, width=80, char='#'): return '\n{full_line}\n{title_line}\n{full_line}\n\n'.format( full_line=full_line, title_line=title_line) + exit_status = 0 if func_name is None: result = validate_all(prefix) @@ -751,7 +764,7 @@ def header(title, width=80, char='#'): raise ValueError('Unknown output_format "{}"'.format( output_format)) - num_errors, output = 0, '' + output = '' for name, res in result.items(): for err_code, err_desc in res['errors']: # The script would be faster if instead of filtering the @@ -759,7 +772,7 @@ def header(title, width=80, char='#'): # initially. But that would complicate the code too much if errors and err_code not in errors: continue - num_errors += 1 + exit_status += 1 output += output_format.format( name=name, path=res['file'], @@ -767,12 +780,10 @@ def header(title, width=80, char='#'): code=err_code, text='{}: {}'.format(name, err_desc)) - sys.stderr.write(output) + sys.stdout.write(output) else: result = validate_one(func_name) - num_errors = len(result['errors']) - sys.stderr.write(header('Docstring ({})'.format(func_name))) sys.stderr.write('{}\n'.format(result['docstring'])) sys.stderr.write(header('Validation')) @@ -799,7 +810,7 @@ def header(title, width=80, char='#'): sys.stderr.write(header('Doctests')) sys.stderr.write(result['examples_errors']) - return num_errors + return exit_status if __name__ == '__main__':