diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 396b092a286c1..7679922ddb21e 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5091,7 +5091,7 @@ def get_ftype_counts(self):
         1   b    2    2.0
         2   c    3    3.0
 
-        >>> df.get_ftype_counts()
+        >>> df.get_ftype_counts()  # doctest: +SKIP
         float64:dense    1
         int64:dense      1
         object:dense     1
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index fcced091b3794..d0f190c82aec7 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -1875,12 +1875,8 @@ def get_duplicates(self):
 
         Works on different Index of types.
 
-        >>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates()
+        >>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates()  # doctest: +SKIP
         [2, 3]
-        >>> pd.Index([1., 2., 2., 3., 3., 3., 4.]).get_duplicates()
-        [2.0, 3.0]
-        >>> pd.Index(['a', 'b', 'b', 'c', 'c', 'c', 'd']).get_duplicates()
-        ['b', 'c']
 
         Note that for a DatetimeIndex, it does not return a list but a new
         DatetimeIndex:
@@ -1888,22 +1884,22 @@ def get_duplicates(self):
         >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03',
         ...                         '2018-01-03', '2018-01-04', '2018-01-04'],
         ...                        format='%Y-%m-%d')
-        >>> pd.Index(dates).get_duplicates()
+        >>> pd.Index(dates).get_duplicates()  # doctest: +SKIP
         DatetimeIndex(['2018-01-03', '2018-01-04'],
                       dtype='datetime64[ns]', freq=None)
 
         Sorts duplicated elements even when indexes are unordered.
 
-        >>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates()
+        >>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates()  # doctest: +SKIP
         [2, 3]
 
         Return empty array-like structure when all elements are unique.
 
-        >>> pd.Index([1, 2, 3, 4]).get_duplicates()
+        >>> pd.Index([1, 2, 3, 4]).get_duplicates()  # doctest: +SKIP
         []
         >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03'],
         ...                        format='%Y-%m-%d')
-        >>> pd.Index(dates).get_duplicates()
+        >>> pd.Index(dates).get_duplicates()  # doctest: +SKIP
         DatetimeIndex([], dtype='datetime64[ns]', freq=None)
         """
         warnings.warn("'get_duplicates' is deprecated and will be removed in "
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index 72b014b018735..eb841e6398976 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -1013,21 +1013,21 @@ def apply(self, func, axis='major', **kwargs):
 
         Returns a Panel with the square root of each element
 
-        >>> p = pd.Panel(np.random.rand(4,3,2))
+        >>> p = pd.Panel(np.random.rand(4, 3, 2))  # doctest: +SKIP
-        >>> p.apply(np.sqrt)
+        >>> p.apply(np.sqrt)  # doctest: +SKIP
 
         Equivalent to p.sum(1), returning a DataFrame
 
-        >>> p.apply(lambda x: x.sum(), axis=1)
+        >>> p.apply(lambda x: x.sum(), axis=1)  # doctest: +SKIP
 
         Equivalent to previous:
 
-        >>> p.apply(lambda x: x.sum(), axis='major')
+        >>> p.apply(lambda x: x.sum(), axis='major')  # doctest: +SKIP
 
         Return the shapes of each DataFrame over axis 2 (i.e the shapes of
         items x major), as a Series
 
-        >>> p.apply(lambda x: x.shape, axis=(0,1))
+        >>> p.apply(lambda x: x.shape, axis=(0, 1))  # doctest: +SKIP
 
         Returns
         -------
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 18a83269a2f0f..bf0c93437f4dc 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -2156,13 +2156,6 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
         `join`-keyword works as in other methods.
 
         >>> t = pd.Series(['d', 'a', 'e', 'c'], index=[3, 0, 4, 2])
-        >>> s.str.cat(t, join=None, na_rep='-')
-        0    ad
-        1    ba
-        2    -e
-        3    dc
-        dtype: object
-        >>>
         >>> s.str.cat(t, join='left', na_rep='-')
         0    aa
         1    b-
diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py
index 147c43b30d45f..b080ab00972c6 100644
--- a/pandas/errors/__init__.py
+++ b/pandas/errors/__init__.py
@@ -133,7 +133,7 @@ class ParserWarning(Warning):
     >>> csv = u'''a;b;c
     ...           1;1,8
     ...           1;2,1'''
-    >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]')
+    >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]')  # doctest: +SKIP
     ... # ParserWarning: Falling back to the 'python' engine...
 
     Adding `engine='python'` to `pd.read_csv` removes the Warning:
diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py
index e0074e2cf3aef..f889e08b5d348 100644
--- a/pandas/plotting/_misc.py
+++ b/pandas/plotting/_misc.py
@@ -206,7 +206,7 @@ def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
         ...                      'versicolor', 'setosa', 'virginica',
         ...                      'setosa']
         ...     })
-        >>> rad_viz = pd.plotting.radviz(df, 'Category')
+        >>> rad_viz = pd.plotting.radviz(df, 'Category')  # doctest: +SKIP
     """
     import matplotlib.pyplot as plt
     import matplotlib.patches as patches
@@ -407,7 +407,7 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
             :context: close-figs
 
             >>> s = pd.Series(np.random.uniform(size=100))
-            >>> fig = pd.plotting.bootstrap_plot(s)
+            >>> fig = pd.plotting.bootstrap_plot(s)  # doctest: +SKIP
     """
     import random
     import matplotlib.pyplot as plt
diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py
index 271c7c3021905..ccd5f56141a6a 100644
--- a/scripts/tests/test_validate_docstrings.py
+++ b/scripts/tests/test_validate_docstrings.py
@@ -785,10 +785,10 @@ def test_bad_examples(self, capsys, klass, func, msgs):
             assert msg in ' '.join(err[1] for err in result['errors'])
 
 
-class ApiItems(object):
+class TestApiItems(object):
     @property
     def api_doc(self):
-        return textwrap.dedent(io.StringIO('''
+        return io.StringIO(textwrap.dedent('''
             .. currentmodule:: itertools
 
             Itertools
@@ -861,93 +861,90 @@ def test_item_subsection(self, idx, subsection):
         assert result[idx][3] == subsection
 
 
-class MainFunction(object):
-    def test_num_errors_for_validate_one(self, monkeypatch):
+class TestMainFunction(object):
+    def test_exit_status_for_validate_one(self, monkeypatch):
         monkeypatch.setattr(
-            validate_docstrings, 'validate_one',
-            lambda func_name: {'docstring': 'docstring1',
-                               'errors': [('ER01', 'err desc'),
-                                          ('ER02', 'err desc')
-                                          ('ER03', 'err desc')],
-                               'warnings': [],
-                               'examples_errors': ''})
-        num_errors = validate_docstrings.main(func_name='docstring1',
-                                              prefix=None,
-                                              errors=[],
-                                              output_format='default')
-        assert num_errors == 3
-
-    def test_no_num_errors_for_validate_one(self, monkeypatch):
-        monkeypatch.setattr(
-            validate_docstrings, 'validate_one',
-            lambda func_name: {'docstring': 'docstring1',
-                               'errors': [],
-                               'warnings': [('WN01', 'warn desc')],
-                               'examples_errors': ''})
-        num_errors = validate_docstrings.main(func_name='docstring1',
-                                              prefix=None,
-                                              errors=[],
-                                              output_format='default')
-        assert num_errors == 0
-
-    def test_num_errors_for_validate_all(self, monkeypatch):
+            validate_docstrings, 'validate_one', lambda func_name: {
+                'docstring': 'docstring1',
+                'errors': [('ER01', 'err desc'),
+                           ('ER02', 'err desc'),
+                           ('ER03', 'err desc')],
+                'warnings': [],
+                'examples_errors': ''})
+        exit_status = validate_docstrings.main(func_name='docstring1',
+                                               prefix=None,
+                                               errors=[],
+                                               output_format='default')
+        assert exit_status == 0
+
+    def test_exit_status_errors_for_validate_all(self, monkeypatch):
         monkeypatch.setattr(
-            validate_docstrings, 'validate_all',
-            lambda: {'docstring1': {'errors': [('ER01', 'err desc'),
-                                               ('ER02', 'err desc'),
-                                               ('ER03', 'err desc')]},
-                     'docstring2': {'errors': [('ER04', 'err desc'),
-                                               ('ER05', 'err desc')]}})
-        num_errors = validate_docstrings.main(func_name=None,
-                                              prefix=None,
-                                              errors=[],
-                                              output_format='default')
-        assert num_errors == 5
-
-    def test_no_num_errors_for_validate_all(self, monkeypatch):
+            validate_docstrings, 'validate_all', lambda prefix: {
+                'docstring1': {'errors': [('ER01', 'err desc'),
+                                          ('ER02', 'err desc'),
+                                          ('ER03', 'err desc')],
+                               'file': 'module1.py',
+                               'file_line': 23},
+                'docstring2': {'errors': [('ER04', 'err desc'),
+                                          ('ER05', 'err desc')],
+                               'file': 'module2.py',
+                               'file_line': 925}})
+        exit_status = validate_docstrings.main(func_name=None,
+                                               prefix=None,
+                                               errors=[],
+                                               output_format='default')
+        assert exit_status == 5
+
+    def test_no_exit_status_noerrors_for_validate_all(self, monkeypatch):
         monkeypatch.setattr(
-            validate_docstrings, 'validate_all',
-            lambda: {'docstring1': {'errors': [],
-                                    'warnings': [('WN01', 'warn desc')]},
-                     'docstring2': {'errors': []}})
-        num_errors = validate_docstrings.main(func_name=None,
-                                              prefix=None,
-                                              errors=[],
-                                              output_format='default')
-        assert num_errors == 0
-
-    def test_prefix_param_filters_docstrings(self, monkeypatch):
+            validate_docstrings, 'validate_all', lambda prefix: {
+                'docstring1': {'errors': [],
+                               'warnings': [('WN01', 'warn desc')]},
+                'docstring2': {'errors': []}})
+        exit_status = validate_docstrings.main(func_name=None,
+                                               prefix=None,
+                                               errors=[],
+                                               output_format='default')
+        assert exit_status == 0
+
+    def test_exit_status_for_validate_all_json(self, monkeypatch):
+        """Check that JSON output yields exit status 0 even with errors."""
         monkeypatch.setattr(
-            validate_docstrings, 'validate_all',
-            lambda: {'Series.foo': {'errors': [('ER01', 'err desc'),
-                                               ('ER02', 'err desc'),
-                                               ('ER03', 'err desc')]},
-                     'DataFrame.bar': {'errors': [('ER04', 'err desc'),
-                                                  ('ER05', 'err desc')]},
-                     'Series.foobar': {'errors': [('ER06', 'err desc')]}})
-        num_errors = validate_docstrings.main(func_name=None,
-                                              prefix='Series.',
-                                              errors=[],
-                                              output_format='default')
-        assert num_errors == 4
+            validate_docstrings, 'validate_all', lambda prefix: {
+                'docstring1': {'errors': [('ER01', 'err desc'),
+                                          ('ER02', 'err desc'),
+                                          ('ER03', 'err desc')]},
+                'docstring2': {'errors': [('ER04', 'err desc'),
+                                          ('ER05', 'err desc')]}})
+        exit_status = validate_docstrings.main(func_name=None,
+                                               prefix=None,
+                                               errors=[],
+                                               output_format='json')
+        assert exit_status == 0
 
     def test_errors_param_filters_errors(self, monkeypatch):
         monkeypatch.setattr(
-            validate_docstrings, 'validate_all',
-            lambda: {'Series.foo': {'errors': [('ER01', 'err desc'),
-                                               ('ER02', 'err desc'),
-                                               ('ER03', 'err desc')]},
-                     'DataFrame.bar': {'errors': [('ER01', 'err desc'),
-                                                  ('ER02', 'err desc')]},
-                     'Series.foobar': {'errors': [('ER01', 'err desc')]}})
-        num_errors = validate_docstrings.main(func_name=None,
-                                              prefix=None,
-                                              errors=['E01'],
-                                              output_format='default')
-        assert num_errors == 3
-
-        num_errors = validate_docstrings.main(func_name=None,
-                                              prefix=None,
-                                              errors=['E03'],
-                                              output_format='default')
-        assert num_errors == 1
+            validate_docstrings, 'validate_all', lambda prefix: {
+                'Series.foo': {'errors': [('ER01', 'err desc'),
+                                          ('ER02', 'err desc'),
+                                          ('ER03', 'err desc')],
+                               'file': 'series.py',
+                               'file_line': 142},
+                'DataFrame.bar': {'errors': [('ER01', 'err desc'),
+                                             ('ER02', 'err desc')],
+                                  'file': 'frame.py',
+                                  'file_line': 598},
+                'Series.foobar': {'errors': [('ER01', 'err desc')],
+                                  'file': 'series.py',
+                                  'file_line': 279}})
+        exit_status = validate_docstrings.main(func_name=None,
+                                               prefix=None,
+                                               errors=['ER01'],
+                                               output_format='default')
+        assert exit_status == 3
+
+        exit_status = validate_docstrings.main(func_name=None,
+                                               prefix=None,
+                                               errors=['ER03'],
+                                               output_format='default')
+        assert exit_status == 1
diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py
index 67ad21ab80b97..ed84e58049cae 100755
--- a/scripts/validate_docstrings.py
+++ b/scripts/validate_docstrings.py
@@ -32,6 +32,15 @@
     from io import StringIO
 except ImportError:
     from cStringIO import StringIO
+
+# The Template backend makes matplotlib not plot anything. This is useful
+# to avoid plot windows being opened by the doctests while running the
+# script. It is set here, before matplotlib is loaded.
+# We don't warn about the number of open plots, as none is actually opened.
+os.environ['MPLBACKEND'] = 'Template'
+import matplotlib
+matplotlib.rc('figure', max_open_warning=10000)
+
 import numpy
 
 BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -505,6 +514,9 @@ def validate_pep8(self):
             file.flush()
             application.run_checks([file.name])
 
+        # We need this to avoid flake8 printing the names of the files to
+        # the standard output
+        application.formatter.write = lambda line, source: None
         application.report()
 
         yield from application.guide.stats.statistics_for('')
@@ -733,6 +745,7 @@ def header(title, width=80, char='#'):
         return '\n{full_line}\n{title_line}\n{full_line}\n\n'.format(
             full_line=full_line, title_line=title_line)
 
+    exit_status = 0
     if func_name is None:
         result = validate_all(prefix)
 
@@ -751,7 +764,7 @@ def header(title, width=80, char='#'):
                 raise ValueError('Unknown output_format "{}"'.format(
                     output_format))
 
-            num_errors, output = 0, ''
+            output = ''
             for name, res in result.items():
                 for err_code, err_desc in res['errors']:
                     # The script would be faster if instead of filtering the
@@ -759,7 +772,7 @@ def header(title, width=80, char='#'):
                     # initially. But that would complicate the code too much
                     if errors and err_code not in errors:
                         continue
-                    num_errors += 1
+                    exit_status += 1
                     output += output_format.format(
                         name=name,
                         path=res['file'],
@@ -767,12 +780,10 @@ def header(title, width=80, char='#'):
                         code=err_code,
                         text='{}: {}'.format(name, err_desc))
 
-        sys.stderr.write(output)
+        sys.stdout.write(output)
 
     else:
         result = validate_one(func_name)
-        num_errors = len(result['errors'])
-
         sys.stderr.write(header('Docstring ({})'.format(func_name)))
         sys.stderr.write('{}\n'.format(result['docstring']))
         sys.stderr.write(header('Validation'))
@@ -799,7 +810,7 @@ def header(title, width=80, char='#'):
             sys.stderr.write(header('Doctests'))
             sys.stderr.write(result['examples_errors'])
 
-    return num_errors
+    return exit_status
 
 
 if __name__ == '__main__':