pandas-dev
diff --git a/‎.gitignore
+3-2 b/‎.gitignore
+3-2
diff --git a/‎README.md
+4-1 b/‎README.md
+4-1
diff --git a/‎asv_bench/benchmarks/groupby.py
+14-2 b/‎asv_bench/benchmarks/groupby.py
+14-2
diff --git a/‎ci/requirements-3.6_DOC.run
+1-1 b/‎ci/requirements-3.6_DOC.run
+1-1
diff --git a/‎doc/make.py
+148-32 b/‎doc/make.py
+148-32
@@ -88,8 +88,9 @@ scikits
 *.c
 *.cpp
 
-# Performance Testing #
-#######################
+# Unit / Performance Testing #
+##############################
+.pytest_cache/
 asv_bench/env/
 asv_bench/html/
 asv_bench/results/
 
@@ -216,13 +216,16 @@ Further, general questions and discussions can also take place on the [pydata ma
 ## Discussion and Development
 Most development discussion is taking place on github in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions.
 
-## Contributing to pandas
+## Contributing to pandas [![Open Source Helpers](https://www.codetriage.com/pandas-dev/pandas/badges/users.svg)](https://www.codetriage.com/pandas-dev/pandas)
+
 All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
 
 A detailed overview on how to contribute can be found in the **[contributing guide.](https://pandas.pydata.org/pandas-docs/stable/contributing.html)**
 
 If you are simply looking to start working with the pandas codebase, navigate to the [GitHub “issues” tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [Difficulty Novice](https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22Difficulty+Novice%22) where you could start out.
 
+You can also triage issues, which may include reproducing bug reports or asking for vital information such as version numbers or reproduction instructions. If you would like to start triaging issues, one easy way to get started is to [subscribe to pandas on CodeTriage](https://www.codetriage.com/pandas-dev/pandas).
+
 Or maybe through using pandas you have an idea of your own or are looking for something in the documentation and thinking ‘this can be improved’...you can do something about it!
 
 Feel free to ask questions on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas).
@@ -11,6 +11,13 @@
 from .pandas_vb_common import setup  # noqa
 
 
+# Maps a benchmark dtype to the groupby method names that are skipped for
+# that dtype: GroupByMethods.setup raises NotImplementedError when the
+# requested method is listed under the requested dtype.
+method_blacklist = {
+    'object': {'median', 'prod', 'sem', 'cumsum', 'sum', 'cummin', 'mean',
+               'max', 'skew', 'cumprod', 'cummax', 'rank', 'pct_change', 'min',
+               'var', 'mad', 'describe', 'std'}
+}
+
+
 class ApplyDictReturn(object):
     goal_time = 0.2
 
@@ -153,6 +160,7 @@ def time_frame_nth_any(self, df):
     def time_frame_nth(self, df):
         df.groupby(0).nth(0)
 
+
     def time_series_nth_any(self, df):
         df[1].groupby(df[0]).nth(0, dropna='any')
 
@@ -369,23 +377,27 @@ class GroupByMethods(object):
     goal_time = 0.2
 
     param_names = ['dtype', 'method']
-    params = [['int', 'float'],
+    params = [['int', 'float', 'object'],
               ['all', 'any', 'bfill', 'count', 'cumcount', 'cummax', 'cummin',
                'cumprod', 'cumsum', 'describe', 'ffill', 'first', 'head',
                'last', 'mad', 'max', 'min', 'median', 'mean', 'nunique',
                'pct_change', 'prod', 'rank', 'sem', 'shift', 'size', 'skew',
                'std', 'sum', 'tail', 'unique', 'value_counts', 'var']]
 
     def setup(self, dtype, method):
+        if method in method_blacklist.get(dtype, {}):
+            raise NotImplementedError  # skip benchmark
         ngroups = 1000
         size = ngroups * 2
         rng = np.arange(ngroups)
         values = rng.take(np.random.randint(0, ngroups, size=size))
         if dtype == 'int':
             key = np.random.randint(0, size, size=size)
-        else:
+        elif dtype == 'float':
             key = np.concatenate([np.random.random(ngroups) * 0.1,
                                   np.random.random(ngroups) * 10.0])
+        elif dtype == 'object':
+            key = ['foo'] * size
 
         df = DataFrame({'values': values, 'key': key})
         self.df_groupby_method = getattr(df.groupby('key')['values'], method)
 
@@ -5,7 +5,7 @@ sphinx
 nbconvert
 nbformat
 notebook
-matplotlib
+matplotlib=2.1*
 seaborn
 scipy
 lxml
 
@@ -11,12 +11,14 @@
     $ python make.py html
     $ python make.py latex
 """
+import importlib
 import sys
 import os
 import shutil
-import subprocess
+# import subprocess
 import argparse
 from contextlib import contextmanager
+import webbrowser
 import jinja2
 
 
@@ -26,28 +28,6 @@
 BUILD_DIRS = ['doctrees', 'html', 'latex', 'plots', '_static', '_templates']
 
 
-def _generate_index(include_api, single_doc=None):
-    """Create index.rst file with the specified sections.
-
-    Parameters
-    ----------
-    include_api : bool
-        Whether API documentation will be built.
-    single_doc : str or None
-        If provided, this single documentation page will be generated.
-    """
-    if single_doc is not None:
-        single_doc = os.path.splitext(os.path.basename(single_doc))[0]
-        include_api = False
-
-    with open(os.path.join(SOURCE_PATH, 'index.rst.template')) as f:
-        t = jinja2.Template(f.read())
-
-    with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as f:
-        f.write(t.render(include_api=include_api,
-                         single_doc=single_doc))
-
-
 @contextmanager
 def _maybe_exclude_notebooks():
     """Skip building the notebooks if pandoc is not installed.
@@ -58,6 +38,7 @@ def _maybe_exclude_notebooks():
     1. nbconvert isn't installed, or
     2. nbconvert is installed, but pandoc isn't
     """
+    # TODO move to exclude_pattern
     base = os.path.dirname(__file__)
     notebooks = [os.path.join(base, 'source', nb)
                  for nb in ['style.ipynb']]
@@ -96,8 +77,112 @@ class DocBuilder:
     All public methods of this class can be called as parameters of the
     script.
     """
-    def __init__(self, num_jobs=1):
+    def __init__(self, num_jobs=1, include_api=True, single_doc=None,
+                 verbosity=0):
+        """Store the build options and prepare the sources for building.
+
+        Besides storing its arguments, this writes ``index.rst`` (through
+        ``_generate_index``) and, when a single docstring was requested,
+        runs ``sphinx-autogen`` on that index.
+
+        Parameters
+        ----------
+        num_jobs : int
+            Forwarded to ``sphinx-build`` as its ``-j`` option.
+        include_api : bool
+            Passed to the index template; forced to False when
+            `single_doc` is given.
+        single_doc : str or None
+            If not None, handed to ``_process_single_doc`` to restrict the
+            build to one page.
+        verbosity : int
+            Number of ``v`` characters in the flag passed to
+            ``sphinx-build``; 0 passes no verbosity flag.
+        """
         self.num_jobs = num_jobs
+        self.include_api = include_api
+        self.verbosity = verbosity
+        self.single_doc = None
+        self.single_doc_type = None
+        if single_doc is not None:
+            self._process_single_doc(single_doc)
+        # evaluated once, after single_doc has been processed, because the
+        # property reads self.single_doc and self.single_doc_type
+        self.exclude_patterns = self._exclude_patterns
+
+        self._generate_index()
+        if self.single_doc_type == 'docstring':
+            self._run_os('sphinx-autogen', '-o',
+                         'source/generated_single', 'source/index.rst')
+
+    @property
+    def _exclude_patterns(self):
+        """Source patterns that the Sphinx build should skip.
+
+        Returns
+        -------
+        str
+            Comma-separated ``repr`` of each pattern, always starting with
+            ``'**.ipynb_checkpoints'``.
+        """
+        # TODO move maybe_exclude_notebooks here
+        if self.single_doc is not None:
+            # keep only the index and the requested page
+            kept = ('index.rst', '{0}.rst'.format(self.single_doc))
+            skipped = [name for name in os.listdir(SOURCE_PATH)
+                       if name.endswith(('.rst', '.ipynb'))
+                       and name not in kept]
+            if self.single_doc_type != 'api':
+                skipped += ['generated/*.rst']
+        elif not self.include_api:
+            skipped = ['api.rst', 'generated/*.rst']
+        else:
+            skipped = ['generated_single/*.rst']
+
+        patterns = ['**.ipynb_checkpoints'] + skipped
+        return ','.join(repr(pattern) for pattern in patterns)
+
+    def _process_single_doc(self, single_doc):
+        """Resolve the ``single_doc`` argument into a page name and kind.
+
+        Sets ``self.single_doc`` and ``self.single_doc_type`` (``'api'``,
+        ``'rst'`` or ``'docstring'``) and switches ``self.include_api`` off.
+
+        Parameters
+        ----------
+        single_doc : str
+            ``'api'``/``'api.rst'``, a file in the docs source directory
+            (with or without the ``.rst`` extension), or the dotted name of
+            a pandas object (with or without the leading ``pandas.``).
+
+        Raises
+        ------
+        ValueError
+            If `single_doc` is neither an existing source file nor an
+            attribute reachable from the ``pandas`` module.
+        """
+        self.include_api = False
+
+        if single_doc in ('api.rst', 'api'):
+            self.single_doc_type = 'api'
+            self.single_doc = 'api'
+        elif os.path.exists(os.path.join(SOURCE_PATH, single_doc)):
+            self.single_doc_type = 'rst'
+            self.single_doc = os.path.splitext(os.path.basename(single_doc))[0]
+        elif os.path.exists(
+                os.path.join(SOURCE_PATH, '{}.rst'.format(single_doc))):
+            self.single_doc_type = 'rst'
+            self.single_doc = single_doc
+        else:
+            # Strip the optional package prefix before walking attributes,
+            # so "pandas.DataFrame.head" and "DataFrame.head" resolve alike
+            # without depending on ``pandas.pandas`` being an attribute.
+            prefix = 'pandas.'
+            if single_doc.startswith(prefix):
+                name = single_doc[len(prefix):]
+            else:
+                name = single_doc
+
+            try:
+                # ``pandas`` is injected into the module globals by main()
+                obj = pandas  # noqa: F821
+                for part in name.split('.'):
+                    obj = getattr(obj, part)
+            except AttributeError:
+                raise ValueError('Single document not understood, it should '
+                                 'be a file in doc/source/*.rst (e.g. '
+                                 '"contributing.rst") or a pandas function '
+                                 'or method (e.g. "pandas.DataFrame.head")')
+
+            self.single_doc_type = 'docstring'
+            self.single_doc = name
+
+    def _copy_generated_docstring(self):
+        """Copy the existing generated (from api.rst) docstring page.
+
+        The existing page is reused because it is more correct in certain
+        cases (where a custom autodoc template is used). This is best
+        effort: if the page does not exist or cannot be copied, nothing is
+        copied and no error is raised.
+        """
+        fname = os.path.join(SOURCE_PATH, 'generated',
+                             'pandas.{}.rst'.format(self.single_doc))
+        temp_dir = os.path.join(SOURCE_PATH, 'generated_single')
+
+        try:
+            os.makedirs(temp_dir)
+        except OSError:
+            # typically the directory is already there; a real problem
+            # with it surfaces in the copy below or in the build itself
+            pass
+
+        if os.path.exists(fname):
+            try:
+                # copying to make sure sphinx always thinks it is new
+                # and needs to be re-generated (to pick source code changes)
+                shutil.copy(fname, temp_dir)
+            except (IOError, OSError, shutil.Error):
+                # still best effort, but unlike a bare ``except`` this lets
+                # KeyboardInterrupt and SystemExit propagate
+                pass
+
+    def _generate_index(self):
+        """Render ``index.rst`` from ``index.rst.template``.
+
+        When a single docstring is being built, the previously generated
+        page for it is copied first. The template receives
+        ``include_api``, ``single_doc`` and ``single_doc_type``.
+        """
+        if self.single_doc_type == 'docstring':
+            self._copy_generated_docstring()
+
+        template_path = os.path.join(SOURCE_PATH, 'index.rst.template')
+        index_path = os.path.join(SOURCE_PATH, 'index.rst')
+
+        with open(template_path) as template_file:
+            template = jinja2.Template(template_file.read())
+
+        context = {'include_api': self.include_api,
+                   'single_doc': self.single_doc,
+                   'single_doc_type': self.single_doc_type}
+        with open(index_path, 'w') as index_file:
+            index_file.write(template.render(**context))
 
     @staticmethod
     def _create_build_structure():
@@ -121,7 +206,10 @@ def _run_os(*args):
         --------
         >>> DocBuilder()._run_os('python', '--version')
         """
-        subprocess.check_call(args, stderr=subprocess.STDOUT)
+        # TODO check_call should be more safe, but it fails with
+        # exclude patterns, needs investigation
+        # subprocess.check_call(args, stderr=subprocess.STDOUT)
+        os.system(' '.join(args))
 
     def _sphinx_build(self, kind):
         """Call sphinx to build documentation.
@@ -142,11 +230,23 @@ def _sphinx_build(self, kind):
         self._run_os('sphinx-build',
                      '-j{}'.format(self.num_jobs),
                      '-b{}'.format(kind),
-                     '-d{}'.format(os.path.join(BUILD_PATH,
-                                                'doctrees')),
+                     '-{}'.format(
+                         'v' * self.verbosity) if self.verbosity else '',
+                     '-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')),
+                     '-Dexclude_patterns={}'.format(self.exclude_patterns),
                      SOURCE_PATH,
                      os.path.join(BUILD_PATH, kind))
 
+    def _open_browser(self):
+        """Open the built single page in the web browser.
+
+        Docstring pages are looked up under ``generated_single`` as
+        ``pandas.<name>.html``; any other page as ``<name>.html`` directly
+        under the HTML build directory.
+        """
+        if self.single_doc_type == 'docstring':
+            page = os.path.join('generated_single',
+                                'pandas.{}.html'.format(self.single_doc))
+        else:
+            page = '{}.html'.format(self.single_doc)
+
+        html_path = os.path.abspath(
+            os.path.join(DOC_PATH, 'build', 'html', page))
+        # The scheme is prepended rather than passed to os.path.join: join
+        # discards every component before an absolute one, so
+        # os.path.join('file://', '/abs/path') would lose 'file://'.
+        webbrowser.open('file://' + html_path, new=2)
+
     def html(self):
         """Build HTML documentation."""
         self._create_build_structure()
@@ -156,6 +256,11 @@ def html(self):
             if os.path.exists(zip_fname):
                 os.remove(zip_fname)
 
+        if self.single_doc is not None:
+            self._open_browser()
+            shutil.rmtree(os.path.join(SOURCE_PATH, 'generated_single'),
+                          ignore_errors=True)
+
     def latex(self, force=False):
         """Build PDF documentation."""
         self._create_build_structure()
@@ -222,21 +327,32 @@ def main():
                            metavar='FILENAME',
                            type=str,
                            default=None,
-                           help=('filename of section to compile, '
-                                 'e.g. "indexing"'))
+                           help=('filename of section or method name to '
+                                 'compile, e.g. "indexing", "DataFrame.join"'))
     argparser.add_argument('--python-path',
                            type=str,
-                           default=os.path.join(DOC_PATH, '..'),
+                           default=os.path.dirname(DOC_PATH),
                            help='path')
+    argparser.add_argument('-v', action='count', dest='verbosity', default=0,
+                           help=('increase verbosity (can be repeated), '
+                                 'passed to the sphinx build command'))
     args = argparser.parse_args()
 
     if args.command not in cmds:
         raise ValueError('Unknown command {}. Available options: {}'.format(
             args.command, ', '.join(cmds)))
 
+    # Below we update both os.environ and sys.path. The former is used by
+    # external libraries (namely Sphinx) to compile this module and resolve
+    # the import of `python_path` correctly. The latter is used to resolve
+    # the import within the module, injecting it into the global namespace
     os.environ['PYTHONPATH'] = args.python_path
-    _generate_index(not args.no_api, args.single)
-    getattr(DocBuilder(args.num_jobs), args.command)()
+    sys.path.append(args.python_path)
+    globals()['pandas'] = importlib.import_module('pandas')
+
+    builder = DocBuilder(args.num_jobs, not args.no_api, args.single,
+                         args.verbosity)
+    getattr(builder, args.command)()
 
 
 if __name__ == '__main__':