[MRG] DOC fix some sphinx warnings #11241


Merged (14 commits), Jun 21, 2018
2 changes: 2 additions & 0 deletions build_tools/circle/list_versions.py
Original file line number Diff line number Diff line change
@@ -47,6 +47,8 @@ def get_pdf_size(version):
return human_readable_data_quantity(path_details['size'], 1000)


print(':orphan:')
print()
heading = 'Available documentation for Scikit-learn'
print(heading)
print('=' * len(heading))
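Several fixes in this PR lengthen heading underlines to match their titles (as in doc/faq.rst and examples/README.txt below), since Sphinx warns "Title underline too short" otherwise. The script above sidesteps the problem by computing the underline with ``'=' * len(heading)``; the same idea as a small, hypothetical helper:

```python
def rst_heading(text, char="="):
    """Return an RST heading with an underline exactly as long as the title.

    Hypothetical helper for illustration: Sphinx warns "Title underline too
    short" when the underline has fewer characters than the heading text,
    so computing it from len(text) makes the warning impossible.
    """
    return text + "\n" + char * len(text)

print(rst_heading("Available documentation for Scikit-learn"))
```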
5 changes: 1 addition & 4 deletions doc/conf.py
@@ -70,9 +70,6 @@
# The encoding of source files.
#source_encoding = 'utf-8'

# Generate the plots for the gallery
plot_gallery = True

# The master toctree document.
master_doc = 'index'

@@ -102,7 +99,7 @@

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build', 'templates', 'includes']
exclude_patterns = ['_build', 'templates', 'includes', 'themes']

# The reST default role (used for this markup: `text`) to use for all
# documents.
2 changes: 1 addition & 1 deletion doc/datasets/covtype.rst
@@ -9,7 +9,7 @@ collected for the task of predicting each patch's cover type,
i.e. the dominant species of tree.
There are seven covertypes, making this a multiclass classification problem.
Each sample has 54 features, described on the
`dataset's homepage <http://archive.ics.uci.edu/ml/datasets/Covertype>`_.
`dataset's homepage <http://archive.ics.uci.edu/ml/datasets/Covertype>`__.
Some of the features are boolean indicators,
while others are discrete or continuous measurements.

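Most of the link edits in this PR swap named hyperlink targets (single trailing underscore) for anonymous ones (double underscore). A short illustration of why, assuming two pages reuse the same link text:

```rst
.. A named reference registers "dataset's homepage" as a target name, so a
   second link with the same text but a different URL makes Sphinx emit a
   "Duplicate explicit target name" warning:

`dataset's homepage <http://archive.ics.uci.edu/ml/datasets/Covertype>`_

.. An anonymous reference (double underscore) registers no target name, so
   identical link text can safely point at different URLs:

`dataset's homepage <http://archive.ics.uci.edu/ml/datasets/Covertype>`__
```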
2 changes: 1 addition & 1 deletion doc/datasets/kddcup99.rst
@@ -7,7 +7,7 @@ Kddcup 99 dataset
The KDD Cup '99 dataset was created by processing the tcpdump portions
of the 1998 DARPA Intrusion Detection System (IDS) Evaluation dataset,
created by MIT Lincoln Lab. The artificial data (described on the `dataset's
homepage <http://kdd.ics.uci.edu/databases/kddcup99/kddcup99.html>`_) was
homepage <http://kdd.ics.uci.edu/databases/kddcup99/kddcup99.html>`__) was
generated using a closed network and hand-injected attacks to produce a
large number of different types of attack with normal activity in the
background. As the initial goal was to produce a large training set for
2 changes: 1 addition & 1 deletion doc/developers/advanced_installation.rst
@@ -140,7 +140,7 @@ From source package
~~~~~~~~~~~~~~~~~~~

download the source package from
`pypi <https://pypi.python.org/pypi/scikit-learn>`_, unpack the sources and
`pypi <https://pypi.python.org/pypi/scikit-learn>`__, unpack the sources and
cd into the source directory.

This package uses distutils, which is the default way of installing
19 changes: 10 additions & 9 deletions doc/developers/contributing.rst
@@ -88,7 +88,7 @@ How to make a good bug report
-----------------------------

When you submit an issue to `Github
<https://github.com/scikit-learn/scikit-learn/issues>`_, please do your best to
<https://github.com/scikit-learn/scikit-learn/issues>`__, please do your best to
follow these guidelines! This will make it a lot easier to provide you with good
feedback:

@@ -416,7 +416,7 @@ underestimate how easy an issue is to solve!
we use the help wanted tag to mark Pull Requests which have been abandoned
by their original contributor and are available for someone to pick up where the original
contributor left off. The list of issues with the help wanted tag can be found
`here <https://github.com/scikit-learn/scikit-learn/labels/help%20wanted>`_ .
`here <https://github.com/scikit-learn/scikit-learn/labels/help%20wanted>`__ .

Note that not all issues which need contributors will have this tag.

@@ -454,11 +454,12 @@ To generate the full web site, including the example gallery::

Generating the example gallery will run all our examples which takes a
while. To save some time, you can use:
- ``make html-noplot``: this will generate the documentation without the
example gallery. This is useful when changing a docstring for example.
- ``EXAMPLES_PATTERN=your_regex_goes_here make html``: only the examples
matching ``your_regex_goes_here`` will be run. This is particularly
useful if you are modifying a few examples.

- ``make html-noplot``: this will generate the documentation without the
example gallery. This is useful when changing a docstring for example.
- ``EXAMPLES_PATTERN=your_regex_goes_here make html``: only the examples
matching ``your_regex_goes_here`` will be run. This is particularly
useful if you are modifying a few examples.

That should create all the documentation in the ``_build/html/stable``
directory. Set the environment variable `NO_MATHJAX=1` if you intend to view
@@ -879,7 +880,7 @@ from high-level questions to a more detailed check-list.
the tests validate that the code is correct, i.e. doing what the
documentation says it does? If the change is a bug-fix, is a
non-regression test included? Look at `this
<https://jeffknupp.com/blog/2013/12/09/improve-your-python-understanding-unit-testing>`_
<https://jeffknupp.com/blog/2013/12/09/improve-your-python-understanding-unit-testing>`__
to get started with testing in Python.

- Do the tests pass in the continuous integration build? If
@@ -1153,7 +1154,7 @@ the correct interface more easily.
and optionally the mixin classes in ``sklearn.base``.
For example, below is a custom classifier, with more examples included
in the scikit-learn-contrib
`project template <https://github.com/scikit-learn-contrib/project-template/blob/master/skltemplate/template.py>`_.
`project template <https://github.com/scikit-learn-contrib/project-template/blob/master/skltemplate/template.py>`__.

>>> import numpy as np
>>> from sklearn.base import BaseEstimator, ClassifierMixin
4 changes: 2 additions & 2 deletions doc/developers/tips.rst
@@ -27,7 +27,7 @@ We use CircleCI to build the HTML documentation for every pull request. To
access that documentation, instructions are provided in the :ref:`documentation
section of the contributor guide <contribute_documentation>`. To save you a few
clicks, we provide a `userscript
<https://raw.githubusercontent.com/lesteve/userscripts/master/add-button-for-pr-circleci-doc.user.js>`_
<https://raw.githubusercontent.com/lesteve/userscripts/master/add-button-for-pr-circleci-doc.user.js>`__
that adds a button to every PR. After installing the userscript, navigate to
any GitHub PR; a new button labeled "See CircleCI doc for this PR" should
appear in the top-right area.
@@ -37,7 +37,7 @@ Folding and unfolding outdated diffs on pull requests

GitHub hides discussions on PRs when the corresponding lines of code have been
changed in the meantime. This `userscript
<https://raw.githubusercontent.com/lesteve/userscripts/master/github-expand-all.user.js>`_
<https://raw.githubusercontent.com/lesteve/userscripts/master/github-expand-all.user.js>`__
provides a shortcut (Control-Alt-P at the time of writing but look at the code
to be sure) to unfold all such hidden discussions at once, so you can catch up.

2 changes: 2 additions & 0 deletions doc/documentation.rst
@@ -1,3 +1,5 @@
:orphan:

.. raw:: html

<div class="container-index">
2 changes: 1 addition & 1 deletion doc/faq.rst
@@ -355,7 +355,7 @@ instances everywhere and ensure that both estimators and cross-validation
splitters have their ``random_state`` parameter set.

Why do categorical variables need preprocessing in scikit-learn, compared to other tools?
--------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------

Most of scikit-learn assumes data is in NumPy arrays or SciPy sparse matrices
of a single numeric dtype. These do not explicitly represent categorical
8 changes: 8 additions & 0 deletions doc/glossary.rst
@@ -452,6 +452,7 @@ General Concepts

label indicator matrix
multilabel indicator matrix
multilabel indicator matrices
The format used to represent multilabel data, where each row of a 2d
array or sparse matrix corresponds to a sample, each column
corresponds to a class, and each element is 1 if the sample is labeled
@@ -1067,6 +1068,13 @@ Target Types
:func:`~utils.multiclass.type_of_target` will return
'multilabel-indicator' for multilabel input, whether sparse or dense.

multioutput
multi-output
A target where each sample has multiple classification/regression
labels. See :term:`multiclass multioutput` and :term:`continuous
multioutput`. We do not currently support modelling mixed
classification and regression targets.

.. _glossary_methods:

Methods
2 changes: 1 addition & 1 deletion doc/modules/compose.rst
@@ -364,7 +364,7 @@ and ignored by setting to ``None``::

.. topic:: Examples:

* :ref:`sphx_glr_auto_examples_compose_plot_feature_stacker.py`
* :ref:`sphx_glr_auto_examples_compose_plot_feature_union.py`


.. _column_transformer:
2 changes: 2 additions & 0 deletions doc/modules/pipeline.rst
@@ -1,3 +1,5 @@
:orphan:

.. raw:: html

<meta http-equiv="refresh" content="1; url=./compose.html" />
2 changes: 1 addition & 1 deletion doc/support.rst
@@ -92,7 +92,7 @@ Documentation resources

This documentation is relative to |release|. Documentation for
other versions can be found `here
<http://scikit-learn.org/dev/versions.html>`_.
<http://scikit-learn.org/dev/versions.html>`__.

Printable pdf documentation for old versions can be found `here
<https://sourceforge.net/projects/scikit-learn/files/documentation/>`_.
2 changes: 1 addition & 1 deletion doc/testimonials/testimonials.rst
@@ -120,7 +120,7 @@ Gilad Lotan, Chief Data Scientist


`Hugging Face <https://huggingface.co>`_
------------------------------------
----------------------------------------

.. raw:: html

2 changes: 1 addition & 1 deletion examples/README.txt
@@ -4,6 +4,6 @@ Examples
========

Miscellaneous examples
----------------
----------------------

Miscellaneous and introductory examples for scikit-learn.
6 changes: 3 additions & 3 deletions examples/calibration/plot_compare_calibration.py
@@ -21,8 +21,8 @@
* RandomForestClassifier shows the opposite behavior: the histograms show
peaks at approx. 0.2 and 0.9 probability, while probabilities close to 0 or 1
are very rare. An explanation for this is given by Niculescu-Mizil and Caruana
[1]: "Methods such as bagging and random forests that average predictions from
a base set of models can have difficulty making predictions near 0 and 1
[1]_: "Methods such as bagging and random forests that average predictions
from a base set of models can have difficulty making predictions near 0 and 1
because variance in the underlying base models will bias predictions that
should be near zero or one away from these values. Because predictions are
restricted to the interval [0,1], errors caused by variance tend to be one-
@@ -39,7 +39,7 @@

* Support Vector Classification (SVC) shows an even more sigmoid curve than
the RandomForestClassifier, which is typical for maximum-margin methods
(compare Niculescu-Mizil and Caruana [1]), which focus on hard samples
(compare Niculescu-Mizil and Caruana [1]_), which focus on hard samples
that are close to the decision boundary (the support vectors).

.. topic:: References:
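The ``[1]`` → ``[1]_`` edits above turn plain bracketed text into actual citation references; without the trailing underscore, Sphinx warns that the ``.. [1]`` citation target is never referenced. A minimal sketch of the syntax (citation text abbreviated for illustration):

```rst
Methods such as bagging and random forests can be poorly calibrated,
as shown by Niculescu-Mizil and Caruana [1]_.

.. topic:: References:

    .. [1] Niculescu-Mizil, A. & Caruana, R. Predicting good probabilities
       with supervised learning, ICML 2005.
```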
12 changes: 2 additions & 10 deletions examples/covariance/plot_robust_vs_empirical_covariance.py
@@ -40,16 +40,8 @@
so one can trust our implementation by comparing to this case.


References
----------
.. [1] P. J. Rousseeuw. Least median of squares regression. Journal of American
Statistical Ass., 79:871, 1984.
.. [2] Johanna Hardin, David M Rocke. The distribution of robust distances.
    Journal of Computational and Graphical Statistics. December 1, 2005,
    14(4): 928-946.
.. [3] Zoubir A., Koivunen V., Chakhchoukh Y. and Muma M. (2012). Robust
    estimation in signal processing: A tutorial-style treatment of
    fundamental concepts. IEEE Signal Processing Magazine 29(4), 61-80.
.. [1] P. J. Rousseeuw. Least median of squares regression. Journal of
    American Statistical Ass., 79:871, 1984.

Review discussion on the removed references:

Member: I'm not sure whether it's good to remove references, even if they're not used.

Member Author: Oh. I had expected that these would be present in the class docs. Apparently not.

Member Author: @VirgileFritsch, it's unclear to the reader what the relevance of these references in plot_robust_vs_empirical_covariance.py is, since they are not cited anywhere. Are these references here, rather than in MinCovDet, because they are the inspiration for the example?

"""
print(__doc__)
2 changes: 1 addition & 1 deletion examples/ensemble/plot_gradient_boosting_early_stopping.py
@@ -131,7 +131,7 @@ def autolabel(rects, n_estimators):

#######################################################################
# Compare fit times with and without early stopping
# ----------------------------------------------
# -------------------------------------------------

plt.figure(figsize=(9, 5))

6 changes: 2 additions & 4 deletions examples/ensemble/plot_isolation_forest.py
@@ -3,7 +3,8 @@
IsolationForest example
==========================================

An example using IsolationForest for anomaly detection.
An example using :class:`sklearn.ensemble.IsolationForest` for anomaly
detection.

The IsolationForest 'isolates' observations by randomly selecting a feature
and then randomly selecting a split value between the maximum and minimum
@@ -20,9 +21,6 @@
Hence, when a forest of random trees collectively produce shorter path lengths
for particular samples, they are highly likely to be anomalies.

.. [1] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. "Isolation forest."
Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on.

"""
print(__doc__)

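The isolation idea this example's docstring describes — anomalies need fewer random splits to separate from the rest of the data — can be sketched in a few lines of plain Python. This is a 1-D toy for intuition only, not the scikit-learn implementation:

```python
import random

def isolation_path_length(x, data, rng, depth=0, max_depth=10):
    """Number of random splits needed to isolate x within data (1-D toy)."""
    if len(data) <= 1 or depth >= max_depth:
        return depth
    lo, hi = min(data), max(data)
    if lo == hi:
        return depth
    split = rng.uniform(lo, hi)
    # keep only the points on the same side of the split as x (x stays in)
    same_side = [v for v in data if (v < split) == (x < split)]
    return isolation_path_length(x, same_side, rng, depth + 1, max_depth)

rng = random.Random(0)
inliers = [0.9, 1.0, 1.1, 1.05, 0.95]
sample = inliers + [8.0]  # one obvious anomaly
depths = [isolation_path_length(8.0, sample, rng) for _ in range(50)]
print(sum(depths) / len(depths))  # the anomaly averages a short path
```

Most random splits between 0.9 and 8.0 land above the inlier cluster, so the anomaly is isolated almost immediately, matching the docstring's claim that shorter collective path lengths indicate anomalies.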
3 changes: 1 addition & 2 deletions examples/svm/plot_separating_hyperplane_unbalanced.py
@@ -49,9 +49,8 @@
wclf = svm.SVC(kernel='linear', class_weight={1: 10})
wclf.fit(X, y)

# plot separating hyperplanes and samples
# plot the samples
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k')
plt.legend()

# plot the decision functions for both classifiers
ax = plt.gca()
3 changes: 3 additions & 0 deletions sklearn/compose/_target.py
@@ -25,12 +25,15 @@ class TransformedTargetRegressor(BaseEstimator, RegressorMixin):
The computation during ``fit`` is::

regressor.fit(X, func(y))

or::

regressor.fit(X, transformer.transform(y))

The computation during ``predict`` is::

inverse_func(regressor.predict(X))

or::

transformer.inverse_transform(regressor.predict(X))
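The blank lines restored above make the ``fit``/``predict`` computations render as literal blocks. As a sanity check of what those two expressions mean, here is a pure-NumPy sketch of the round trip; ``MeanRegressor`` is a toy stand-in invented for illustration, not part of the scikit-learn API:

```python
import numpy as np

class MeanRegressor:
    """Toy stand-in for a regressor: always predicts the training mean."""
    def fit(self, X, y):
        self.mean_ = y.mean()
        return self

    def predict(self, X):
        return np.full(len(X), self.mean_)

func, inverse_func = np.log1p, np.expm1   # an inverse pair, as required

X = np.zeros((4, 1))
y = np.array([1.0, 3.0, 7.0, 15.0])

reg = MeanRegressor().fit(X, func(y))     # fit: regressor.fit(X, func(y))
pred = inverse_func(reg.predict(X))       # predict: inverse_func(regressor.predict(X))
print(pred)  # the mean is taken on the log1p scale, then mapped back
```

Note that ``pred`` differs from ``y.mean()``: transforming the target changes what the regressor learns, which is the point of the class.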
7 changes: 4 additions & 3 deletions sklearn/covariance/elliptic_envelope.py
@@ -64,7 +64,7 @@ class EllipticEnvelope(MinCovDet, OutlierMixin):

offset_ : float
Offset used to define the decision function from the raw scores.
We have the relation: decision_function = score_samples - offset_.
We have the relation: ``decision_function = score_samples - offset_``.
The offset depends on the contamination parameter and is defined in
such a way we obtain the expected number of outliers (samples with
decision function < 0) in training.
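The relation set in literal markup above can be checked numerically; the scores and offset below are made-up values for illustration only:

```python
import numpy as np

score_samples = np.array([-1.2, 0.3, 2.0])  # hypothetical raw scores
offset_ = 0.5                               # hypothetical fitted offset

# the documented relation: decision_function = score_samples - offset_
decision_function = score_samples - offset_
outliers = decision_function < 0            # samples flagged as outliers
print(outliers)
```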
@@ -81,8 +81,9 @@ class EllipticEnvelope(MinCovDet, OutlierMixin):

References
----------
.. [1] Rousseeuw, P.J., Van Driessen, K. "A fast algorithm for the minimum
covariance determinant estimator" Technometrics 41(3), 212 (1999)
.. [1] Rousseeuw, P.J., Van Driessen, K. "A fast algorithm for the
minimum covariance determinant estimator" Technometrics 41(3), 212
(1999)

"""
def __init__(self, store_precision=True, assume_centered=False,
5 changes: 3 additions & 2 deletions sklearn/datasets/california_housing.py
@@ -67,8 +67,9 @@ def fetch_california_housing(data_home=None, download_if_missing=True,
instead of trying to download the data from the source site.


return_X_y : boolean, default=False. If True, returns ``(data.data,
data.target)`` instead of a Bunch object.
return_X_y : boolean, default=False.
If True, returns ``(data.data, data.target)`` instead of a Bunch
object.

.. versionadded:: 0.20

5 changes: 3 additions & 2 deletions sklearn/datasets/covtype.py
@@ -65,8 +65,9 @@ def fetch_covtype(data_home=None, download_if_missing=True,
shuffle : bool, default=False
Whether to shuffle dataset.

return_X_y : boolean, default=False. If True, returns ``(data.data,
data.target)`` instead of a Bunch object.
return_X_y : boolean, default=False.
If True, returns ``(data.data, data.target)`` instead of a Bunch
object.

.. versionadded:: 0.20

10 changes: 5 additions & 5 deletions sklearn/datasets/lfw.py
@@ -287,9 +287,10 @@ def fetch_lfw_people(data_home=None, funneled=True, resize=0.5,
If False, raise an IOError if the data is not locally available
instead of trying to download the data from the source site.

return_X_y : boolean, default=False. If True, returns ``(dataset.data,
dataset.target)`` instead of a Bunch object. See below for more
information about the `dataset.data` and `dataset.target` object.
return_X_y : boolean, default=False.
If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch
object. See below for more information about the `dataset.data` and
`dataset.target` object.

.. versionadded:: 0.20

@@ -472,8 +473,7 @@ def fetch_lfw_pairs(subset='train', data_home=None, funneled=True, resize=0.5,
pixels. Changing the ``slice_``, ``resize`` or ``subset`` parameters
will change the shape of the output.

pairs : numpy array of shape (2200, 2, 62, 47). Shape depends on
``subset``.
pairs : numpy array of shape (2200, 2, 62, 47). Shape depends on ``subset``
Each row has 2 face images corresponding to same or different person
from the dataset containing 5749 people. Changing the ``slice_``,
``resize`` or ``subset`` parameters will change the shape of the
6 changes: 3 additions & 3 deletions sklearn/datasets/mlcomp.py
@@ -26,14 +26,16 @@ def _load_document_classification(dataset_path, metadata, set_=None, **kwargs):
def load_mlcomp(name_or_id, set_="raw", mlcomp_root=None, **kwargs):
"""Load a datasets as downloaded from http://mlcomp.org

Read more in the :ref:`User Guide <datasets>`.

Parameters
----------

name_or_id : int or str
The integer id or the string name metadata of the MLComp
dataset to load

set_ : str, default='raw'
set\_ : str, default='raw'
Select the portion to load: 'train', 'test' or 'raw'

mlcomp_root : str, optional
@@ -43,8 +45,6 @@ def load_mlcomp(name_or_id, set_="raw", mlcomp_root=None, **kwargs):

**kwargs : domain specific kwargs to be passed to the dataset loader.

Read more in the :ref:`User Guide <datasets>`.

Returns
-------
