
DOC: Fixing more doc warnings and wrong .. code-block :: directive (space before colon) #24650

Merged · 1 commit · Jan 6, 2019
8 changes: 8 additions & 0 deletions ci/code_checks.sh
@@ -162,6 +162,14 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
# invgrep -R --include '*.py' -E '[[:space:]] pytest.raises' pandas/tests
# RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check for wrong space after code-block directive and before colon (".. code-block ::" instead of ".. code-block::")' ; echo $MSG
invgrep -R --include="*.rst" ".. code-block ::" doc/source
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check for wrong space after ipython directive and before colon (".. ipython ::" instead of ".. ipython::")' ; echo $MSG
invgrep -R --include="*.rst" ".. ipython ::" doc/source
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check that no file in the repo contains tailing whitespaces' ; echo $MSG
set -o pipefail
if [[ "$AZURE" == "true" ]]; then
2 changes: 1 addition & 1 deletion doc/source/api/arrays.rst
@@ -195,7 +195,7 @@ Methods

A collection of timedeltas may be stored in a :class:`TimedeltaArray`.

.. autosumarry::
.. autosummary::
:toctree: generated/

arrays.TimedeltaArray
4 changes: 2 additions & 2 deletions doc/source/dsintro.rst
@@ -115,7 +115,7 @@ Series is ndarray-like
``Series`` acts very similarly to a ``ndarray``, and is a valid argument to most NumPy functions.
However, operations such as slicing will also slice the index.

.. ipython :: python
.. ipython:: python

s[0]
s[:3]
@@ -171,7 +171,7 @@ Series is dict-like
A Series is like a fixed-size dict in that you can get and set values by index
label:

.. ipython :: python
.. ipython:: python

s['a']
s['e'] = 12.
27 changes: 14 additions & 13 deletions doc/source/indexing.rst
@@ -743,9 +743,9 @@ Selecting Random Samples

A random selection of rows or columns from a Series, DataFrame, or Panel can be obtained with the :meth:`~DataFrame.sample` method. The method will sample rows by default, and accepts a specific number of rows/columns to return, or a fraction of rows.

.. ipython :: python
.. ipython:: python

s = pd.Series([0,1,2,3,4,5])
s = pd.Series([0, 1, 2, 3, 4, 5])

# When no arguments are passed, returns 1 row.
s.sample()
@@ -759,9 +759,9 @@ A random selection of rows or columns from a Series, DataFrame, or Panel with th
By default, ``sample`` will return each row at most once, but one can also sample with replacement
using the ``replace`` option:

.. ipython :: python
.. ipython:: python

s = pd.Series([0,1,2,3,4,5])
s = pd.Series([0, 1, 2, 3, 4, 5])

# Without replacement (default):
s.sample(n=6, replace=False)
@@ -774,9 +774,9 @@ By default, each row has an equal probability of being selected, but if you want
to have different probabilities, you can pass the ``sample`` function sampling weights as
``weights``. These weights can be a list, a NumPy array, or a Series, but they must be of the same length as the object you are sampling. Missing values will be treated as a weight of zero, and inf values are not allowed. If weights do not sum to 1, they will be re-normalized by dividing all weights by the sum of the weights. For example:

.. ipython :: python
.. ipython:: python

s = pd.Series([0,1,2,3,4,5])
s = pd.Series([0, 1, 2, 3, 4, 5])
example_weights = [0, 0, 0.2, 0.2, 0.2, 0.4]
s.sample(n=3, weights=example_weights)

@@ -788,23 +788,24 @@ When applied to a DataFrame, you can use a column of the DataFrame as sampling w
(provided you are sampling rows and not columns) by simply passing the name of the column
as a string.

.. ipython :: python
.. ipython:: python

df2 = pd.DataFrame({'col1':[9,8,7,6], 'weight_column':[0.5, 0.4, 0.1, 0]})
df2.sample(n = 3, weights = 'weight_column')
df2 = pd.DataFrame({'col1': [9, 8, 7, 6],
'weight_column': [0.5, 0.4, 0.1, 0]})
df2.sample(n=3, weights='weight_column')

``sample`` also allows users to sample columns instead of rows using the ``axis`` argument.

.. ipython :: python
.. ipython:: python

df3 = pd.DataFrame({'col1':[1,2,3], 'col2':[2,3,4]})
df3 = pd.DataFrame({'col1': [1, 2, 3], 'col2': [2, 3, 4]})
df3.sample(n=1, axis=1)

Finally, one can also set a seed for ``sample``'s random number generator using the ``random_state`` argument, which will accept either an integer (as a seed) or a NumPy RandomState object.

.. ipython :: python
.. ipython:: python

df4 = pd.DataFrame({'col1':[1,2,3], 'col2':[2,3,4]})
df4 = pd.DataFrame({'col1': [1, 2, 3], 'col2': [2, 3, 4]})

# With a given seed, the sample will always draw the same rows.
df4.sample(n=2, random_state=2)
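# Illustrative sketch (not part of the hunk above): a NumPy RandomState
# object is accepted in place of an integer seed, assuming NumPy is
# available as np; with the same seed it draws the same rows.
rng = np.random.RandomState(2)
df4.sample(n=2, random_state=rng)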
6 changes: 3 additions & 3 deletions doc/source/io.rst
@@ -578,7 +578,7 @@ Duplicate names parsing
If the file or header contains duplicate names, pandas will by default
distinguish between them so as to prevent overwriting data:

.. ipython :: python
.. ipython:: python

data = ('a,b,a\n'
'0,1,2\n'
@@ -590,7 +590,7 @@ which modifies a series of duplicate columns 'X', ..., 'X' to become
'X', 'X.1', ..., 'X.N'. If ``mangle_dupe_cols=False``, duplicate data can
arise:

.. code-block :: python
.. code-block:: ipython

In [2]: data = 'a,b,a\n0,1,2\n3,4,5'
In [3]: pd.read_csv(StringIO(data), mangle_dupe_cols=False)
@@ -602,7 +602,7 @@ arise:
To prevent users from encountering this problem with duplicate data, a ``ValueError``
exception is raised if ``mangle_dupe_cols != True``:

.. code-block :: python
.. code-block:: ipython

In [2]: data = 'a,b,a\n0,1,2\n3,4,5'
In [3]: pd.read_csv(StringIO(data), mangle_dupe_cols=False)
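
For comparison, a minimal sketch of the default behaviour, assuming the same ``data`` string: with ``mangle_dupe_cols=True`` (the default) the duplicate column is renamed rather than overwritten.

from io import StringIO
import pandas as pd

data = 'a,b,a\n0,1,2\n3,4,5'
pd.read_csv(StringIO(data))  # columns become 'a', 'b', 'a.1'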
6 changes: 3 additions & 3 deletions doc/source/whatsnew/v0.12.0.rst
@@ -191,7 +191,7 @@ I/O Enhancements

You can use ``pd.read_html()`` to read the output from ``DataFrame.to_html()`` like so

.. ipython :: python
.. ipython:: python
:okwarning:

df = pd.DataFrame({'a': range(3), 'b': list('abc')})
@@ -296,7 +296,7 @@ Other Enhancements

For example you can do

.. ipython :: python
.. ipython:: python

df = pd.DataFrame({'a': list('ab..'), 'b': [1, 2, 3, 4]})
df.replace(regex=r'\s*\.\s*', value=np.nan)
@@ -306,7 +306,7 @@ Other Enhancements

Regular string replacement still works as expected. For example, you can do

.. ipython :: python
.. ipython:: python

df.replace('.', np.nan)

2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.15.0.rst
@@ -1015,7 +1015,7 @@ Other:

.. ipython:: python

business_dates = date_range(start='4/1/2014', end='6/30/2014', freq='B')
business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', freq='B')
df = pd.DataFrame(1, index=business_dates, columns=['a', 'b'])
# get the first, 4th, and last date index for each month
df.groupby([df.index.year, df.index.month]).nth([0, 3, -1])
8 changes: 4 additions & 4 deletions doc/source/whatsnew/v0.16.0.rst
@@ -51,7 +51,7 @@ to be inserted (for example, a ``Series`` or NumPy array), or a function
of one argument to be called on the ``DataFrame``. The new values are inserted,
and the entire DataFrame (with all original and new columns) is returned.

.. ipython :: python
.. ipython:: python

iris = pd.read_csv('data/iris.data')
iris.head()
@@ -61,10 +61,10 @@ and the entire DataFrame (with all original and new columns) is returned.
Above was an example of inserting a precomputed value. We can also pass in
a function to be evaluated.

.. ipython :: python
.. ipython:: python

iris.assign(sepal_ratio = lambda x: (x['SepalWidth'] /
x['SepalLength'])).head()
iris.assign(sepal_ratio=lambda x: (x['SepalWidth']
/ x['SepalLength'])).head()

The power of ``assign`` comes when used in chains of operations. For example,
we can limit the DataFrame to just those with a Sepal Length greater than 5,
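
A minimal sketch of such a chain, assuming the ``iris`` frame loaded in the example above with 'SepalLength' and 'SepalWidth' columns:

(iris.query('SepalLength > 5')
     .assign(sepal_ratio=lambda x: x['SepalWidth'] / x['SepalLength'])
     .head())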
9 changes: 5 additions & 4 deletions doc/source/whatsnew/v0.16.1.rst
@@ -181,9 +181,9 @@ total number or rows or columns. It also has options for sampling with or withou
for passing in a column for weights for non-uniform sampling, and for setting seed values to
facilitate replication. (:issue:`2419`)

.. ipython :: python
.. ipython:: python

example_series = Series([0,1,2,3,4,5])
example_series = pd.Series([0, 1, 2, 3, 4, 5])

# When no arguments are passed, returns 1
example_series.sample()
@@ -207,9 +207,10 @@ facilitate replication. (:issue:`2419`)
When applied to a DataFrame, one may pass the name of a column to specify sampling weights
when sampling from rows.

.. ipython :: python
.. ipython:: python

df = DataFrame({'col1':[9,8,7,6], 'weight_column':[0.5, 0.4, 0.1, 0]})
df = pd.DataFrame({'col1': [9, 8, 7, 6],
'weight_column': [0.5, 0.4, 0.1, 0]})
df.sample(n=3, weights='weight_column')


55 changes: 27 additions & 28 deletions doc/source/whatsnew/v0.17.0.rst
@@ -84,9 +84,9 @@ The new implementation allows for having a single-timezone across all rows, with

.. ipython:: python

df = DataFrame({'A': date_range('20130101', periods=3),
'B': date_range('20130101', periods=3, tz='US/Eastern'),
'C': date_range('20130101', periods=3, tz='CET')})
df = pd.DataFrame({'A': pd.date_range('20130101', periods=3),
'B': pd.date_range('20130101', periods=3, tz='US/Eastern'),
'C': pd.date_range('20130101', periods=3, tz='CET')})
df
df.dtypes

@@ -442,17 +442,18 @@ Other enhancements

- Added a ``DataFrame.round`` method to round the values to a variable number of decimal places (:issue:`10568`).

.. ipython :: python
.. ipython:: python

df = pd.DataFrame(np.random.random([3, 3]), columns=['A', 'B', 'C'],
index=['first', 'second', 'third'])
df = pd.DataFrame(np.random.random([3, 3]),
columns=['A', 'B', 'C'],
index=['first', 'second', 'third'])
df
df.round(2)
df.round({'A': 0, 'C': 2})

- ``drop_duplicates`` and ``duplicated`` now accept a ``keep`` keyword to target first, last, and all duplicates. The ``take_last`` keyword is deprecated, see :ref:`here <whatsnew_0170.deprecations>` (:issue:`6511`, :issue:`8505`)

.. ipython :: python
.. ipython:: python

s = pd.Series(['A', 'B', 'C', 'A', 'B', 'D'])
s.drop_duplicates()
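# Illustrative sketch of the keep keyword described above, using the same s
# (this is not part of the truncated hunk shown here):
s.drop_duplicates(keep='last')
s.drop_duplicates(keep=False)  # keep=False drops every duplicated entry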
@@ -630,13 +631,13 @@ Of course you can coerce this as well.

.. ipython:: python

to_datetime(['2009-07-31', 'asd'], errors='coerce')
pd.to_datetime(['2009-07-31', 'asd'], errors='coerce')

To keep the previous behavior, you can use ``errors='ignore'``:

.. ipython:: python

to_datetime(['2009-07-31', 'asd'], errors='ignore')
pd.to_datetime(['2009-07-31', 'asd'], errors='ignore')

Furthermore, ``pd.to_timedelta`` has gained a similar API, of ``errors='raise'|'ignore'|'coerce'``, and the ``coerce`` keyword
has been deprecated in favor of ``errors='coerce'``.
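
A minimal sketch of that keyword, assuming pandas is imported as ``pd``:

pd.to_timedelta(['1 day', 'foo'], errors='coerce')  # unparseable 'foo' becomes NaT
pd.to_timedelta(['1 day', 'foo'], errors='ignore')  # input is returned unchanged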
@@ -655,13 +656,13 @@ Previous Behavior:

.. code-block:: ipython

In [1]: Timestamp('2012Q2')
In [1]: pd.Timestamp('2012Q2')
Traceback
...
ValueError: Unable to parse 2012Q2

# Results in today's date.
In [2]: Timestamp('2014')
In [2]: pd.Timestamp('2014')
Out [2]: 2014-08-12 00:00:00

v0.17.0 can parse them as below. It works on ``DatetimeIndex`` also.
@@ -670,9 +671,9 @@ New Behavior:

.. ipython:: python

Timestamp('2012Q2')
Timestamp('2014')
DatetimeIndex(['2012Q2', '2014'])
pd.Timestamp('2012Q2')
pd.Timestamp('2014')
pd.DatetimeIndex(['2012Q2', '2014'])

.. note::

@@ -681,8 +682,8 @@ New Behavior:
.. ipython:: python

import pandas.tseries.offsets as offsets
Timestamp.now()
Timestamp.now() + offsets.DateOffset(years=1)
pd.Timestamp.now()
pd.Timestamp.now() + offsets.DateOffset(years=1)

Changes to Index Comparisons
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -739,7 +740,7 @@ Boolean comparisons of a ``Series`` vs ``None`` will now be equivalent to compar

.. ipython:: python

s = Series(range(3))
s = pd.Series(range(3))
s.iloc[1] = None
s
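# Illustrative sketch of the comparison described above: with the new
# behaviour this is elementwise and all False, because missing values
# no longer compare equal to None / np.nan.
s == None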

@@ -807,11 +808,6 @@ Previous Behavior:

New Behavior:

.. ipython:: python
:suppress:

import os

.. ipython:: python

df_with_missing.to_hdf('file.h5',
@@ -824,6 +820,7 @@ New Behavior:
.. ipython:: python
:suppress:

import os
os.remove('file.h5')

See the :ref:`docs <io.hdf5>` for more details.
@@ -876,7 +873,7 @@ Changes to ``Categorical.unique``
- unordered category: values and categories are sorted by appearance order.
- ordered category: values are sorted by appearance order, categories keep existing order.

.. ipython :: python
.. ipython:: python

cat = pd.Categorical(['C', 'A', 'B', 'C'],
categories=['A', 'B', 'C'],
@@ -899,7 +896,7 @@ an integer, resulting in ``header=0`` for ``False`` and ``header=1`` for ``True`

A ``bool`` input to ``header`` will now raise a ``TypeError``

.. code-block :: python
.. code-block:: ipython

In [29]: df = pd.read_csv('data.csv', header=False)
TypeError: Passing a bool to header is invalid. Use header=None for no header or
@@ -984,10 +981,12 @@ Removal of prior version deprecations/changes
- Removal of ``colSpace`` parameter from ``DataFrame.to_string()``, in favor of ``col_space``, circa 0.8.0 version.
- Removal of automatic time-series broadcasting (:issue:`2304`)

.. ipython :: python
.. ipython:: python

np.random.seed(1234)
df = DataFrame(np.random.randn(5,2),columns=list('AB'),index=date_range('20130101',periods=5))
df = DataFrame(np.random.randn(5, 2),
columns=list('AB'),
index=date_range('20130101', periods=5))
df

Previously
@@ -1008,9 +1007,9 @@ Removal of prior version deprecations/changes

Current

.. ipython :: python
.. ipython:: python

df.add(df.A,axis='index')
df.add(df.A, axis='index')


- Remove ``table`` keyword in ``HDFStore.put/append``, in favor of using ``format=`` (:issue:`4645`)