From 513a8bf606deda108b04c536ef14c40647283095 Mon Sep 17 00:00:00 2001
From: Marc Garcia
Date: Sun, 2 Dec 2018 18:19:49 +0000
Subject: [PATCH 1/3] Stop ignoring fixed rst files in flake8-rst

---
 setup.cfg | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index cc7393e5a09b9..2966e8b92ad39 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -69,10 +69,6 @@ exclude =
     doc/source/advanced.rst
     doc/source/basics.rst
     doc/source/categorical.rst
-    doc/source/comparison_with_r.rst
-    doc/source/comparison_with_sql.rst
-    doc/source/comparison_with_stata.rst
-    doc/source/computation.rst
     doc/source/contributing.rst
     doc/source/contributing_docstring.rst
     doc/source/dsintro.rst
@@ -80,14 +76,12 @@ exclude =
     doc/source/extending.rst
     doc/source/groupby.rst
     doc/source/indexing.rst
-    doc/source/io.rst
     doc/source/merging.rst
     doc/source/missing_data.rst
     doc/source/options.rst
     doc/source/release.rst
     doc/source/reshaping.rst
     doc/source/timedeltas.rst
-    doc/source/timeseries.rst
     doc/source/visualization.rst

From 563007bb4a72030764d892a9c41449cc9f5b9d07 Mon Sep 17 00:00:00 2001
From: Marc Garcia
Date: Sun, 2 Dec 2018 20:26:22 +0000
Subject: [PATCH 2/3] Fixing remaining flake8 issues in io.rst, and removing timedeltas.rst from the flake8-rst exclude list

---
 doc/source/io.rst | 230 ++++++++++++++++++++++------------------------
 setup.cfg         |   1 -
 2 files changed, 111 insertions(+), 120 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 372a7b8a325e7..d996b1b5ce5b3 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -1151,7 +1151,7 @@ Let us consider some examples:
 
 .. code-block:: python
 
-   read_csv(path, na_values=[5])
+   pd.read_csv('path_to_file.csv', na_values=[5])
 
 In the example above ``5`` and ``5.0`` will be recognized as ``NaN``, in
 addition to the defaults. A string will first be interpreted as a numerical
@@ -1159,19 +1159,19 @@ addition to the defaults. A string will first be interpreted as a numerical
 
 .. code-block:: python
 
-   read_csv(path, keep_default_na=False, na_values=[""])
+   pd.read_csv('path_to_file.csv', keep_default_na=False, na_values=[""])
 
 Above, only an empty field will be recognized as ``NaN``.
 
 .. code-block:: python
 
-   read_csv(path, keep_default_na=False, na_values=["NA", "0"])
+   pd.read_csv('path_to_file.csv', keep_default_na=False, na_values=["NA", "0"])
 
 Above, both ``NA`` and ``0`` as strings are ``NaN``.
 
 .. code-block:: python
 
-   read_csv(path, na_values=["Nope"])
+   pd.read_csv('path_to_file.csv', na_values=["Nope"])
 
 The default values, in addition to the string ``"Nope"`` are recognized as
 ``NaN``.
@@ -1245,24 +1245,13 @@ too few fields will have NA values filled in the trailing fields. Lines with
 too many fields will raise an error by default:
 
 .. ipython:: python
-   :suppress:
+    :okexcept:
 
    data = ('a,b,c\n'
            '1,2,3\n'
            '4,5,6,7\n'
            '8,9,10')
-
-.. code-block:: ipython
-
-   In [27]: data = ('a,b,c\n'
-                    '1,2,3\n'
-                    '4,5,6,7\n'
-                    '8,9,10')
-
-   In [28]: pd.read_csv(StringIO(data))
-   ---------------------------------------------------------------------------
-   ParserError                               Traceback (most recent call last)
-   ParserError: Error tokenizing data. C error: Expected 3 fields in line 3, saw 4
+   pd.read_csv(StringIO(data))
 
 You can elect to skip bad lines:
 
@@ -2754,7 +2743,7 @@ file, and the ``sheet_name`` indicating which sheet to parse.
 
 .. code-block:: python
 
    # Returns a DataFrame
-   read_excel('path_to_file.xls', sheet_name='Sheet1')
+   pd.read_excel('path_to_file.xls', sheet_name='Sheet1')
 
 .. 
_io.excel.excelfile_class: @@ -2803,14 +2792,14 @@ of sheet names can simply be passed to ``read_excel`` with no loss in performanc # using the ExcelFile class data = {} with pd.ExcelFile('path_to_file.xls') as xls: - data['Sheet1'] = read_excel(xls, 'Sheet1', index_col=None, - na_values=['NA']) - data['Sheet2'] = read_excel(xls, 'Sheet2', index_col=None, - na_values=['NA']) + data['Sheet1'] = pd.read_excel(xls, 'Sheet1', index_col=None, + na_values=['NA']) + data['Sheet2'] = pd.read_excel(xls, 'Sheet2', index_col=None, + na_values=['NA']) # equivalent using the read_excel function - data = read_excel('path_to_file.xls', ['Sheet1', 'Sheet2'], - index_col=None, na_values=['NA']) + data = pd.read_excel('path_to_file.xls', ['Sheet1', 'Sheet2'], + index_col=None, na_values=['NA']) .. _io.excel.specifying_sheets: @@ -2832,35 +2821,35 @@ Specifying Sheets .. code-block:: python # Returns a DataFrame - read_excel('path_to_file.xls', 'Sheet1', index_col=None, na_values=['NA']) + pd.read_excel('path_to_file.xls', 'Sheet1', index_col=None, na_values=['NA']) Using the sheet index: .. code-block:: python # Returns a DataFrame - read_excel('path_to_file.xls', 0, index_col=None, na_values=['NA']) + pd.read_excel('path_to_file.xls', 0, index_col=None, na_values=['NA']) Using all default values: .. code-block:: python # Returns a DataFrame - read_excel('path_to_file.xls') + pd.read_excel('path_to_file.xls') Using None to get all sheets: .. code-block:: python # Returns a dictionary of DataFrames - read_excel('path_to_file.xls', sheet_name=None) + pd.read_excel('path_to_file.xls', sheet_name=None) Using a list to get multiple sheets: .. code-block:: python # Returns the 1st and 4th sheet, as a dictionary of DataFrames. - read_excel('path_to_file.xls', sheet_name=['Sheet1', 3]) + pd.read_excel('path_to_file.xls', sheet_name=['Sheet1', 3]) ``read_excel`` can read more than one sheet, by setting ``sheet_name`` to either a list of sheet names, a list of sheet positions, or ``None`` to read all sheets. @@ -2932,20 +2921,20 @@ to be parsed. .. code-block:: python - read_excel('path_to_file.xls', 'Sheet1', usecols=2) + pd.read_excel('path_to_file.xls', 'Sheet1', usecols=2) You can also specify a comma-delimited set of Excel columns and ranges as a string: .. code-block:: python - read_excel('path_to_file.xls', 'Sheet1', usecols='A,C:E') + pd.read_excel('path_to_file.xls', 'Sheet1', usecols='A,C:E') If ``usecols`` is a list of integers, then it is assumed to be the file column indices to be parsed. .. code-block:: python - read_excel('path_to_file.xls', 'Sheet1', usecols=[0, 2, 3]) + pd.read_excel('path_to_file.xls', 'Sheet1', usecols=[0, 2, 3]) Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. @@ -2957,7 +2946,7 @@ document header row(s). Those strings define which columns will be parsed: .. code-block:: python - read_excel('path_to_file.xls', 'Sheet1', usecols=['foo', 'bar']) + pd.read_excel('path_to_file.xls', 'Sheet1', usecols=['foo', 'bar']) Element order is ignored, so ``usecols=['baz', 'joe']`` is the same as ``['joe', 'baz']``. @@ -2968,7 +2957,7 @@ the column names, returning names where the callable function evaluates to ``Tru .. code-block:: python - read_excel('path_to_file.xls', 'Sheet1', usecols=lambda x: x.isalpha()) + pd.read_excel('path_to_file.xls', 'Sheet1', usecols=lambda x: x.isalpha()) Parsing Dates +++++++++++++ @@ -2980,7 +2969,7 @@ use the ``parse_dates`` keyword to parse those strings to datetimes: .. 
code-block:: python
 
-   read_excel('path_to_file.xls', 'Sheet1', parse_dates=['date_strings'])
+   pd.read_excel('path_to_file.xls', 'Sheet1', parse_dates=['date_strings'])
 
 
 Cell Converters
@@ -2991,7 +2980,7 @@ option. For instance, to convert a column to boolean:
 
 .. code-block:: python
 
-   read_excel('path_to_file.xls', 'Sheet1', converters={'MyBools': bool})
+   pd.read_excel('path_to_file.xls', 'Sheet1', converters={'MyBools': bool})
 
 This options handles missing values and treats exceptions in the converters
 as missing data. Transformations are applied cell by cell rather than to the
@@ -3006,7 +2995,7 @@ missing data to recover integer dtype:
 
        return int(x) if x else -1
 
-   read_excel('path_to_file.xls', 'Sheet1', converters={'MyInts': cfun})
+   pd.read_excel('path_to_file.xls', 'Sheet1', converters={'MyInts': cfun})
 
 dtype Specifications
 ++++++++++++++++++++
@@ -3020,7 +3009,7 @@ no type inference, use the type ``str`` or ``object``.
 
 .. code-block:: python
 
-   read_excel('path_to_file.xls', dtype={'MyInts': 'int64', 'MyText': str})
+   pd.read_excel('path_to_file.xls', dtype={'MyInts': 'int64', 'MyText': str})
 
 .. _io.excel_writer:
 
@@ -5126,7 +5115,7 @@ If you have an SQLAlchemy description of your database you can express where con
            sa.Column('Col_3', sa.Boolean),
            )
 
-   pd.read_sql(sa.select([data_table]).where(data_table.c.Col_3 == True), engine)
+   pd.read_sql(sa.select([data_table]).where(data_table.c.Col_3.is_(True)), engine)
 
 You can combine SQLAlchemy expressions with parameters passed to :func:`read_sql` using :func:`sqlalchemy.bindparam`
 
@@ -5155,7 +5144,7 @@ And then issue the following queries:
 
 .. code-block:: python
 
-   data.to_sql('data', cnx)
+   data.to_sql('data', con)
 
    pd.read_sql_query("SELECT * FROM data", con)
 
@@ -5372,6 +5361,9 @@ Obtain an iterator and read an XPORT file 100,000 lines at a time:
 
 .. code-block:: python
 
+    def do_something(chunk):
+        pass
+
     rdr = pd.read_sas('sas_xport.xpt', chunk=100000)
     for chunk in rdr:
         do_something(chunk)
@@ -5424,85 +5416,7 @@ ignored.
 dtypes: float64(1), int64(1)
 memory usage: 15.3 MB
 
-When writing, the top-three functions in terms of speed are are
-``test_pickle_write``, ``test_feather_write`` and ``test_hdf_fixed_write_compress``.
-
-.. code-block:: ipython
-
-   In [14]: %timeit test_sql_write(df)
-   2.37 s ± 36.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
-
-   In [15]: %timeit test_hdf_fixed_write(df)
-   194 ms ± 65.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
-
-   In [26]: %timeit test_hdf_fixed_write_compress(df)
-   119 ms ± 2.15 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
-
-   In [16]: %timeit test_hdf_table_write(df)
-   623 ms ± 125 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
-
-   In [27]: %timeit test_hdf_table_write_compress(df)
-   563 ms ± 23.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
-
-   In [17]: %timeit test_csv_write(df)
-   3.13 s ± 49.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
-
-   In [30]: %timeit test_feather_write(df)
-   103 ms ± 5.88 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
-
-   In [31]: %timeit test_pickle_write(df)
-   109 ms ± 3.72 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
-
-   In [32]: %timeit test_pickle_write_compress(df)
-   3.33 s ± 55.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
-
-When reading, the top three are ``test_feather_read``, ``test_pickle_read`` and
-``test_hdf_fixed_read``.
-
-.. code-block:: ipython
-
-   In [18]: %timeit test_sql_read()
-   1.35 s ± 14.7 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)
-
-   In [19]: %timeit test_hdf_fixed_read()
-   14.3 ms ± 438 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
-
-   In [28]: %timeit test_hdf_fixed_read_compress()
-   23.5 ms ± 672 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
-
-   In [20]: %timeit test_hdf_table_read()
-   35.4 ms ± 314 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
-
-   In [29]: %timeit test_hdf_table_read_compress()
-   42.6 ms ± 2.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
-
-   In [22]: %timeit test_csv_read()
-   516 ms ± 27.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
-
-   In [33]: %timeit test_feather_read()
-   4.06 ms ± 115 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
-
-   In [34]: %timeit test_pickle_read()
-   6.5 ms ± 172 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
-
-   In [35]: %timeit test_pickle_read_compress()
-   588 ms ± 3.57 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
-
-Space on disk (in bytes)
-
-.. code-block:: none
-
-   34816000 Aug 21 18:00 test.sql
-   24009240 Aug 21 18:00 test_fixed.hdf
-    7919610 Aug 21 18:00 test_fixed_compress.hdf
-   24458892 Aug 21 18:00 test_table.hdf
-    8657116 Aug 21 18:00 test_table_compress.hdf
-   28520770 Aug 21 18:00 test.csv
-   16000248 Aug 21 18:00 test.feather
-   16000848 Aug 21 18:00 test.pkl
-    7554108 Aug 21 18:00 test.pkl.compress
-
-And here's the code:
+Given the following test set:
 
 .. code-block:: python
 
@@ -5589,3 +5503,81 @@ And here's the code:
 
     def test_pickle_read_compress():
         pd.read_pickle('test.pkl.compress', compression='xz')
+
+When writing, the top three functions in terms of speed are
+``test_pickle_write``, ``test_feather_write`` and ``test_hdf_fixed_write_compress``.
+
+.. code-block:: ipython
+
+   In [14]: %timeit test_sql_write(df)
+   2.37 s ± 36.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+
+   In [15]: %timeit test_hdf_fixed_write(df)
+   194 ms ± 65.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
+
+   In [26]: %timeit test_hdf_fixed_write_compress(df)
+   119 ms ± 2.15 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
+
+   In [16]: %timeit test_hdf_table_write(df)
+   623 ms ± 125 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+
+   In [27]: %timeit test_hdf_table_write_compress(df)
+   563 ms ± 23.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+
+   In [17]: %timeit test_csv_write(df)
+   3.13 s ± 49.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+
+   In [30]: %timeit test_feather_write(df)
+   103 ms ± 5.88 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
+
+   In [31]: %timeit test_pickle_write(df)
+   109 ms ± 3.72 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
+
+   In [32]: %timeit test_pickle_write_compress(df)
+   3.33 s ± 55.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+
+When reading, the top three are ``test_feather_read``, ``test_pickle_read`` and
+``test_hdf_fixed_read``.
+
+.. code-block:: ipython
+
+   In [18]: %timeit test_sql_read()
+   1.35 s ± 14.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+
+   In [19]: %timeit test_hdf_fixed_read()
+   14.3 ms ± 438 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
+
+   In [28]: %timeit test_hdf_fixed_read_compress()
+   23.5 ms ± 672 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
+
+   In [20]: %timeit test_hdf_table_read()
+   35.4 ms ± 314 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
+
+   In [29]: %timeit test_hdf_table_read_compress()
+   42.6 ms ± 2.1 ms per loop (mean ± std. dev. 
of 7 runs, 10 loops each) + + In [22]: %timeit test_csv_read() + 516 ms ± 27.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [33]: %timeit test_feather_read() + 4.06 ms ± 115 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + + In [34]: %timeit test_pickle_read() + 6.5 ms ± 172 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + + In [35]: %timeit test_pickle_read_compress() + 588 ms ± 3.57 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + +Space on disk (in bytes) + +.. code-block:: none + + 34816000 Aug 21 18:00 test.sql + 24009240 Aug 21 18:00 test_fixed.hdf + 7919610 Aug 21 18:00 test_fixed_compress.hdf + 24458892 Aug 21 18:00 test_table.hdf + 8657116 Aug 21 18:00 test_table_compress.hdf + 28520770 Aug 21 18:00 test.csv + 16000248 Aug 21 18:00 test.feather + 16000848 Aug 21 18:00 test.pkl + 7554108 Aug 21 18:00 test.pkl.compress diff --git a/setup.cfg b/setup.cfg index 2966e8b92ad39..7909d3ef7f2d7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -81,7 +81,6 @@ exclude = doc/source/options.rst doc/source/release.rst doc/source/reshaping.rst - doc/source/timedeltas.rst doc/source/visualization.rst From 2f6b576593b0af1089f45c0b4461b533879c4653 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 2 Dec 2018 22:36:32 +0000 Subject: [PATCH 3/3] Fixing typo that generated the wrong exception when parsing a csv --- doc/source/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index d996b1b5ce5b3..fbd238586c776 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1245,7 +1245,7 @@ too few fields will have NA values filled in the trailing fields. Lines with too many fields will raise an error by default: .. ipython:: python - :okexcept: + :okexcept: data = ('a,b,c\n' '1,2,3\n'
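
One flake8 rule touched by these io.rst changes deserves a short illustration: pycodestyle flags ``== True`` comparisons (E712), but the Python ``is`` operator cannot be overloaded, so spelling the SQLAlchemy filter as ``data_table.c.Col_3 is True`` would collapse to a plain boolean before any SQL is generated. ``Column.is_()`` keeps the expression symbolic and keeps the linter quiet. Below is a minimal, self-contained sketch of that form, assuming an in-memory SQLite engine and a throwaway table that mirrors the ``data_table`` defined in io.rst rather than the exact code from the docs:

.. code-block:: python

   import pandas as pd
   import sqlalchemy as sa

   # Throwaway in-memory database; io.rst defines a similar ``data_table``.
   engine = sa.create_engine('sqlite:///:memory:')
   metadata = sa.MetaData()
   data_table = sa.Table('data', metadata,
                         sa.Column('Col_1', sa.String),
                         sa.Column('Col_2', sa.Float),
                         sa.Column('Col_3', sa.Boolean))
   metadata.create_all(engine)

   # ``is_()`` builds a SQL ``IS`` comparison, so the filter still runs in the
   # database; the plain ``is`` operator would evaluate to ``False`` in Python
   # before SQLAlchemy ever saw it.
   query = sa.select([data_table]).where(data_table.c.Col_3.is_(True))
   pd.read_sql(query, engine)

The same pattern works for ``False`` via ``.is_(False)``, which satisfies both flake8 and SQLAlchemy's expression machinery.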