From 6faa5a66c005343c71b1ce623bb3ff89affd72b1 Mon Sep 17 00:00:00 2001 From: Chris MacLeod Date: Fri, 26 May 2017 12:06:55 -0300 Subject: [PATCH 01/55] PERF: HDFStore has faster __unicode__, new info() method with old behavior. __unicode__ now only returns file path info, not (expensive) details on all existing keys. --- doc/source/api.rst | 1 + doc/source/whatsnew/v0.21.0.txt | 3 +- pandas/io/pytables.py | 59 ++++++++++++++++++-------------- pandas/tests/io/test_pytables.py | 37 +++++++++++--------- 4 files changed, 56 insertions(+), 44 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 888bb6d67e94b..350abb00f0849 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -99,6 +99,7 @@ HDFStore: PyTables (HDF5) HDFStore.append HDFStore.get HDFStore.select + HDFStore.info Feather ~~~~~~~ diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index b4ca3f011a81d..246cf2e4bc8f6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -49,6 +49,8 @@ Backwards incompatible API changes - Accessing a non-existent attribute on a closed :class:`HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) +- :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`). + .. _whatsnew_0210.api: Other API Changes @@ -77,7 +79,6 @@ Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - .. _whatsnew_0210.bug_fixes: Bug Fixes diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 6665ccf8ce4c5..625b407dd43be 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -494,32 +494,7 @@ def __len__(self): return len(self.groups()) def __unicode__(self): - output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) - if self.is_open: - lkeys = sorted(list(self.keys())) - if len(lkeys): - keys = [] - values = [] - - for k in lkeys: - try: - s = self.get_storer(k) - if s is not None: - keys.append(pprint_thing(s.pathname or k)) - values.append( - pprint_thing(s or 'invalid_HDFStore node')) - except Exception as detail: - keys.append(k) - values.append("[invalid_HDFStore node: %s]" - % pprint_thing(detail)) - - output += adjoin(12, keys, values) - else: - output += 'Empty' - else: - output += "File is CLOSED" - - return output + return '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) def __enter__(self): return self @@ -1161,6 +1136,38 @@ def copy(self, file, mode='w', propindexes=True, keys=None, complib=None, return new_store + def info(self): + """return detailed information on the store + + .. 
versionadded:: 0.21.0 + """ + output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) + if self.is_open: + lkeys = sorted(list(self.keys())) + if len(lkeys): + keys = [] + values = [] + + for k in lkeys: + try: + s = self.get_storer(k) + if s is not None: + keys.append(pprint_thing(s.pathname or k)) + values.append( + pprint_thing(s or 'invalid_HDFStore node')) + except Exception as detail: + keys.append(k) + values.append("[invalid_HDFStore node: %s]" + % pprint_thing(detail)) + + output += adjoin(12, keys, values) + else: + output += 'Empty' + else: + output += "File is CLOSED" + + return output + # private methods ###### def _check_if_open(self): if not self.is_open: diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 17f524cc279c0..06a4a67964b96 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -387,6 +387,7 @@ def test_repr(self): with ensure_clean_store(self.path) as store: repr(store) + store.info() store['a'] = tm.makeTimeSeries() store['b'] = tm.makeStringSeries() store['c'] = tm.makeDataFrame() @@ -418,8 +419,9 @@ def test_repr(self): # make a random group in hdf space store._handle.create_group(store._handle.root, 'bah') - repr(store) - str(store) + assert store.filename in repr(store) + assert store.filename in str(store) + store.info() # storers with ensure_clean_store(self.path) as store: @@ -4371,11 +4373,11 @@ def test_multiple_open_close(self): # single store = HDFStore(path) - assert 'CLOSED' not in str(store) + assert 'CLOSED' not in store.info() assert store.is_open store.close() - assert 'CLOSED' in str(store) + assert 'CLOSED' in store.info() assert not store.is_open with ensure_clean_path(self.path) as path: @@ -4396,20 +4398,20 @@ def f(): store1 = HDFStore(path) store2 = HDFStore(path) - assert 'CLOSED' not in str(store1) - assert 'CLOSED' not in str(store2) + assert 'CLOSED' not in store1.info() + assert 'CLOSED' not in store2.info() assert store1.is_open assert store2.is_open store1.close() - assert 'CLOSED' in str(store1) + assert 'CLOSED' in store1.info() assert not store1.is_open - assert 'CLOSED' not in str(store2) + assert 'CLOSED' not in store2.info() assert store2.is_open store2.close() - assert 'CLOSED' in str(store1) - assert 'CLOSED' in str(store2) + assert 'CLOSED' in store1.info() + assert 'CLOSED' in store2.info() assert not store1.is_open assert not store2.is_open @@ -4420,11 +4422,11 @@ def f(): store2 = HDFStore(path) store2.append('df2', df) store2.close() - assert 'CLOSED' in str(store2) + assert 'CLOSED' in store2.info() assert not store2.is_open store.close() - assert 'CLOSED' in str(store) + assert 'CLOSED' in store.info() assert not store.is_open # double closing @@ -4433,11 +4435,11 @@ def f(): store2 = HDFStore(path) store.close() - assert 'CLOSED' in str(store) + assert 'CLOSED' in store.info() assert not store.is_open store2.close() - assert 'CLOSED' in str(store2) + assert 'CLOSED' in store2.info() assert not store2.is_open # ops on a closed store @@ -4784,9 +4786,10 @@ def test_categorical(self): tm.assert_frame_equal(result, df2) # Make sure the metadata is OK - assert '/df2 ' in str(store) - assert '/df2/meta/values_block_0/meta' in str(store) - assert '/df2/meta/values_block_1/meta' in str(store) + info = store.info() + assert '/df2 ' in info + assert '/df2/meta/values_block_0/meta' in info + assert '/df2/meta/values_block_1/meta' in info # unordered s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[ From 
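The practical effect of PATCH 01 is easiest to see in use: the string representation stays cheap no matter how many nodes the file holds, while the full per-node listing now requires an explicit call. A minimal sketch of the new split, assuming a writable scratch file (the file name and frame contents are illustrative only):

    import pandas as pd

    store = pd.HDFStore('scratch.h5')
    store['df'] = pd.DataFrame({'a': [1, 2, 3]})

    # repr/str now report only the class and file path, with no walk
    # over the stored keys:
    print(store)

    # The old detailed output, one row per stored node, moved to info():
    print(store.info())

    store.close()
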
c8af4cf79fcdbb02e009f63f95a93b07af3dee87 Mon Sep 17 00:00:00 2001 From: Christoph Moehl Date: Fri, 26 May 2017 16:43:12 +0200 Subject: [PATCH 02/55] ENH: added margins_name parameter for crosstab (#16489) * ENH #15972 added margins_name parameter for crosstab * ENH 15972 minor changes as suggested by reviewers * ENH 15972 correction in whatsnew * ENH 15972 style changes in whatsnew --- doc/source/whatsnew/v0.20.0.txt | 1 - doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/reshape/pivot.py | 28 ++++++++++++++-------- pandas/tests/reshape/test_pivot.py | 37 ++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a0bf2f9b3758a..9d475390175b2 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -515,7 +515,6 @@ Other Enhancements - Options added to allow one to turn on/off using ``bottleneck`` and ``numexpr``, see :ref:`here ` (:issue:`16157`) - ``DataFrame.style.bar()`` now accepts two more options to further customize the bar chart. Bar alignment is set with ``align='left'|'mid'|'zero'``, the default is "left", which is backward compatible; You can now pass a list of ``color=[color_negative, color_positive]``. (:issue:`14757`) - .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 246cf2e4bc8f6..2a38fad37584b 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -37,6 +37,7 @@ Other Enhancements - :func:`api.types.infer_dtype` now infers decimals. (:issue: `15690`) - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) - :func:`DataFrame.clip()` and :func: `Series.cip()` have gained an inplace argument. (:issue: `15388`) +- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when margins=True. (:issue:`15972`) .. _whatsnew_0210.api_breaking: diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 74dbbfc00cb11..b562f8a32f5c9 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -388,7 +388,8 @@ def _convert_by(by): def crosstab(index, columns, values=None, rownames=None, colnames=None, - aggfunc=None, margins=False, dropna=True, normalize=False): + aggfunc=None, margins=False, margins_name='All', dropna=True, + normalize=False): """ Compute a simple cross-tabulation of two (or more) factors. By default computes a frequency table of the factors unless an array of values and an @@ -411,6 +412,12 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, If passed, must match number of column arrays passed margins : boolean, default False Add row/column margins (subtotals) + margins_name : string, default 'All' + Name of the row / column that will contain the totals + when margins is True. + + .. 
versionadded:: 0.21.0 + dropna : boolean, default True Do not include columns whose entries are all NaN normalize : boolean, {'all', 'index', 'columns'}, or {0,1}, default False @@ -490,23 +497,26 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, df = DataFrame(data) df['__dummy__'] = 0 table = df.pivot_table('__dummy__', index=rownames, columns=colnames, - aggfunc=len, margins=margins, dropna=dropna) + aggfunc=len, margins=margins, + margins_name=margins_name, dropna=dropna) table = table.fillna(0).astype(np.int64) else: data['__dummy__'] = values df = DataFrame(data) table = df.pivot_table('__dummy__', index=rownames, columns=colnames, - aggfunc=aggfunc, margins=margins, dropna=dropna) + aggfunc=aggfunc, margins=margins, + margins_name=margins_name, dropna=dropna) # Post-process if normalize is not False: - table = _normalize(table, normalize=normalize, margins=margins) + table = _normalize(table, normalize=normalize, margins=margins, + margins_name=margins_name) return table -def _normalize(table, normalize, margins): +def _normalize(table, normalize, margins, margins_name='All'): if not isinstance(normalize, bool) and not isinstance(normalize, compat.string_types): @@ -537,9 +547,9 @@ def _normalize(table, normalize, margins): elif margins is True: - column_margin = table.loc[:, 'All'].drop('All') - index_margin = table.loc['All', :].drop('All') - table = table.drop('All', axis=1).drop('All') + column_margin = table.loc[:, margins_name].drop(margins_name) + index_margin = table.loc[margins_name, :].drop(margins_name) + table = table.drop(margins_name, axis=1).drop(margins_name) # to keep index and columns names table_index_names = table.index.names table_columns_names = table.columns.names @@ -561,7 +571,7 @@ def _normalize(table, normalize, margins): elif normalize == "all" or normalize is True: column_margin = column_margin / column_margin.sum() index_margin = index_margin / index_margin.sum() - index_margin.loc['All'] = 1 + index_margin.loc[margins_name] = 1 table = concat([table, column_margin], axis=1) table = table.append(index_margin) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 270a93e4ae382..fc5a2eb468d4f 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1071,6 +1071,43 @@ def test_crosstab_margins(self): exp_rows = exp_rows.fillna(0).astype(np.int64) tm.assert_series_equal(all_rows, exp_rows) + def test_crosstab_margins_set_margin_name(self): + # GH 15972 + a = np.random.randint(0, 7, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 5, size=100) + + df = DataFrame({'a': a, 'b': b, 'c': c}) + + result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'), + margins=True, margins_name='TOTAL') + + assert result.index.names == ('a',) + assert result.columns.names == ['b', 'c'] + + all_cols = result['TOTAL', ''] + exp_cols = df.groupby(['a']).size().astype('i8') + # to keep index.name + exp_margin = Series([len(df)], index=Index(['TOTAL'], name='a')) + exp_cols = exp_cols.append(exp_margin) + exp_cols.name = ('TOTAL', '') + + tm.assert_series_equal(all_cols, exp_cols) + + all_rows = result.loc['TOTAL'] + exp_rows = df.groupby(['b', 'c']).size().astype('i8') + exp_rows = exp_rows.append(Series([len(df)], index=[('TOTAL', '')])) + exp_rows.name = 'TOTAL' + + exp_rows = exp_rows.reindex(all_rows.index) + exp_rows = exp_rows.fillna(0).astype(np.int64) + tm.assert_series_equal(all_rows, exp_rows) + + for margins_name in [666, None, ['a', 
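In use, the new keyword only relabels the totals row and column that ``margins=True`` adds; the rest of the table, including normalization, is unchanged. A small sketch (the input data is illustrative):

    import pandas as pd

    df = pd.DataFrame({'a': ['x', 'x', 'y'], 'b': [1, 2, 2]})

    # The totals row/column is labelled 'All' by default:
    pd.crosstab(df['a'], df['b'], margins=True)

    # New in 0.21.0: give it another name; non-string values are
    # rejected with ValueError, as the surrounding test asserts:
    pd.crosstab(df['a'], df['b'], margins=True, margins_name='TOTAL')
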
'b']]: + with pytest.raises(ValueError): + crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'), + margins=True, margins_name=margins_name) + def test_crosstab_pass_values(self): a = np.random.randint(0, 7, size=100) b = np.random.randint(0, 3, size=100) From 840de2fffad125c07bf787f8d917f115545e5a46 Mon Sep 17 00:00:00 2001 From: Aaron Barber Date: Fri, 26 May 2017 12:11:55 -0700 Subject: [PATCH 03/55] TST: ujson tests are not being run (#16499) (#16500) closes #16499 --- pandas/tests/io/json/test_ujson.py | 44 +++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 662f06dbb725e..76fb6d442a25a 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -27,7 +27,7 @@ else partial(json.dumps, encoding="utf-8")) -class UltraJSONTests(object): +class TestUltraJSONTests(object): @pytest.mark.skipif(compat.is_platform_32bit(), reason="not compliant on 32-bit, xref #15865") @@ -944,19 +944,19 @@ def my_obj_handler(obj): ujson.decode(ujson.encode(l, default_handler=str))) -class NumpyJSONTests(object): +class TestNumpyJSONTests(object): - def testBool(self): + def test_Bool(self): b = np.bool(True) assert ujson.decode(ujson.encode(b)) == b - def testBoolArray(self): + def test_BoolArray(self): inpt = np.array([True, False, True, True, False, True, False, False], dtype=np.bool) outp = np.array(ujson.decode(ujson.encode(inpt)), dtype=np.bool) tm.assert_numpy_array_equal(inpt, outp) - def testInt(self): + def test_Int(self): num = np.int(2562010) assert np.int(ujson.decode(ujson.encode(num))) == num @@ -984,7 +984,7 @@ def testInt(self): num = np.uint64(2562010) assert np.uint64(ujson.decode(ujson.encode(num))) == num - def testIntArray(self): + def test_IntArray(self): arr = np.arange(100, dtype=np.int) dtypes = (np.int, np.int8, np.int16, np.int32, np.int64, np.uint, np.uint8, np.uint16, np.uint32, np.uint64) @@ -993,7 +993,7 @@ def testIntArray(self): outp = np.array(ujson.decode(ujson.encode(inpt)), dtype=dtype) tm.assert_numpy_array_equal(inpt, outp) - def testIntMax(self): + def test_IntMax(self): num = np.int(np.iinfo(np.int).max) assert np.int(ujson.decode(ujson.encode(num))) == num @@ -1023,7 +1023,7 @@ def testIntMax(self): num = np.uint64(np.iinfo(np.int64).max) assert np.uint64(ujson.decode(ujson.encode(num))) == num - def testFloat(self): + def test_Float(self): num = np.float(256.2013) assert np.float(ujson.decode(ujson.encode(num))) == num @@ -1033,7 +1033,7 @@ def testFloat(self): num = np.float64(256.2013) assert np.float64(ujson.decode(ujson.encode(num))) == num - def testFloatArray(self): + def test_FloatArray(self): arr = np.arange(12.5, 185.72, 1.7322, dtype=np.float) dtypes = (np.float, np.float32, np.float64) @@ -1043,7 +1043,7 @@ def testFloatArray(self): inpt, double_precision=15)), dtype=dtype) tm.assert_almost_equal(inpt, outp) - def testFloatMax(self): + def test_FloatMax(self): num = np.float(np.finfo(np.float).max / 10) tm.assert_almost_equal(np.float(ujson.decode( ujson.encode(num, double_precision=15))), num, 15) @@ -1056,7 +1056,7 @@ def testFloatMax(self): tm.assert_almost_equal(np.float64(ujson.decode( ujson.encode(num, double_precision=15))), num, 15) - def testArrays(self): + def test_Arrays(self): arr = np.arange(100) arr = arr.reshape((10, 10)) @@ -1097,13 +1097,13 @@ def testArrays(self): outp = ujson.decode(ujson.encode(arr), numpy=True, dtype=np.float32) tm.assert_almost_equal(arr, outp) - def 
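These renames are mechanical, but they are the whole fix for #16499: by default pytest only collects test classes whose names match ``Test*``, so classes named in the ``UltraJSONTests`` style were silently skipped. A toy illustration of the collection rule (the class and method names here are hypothetical):

    # Collected: the class name matches pytest's default ``Test*`` pattern.
    class TestUltraJSON(object):
        def test_roundtrip(self):
            assert 1 + 1 == 2

    # Not collected: the name does not start with ``Test``, so every test
    # inside is skipped without any error or warning.
    class UltraJSONTests(object):
        def test_roundtrip(self):
            assert 1 + 1 == 2
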
testOdArray(self): + def test_OdArray(self): def will_raise(): ujson.encode(np.array(1)) pytest.raises(TypeError, will_raise) - def testArrayNumpyExcept(self): + def test_ArrayNumpyExcept(self): input = ujson.dumps([42, {}, 'a']) try: @@ -1186,7 +1186,7 @@ def testArrayNumpyExcept(self): except: assert False, "Wrong exception" - def testArrayNumpyLabelled(self): + def test_ArrayNumpyLabelled(self): input = {'a': []} output = ujson.loads(ujson.dumps(input), numpy=True, labelled=True) assert (np.empty((1, 0)) == output[0]).all() @@ -1220,9 +1220,9 @@ def testArrayNumpyLabelled(self): assert (np.array(['a', 'b']) == output[2]).all() -class PandasJSONTests(object): +class TestPandasJSONTests(object): - def testDataFrame(self): + def test_DataFrame(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[ 'a', 'b'], columns=['x', 'y', 'z']) @@ -1252,7 +1252,7 @@ def testDataFrame(self): tm.assert_index_equal(df.transpose().columns, outp.columns) tm.assert_index_equal(df.transpose().index, outp.index) - def testDataFrameNumpy(self): + def test_DataFrameNumpy(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[ 'a', 'b'], columns=['x', 'y', 'z']) @@ -1275,7 +1275,7 @@ def testDataFrameNumpy(self): tm.assert_index_equal(df.transpose().columns, outp.columns) tm.assert_index_equal(df.transpose().index, outp.index) - def testDataFrameNested(self): + def test_DataFrameNested(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[ 'a', 'b'], columns=['x', 'y', 'z']) @@ -1301,7 +1301,7 @@ def testDataFrameNested(self): 'df2': ujson.decode(ujson.encode(df, orient="split"))} assert ujson.decode(ujson.encode(nested, orient="split")) == exp - def testDataFrameNumpyLabelled(self): + def test_DataFrameNumpyLabelled(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[ 'a', 'b'], columns=['x', 'y', 'z']) @@ -1324,7 +1324,7 @@ def testDataFrameNumpyLabelled(self): tm.assert_index_equal(df.columns, outp.columns) tm.assert_index_equal(df.index, outp.index) - def testSeries(self): + def test_Series(self): s = Series([10, 20, 30, 40, 50, 60], name="series", index=[6, 7, 8, 9, 10, 15]).sort_values() @@ -1372,7 +1372,7 @@ def testSeries(self): s, orient="index"), numpy=True)).sort_values() tm.assert_series_equal(outp, exp) - def testSeriesNested(self): + def test_SeriesNested(self): s = Series([10, 20, 30, 40, 50, 60], name="series", index=[6, 7, 8, 9, 10, 15]).sort_values() @@ -1398,7 +1398,7 @@ def testSeriesNested(self): 's2': ujson.decode(ujson.encode(s, orient="index"))} assert ujson.decode(ujson.encode(nested, orient="index")) == exp - def testIndex(self): + def test_Index(self): i = Index([23, 45, 18, 98, 43, 11], name="index") # column indexed From 4ed801b1e0c8bace0365a488b3d1692462966145 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 26 May 2017 21:35:11 -0400 Subject: [PATCH 04/55] DOC: Remove preference for pytest paradigm in assert_raises_regex (#16518) --- pandas/util/testing.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index f987045c27d5f..17e09b38b20e0 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2424,15 +2424,8 @@ def assert_raises_regex(_exception, _regexp, _callable=None, Check that the specified Exception is raised and that the error message matches a given regular expression pattern. This may be a regular expression object or a string containing a regular expression suitable - for use by `re.search()`. - - This is a port of the `assertRaisesRegexp` function from unittest in - Python 2.7. 
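The helper itself is unchanged and keeps working both as a context manager and as a callable wrapper; only the recommendation against it is dropped. A minimal usage sketch:

    import pandas.util.testing as tm

    # Context-manager form: passes because the raised message matches
    # the pattern via re.search().
    with tm.assert_raises_regex(ValueError, 'invalid literal'):
        int('not a number')

    # Callable form: the callable and its arguments are passed through.
    tm.assert_raises_regex(ValueError, 'invalid literal',
                           int, 'not a number')
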
However, with our migration to `pytest`, please refrain - from using this. Instead, use the following paradigm: - - with pytest.raises(_exception) as exc_info: - func(*args, **kwargs) - exc_info.matches(reg_exp) + for use by `re.search()`. This is a port of the `assertRaisesRegexp` + function from unittest in Python 2.7. Examples -------- From c570eafff06c1518ea59da74f97908d9d5135c07 Mon Sep 17 00:00:00 2001 From: "John W. O'Brien" Date: Mon, 29 May 2017 12:00:42 -0400 Subject: [PATCH 05/55] TST: Specify HTML file encoding on PY3 (#16526) --- pandas/tests/io/test_html.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 6da77bf423609..1e1d653cf94d1 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -20,7 +20,7 @@ from pandas import (DataFrame, MultiIndex, read_csv, Timestamp, Index, date_range, Series) from pandas.compat import (map, zip, StringIO, string_types, BytesIO, - is_platform_windows) + is_platform_windows, PY3) from pandas.io.common import URLError, urlopen, file_path_to_url from pandas.io.html import read_html from pandas._libs.parsers import ParserError @@ -96,6 +96,9 @@ def read_html(self, *args, **kwargs): class TestReadHtml(ReadHtmlMixin): flavor = 'bs4' spam_data = os.path.join(DATA_PATH, 'spam.html') + spam_data_kwargs = {} + if PY3: + spam_data_kwargs['encoding'] = 'UTF-8' banklist_data = os.path.join(DATA_PATH, 'banklist.html') @classmethod @@ -247,10 +250,10 @@ def test_infer_types(self): assert_framelist_equal(df1, df2) def test_string_io(self): - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: data1 = StringIO(f.read()) - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: data2 = StringIO(f.read()) df1 = self.read_html(data1, '.*Water.*') @@ -258,7 +261,7 @@ def test_string_io(self): assert_framelist_equal(df1, df2) def test_string(self): - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: data = f.read() df1 = self.read_html(data, '.*Water.*') @@ -267,10 +270,10 @@ def test_string(self): assert_framelist_equal(df1, df2) def test_file_like(self): - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: df1 = self.read_html(f, '.*Water.*') - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: df2 = self.read_html(f, 'Unit') assert_framelist_equal(df1, df2) From 44d2a1232c21b0426b0ff9f866fd65e2cda71250 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 30 May 2017 17:52:55 -0500 Subject: [PATCH 06/55] BUG: Fixed tput output on windows (#16496) --- doc/source/whatsnew/v0.20.2.txt | 2 ++ pandas/io/formats/terminal.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 13365401f1d1c..7b7f9e8745809 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -37,6 +37,8 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Silenced a warning on some Windows environments about "tput: terminal attributes: No such device or address" when + detecting the terminal size. 
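The mechanism of the fix: on Python 3, pandas now defers terminal-size detection to the standard library instead of shelling out to ``tput``, which is what produced the stray warning on some Windows environments. The stdlib call it delegates to:

    import shutil

    # Reads the COLUMNS/LINES environment variables if set, otherwise
    # queries the OS via os.get_terminal_size(); no subprocess involved.
    columns, lines = shutil.get_terminal_size()
    print(columns, lines)
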
This fix only applies to python 3 (:issue:`16496`) - Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`) - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`) diff --git a/pandas/io/formats/terminal.py b/pandas/io/formats/terminal.py index dadd09ae74ea4..30bd1d16b538a 100644 --- a/pandas/io/formats/terminal.py +++ b/pandas/io/formats/terminal.py @@ -14,6 +14,8 @@ from __future__ import print_function import os +import sys +import shutil __all__ = ['get_terminal_size'] @@ -26,6 +28,10 @@ def get_terminal_size(): IPython zmq frontends, or IDLE do not run in a terminal, """ import platform + + if sys.version_info[0] >= 3: + return shutil.get_terminal_size() + current_os = platform.system() tuple_xy = None if current_os == 'Windows': From 1a9cb5bcfa98876687a4001987e420b06db06aa5 Mon Sep 17 00:00:00 2001 From: keitakurita Date: Wed, 31 May 2017 08:12:50 +0900 Subject: [PATCH 07/55] BUG: Incorrect handling of rolling.cov with offset window (#16244) --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/core/window.py | 9 ++++++++- pandas/tests/test_window.py | 23 +++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 7b7f9e8745809..90146aa176b31 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -82,6 +82,7 @@ Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug creating datetime rolling window on an empty DataFrame (:issue:`15819`) +- Bug in ``rolling.cov()`` with offset window (:issue:`16058`) Sparse diff --git a/pandas/core/window.py b/pandas/core/window.py index cf1bad706ae1d..ba7e79944ab0e 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -81,6 +81,7 @@ def __init__(self, obj, window=None, min_periods=None, freq=None, self.freq = freq self.center = center self.win_type = win_type + self.win_freq = None self.axis = obj._get_axis_number(axis) if axis is not None else None self.validate() @@ -996,7 +997,12 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs): # only default unset pairwise = True if pairwise is None else pairwise other = self._shallow_copy(other) - window = self._get_window(other) + + # GH 16058: offset window + if self.is_freq_type: + window = self.win_freq + else: + window = self._get_window(other) def _get_cov(X, Y): # GH #12373 : rolling functions error on float32 data @@ -1088,6 +1094,7 @@ def validate(self): "based windows") # this will raise ValueError on non-fixed freqs + self.win_freq = self.window self.window = freq.nanos self.win_type = 'freq' diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 6a640d62108b3..cbb3c345a9353 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3833,3 +3833,26 @@ def test_non_monotonic(self): df2 = df.sort_values('B') result = df2.groupby('A').rolling('4s', on='B').C.mean() tm.assert_series_equal(result, expected) + + def test_rolling_cov_offset(self): + # GH16058 + + idx = pd.date_range('2017-01-01', periods=24, freq='1h') + ss = pd.Series(np.arange(len(idx)), index=idx) + + result = ss.rolling('2h').cov() + expected = pd.Series([np.nan] + [0.5 for _ in range(len(idx) - 1)], + index=idx) + tm.assert_series_equal(result, expected) + + expected2 = ss.rolling(2, min_periods=1).cov() + tm.assert_series_equal(result, expected2) + + result = ss.rolling('3h').cov() + expected = pd.Series([np.nan, 0.5] + + [1.0 for _ in range(len(idx) - 2)], + index=idx) + 
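Before this fix, an offset window such as ``'2h'`` was collapsed to its nanosecond count when ``cov()`` rebuilt the window from ``other``, producing wrong results; keeping the original frequency string in ``win_freq`` makes the offset and count forms agree. A sketch mirroring the new test:

    import numpy as np
    import pandas as pd

    idx = pd.date_range('2017-01-01', periods=24, freq='1h')
    ss = pd.Series(np.arange(len(idx)), index=idx)

    # The offset-based window now matches the equivalent count-based one:
    pd.testing.assert_series_equal(ss.rolling('2h').cov(),
                                   ss.rolling(2, min_periods=1).cov())
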
tm.assert_series_equal(result, expected) + + expected2 = ss.rolling(3, min_periods=1).cov() + tm.assert_series_equal(result, expected2) From 0a9f54848ec03508d85321dcf9563aac0831c9e4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 30 May 2017 21:39:15 -0500 Subject: [PATCH 08/55] TST: Avoid global state in matplotlib tests (#16539) Replaces most uses of implicit global state from matplotlib in test_datetimelike.py. This was potentially causing random failures where a figure expected to be on a new, blank figure would instead plot on an existing axes (that's the guess at least). --- pandas/tests/plotting/test_datetimelike.py | 379 +++++++++++---------- pandas/tests/plotting/test_series.py | 159 ++++++--- 2 files changed, 301 insertions(+), 237 deletions(-) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 0e15aaa2555f4..0cff365be3ec8 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -55,16 +55,15 @@ def test_ts_plot_with_tz(self): def test_fontsize_set_correctly(self): # For issue #8765 - import matplotlib.pyplot as plt # noqa df = DataFrame(np.random.randn(10, 9), index=range(10)) - ax = df.plot(fontsize=2) + fig, ax = self.plt.subplots() + df.plot(fontsize=2, ax=ax) for label in (ax.get_xticklabels() + ax.get_yticklabels()): assert label.get_fontsize() == 2 @slow def test_frame_inferred(self): # inferred freq - import matplotlib.pyplot as plt # noqa idx = date_range('1/1/1987', freq='MS', periods=100) idx = DatetimeIndex(idx.values, freq=None) @@ -90,26 +89,24 @@ def test_is_error_nozeroindex(self): _check_plot_works(a.plot, yerr=a) def test_nonnumeric_exclude(self): - import matplotlib.pyplot as plt - idx = date_range('1/1/1987', freq='A', periods=3) df = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]}, idx) - ax = df.plot() # it works + fig, ax = self.plt.subplots() + df.plot(ax=ax) # it works assert len(ax.get_lines()) == 1 # B was plotted - plt.close(plt.gcf()) + self.plt.close(fig) pytest.raises(TypeError, df['A'].plot) @slow def test_tsplot(self): from pandas.tseries.plotting import tsplot - import matplotlib.pyplot as plt - ax = plt.gca() + _, ax = self.plt.subplots() ts = tm.makeTimeSeries() - f = lambda *args, **kwds: tsplot(s, plt.Axes.plot, *args, **kwds) + f = lambda *args, **kwds: tsplot(s, self.plt.Axes.plot, *args, **kwds) for s in self.period_ser: _check_plot_works(f, s.index.freq, ax=ax, series=s) @@ -123,12 +120,12 @@ def test_tsplot(self): for s in self.datetime_ser: _check_plot_works(s.plot, ax=ax) - ax = ts.plot(style='k') + _, ax = self.plt.subplots() + ts.plot(style='k', ax=ax) color = (0., 0., 0., 1) if self.mpl_ge_2_0_0 else (0., 0., 0.) 
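The refactoring pattern applied throughout the two test modules is the same in every hunk: instead of letting ``plot()`` draw on whatever axes pyplot currently considers active, each test creates its figure and axes explicitly and threads ``ax`` through. A stripped-down sketch of the before and after (the Series is illustrative):

    import matplotlib.pyplot as plt
    import pandas as pd

    s = pd.Series(range(5))

    # Fragile: draws on the implicit "current" axes, so a figure leaked
    # by an earlier test can change what this plots onto.
    s.plot()

    # Isolated: the axes are created locally and passed in explicitly,
    # then closed, leaving no global state behind.
    fig, ax = plt.subplots()
    s.plot(ax=ax)
    plt.close(fig)
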
assert color == ax.get_lines()[0].get_color() def test_both_style_and_color(self): - import matplotlib.pyplot as plt # noqa ts = tm.makeTimeSeries() pytest.raises(ValueError, ts.plot, style='b-', color='#000099') @@ -140,9 +137,10 @@ def test_both_style_and_color(self): def test_high_freq(self): freaks = ['ms', 'us'] for freq in freaks: + _, ax = self.plt.subplots() rng = date_range('1/1/2012', periods=100000, freq=freq) ser = Series(np.random.randn(len(rng)), rng) - _check_plot_works(ser.plot) + _check_plot_works(ser.plot, ax=ax) def test_get_datevalue(self): from pandas.plotting._converter import get_datevalue @@ -167,22 +165,25 @@ def check_format_of_first_point(ax, expected_string): annual = Series(1, index=date_range('2014-01-01', periods=3, freq='A-DEC')) - check_format_of_first_point(annual.plot(), 't = 2014 y = 1.000000') + _, ax = self.plt.subplots() + annual.plot(ax=ax) + check_format_of_first_point(ax, 't = 2014 y = 1.000000') # note this is added to the annual plot already in existence, and # changes its freq field daily = Series(1, index=date_range('2014-01-01', periods=3, freq='D')) - check_format_of_first_point(daily.plot(), + daily.plot(ax=ax) + check_format_of_first_point(ax, 't = 2014-01-01 y = 1.000000') tm.close() # tsplot - import matplotlib.pyplot as plt + _, ax = self.plt.subplots() from pandas.tseries.plotting import tsplot - tsplot(annual, plt.Axes.plot) - check_format_of_first_point(plt.gca(), 't = 2014 y = 1.000000') - tsplot(daily, plt.Axes.plot) - check_format_of_first_point(plt.gca(), 't = 2014-01-01 y = 1.000000') + tsplot(annual, self.plt.Axes.plot, ax=ax) + check_format_of_first_point(ax, 't = 2014 y = 1.000000') + tsplot(daily, self.plt.Axes.plot, ax=ax) + check_format_of_first_point(ax, 't = 2014-01-01 y = 1.000000') @slow def test_line_plot_period_series(self): @@ -215,14 +216,11 @@ def test_line_plot_inferred_freq(self): _check_plot_works(ser.plot) def test_fake_inferred_business(self): - import matplotlib.pyplot as plt - fig = plt.gcf() - plt.clf() - fig.add_subplot(111) + _, ax = self.plt.subplots() rng = date_range('2001-1-1', '2001-1-10') ts = Series(lrange(len(rng)), rng) ts = ts[:3].append(ts[5:]) - ax = ts.plot() + ts.plot(ax=ax) assert not hasattr(ax, 'freq') @slow @@ -244,15 +242,11 @@ def test_plot_multiple_inferred_freq(self): @slow def test_uhf(self): import pandas.plotting._converter as conv - import matplotlib.pyplot as plt - fig = plt.gcf() - plt.clf() - fig.add_subplot(111) - idx = date_range('2012-6-22 21:59:51.960928', freq='L', periods=500) df = DataFrame(np.random.randn(len(idx), 2), idx) - ax = df.plot() + _, ax = self.plt.subplots() + df.plot(ax=ax) axis = ax.get_xaxis() tlocs = axis.get_ticklocs() @@ -265,49 +259,40 @@ def test_uhf(self): @slow def test_irreg_hf(self): - import matplotlib.pyplot as plt - fig = plt.gcf() - plt.clf() - fig.add_subplot(111) - idx = date_range('2012-6-22 21:59:51', freq='S', periods=100) df = DataFrame(np.random.randn(len(idx), 2), idx) irreg = df.iloc[[0, 1, 3, 4]] - ax = irreg.plot() + _, ax = self.plt.subplots() + irreg.plot(ax=ax) diffs = Series(ax.get_lines()[0].get_xydata()[:, 0]).diff() sec = 1. 
/ 24 / 60 / 60 assert (np.fabs(diffs[1:] - [sec, sec * 2, sec]) < 1e-8).all() - plt.clf() - fig.add_subplot(111) + _, ax = self.plt.subplots() df2 = df.copy() df2.index = df.index.asobject - ax = df2.plot() + df2.plot(ax=ax) diffs = Series(ax.get_lines()[0].get_xydata()[:, 0]).diff() assert (np.fabs(diffs[1:] - sec) < 1e-8).all() def test_irregular_datetime64_repr_bug(self): - import matplotlib.pyplot as plt ser = tm.makeTimeSeries() ser = ser[[0, 1, 2, 7]] - fig = plt.gcf() - plt.clf() + _, ax = self.plt.subplots() - ax = fig.add_subplot(211) - - ret = ser.plot() + ret = ser.plot(ax=ax) assert ret is not None for rs, xp in zip(ax.get_lines()[0].get_xdata(), ser.index): assert rs == xp def test_business_freq(self): - import matplotlib.pyplot as plt # noqa bts = tm.makePeriodSeries() - ax = bts.plot() + _, ax = self.plt.subplots() + bts.plot(ax=ax) assert ax.get_lines()[0].get_xydata()[0, 0] == bts.index[0].ordinal idx = ax.get_lines()[0].get_xdata() assert PeriodIndex(data=idx).freqstr == 'B' @@ -319,7 +304,8 @@ def test_business_freq_convert(self): bts = tm.makeTimeSeries().asfreq('BM') tm.N = n ts = bts.to_period('M') - ax = bts.plot() + _, ax = self.plt.subplots() + bts.plot(ax=ax) assert ax.get_lines()[0].get_xydata()[0, 0] == ts.index[0].ordinal idx = ax.get_lines()[0].get_xdata() assert PeriodIndex(data=idx).freqstr == 'M' @@ -329,19 +315,20 @@ def test_nonzero_base(self): idx = (date_range('2012-12-20', periods=24, freq='H') + timedelta( minutes=30)) df = DataFrame(np.arange(24), index=idx) - ax = df.plot() + _, ax = self.plt.subplots() + df.plot(ax=ax) rs = ax.get_lines()[0].get_xdata() assert not Index(rs).is_normalized def test_dataframe(self): bts = DataFrame({'a': tm.makeTimeSeries()}) - ax = bts.plot() + _, ax = self.plt.subplots() + bts.plot(ax=ax) idx = ax.get_lines()[0].get_xdata() tm.assert_index_equal(bts.index.to_period(), PeriodIndex(idx)) @slow def test_axis_limits(self): - import matplotlib.pyplot as plt def _test(ax): xlim = ax.get_xlim() @@ -369,14 +356,16 @@ def _test(ax): assert int(result[0]) == expected[0].ordinal assert int(result[1]) == expected[1].ordinal fig = ax.get_figure() - plt.close(fig) + self.plt.close(fig) ser = tm.makeTimeSeries() - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) _test(ax) + _, ax = self.plt.subplots() df = DataFrame({'a': ser, 'b': ser + 1}) - ax = df.plot() + df.plot(ax=ax) _test(ax) df = DataFrame({'a': ser, 'b': ser + 1}) @@ -397,13 +386,13 @@ def test_get_finder(self): @slow def test_finder_daily(self): - import matplotlib.pyplot as plt xp = Period('1999-1-1', freq='B').ordinal day_lst = [10, 40, 252, 400, 950, 2750, 10000] for n in day_lst: rng = bdate_range('1999-1-1', periods=n) ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] assert xp == rs @@ -411,17 +400,17 @@ def test_finder_daily(self): ax.set_xlim(vmin + 0.9, vmax) rs = xaxis.get_majorticklocs()[0] assert xp == rs - plt.close(ax.get_figure()) + self.plt.close(ax.get_figure()) @slow def test_finder_quarterly(self): - import matplotlib.pyplot as plt xp = Period('1988Q1').ordinal yrs = [3.5, 11] for n in yrs: rng = period_range('1987Q2', periods=int(n * 4), freq='Q') ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] assert rs == xp @@ -429,17 +418,17 @@ def test_finder_quarterly(self): ax.set_xlim(vmin + 0.9, vmax) rs = 
xaxis.get_majorticklocs()[0] assert xp == rs - plt.close(ax.get_figure()) + self.plt.close(ax.get_figure()) @slow def test_finder_monthly(self): - import matplotlib.pyplot as plt xp = Period('Jan 1988').ordinal yrs = [1.15, 2.5, 4, 11] for n in yrs: rng = period_range('1987Q2', periods=int(n * 12), freq='M') ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] assert rs == xp @@ -447,12 +436,13 @@ def test_finder_monthly(self): ax.set_xlim(vmin + 0.9, vmax) rs = xaxis.get_majorticklocs()[0] assert xp == rs - plt.close(ax.get_figure()) + self.plt.close(ax.get_figure()) def test_finder_monthly_long(self): rng = period_range('1988Q1', periods=24 * 12, freq='M') ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] xp = Period('1989Q1', 'M').ordinal @@ -460,23 +450,24 @@ def test_finder_monthly_long(self): @slow def test_finder_annual(self): - import matplotlib.pyplot as plt xp = [1987, 1988, 1990, 1990, 1995, 2020, 2070, 2170] for i, nyears in enumerate([5, 10, 19, 49, 99, 199, 599, 1001]): rng = period_range('1987', periods=nyears, freq='A') ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] assert rs == Period(xp[i], freq='A').ordinal - plt.close(ax.get_figure()) + self.plt.close(ax.get_figure()) @slow def test_finder_minutely(self): nminutes = 50 * 24 * 60 rng = date_range('1/1/1999', freq='Min', periods=nminutes) ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] xp = Period('1/1/1999', freq='Min').ordinal @@ -486,7 +477,8 @@ def test_finder_hourly(self): nhours = 23 rng = date_range('1/1/1999', freq='H', periods=nhours) ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] xp = Period('1/1/1999', freq='H').ordinal @@ -494,11 +486,10 @@ def test_finder_hourly(self): @slow def test_gaps(self): - import matplotlib.pyplot as plt - ts = tm.makeTimeSeries() ts[5:25] = np.nan - ax = ts.plot() + _, ax = self.plt.subplots() + ts.plot(ax=ax) lines = ax.get_lines() tm._skip_if_mpl_1_5() assert len(lines) == 1 @@ -507,13 +498,14 @@ def test_gaps(self): assert isinstance(data, np.ma.core.MaskedArray) mask = data.mask assert mask[5:25, 1].all() - plt.close(ax.get_figure()) + self.plt.close(ax.get_figure()) # irregular ts = tm.makeTimeSeries() ts = ts[[0, 1, 2, 5, 7, 9, 12, 15, 20]] ts[2:5] = np.nan - ax = ts.plot() + _, ax = self.plt.subplots() + ax = ts.plot(ax=ax) lines = ax.get_lines() assert len(lines) == 1 l = lines[0] @@ -521,13 +513,14 @@ def test_gaps(self): assert isinstance(data, np.ma.core.MaskedArray) mask = data.mask assert mask[2:5, 1].all() - plt.close(ax.get_figure()) + self.plt.close(ax.get_figure()) # non-ts idx = [0, 1, 2, 5, 7, 9, 12, 15, 20] ser = Series(np.random.randn(len(idx)), idx) ser[2:5] = np.nan - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) lines = ax.get_lines() assert len(lines) == 1 l = lines[0] @@ -540,7 +533,8 @@ def test_gaps(self): def test_gap_upsample(self): low = tm.makeTimeSeries() low[5:25] = np.nan - ax = low.plot() + _, ax = self.plt.subplots() + low.plot(ax=ax) idxh = 
date_range(low.index[0], low.index[-1], freq='12h') s = Series(np.random.randn(len(idxh)), idxh) @@ -559,26 +553,25 @@ def test_gap_upsample(self): @slow def test_secondary_y(self): - import matplotlib.pyplot as plt - ser = Series(np.random.randn(10)) ser2 = Series(np.random.randn(10)) + fig, _ = self.plt.subplots() ax = ser.plot(secondary_y=True) assert hasattr(ax, 'left_ax') assert not hasattr(ax, 'right_ax') - fig = ax.get_figure() axes = fig.get_axes() l = ax.get_lines()[0] xp = Series(l.get_ydata(), l.get_xdata()) assert_series_equal(ser, xp) assert ax.get_yaxis().get_ticks_position() == 'right' assert not axes[0].get_yaxis().get_visible() - plt.close(fig) + self.plt.close(fig) - ax2 = ser2.plot() + _, ax2 = self.plt.subplots() + ser2.plot(ax=ax2) assert (ax2.get_yaxis().get_ticks_position() == self.default_tick_position) - plt.close(ax2.get_figure()) + self.plt.close(ax2.get_figure()) ax = ser2.plot() ax2 = ser.plot(secondary_y=True) @@ -590,26 +583,26 @@ def test_secondary_y(self): @slow def test_secondary_y_ts(self): - import matplotlib.pyplot as plt idx = date_range('1/1/2000', periods=10) ser = Series(np.random.randn(10), idx) ser2 = Series(np.random.randn(10), idx) + fig, _ = self.plt.subplots() ax = ser.plot(secondary_y=True) assert hasattr(ax, 'left_ax') assert not hasattr(ax, 'right_ax') - fig = ax.get_figure() axes = fig.get_axes() l = ax.get_lines()[0] xp = Series(l.get_ydata(), l.get_xdata()).to_timestamp() assert_series_equal(ser, xp) assert ax.get_yaxis().get_ticks_position() == 'right' assert not axes[0].get_yaxis().get_visible() - plt.close(fig) + self.plt.close(fig) - ax2 = ser2.plot() + _, ax2 = self.plt.subplots() + ser2.plot(ax=ax2) assert (ax2.get_yaxis().get_ticks_position() == self.default_tick_position) - plt.close(ax2.get_figure()) + self.plt.close(ax2.get_figure()) ax = ser2.plot() ax2 = ser.plot(secondary_y=True) @@ -620,20 +613,19 @@ def test_secondary_kde(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() - import matplotlib.pyplot as plt # noqa ser = Series(np.random.randn(10)) - ax = ser.plot(secondary_y=True, kind='density') + fig, ax = self.plt.subplots() + ax = ser.plot(secondary_y=True, kind='density', ax=ax) assert hasattr(ax, 'left_ax') assert not hasattr(ax, 'right_ax') - fig = ax.get_figure() axes = fig.get_axes() assert axes[1].get_yaxis().get_ticks_position() == 'right' @slow def test_secondary_bar(self): ser = Series(np.random.randn(10)) - ax = ser.plot(secondary_y=True, kind='bar') - fig = ax.get_figure() + fig, ax = self.plt.subplots() + ser.plot(secondary_y=True, kind='bar', ax=ax) axes = fig.get_axes() assert axes[1].get_yaxis().get_ticks_position() == 'right' @@ -656,7 +648,7 @@ def test_secondary_bar_frame(self): assert axes[2].get_yaxis().get_ticks_position() == 'right' def test_mixed_freq_regular_first(self): - import matplotlib.pyplot as plt # noqa + # TODO s1 = tm.makeTimeSeries() s2 = s1[[0, 5, 10, 11, 12, 13, 14, 15]] @@ -676,11 +668,11 @@ def test_mixed_freq_regular_first(self): @slow def test_mixed_freq_irregular_first(self): - import matplotlib.pyplot as plt # noqa s1 = tm.makeTimeSeries() s2 = s1[[0, 5, 10, 11, 12, 13, 14, 15]] - s2.plot(style='g') - ax = s1.plot() + _, ax = self.plt.subplots() + s2.plot(style='g', ax=ax) + s1.plot(ax=ax) assert not hasattr(ax, 'freq') lines = ax.get_lines() x1 = lines[0].get_xdata() @@ -690,10 +682,10 @@ def test_mixed_freq_irregular_first(self): def test_mixed_freq_regular_first_df(self): # GH 9852 - import matplotlib.pyplot as plt # noqa s1 = tm.makeTimeSeries().to_frame() s2 = 
s1.iloc[[0, 5, 10, 11, 12, 13, 14, 15], :] - ax = s1.plot() + _, ax = self.plt.subplots() + s1.plot(ax=ax) ax2 = s2.plot(style='g', ax=ax) lines = ax2.get_lines() idx1 = PeriodIndex(lines[0].get_xdata()) @@ -708,11 +700,11 @@ def test_mixed_freq_regular_first_df(self): @slow def test_mixed_freq_irregular_first_df(self): # GH 9852 - import matplotlib.pyplot as plt # noqa s1 = tm.makeTimeSeries().to_frame() s2 = s1.iloc[[0, 5, 10, 11, 12, 13, 14, 15], :] - ax = s2.plot(style='g') - ax = s1.plot(ax=ax) + _, ax = self.plt.subplots() + s2.plot(style='g', ax=ax) + s1.plot(ax=ax) assert not hasattr(ax, 'freq') lines = ax.get_lines() x1 = lines[0].get_xdata() @@ -725,8 +717,9 @@ def test_mixed_freq_hf_first(self): idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - high.plot() - ax = low.plot() + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == 'D' @@ -738,33 +731,35 @@ def test_mixed_freq_alignment(self): ts = Series(ts_data, index=ts_ind) ts2 = ts.asfreq('T').interpolate() - ax = ts.plot() - ts2.plot(style='r') + _, ax = self.plt.subplots() + ax = ts.plot(ax=ax) + ts2.plot(style='r', ax=ax) assert ax.lines[0].get_xdata()[0] == ax.lines[1].get_xdata()[0] @slow def test_mixed_freq_lf_first(self): - import matplotlib.pyplot as plt idxh = date_range('1/1/1999', periods=365, freq='D') idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - low.plot(legend=True) - ax = high.plot(legend=True) + _, ax = self.plt.subplots() + low.plot(legend=True, ax=ax) + high.plot(legend=True, ax=ax) for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == 'D' leg = ax.get_legend() assert len(leg.texts) == 2 - plt.close(ax.get_figure()) + self.plt.close(ax.get_figure()) idxh = date_range('1/1/1999', periods=240, freq='T') idxl = date_range('1/1/1999', periods=4, freq='H') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - low.plot() - ax = high.plot() + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == 'T' @@ -773,8 +768,9 @@ def test_mixed_freq_irreg_period(self): irreg = ts[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 18, 29]] rng = period_range('1/3/2000', periods=30, freq='B') ps = Series(np.random.randn(len(rng)), rng) - irreg.plot() - ps.plot() + _, ax = self.plt.subplots() + irreg.plot(ax=ax) + ps.plot(ax=ax) def test_mixed_freq_shared_ax(self): @@ -813,9 +809,7 @@ def test_mixed_freq_shared_ax(self): def test_nat_handling(self): - fig = self.plt.gcf() - # self.plt.clf() - ax = fig.add_subplot(111) + _, ax = self.plt.subplots() dti = DatetimeIndex(['2015-01-01', NaT, '2015-01-03']) s = Series(range(len(dti)), dti) @@ -831,17 +825,18 @@ def test_to_weekly_resampling(self): idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - high.plot() - ax = low.plot() + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq # tsplot from pandas.tseries.plotting import tsplot - import matplotlib.pyplot as plt - tsplot(high, plt.Axes.plot) - lines = tsplot(low, plt.Axes.plot) + _, ax = self.plt.subplots() + tsplot(high, 
self.plt.Axes.plot, ax=ax) + lines = tsplot(low, self.plt.Axes.plot, ax=ax) for l in lines: assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq @@ -851,8 +846,9 @@ def test_from_weekly_resampling(self): idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - low.plot() - ax = high.plot() + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) expected_h = idxh.to_period().asi8.astype(np.float64) expected_l = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, @@ -868,10 +864,10 @@ def test_from_weekly_resampling(self): # tsplot from pandas.tseries.plotting import tsplot - import matplotlib.pyplot as plt - tsplot(low, plt.Axes.plot) - lines = tsplot(high, plt.Axes.plot) + _, ax = self.plt.subplots() + tsplot(low, self.plt.Axes.plot, ax=ax) + lines = tsplot(high, self.plt.Axes.plot, ax=ax) for l in lines: assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq xdata = l.get_xdata(orig=False) @@ -891,8 +887,9 @@ def test_from_resampling_area_line_mixed(self): # low to high for kind1, kind2 in [('line', 'area'), ('area', 'line')]: - ax = low.plot(kind=kind1, stacked=True) - ax = high.plot(kind=kind2, stacked=True, ax=ax) + _, ax = self.plt.subplots() + low.plot(kind=kind1, stacked=True, ax=ax) + high.plot(kind=kind2, stacked=True, ax=ax) # check low dataframe result expected_x = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, @@ -923,8 +920,9 @@ def test_from_resampling_area_line_mixed(self): # high to low for kind1, kind2 in [('line', 'area'), ('area', 'line')]: - ax = high.plot(kind=kind1, stacked=True) - ax = low.plot(kind=kind2, stacked=True, ax=ax) + _, ax = self.plt.subplots() + high.plot(kind=kind1, stacked=True, ax=ax) + low.plot(kind=kind2, stacked=True, ax=ax) # check high dataframe result expected_x = idxh.to_period().asi8.astype(np.float64) @@ -960,16 +958,18 @@ def test_mixed_freq_second_millisecond(self): high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) # high to low - high.plot() - ax = low.plot() + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) assert len(ax.get_lines()) == 2 for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == 'L' tm.close() # low to high - low.plot() - ax = high.plot() + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) assert len(ax.get_lines()) == 2 for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == 'L' @@ -985,7 +985,8 @@ def test_irreg_dtypes(self): idx = date_range('1/1/2000', periods=10) idx = idx[[0, 2, 5, 9]].asobject df = DataFrame(np.random.randn(len(idx), 3), idx) - _check_plot_works(df.plot) + _, ax = self.plt.subplots() + _check_plot_works(df.plot, ax=ax) @slow def test_time(self): @@ -995,7 +996,8 @@ def test_time(self): df = DataFrame({'a': np.random.randn(len(ts)), 'b': np.random.randn(len(ts))}, index=ts) - ax = df.plot() + _, ax = self.plt.subplots() + df.plot(ax=ax) # verify tick labels ticks = ax.get_xticks() @@ -1031,7 +1033,8 @@ def test_time_musec(self): df = DataFrame({'a': np.random.randn(len(ts)), 'b': np.random.randn(len(ts))}, index=ts) - ax = df.plot() + _, ax = self.plt.subplots() + ax = df.plot(ax=ax) # verify tick labels ticks = ax.get_xticks() @@ -1054,8 +1057,9 @@ def test_secondary_upsample(self): idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - low.plot() - ax = high.plot(secondary_y=True) + 
_, ax = self.plt.subplots() + low.plot(ax=ax) + ax = high.plot(secondary_y=True, ax=ax) for l in ax.get_lines(): assert PeriodIndex(l.get_xdata()).freq == 'D' assert hasattr(ax, 'left_ax') @@ -1065,14 +1069,12 @@ def test_secondary_upsample(self): @slow def test_secondary_legend(self): - import matplotlib.pyplot as plt - fig = plt.gcf() - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) # ts df = tm.makeTimeDataFrame() - ax = df.plot(secondary_y=['A', 'B']) + df.plot(secondary_y=['A', 'B'], ax=ax) leg = ax.get_legend() assert len(leg.get_lines()) == 4 assert leg.get_texts()[0].get_text() == 'A (right)' @@ -1086,33 +1088,37 @@ def test_secondary_legend(self): # TODO: color cycle problems assert len(colors) == 4 + self.plt.close(fig) - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) - ax = df.plot(secondary_y=['A', 'C'], mark_right=False) + df.plot(secondary_y=['A', 'C'], mark_right=False, ax=ax) leg = ax.get_legend() assert len(leg.get_lines()) == 4 assert leg.get_texts()[0].get_text() == 'A' assert leg.get_texts()[1].get_text() == 'B' assert leg.get_texts()[2].get_text() == 'C' assert leg.get_texts()[3].get_text() == 'D' + self.plt.close(fig) - plt.clf() - ax = df.plot(kind='bar', secondary_y=['A']) + fig, ax = self.plt.subplots() + df.plot(kind='bar', secondary_y=['A'], ax=ax) leg = ax.get_legend() assert leg.get_texts()[0].get_text() == 'A (right)' assert leg.get_texts()[1].get_text() == 'B' + self.plt.close(fig) - plt.clf() - ax = df.plot(kind='bar', secondary_y=['A'], mark_right=False) + fig, ax = self.plt.subplots() + df.plot(kind='bar', secondary_y=['A'], mark_right=False, ax=ax) leg = ax.get_legend() assert leg.get_texts()[0].get_text() == 'A' assert leg.get_texts()[1].get_text() == 'B' + self.plt.close(fig) - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) df = tm.makeTimeDataFrame() - ax = df.plot(secondary_y=['C', 'D']) + ax = df.plot(secondary_y=['C', 'D'], ax=ax) leg = ax.get_legend() assert len(leg.get_lines()) == 4 assert ax.right_ax.get_legend() is None @@ -1122,12 +1128,13 @@ def test_secondary_legend(self): # TODO: color cycle problems assert len(colors) == 4 + self.plt.close(fig) # non-ts df = tm.makeDataFrame() - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) - ax = df.plot(secondary_y=['A', 'B']) + ax = df.plot(secondary_y=['A', 'B'], ax=ax) leg = ax.get_legend() assert len(leg.get_lines()) == 4 assert ax.right_ax.get_legend() is None @@ -1137,10 +1144,11 @@ def test_secondary_legend(self): # TODO: color cycle problems assert len(colors) == 4 + self.plt.close() - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) - ax = df.plot(secondary_y=['C', 'D']) + ax = df.plot(secondary_y=['C', 'D'], ax=ax) leg = ax.get_legend() assert len(leg.get_lines()) == 4 assert ax.right_ax.get_legend() is None @@ -1154,7 +1162,8 @@ def test_secondary_legend(self): def test_format_date_axis(self): rng = date_range('1/1/2012', periods=12, freq='M') df = DataFrame(np.random.randn(len(rng), 3), rng) - ax = df.plot() + _, ax = self.plt.subplots() + ax = df.plot(ax=ax) xaxis = ax.get_xaxis() for l in xaxis.get_ticklabels(): if len(l.get_text()) > 0: @@ -1162,28 +1171,21 @@ def test_format_date_axis(self): @slow def test_ax_plot(self): - import matplotlib.pyplot as plt - x = DatetimeIndex(start='2012-01-02', periods=10, freq='D') y = lrange(len(x)) - fig = plt.figure() - ax = fig.add_subplot(111) + _, ax = self.plt.subplots() lines = ax.plot(x, y, label='Y') tm.assert_index_equal(DatetimeIndex(lines[0].get_xdata()), x) @slow def 
test_mpl_nopandas(self): - import matplotlib.pyplot as plt - dates = [date(2008, 12, 31), date(2009, 1, 31)] values1 = np.arange(10.0, 11.0, 0.5) values2 = np.arange(11.0, 12.0, 0.5) kw = dict(fmt='-', lw=4) - plt.close('all') - fig = plt.figure() - ax = fig.add_subplot(111) + _, ax = self.plt.subplots() ax.plot_date([x.toordinal() for x in dates], values1, **kw) ax.plot_date([x.toordinal() for x in dates], values2, **kw) @@ -1201,7 +1203,8 @@ def test_irregular_ts_shared_ax_xlim(self): ts_irregular = ts[[1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 17, 18]] # plot the left section of the irregular series, then the right section - ax = ts_irregular[:5].plot() + _, ax = self.plt.subplots() + ts_irregular[:5].plot(ax=ax) ts_irregular[5:].plot(ax=ax) # check that axis limits are correct @@ -1217,7 +1220,8 @@ def test_secondary_y_non_ts_xlim(self): s1 = Series(1, index=index_1) s2 = Series(2, index=index_2) - ax = s1.plot() + _, ax = self.plt.subplots() + s1.plot(ax=ax) left_before, right_before = ax.get_xlim() s2.plot(secondary_y=True, ax=ax) left_after, right_after = ax.get_xlim() @@ -1233,7 +1237,8 @@ def test_secondary_y_regular_ts_xlim(self): s1 = Series(1, index=index_1) s2 = Series(2, index=index_2) - ax = s1.plot() + _, ax = self.plt.subplots() + s1.plot(ax=ax) left_before, right_before = ax.get_xlim() s2.plot(secondary_y=True, ax=ax) left_after, right_after = ax.get_xlim() @@ -1247,7 +1252,8 @@ def test_secondary_y_mixed_freq_ts_xlim(self): rng = date_range('2000-01-01', periods=10000, freq='min') ts = Series(1, index=rng) - ax = ts.plot() + _, ax = self.plt.subplots() + ts.plot(ax=ax) left_before, right_before = ax.get_xlim() ts.resample('D').mean().plot(secondary_y=True, ax=ax) left_after, right_after = ax.get_xlim() @@ -1262,7 +1268,8 @@ def test_secondary_y_irregular_ts_xlim(self): ts = tm.makeTimeSeries()[:20] ts_irregular = ts[[1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 17, 18]] - ax = ts_irregular[:5].plot() + _, ax = self.plt.subplots() + ts_irregular[:5].plot(ax=ax) # plot higher-x values on secondary axis ts_irregular[5:].plot(secondary_y=True, ax=ax) # ensure secondary limits aren't overwritten by plot on primary @@ -1275,10 +1282,11 @@ def test_secondary_y_irregular_ts_xlim(self): def test_plot_outofbounds_datetime(self): # 2579 - checking this does not raise values = [date(1677, 1, 1), date(1677, 1, 2)] - self.plt.plot(values) + _, ax = self.plt.subplots() + ax.plot(values) values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] - self.plt.plot(values) + ax.plot(values) def test_format_timedelta_ticks_narrow(self): if is_platform_mac(): @@ -1290,8 +1298,8 @@ def test_format_timedelta_ticks_narrow(self): rng = timedelta_range('0', periods=10, freq='ns') df = DataFrame(np.random.randn(len(rng), 3), rng) - ax = df.plot(fontsize=2) - fig = ax.get_figure() + fig, ax = self.plt.subplots() + df.plot(fontsize=2, ax=ax) fig.canvas.draw() labels = ax.get_xticklabels() assert len(labels) == len(expected_labels) @@ -1316,8 +1324,8 @@ def test_format_timedelta_ticks_wide(self): rng = timedelta_range('0', periods=10, freq='1 d') df = DataFrame(np.random.randn(len(rng), 3), rng) - ax = df.plot(fontsize=2) - fig = ax.get_figure() + fig, ax = self.plt.subplots() + ax = df.plot(fontsize=2, ax=ax) fig.canvas.draw() labels = ax.get_xticklabels() assert len(labels) == len(expected_labels) @@ -1327,19 +1335,22 @@ def test_format_timedelta_ticks_wide(self): def test_timedelta_plot(self): # test issue #8711 s = Series(range(5), timedelta_range('1day', periods=5)) - _check_plot_works(s.plot) + _, ax = 
self.plt.subplots() + _check_plot_works(s.plot, ax=ax) # test long period index = timedelta_range('1 day 2 hr 30 min 10 s', periods=10, freq='1 d') s = Series(np.random.randn(len(index)), index) - _check_plot_works(s.plot) + _, ax = self.plt.subplots() + _check_plot_works(s.plot, ax=ax) # test short period index = timedelta_range('1 day 2 hr 30 min 10 s', periods=10, freq='1 ns') s = Series(np.random.randn(len(index)), index) - _check_plot_works(s.plot) + _, ax = self.plt.subplots() + _check_plot_works(s.plot, ax=ax) def test_hist(self): # https://github.com/matplotlib/matplotlib/issues/8459 @@ -1347,7 +1358,8 @@ def test_hist(self): x = rng w1 = np.arange(0, 1, .1) w2 = np.arange(0, 1, .1)[::-1] - self.plt.hist([x, x], weights=[w1, w2]) + _, ax = self.plt.subplots() + ax.hist([x, x], weights=[w1, w2]) @slow def test_overlapping_datetime(self): @@ -1361,7 +1373,8 @@ def test_overlapping_datetime(self): # plot first series, then add the second series to those axes, # then try adding the first series again - ax = s1.plot() + _, ax = self.plt.subplots() + s1.plot(ax=ax) s2.plot(ax=ax) s1.plot(ax=ax) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 340a98484480f..7c66b5dafb9c7 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -82,7 +82,8 @@ def test_plot(self): @slow def test_plot_figsize_and_title(self): # figsize and title - ax = self.series.plot(title='Test', figsize=(16, 8)) + _, ax = self.plt.subplots() + ax = self.series.plot(title='Test', figsize=(16, 8), ax=ax) self._check_text_labels(ax.title, 'Test') self._check_axes_shape(ax, axes_num=1, layout=(1, 1), figsize=(16, 8)) @@ -93,25 +94,28 @@ def test_dont_modify_rcParams(self): else: key = 'axes.color_cycle' colors = self.plt.rcParams[key] - Series([1, 2, 3]).plot() + _, ax = self.plt.subplots() + Series([1, 2, 3]).plot(ax=ax) assert colors == self.plt.rcParams[key] def test_ts_line_lim(self): - ax = self.ts.plot() + fig, ax = self.plt.subplots() + ax = self.ts.plot(ax=ax) xmin, xmax = ax.get_xlim() lines = ax.get_lines() assert xmin == lines[0].get_data(orig=False)[0][0] assert xmax == lines[0].get_data(orig=False)[0][-1] tm.close() - ax = self.ts.plot(secondary_y=True) + ax = self.ts.plot(secondary_y=True, ax=ax) xmin, xmax = ax.get_xlim() lines = ax.get_lines() assert xmin == lines[0].get_data(orig=False)[0][0] assert xmax == lines[0].get_data(orig=False)[0][-1] def test_ts_area_lim(self): - ax = self.ts.plot.area(stacked=False) + _, ax = self.plt.subplots() + ax = self.ts.plot.area(stacked=False, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] assert xmin == line[0] @@ -119,7 +123,8 @@ def test_ts_area_lim(self): tm.close() # GH 7471 - ax = self.ts.plot.area(stacked=False, x_compat=True) + _, ax = self.plt.subplots() + ax = self.ts.plot.area(stacked=False, x_compat=True, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] assert xmin == line[0] @@ -128,14 +133,16 @@ def test_ts_area_lim(self): tz_ts = self.ts.copy() tz_ts.index = tz_ts.tz_localize('GMT').tz_convert('CET') - ax = tz_ts.plot.area(stacked=False, x_compat=True) + _, ax = self.plt.subplots() + ax = tz_ts.plot.area(stacked=False, x_compat=True, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] assert xmin == line[0] assert xmax == line[-1] tm.close() - ax = tz_ts.plot.area(stacked=False, secondary_y=True) + _, ax = self.plt.subplots() + ax = tz_ts.plot.area(stacked=False, 
secondary_y=True, ax=ax)
 xmin, xmax = ax.get_xlim()
 line = ax.get_lines()[0].get_data(orig=False)[0]
 assert xmin == line[0]
@@ -143,23 +150,28 @@ def test_ts_area_lim(self):

 def test_label(self):
 s = Series([1, 2])
- ax = s.plot(label='LABEL', legend=True)
+ _, ax = self.plt.subplots()
+ ax = s.plot(label='LABEL', legend=True, ax=ax)
 self._check_legend_labels(ax, labels=['LABEL'])
 self.plt.close()
- ax = s.plot(legend=True)
+ _, ax = self.plt.subplots()
+ ax = s.plot(legend=True, ax=ax)
 self._check_legend_labels(ax, labels=['None'])
 self.plt.close()

 # get name from index
 s.name = 'NAME'
- ax = s.plot(legend=True)
+ _, ax = self.plt.subplots()
+ ax = s.plot(legend=True, ax=ax)
 self._check_legend_labels(ax, labels=['NAME'])
 self.plt.close()

 # override the default
- ax = s.plot(legend=True, label='LABEL')
+ _, ax = self.plt.subplots()
+ ax = s.plot(legend=True, label='LABEL', ax=ax)
 self._check_legend_labels(ax, labels=['LABEL'])
 self.plt.close()

 # Add label info, but don't draw
- ax = s.plot(legend=False, label='LABEL')
+ _, ax = self.plt.subplots()
+ ax = s.plot(legend=False, label='LABEL', ax=ax)
 assert ax.get_legend() is None # Hasn't been drawn
 ax.legend() # draw it
 self._check_legend_labels(ax, labels=['LABEL'])
@@ -189,10 +201,12 @@ def test_line_area_nan_series(self):
 def test_line_use_index_false(self):
 s = Series([1, 2, 3], index=['a', 'b', 'c'])
 s.index.name = 'The Index'
- ax = s.plot(use_index=False)
+ _, ax = self.plt.subplots()
+ ax = s.plot(use_index=False, ax=ax)
 label = ax.get_xlabel()
 assert label == ''
- ax2 = s.plot.bar(use_index=False)
+ _, ax = self.plt.subplots()
+ ax2 = s.plot.bar(use_index=False, ax=ax)
 label2 = ax2.get_xlabel()
 assert label2 == ''
@@ -203,11 +217,13 @@ def test_bar_log(self):
 if not self.mpl_le_1_2_1:
 expected = np.hstack((.1, expected, 1e4))

- ax = Series([200, 500]).plot.bar(log=True)
+ _, ax = self.plt.subplots()
+ ax = Series([200, 500]).plot.bar(log=True, ax=ax)
 tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected)
 tm.close()

- ax = Series([200, 500]).plot.barh(log=True)
+ _, ax = self.plt.subplots()
+ ax = Series([200, 500]).plot.barh(log=True, ax=ax)
 tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), expected)
 tm.close()
@@ -219,7 +235,8 @@ def test_bar_log(self):
 if self.mpl_ge_2_0_0:
 expected = np.hstack((1.0e-05, expected))

- ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='bar')
+ _, ax = self.plt.subplots()
+ ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='bar', ax=ax)
 ymin = 0.0007943282347242822 if self.mpl_ge_2_0_0 else 0.001
 ymax = 0.12589254117941673 if self.mpl_ge_2_0_0 else .10000000000000001
 res = ax.get_ylim()
@@ -228,7 +245,8 @@ def test_bar_log(self):
 tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected)
 tm.close()

- ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='barh')
+ _, ax = self.plt.subplots()
+ ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='barh', ax=ax)
 res = ax.get_xlim()
 tm.assert_almost_equal(res[0], ymin)
 tm.assert_almost_equal(res[1], ymax)
@@ -237,23 +255,27 @@ def test_bar_log(self):
 @slow
 def test_bar_ignore_index(self):
 df = Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
- ax = df.plot.bar(use_index=False)
+ _, ax = self.plt.subplots()
+ ax = df.plot.bar(use_index=False, ax=ax)
 self._check_text_labels(ax.get_xticklabels(), ['0', '1', '2', '3'])

 def test_rotation(self):
 df = DataFrame(randn(5, 5))
 # Default rot 0
- axes = df.plot()
+ _, ax = self.plt.subplots()
+ axes = df.plot(ax=ax)
 self._check_ticks_props(axes, xrot=0)

- axes = df.plot(rot=30)
+ _, ax = 
self.plt.subplots()
+ axes = df.plot(rot=30, ax=ax)
 self._check_ticks_props(axes, xrot=30)

 def test_irregular_datetime(self):
 rng = date_range('1/1/2000', '3/1/2000')
 rng = rng[[0, 1, 2, 3, 5, 9, 10, 11, 12]]
 ser = Series(randn(len(rng)), rng)
- ax = ser.plot()
+ _, ax = self.plt.subplots()
+ ax = ser.plot(ax=ax)
 xp = datetime(1999, 1, 1).toordinal()
 ax.set_xlim('1/1/1999', '1/1/2001')
 assert xp == ax.get_xlim()[0]
@@ -311,7 +333,8 @@ def test_pie_series(self):

 def test_pie_nan(self):
 s = Series([1, np.nan, 1, 1])
- ax = s.plot.pie(legend=True)
+ _, ax = self.plt.subplots()
+ ax = s.plot.pie(legend=True, ax=ax)
 expected = ['0', '', '2', '3']
 result = [x.get_text() for x in ax.texts]
 assert result == expected
@@ -319,7 +342,8 @@ def test_pie_nan(self):
 @slow
 def test_hist_df_kwargs(self):
 df = DataFrame(np.random.randn(10, 2))
- ax = df.plot.hist(bins=5)
+ _, ax = self.plt.subplots()
+ ax = df.plot.hist(bins=5, ax=ax)
 assert len(ax.patches) == 10

 @slow
@@ -329,10 +353,12 @@ def test_hist_df_with_nonnumerics(self):
 df = DataFrame(
 np.random.randn(10, 4), columns=['A', 'B', 'C', 'D'])
 df['E'] = ['x', 'y'] * 5
- ax = df.plot.hist(bins=5)
+ _, ax = self.plt.subplots()
+ ax = df.plot.hist(bins=5, ax=ax)
 assert len(ax.patches) == 20

- ax = df.plot.hist() # bins=10
+ _, ax = self.plt.subplots()
+ ax = df.plot.hist(ax=ax) # bins=10
 assert len(ax.patches) == 40

 @slow
@@ -439,7 +465,8 @@ def test_hist_secondary_legend(self):
 df = DataFrame(np.random.randn(30, 4), columns=list('abcd'))

 # primary -> secondary
- ax = df['a'].plot.hist(legend=True)
+ _, ax = self.plt.subplots()
+ ax = df['a'].plot.hist(legend=True, ax=ax)
 df['b'].plot.hist(ax=ax, legend=True, secondary_y=True)
 # both legends are drawn on left ax
 # left and right axis must be visible
@@ -449,7 +476,8 @@ def test_hist_secondary_legend(self):
 tm.close()

 # secondary -> secondary
- ax = df['a'].plot.hist(legend=True, secondary_y=True)
+ _, ax = self.plt.subplots()
+ ax = df['a'].plot.hist(legend=True, secondary_y=True, ax=ax)
 df['b'].plot.hist(ax=ax, legend=True, secondary_y=True)
 # both legends are drawn on left ax
 # left axis must be invisible, right axis must be visible
@@ -460,7 +488,8 @@ def test_hist_secondary_legend(self):
 tm.close()

 # secondary -> primary
- ax = df['a'].plot.hist(legend=True, secondary_y=True)
+ _, ax = self.plt.subplots()
+ ax = df['a'].plot.hist(legend=True, secondary_y=True, ax=ax)
 # right axes is returned
 df['b'].plot.hist(ax=ax, legend=True)
 # both legends are drawn on left ax
@@ -477,8 +506,9 @@ def test_df_series_secondary_legend(self):
 s = Series(np.random.randn(30), name='x')

 # primary -> secondary (without passing ax)
- ax = df.plot()
- s.plot(legend=True, secondary_y=True)
+ _, ax = self.plt.subplots()
+ ax = df.plot(ax=ax)
+ s.plot(legend=True, secondary_y=True, ax=ax)
 # both legends are drawn on left ax
 # left and right axis must be visible
 self._check_legend_labels(ax, labels=['a', 'b', 'c', 'x (right)'])
@@ -487,7 +517,8 @@ def test_df_series_secondary_legend(self):
 tm.close()

 # primary -> secondary (with passing ax)
- ax = df.plot()
+ _, ax = self.plt.subplots()
+ ax = df.plot(ax=ax)
 s.plot(ax=ax, legend=True, secondary_y=True)
 # both legends are drawn on left ax
 # left and right axis must be visible
@@ -497,8 +528,9 @@ def test_df_series_secondary_legend(self):
 tm.close()

 # secondary -> secondary (without passing ax)
- ax = df.plot(secondary_y=True)
- s.plot(legend=True, secondary_y=True)
+ _, ax = self.plt.subplots()
+ ax = df.plot(secondary_y=True, ax=ax)
+ s.plot(legend=True, secondary_y=True, ax=ax)
 # 
both legends are drawn on left ax
 # left axis must be invisible and right axis must be visible
 expected = ['a (right)', 'b (right)', 'c (right)', 'x (right)']
@@ -508,7 +540,8 @@ def test_df_series_secondary_legend(self):
 tm.close()

 # secondary -> secondary (with passing ax)
- ax = df.plot(secondary_y=True)
+ _, ax = self.plt.subplots()
+ ax = df.plot(secondary_y=True, ax=ax)
 s.plot(ax=ax, legend=True, secondary_y=True)
 # both legends are drawn on left ax
 # left axis must be invisible and right axis must be visible
@@ -519,7 +552,8 @@ def test_df_series_secondary_legend(self):
 tm.close()

 # secondary -> secondary (with passing ax)
- ax = df.plot(secondary_y=True, mark_right=False)
+ _, ax = self.plt.subplots()
+ ax = df.plot(secondary_y=True, mark_right=False, ax=ax)
 s.plot(ax=ax, legend=True, secondary_y=True)
 # both legends are drawn on left ax
 # left axis must be invisible and right axis must be visible
@@ -533,11 +567,13 @@ def test_df_series_secondary_legend(self):
 def test_plot_fails_with_dupe_color_and_style(self):
 x = Series(randn(2))
 with pytest.raises(ValueError):
- x.plot(style='k--', color='k')
+ _, ax = self.plt.subplots()
+ x.plot(style='k--', color='k', ax=ax)

 @slow
 def test_hist_kde(self):
- ax = self.ts.plot.hist(logy=True)
+ _, ax = self.plt.subplots()
+ ax = self.ts.plot.hist(logy=True, ax=ax)
 self._check_ax_scales(ax, yaxis='log')
 xlabels = ax.get_xticklabels()
 # ticks are values, thus ticklabels are blank
@@ -549,7 +585,8 @@ def test_hist_kde(self):
 _skip_if_no_scipy_gaussian_kde()
 _check_plot_works(self.ts.plot.kde)
 _check_plot_works(self.ts.plot.density)
- ax = self.ts.plot.kde(logy=True)
+ _, ax = self.plt.subplots()
+ ax = self.ts.plot.kde(logy=True, ax=ax)
 self._check_ax_scales(ax, yaxis='log')
 xlabels = ax.get_xticklabels()
 self._check_text_labels(xlabels, [''] * len(xlabels))
@@ -565,8 +602,9 @@ def test_kde_kwargs(self):
 ind=linspace(-100, 100, 20))
 _check_plot_works(self.ts.plot.density, bw_method=.5,
 ind=linspace(-100, 100, 20))
+ _, ax = self.plt.subplots()
 ax = self.ts.plot.kde(logy=True, bw_method=.5,
- ind=linspace(-100, 100, 20))
+ ind=linspace(-100, 100, 20), ax=ax)
 self._check_ax_scales(ax, yaxis='log')
 self._check_text_labels(ax.yaxis.get_label(), 'Density')
@@ -583,29 +621,34 @@ def test_kde_missing_vals(self):

 @slow
 def test_hist_kwargs(self):
- ax = self.ts.plot.hist(bins=5)
+ _, ax = self.plt.subplots()
+ ax = self.ts.plot.hist(bins=5, ax=ax)
 assert len(ax.patches) == 5
 self._check_text_labels(ax.yaxis.get_label(), 'Frequency')
 tm.close()

 if self.mpl_ge_1_3_1:
- ax = self.ts.plot.hist(orientation='horizontal')
+ _, ax = self.plt.subplots()
+ ax = self.ts.plot.hist(orientation='horizontal', ax=ax)
 self._check_text_labels(ax.xaxis.get_label(), 'Frequency')
 tm.close()

- ax = self.ts.plot.hist(align='left', stacked=True)
+ _, ax = self.plt.subplots()
+ ax = self.ts.plot.hist(align='left', stacked=True, ax=ax)
 tm.close()

 @slow
 def test_hist_kde_color(self):
- ax = self.ts.plot.hist(logy=True, bins=10, color='b')
+ _, ax = self.plt.subplots()
+ ax = self.ts.plot.hist(logy=True, bins=10, color='b', ax=ax)
 self._check_ax_scales(ax, yaxis='log')
 assert len(ax.patches) == 10
 self._check_colors(ax.patches, facecolors=['b'] * 10)

 tm._skip_if_no_scipy()
 _skip_if_no_scipy_gaussian_kde()
- ax = self.ts.plot.kde(logy=True, color='r')
+ _, ax = self.plt.subplots()
+ ax = self.ts.plot.kde(logy=True, color='r', ax=ax)
 self._check_ax_scales(ax, yaxis='log')
 lines = ax.get_lines()
 assert len(lines) == 1
@@ -613,7 +656,8 @@ def test_hist_kde_color(self):

 @slow
 def 
test_boxplot_series(self): - ax = self.ts.plot.box(logy=True) + _, ax = self.plt.subplots() + ax = self.ts.plot.box(logy=True, ax=ax) self._check_ax_scales(ax, yaxis='log') xlabels = ax.get_xticklabels() self._check_text_labels(xlabels, [self.ts.name]) @@ -625,20 +669,22 @@ def test_kind_both_ways(self): s = Series(range(3)) kinds = (plotting._core._common_kinds + plotting._core._series_kinds) + _, ax = self.plt.subplots() for kind in kinds: if not _ok_for_gaussian_kde(kind): continue - s.plot(kind=kind) + s.plot(kind=kind, ax=ax) getattr(s.plot, kind)() @slow def test_invalid_plot_data(self): s = Series(list('abcd')) + _, ax = self.plt.subplots() for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with pytest.raises(TypeError): - s.plot(kind=kind) + s.plot(kind=kind, ax=ax) @slow def test_valid_object_plot(self): @@ -650,11 +696,12 @@ def test_valid_object_plot(self): def test_partially_invalid_plot_data(self): s = Series(['a', 'b', 1.0, 2]) + _, ax = self.plt.subplots() for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with pytest.raises(TypeError): - s.plot(kind=kind) + s.plot(kind=kind, ax=ax) def test_invalid_kind(self): s = Series([1, 2]) @@ -776,13 +823,15 @@ def test_standard_colors_all(self): def test_series_plot_color_kwargs(self): # GH1890 - ax = Series(np.arange(12) + 1).plot(color='green') + _, ax = self.plt.subplots() + ax = Series(np.arange(12) + 1).plot(color='green', ax=ax) self._check_colors(ax.get_lines(), linecolors=['green']) def test_time_series_plot_color_kwargs(self): # #1890 + _, ax = self.plt.subplots() ax = Series(np.arange(12) + 1, index=date_range( - '1/1/2000', periods=12)).plot(color='green') + '1/1/2000', periods=12)).plot(color='green', ax=ax) self._check_colors(ax.get_lines(), linecolors=['green']) def test_time_series_plot_color_with_empty_kwargs(self): @@ -797,14 +846,16 @@ def test_time_series_plot_color_with_empty_kwargs(self): ncolors = 3 + _, ax = self.plt.subplots() for i in range(ncolors): - ax = s.plot() + ax = s.plot(ax=ax) self._check_colors(ax.get_lines(), linecolors=def_colors[:ncolors]) def test_xticklabels(self): # GH11529 s = Series(np.arange(10), index=['P%02d' % i for i in range(10)]) - ax = s.plot(xticks=[0, 3, 5, 9]) + _, ax = self.plt.subplots() + ax = s.plot(xticks=[0, 3, 5, 9], ax=ax) exp = ['P%02d' % i for i in [0, 3, 5, 9]] self._check_text_labels(ax.get_xticklabels(), exp) From f7149a22b74e00fa584e94449e3bed34cc995a82 Mon Sep 17 00:00:00 2001 From: Vincent La Date: Wed, 31 May 2017 01:54:26 -0700 Subject: [PATCH 09/55] DOC: Update to docstring of DataFrame(dtype) (#14764) (#16487) * Adding some more documentation on dataframe with regards to dtype * Making example for creating dataframe from np matrix easier --- pandas/core/frame.py | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 743d623ee5e44..907959c42323e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -241,17 +241,47 @@ class DataFrame(NDFrame): Column labels to use for resulting frame. Will default to np.arange(n) if no column labels are provided dtype : dtype, default None - Data type to force, otherwise infer + Data type to force. Only a single dtype is allowed. If None, infer copy : boolean, default False Copy data from inputs. 
Only affects DataFrame / 2d ndarray input Examples -------- - >>> d = {'col1': ts1, 'col2': ts2} - >>> df = DataFrame(data=d, index=index) - >>> df2 = DataFrame(np.random.randn(10, 5)) - >>> df3 = DataFrame(np.random.randn(10, 5), - ... columns=['a', 'b', 'c', 'd', 'e']) + Constructing DataFrame from a dictionary. + + >>> d = {'col1': [1, 2], 'col2': [3, 4]} + >>> df = pd.DataFrame(data=d) + >>> df + col1 col2 + 0 1 3 + 1 2 4 + + Notice that the inferred dtype is int64. + + >>> df.dtypes + col1 int64 + col2 int64 + dtype: object + + To enforce a single dtype: + + >>> df = pd.DataFrame(data=d, dtype=np.int8) + >>> df.dtypes + col1 int8 + col2 int8 + dtype: object + + Constructing DataFrame from numpy ndarray: + + >>> df2 = pd.DataFrame(np.random.randint(low=0, high=10, size=(5, 5)), + ... columns=['a', 'b', 'c', 'd', 'e']) + >>> df2 + a b c d e + 0 2 8 8 3 4 + 1 4 2 9 0 9 + 2 1 0 7 8 0 + 3 5 1 7 1 3 + 4 6 0 2 4 2 See also -------- From 36d617138f2c1544ea9a05442882e8a539bd64e7 Mon Sep 17 00:00:00 2001 From: Kassandra Keeton Date: Wed, 31 May 2017 04:37:49 -0500 Subject: [PATCH 10/55] DOC: correct docstring examples (#3439) (#16432) --- ci/build_docs.sh | 9 ++++ pandas/core/reshape/concat.py | 2 + pandas/core/reshape/pivot.py | 72 +++++++++++++++++------------- pandas/core/reshape/reshape.py | 81 ++++++++++++++++++---------------- pandas/core/reshape/tile.py | 24 +++++----- 5 files changed, 108 insertions(+), 80 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 26917b8f9b792..a038304fe0f7a 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -59,6 +59,15 @@ if [ "$DOC" ]; then git remote -v git push origin gh-pages -f + + echo "Running doctests" + cd "$TRAVIS_BUILD_DIR" + pytest --doctest-modules \ + pandas/core/reshape/concat.py \ + pandas/core/reshape/pivot.py \ + pandas/core/reshape/reshape.py \ + pandas/core/reshape/tile.py + fi exit 0 diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index af2eb734a02f6..96603b6adc3b0 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -197,6 +197,8 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 0 a 2 >>> pd.concat([df5, df6], verify_integrity=True) + Traceback (most recent call last): + ... ValueError: Indexes have overlapping values: ['a'] """ op = _Concatenator(objs, axis=axis, join_axes=join_axes, diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index b562f8a32f5c9..0581ec7484c49 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -50,26 +50,36 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', Examples -------- + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) >>> df - A B C D - 0 foo one small 1 - 1 foo one large 2 - 2 foo one large 2 - 3 foo two small 3 - 4 foo two small 3 - 5 bar one large 4 - 6 bar one small 5 - 7 bar two small 6 - 8 bar two large 7 + A B C D + 0 foo one small 1 + 1 foo one large 2 + 2 foo one large 2 + 3 foo two small 3 + 4 foo two small 3 + 5 bar one large 4 + 6 bar one small 5 + 7 bar two small 6 + 8 bar two large 7 >>> table = pivot_table(df, values='D', index=['A', 'B'], ... 
columns=['C'], aggfunc=np.sum) >>> table - small large - foo one 1 4 - two 6 NaN - bar one 5 4 - two 6 7 + ... # doctest: +NORMALIZE_WHITESPACE + C large small + A B + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 Returns ------- @@ -445,27 +455,27 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, Examples -------- - >>> a - array([foo, foo, foo, foo, bar, bar, - bar, bar, foo, foo, foo], dtype=object) - >>> b - array([one, one, one, two, one, one, - one, two, two, two, one], dtype=object) - >>> c - array([dull, dull, shiny, dull, dull, shiny, - shiny, dull, shiny, shiny, shiny], dtype=object) - - >>> crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) - b one two - c dull shiny dull shiny + >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar", + ... "bar", "bar", "foo", "foo", "foo"], dtype=object) + >>> b = np.array(["one", "one", "one", "two", "one", "one", + ... "one", "two", "two", "two", "one"], dtype=object) + >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny", + ... "shiny", "dull", "shiny", "shiny", "shiny"], + ... dtype=object) + + >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) + ... # doctest: +NORMALIZE_WHITESPACE + b one two + c dull shiny dull shiny a - bar 1 2 1 0 - foo 2 2 1 2 + bar 1 2 1 0 + foo 2 2 1 2 >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) >>> crosstab(foo, bar) # 'c' and 'f' are not represented in the data, - # but they still will be counted in the output + ... # but they still will be counted in the output + ... # doctest: +SKIP col_0 d e f row_0 a 1 0 0 diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index f944dfe22361a..dcb83d225699d 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -48,23 +48,23 @@ class _Unstacker(object): >>> import pandas as pd >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), ... ('two', 'a'), ('two', 'b')]) - >>> s = pd.Series(np.arange(1.0, 5.0), index=index) + >>> s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index) >>> s - one a 1 - b 2 - two a 3 - b 4 - dtype: float64 + one a 1 + b 2 + two a 3 + b 4 + dtype: int64 >>> s.unstack(level=-1) - a b + a b one 1 2 two 3 4 >>> s.unstack(level=0) one two - a 1 2 - b 3 4 + a 1 3 + b 2 4 Returns ------- @@ -789,18 +789,18 @@ def lreshape(data, groups, dropna=True, label=None): >>> import pandas as pd >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526], ... 'team': ['Red Sox', 'Yankees'], - ... 'year1': [2007, 2008], 'year2': [2008, 2008]}) + ... 'year1': [2007, 2007], 'year2': [2008, 2008]}) >>> data hr1 hr2 team year1 year2 0 514 545 Red Sox 2007 2008 1 573 526 Yankees 2007 2008 >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']}) - team hr year - 0 Red Sox 514 2007 - 1 Yankees 573 2007 - 2 Red Sox 545 2008 - 3 Yankees 526 2008 + team year hr + 0 Red Sox 2007 514 + 1 Yankees 2007 573 + 2 Red Sox 2008 545 + 3 Yankees 2008 526 Returns ------- @@ -905,11 +905,12 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): ... }) >>> df["id"] = df.index >>> df - A1970 A1980 B1970 B1980 X id + A1970 A1980 B1970 B1980 X id 0 a d 2.5 3.2 -1.085631 0 1 b e 1.2 1.3 0.997345 1 2 c f 0.7 0.1 0.282978 2 >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year") + ... 
# doctest: +NORMALIZE_WHITESPACE X A B id year 0 1970 -1.085631 a 2.5 @@ -940,6 +941,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): 8 3 3 2.1 2.9 >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age') >>> l + ... # doctest: +NORMALIZE_WHITESPACE ht famid birth age 1 1 1 2.8 @@ -979,41 +981,44 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): Less wieldy column names are also handled + >>> np.random.seed(0) >>> df = pd.DataFrame({'A(quarterly)-2010': np.random.rand(3), ... 'A(quarterly)-2011': np.random.rand(3), ... 'B(quarterly)-2010': np.random.rand(3), ... 'B(quarterly)-2011': np.random.rand(3), ... 'X' : np.random.randint(3, size=3)}) >>> df['id'] = df.index - >>> df - A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 B(quarterly)-2011 - 0 0.531828 0.724455 0.322959 0.293714 - 1 0.634401 0.611024 0.361789 0.630976 - 2 0.849432 0.722443 0.228263 0.092105 - \ + >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS + A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 ... + 0 0.548814 0.544883 0.437587 ... + 1 0.715189 0.423655 0.891773 ... + 2 0.602763 0.645894 0.963663 ... X id 0 0 0 1 1 1 - 2 2 2 - >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], - i='id', j='year', sep='-') - X A(quarterly) B(quarterly) + 2 1 2 + + >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id', + ... j='year', sep='-') + ... # doctest: +NORMALIZE_WHITESPACE + X A(quarterly) B(quarterly) id year - 0 2010 0 0.531828 0.322959 - 1 2010 2 0.634401 0.361789 - 2 2010 2 0.849432 0.228263 - 0 2011 0 0.724455 0.293714 - 1 2011 2 0.611024 0.630976 - 2 2011 2 0.722443 0.092105 + 0 2010 0 0.548814 0.437587 + 1 2010 1 0.715189 0.891773 + 2 2010 1 0.602763 0.963663 + 0 2011 0 0.544883 0.383442 + 1 2011 1 0.423655 0.791725 + 2 2011 1 0.645894 0.528895 If we have many columns, we could also use a regex to find our stubnames and pass that list on to wide_to_long - >>> stubnames = set([match[0] for match in - df.columns.str.findall('[A-B]\(.*\)').values - if match != [] ]) + >>> stubnames = sorted( + ... set([match[0] for match in df.columns.str.findall( + ... r'[A-B]\(.*\)').values if match != [] ]) + ... ) >>> list(stubnames) - ['B(quarterly)', 'A(quarterly)'] + ['A(quarterly)', 'B(quarterly)'] Notes ----- @@ -1133,7 +1138,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, 2 0 0 1 >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], - 'C': [1, 2, 3]}) + ... 'C': [1, 2, 3]}) >>> pd.get_dummies(df, prefix=['col1', 'col2']) C col1_a col1_b col2_a col2_b col2_c @@ -1149,7 +1154,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, 3 1 0 0 4 1 0 0 - >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True)) + >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True) b c 0 0 0 1 1 0 diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 746742f47f2aa..866f229bec418 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -75,18 +75,18 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, Examples -------- >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, retbins=True) - ([(0.191, 3.367], (0.191, 3.367], (0.191, 3.367], (3.367, 6.533], - (6.533, 9.7], (0.191, 3.367]] - Categories (3, object): [(0.191, 3.367] < (3.367, 6.533] < (6.533, 9.7]], - array([ 0.1905 , 3.36666667, 6.53333333, 9.7 ])) + ... # doctest: +ELLIPSIS + ([(0.19, 3.367], (0.19, 3.367], (0.19, 3.367], (3.367, 6.533], ... 
+ Categories (3, interval[float64]): [(0.19, 3.367] < (3.367, 6.533] ... - >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, - labels=["good","medium","bad"]) + >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), + ... 3, labels=["good", "medium", "bad"]) + ... # doctest: +SKIP [good, good, good, medium, bad, good] Categories (3, object): [good < medium < bad] >>> pd.cut(np.ones(5), 4, labels=False) - array([1, 1, 1, 1, 1], dtype=int64) + array([1, 1, 1, 1, 1]) """ # NOTE: this binning code is changed a bit from histogram for var(x) == 0 @@ -182,15 +182,17 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'): Examples -------- >>> pd.qcut(range(5), 4) - [[0, 1], [0, 1], (1, 2], (2, 3], (3, 4]] - Categories (4, object): [[0, 1] < (1, 2] < (2, 3] < (3, 4]] + ... # doctest: +ELLIPSIS + [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]] + Categories (4, interval[float64]): [(-0.001, 1.0] < (1.0, 2.0] ... - >>> pd.qcut(range(5), 3, labels=["good","medium","bad"]) + >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"]) + ... # doctest: +SKIP [good, good, medium, bad, bad] Categories (3, object): [good < medium < bad] >>> pd.qcut(range(5), 4, labels=False) - array([0, 0, 1, 2, 3], dtype=int64) + array([0, 0, 1, 2, 3]) """ x_is_series, series_index, name, x = _preprocess_for_cut(x) From ab9bc9a2237c3b2c80ce55acc1c09c81f411476f Mon Sep 17 00:00:00 2001 From: Jeff Tratner Date: Wed, 31 May 2017 03:39:46 -0700 Subject: [PATCH 11/55] Fix unbound local with bad engine (#16511) --- doc/source/whatsnew/v0.20.2.txt | 3 +++ pandas/io/parsers.py | 4 ++++ pandas/tests/io/test_common.py | 7 +++++++ 3 files changed, 14 insertions(+) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 90146aa176b31..1517327ab7133 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -41,6 +41,9 @@ Bug Fixes detecting the terminal size. This fix only applies to python 3 (:issue:`16496`) - Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`) - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`) +- Passing an invalid engine to :func:`read_csv` now raises an informative + ``ValueError`` rather than ``UnboundLocalError``. 
(:issue:`16511`)
+
+
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index e287d92f67ef6..12b606d969c7d 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -969,6 +969,10 @@ def _make_engine(self, engine='c'):
 klass = PythonParser
 elif engine == 'python-fwf':
 klass = FixedWidthFieldParser
+ else:
+ raise ValueError('Unknown engine: {engine} (valid options are'
+ ' "c", "python", or' ' "python-fwf")'.format(
+ engine=engine))
 self._engine = klass(self.f, **self.options)

 def _failover_to_python(self):
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index b7d158dd75960..289f86eb2dc53 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -223,3 +223,10 @@ def test_next(self):
 assert next_line.strip() == line.strip()

 pytest.raises(StopIteration, next, wrapper)
+
+ def test_unknown_engine(self):
+ with tm.ensure_clean() as path:
+ df = tm.makeDataFrame()
+ df.to_csv(path)
+ with tm.assert_raises_regex(ValueError, 'Unknown engine'):
+ read_csv(path, engine='pyt')
From 9a9c315f93fb1b5a497298d9f779e935ce4e984e Mon Sep 17 00:00:00 2001
From: Christian Stade-Schuldt 
Date: Wed, 31 May 2017 13:56:52 +0200
Subject: [PATCH 12/55] return empty MultiIndex for symmetrical difference on equal MultiIndexes (#16486)

---
 doc/source/whatsnew/v0.20.2.txt | 1 +
 pandas/core/indexes/multi.py | 6 ++++++
 pandas/tests/indexes/test_base.py | 2 --
 pandas/tests/indexing/test_multiindex.py | 11 +++++++++++
 4 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index 1517327ab7133..38cf683208b3d 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -40,6 +40,7 @@ Bug Fixes
 - Silenced a warning on some Windows environments about "tput: terminal attributes: No such device or address" when
 detecting the terminal size. This fix only applies to python 3 (:issue:`16496`)
 - Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`)
+- Bug in ``Index.symmetric_difference()`` on two equal ``MultiIndex`` objects, which raised a ``TypeError`` (:issue:`13490`)
 - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`)
 - Passing an invalid engine to :func:`read_csv` now raises an informative
 ``ValueError`` rather than ``UnboundLocalError``. (:issue:`16511`)

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 569e16f2141ae..981a6a696a618 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -414,6 +414,12 @@ def view(self, cls=None):
 return result

 def _shallow_copy_with_infer(self, values=None, **kwargs):
+ # On equal MultiIndexes the difference is empty. 
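+ # (for example, idx.symmetric_difference(idx) reaches this method
+ # with an empty array of values to infer a dtype from).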
+ # Therefore, an empty MultiIndex is returned GH13490 + if len(values) == 0: + return MultiIndex(levels=[[] for _ in range(self.nlevels)], + labels=[[] for _ in range(self.nlevels)], + **kwargs) return self._shallow_copy(values, **kwargs) @Appender(_index_shared_docs['_shallow_copy']) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 6a2087b37631e..02561cba784b8 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -188,7 +188,6 @@ def test_constructor_ndarray_like(self): # it should be possible to convert any object that satisfies the numpy # ndarray interface directly into an Index class ArrayLike(object): - def __init__(self, array): self.array = array @@ -246,7 +245,6 @@ def test_index_ctor_infer_nan_nat(self): [np.timedelta64('nat'), np.nan], [pd.NaT, np.timedelta64('nat')], [np.timedelta64('nat'), pd.NaT]]: - tm.assert_index_equal(Index(data), exp) tm.assert_index_equal(Index(np.array(data, dtype=object)), exp) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index 483c39ed8694e..fc6c627075c96 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -697,6 +697,17 @@ def test_multiindex_slice_first_level(self): index=range(30, 71)) tm.assert_frame_equal(result, expected) + def test_multiindex_symmetric_difference(self): + # GH 13490 + idx = MultiIndex.from_product([['a', 'b'], ['A', 'B']], + names=['a', 'b']) + result = idx ^ idx + assert result.names == idx.names + + idx2 = idx.copy().rename(['A', 'B']) + result = idx ^ idx2 + assert result.names == [None, None] + class TestMultiIndexSlicers(object): From 79cc4a978ade10e04f1d10dce850852e56876223 Mon Sep 17 00:00:00 2001 From: JosephWagner Date: Wed, 31 May 2017 04:57:57 -0700 Subject: [PATCH 13/55] BUG: select_as_multiple doesn't respect start/stop kwargs GH16209 (#16317) --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/io/pytables.py | 7 ++++--- pandas/tests/io/test_pytables.py | 15 +++++++++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 38cf683208b3d..676da5c370041 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -71,6 +71,7 @@ I/O - Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`) - Bug that raised IndexError HTML-rendering an empty DataFrame (:issue:`15953`) +- Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 625b407dd43be..2940d1f958776 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -806,8 +806,8 @@ def func(_start, _stop, _where): # retrieve the objs, _where is always passed as a set of # coordinates here - objs = [t.read(where=_where, columns=columns, **kwargs) - for t in tbls] + objs = [t.read(where=_where, columns=columns, start=_start, + stop=_stop, **kwargs) for t in tbls] # concat and return return concat(objs, axis=axis, @@ -1432,7 +1432,8 @@ def get_result(self, coordinates=False): # if specified read via coordinates (necessary for multiple selections if coordinates: - where = self.s.read_coordinates(where=self.where) + where = self.s.read_coordinates(where=self.where, start=self.start, + stop=self.stop) else: where = self.where diff --git a/pandas/tests/io/test_pytables.py 
b/pandas/tests/io/test_pytables.py
index 06a4a67964b96..e68de93c3e8ce 100644
--- a/pandas/tests/io/test_pytables.py
+++ b/pandas/tests/io/test_pytables.py
@@ -4221,6 +4221,21 @@ def test_start_stop_table(self):
 expected = df.loc[30:40, ['A']]
 tm.assert_frame_equal(result, expected)

+ def test_start_stop_multiple(self):
+
+ # GH 16209
+ with ensure_clean_store(self.path) as store:
+
+ df = DataFrame({"foo": [1, 2], "bar": [1, 2]})
+
+ store.append_to_multiple({'selector': ['foo'], 'data': None}, df,
+ selector='selector')
+ result = store.select_as_multiple(['selector', 'data'],
+ selector='selector', start=0,
+ stop=1)
+ expected = df.loc[[0], ['foo', 'bar']]
+ tm.assert_frame_equal(result, expected)
+
 def test_start_stop_fixed(self):
 with ensure_clean_store(self.path) as store:
From 0db4de5033548f615505b9a9426d518c8776848e Mon Sep 17 00:00:00 2001
From: Jeff Reback 
Date: Wed, 31 May 2017 18:44:40 -0400
Subject: [PATCH 14/55] BUG: Bug in .resample() and .groupby() when aggregating on integers (#16549)

closes #16361
---
 doc/source/whatsnew/v0.20.2.txt | 1 +
 pandas/core/groupby.py | 10 ++++++----
 pandas/tests/test_resample.py | 22 ++++++++++++++++++++++
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index 676da5c370041..9f88d629880ed 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -88,6 +88,7 @@ Groupby/Resample/Rolling

 - Bug creating datetime rolling window on an empty DataFrame (:issue:`15819`)
 - Bug in ``rolling.cov()`` with offset window (:issue:`16058`)
+- Bug in ``.resample()`` and ``.groupby()`` when aggregating on integers (:issue:`16361`)

 Sparse
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 91b55c414b507..286677d613484 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -3337,13 +3337,15 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True):
 obj = self.obj[data.items[locs]]
 s = groupby(obj, self.grouper)
 result = s.aggregate(lambda x: alt(x, axis=self.axis))
- result = result._data.blocks[0]
+ newb = result._data.blocks[0]

- # see if we can cast the block back to the original dtype
- result = block._try_coerce_and_cast_result(result)
+ finally:
+
+ # see if we can cast the block back to the original dtype
+ result = block._try_coerce_and_cast_result(result)
+ newb = block.make_block(result)

 new_items.append(locs)
- newb = block.make_block_same_class(result)
 new_blocks.append(newb)

 if len(new_blocks) == 0:
diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py
index 170cab4947a5a..959e3d2f459ce 100644
--- a/pandas/tests/test_resample.py
+++ b/pandas/tests/test_resample.py
@@ -1672,6 +1672,28 @@ def test_resample_dtype_preservation(self):
 result = df.groupby('group').resample('1D').ffill()
 assert result.val.dtype == np.int32

+ def test_resample_dtype_coercion(self):
+
+ pytest.importorskip('scipy')
+
+ # GH 16361
+ df = {"a": [1, 3, 1, 4]}
+ df = pd.DataFrame(
+ df, index=pd.date_range("2017-01-01", "2017-01-04"))
+
+ expected = (df.astype("float64")
+ .resample("H")
+ .mean()
+ ["a"]
+ .interpolate("cubic")
+ )
+
+ result = df.resample("H")["a"].mean().interpolate("cubic")
+ tm.assert_series_equal(result, expected)
+
+ result = df.resample("H").mean()["a"].interpolate("cubic")
+ tm.assert_series_equal(result, expected)
+
 def test_weekly_resample_buglet(self):
 # #1327
 rng = date_range('1/1/2000', freq='B', periods=20)
From c193235a10ff399411027cd8496df5c4457b2604 Mon Sep 17 00:00:00 2001
From: 
Matti Picus Date: Thu, 1 Jun 2017 02:04:55 +0300 Subject: [PATCH 15/55] COMPAT: cython str-to-int can raise a ValueError on non-CPython (#16563) --- pandas/_libs/index.pyx | 4 ++-- pandas/core/frame.py | 2 +- pandas/core/indexes/base.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 21680fb0b3921..5e92c506b5d0c 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -152,7 +152,7 @@ cdef class IndexEngine: try: return self.mapping.get_item(val) - except TypeError: + except (TypeError, ValueError): raise KeyError(val) cdef inline _get_loc_duplicates(self, object val): @@ -470,7 +470,7 @@ cdef class DatetimeEngine(Int64Engine): try: val = _to_i8(val) return self.mapping.get_item(val) - except TypeError: + except (TypeError, ValueError): self._date_check_type(val) raise KeyError(val) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 907959c42323e..25c3c3fe4e48e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1918,7 +1918,7 @@ def get_value(self, index, col, takeable=False): try: return engine.get_value(series._values, index) - except TypeError: + except (TypeError, ValueError): # we cannot handle direct indexing # use positional diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2af4f112ca941..e1e08e008f782 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1590,7 +1590,7 @@ def __contains__(self, key): hash(key) try: return key in self._engine - except TypeError: + except (TypeError, ValueError): return False _index_shared_docs['contains'] = """ @@ -1610,7 +1610,7 @@ def contains(self, key): hash(key) try: return key in self._engine - except TypeError: + except (TypeError, ValueError): return False def __hash__(self): From b9febe04b2ca3c44bdf8ce74c1cfb74d6b50152e Mon Sep 17 00:00:00 2001 From: Giulio Pepe Date: Thu, 1 Jun 2017 00:10:24 +0100 Subject: [PATCH 16/55] CLN: raise correct error for Panel sort_values (#16532) --- pandas/core/generic.py | 9 +++++++-- pandas/tests/test_panel.py | 5 +++++ pandas/tests/test_panel4d.py | 5 +++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e541f1532d0a0..98999ec267c82 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2366,9 +2366,14 @@ def add_suffix(self, suffix): 1 A 1 1 """ - def sort_values(self, by, axis=0, ascending=True, inplace=False, + def sort_values(self, by=None, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last'): - raise AbstractMethodError(self) + """ + NOT IMPLEMENTED: do not call this method, as sorting values is not + supported for Panel objects and will raise an error. 
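+
+ A sketch of the resulting behavior (illustrative example only; any
+ call raises):
+
+ >>> pd.Panel(np.zeros((1, 2, 3))).sort_values() # doctest: +SKIP
+ Traceback (most recent call last):
+ ...
+ NotImplementedError: sort_values has not been implemented on Panel or Panel4D objects.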
+ """ + raise NotImplementedError("sort_values has not been implemented " + "on Panel or Panel4D objects.") _shared_docs['sort_index'] = """ Sort object by labels (along an axis) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 3243b69a25acd..e19e42e062932 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -2429,6 +2429,11 @@ def test_all_any_unhandled(self): pytest.raises(NotImplementedError, self.panel.all, bool_only=True) pytest.raises(NotImplementedError, self.panel.any, bool_only=True) + # GH issue 15960 + def test_sort_values(self): + pytest.raises(NotImplementedError, self.panel.sort_values) + pytest.raises(NotImplementedError, self.panel.sort_values, 'ItemA') + class TestLongPanel(object): """ diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 96f02d63712fc..e1995316e7b7c 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -939,3 +939,8 @@ def test_rename(self): def test_get_attr(self): tm.assert_panel_equal(self.panel4d['l1'], self.panel4d.l1) + + # GH issue 15960 + def test_sort_values(self): + pytest.raises(NotImplementedError, self.panel4d.sort_values) + pytest.raises(NotImplementedError, self.panel4d.sort_values, 'ItemA') From 98ed54d0cc7a12653a9c07f00a222e8f7e6f3e62 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 1 Jun 2017 05:30:57 -0500 Subject: [PATCH 17/55] BUG: Fixed pd.unique on array of tuples (#16543) --- doc/source/whatsnew/v0.20.2.txt | 3 +-- pandas/core/algorithms.py | 7 ++++++- pandas/tests/test_algos.py | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 9f88d629880ed..31df5899f0fc3 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -44,8 +44,7 @@ Bug Fixes - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`) - Passing an invalid engine to :func:`read_csv` now raises an informative ``ValueError`` rather than ``UnboundLocalError``. 
(:issue:`16511`) - - +- Bug in :func:`unique` on an array of tuples (:issue:`16519`) - Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on Categoricals (:issue:`16409`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 77d79c9585e57..d74c5e66ea1a9 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -163,7 +163,7 @@ def _ensure_arraylike(values): ABCIndexClass, ABCSeries)): inferred = lib.infer_dtype(values) if inferred in ['mixed', 'string', 'unicode']: - values = np.asarray(values, dtype=object) + values = lib.list_to_object_array(values) else: values = np.asarray(values) return values @@ -328,6 +328,11 @@ def unique(values): [b, a, c] Categories (3, object): [a < b < c] + An array of tuples + + >>> pd.unique([('a', 'b'), ('b', 'a'), ('a', 'c'), ('b', 'a')]) + array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object) + See Also -------- pandas.Index.unique diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 351e646cbb0b2..063dcea5c76d6 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -929,6 +929,22 @@ def test_unique_index(self): tm.assert_numpy_array_equal(case.duplicated(), np.array([False, False, False])) + @pytest.mark.parametrize('arr, unique', [ + ([(0, 0), (0, 1), (1, 0), (1, 1), (0, 0), (0, 1), (1, 0), (1, 1)], + [(0, 0), (0, 1), (1, 0), (1, 1)]), + ([('b', 'c'), ('a', 'b'), ('a', 'b'), ('b', 'c')], + [('b', 'c'), ('a', 'b')]), + ([('a', 1), ('b', 2), ('a', 3), ('a', 1)], + [('a', 1), ('b', 2), ('a', 3)]), + ]) + def test_unique_tuples(self, arr, unique): + # https://github.com/pandas-dev/pandas/issues/16519 + expected = np.empty(len(unique), dtype=object) + expected[:] = unique + + result = pd.unique(arr) + tm.assert_numpy_array_equal(result, expected) + class GroupVarTestMixin(object): From 6d761b4ed5fdbc5171fec5eb81d7b29b2bc658dc Mon Sep 17 00:00:00 2001 From: Patrick O'Melveny Date: Thu, 1 Jun 2017 03:35:18 -0700 Subject: [PATCH 18/55] BUG: Allow non-callable attributes in aggregate function. Fixes GH16405 (#16458) --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/core/base.py | 12 +++++++-- pandas/tests/frame/test_apply.py | 45 +++++++++++++++++++++++++++++++ pandas/tests/series/test_apply.py | 16 +++++++++++ 4 files changed, 72 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 31df5899f0fc3..c8b6dfa134120 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -102,6 +102,7 @@ Reshaping - Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`) - Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`) - Bug in construction of a ``DataFrame`` with mixed dtypes including an all-NaT column. 
(:issue:`16395`)
+- Bug in ``DataFrame.agg()`` and ``Series.agg()`` when aggregating on non-callable attributes (:issue:`16405`)


 Numeric
diff --git a/pandas/core/base.py b/pandas/core/base.py
index a3ef24c80f883..97c4c8626dcbb 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -378,7 +378,7 @@ def aggregate(self, func, *args, **kwargs):
 def _try_aggregate_string_function(self, arg, *args, **kwargs):
 """
 if arg is a string, then try to operate on it:
- - try to find a function on ourselves
+ - try to find a function (or attribute) on ourselves
 - try to find a numpy function
 - raise
@@ -387,7 +387,15 @@ def _try_aggregate_string_function(self, arg, *args, **kwargs):
 f = getattr(self, arg, None)
 if f is not None:
- return f(*args, **kwargs)
+ if callable(f):
+ return f(*args, **kwargs)
+
+ # people may try to aggregate on a non-callable attribute
+ # but don't let them think they can pass args to it
+ assert len(args) == 0
+ assert len([kwarg for kwarg in kwargs
+ if kwarg not in ['axis', '_level']]) == 0
+ return f

 f = getattr(np, arg, None)
 if f is not None:
diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py
index aa7c7a7120c1b..a6f39cabb60ed 100644
--- a/pandas/tests/frame/test_apply.py
+++ b/pandas/tests/frame/test_apply.py
@@ -635,3 +635,48 @@ def test_nuiscance_columns(self):
 expected = DataFrame([[6, 6., 'foobarbaz']],
 index=['sum'],
 columns=['A', 'B', 'C'])
 assert_frame_equal(result, expected)
+
+ def test_non_callable_aggregates(self):
+
+ # GH 16405
+ # 'size' is a property of frame/series
+ # validate that this is working
+ df = DataFrame({'A': [None, 2, 3],
+ 'B': [1.0, np.nan, 3.0],
+ 'C': ['foo', None, 'bar']})
+
+ # Function aggregate
+ result = df.agg({'A': 'count'})
+ expected = pd.Series({'A': 2})
+
+ assert_series_equal(result, expected)
+
+ # Non-function aggregate
+ result = df.agg({'A': 'size'})
+ expected = pd.Series({'A': 3})
+
+ assert_series_equal(result, expected)
+
+ # Mix function and non-function aggs
+ result1 = df.agg(['count', 'size'])
+ result2 = df.agg({'A': ['count', 'size'],
+ 'B': ['count', 'size'],
+ 'C': ['count', 'size']})
+ expected = pd.DataFrame({'A': {'count': 2, 'size': 3},
+ 'B': {'count': 2, 'size': 3},
+ 'C': {'count': 2, 'size': 3}})
+
+ assert_frame_equal(result1, result2, check_like=True)
+ assert_frame_equal(result2, expected, check_like=True)
+
+ # Just a function-name string arg is the same as calling df.arg()
+ result = df.agg('count')
+ expected = df.count()
+
+ assert_series_equal(result, expected)
+
+ # Just an attribute string arg is the same as accessing df.arg
+ result = df.agg('size')
+ expected = df.size
+
+ assert result == expected
diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py
index c273d3161fff5..2c5f0d7772cc2 100644
--- a/pandas/tests/series/test_apply.py
+++ b/pandas/tests/series/test_apply.py
@@ -306,6 +306,22 @@ def test_reduce(self):
 name=self.series.name)
 assert_series_equal(result, expected)

+ def test_non_callable_aggregates(self):
+ # test agg using non-callable series attributes
+ s = Series([1, 2, None])
+
+ # Calling agg w/ just a string arg is the same as calling s.arg
+ result = s.agg('size')
+ expected = s.size
+ assert result == expected
+
+ # test when mixed w/ callable reducers
+ result = s.agg(['size', 'count', 'mean'])
+ expected = Series(OrderedDict([('size', 3.0),
+ ('count', 2.0),
+ ('mean', 1.5)]))
+ assert_series_equal(result[expected.index], expected)
+

class TestSeriesMap(TestData):

From ed542ee91547eea11e6c8fd1c511fa4b67088543 Mon Sep 17 00:00:00 2001 
From: Tom Augspurger Date: Thu, 1 Jun 2017 05:37:00 -0500 Subject: [PATCH 19/55] Strictly monotonic (#16555) --- doc/source/api.rst | 2 + doc/source/whatsnew/v0.20.2.txt | 3 +- pandas/core/indexes/base.py | 50 +++++++++++++++++++ pandas/core/indexes/datetimes.py | 2 +- .../tests/indexes/datetimes/test_datetime.py | 7 +++ pandas/tests/indexes/test_base.py | 6 ++- pandas/tests/indexes/test_multi.py | 26 ++++++++++ pandas/tests/indexes/test_numeric.py | 22 +++++++- pandas/tests/indexes/test_range.py | 10 ++++ 9 files changed, 124 insertions(+), 4 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 350abb00f0849..cdb6e36870f24 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1287,6 +1287,8 @@ Attributes Index.is_monotonic Index.is_monotonic_increasing Index.is_monotonic_decreasing + Index.is_strictly_monotonic_increasing + Index.is_strictly_monotonic_decreasing Index.is_unique Index.has_duplicates Index.dtype diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index c8b6dfa134120..e3328e2d01dc7 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -21,6 +21,7 @@ Enhancements - Unblocked access to additional compression types supported in pytables: 'blosc:blosclz, 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`) - ``Series`` provides a ``to_latex`` method (:issue:`16180`) +- Added :attr:`Index.is_strictly_monotonic_increasing` and :attr:`Index.is_strictly_monotonic_decreasing` properties (:issue:`16515`) .. _whatsnew_0202.performance: @@ -61,7 +62,7 @@ Indexing ^^^^^^^^ - Bug in ``DataFrame.reset_index(level=)`` with single level index (:issue:`16263`) - +- Bug in partial string indexing with a monotonic, but not strictly-monotonic, index incorrectly reversing the slice bounds (:issue:`16515`) I/O ^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e1e08e008f782..e8c2043138edb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1191,6 +1191,15 @@ def is_monotonic_increasing(self): """ return if the index is monotonic increasing (only equal or increasing) values. + + Examples + -------- + >>> Index([1, 2, 3]).is_monotonic_increasing + True + >>> Index([1, 2, 2]).is_monotonic_increasing + True + >>> Index([1, 3, 2]).is_monotonic_increasing + False """ return self._engine.is_monotonic_increasing @@ -1199,9 +1208,50 @@ def is_monotonic_decreasing(self): """ return if the index is monotonic decreasing (only equal or decreasing) values. 
+ + Examples + -------- + >>> Index([3, 2, 1]).is_monotonic_decreasing + True + >>> Index([3, 2, 2]).is_monotonic_decreasing + True + >>> Index([3, 1, 2]).is_monotonic_decreasing + False """ return self._engine.is_monotonic_decreasing + @property + def is_strictly_monotonic_increasing(self): + """return if the index is strictly monotonic increasing + (only increasing) values + + Examples + -------- + >>> Index([1, 2, 3]).is_strictly_monotonic_increasing + True + >>> Index([1, 2, 2]).is_strictly_monotonic_increasing + False + >>> Index([1, 3, 2]).is_strictly_monotonic_increasing + False + """ + return self.is_unique and self.is_monotonic_increasing + + @property + def is_strictly_monotonic_decreasing(self): + """return if the index is strictly monotonic decreasing + (only decreasing) values + + Examples + -------- + >>> Index([3, 2, 1]).is_strictly_monotonic_decreasing + True + >>> Index([3, 2, 2]).is_strictly_monotonic_decreasing + False + >>> Index([3, 1, 2]).is_strictly_monotonic_decreasing + False + """ + return self.is_unique and self.is_monotonic_decreasing + def is_lexsorted_for_tuple(self, tup): return True diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index ec678b1577d81..60560374cd420 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1472,7 +1472,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): # the bounds need swapped if index is reverse sorted and has a # length > 1 (is_monotonic_decreasing gives True for empty # and length 1 index) - if self.is_monotonic_decreasing and len(self) > 1: + if self.is_strictly_monotonic_decreasing and len(self) > 1: return upper if side == 'left' else lower return lower if side == 'left' else upper else: diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 6cba7e17abf8e..f99dcee9e5c8a 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -771,3 +771,10 @@ def test_slice_bounds_empty(self): left = empty_idx._maybe_cast_slice_bound('2015-01-02', 'left', 'loc') exp = Timestamp('2015-01-02 00:00:00') assert left == exp + + def test_slice_duplicate_monotonic(self): + # https://github.com/pandas-dev/pandas/issues/16515 + idx = pd.DatetimeIndex(['2017', '2017']) + result = idx._maybe_cast_slice_bound('2017-01-01', 'left', 'loc') + expected = Timestamp('2017-01-01') + assert result == expected diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 02561cba784b8..a6933316e4291 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1328,8 +1328,10 @@ def test_tuple_union_bug(self): def test_is_monotonic_incomparable(self): index = Index([5, datetime.now(), 7]) - assert not index.is_monotonic + assert not index.is_monotonic_increasing assert not index.is_monotonic_decreasing + assert not index.is_strictly_monotonic_increasing + assert not index.is_strictly_monotonic_decreasing def test_get_set_value(self): values = np.random.randn(100) @@ -2028,6 +2030,8 @@ def test_is_monotonic_na(self): for index in examples: assert not index.is_monotonic_increasing assert not index.is_monotonic_decreasing + assert not index.is_strictly_monotonic_increasing + assert not index.is_strictly_monotonic_decreasing def test_repr_summary(self): with cf.option_context('display.max_seq_items', 10): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 
1fe4d85815c4b..388a49d25cb82 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -2373,22 +2373,30 @@ def test_is_monotonic(self):
        i = MultiIndex.from_product([np.arange(10),
                                     np.arange(10)], names=['one', 'two'])
        assert i.is_monotonic
+       assert i.is_strictly_monotonic_increasing
        assert Index(i.values).is_monotonic
+       assert Index(i.values).is_strictly_monotonic_increasing

        i = MultiIndex.from_product([np.arange(10, 0, -1),
                                     np.arange(10)], names=['one', 'two'])
        assert not i.is_monotonic
+       assert not i.is_strictly_monotonic_increasing
        assert not Index(i.values).is_monotonic
+       assert not Index(i.values).is_strictly_monotonic_increasing

        i = MultiIndex.from_product([np.arange(10),
                                     np.arange(10, 0, -1)],
                                    names=['one', 'two'])
        assert not i.is_monotonic
+       assert not i.is_strictly_monotonic_increasing
        assert not Index(i.values).is_monotonic
+       assert not Index(i.values).is_strictly_monotonic_increasing

        i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']])
        assert not i.is_monotonic
+       assert not i.is_strictly_monotonic_increasing
        assert not Index(i.values).is_monotonic
+       assert not Index(i.values).is_strictly_monotonic_increasing

        # string ordering
        i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
@@ -2398,6 +2406,8 @@ def test_is_monotonic(self):
                       names=['first', 'second'])
        assert not i.is_monotonic
        assert not Index(i.values).is_monotonic
+       assert not i.is_strictly_monotonic_increasing
+       assert not Index(i.values).is_strictly_monotonic_increasing

        i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'],
                               ['mom', 'next', 'zenith']],
@@ -2406,6 +2416,8 @@ def test_is_monotonic(self):
                       names=['first', 'second'])
        assert i.is_monotonic
        assert Index(i.values).is_monotonic
+       assert i.is_strictly_monotonic_increasing
+       assert Index(i.values).is_strictly_monotonic_increasing

        # mixed levels, hits the TypeError
        i = MultiIndex(
@@ -2416,6 +2428,20 @@ def test_is_monotonic(self):
            names=['household_id', 'asset_id'])

        assert not i.is_monotonic
+       assert not i.is_strictly_monotonic_increasing
+
+   def test_is_strictly_monotonic(self):
+       idx = pd.MultiIndex(levels=[['bar', 'baz'], ['mom', 'next']],
+                           labels=[[0, 0, 1, 1], [0, 0, 0, 1]])
+       assert idx.is_monotonic_increasing
+       assert not idx.is_strictly_monotonic_increasing
+
+   @pytest.mark.xfail(reason="buggy MultiIndex.is_monotonic_decreasing.")
+   def test_is_strictly_monotonic_decreasing(self):
+       idx = pd.MultiIndex(levels=[['baz', 'bar'], ['next', 'mom']],
+                           labels=[[0, 0, 1, 1], [0, 0, 0, 1]])
+       assert idx.is_monotonic_decreasing
+       assert not idx.is_strictly_monotonic_decreasing

    def test_reconstruct_sort(self):

diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py
index 3d06f1672ae32..77f34dbf210e0 100644
--- a/pandas/tests/indexes/test_numeric.py
+++ b/pandas/tests/indexes/test_numeric.py
@@ -465,16 +465,36 @@ def test_view(self):

    def test_is_monotonic(self):
        assert self.index.is_monotonic
        assert self.index.is_monotonic_increasing
+       assert self.index.is_strictly_monotonic_increasing
        assert not self.index.is_monotonic_decreasing
+       assert not self.index.is_strictly_monotonic_decreasing

        index = self._holder([4, 3, 2, 1])
        assert not index.is_monotonic
-       assert index.is_monotonic_decreasing
+       assert not index.is_strictly_monotonic_increasing
+       assert index.is_strictly_monotonic_decreasing

        index = self._holder([1])
        assert index.is_monotonic
        assert index.is_monotonic_increasing
        assert index.is_monotonic_decreasing
+       assert index.is_strictly_monotonic_increasing
+       assert index.is_strictly_monotonic_decreasing
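
Note: the strict variants exercised in these tests are defined in the base.py
hunk of this patch as uniqueness plus weak monotonicity, so the two families
of properties only diverge on an index that contains repeated values. A
minimal illustration against this branch's API (an editorial sketch, not part
of the patch itself):

    >>> from pandas import Index
    >>> Index([1, 2, 2]).is_monotonic_increasing
    True
    >>> Index([1, 2, 2]).is_strictly_monotonic_increasing
    False
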
+ + def test_is_strictly_monotonic(self): + index = self._holder([1, 1, 2, 3]) + assert index.is_monotonic_increasing + assert not index.is_strictly_monotonic_increasing + + index = self._holder([3, 2, 1, 1]) + assert index.is_monotonic_decreasing + assert not index.is_strictly_monotonic_decreasing + + index = self._holder([1, 1]) + assert index.is_monotonic_increasing + assert index.is_monotonic_decreasing + assert not index.is_strictly_monotonic_increasing + assert not index.is_strictly_monotonic_decreasing def test_logical_compat(self): idx = self.create_index() diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index c7af0954cf483..db8180cb736c4 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -331,25 +331,35 @@ def test_is_monotonic(self): assert self.index.is_monotonic assert self.index.is_monotonic_increasing assert not self.index.is_monotonic_decreasing + assert self.index.is_strictly_monotonic_increasing + assert not self.index.is_strictly_monotonic_decreasing index = RangeIndex(4, 0, -1) assert not index.is_monotonic + assert not index.is_strictly_monotonic_increasing assert index.is_monotonic_decreasing + assert index.is_strictly_monotonic_decreasing index = RangeIndex(1, 2) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing + assert index.is_strictly_monotonic_increasing + assert index.is_strictly_monotonic_decreasing index = RangeIndex(2, 1) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing + assert index.is_strictly_monotonic_increasing + assert index.is_strictly_monotonic_decreasing index = RangeIndex(1, 1) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing + assert index.is_strictly_monotonic_increasing + assert index.is_strictly_monotonic_decreasing def test_equals_range(self): equiv_pairs = [(RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)), From f92ec386ea9784177598c937da47bee4cf4c5204 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 1 Jun 2017 06:38:50 -0400 Subject: [PATCH 20/55] COMPAT: Consider Python 2.x tarfiles file-like (#16533) --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/core/dtypes/inference.py | 2 +- pandas/io/parsers.py | 29 ++++++++++++--- pandas/tests/dtypes/test_inference.py | 4 +- pandas/tests/io/parser/c_parser_only.py | 36 ++++++++++++++++++ pandas/tests/io/parser/data/tar_csv.tar | Bin 0 -> 10240 bytes pandas/tests/io/parser/data/tar_csv.tar.gz | Bin 0 -> 10240 bytes pandas/tests/io/parser/test_unsupported.py | 41 ++++++++++++++++----- setup.py | 2 + 9 files changed, 98 insertions(+), 17 deletions(-) create mode 100644 pandas/tests/io/parser/data/tar_csv.tar create mode 100644 pandas/tests/io/parser/data/tar_csv.tar.gz diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index e3328e2d01dc7..e309ac0a79e4b 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -70,6 +70,7 @@ I/O - Bug in pd.read_csv() when comment is passed in space deliminted text files (:issue:`16472`) - Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`) - Bug that raised IndexError HTML-rendering an empty DataFrame (:issue:`15953`) +- Bug in ``pd.read_csv()`` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`) - Bug in ``HDFStore.select_as_multiple()`` where start/stop 
arguments were not respected (:issue:`16209`) diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index a5316a83612cb..ff7e215951a1f 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -171,7 +171,7 @@ def is_file_like(obj): if not (hasattr(obj, 'read') or hasattr(obj, 'write')): return False - if not is_iterator(obj): + if not hasattr(obj, "__iter__"): return False return True diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 12b606d969c7d..aab70c8ce2cd4 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -13,7 +13,7 @@ import numpy as np from pandas import compat -from pandas.compat import (range, lrange, StringIO, lzip, +from pandas.compat import (range, lrange, PY3, StringIO, lzip, zip, string_types, map, u) from pandas.core.dtypes.common import ( is_integer, _ensure_object, @@ -31,10 +31,10 @@ from pandas.core.common import AbstractMethodError from pandas.io.date_converters import generic_parser from pandas.errors import ParserWarning, ParserError, EmptyDataError -from pandas.io.common import (get_filepath_or_buffer, _validate_header_arg, - _get_handle, UnicodeReader, UTF8Recoder, - BaseIterator, - _NA_VALUES, _infer_compression) +from pandas.io.common import (get_filepath_or_buffer, is_file_like, + _validate_header_arg, _get_handle, + UnicodeReader, UTF8Recoder, _NA_VALUES, + BaseIterator, _infer_compression) from pandas.core.tools import datetimes as tools from pandas.util._decorators import Appender @@ -755,7 +755,9 @@ def __init__(self, f, engine=None, **kwds): self.squeeze = options.pop('squeeze', False) # might mutate self.engine + self.engine = self._check_file_or_buffer(f, engine) self.options, self.engine = self._clean_options(options, engine) + if 'has_index_names' in kwds: self.options['has_index_names'] = kwds['has_index_names'] @@ -801,6 +803,23 @@ def _get_options_with_defaults(self, engine): return options + def _check_file_or_buffer(self, f, engine): + # see gh-16530 + if is_file_like(f): + next_attr = "__next__" if PY3 else "next" + + # The C engine doesn't need the file-like to have the "next" or + # "__next__" attribute. However, the Python engine explicitly calls + # "next(...)" when iterating through such an object, meaning it + # needs to have that attribute ("next" for Python 2.x, "__next__" + # for Python 3.x) + if engine != "c" and not hasattr(f, next_attr): + msg = ("The 'python' engine cannot iterate " + "through this file buffer.") + raise ValueError(msg) + + return engine + def _clean_options(self, options, engine): result = options.copy() diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index b88481abcb2ec..ec5fe45d7f610 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -120,9 +120,9 @@ class MockFile(object): m = MockFile() assert not is_file(m) + # gh-16530: Valid iterator just means we have the + # __iter__ attribute for our purposes. MockFile.__iter__ = lambda self: self - MockFile.__next__ = lambda self: 0 - MockFile.next = MockFile.__next__ # Valid write-only file m = MockFile() diff --git a/pandas/tests/io/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py index 56ac10404b7b2..48812c04e3b55 100644 --- a/pandas/tests/io/parser/c_parser_only.py +++ b/pandas/tests/io/parser/c_parser_only.py @@ -7,7 +7,9 @@ further arguments when parsing. 
""" +import os import sys +import tarfile import pytest import numpy as np @@ -446,3 +448,37 @@ def test_comment_whitespace_delimited(self): [7, np.nan], [8, np.nan]]) tm.assert_frame_equal(df, expected) + + def test_file_like_no_next(self): + # gh-16530: the file-like need not have a "next" or "__next__" + # attribute despite having an "__iter__" attribute. + # + # NOTE: This is only true for the C engine, not Python engine. + class NoNextBuffer(StringIO): + def __next__(self): + raise AttributeError("No next method") + + next = __next__ + + data = "a\n1" + + expected = pd.DataFrame({"a": [1]}) + result = self.read_csv(NoNextBuffer(data)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tar_suffix", [".tar", ".tar.gz"]) + def test_read_tarfile(self, tar_suffix): + # see gh-16530 + # + # Unfortunately, Python's CSV library can't handle + # tarfile objects (expects string, not bytes when + # iterating through a file-like). + tar_path = os.path.join(self.dirpath, "tar_csv" + tar_suffix) + + tar = tarfile.open(tar_path, "r") + data_file = tar.extractfile("tar_data.csv") + + out = self.read_csv(data_file) + expected = pd.DataFrame({"a": [1]}) + tm.assert_frame_equal(out, expected) diff --git a/pandas/tests/io/parser/data/tar_csv.tar b/pandas/tests/io/parser/data/tar_csv.tar new file mode 100644 index 0000000000000000000000000000000000000000..d1819550e0a0064b4d9ad829f120e49760c3ffe2 GIT binary patch literal 10240 zcmeIuK?;O03_#JW1@F)k3{BNsM}nR}J9B-TY7p4K!(9_GO$s`)68z@>0YS zW+wk!;+jjzL^~}framQ!s=W;o;!FQIwf(Nymk>_1JC}X6!*X|eRCwcUqis`RFe4E_ x009ILKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R#|0009IZ32f(E6h{C6 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/data/tar_csv.tar.gz b/pandas/tests/io/parser/data/tar_csv.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5a0f3e1b580535a3fbdc2ff943b79d8c585df9f GIT binary patch literal 10240 zcmeIu%?W@o41m#`1$XEK(ok*3k)RW3b$+WS^{9xKFPG3j^YgMz{b<>mVP55<@Fil5 zvgZ=_TuM;(Z{IR;yy82--BN0FV w0R#|0009ILKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R&zNY&Ss^RsaA1 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 3f62ff44531fb..5d248f2fef59c 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -16,6 +16,13 @@ from pandas.errors import ParserError from pandas.io.parsers import read_csv, read_table +import pytest + + +@pytest.fixture(params=["python", "python-fwf"], ids=lambda val: val) +def python_engine(request): + return request.param + class TestUnsupportedFeatures(object): @@ -82,7 +89,7 @@ def test_c_engine(self): with tm.assert_raises_regex(ValueError, msg): read_csv(StringIO(data), lineterminator='~~') - def test_python_engine(self): + def test_python_engine(self, python_engine): from pandas.io.parsers import _python_unsupported as py_unsupported data = """1,2,3,, @@ -90,16 +97,32 @@ def test_python_engine(self): 1,2,3,4,5 1,2,,, 1,2,3,4,""" - engines = 'python', 'python-fwf' - for engine in engines: - for default in py_unsupported: - msg = ('The %r option is not supported ' - 'with the %r engine' % (default, engine)) + for default in py_unsupported: + msg = ('The %r option is not supported ' + 'with the %r engine' % (default, python_engine)) + + kwargs = {default: object()} + with tm.assert_raises_regex(ValueError, msg): + read_csv(StringIO(data), engine=python_engine, **kwargs) - kwargs = {default: object()} - with tm.assert_raises_regex(ValueError, msg): - 
read_csv(StringIO(data), engine=engine, **kwargs) + def test_python_engine_file_no_next(self, python_engine): + # see gh-16530 + class NoNextBuffer(object): + def __init__(self, csv_data): + self.data = csv_data + + def __iter__(self): + return self + + def read(self): + return self.data + + data = "a\n1" + msg = "The 'python' engine cannot iterate" + + with tm.assert_raises_regex(ValueError, msg): + read_csv(NoNextBuffer(data), engine=python_engine) class TestDeprecatedFeatures(object): diff --git a/setup.py b/setup.py index 82d5f407228a9..31a3cddc3f9fd 100755 --- a/setup.py +++ b/setup.py @@ -702,6 +702,8 @@ def pxd(name): 'parser/data/*.gz', 'parser/data/*.bz2', 'parser/data/*.txt', + 'parser/data/*.tar', + 'parser/data/*.tar.gz', 'sas/data/*.csv', 'sas/data/*.xpt', 'sas/data/*.sas7bdat', From 3f70fda0183f860955d098dd637fbc9125dc92ec Mon Sep 17 00:00:00 2001 From: Christian Prinoth Date: Thu, 1 Jun 2017 06:50:27 -0400 Subject: [PATCH 21/55] BUG: Fixed to_html ignoring index_names parameter closes #16493 Author: Christian Prinoth Author: Tom Augspurger Author: Christian Prinoth Author: Jeff Reback This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #16495 from CRP/bugfix_16493 and squashes the following commits: 567ae69 [Jeff Reback] doc corrections 8429f9a [Tom Augspurger] Fixed lint error 469a0e6 [Christian Prinoth] BUG: fix for bug 16493 20d512f [Christian Prinoth] BUG: fix for bug 16493 6bef829 [Christian Prinoth] BUG: fix for bug 16493 426565e [Christian Prinoth] BUG: fix for bug 16493 a40820d [Christian Prinoth] BUG: fix for bug 16493 --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/io/formats/format.py | 4 +++- pandas/tests/io/formats/test_to_html.py | 7 +++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index e309ac0a79e4b..e918bc4fccfca 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -71,6 +71,7 @@ I/O - Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`) - Bug that raised IndexError HTML-rendering an empty DataFrame (:issue:`15953`) - Bug in ``pd.read_csv()`` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`) +- Bug where ``DataFrame.to_html()`` ignored the ``index_names`` parameter (:issue:`16493`) - Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 054db769c56dd..3deaec2dfbbc5 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1292,7 +1292,9 @@ def _column_header(): self.write_tr(col_row, indent, self.indent_delta, header=True, align=align) - if self.fmt.has_index_names and self.fmt.index: + if all((self.fmt.has_index_names, + self.fmt.index, + self.fmt.show_index_names)): row = ([x if x is not None else '' for x in self.frame.index.names] + [''] * min(len(self.columns), self.max_cols)) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index cde920b1511d2..9f4e532ec2287 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1869,3 +1869,10 @@ def test_to_html_notebook_has_no_style(self): df = pd.DataFrame({"A": [1, 2, 3]}) result = df.to_html() assert "thead tr:only-child" not in result + + def test_to_html_with_index_names_false(self): + # gh-16493 
+        df = pd.DataFrame({"A": [1, 2]}, index=pd.Index(['a', 'b'],
+                                                        name='myindexname'))
+        result = df.to_html(index_names=False)
+        assert 'myindexname' not in result

From 785887a2e27b92d48b5060828ed2f49b7992024d Mon Sep 17 00:00:00 2001
From: economy 
Date: Thu, 1 Jun 2017 06:56:20 -0400
Subject: [PATCH 22/55] BUG: fixed wrong order of ordered labels in pd.cut()

closes #16459

Author: economy

This patch had conflicts when merged, resolved by
Committer: Jeff Reback 

Closes #16466 from economy/fix_cut and squashes the following commits:

29128b3 [economy] comments and whatsnew edits
3898b72 [economy] BUG: fixed wrong order of ordered labels in pd.cut()
---
 doc/source/whatsnew/v0.20.2.txt   | 4 +---
 pandas/core/reshape/tile.py       | 2 +-
 pandas/tests/reshape/test_tile.py | 8 ++++++++
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index e918bc4fccfca..379249b6e55d6 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -46,11 +46,9 @@ Bug Fixes
 - Passing an invalid engine to :func:`read_csv` now raises an informative
   ``ValueError`` rather than ``UnboundLocalError``. (:issue:`16511`)
 - Bug in :func:`unique` on an array of tuples (:issue:`16519`)
-
-
+- Bug in :func:`cut` when ``labels`` are set, resulting in incorrect label ordering (:issue:`16459`)
 - Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on Categoricals (:issue:`16409`)
-
 
 Conversion
 ^^^^^^^^^^
diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
index 866f229bec418..d8398023a5083 100644
--- a/pandas/core/reshape/tile.py
+++ b/pandas/core/reshape/tile.py
@@ -254,7 +254,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None,
             raise ValueError('Bin labels must be one fewer than '
                              'the number of bin edges')
         if not is_categorical_dtype(labels):
-            labels = Categorical(labels, ordered=True)
+            labels = Categorical(labels, categories=labels, ordered=True)
 
         np.putmask(ids, na_mask, 0)
         result = algos.take_nd(labels, ids - 1)
diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py
index 8602b33856fea..542af321632cf 100644
--- a/pandas/tests/reshape/test_tile.py
+++ b/pandas/tests/reshape/test_tile.py
@@ -211,6 +211,7 @@ def test_cut_pass_labels(self):
 
         result = cut(arr, bins, labels=labels)
         exp = Categorical(['Medium'] + 4 * ['Small'] + ['Medium', 'Large'],
+                          categories=labels,
                           ordered=True)
         tm.assert_categorical_equal(result, exp)
 
@@ -219,6 +220,13 @@ def test_cut_pass_labels(self):
         exp = Categorical.from_codes([1] + 4 * [0] + [1, 2], labels)
         tm.assert_categorical_equal(result, exp)
 
+        # issue 16459
+        labels = ['Good', 'Medium', 'Bad']
+        result = cut(arr, 3, labels=labels)
+        exp = cut(arr, 3, labels=Categorical(labels, categories=labels,
+                                             ordered=True))
+        tm.assert_categorical_equal(result, exp)
+
     def test_qcut_include_lowest(self):
         values = np.arange(10)
 
From 746c3cbe34330667f3ca85320796a72161bcde37 Mon Sep 17 00:00:00 2001
From: Jeff Reback 
Date: Thu, 1 Jun 2017 07:37:15 -0400
Subject: [PATCH 23/55] fix linting

---
 pandas/tests/reshape/test_tile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py
index 542af321632cf..2523f8ab9f776 100644
--- a/pandas/tests/reshape/test_tile.py
+++ b/pandas/tests/reshape/test_tile.py
@@ -220,7 +220,7 @@ def test_cut_pass_labels(self):
         exp = Categorical.from_codes([1] + 4 * [0] + [1, 2], labels)
         tm.assert_categorical_equal(result, exp)
 
-        # 
issue 16459 + # issue 16459 labels = ['Good', 'Medium', 'Bad'] result = cut(arr, 3, labels=labels) exp = cut(arr, 3, labels=Categorical(labels, categories=labels, From 885522aea70ff73418dab592fbd00d3eaecf36ee Mon Sep 17 00:00:00 2001 From: Hugues Valois Date: Thu, 1 Jun 2017 12:31:52 -0700 Subject: [PATCH 24/55] TST: writing invalid table names to sqlite (#16464) * Add test for bug #13206. * Improve test by reading back the values from sql and comparing. Also fixes coding style violation. --- pandas/tests/io/test_sql.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 7b3717281bf89..a6ad44ba31422 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -816,6 +816,16 @@ def test_unicode_column_name(self): df = DataFrame([[1, 2], [3, 4]], columns=[u'\xe9', u'b']) df.to_sql('test_unicode', self.conn, index=False) + def test_escaped_table_name(self): + # GH 13206 + df = DataFrame({'A': [0, 1, 2], 'B': [0.2, np.nan, 5.6]}) + df.to_sql('d1187b08-4943-4c8d-a7f6', self.conn, index=False) + + res = sql.read_sql_query('SELECT * FROM `d1187b08-4943-4c8d-a7f6`', + self.conn) + + tm.assert_frame_equal(res, df) + @pytest.mark.single class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi): From 79beeb6bb8618602ce712f7a6994e283de0e0ade Mon Sep 17 00:00:00 2001 From: "John W. O'Brien" Date: Thu, 1 Jun 2017 15:50:37 -0400 Subject: [PATCH 25/55] TST: Skip test_database_uri_string if pg8000 importable (#16528) --- pandas/tests/io/test_sql.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a6ad44ba31422..deeb8cba2b228 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -948,6 +948,13 @@ def test_database_uri_string(self): # using driver that will not be installed on Travis to trigger error # in sqlalchemy.create_engine -> test passing of this error to user + try: + # the rest of this test depends on pg8000's being absent + import pg8000 # noqa + pytest.skip("pg8000 is installed") + except ImportError: + pass + db_uri = "postgresql+pg8000://user:pass@host/dbname" with tm.assert_raises_regex(ImportError, "pg8000"): sql.read_sql("select * from table", db_uri) From a7c95f2bc8618c0a07d3f8d593e170017791b5de Mon Sep 17 00:00:00 2001 From: kiwirob Date: Thu, 1 Jun 2017 20:59:44 +0100 Subject: [PATCH 26/55] DOC: Remove incorrect elements of PeriodIndex docstring (#16553) * DOC: Remove incorrect elements of PeriodIndex docstring See #9056. * Removed trailing space --- pandas/core/indexes/period.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 15fd9b7dc2b6a..f8af6c8303d99 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -125,15 +125,7 @@ def _new_PeriodIndex(cls, **d): class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray holding ordinal values indicating regular periods in - time such as particular years, quarters, months, etc. A value of 1 is the - period containing the Gregorian proleptic datetime Jan 1, 0001 00:00:00. - This ordinal representation is from the scikits.timeseries project. - - For instance, - # construct period for day 1/1/1 and get the first second - i = Period(year=1,month=1,day=1,freq='D').asfreq('S', 'S') - i.ordinal - ===> 1 + time such as particular years, quarters, months, etc. 
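
Note: the example removed above was incorrect (see the discussion in #9056),
because Period ordinals in pandas are anchored at the Unix epoch rather than
at year 1. A quick check of that claim (an editorial sketch, assuming the
pandas API of this era, not part of the patch itself):

    >>> import pandas as pd
    >>> pd.Period(year=1970, month=1, day=1, freq='D').ordinal
    0
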
Index keys are boxed to Period objects which carries the metadata (eg, frequency information). From 50479ae90a92edf89da4589bd5402660dfc3a69c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 1 Jun 2017 17:09:52 -0500 Subject: [PATCH 27/55] TST: Make HDF5 fspath write test robust (#16575) The test_write_fspath_all test would fail on the HDF5 example occasionally (about 1/100 in my experience). Apparently you don't get an identical HDF5 every single time. This refactors that test out to its own where we write and read both versions, and compare equality that way. --- pandas/tests/io/test_common.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 289f86eb2dc53..b527e3c5dc254 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -143,7 +143,6 @@ def test_read_fspath_all(self, reader, module, path): ('to_csv', {}, 'os'), ('to_excel', {'engine': 'xlwt'}, 'xlwt'), ('to_feather', {}, 'feather'), - ('to_hdf', {'key': 'bar', 'mode': 'w'}, 'tables'), ('to_html', {}, 'os'), ('to_json', {}, 'os'), ('to_latex', {}, 'os'), @@ -171,6 +170,26 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module): assert result == expected + def test_write_fspath_hdf5(self): + # Same test as write_fspath_all, except HDF5 files aren't + # necessarily byte-for-byte identical for a given dataframe, so we'll + # have to read and compare equality + pytest.importorskip('tables') + + df = pd.DataFrame({"A": [1, 2]}) + p1 = tm.ensure_clean('string') + p2 = tm.ensure_clean('fspath') + + with p1 as string, p2 as fspath: + mypath = CustomFSPath(fspath) + df.to_hdf(mypath, key='bar') + df.to_hdf(string, key='bar') + + result = pd.read_hdf(fspath, key='bar') + expected = pd.read_hdf(string, key='bar') + + tm.assert_frame_equal(result, expected) + class TestMMapWrapper(object): From b8ca9fcdcbccac2fe41d144134977d8ae95ce1ba Mon Sep 17 00:00:00 2001 From: DSM Date: Thu, 1 Jun 2017 18:12:14 -0400 Subject: [PATCH 28/55] ENH: add .ngroup() method to groupby objects (#14026) (#14026) --- doc/source/api.rst | 1 + doc/source/groupby.rst | 63 +++++++- doc/source/reshaping.rst | 2 +- doc/source/whatsnew/v0.20.2.txt | 5 + pandas/core/groupby.py | 75 +++++++++- pandas/tests/groupby/test_counting.py | 197 +++++++++++++++++++++++++ pandas/tests/groupby/test_groupby.py | 54 ------- pandas/tests/groupby/test_whitelist.py | 4 +- 8 files changed, 338 insertions(+), 63 deletions(-) create mode 100644 pandas/tests/groupby/test_counting.py diff --git a/doc/source/api.rst b/doc/source/api.rst index cdb6e36870f24..04b952a99e8f7 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1708,6 +1708,7 @@ Computations / Descriptive Stats GroupBy.mean GroupBy.median GroupBy.min + GroupBy.ngroup GroupBy.nth GroupBy.ohlc GroupBy.prod diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index cf4f1059ae17a..865f1ccae2c04 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -1122,12 +1122,36 @@ To see the order in which each row appears within its group, use the .. ipython:: python - df = pd.DataFrame(list('aaabba'), columns=['A']) - df + dfg = pd.DataFrame(list('aaabba'), columns=['A']) + dfg + + dfg.groupby('A').cumcount() + + dfg.groupby('A').cumcount(ascending=False) + +.. _groupby.ngroup: + +Enumerate groups +~~~~~~~~~~~~~~~~ + +.. 
versionadded:: 0.20.2
+
+To see the ordering of the groups (as opposed to the order of rows
+within a group given by ``cumcount``) you can use the ``ngroup``
+method.
+
+Note that the numbers given to the groups match the order in which the
+groups would be seen when iterating over the groupby object, not the
+order they are first observed.
+
 .. ipython:: python
 
-   df.groupby('A').cumcount()
+   dfg = pd.DataFrame(list('aaabba'), columns=['A'])
+   dfg
 
-   df.groupby('A').cumcount(ascending=False)  # kwarg only
+   dfg.groupby('A').ngroup()
+
+   dfg.groupby('A').ngroup(ascending=False)
 
 Plotting
 ~~~~~~~~
@@ -1176,14 +1200,41 @@ Regroup columns of a DataFrame according to their sum, and sum the aggregated on
 
    df
    df.groupby(df.sum(), axis=1).sum()
 
+.. _groupby.multicolumn_factorization:
+
+Multi-column factorization
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By using ``.ngroup()``, we can extract information about the groups in
+a way similar to :func:`factorize` (as described further in the
+:ref:`reshaping API <reshaping.factorize>`) but which applies
+naturally to multiple columns of mixed type and different
+sources. This can be useful as an intermediate categorical-like step
+in processing, when the relationships between the group rows are more
+important than their content, or as input to an algorithm which only
+accepts the integer encoding. (For more information about support in
+pandas for full categorical data, see the :ref:`Categorical
+introduction <categorical>` and the
+:ref:`API documentation <api.categorical>`.)
+
+.. ipython:: python
+
+   dfg = pd.DataFrame({"A": [1, 1, 2, 3, 2], "B": list("aaaba")})
+
+   dfg
+
+   dfg.groupby(["A", "B"]).ngroup()
+
+   dfg.groupby(["A", [0, 0, 0, 1, 1]]).ngroup()
+
 Groupby by Indexer to 'resample' data
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Resampling produces new hypothetical samples(resamples) from already existing observed data or from a model that generates data. These new samples are similar to the pre-existing samples.
+Resampling produces new hypothetical samples (resamples) from already existing observed data or from a model that generates data. These new samples are similar to the pre-existing samples.
 
 In order to resample to work on indices that are non-datetimelike, the following procedure can be utilized.
 
-In the following examples, **df.index // 5** returns a binary array which is used to determine what get's selected for the groupby operation.
+In the following examples, **df.index // 5** returns a binary array which is used to determine what gets selected for the groupby operation.
 
 .. note:: The below example shows how we can downsample by consolidation of samples into fewer samples. Here by using **df.index // 5**, we are aggregating the samples in bins. By applying **std()** function, we aggregate the information contained in many samples into a small subset of values which is their standard deviation thereby reducing the number of samples.
 
diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst
index b93749922c8ea..5f125e329f6f1 100644
--- a/doc/source/reshaping.rst
+++ b/doc/source/reshaping.rst
@@ -636,7 +636,7 @@ When a column contains only one level, it will be omitted in the result.
 
    pd.get_dummies(df, drop_first=True)
 
-
+.. 
_reshaping.factorize: Factorizing values ------------------ diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 379249b6e55d6..4028d594d954f 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -23,6 +23,11 @@ Enhancements - ``Series`` provides a ``to_latex`` method (:issue:`16180`) - Added :attr:`Index.is_strictly_monotonic_increasing` and :attr:`Index.is_strictly_monotonic_decreasing` properties (:issue:`16515`) +- A new groupby method :meth:`~pandas.core.groupby.GroupBy.ngroup`, + parallel to the existing :meth:`~pandas.core.groupby.GroupBy.cumcount`, + has been added to return the group order (:issue:`11642`); see + :ref:`here `. + .. _whatsnew_0202.performance: Performance Improvements diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 286677d613484..9d6d2297f6ea0 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -150,7 +150,7 @@ 'last', 'first', 'head', 'tail', 'median', 'mean', 'sum', 'min', 'max', - 'cumcount', + 'cumcount', 'ngroup', 'resample', 'rank', 'quantile', 'fillna', @@ -1437,6 +1437,75 @@ def nth(self, n, dropna=None): return result + @Substitution(name='groupby') + @Appender(_doc_template) + def ngroup(self, ascending=True): + """ + Number each group from 0 to the number of groups - 1. + + This is the enumerative complement of cumcount. Note that the + numbers given to the groups match the order in which the groups + would be seen when iterating over the groupby object, not the + order they are first observed. + + .. versionadded:: 0.20.2 + + Parameters + ---------- + ascending : bool, default True + If False, number in reverse, from number of group - 1 to 0. + + Examples + -------- + + >>> df = pd.DataFrame({"A": list("aaabba")}) + >>> df + A + 0 a + 1 a + 2 a + 3 b + 4 b + 5 a + >>> df.groupby('A').ngroup() + 0 0 + 1 0 + 2 0 + 3 1 + 4 1 + 5 0 + dtype: int64 + >>> df.groupby('A').ngroup(ascending=False) + 0 1 + 1 1 + 2 1 + 3 0 + 4 0 + 5 1 + dtype: int64 + >>> df.groupby(["A", [1,1,2,3,2,1]]).ngroup() + 0 0 + 1 0 + 2 1 + 3 3 + 4 2 + 5 0 + dtype: int64 + + See also + -------- + .cumcount : Number the rows in each group. + + """ + + self._set_group_selection() + + index = self._selected_obj.index + result = Series(self.grouper.group_info[0], index) + if not ascending: + result = self.ngroups - 1 - result + return result + @Substitution(name='groupby') @Appender(_doc_template) def cumcount(self, ascending=True): @@ -1481,6 +1550,10 @@ def cumcount(self, ascending=True): 4 0 5 0 dtype: int64 + + See also + -------- + .ngroup : Number the groups themselves. 
""" self._set_group_selection() diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py new file mode 100644 index 0000000000000..485241d593d4f --- /dev/null +++ b/pandas/tests/groupby/test_counting.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function + +import numpy as np + +from pandas import (DataFrame, Series, MultiIndex) +from pandas.util.testing import assert_series_equal +from pandas.compat import (range, product as cart_product) + + +class TestCounting(object): + + def test_cumcount(self): + df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A']) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 1, 2, 0, 3]) + + assert_series_equal(expected, g.cumcount()) + assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_empty(self): + ge = DataFrame().groupby(level=0) + se = Series().groupby(level=0) + + # edge case, as this is usually considered float + e = Series(dtype='int64') + + assert_series_equal(e, ge.cumcount()) + assert_series_equal(e, se.cumcount()) + + def test_cumcount_dupe_index(self): + df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], + index=[0] * 5) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=[0] * 5) + + assert_series_equal(expected, g.cumcount()) + assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_mi(self): + mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) + df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], + index=mi) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=mi) + + assert_series_equal(expected, g.cumcount()) + assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_groupby_not_col(self): + df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], + index=[0] * 5) + g = df.groupby([0, 0, 0, 1, 0]) + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=[0] * 5) + + assert_series_equal(expected, g.cumcount()) + assert_series_equal(expected, sg.cumcount()) + + def test_ngroup(self): + df = DataFrame({'A': list('aaaba')}) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 0, 0, 1, 0]) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_distinct(self): + df = DataFrame({'A': list('abcde')}) + g = df.groupby('A') + sg = g.A + + expected = Series(range(5), dtype='int64') + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_one_group(self): + df = DataFrame({'A': [0] * 5}) + g = df.groupby('A') + sg = g.A + + expected = Series([0] * 5) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_empty(self): + ge = DataFrame().groupby(level=0) + se = Series().groupby(level=0) + + # edge case, as this is usually considered float + e = Series(dtype='int64') + + assert_series_equal(e, ge.ngroup()) + assert_series_equal(e, se.ngroup()) + + def test_ngroup_series_matches_frame(self): + df = DataFrame({'A': list('aaaba')}) + s = Series(list('aaaba')) + + assert_series_equal(df.groupby(s).ngroup(), + s.groupby(s).ngroup()) + + def test_ngroup_dupe_index(self): + df = DataFrame({'A': list('aaaba')}, index=[0] * 5) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 0, 0, 1, 0], index=[0] * 5) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_mi(self): + mi = MultiIndex.from_tuples([[0, 
1], [1, 2], [2, 2], [2, 2], [1, 0]]) + df = DataFrame({'A': list('aaaba')}, index=mi) + g = df.groupby('A') + sg = g.A + expected = Series([0, 0, 0, 1, 0], index=mi) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_groupby_not_col(self): + df = DataFrame({'A': list('aaaba')}, index=[0] * 5) + g = df.groupby([0, 0, 0, 1, 0]) + sg = g.A + + expected = Series([0, 0, 0, 1, 0], index=[0] * 5) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_descending(self): + df = DataFrame(['a', 'a', 'b', 'a', 'b'], columns=['A']) + g = df.groupby(['A']) + + ascending = Series([0, 0, 1, 0, 1]) + descending = Series([1, 1, 0, 1, 0]) + + assert_series_equal(descending, (g.ngroups - 1) - ascending) + assert_series_equal(ascending, g.ngroup(ascending=True)) + assert_series_equal(descending, g.ngroup(ascending=False)) + + def test_ngroup_matches_cumcount(self): + # verify one manually-worked out case works + df = DataFrame([['a', 'x'], ['a', 'y'], ['b', 'x'], + ['a', 'x'], ['b', 'y']], columns=['A', 'X']) + g = df.groupby(['A', 'X']) + g_ngroup = g.ngroup() + g_cumcount = g.cumcount() + expected_ngroup = Series([0, 1, 2, 0, 3]) + expected_cumcount = Series([0, 0, 0, 1, 0]) + + assert_series_equal(g_ngroup, expected_ngroup) + assert_series_equal(g_cumcount, expected_cumcount) + + def test_ngroup_cumcount_pair(self): + # brute force comparison for all small series + for p in cart_product(range(3), repeat=4): + df = DataFrame({'a': p}) + g = df.groupby(['a']) + + order = sorted(set(p)) + ngroupd = [order.index(val) for val in p] + cumcounted = [p[:i].count(val) for i, val in enumerate(p)] + + assert_series_equal(g.ngroup(), Series(ngroupd)) + assert_series_equal(g.cumcount(), Series(cumcounted)) + + def test_ngroup_respects_groupby_order(self): + np.random.seed(0) + df = DataFrame({'a': np.random.choice(list('abcdef'), 100)}) + for sort_flag in (False, True): + g = df.groupby(['a'], sort=sort_flag) + df['group_id'] = -1 + df['group_index'] = -1 + + for i, (_, group) in enumerate(g): + df.loc[group.index, 'group_id'] = i + for j, ind in enumerate(group.index): + df.loc[ind, 'group_index'] = j + + assert_series_equal(Series(df['group_id'].values), + g.ngroup()) + assert_series_equal(Series(df['group_index'].values), + g.cumcount()) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 88afa51e46b6c..19124a33bdbcb 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3399,60 +3399,6 @@ def test_groupby_with_small_elem(self): res = grouped.get_group((pd.Timestamp('2014-08-31'), 'start')) tm.assert_frame_equal(res, df.iloc[[2], :]) - def test_cumcount(self): - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A']) - g = df.groupby('A') - sg = g.A - - expected = Series([0, 1, 2, 0, 3]) - - assert_series_equal(expected, g.cumcount()) - assert_series_equal(expected, sg.cumcount()) - - def test_cumcount_empty(self): - ge = DataFrame().groupby(level=0) - se = Series().groupby(level=0) - - # edge case, as this is usually considered float - e = Series(dtype='int64') - - assert_series_equal(e, ge.cumcount()) - assert_series_equal(e, se.cumcount()) - - def test_cumcount_dupe_index(self): - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], - index=[0] * 5) - g = df.groupby('A') - sg = g.A - - expected = Series([0, 1, 2, 0, 3], index=[0] * 5) - - assert_series_equal(expected, g.cumcount()) - 
assert_series_equal(expected, sg.cumcount()) - - def test_cumcount_mi(self): - mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], - index=mi) - g = df.groupby('A') - sg = g.A - - expected = Series([0, 1, 2, 0, 3], index=mi) - - assert_series_equal(expected, g.cumcount()) - assert_series_equal(expected, sg.cumcount()) - - def test_cumcount_groupby_not_col(self): - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], - index=[0] * 5) - g = df.groupby([0, 0, 0, 1, 0]) - sg = g.A - - expected = Series([0, 1, 2, 0, 3], index=[0] * 5) - - assert_series_equal(expected, g.cumcount()) - assert_series_equal(expected, sg.cumcount()) - def test_fill_constistency(self): # GH9221 diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 5d131717f8345..2c8bf57f20fae 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -24,6 +24,7 @@ 'head', 'tail', 'cumcount', + 'ngroup', 'resample', 'rank', 'quantile', @@ -61,6 +62,7 @@ 'head', 'tail', 'cumcount', + 'ngroup', 'resample', 'rank', 'quantile', @@ -237,7 +239,7 @@ def test_tab_completion(mframe): 'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count', 'nunique', 'head', 'describe', 'cummax', 'quantile', 'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna', - 'cumsum', 'cumcount', 'all', 'shift', 'skew', + 'cumsum', 'cumcount', 'ngroup', 'all', 'shift', 'skew', 'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', 'cov', 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin', 'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding']) From e331c783e800954aaa0fd98f23ff500f13a2aea1 Mon Sep 17 00:00:00 2001 From: Oleg Shteynbuk Date: Thu, 1 Jun 2017 18:19:10 -0400 Subject: [PATCH 29/55] make null lowercase a missing value (#16534) --- doc/source/io.rst | 2 +- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/_libs/parsers.pyx | 2 +- pandas/io/common.py | 2 +- pandas/tests/io/parser/na_values.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index bca23dd18a0e3..82cb7abde4b38 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -227,7 +227,7 @@ na_values : scalar, str, list-like, or dict, default ``None`` Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: ``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'NA', - '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan', ''``. + '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''``. keep_default_na : boolean, default ``True`` If na_values are specified and keep_default_na is ``False`` the default NaN values are overridden, otherwise they're appended to. diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2a38fad37584b..9460039449ebc 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -38,7 +38,7 @@ Other Enhancements - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) - :func:`DataFrame.clip()` and :func: `Series.cip()` have gained an inplace argument. (:issue: `15388`) - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when margins=True. 
(:issue:`15972`) - +- :func:`read_csv` has gained 'null' as an additional default missing value.(:issue:`16471`) .. _whatsnew_0210.api_breaking: Backwards incompatible API changes diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 2def4dc9dcf24..7a6f366d5b1a9 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -277,7 +277,7 @@ DEFAULT_CHUNKSIZE = 256 * 1024 # no longer excluding inf representations # '1.#INF','-1.#INF', '1.#INF000000', _NA_VALUES = [b'-1.#IND', b'1.#QNAN', b'1.#IND', b'-1.#QNAN', - b'#N/A N/A', b'NA', b'#NA', b'NULL', b'NaN', + b'#N/A N/A', b'NA', b'#NA', b'NULL', b'null', b'NaN', b'nan', b''] diff --git a/pandas/io/common.py b/pandas/io/common.py index f4e12ea3fb173..1c987f6a9dfc3 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -31,7 +31,7 @@ # '1.#INF','-1.#INF', '1.#INF000000', _NA_VALUES = set([ '-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', - 'N/A', 'NA', '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan', '' + 'N/A', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '' ]) try: diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index 362837a46f838..6f72ed51d76c6 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -70,7 +70,7 @@ def test_non_string_na_values(self): def test_default_na_values(self): _NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', - '#N/A', 'N/A', 'NA', '#NA', 'NULL', 'NaN', + '#N/A', 'N/A', 'NA', '#NA', 'NULL', 'null', 'NaN', 'nan', '-NaN', '-nan', '#N/A N/A', '']) assert _NA_VALUES == parsers._NA_VALUES nv = len(_NA_VALUES) From e24e57c301b3bc757e2d4590755062ba2d4b9bfa Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 1 Jun 2017 18:24:19 -0400 Subject: [PATCH 30/55] MAINT: Drop has_index_names input from read_excel (#16522) --- doc/source/io.rst | 5 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/excel.py | 40 +++++++-------------- pandas/tests/io/test_excel.py | 63 ++++++++++++++++++++------------- 4 files changed, 52 insertions(+), 57 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 82cb7abde4b38..0c31bfe014a12 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2739,11 +2739,6 @@ should be passed to ``index_col`` and ``header`` import os os.remove('path_to_file.xlsx') -.. warning:: - - Excel files saved in version 0.16.2 or prior that had index names will still able to be read in, - but the ``has_index_names`` argument must specified to ``True``. - Parsing Specific Columns ++++++++++++++++++++++++ diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9460039449ebc..c2468917013f4 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -72,6 +72,7 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- ``pd.read_excel()`` has dropped the ``has_index_names`` parameter (:issue:`10967`) .. _whatsnew_0210.performance: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index aa08e5fd378f0..a4d2fabf76a41 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -141,10 +141,6 @@ convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric data will be read in as floats: Excel stores all numbers as floats internally -has_index_names : boolean, default None - DEPRECATED: for version 0.17+ index names will be automatically - inferred based on index_col. To read Excel output from 0.16.2 and - prior that had saved index names, use True. 
Returns ------- @@ -198,8 +194,8 @@ def get_writer(engine_name): def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0, index_col=None, names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, - convert_float=True, has_index_names=None, converters=None, - dtype=None, true_values=None, false_values=None, engine=None, + convert_float=True, converters=None, dtype=None, + true_values=None, false_values=None, engine=None, squeeze=False, **kwds): # Can't use _deprecate_kwarg since sheetname=None has a special meaning @@ -218,10 +214,9 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0, sheetname=sheet_name, header=header, skiprows=skiprows, names=names, index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates, date_parser=date_parser, na_values=na_values, thousands=thousands, - convert_float=convert_float, has_index_names=has_index_names, - skip_footer=skip_footer, converters=converters, dtype=dtype, - true_values=true_values, false_values=false_values, squeeze=squeeze, - **kwds) + convert_float=convert_float, skip_footer=skip_footer, + converters=converters, dtype=dtype, true_values=true_values, + false_values=false_values, squeeze=squeeze, **kwds) class ExcelFile(object): @@ -283,9 +278,8 @@ def __fspath__(self): def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0, names=None, index_col=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, - convert_float=True, has_index_names=None, - converters=None, true_values=None, false_values=None, - squeeze=False, **kwds): + convert_float=True, converters=None, true_values=None, + false_values=None, squeeze=False, **kwds): """ Parse specified sheet(s) into a DataFrame @@ -296,7 +290,6 @@ def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0, return self._parse_excel(sheetname=sheet_name, header=header, skiprows=skiprows, names=names, index_col=index_col, - has_index_names=has_index_names, parse_cols=parse_cols, parse_dates=parse_dates, date_parser=date_parser, na_values=na_values, @@ -343,23 +336,17 @@ def _excel2num(x): return i in parse_cols def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None, - skip_footer=0, index_col=None, has_index_names=None, - parse_cols=None, parse_dates=False, date_parser=None, - na_values=None, thousands=None, convert_float=True, - true_values=None, false_values=None, verbose=False, - dtype=None, squeeze=False, **kwds): + skip_footer=0, index_col=None, parse_cols=None, + parse_dates=False, date_parser=None, na_values=None, + thousands=None, convert_float=True, true_values=None, + false_values=None, verbose=False, dtype=None, + squeeze=False, **kwds): skipfooter = kwds.pop('skipfooter', None) if skipfooter is not None: skip_footer = skipfooter _validate_header_arg(header) - if has_index_names is not None: - warn("\nThe has_index_names argument is deprecated; index names " - "will be automatically inferred based on index_col.\n" - "This argmument is still necessary if reading Excel output " - "from 0.16.2 or prior with index names.", FutureWarning, - stacklevel=3) if 'chunksize' in kwds: raise NotImplementedError("chunksize keyword of read_excel " @@ -511,8 +498,7 @@ def _parse_cell(cell_contents, cell_typ): else: last = data[row][col] - if is_list_like(header) and len(header) > 1: - has_index_names = True + has_index_names = is_list_like(header) and len(header) > 1 # GH 12292 : error when read one empty column from excel file try: diff --git 
a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 4441ed815370b..abe3757ec64f3 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -881,8 +881,42 @@ def test_excel_multindex_roundtrip(self): tm.assert_frame_equal( df, act, check_names=check_names) - def test_excel_oldindex_format(self): - # GH 4679 + def test_excel_old_index_format(self): + # see gh-4679 + filename = 'test_index_name_pre17' + self.ext + in_file = os.path.join(self.dirpath, filename) + + # We detect headers to determine if index names exist, so + # that "index" name in the "names" version of the data will + # now be interpreted as rows that include null data. + data = np.array([[None, None, None, None, None], + ['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'], + ['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'], + ['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'], + ['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'], + ['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']]) + columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4'] + mi = MultiIndex(levels=[['R0', 'R_l0_g0', 'R_l0_g1', + 'R_l0_g2', 'R_l0_g3', 'R_l0_g4'], + ['R1', 'R_l1_g0', 'R_l1_g1', + 'R_l1_g2', 'R_l1_g3', 'R_l1_g4']], + labels=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], + names=[None, None]) + si = Index(['R0', 'R_l0_g0', 'R_l0_g1', 'R_l0_g2', + 'R_l0_g3', 'R_l0_g4'], name=None) + + expected = pd.DataFrame(data, index=si, columns=columns) + + actual = pd.read_excel(in_file, 'single_names') + tm.assert_frame_equal(actual, expected) + + expected.index = mi + + actual = pd.read_excel(in_file, 'multi_names') + tm.assert_frame_equal(actual, expected) + + # The analogous versions of the "names" version data + # where there are explicitly no names for the indices. data = np.array([['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'], ['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'], ['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'], @@ -894,40 +928,19 @@ def test_excel_oldindex_format(self): ['R_l1_g0', 'R_l1_g1', 'R_l1_g2', 'R_l1_g3', 'R_l1_g4']], labels=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], - names=['R0', 'R1']) + names=[None, None]) si = Index(['R_l0_g0', 'R_l0_g1', 'R_l0_g2', - 'R_l0_g3', 'R_l0_g4'], name='R0') - - in_file = os.path.join( - self.dirpath, 'test_index_name_pre17' + self.ext) + 'R_l0_g3', 'R_l0_g4'], name=None) expected = pd.DataFrame(data, index=si, columns=columns) - with tm.assert_produces_warning(FutureWarning): - actual = pd.read_excel( - in_file, 'single_names', has_index_names=True) - tm.assert_frame_equal(actual, expected) - expected.index.name = None actual = pd.read_excel(in_file, 'single_no_names') tm.assert_frame_equal(actual, expected) - with tm.assert_produces_warning(FutureWarning): - actual = pd.read_excel( - in_file, 'single_no_names', has_index_names=False) - tm.assert_frame_equal(actual, expected) expected.index = mi - with tm.assert_produces_warning(FutureWarning): - actual = pd.read_excel( - in_file, 'multi_names', has_index_names=True) - tm.assert_frame_equal(actual, expected) - expected.index.names = [None, None] actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) - with tm.assert_produces_warning(FutureWarning): - actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1], - has_index_names=False) - tm.assert_frame_equal(actual, expected, check_names=False) def test_read_excel_bool_header_arg(self): # GH 6114 From ec535e91583dd35356becc60d25ca6c8c7453b4d Mon Sep 17 00:00:00 2001 From: Ryan Hendrickson Date: Thu, 1 Jun 2017 20:23:10 -0400 Subject: [PATCH 31/55] 
BUG: reimplement MultiIndex.remove_unused_levels (#16565) --- asv_bench/benchmarks/indexing.py | 9 ++++++++ doc/source/whatsnew/v0.20.2.txt | 2 ++ pandas/core/indexes/multi.py | 34 +++++++++++++----------------- pandas/tests/indexes/test_multi.py | 29 ++++++++++++++++++++++++- 4 files changed, 54 insertions(+), 20 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 6a2c9d48c4a28..d941ef20dc7ac 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -204,6 +204,12 @@ def setup(self): [np.arange(100), list('A'), list('A')], names=['one', 'two', 'three']) + rng = np.random.RandomState(4) + size = 1 << 16 + self.mi_unused_levels = pd.MultiIndex.from_arrays([ + rng.randint(0, 1 << 13, size), + rng.randint(0, 1 << 10, size)])[rng.rand(size) < 0.1] + def time_series_xs_mi_ix(self): self.s.ix[999] @@ -248,6 +254,9 @@ def time_multiindex_small_get_loc_warm(self): def time_is_monotonic(self): self.miint.is_monotonic + def time_remove_unused_levels(self): + self.mi_unused_levels.remove_unused_levels() + class IntervalIndexing(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 4028d594d954f..87a790d43577f 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -37,6 +37,7 @@ Performance Improvements - Performance regression fix for MultiIndexes (:issue:`16319`, :issue:`16346`) - Improved performance of ``.clip()`` with scalar arguments (:issue:`15400`) - Improved performance of groupby with categorical groupers (:issue:`16413`) +- Improved performance of ``MultiIndex.remove_unused_levels()`` (:issue:`16556`) .. _whatsnew_0202.bug_fixes: @@ -66,6 +67,7 @@ Indexing - Bug in ``DataFrame.reset_index(level=)`` with single level index (:issue:`16263`) - Bug in partial string indexing with a monotonic, but not strictly-monotonic, index incorrectly reversing the slice bounds (:issue:`16515`) +- Bug in ``MultiIndex.remove_unused_levels()`` (:issue:`16556`) I/O ^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 981a6a696a618..f30da5b05f8ae 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1290,8 +1290,8 @@ def remove_unused_levels(self): new_levels = [] new_labels = [] - changed = np.ones(self.nlevels, dtype=bool) - for i, (lev, lab) in enumerate(zip(self.levels, self.labels)): + changed = False + for lev, lab in zip(self.levels, self.labels): uniques = algos.unique(lab) @@ -1299,33 +1299,29 @@ def remove_unused_levels(self): if len(uniques) == len(lev): new_levels.append(lev) new_labels.append(lab) - changed[i] = False continue - # set difference, then reverse sort - diff = Index(np.arange(len(lev))).difference(uniques) - unused = diff.sort_values(ascending=False) + changed = True + + # labels get mapped from uniques to 0:len(uniques) + label_mapping = np.zeros(len(lev)) + label_mapping[uniques] = np.arange(len(uniques)) + lab = label_mapping[lab] # new levels are simple lev = lev.take(uniques) - # new labels, we remove the unsued - # by decrementing the labels for that value - # prob a better way - for u in unused: - - lab = np.where(lab > u, lab - 1, lab) - new_levels.append(lev) new_labels.append(lab) - # nothing changed - if not changed.any(): - return self + result = self._shallow_copy() - return MultiIndex(new_levels, new_labels, - names=self.names, sortorder=self.sortorder, - verify_integrity=False) + if changed: + result._reset_identity() + result._set_levels(new_levels, 
validate=False)
+            result._set_labels(new_labels, validate=False)
+
+        return result
 
     @property
     def nlevels(self):
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 388a49d25cb82..242a9d63eac63 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -2515,7 +2515,34 @@ def test_reconstruct_remove_unused(self):
         # idempotent
         result2 = result.remove_unused_levels()
         tm.assert_index_equal(result2, expected)
-        assert result2 is result
+        assert result2.is_(result)
+
+    @pytest.mark.parametrize('first_type,second_type', [
+        ('int64', 'int64'),
+        ('datetime64[D]', 'str')])
+    def test_remove_unused_levels_large(self, first_type, second_type):
+        # GH16556
+
+        # because tests should be deterministic (and this test in particular
+        # checks that levels are removed, which is not the case for every
+        # random input):
+        rng = np.random.RandomState(4)  # seed is arbitrary value that works
+
+        size = 1 << 16
+        df = DataFrame(dict(
+            first=rng.randint(0, 1 << 13, size).astype(first_type),
+            second=rng.randint(0, 1 << 10, size).astype(second_type),
+            third=rng.rand(size)))
+        df = df.groupby(['first', 'second']).sum()
+        df = df[df.third < 0.1]
+
+        result = df.index.remove_unused_levels()
+        assert len(result.levels[0]) < len(df.index.levels[0])
+        assert len(result.levels[1]) < len(df.index.levels[1])
+        assert result.equals(df.index)
+
+        expected = df.reset_index().set_index(['first', 'second']).index
+        tm.assert_index_equal(result, expected)
 
     def test_isin(self):
         values = [('foo', 2), ('bar', 3), ('quux', 4)]
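The vectorized remapping in the patch above replaces the removed per-value ``np.where``
loop with a single fancy-indexing pass. A minimal standalone NumPy sketch of the idea,
with made-up label codes (plain ``np.unique`` stands in for the internal ``algos.unique``)::

    import numpy as np

    lab = np.array([0, 3, 3, 5])   # codes into a level of length 6
    uniques = np.unique(lab)       # array([0, 3, 5]): only these codes are used

    # map each used code to its new position 0..len(uniques)-1
    label_mapping = np.zeros(6)
    label_mapping[uniques] = np.arange(len(uniques))

    label_mapping[lab]             # array([0., 1., 1., 2.]): remapped in one pass

The new level itself is just ``lev.take(uniques)``, so the rewrite costs one pass over the
labels instead of one ``np.where`` scan per unused level value.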
From 9e71f08038260629017b4a4d515c9c3edc2b4cf8 Mon Sep 17 00:00:00 2001
From: Chris Filo Gorgolewski
Date: Thu, 1 Jun 2017 17:28:09 -0700
Subject: [PATCH 32/55] Adding 'n/a' to list of strings denoting missing
 values (#16079)

---
 doc/source/io.rst | 2 +-
 doc/source/whatsnew/v0.21.0.txt | 4 +++-
 pandas/_libs/parsers.pyx | 2 +-
 pandas/io/common.py | 2 +-
 pandas/tests/io/parser/na_values.py | 4 ++--
 5 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 0c31bfe014a12..bd81b478b5326 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -226,7 +226,7 @@ NA and Missing Data Handling
 na_values : scalar, str, list-like, or dict, default ``None``
     Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA
     values. By default the following values are interpreted as NaN:
-    ``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'NA',
+    ``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'n/a', 'NA',
     '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''``.
 keep_default_na : boolean, default ``True``
     If na_values are specified and keep_default_na is ``False`` the default NaN
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index c2468917013f4..fe3eb291d06ff 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -38,7 +38,7 @@ Other Enhancements
 - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)
 - :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an inplace argument. (:issue:`15388`)
 - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when margins=True. (:issue:`15972`)
-- :func:`read_csv` has gained 'null' as an additional default missing value.(:issue:`16471`)
+
 
 .. _whatsnew_0210.api_breaking:
 
 Backwards incompatible API changes
@@ -49,6 +49,8 @@ Backwards incompatible API changes
 
 - Accessing a non-existent attribute on a closed :class:`HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`)
+- :func:`read_csv` now treats ``'null'`` strings as missing values by default (:issue:`16471`)
+- :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`)
 - :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`).
 
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 7a6f366d5b1a9..2549c8545908d 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -277,7 +277,7 @@ DEFAULT_CHUNKSIZE = 256 * 1024
 # no longer excluding inf representations
 # '1.#INF','-1.#INF', '1.#INF000000',
 _NA_VALUES = [b'-1.#IND', b'1.#QNAN', b'1.#IND', b'-1.#QNAN',
-              b'#N/A N/A', b'NA', b'#NA', b'NULL', b'null', b'NaN',
+              b'#N/A N/A', b'n/a', b'NA', b'#NA', b'NULL', b'null', b'NaN',
               b'nan', b'']
 
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 1c987f6a9dfc3..cbfc33dbebb81 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -31,7 +31,7 @@
 # '1.#INF','-1.#INF', '1.#INF000000',
 _NA_VALUES = set([
     '-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A',
-    'N/A', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''
+    'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''
 ])
 
 try:
diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py
index 6f72ed51d76c6..170f9d428c9cc 100644
--- a/pandas/tests/io/parser/na_values.py
+++ b/pandas/tests/io/parser/na_values.py
@@ -70,8 +70,8 @@ def test_non_string_na_values(self):
 
     def test_default_na_values(self):
         _NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN',
-                          '#N/A', 'N/A', 'NA', '#NA', 'NULL', 'null', 'NaN',
-                          'nan', '-NaN', '-nan', '#N/A N/A', ''])
+                          '#N/A', 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null',
+                          'NaN', 'nan', '-NaN', '-nan', '#N/A N/A', ''])
         assert _NA_VALUES == parsers._NA_VALUES
         nv = len(_NA_VALUES)
 
From 32512b938b6cce3cd2a5f1847aeb4ae7fcfb3b04 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Fri, 2 Jun 2017 04:54:40 -0500
Subject: [PATCH 33/55] API: Make is_strictly_monotonic_* private (#16576)

---
 doc/source/advanced.rst | 10 +++++++++
 doc/source/api.rst | 2 --
 doc/source/whatsnew/v0.20.2.txt | 1 -
 pandas/core/indexes/base.py | 16 +++++++-------
 pandas/core/indexes/datetimes.py | 2 +-
 pandas/tests/indexes/test_base.py | 8 +++----
 pandas/tests/indexes/test_multi.py | 32 ++++++++++++++--------------
 pandas/tests/indexes/test_numeric.py | 20 ++++++++---------
 pandas/tests/indexes/test_range.py | 20 ++++++++---------
 9 files changed, 59 insertions(+), 52 deletions(-)

diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
index ea00588ba156f..711c3e9a95d05 100644
--- a/doc/source/advanced.rst
+++ b/doc/source/advanced.rst
@@ -948,6 +948,16 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
    In [11]: df.loc[2:3, :]
    KeyError: 'Cannot get right slice bound for non-unique label: 3'
 
+:meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` only check that
+an index is weakly monotonic. To check for strict monotonicity, you can combine one of those with
+:meth:`Index.is_unique`.
+
+.. 
ipython:: python + + weakly_monotonic = pd.Index(['a', 'b', 'c', 'c']) + weakly_monotonic + weakly_monotonic.is_monotonic_increasing + weakly_monotonic.is_monotonic_increasing & weakly_monotonic.is_unique Endpoints are inclusive ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/api.rst b/doc/source/api.rst index 04b952a99e8f7..d6053791d6f4b 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1287,8 +1287,6 @@ Attributes Index.is_monotonic Index.is_monotonic_increasing Index.is_monotonic_decreasing - Index.is_strictly_monotonic_increasing - Index.is_strictly_monotonic_decreasing Index.is_unique Index.has_duplicates Index.dtype diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 87a790d43577f..d58a98703f22a 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -21,7 +21,6 @@ Enhancements - Unblocked access to additional compression types supported in pytables: 'blosc:blosclz, 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`) - ``Series`` provides a ``to_latex`` method (:issue:`16180`) -- Added :attr:`Index.is_strictly_monotonic_increasing` and :attr:`Index.is_strictly_monotonic_decreasing` properties (:issue:`16515`) - A new groupby method :meth:`~pandas.core.groupby.GroupBy.ngroup`, parallel to the existing :meth:`~pandas.core.groupby.GroupBy.cumcount`, diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e8c2043138edb..028464ad5cd89 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1221,33 +1221,33 @@ def is_monotonic_decreasing(self): return self._engine.is_monotonic_decreasing @property - def is_strictly_monotonic_increasing(self): + def _is_strictly_monotonic_increasing(self): """return if the index is strictly monotonic increasing (only increasing) values Examples -------- - >>> Index([1, 2, 3]).is_strictly_monotonic_increasing + >>> Index([1, 2, 3])._is_strictly_monotonic_increasing True - >>> Index([1, 2, 2]).is_strictly_monotonic_increasing + >>> Index([1, 2, 2])._is_strictly_monotonic_increasing False - >>> Index([1, 3, 2]).is_strictly_monotonic_increasing + >>> Index([1, 3, 2])._is_strictly_monotonic_increasing False """ return self.is_unique and self.is_monotonic_increasing @property - def is_strictly_monotonic_decreasing(self): + def _is_strictly_monotonic_decreasing(self): """return if the index is strictly monotonic decreasing (only decreasing) values Examples -------- - >>> Index([3, 2, 1]).is_strictly_monotonic_decreasing + >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing True - >>> Index([3, 2, 2]).is_strictly_monotonic_decreasing + >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing False - >>> Index([3, 1, 2]).is_strictly_monotonic_decreasing + >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing False """ return self.is_unique and self.is_monotonic_decreasing diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 60560374cd420..239894cff3874 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1472,7 +1472,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): # the bounds need swapped if index is reverse sorted and has a # length > 1 (is_monotonic_decreasing gives True for empty # and length 1 index) - if self.is_strictly_monotonic_decreasing and len(self) > 1: + if self._is_strictly_monotonic_decreasing and len(self) > 1: return upper if side == 'left' else lower return lower if side == 'left' else upper else: diff --git 
a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index a6933316e4291..d9f8e5e7f382b 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1330,8 +1330,8 @@ def test_is_monotonic_incomparable(self): index = Index([5, datetime.now(), 7]) assert not index.is_monotonic_increasing assert not index.is_monotonic_decreasing - assert not index.is_strictly_monotonic_increasing - assert not index.is_strictly_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_decreasing def test_get_set_value(self): values = np.random.randn(100) @@ -2030,8 +2030,8 @@ def test_is_monotonic_na(self): for index in examples: assert not index.is_monotonic_increasing assert not index.is_monotonic_decreasing - assert not index.is_strictly_monotonic_increasing - assert not index.is_strictly_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_decreasing def test_repr_summary(self): with cf.option_context('display.max_seq_items', 10): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 242a9d63eac63..ba917f33d8595 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2373,30 +2373,30 @@ def test_is_monotonic(self): i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=['one', 'two']) assert i.is_monotonic - assert i.is_strictly_monotonic_increasing + assert i._is_strictly_monotonic_increasing assert Index(i.values).is_monotonic - assert i.is_strictly_monotonic_increasing + assert i._is_strictly_monotonic_increasing i = MultiIndex.from_product([np.arange(10, 0, -1), np.arange(10)], names=['one', 'two']) assert not i.is_monotonic - assert not i.is_strictly_monotonic_increasing + assert not i._is_strictly_monotonic_increasing assert not Index(i.values).is_monotonic - assert not Index(i.values).is_strictly_monotonic_increasing + assert not Index(i.values)._is_strictly_monotonic_increasing i = MultiIndex.from_product([np.arange(10), np.arange(10, 0, -1)], names=['one', 'two']) assert not i.is_monotonic - assert not i.is_strictly_monotonic_increasing + assert not i._is_strictly_monotonic_increasing assert not Index(i.values).is_monotonic - assert not Index(i.values).is_strictly_monotonic_increasing + assert not Index(i.values)._is_strictly_monotonic_increasing i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']]) assert not i.is_monotonic - assert not i.is_strictly_monotonic_increasing + assert not i._is_strictly_monotonic_increasing assert not Index(i.values).is_monotonic - assert not Index(i.values).is_strictly_monotonic_increasing + assert not Index(i.values)._is_strictly_monotonic_increasing # string ordering i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], @@ -2406,8 +2406,8 @@ def test_is_monotonic(self): names=['first', 'second']) assert not i.is_monotonic assert not Index(i.values).is_monotonic - assert not i.is_strictly_monotonic_increasing - assert not Index(i.values).is_strictly_monotonic_increasing + assert not i._is_strictly_monotonic_increasing + assert not Index(i.values)._is_strictly_monotonic_increasing i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['mom', 'next', 'zenith']], @@ -2416,8 +2416,8 @@ def test_is_monotonic(self): names=['first', 'second']) assert i.is_monotonic assert Index(i.values).is_monotonic - assert i.is_strictly_monotonic_increasing - assert Index(i.values).is_strictly_monotonic_increasing + assert 
i._is_strictly_monotonic_increasing + assert Index(i.values)._is_strictly_monotonic_increasing # mixed levels, hits the TypeError i = MultiIndex( @@ -2428,20 +2428,20 @@ def test_is_monotonic(self): names=['household_id', 'asset_id']) assert not i.is_monotonic - assert not i.is_strictly_monotonic_increasing + assert not i._is_strictly_monotonic_increasing def test_is_strictly_monotonic(self): idx = pd.MultiIndex(levels=[['bar', 'baz'], ['mom', 'next']], labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) assert idx.is_monotonic_increasing - assert not idx.is_strictly_monotonic_increasing + assert not idx._is_strictly_monotonic_increasing @pytest.mark.xfail(reason="buggy MultiIndex.is_monotonic_decresaing.") - def test_is_strictly_monotonic_decreasing(self): + def test__is_strictly_monotonic_decreasing(self): idx = pd.MultiIndex(levels=[['baz', 'bar'], ['next', 'mom']], labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) assert idx.is_monotonic_decreasing - assert not idx.is_strictly_monotonic_decreasing + assert not idx._is_strictly_monotonic_decreasing def test_reconstruct_sort(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 77f34dbf210e0..29d4214fd549b 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -465,36 +465,36 @@ def test_view(self): def test_is_monotonic(self): assert self.index.is_monotonic assert self.index.is_monotonic_increasing - assert self.index.is_strictly_monotonic_increasing + assert self.index._is_strictly_monotonic_increasing assert not self.index.is_monotonic_decreasing - assert not self.index.is_strictly_monotonic_decreasing + assert not self.index._is_strictly_monotonic_decreasing index = self._holder([4, 3, 2, 1]) assert not index.is_monotonic - assert not index.is_strictly_monotonic_increasing - assert index.is_strictly_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing index = self._holder([1]) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing - assert index.is_strictly_monotonic_increasing - assert index.is_strictly_monotonic_decreasing + assert index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing def test_is_strictly_monotonic(self): index = self._holder([1, 1, 2, 3]) assert index.is_monotonic_increasing - assert not index.is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_increasing index = self._holder([3, 2, 1, 1]) assert index.is_monotonic_decreasing - assert not index.is_strictly_monotonic_decreasing + assert not index._is_strictly_monotonic_decreasing index = self._holder([1, 1]) assert index.is_monotonic_increasing assert index.is_monotonic_decreasing - assert not index.is_strictly_monotonic_increasing - assert not index.is_strictly_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_decreasing def test_logical_compat(self): idx = self.create_index() diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index db8180cb736c4..0d88e88030604 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -331,35 +331,35 @@ def test_is_monotonic(self): assert self.index.is_monotonic assert self.index.is_monotonic_increasing assert not self.index.is_monotonic_decreasing - assert self.index.is_strictly_monotonic_increasing - assert not self.index.is_strictly_monotonic_decreasing + assert 
self.index._is_strictly_monotonic_increasing + assert not self.index._is_strictly_monotonic_decreasing index = RangeIndex(4, 0, -1) assert not index.is_monotonic - assert not index.is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_increasing assert index.is_monotonic_decreasing - assert index.is_strictly_monotonic_decreasing + assert index._is_strictly_monotonic_decreasing index = RangeIndex(1, 2) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing - assert index.is_strictly_monotonic_increasing - assert index.is_strictly_monotonic_decreasing + assert index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing index = RangeIndex(2, 1) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing - assert index.is_strictly_monotonic_increasing - assert index.is_strictly_monotonic_decreasing + assert index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing index = RangeIndex(1, 1) assert index.is_monotonic assert index.is_monotonic_increasing assert index.is_monotonic_decreasing - assert index.is_strictly_monotonic_increasing - assert index.is_strictly_monotonic_decreasing + assert index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing def test_equals_range(self): equiv_pairs = [(RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)), From 36670fce472436419efd5666cd8189c0b56fdc8c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 2 Jun 2017 14:26:44 +0200 Subject: [PATCH 34/55] DOC: change doc build to python 3.6 (#16545) * DOC: change doc build to python 3.6 * Remove pinning of pyqt to 4.x * Remove pinning of openpyxl * Add xsel to doc build for clipboard --- .travis.yml | 8 ++++++-- ...uirements-3.5_DOC.build => requirements-3.6_DOC.build} | 2 +- ci/{requirements-3.5_DOC.run => requirements-3.6_DOC.run} | 4 ++-- ci/{requirements-3.5_DOC.sh => requirements-3.6_DOC.sh} | 0 4 files changed, 9 insertions(+), 5 deletions(-) rename ci/{requirements-3.5_DOC.build => requirements-3.6_DOC.build} (73%) rename ci/{requirements-3.5_DOC.run => requirements-3.6_DOC.run} (87%) rename ci/{requirements-3.5_DOC.sh => requirements-3.6_DOC.sh} (100%) diff --git a/.travis.yml b/.travis.yml index 8b6700e11d2c5..5dc4256a268ad 100644 --- a/.travis.yml +++ b/.travis.yml @@ -74,7 +74,11 @@ matrix: # In allow_failures - os: linux env: - - JOB="3.5_DOC" DOC=true + - JOB="3.6_DOC" DOC=true + addons: + apt: + packages: + - xsel allow_failures: - os: linux env: @@ -87,7 +91,7 @@ matrix: - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" - os: linux env: - - JOB="3.5_DOC" DOC=true + - JOB="3.6_DOC" DOC=true before_install: - echo "before_install" diff --git a/ci/requirements-3.5_DOC.build b/ci/requirements-3.6_DOC.build similarity index 73% rename from ci/requirements-3.5_DOC.build rename to ci/requirements-3.6_DOC.build index 73aeb3192242f..bdcfe28105866 100644 --- a/ci/requirements-3.5_DOC.build +++ b/ci/requirements-3.6_DOC.build @@ -1,4 +1,4 @@ -python=3.5* +python=3.6* python-dateutil pytz numpy diff --git a/ci/requirements-3.5_DOC.run b/ci/requirements-3.6_DOC.run similarity index 87% rename from ci/requirements-3.5_DOC.run rename to ci/requirements-3.6_DOC.run index 9647ab53ab835..df8087f62ef16 100644 --- a/ci/requirements-3.5_DOC.run +++ b/ci/requirements-3.6_DOC.run @@ -12,7 +12,7 @@ lxml beautifulsoup4 html5lib pytables -openpyxl=1.8.5 +openpyxl xlrd xlwt xlsxwriter 
@@ -21,4 +21,4 @@ numexpr
 bottleneck
 statsmodels
 xarray
-pyqt=4.11.4
+pyqt
diff --git a/ci/requirements-3.5_DOC.sh b/ci/requirements-3.6_DOC.sh
similarity index 100%
rename from ci/requirements-3.5_DOC.sh
rename to ci/requirements-3.6_DOC.sh
From 5d7a02079087c55b497155389a2d40a0fb76c542 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Fri, 2 Jun 2017 18:30:10 -0400
Subject: [PATCH 35/55] DOC: whatsnew 0.20.2 edits (#16587)

---
 doc/source/whatsnew/v0.20.2.txt | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index d58a98703f22a..c9486954258c8 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -1,7 +1,7 @@
 .. _whatsnew_0202:
 
-v0.20.2 (???)
--------------
+v0.20.2 (June 3, 2017)
+----------------------
 
 This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes,
 bug fixes and performance improvements.
 We recommend that all users upgrade to this version.
@@ -46,19 +46,19 @@ Bug Fixes
 
 - Silenced a warning on some Windows environments about "tput: terminal attributes: No such device or address" when detecting the terminal size. This fix only applies to python 3 (:issue:`16496`)
 - Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`)
-- Bug in ``Index.symmetric_difference()`` on two equal MultiIndex's, results in a TypeError (:issue `13490`)
+- Bug in ``Index.symmetric_difference()`` on two equal MultiIndex's, results in a ``TypeError`` (:issue:`13490`)
 - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`)
 - Passing an invalid engine to :func:`read_csv` now raises an informative ``ValueError`` rather than ``UnboundLocalError``. (:issue:`16511`)
 - Bug in :func:`unique` on an array of tuples (:issue:`16519`)
-- Bug in :func:`cut`` when ``labels`` are set, resulting in incorrect label ordering (:issue:`16459`)
-- Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on Categoricals (:issue:`16409`)
+- Bug in :func:`cut` when ``labels`` are set, resulting in incorrect label ordering (:issue:`16459`)
+- Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on ``Categoricals`` (:issue:`16409`)
 
 Conversion
 ^^^^^^^^^^
 
-- Bug in ``pd.to_numeric()`` in which empty data inputs were causing Python to crash (:issue:`16302`)
-- Silence numpy warnings when broadcasting DataFrame to Series with comparison ops (:issue:`16378`, :issue:`16306`)
+- Bug in :func:`to_numeric` in which empty data inputs were causing a segfault of the interpreter (:issue:`16302`)
+- Silence numpy warnings when broadcasting ``DataFrame`` to ``Series`` with comparison ops (:issue:`16378`, :issue:`16306`)
 
 
 Indexing
 ^^^^^^^^
 
 - Bug in ``DataFrame.reset_index(level=)`` with single level index (:issue:`16263`)
 - Bug in partial string indexing with a monotonic, but not strictly-monotonic, index incorrectly reversing the slice bounds (:issue:`16515`)
-- Bug in ``MultiIndex.remove_unused_levels()`` (:issue:`16556`)
+- Bug in ``MultiIndex.remove_unused_levels()`` that would not return a ``MultiIndex`` equal to the original. 
(:issue:`16556`) I/O ^^^ -- Bug in pd.read_csv() when comment is passed in space deliminted text files (:issue:`16472`) +- Bug in :func:`read_csv` when ``comment`` is passed in a space delimited text file (:issue:`16472`) - Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`) -- Bug that raised IndexError HTML-rendering an empty DataFrame (:issue:`15953`) -- Bug in ``pd.read_csv()`` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`) +- Bug that raised ``IndexError`` when HTML-rendering an empty ``DataFrame`` (:issue:`15953`) +- Bug in :func:`read_csv` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`) - Bug where ``DataFrame.to_html()`` ignored the ``index_names`` parameter (:issue:`16493`) - Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`) @@ -92,7 +92,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug creating datetime rolling window on an empty DataFrame (:issue:`15819`) +- Bug in creating a time-based rolling window on an empty ``DataFrame`` (:issue:`15819`) - Bug in ``rolling.cov()`` with offset window (:issue:`16058`) - Bug in ``.resample()`` and ``.groupby()`` when aggregating on integers (:issue:`16361`) @@ -100,12 +100,12 @@ Groupby/Resample/Rolling Sparse ^^^^^^ -- Bug in construction of SparseDataFrame from ``scipy.sparse.dok_matrix`` (:issue:`16179`) +- Bug in construction of ``SparseDataFrame`` from ``scipy.sparse.dok_matrix`` (:issue:`16179`) Reshaping ^^^^^^^^^ -- Bug in ``DataFrame.stack`` with unsorted levels in MultiIndex columns (:issue:`16323`) +- Bug in ``DataFrame.stack`` with unsorted levels in ``MultiIndex`` columns (:issue:`16323`) - Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`) - Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`) - Bug in construction of a ``DataFrame`` with mixed dtypes including an all-NaT column. (:issue:`16395`) @@ -114,7 +114,7 @@ Reshaping Numeric ^^^^^^^ -- Bug in .interpolate(), where limit_direction was not respected when limit=None (default) was passed (:issue:16282) +- Bug in ``.interpolate()``, where ``limit_direction`` was not respected when ``limit=None`` (default) was passed (:issue:`16282`) Categorical ^^^^^^^^^^^ @@ -124,4 +124,4 @@ Categorical Other ^^^^^ -- Bug in ``pd.drop([])`` for DataFrame with non-unique indices (:issue:`16270`) +- Bug in ``DataFrame.drop()`` with an empty-list with non-unique indices (:issue:`16270`) From 882ea0f3ff7741f7f613f1bac9f63f7fb2afb780 Mon Sep 17 00:00:00 2001 From: Tong SHEN Date: Sun, 4 Jun 2017 08:52:50 +0800 Subject: [PATCH 36/55] DOC: Fix typo in timeseries.rst (#16590) --- doc/source/timeseries.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 71d85f9b3995b..1dd80aec4fd6c 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1922,7 +1922,7 @@ then you can use a ``PeriodIndex`` and/or ``Series`` of ``Periods`` to do comput span = pd.period_range('1215-01-01', '1381-01-01', freq='D') span -To convert from a ``int64`` based YYYYMMDD representation. +To convert from an ``int64`` based YYYYMMDD representation. .. 
ipython:: python From 9d0be9d92f4d967c1d154fa1623f52f1b3abb422 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 4 Jun 2017 05:39:31 -0500 Subject: [PATCH 37/55] PERF: vectorize _interp_limit (#16592) * PERF: vectorize _interp_limit * CLN: remove old implementation * fixup! CLN: remove old implementation --- pandas/core/missing.py | 77 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 10 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 51778684d68f5..5aabc9d8730dd 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -143,12 +143,6 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, 'DatetimeIndex') method = 'values' - def _interp_limit(invalid, fw_limit, bw_limit): - "Get idx of values that won't be filled b/c they exceed the limits." - for x in np.where(invalid)[0]: - if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): - yield x - valid_limit_directions = ['forward', 'backward', 'both'] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: @@ -180,21 +174,29 @@ def _interp_limit(invalid, fw_limit, bw_limit): # default limit is unlimited GH #16282 if limit is None: - limit = len(xvalues) + # limit = len(xvalues) + pass elif not is_integer(limit): raise ValueError('Limit must be an integer') elif limit < 1: raise ValueError('Limit must be greater than 0') # each possible limit_direction - if limit_direction == 'forward': + # TODO: do we need sorted? + if limit_direction == 'forward' and limit is not None: violate_limit = sorted(start_nans | set(_interp_limit(invalid, limit, 0))) - elif limit_direction == 'backward': + elif limit_direction == 'forward': + violate_limit = sorted(start_nans) + elif limit_direction == 'backward' and limit is not None: violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0, limit))) - elif limit_direction == 'both': + elif limit_direction == 'backward': + violate_limit = sorted(end_nans) + elif limit_direction == 'both' and limit is not None: violate_limit = sorted(_interp_limit(invalid, limit, limit)) + else: + violate_limit = [] xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) @@ -630,3 +632,58 @@ def fill_zeros(result, x, y, name, fill): result = result.reshape(shape) return result + + +def _interp_limit(invalid, fw_limit, bw_limit): + """Get idx of values that won't be filled b/c they exceed the limits. + + This is equivalent to the more readable, but slower + + .. code-block:: python + + for x in np.where(invalid)[0]: + if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): + yield x + """ + # handle forward first; the backward direction is the same except + # 1. operate on the reversed array + # 2. 
subtract the returned indices from N - 1
+    N = len(invalid)
+
+    def inner(invalid, limit):
+        limit = min(limit, N)
+        windowed = _rolling_window(invalid, limit + 1).all(1)
+        idx = (set(np.where(windowed)[0] + limit) |
+               set(np.where((~invalid[:limit + 1]).cumsum() == 0)[0]))
+        return idx
+
+    if fw_limit == 0:
+        f_idx = set(np.where(invalid)[0])
+    else:
+        f_idx = inner(invalid, fw_limit)
+
+    if bw_limit == 0:
+        # then we don't even need to care about backwards, just use forwards
+        return f_idx
+    else:
+        b_idx = set(N - 1 - np.asarray(list(inner(invalid[::-1], bw_limit))))
+        if fw_limit == 0:
+            return b_idx
+    return f_idx & b_idx
+
+
+def _rolling_window(a, window):
+    """
+    [True, True, False, True, False], 2 ->
+
+    [
+        [True, True],
+        [True, False],
+        [False, True],
+        [True, False],
+    ]
+    """
+    # https://stackoverflow.com/a/6811241
+    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
+    strides = a.strides + (a.strides[-1],)
+    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
From b3769f16f308550c9f7e5585c12560be7ca843b2 Mon Sep 17 00:00:00 2001
From: Mahdi Ben Jelloul
Date: Sun, 4 Jun 2017 12:42:47 +0200
Subject: [PATCH 38/55] DOC: Fix typo in merge doc for validate kwarg (#16595)

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 25c3c3fe4e48e..2b2e7be62427b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -184,7 +184,7 @@ dataset.
     * "many_to_one" or "m:1": check if merge keys are unique in right
       dataset.
-    * "many_to_may" or "m:m": allowed, but does not result in checks.
+    * "many_to_many" or "m:m": allowed, but does not result in checks.
 
     .. versionadded:: 0.21.0
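For context on the docstring entry fixed above, a short sketch of how the ``validate``
argument behaves (it is new in 0.21.0, per the ``versionadded`` note; the frames here are
invented for illustration)::

    import pandas as pd

    left = pd.DataFrame({'key': [1, 2, 2], 'lval': ['a', 'b', 'c']})
    right = pd.DataFrame({'key': [1, 2, 2], 'rval': ['x', 'y', 'z']})

    # "many_to_many" / "m:m" is accepted but performs no uniqueness checks
    pd.merge(left, right, on='key', validate='many_to_many')

    # "one_to_many" checks that the left keys are unique; key 2 is duplicated
    # in left, so this raises pandas.errors.MergeError
    pd.merge(left, right, on='key', validate='one_to_many')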
From a0174eb1e79a9157aa14a0ff02d684c2ede933ad Mon Sep 17 00:00:00 2001
From: "Mehmet Ali "Mali" Akmanalp"
Date: Sun, 4 Jun 2017 06:44:25 -0400
Subject: [PATCH 39/55] BUG: convert numpy strings in index names in HDF
 #13492 (#16444)

* BUG: Handle numpy strings in index names in HDF5 #13492

* REF: refactor to _ensure_str

---
 doc/source/whatsnew/v0.20.2.txt | 1 +
 pandas/io/pytables.py | 14 +++++++++++++-
 pandas/tests/io/test_pytables.py | 23 ++++++++++++++++++++++-
 3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index c9486954258c8..362a80c10694a 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -76,6 +76,7 @@ I/O
 - Bug that raised ``IndexError`` when HTML-rendering an empty ``DataFrame`` (:issue:`15953`)
 - Bug in :func:`read_csv` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`)
 - Bug where ``DataFrame.to_html()`` ignored the ``index_names`` parameter (:issue:`16493`)
+- Bug where ``pd.read_hdf()`` returns numpy strings for index names (:issue:`13492`)
 - Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`)
 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 2940d1f958776..ddd25aafa060c 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -73,6 +73,18 @@ def _ensure_encoding(encoding):
     return encoding
 
 
+def _ensure_str(name):
+    """Ensure that an index / column name is a str (python 3) or
+    unicode (python 2); otherwise they may be np.string dtype.
+    Non-string dtypes are passed through unchanged.
+
+    https://github.com/pandas-dev/pandas/issues/13492
+    """
+    if isinstance(name, compat.string_types):
+        name = compat.text_type(name)
+    return name
+
+
 Term = Expr
 
@@ -2574,7 +2586,7 @@ def read_index_node(self, node, start=None, stop=None):
 
         name = None
         if 'name' in node._v_attrs:
-            name = node._v_attrs.name
+            name = _ensure_str(node._v_attrs.name)
 
         index_class = self._alias_to_class(getattr(node._v_attrs,
                                                    'index_class', ''))
diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
index e68de93c3e8ce..efec778e12b50 100644
--- a/pandas/tests/io/test_pytables.py
+++ b/pandas/tests/io/test_pytables.py
@@ -16,7 +16,7 @@
                     date_range, timedelta_range, Index, DatetimeIndex,
                     isnull)
 
-from pandas.compat import is_platform_windows, PY3, PY35, BytesIO
+from pandas.compat import is_platform_windows, PY3, PY35, BytesIO, text_type
 from pandas.io.formats.printing import pprint_thing
 
 tables = pytest.importorskip('tables')
@@ -2922,6 +2922,27 @@ def test_store_index_name_with_tz(self):
             recons = store['frame']
             tm.assert_frame_equal(recons, df)
 
+    @pytest.mark.parametrize('table_format', ['table', 'fixed'])
+    def test_store_index_name_numpy_str(self, table_format):
+        # GH #13492
+        idx = pd.Index(pd.to_datetime([datetime.date(2000, 1, 1),
+                                       datetime.date(2000, 1, 2)]),
+                       name=u('cols\u05d2'))
+        idx1 = pd.Index(pd.to_datetime([datetime.date(2010, 1, 1),
+                                        datetime.date(2010, 1, 2)]),
+                        name=u('rows\u05d0'))
+        df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1)
+
+        # This used to fail, returning numpy strings instead of python strings.
+        with ensure_clean_path(self.path) as path:
+            df.to_hdf(path, 'df', format=table_format)
+            df2 = read_hdf(path, 'df')
+
+        assert_frame_equal(df, df2, check_names=True)
+
+        assert type(df2.index.name) == text_type
+        assert type(df2.columns.name) == text_type
+
     def test_store_series_name(self):
         df = tm.makeDataFrame()
         series = df['A']
From 977151477395c1a20ab108f9956524cd225ddfe0 Mon Sep 17 00:00:00 2001
From: bpraggastis
Date: Sun, 4 Jun 2017 03:47:14 -0700
Subject: [PATCH 40/55] ERR: Raise error in usecols when column doesn't exist
 but length matches (#16460)

* gh-14671 Check if usecols with type string contains a subset of names, if
  not, throws an error

* tests added for gh-14671, expected behavior of simultaneous use of usecols
  and names unclear so these tests are commented out

* Review comments

---
 doc/source/whatsnew/v0.20.2.txt | 1 +
 pandas/io/parsers.py | 6 ++++
 pandas/tests/io/parser/usecols.py | 51 +++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+)

diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index 362a80c10694a..e1469cf15e20c 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -72,6 +72,7 @@ I/O
 ^^^
 
 - Bug in :func:`read_csv` when ``comment`` is passed in a space delimited text file (:issue:`16472`)
+- Bug in :func:`read_csv` not raising an exception with nonexistent columns in ``usecols`` when it had the correct length (:issue:`14671`)
 - Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`)
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index aab70c8ce2cd4..055d6d045d2f2 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1649,6 +1649,12 @@ def __init__(self, src, **kwds):
 
         if self.usecols:
             usecols = _evaluate_usecols(self.usecols, self.orig_names)
+
+            # GH 14671
+            if (self.usecols_dtype == 'string' and
+                    not set(usecols).issubset(self.orig_names)):
+                raise ValueError("Usecols do not match names.")
+
             if len(self.names) > len(usecols):
                 self.names = [n for i, n in enumerate(self.names)
                               if (i in usecols or n in usecols)]
diff --git a/pandas/tests/io/parser/usecols.py b/pandas/tests/io/parser/usecols.py
index 8761d1ccd3da4..f582e5037ca07 100644
--- a/pandas/tests/io/parser/usecols.py
+++ b/pandas/tests/io/parser/usecols.py
@@ -475,3 +475,54 @@ def test_uneven_length_cols(self):
                               'C': [3, 5, 4, 3, 3, 7]})
         df = self.read_csv(StringIO(data), usecols=usecols)
         tm.assert_frame_equal(df, expected)
+
+    def test_raise_on_usecols_names_mismatch(self):
+        # GH 14671
+        data = 'a,b,c,d\n1,2,3,4\n5,6,7,8'
+
+        if self.engine == 'c':
+            msg = 'Usecols do not match names'
+        else:
+            msg = 'is not in list'
+
+        usecols = ['a', 'b', 'c', 'd']
+        df = self.read_csv(StringIO(data), usecols=usecols)
+        expected = DataFrame({'a': [1, 5], 'b': [2, 6], 'c': [3, 7],
+                              'd': [4, 8]})
+        tm.assert_frame_equal(df, expected)
+
+        usecols = ['a', 'b', 'c', 'f']
+        with tm.assert_raises_regex(ValueError, msg):
+            self.read_csv(StringIO(data), usecols=usecols)
+
+        usecols = ['a', 'b', 'f']
+        with tm.assert_raises_regex(ValueError, msg):
+            self.read_csv(StringIO(data), usecols=usecols)
+
+        names = ['A', 'B', 'C', 'D']
+
+        df = self.read_csv(StringIO(data), header=0, names=names)
+        expected = DataFrame({'A': [1, 5], 'B': [2, 6], 'C': [3, 7],
+                              'D': [4, 8]})
+        tm.assert_frame_equal(df, expected)
+
+        # TODO: https://github.com/pandas-dev/pandas/issues/16469
+        # usecols = ['A','C']
+        # df = self.read_csv(StringIO(data), header=0, names=names,
+        #                    usecols=usecols)
+        # expected = DataFrame({'A': [1,5], 'C': [3,7]})
+        # tm.assert_frame_equal(df, expected)
+        #
+        # usecols = [0,2]
+        # df = self.read_csv(StringIO(data), header=0, names=names,
+        #                    usecols=usecols)
+        # expected = DataFrame({'A': [1,5], 'C': [3,7]})
+        # tm.assert_frame_equal(df, expected)
+
+        usecols = ['A', 'B', 'C', 'f']
+        with tm.assert_raises_regex(ValueError, msg):
+            self.read_csv(StringIO(data), header=0, names=names,
+                          usecols=usecols)
+        usecols = ['A', 'B', 'f']
+        with tm.assert_raises_regex(ValueError, msg):
+            self.read_csv(StringIO(data), names=names, usecols=usecols)
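The user-visible effect of the check added in this patch, sketched against the default C
engine (the data is the same shape as in the new test above)::

    from io import StringIO
    import pandas as pd

    data = 'a,b,c,d\n1,2,3,4\n5,6,7,8'

    # a subset of the parsed header is fine
    pd.read_csv(StringIO(data), usecols=['a', 'c'])

    # 'f' is not among the names, so this now raises
    # ValueError: Usecols do not match names.
    pd.read_csv(StringIO(data), usecols=['a', 'b', 'f'])

The Python engine reports the same mistake with its own ``is not in list`` message, as the
test distinguishes.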
From cf5f2d899a24d4a1406de6495b7263e1ff9f6eee Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Sun, 4 Jun 2017 06:29:15 -0500
Subject: [PATCH 41/55] DOC: Whatsnew fixups (#16596)

---
 doc/source/whatsnew/v0.20.2.txt | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index e1469cf15e20c..31125db0f34d4 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -1,14 +1,12 @@
 .. _whatsnew_0202:
 
-v0.20.2 (June 3, 2017)
+v0.20.2 (June 4, 2017)
 ----------------------
 
 This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes,
 bug fixes and performance improvements.
 We recommend that all users upgrade to this version.
 
-Highlights include:
-
 .. contents:: What's new in v0.20.2
     :local:
     :backlinks: none
 
From 1415b95f965026639563b2c572c79c321727ee10 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Sun, 4 Jun 2017 15:52:53 -0500
Subject: [PATCH 42/55] DOC: Update release.rst

---
 doc/source/release.rst | 50 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 2587962299569..bf272e243e0dd 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -37,6 +37,56 @@ analysis / manipulation tool available in any language.
 * Binary installers on PyPI: http://pypi.python.org/pypi/pandas
 * Documentation: http://pandas.pydata.org
 
+pandas 0.20.2
+-------------
+
+**Release date:** June 4, 2017
+
+This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes,
+bug fixes and performance improvements.
+We recommend that all users upgrade to this version.
+
+See the :ref:`v0.20.2 Whatsnew <whatsnew_0202>` overview for an extensive list
+of all enhancements and bugs that have been fixed in 0.20.2.
+
+Thanks
+~~~~~~
+
+- Aaron Barber
+- Andrew 亮
+- Becky Sweger
+- Christian Prinoth
+- Christian Stade-Schuldt
+- DSM
+- Erik Fredriksen
+- Hugues Valois
+- Jeff Reback
+- Jeff Tratner
+- JimStearns206
+- John W. O'Brien
+- Joris Van den Bossche
+- JosephWagner
+- Keith Webber
+- Mehmet Ali "Mali" Akmanalp
+- Pankaj Pandey
+- Patrick Luo
+- Patrick O'Melveny
+- Pietro Battiston
+- RobinFiveWords
+- Ryan Hendrickson
+- SimonBaron
+- Tom Augspurger
+- WBare
+- bpraggastis
+- chernrick
+- chris-b1
+- economy
+- gfyoung
+- jaredsnyder
+- keitakurita
+- linebp
+- lloydkirk
+
 pandas 0.20.0 / 0.20.1
 ----------------------
 
From 93aabe746b13d99ec8099738dd35b4040e37a249 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Tue, 6 Jun 2017 09:10:00 -0400
Subject: [PATCH 43/55] BUG: pickle compat with UTC tz's (#16611)

closes #16608

---
 doc/source/whatsnew.rst | 2 +
 doc/source/whatsnew/v0.20.3.txt | 89 ++++++++++++++++++
 pandas/compat/pickle_compat.py | 2 +-
 .../0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle | Bin 125349 -> 126076 bytes
 pandas/tests/io/generate_legacy_storage_files.py | 8 +-
 5 files changed, 99 insertions(+), 2 deletions(-)
 create mode 100644 doc/source/whatsnew/v0.20.3.txt
 mode change 100644 => 100755 pandas/tests/io/generate_legacy_storage_files.py

diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst
index b1f9990a3e6af..3385bafc26467 100644
--- a/doc/source/whatsnew.rst
+++ b/doc/source/whatsnew.rst
@@ -20,6 +20,8 @@ These are new features and improvements of note in each release.
 
 .. include:: whatsnew/v0.21.0.txt
 
+.. include:: whatsnew/v0.20.3.txt
+
 .. include:: whatsnew/v0.20.2.txt
 
 .. include:: whatsnew/v0.20.0.txt
 
diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt
new file mode 100644
index 0000000000000..2032209c4aa23
--- /dev/null
+++ b/doc/source/whatsnew/v0.20.3.txt
@@ -0,0 +1,89 @@
+.. _whatsnew_0203:
+
+v0.20.3 (June ??, 2017)
+-----------------------
+
+This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes,
+bug fixes and performance improvements.
+We recommend that all users upgrade to this version.
+
+.. contents:: What's new in v0.20.3
+    :local:
+    :backlinks: none
+
+
+.. _whatsnew_0203.enhancements:
+
+Enhancements
+~~~~~~~~~~~~
+
+
+
+
+
+
+.. _whatsnew_0203.performance:
+
+Performance Improvements
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+
+
+
+
+.. 
_whatsnew_0203.bug_fixes:
+
+Bug Fixes
+~~~~~~~~~
+
+
+
+
+Conversion
+^^^^^^^^^^
+
+- Bug in pickle compat prior to the v0.20.x series, when ``UTC`` is a timezone in a Series/DataFrame/Index (:issue:`16608`)
+
+Indexing
+^^^^^^^^
+
+
+
+I/O
+^^^
+
+
+
+Plotting
+^^^^^^^^
+
+
+
+
+Groupby/Resample/Rolling
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+
+Sparse
+^^^^^^
+
+
+
+
+Reshaping
+^^^^^^^^^
+
+
+
+Numeric
+^^^^^^^
+
+
+Categorical
+^^^^^^^^^^^
+
+
+Other
+^^^^^
diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
index b875bbb0d63c0..f6223c48994ae 100644
--- a/pandas/compat/pickle_compat.py
+++ b/pandas/compat/pickle_compat.py
@@ -15,7 +15,7 @@ def load_reduce(self):
     args = stack.pop()
     func = stack[-1]
 
-    if type(args[0]) is type:
+    if len(args) and type(args[0]) is type:
         n = args[0].__name__  # noqa
 
     try:
diff --git a/pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle b/pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle
index 6bb02672a4151c8d6536127fc94e68634e56c86d..75ea95ff402c4e9f0c93ef80e6c04baf7f0a70d7 100644
GIT binary patch
delta 785
[base85-encoded binary delta omitted]

delta 603
[base85-encoded binary delta omitted]

diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py
old mode 100644
new mode 100755
index 22c62b738e6a2..996965999724e
--- a/pandas/tests/io/generate_legacy_storage_files.py
+++ b/pandas/tests/io/generate_legacy_storage_files.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 """ self-contained to write legacy storage (pickle/msgpack) files """
 from __future__ import print_function
 from warnings import catch_warnings
@@ -125,7 +127,11 @@ def create_data():
         mixed_dup=mixed_dup_df,
         dt_mixed_tzs=DataFrame({
             u'A': Timestamp('20130102', tz='US/Eastern'),
-            u'B': Timestamp('20130603', tz='CET')}, index=range(5))
+            u'B': Timestamp('20130603', tz='CET')}, index=range(5)),
+        dt_mixed2_tzs=DataFrame({
+            u'A': Timestamp('20130102', tz='US/Eastern'),
+            u'B': Timestamp('20130603', tz='CET'),
+            u'C': Timestamp('20130603', tz='UTC')}, index=range(5))
     )
 
     with catch_warnings(record=True):
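A quick illustration of the frame shape behind the new ``dt_mixed2_tzs`` fixture; the
pickle path is a placeholder, and a same-version round-trip is shown only to exercise the
construction (the regression itself concerned pre-0.20.x pickles whose ``UTC`` timezone hit
the unguarded ``args[0]`` in ``load_reduce`` when read back)::

    import pandas as pd

    df = pd.DataFrame({'A': pd.Timestamp('20130102', tz='US/Eastern'),
                       'B': pd.Timestamp('20130603', tz='CET'),
                       'C': pd.Timestamp('20130603', tz='UTC')}, index=range(5))

    df.to_pickle('dt_mixed2_tzs.pkl')   # placeholder path
    pd.read_pickle('dt_mixed2_tzs.pkl')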
From 3ebd7191fd3d85af3ba5c1319c4d9ada5d8917fd Mon Sep 17 00:00:00 2001
From: Jean Helie
Date: Wed, 7 Jun 2017 01:41:31 +0100
Subject: [PATCH 44/55] Fix some lgtm alerts (#16613)

---
 pandas/core/dtypes/cast.py | 2 +-
 pandas/core/generic.py | 8 ++++----
 pandas/core/indexes/interval.py | 4 ++--
 pandas/core/internals.py | 3 ---
 pandas/core/sparse/array.py | 2 +-
 pandas/io/parsers.py | 2 +-
 pandas/tseries/offsets.py | 1 -
 7 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index fd61813a57c98..16b0a5c8a74ca 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -668,7 +668,7 @@ def maybe_convert_objects(values, convert_dates=True, convert_numeric=True,
             if convert_timedeltas == 'coerce':
                 from pandas.core.tools.timedeltas import to_timedelta
-                new_values = to_timedelta(values, coerce=True)
+                new_values = to_timedelta(values, errors='coerce')
 
                 # if we are all nans then leave me alone
                 if not isnull(new_values).all():
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 98999ec267c82..accb7d0db1d2c 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -4285,7 +4285,7 @@ def asof(self, where, subset=None):
                 raise ValueError("subset is not valid for Series")
         elif self.ndim > 2:
             raise NotImplementedError("asof is not implemented "
-                                      "for {type}".format(type(self)))
+                                      "for {type}".format(type=type(self)))
         else:
             if subset is None:
                 subset = self.columns
@@ -4980,7 +4980,7 @@ def last(self, offset):
 
         offset = to_offset(offset)
 
-        start_date = start = self.index[-1] - offset
+        start_date = self.index[-1] - offset
         start = self.index.searchsorted(start_date, side='right')
         return self.iloc[start:]
 
@@ -5303,8 +5303,8 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
 
                     # slice me out of the other
                     else:
-                        raise NotImplemented("cannot align with a higher dimensional "
-                                             "NDFrame")
+                        raise NotImplementedError("cannot align with a higher "
+                                                  "dimensional NDFrame")
 
             elif is_list_like(other):
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index b1523cd6c0d0c..e6b2bc0953680 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -1053,11 +1053,11 @@ def interval_range(start=None, end=None, freq=None, periods=None,
         if periods is None or end is None:
             raise ValueError("must specify 2 of start, end, periods")
         start = end - periods * freq
-    elif end is None:
+    if end is None:
         if periods is None or start is None:
             raise ValueError("must specify 2 of start, end, periods")
        end = start + periods * freq
-    elif periods is None:
+    if periods is None:
        if start is None or end is None:
            raise ValueError("must specify 2 of start, end, periods")
        pass
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 15851a17274ca..58690ad632152 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -4645,7 +4645,6 @@ def _block2d_to_blocknd(values, placement, shape, labels, ref_items):
     pvalues = np.empty(panel_shape, dtype=dtype)
     pvalues.fill(fill_value)
 
-    values = values
     for i in range(len(placement)):
         pvalues[i].flat[mask] = values[:, i]
 
@@ -5154,8 +5153,6 @@ def dtype(self):
             return _get_dtype(maybe_promote(self.block.dtype,
                                             self.block.fill_value)[0])
 
-        return self._dtype
-
     @cache_readonly
     def is_null(self):
         if self.block is None:
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index 8ac9d3916573e..c75de01b98e4e 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -125,7 +125,7 @@ def _sparse_array_op(left, right, op, name, series=False):
         name = name[1:]
 
     if name in ('and', 'or') and dtype == 'bool':
-        opname = 'sparse_{name}_uint8'.format(name=name, dtype=dtype)
+        opname = 'sparse_{name}_uint8'.format(name=name)
         # to make template simple, cast here
         left_sp_values = left.sp_values.view(np.uint8)
         right_sp_values = right.sp_values.view(np.uint8)
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 055d6d045d2f2..c2d5a629b03a3 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2211,7 +2211,7 @@ def _exclude_implicit_index(self, alldata):
     def get_chunk(self, size=None):
         if size is None:
             size = self.chunksize
-        return self.read(nrows=size)
+        return self.read(nrows=size)
 
    def 
_convert_data(self, data): # apply converters diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index f9f4adc1b2c81..2a120a0696836 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1596,7 +1596,6 @@ def apply(self, other): if otherDay != self.weekday: other = other + timedelta((self.weekday - otherDay) % 7) k = k - 1 - other = other for i in range(k): other = other + self._inc else: From fd171ebeda446a0d7abfe0d61e5bfc0897d4937e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 8 Jun 2017 06:47:18 -0400 Subject: [PATCH 45/55] BLD: fix numpy on 3.6 build as 1.13 was released but no deps are built for it (#16633) --- ci/requirements-3.6.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-3.6.build b/ci/requirements-3.6.build index 1c4b46aea3865..8d09e0ee93070 100644 --- a/ci/requirements-3.6.build +++ b/ci/requirements-3.6.build @@ -2,5 +2,5 @@ python=3.6* python-dateutil pytz nomkl -numpy +numpy=1.12* cython From 4b0ef03c30102a5516c7345b92fa4906bf5bd87f Mon Sep 17 00:00:00 2001 From: DSM Date: Thu, 8 Jun 2017 06:47:32 -0400 Subject: [PATCH 46/55] BUG: Fix Series.get failure on missing NaN (#8569) (#16619) --- doc/source/whatsnew/v0.20.3.txt | 2 +- pandas/core/indexes/numeric.py | 2 ++ pandas/tests/indexes/test_multi.py | 8 ++++++++ pandas/tests/indexes/test_numeric.py | 8 ++++++++ pandas/tests/series/test_indexing.py | 15 +++++++++++++++ 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 2032209c4aa23..049737f948e17 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -48,7 +48,7 @@ Conversion Indexing ^^^^^^^^ - +- Bug in ``Float64Index`` causing an empty array instead of None to be returned from ``.get(np.nan)`` on a Series whose index did not contain any NaNs (:issue:`8569`) I/O ^^^ diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index bdae0ac7ac5e9..72d521cbe2d60 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -369,6 +369,8 @@ def get_loc(self, key, method=None, tolerance=None): except (ValueError, IndexError): # should only need to catch ValueError here but on numpy # 1.7 .item() can raise IndexError when NaNs are present + if not len(nan_idxs): + raise KeyError(key) return nan_idxs except (TypeError, NotImplementedError): pass diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index ba917f33d8595..7d2e6f495311f 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1172,6 +1172,14 @@ def test_get_loc_level(self): assert result == expected assert new_index.equals(index.droplevel(0)) + def test_get_loc_missing_nan(self): + # GH 8569 + idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) + assert isinstance(idx.get_loc(1), slice) + pytest.raises(KeyError, idx.get_loc, 3) + pytest.raises(KeyError, idx.get_loc, np.nan) + pytest.raises(KeyError, idx.get_loc, [np.nan]) + def test_slice_locs(self): df = tm.makeTimeDataFrame() stacked = df.stack() diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 29d4214fd549b..62ac337d02727 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -371,6 +371,14 @@ def test_get_loc_na(self): assert idx.get_loc(1) == 1 pytest.raises(KeyError, idx.slice_locs, np.nan) + def test_get_loc_missing_nan(self): + # GH 8569 + idx = Float64Index([1, 2]) + 
assert idx.get_loc(1) == 0 + pytest.raises(KeyError, idx.get_loc, 3) + pytest.raises(KeyError, idx.get_loc, np.nan) + pytest.raises(KeyError, idx.get_loc, [np.nan]) + def test_contains_nans(self): i = Float64Index([1.0, 2.0, np.nan]) assert np.nan in i diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 6ded4d593a571..7774d10c5eaf8 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -70,6 +70,21 @@ def test_get(self): result = vc.get(True, default='Missing') assert result == 'Missing' + def test_get_nan(self): + # GH 8569 + s = pd.Float64Index(range(10)).to_series() + assert s.get(np.nan) is None + assert s.get(np.nan, default='Missing') == 'Missing' + + # ensure that fixing the above hasn't broken get + # with multiple elements + idx = [20, 30] + assert_series_equal(s.get(idx), + Series([np.nan] * 2, index=idx)) + idx = [np.nan, np.nan] + assert_series_equal(s.get(idx), + Series([np.nan] * 2, index=idx)) + def test_delitem(self): # GH 5542 From 1b159af6879a46d54811891ac394cb807264209a Mon Sep 17 00:00:00 2001 From: DSM Date: Thu, 8 Jun 2017 06:48:00 -0400 Subject: [PATCH 47/55] TST: NaN in MultiIndex should not become a string (#7031) (#16625) --- pandas/tests/indexes/test_multi.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 7d2e6f495311f..3f6fd8c8aa827 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2866,3 +2866,24 @@ def test_tuples_with_name_string(self): pd.Index(li, name='abc') with pytest.raises(ValueError): pd.Index(li, name='a') + + def test_nan_stays_float(self): + + # GH 7031 + idx0 = pd.MultiIndex(levels=[["A", "B"], []], + labels=[[1, 0], [-1, -1]], + names=[0, 1]) + idx1 = pd.MultiIndex(levels=[["C"], ["D"]], + labels=[[0], [0]], + names=[0, 1]) + idxm = idx0.join(idx1, how='outer') + assert pd.isnull(idx0.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isnull(idxm.get_level_values(1)[:-1]).all() + + df0 = pd.DataFrame([[1, 2]], index=idx0) + df1 = pd.DataFrame([[3, 4]], index=idx1) + dfm = df0 - df1 + assert pd.isnull(df0.index.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isnull(dfm.index.get_level_values(1)[:-1]).all() From 8eb0c7fdc6a9fde19815a1d50e3c54ec195ddfcc Mon Sep 17 00:00:00 2001 From: DSM Date: Thu, 8 Jun 2017 06:48:22 -0400 Subject: [PATCH 48/55] TST: verify we can add and subtract from indices (#8142) (#16629) --- pandas/tests/indexes/test_base.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d9f8e5e7f382b..18dbe6624008a 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1800,6 +1800,25 @@ def test_string_index_repr(self): assert coerce(idx) == expected + @pytest.mark.parametrize('dtype', [np.int64, np.float64]) + @pytest.mark.parametrize('delta', [1, 0, -1]) + def test_addsub_arithmetic(self, dtype, delta): + # GH 8142 + delta = dtype(delta) + idx = pd.Index([10, 11, 12], dtype=dtype) + result = idx + delta + expected = pd.Index(idx.values + delta, dtype=dtype) + tm.assert_index_equal(result, expected) + + # this subtraction used to fail + result = idx - delta + expected = pd.Index(idx.values - delta, dtype=dtype) + tm.assert_index_equal(result, expected) + + tm.assert_index_equal(idx + idx, 2 * idx) + tm.assert_index_equal(idx - idx, 
From 6fa83d3680100c5114d252b4908c0182b963006c Mon Sep 17 00:00:00 2001
From: Pradyumna Reddy Chinthala
Date: Fri, 9 Jun 2017 21:21:11 +0530
Subject: [PATCH 49/55] BUG: conversion of Series to Categorical (#16557)

fix #16524
---
 doc/source/whatsnew/v0.20.3.txt    | 1 +
 pandas/core/internals.py           | 2 +-
 pandas/tests/series/test_dtypes.py | 9 +++++++++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt
index 049737f948e17..52f7701724f18 100644
--- a/doc/source/whatsnew/v0.20.3.txt
+++ b/doc/source/whatsnew/v0.20.3.txt
@@ -44,6 +44,7 @@ Conversion
 ^^^^^^^^^^
 
 - Bug in pickle compat prior to the v0.20.x series, when ``UTC`` is a timezone in a Series/DataFrame/Index (:issue:`16608`)
+- Bug in Series construction when passing a Series with ``dtype='category'`` (:issue:`16524`).
 
 Indexing
 ^^^^^^^^
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 58690ad632152..f2a7ac76481d4 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -471,7 +471,7 @@ def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
                             **kwargs)
 
     def _astype(self, dtype, copy=False, errors='raise', values=None,
-                klass=None, mgr=None, **kwargs):
+                klass=None, mgr=None, raise_on_error=False, **kwargs):
         """ Coerce to the new type (if copy=True, return a new copy)
         raise on an except if raise == True
         """
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index e084fa58d6c51..9ab02a8c2aad7 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -248,3 +248,12 @@ def test_intercept_astype_object(self):
 
         result = df.values.squeeze()
         assert (result[:, 0] == expected.values).all()
+
+    def test_series_to_categorical(self):
+        # see gh-16524: test conversion of Series to Categorical
+        series = Series(['a', 'b', 'c'])
+
+        result = Series(series, dtype='category')
+        expected = Series(['a', 'b', 'c'], dtype='category')
+
+        tm.assert_series_equal(result, expected)
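The bug fixed above was user-visible in plain construction: passing an existing ``Series`` back into the ``Series`` constructor with ``dtype='category'`` raised instead of converting. A short sketch of the round trip the new test covers (illustrative only):

```
import pandas as pd

s = pd.Series(['a', 'b', 'c'])
cat = pd.Series(s, dtype='category')  # raised before this fix (GH 16524)

assert str(cat.dtype) == 'category'
assert cat.tolist() == ['a', 'b', 'c']
```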
From aba51b6afa011a9614411bb2cd966a0903dbef74 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Fri, 9 Jun 2017 18:45:29 -0400
Subject: [PATCH 50/55] BLD: fix numpy on 2.7 build as 1.13 was released but no deps are built for it (#16633) (#16650)

---
 ci/requirements-2.7.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/requirements-2.7.build b/ci/requirements-2.7.build
index 415df13179fcf..a7b950e615464 100644
--- a/ci/requirements-2.7.build
+++ b/ci/requirements-2.7.build
@@ -2,5 +2,5 @@ python=2.7*
 python-dateutil=2.4.1
 pytz=2013b
 nomkl
-numpy
+numpy=1.12*
 cython=0.23

From fdb54dfbc782890887e02230d62850f6512c6233 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 9 Jun 2017 15:46:33 -0700
Subject: [PATCH 51/55] CLN: make license file machine readable (#16649)

Splits extra information about the license and copyright holders to
AUTHORS.md.
---
 AUTHORS.md |  57 ++++++++++++++++++++++++++++
 LICENSE    | 106 ++++++++++++-----------------------------------------
 2 files changed, 81 insertions(+), 82 deletions(-)
 create mode 100644 AUTHORS.md

diff --git a/AUTHORS.md b/AUTHORS.md
new file mode 100644
index 0000000000000..dcaaea101f4c8
--- /dev/null
+++ b/AUTHORS.md
@@ -0,0 +1,57 @@
+About the Copyright Holders
+===========================
+
+* Copyright (c) 2008-2011 AQR Capital Management, LLC
+
+  AQR Capital Management began pandas development in 2008. Development was
+  led by Wes McKinney. AQR released the source under this license in 2009.
+* Copyright (c) 2011-2012, Lambda Foundry, Inc.
+
+  Wes is now an employee of Lambda Foundry, and remains the pandas project
+  lead.
+* Copyright (c) 2011-2012, PyData Development Team
+
+  The PyData Development Team is the collection of developers of the PyData
+  project. This includes all of the PyData sub-projects, including pandas. The
+  core team that coordinates development on GitHub can be found here:
+  http://github.com/pydata.
+
+Full credits for pandas contributors can be found in the documentation.
+
+Our Copyright Policy
+====================
+
+PyData uses a shared copyright model. Each contributor maintains copyright
+over their contributions to PyData. However, it is important to note that
+these contributions are typically only changes to the repositories. Thus,
+the PyData source code, in its entirety, is not the copyright of any single
+person or institution. Instead, it is the collective copyright of the
+entire PyData Development Team. If individual contributors want to maintain
+a record of what changes/contributions they have specific copyright on,
+they should indicate their copyright in the commit message of the change
+when they commit the change to one of the PyData repositories.
+
+With this in mind, the following banner should be used in any source code
+file to indicate the copyright and license terms:
+
+```
+#-----------------------------------------------------------------------------
+# Copyright (c) 2012, PyData Development Team
+# All rights reserved.
+#
+# Distributed under the terms of the BSD Simplified License.
+#
+# The full license is in the LICENSE file, distributed with this software.
+#-----------------------------------------------------------------------------
+```
+
+Other licenses can be found in the LICENSES directory.
+
+License
+=======
+
+pandas is distributed under a 3-clause ("Simplified" or "New") BSD
+license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have
+BSD-compatible licenses, are included. Their licenses follow the pandas
+license.
+
diff --git a/LICENSE b/LICENSE
index c9b8834e8774b..924de26253bf4 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,87 +1,29 @@
-=======
-License
-=======
+BSD 3-Clause License
 
-pandas is distributed under a 3-clause ("Simplified" or "New") BSD
-license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have
-BSD-compatible licenses, are included. Their licenses follow the pandas
-license.
-
-pandas license
-==============
-
-Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team
-All rights reserved.
-
-Copyright (c) 2008-2011 AQR Capital Management, LLC
+Copyright (c) 2008-2012, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-  * Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-
-  * Redistributions in binary form must reproduce the above
-    copyright notice, this list of conditions and the following
-    disclaimer in the documentation and/or other materials provided
-    with the distribution.
-
-  * Neither the name of the copyright holder nor the names of any
-    contributors may be used to endorse or promote products derived
-    from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-About the Copyright Holders
-===========================
-
-AQR Capital Management began pandas development in 2008. Development was
-led by Wes McKinney. AQR released the source under this license in 2009.
-Wes is now an employee of Lambda Foundry, and remains the pandas project
-lead.
-
-The PyData Development Team is the collection of developers of the PyData
-project. This includes all of the PyData sub-projects, including pandas. The
-core team that coordinates development on GitHub can be found here:
-http://github.com/pydata.
-
-Full credits for pandas contributors can be found in the documentation.
-
-Our Copyright Policy
-====================
-
-PyData uses a shared copyright model. Each contributor maintains copyright
-over their contributions to PyData. However, it is important to note that
-these contributions are typically only changes to the repositories. Thus,
-the PyData source code, in its entirety, is not the copyright of any single
-person or institution. Instead, it is the collective copyright of the
-entire PyData Development Team. If individual contributors want to maintain
-a record of what changes/contributions they have specific copyright on,
-they should indicate their copyright in the commit message of the change
-when they commit the change to one of the PyData repositories.
-
-With this in mind, the following banner should be used in any source code
-file to indicate the copyright and license terms:
-
-#-----------------------------------------------------------------------------
-# Copyright (c) 2012, PyData Development Team
-# All rights reserved.
-#
-# Distributed under the terms of the BSD Simplified License.
-#
-# The full license is in the LICENSE file, distributed with this software.
-#-----------------------------------------------------------------------------
-
-Other licenses can be found in the LICENSES directory.
\ No newline at end of file

From 41b3968e57a72ca5d80e02f8b87f2566fa336444 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Fri, 9 Jun 2017 21:28:41 -0400
Subject: [PATCH 52/55] fix pytest-xdist version as 1.17 appears buggy (#16652)

---
 ci/install_travis.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/install_travis.sh b/ci/install_travis.sh
index 8cf6f2ce636da..f4e6c979f28a4 100755
--- a/ci/install_travis.sh
+++ b/ci/install_travis.sh
@@ -107,7 +107,7 @@ if [ -e ${REQ} ]; then
 fi
 
 time conda install -n pandas pytest
-time pip install pytest-xdist
+time pip install pytest-xdist==1.16.0
 
 if [ "$LINT" ]; then
     conda install flake8

From 9d4c88d37b4e093ca06372124ac5c8781800ab6d Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Fri, 9 Jun 2017 22:09:24 -0400
Subject: [PATCH 53/55] COMPAT: numpy 1.13 test compat (#16654)

* COMPAT: numpy 1.13 test compat

* CI: fix doc build to 1.12
---
 ci/requirements-3.6_DOC.build    | 2 +-
 pandas/compat/numpy/__init__.py  | 3 +++
 pandas/tests/test_expressions.py | 6 +++++-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/ci/requirements-3.6_DOC.build b/ci/requirements-3.6_DOC.build
index bdcfe28105866..37faaa7e4db88 100644
--- a/ci/requirements-3.6_DOC.build
+++ b/ci/requirements-3.6_DOC.build
@@ -1,5 +1,5 @@
 python=3.6*
 python-dateutil
 pytz
-numpy
+numpy=1.12*
 cython
diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py
index 4a9a2647ece0f..2c5a18973afa8 100644
--- a/pandas/compat/numpy/__init__.py
+++ b/pandas/compat/numpy/__init__.py
@@ -15,6 +15,7 @@
 _np_version_under1p11 = _nlv < '1.11'
 _np_version_under1p12 = _nlv < '1.12'
 _np_version_under1p13 = _nlv < '1.13'
+_np_version_under1p14 = _nlv < '1.14'
 
 if _nlv < '1.7.0':
     raise ImportError('this version of pandas is incompatible with '
@@ -74,4 +75,6 @@ def np_array_datetime64_compat(arr, *args, **kwargs):
                    '_np_version_under1p10',
                    '_np_version_under1p11',
                    '_np_version_under1p12',
+                   '_np_version_under1p13',
+                   '_np_version_under1p14'
                    ]
diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py
index fae7bfa513dcd..08c3a25e66b0e 100644
--- a/pandas/tests/test_expressions.py
+++ b/pandas/tests/test_expressions.py
@@ -13,7 +13,7 @@
 
 from pandas.core.api import DataFrame, Panel
 from pandas.core.computation import expressions as expr
-from pandas import compat, _np_version_under1p11
+from pandas import compat, _np_version_under1p11, _np_version_under1p13
 from pandas.util.testing import (assert_almost_equal, assert_series_equal,
                                  assert_frame_equal, assert_panel_equal,
                                  assert_panel4d_equal, slow)
@@ -420,6 +420,10 @@ def test_bool_ops_warn_on_arithmetic(self):
             f = getattr(operator, name)
             fe = getattr(operator, sub_funcs[subs[op]])
 
+            # >= 1.13.0 these are now TypeErrors
+            if op == '-' and not _np_version_under1p13:
+                continue
+
             with tm.use_numexpr(True, min_elements=5):
                 with tm.assert_produces_warning(check_stacklevel=False):
                     r = f(df, df)
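The ``_np_version_under1p13`` flag used in the test skip above follows the version-gate pattern of ``pandas/compat/numpy``. A self-contained sketch of that pattern, and of why the ``-`` case is skipped on newer numpy (the arrays here are illustrative):

```
from distutils.version import LooseVersion
import numpy as np

_np_version_under1p13 = LooseVersion(np.__version__) < LooseVersion('1.13')

a = np.array([True, False, True])
if _np_version_under1p13:
    diff = a - a                 # older numpy: allowed (deprecated)
else:
    diff = np.logical_xor(a, a)  # numpy >= 1.13: boolean `-` raises TypeError
```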
From 8f6e50ae280c8c99bc796f290f765eb65edb6515 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Sat, 10 Jun 2017 11:09:30 -0400
Subject: [PATCH 54/55] Revert "fix pytest-xdist version as 1.17 appears buggy (#16652)" (#16657)

This reverts commit ec6bf6deaf502ac05a7120df13bd9b13cb3083f6.

pytest-xdist 1.17.1 was released, which fixes the bug.
---
 ci/install_travis.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/install_travis.sh b/ci/install_travis.sh
index f4e6c979f28a4..8cf6f2ce636da 100755
--- a/ci/install_travis.sh
+++ b/ci/install_travis.sh
@@ -107,7 +107,7 @@ if [ -e ${REQ} ]; then
 fi
 
 time conda install -n pandas pytest
-time pip install pytest-xdist==1.16.0
+time pip install pytest-xdist
 
 if [ "$LINT" ]; then
     conda install flake8

From 1de16b6fe61222aabad176a85f7b5c6fc688d984 Mon Sep 17 00:00:00 2001
From: Chris MacLeod
Date: Sun, 11 Jun 2017 07:56:59 -0300
Subject: [PATCH 55/55] Add ASV benchmark.

---
 asv_bench/benchmarks/hdfstore_bench.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/asv_bench/benchmarks/hdfstore_bench.py b/asv_bench/benchmarks/hdfstore_bench.py
index dc72f3d548aaf..9da7aea2e7b5e 100644
--- a/asv_bench/benchmarks/hdfstore_bench.py
+++ b/asv_bench/benchmarks/hdfstore_bench.py
@@ -90,6 +90,14 @@ def time_query_store_table(self):
         stop = self.df2.index[15000]
         self.store.select('table', where="index > start and index < stop")
 
+    def time_store_tostring(self):
+        repr(self.store)
+        str(self.store)
+
+    def time_store_info(self):
+        self.store.info()
+
+
 class HDF5Panel(object):

    goal_time = 0.2
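The two timings added above separate the cheap string-representation path from the detailed ``info()`` path. A rough standalone sketch of what each benchmark exercises (the scratch file name is hypothetical; requires PyTables):

```
import numpy as np
import pandas as pd

with pd.HDFStore('scratch.h5', mode='w') as store:
    store['df'] = pd.DataFrame(np.random.randn(10, 3))
    repr(store)   # exercised by time_store_tostring
    str(store)
    store.info()  # exercised by time_store_info
```

Once merged, these can be run through asv in the usual way (for example via its ``-b`` benchmark filter pointed at ``hdfstore_bench``); the exact invocation depends on the local asv setup.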