From 2f981d6502e552bda67dde581e8caf230d3c595e Mon Sep 17 00:00:00 2001 From: Vaibhav Vishal Date: Fri, 31 May 2019 13:59:58 +0530 Subject: [PATCH 01/43] convert some Unions to TypeVar --- pandas/_typing.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index f5bf0dcd3e220..24ee65645905b 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import IO, AnyStr, Type, Union +from typing import IO, AnyStr, Type, TypeVar, Union import numpy as np @@ -11,12 +11,13 @@ from pandas.core.dtypes.generic import ( ABCExtensionArray, ABCIndexClass, ABCSeries, ABCSparseSeries) -AnyArrayLike = Union[ABCExtensionArray, - ABCIndexClass, - ABCSeries, - ABCSparseSeries, - np.ndarray] -ArrayLike = Union[ABCExtensionArray, np.ndarray] +AnyArrayLike = TypeVar('AnyArrayLike', + ABCExtensionArray, + ABCIndexClass, + ABCSeries, + ABCSparseSeries, + np.ndarray) +ArrayLike = TypeVar('ArrayLike', ABCExtensionArray, np.ndarray) DatetimeLikeScalar = Type[Union[Period, Timestamp, Timedelta]] Dtype = Union[str, np.dtype, ExtensionDtype] FilePathOrBuffer = Union[str, Path, IO[AnyStr]] From c2e62676cf20c9aa179a1745ea4fbcf3c65fbe73 Mon Sep 17 00:00:00 2001 From: lrjball <50599110+lrjball@users.noreply.github.com> Date: Fri, 31 May 2019 13:41:10 +0100 Subject: [PATCH 02/43] DOC: Fixed redirects in various parts of the documentation (#26497) --- pandas/core/arrays/categorical.py | 3 ++- pandas/core/arrays/interval.py | 2 +- pandas/core/dtypes/concat.py | 2 +- pandas/core/generic.py | 10 +++++----- pandas/core/groupby/groupby.py | 2 +- pandas/core/groupby/grouper.py | 2 +- pandas/core/indexes/datetimes.py | 8 ++++---- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 3 ++- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/timedeltas.py | 4 ++-- pandas/core/indexing.py | 4 ++-- pandas/core/reshape/concat.py | 2 +- pandas/core/tools/datetimes.py | 2 +- pandas/core/window.py | 6 +++--- pandas/io/json/json.py | 4 ++-- pandas/io/parsers.py | 4 ++-- pandas/io/pytables.py | 6 +++--- 18 files changed, 35 insertions(+), 33 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 0fa705369908a..89b86c66d7b05 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -272,7 +272,8 @@ class Categorical(ExtensionArray, PandasObject): Notes ----- See the `user guide - `_ for more. + `_ + for more. Examples -------- diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 94b9dc8ebab55..4f628eff43167 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -95,7 +95,7 @@ Notes ----- See the `user guide -`_ +`_ for more. 
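The practical effect of PATCH 01 is that a constrained ``TypeVar`` lets a type checker bind a function's return type to its argument type, which a plain ``Union`` cannot express. A minimal sketch of the difference, using ``list`` and ``np.ndarray`` as stand-ins for pandas' ABC classes (the names below are illustrative, not part of the patch):

    from typing import TypeVar, Union

    import numpy as np

    UnionAlias = Union[np.ndarray, list]
    ConstrainedT = TypeVar("ConstrainedT", np.ndarray, list)

    def through_union(values: UnionAlias) -> UnionAlias:
        # the checker only knows "ndarray or list" comes back
        return values

    def through_typevar(values: ConstrainedT) -> ConstrainedT:
        # the checker binds ConstrainedT: ndarray in, ndarray out
        return values

    arr = through_typevar(np.array([1, 2]))  # inferred as ndarray, not a Union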
%(examples)s\ diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index f8488b7a153e3..b22ed45642cf6 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -244,7 +244,7 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): ----- To learn more about categories, see `link - `__ + `__ Examples -------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 87db069d94893..0596d0ab844ec 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3328,8 +3328,8 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): "A value is trying to be set on a copy of a slice from a " "DataFrame\n\n" "See the caveats in the documentation: " - "http://pandas.pydata.org/pandas-docs/stable/" - "indexing.html#indexing-view-versus-copy" + "http://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" ) else: @@ -3338,8 +3338,8 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): "DataFrame.\n" "Try using .loc[row_indexer,col_indexer] = value " "instead\n\nSee the caveats in the documentation: " - "http://pandas.pydata.org/pandas-docs/stable/" - "indexing.html#indexing-view-versus-copy" + "http://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" ) if value == 'raise': @@ -7762,7 +7762,7 @@ def asfreq(self, freq, method=None, how=None, normalize=False, Notes ----- To learn more about the frequency strings, please see `this link - `__. + `__. Examples -------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 91bb71a1a8af7..2b190c53da53d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -219,7 +219,7 @@ class providing the base-class of operations. Notes ----- See more `here -`_ +`_ Examples -------- diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 04d407ebc670d..febfdc7bdf908 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -49,7 +49,7 @@ class Grouper: This will groupby the specified frequency if the target selection (via key or level) is a datetime-like object. For full specification of available frequencies, please see `here - `_. + `_. axis : number/name of the axis, defaults to 0 sort : boolean, default to False whether to sort the resulting labels diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e68431b79dcd3..1bf3cb86811cb 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -215,7 +215,7 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index, DatetimeDelegateMixin): Notes ----- To learn more about the frequency strings, please see `this link - `__. + `__. Creating a DatetimeIndex based on `start`, `periods`, and `end` has been deprecated in favor of :func:`date_range`. @@ -1377,7 +1377,7 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None, ``start`` and ``end`` (closed on both sides). To learn more about the frequency strings, please see `this link - `__. + `__. Examples -------- @@ -1533,7 +1533,7 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, desired. To learn more about the frequency strings, please see `this link - `__. + `__. Examples -------- @@ -1605,7 +1605,7 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, must be specified. To learn more about the frequency strings, please see `this link - `__. 
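Among the links PATCH 02 rewrites are the two embedded in the ``SettingWithCopy`` warning text above. For context, a minimal sketch of the chained assignment that triggers that warning (illustrative only, not part of the patch):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})

    # Chained assignment: the first indexing step may return a copy,
    # so the write can be silently lost; pandas warns and points to the
    # "returning-a-view-versus-a-copy" section linked above.
    df[df["a"] > 1]["b"] = 0.0

    # The supported spelling suggested by the same warning:
    df.loc[df["a"] > 1, "b"] = 0.0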
+ `__. Returns ------- diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 53e1a36c48994..41cf23c5542a9 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1215,7 +1215,7 @@ def interval_range(start=None, end=None, periods=None, freq=None, ``start`` and ``end``, inclusively. To learn more about datetime-like frequency strings, please see `this link - `__. + `__. Examples -------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f1553d9db835f..ec2cc70d1a352 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -182,7 +182,8 @@ class MultiIndex(Index): Notes ----- See the `user guide - `_ for more. + `_ + for more. Examples -------- diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 64272431cf703..b20b0c6f853d9 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -939,7 +939,7 @@ def period_range(start=None, end=None, periods=None, freq=None, name=None): must be specified. To learn more about the frequency strings, please see `this link - `__. + `__. Examples -------- diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 6ae17e62b49c6..0574a4b41c920 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -141,7 +141,7 @@ class TimedeltaIndex(DatetimeIndexOpsMixin, dtl.TimelikeOps, Int64Index, Notes ----- To learn more about the frequency strings, please see `this link - `__. + `__. Creating a TimedeltaIndex based on `start`, `periods`, and `end` has been deprecated in favor of :func:`timedelta_range`. @@ -730,7 +730,7 @@ def timedelta_range(start=None, end=None, periods=None, freq=None, ``start`` and ``end`` (closed on both sides). To learn more about the frequency strings, please see `this link - `__. + `__. Examples -------- diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 86158fa9ee529..7f4827be6dff7 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1190,7 +1190,7 @@ def _validate_read_indexer(self, key, indexer, axis, raise_missing=False): KeyError in the future, you can use .reindex() as an alternative. See the documentation here: - https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike""") # noqa + https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike""") # noqa if not (ax.is_categorical() or ax.is_interval()): warnings.warn(_missing_key_warning, @@ -1339,7 +1339,7 @@ class _IXIndexer(_NDFrameIndexer): .iloc for positional indexing See the documentation here: - http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated""") # noqa + http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated""") # noqa def __init__(self, name, obj): warnings.warn(self._ix_deprecation_warning, diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index ee3ed3899a55f..4523a6ad48f19 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -100,7 +100,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, A walkthrough of how this method fits in with other tools for combining pandas objects can be found `here - `__. + `__. 
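Many of the redirects above point at the user guide's frequency-strings section. A short sketch of what those strings control in the range constructors touched here (outputs omitted; this is documented behaviour, not something introduced by the patch):

    import pandas as pd

    pd.date_range("2019-01-01", periods=3, freq="D")   # calendar days
    pd.bdate_range("2019-01-01", periods=3)            # business days ("B")
    pd.period_range("2019-01", periods=3, freq="M")    # monthly periods
    pd.timedelta_range(0, periods=4, freq="6H")        # fixed 6-hour steps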
Examples -------- diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 817d539d4ad6f..0756bdb3777ec 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -533,7 +533,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, dtype: datetime64[ns] If a date does not meet the `timestamp limitations - `_, passing errors='ignore' will return the original input instead of raising any exception. diff --git a/pandas/core/window.py b/pandas/core/window.py index d51e12035c829..f332075380c79 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -462,7 +462,7 @@ class Window(_Window): See the notes below for further information. on : str, optional For a DataFrame, column on which to calculate - the rolling window, rather than the index + the rolling window, rather than the index. axis : int or str, default 0 closed : str, default None Make the interval closed on the 'right', 'left', 'both' or @@ -488,7 +488,7 @@ class Window(_Window): changed to the center of the window by setting ``center=True``. To learn more about the offsets & frequency strings, please see `this link - `__. + `__. The recognized win_types are: @@ -2188,7 +2188,7 @@ class EWM(_Rolling): (if adjust is True), and 1-alpha and alpha (if adjust is False). More details can be found at - http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows + http://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows Examples -------- diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index ee9d9e000d7e3..20bed9bff7383 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -330,8 +330,8 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, chunksize : integer, default None Return JsonReader object for iteration. - See the `line-delimted json docs - `_ + See the `line-delimited json docs + `_ for more information on ``chunksize``. This can only be passed if `lines=True`. If this is None, the file will be read into memory all at once. diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index c65c11e840c27..bcbdd80865360 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -58,7 +58,7 @@ into chunks. Additional help can be found in the online docs for -`IO Tools `_. +`IO Tools `_. Parameters ---------- @@ -753,7 +753,7 @@ def read_fwf(filepath_or_buffer: FilePathOrBuffer, into chunks. Additional help can be found in the `online docs for IO Tools - `_. + `_. Parameters ---------- diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 11f705e88179d..53ef2395a302a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -867,8 +867,8 @@ def put(self, key, value, format=None, append=False, **kwargs): This will force Table format, append the input data to the existing. data_columns : list of columns to create as data columns, or True to - use all columns. See - `here `__ # noqa + use all columns. See `here + `__. encoding : default None, provide an encoding for strings dropna : boolean, default False, do not write an ALL nan row to the store settable by the option 'io.hdf.dropna_table' @@ -949,7 +949,7 @@ def append(self, key, value, format=None, append=True, columns=None, List of columns to create as indexed data columns for on-disk queries, or True to use all columns. By default only the axes of the object are indexed. See `here - `__. + `__. 
min_itemsize : dict of columns that specify minimum string sizes
 nan_rep : string to use as string nan represenation
 chunksize : size to chunk the writing

From 805d7e8c219f804f1129fdc9e4115cf3d65b2b57 Mon Sep 17 00:00:00 2001
From: h-vetinari <33685575+h-vetinari@users.noreply.github.com>
Date: Sat, 1 Jun 2019 02:17:53 +0200
Subject: [PATCH 03/43] TST: Datetime conftest.py improvements (#26596)

xref gh-23537
---
 pandas/conftest.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 3c411f8ba3e31..8f71028f51ab4 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -376,10 +376,16 @@ def unique_nulls_fixture(request):
              FixedOffset(0), FixedOffset(-300), timezone.utc,
              timezone(timedelta(hours=1)),
              timezone(timedelta(hours=-1), name='foo')]
+TIMEZONE_IDS = ['None', 'UTC', 'US/Eastern', 'Asia/Tokyo',
+                'dateutil/US/Pacific', 'dateutil/Asia/Singapore',
+                'dateutil.tz.tzutc()', 'dateutil.tz.tzlocal()',
+                'pytz.FixedOffset(300)', 'pytz.FixedOffset(0)',
+                'pytz.FixedOffset(-300)', 'datetime.timezone.utc',
+                'datetime.timezone.+1', 'datetime.timezone.-1.named']

-@td.parametrize_fixture_doc(str(TIMEZONES))
-@pytest.fixture(params=TIMEZONES)
+@td.parametrize_fixture_doc(str(TIMEZONE_IDS))
+@pytest.fixture(params=TIMEZONES, ids=TIMEZONE_IDS)
 def tz_naive_fixture(request):
     """
     Fixture for trying timezones including default (None): {0}
@@ -387,8 +393,8 @@ def tz_naive_fixture(request):
     return request.param

-@td.parametrize_fixture_doc(str(TIMEZONES[1:]))
-@pytest.fixture(params=TIMEZONES[1:])
+@td.parametrize_fixture_doc(str(TIMEZONE_IDS[1:]))
+@pytest.fixture(params=TIMEZONES[1:], ids=TIMEZONE_IDS[1:])
 def tz_aware_fixture(request):
     """
     Fixture for trying explicit timezones: {0}
@@ -398,6 +404,8 @@ def tz_aware_fixture(request):

 # ----------------------------------------------------------------
 # Dtypes
+# ----------------------------------------------------------------
+
 UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
 UNSIGNED_EA_INT_DTYPES = ["UInt8", "UInt16", "UInt32", "UInt64"]
 SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"]
@@ -409,8 +417,8 @@ def tz_aware_fixture(request):
 COMPLEX_DTYPES = [complex, "complex64", "complex128"]
 STRING_DTYPES = [str, 'str', 'U']

-DATETIME_DTYPES = ['datetime64[ns]', 'M8[ns]']
-TIMEDELTA_DTYPES = ['timedelta64[ns]', 'm8[ns]']
+DATETIME64_DTYPES = ['datetime64[ns]', 'M8[ns]']
+TIMEDELTA64_DTYPES = ['timedelta64[ns]', 'm8[ns]']

 BOOL_DTYPES = [bool, 'bool']
 BYTES_DTYPES = [bytes, 'bytes']
@@ -418,7 +426,7 @@ def tz_aware_fixture(request):
 ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES
 ALL_NUMPY_DTYPES = (ALL_REAL_DTYPES + COMPLEX_DTYPES + STRING_DTYPES +
-                    DATETIME_DTYPES + TIMEDELTA_DTYPES + BOOL_DTYPES +
+                    DATETIME64_DTYPES + TIMEDELTA64_DTYPES + BOOL_DTYPES +
                     OBJECT_DTYPES + BYTES_DTYPES)

From c591569f5fb55b73bef1bcd541689afc03f0861d Mon Sep 17 00:00:00 2001
From: Alexander Nordin
Date: Sat, 1 Jun 2019 10:04:14 -0400
Subject: [PATCH 04/43] ERR: better error message on too large excel sheet
 (#26080)

---
 doc/source/whatsnew/v0.25.0.rst |  1 +
 pandas/io/formats/excel.py      | 10 ++++++++++
 pandas/tests/io/test_excel.py   | 18 ++++++++++++++++++
 3 files changed, 29 insertions(+)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 89a9da4a73b35..ae5b6aafe4c7d 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -533,6 +533,7 @@ I/O
 - Fixed memory leak in :meth:`DataFrame.to_json` when dealing with
numeric data (:issue:`24889`) - Bug in :func:`read_json` where date strings with ``Z`` were not converted to a UTC timezone (:issue:`26168`) - Added ``cache_dates=True`` parameter to :meth:`read_csv`, which allows to cache unique dates when they are parsed (:issue:`25990`) +- :meth:`DataFrame.to_excel` now raises a ``ValueError`` when the caller's dimensions exceed the limitations of Excel (:issue:`26051`) Plotting ^^^^^^^^ diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index fd6e3304ec4ef..4db00e34b39e2 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -341,6 +341,9 @@ class ExcelFormatter: This is only called for body cells. """ + max_rows = 2**20 + max_cols = 2**14 + def __init__(self, df, na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, merge_cells=False, inf_rep='inf', style_converter=None): @@ -648,6 +651,13 @@ def write(self, writer, sheet_name='Sheet1', startrow=0, from pandas.io.excel import ExcelWriter from pandas.io.common import _stringify_path + num_rows, num_cols = self.df.shape + if num_rows > self.max_rows or num_cols > self.max_cols: + raise ValueError("This sheet is too large! Your sheet size is: " + + "{}, {} ".format(num_rows, num_cols) + + "Max sheet size is: {}, {}". + format(self.max_rows, self.max_cols)) + if isinstance(writer, ExcelWriter): need_save = False else: diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 1421fc94b67f4..7693caf3b31d2 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1118,6 +1118,24 @@ class and any subclasses, on account of the `autouse=True` class TestExcelWriter(_WriterBase): # Base class for test cases to run with different Excel writers. + def test_excel_sheet_size(self): + + # GH 26080 + breaking_row_count = 2**20 + 1 + breaking_col_count = 2**14 + 1 + # purposely using two arrays to prevent memory issues while testing + row_arr = np.zeros(shape=(breaking_row_count, 1)) + col_arr = np.zeros(shape=(1, breaking_col_count)) + row_df = pd.DataFrame(row_arr) + col_df = pd.DataFrame(col_arr) + + msg = "sheet is too large" + with pytest.raises(ValueError, match=msg): + row_df.to_excel(self.path) + + with pytest.raises(ValueError, match=msg): + col_df.to_excel(self.path) + def test_excel_sheet_by_name_raise(self, *_): import xlrd From cfa03b6d1c4f6ea1b0cddbff3213c47405005c41 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 1 Jun 2019 14:08:20 +0000 Subject: [PATCH 05/43] CLN: remove sample_time attributes from benchmarks (#26598) --- asv_bench/benchmarks/index_object.py | 1 - asv_bench/benchmarks/rolling.py | 6 ------ 2 files changed, 7 deletions(-) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 0fdf46e7c64de..896a20bae2069 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -52,7 +52,6 @@ def time_is_dates_only(self): class Ops: - sample_time = 0.2 params = ['float', 'int'] param_names = ['dtype'] diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 2532d326dff4b..033b466c8b9be 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -4,7 +4,6 @@ class Methods: - sample_time = 0.2 params = (['DataFrame', 'Series'], [10, 1000], ['int', 'float'], @@ -23,7 +22,6 @@ def time_rolling(self, constructor, window, dtype, method): class ExpandingMethods: - sample_time = 0.2 params = (['DataFrame', 'Series'], ['int', 'float'], ['median', 'mean', 
'max', 'min', 'std', 'count', 'skew', 'kurt', @@ -41,7 +39,6 @@ def time_expanding(self, constructor, dtype, method): class EWMMethods: - sample_time = 0.2 params = (['DataFrame', 'Series'], [10, 1000], ['int', 'float'], @@ -58,7 +55,6 @@ def time_ewm(self, constructor, window, dtype, method): class VariableWindowMethods(Methods): - sample_time = 0.2 params = (['DataFrame', 'Series'], ['50s', '1h', '1d'], ['int', 'float'], @@ -75,7 +71,6 @@ def setup(self, constructor, window, dtype, method): class Pairwise: - sample_time = 0.2 params = ([10, 1000, None], ['corr', 'cov'], [True, False]) @@ -95,7 +90,6 @@ def time_pairwise(self, window, method, pairwise): class Quantile: - sample_time = 0.2 params = (['DataFrame', 'Series'], [10, 1000], ['int', 'float'], From e6f21d89d5e7dc66cc5c4526ff331a5309cd815e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 1 Jun 2019 15:09:27 +0100 Subject: [PATCH 06/43] TST: add concrete examples of dataframe fixtures to docstrings (#26593) --- pandas/tests/frame/conftest.py | 169 +++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 27c0e070c10c2..c451cd58f1497 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -11,6 +11,25 @@ def float_frame(): Fixture for DataFrame of floats with index of unique strings Columns are ['A', 'B', 'C', 'D']. + + A B C D + P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465 + qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901 + tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433 + wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651 + M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938 + QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053 + r78Jwns6dn -0.653707 0.883127 0.682199 0.206159 + ... ... ... ... ... + IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316 + lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999 + qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121 + yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962 + 65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987 + eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871 + xSucinXxuV -1.263557 0.252799 -0.552247 0.400426 + + [30 rows x 4 columns] """ return DataFrame(tm.getSeriesData()) @@ -21,6 +40,25 @@ def float_frame_with_na(): Fixture for DataFrame of floats with index of unique strings Columns are ['A', 'B', 'C', 'D']; some entries are missing + + A B C D + ABwBzA0ljw -1.128865 -0.897161 0.046603 0.274997 + DJiRzmbyQF 0.728869 0.233502 0.722431 -0.890872 + neMgPD5UBF 0.486072 -1.027393 -0.031553 1.449522 + 0yWA4n8VeX -1.937191 -1.142531 0.805215 -0.462018 + 3slYUbbqU1 0.153260 1.164691 1.489795 -0.545826 + soujjZ0A08 NaN NaN NaN NaN + 7W6NLGsjB9 NaN NaN NaN NaN + ... ... ... ... ... 
+ uhfeaNkCR1 -0.231210 -0.340472 0.244717 -0.901590 + n6p7GYuBIV -0.419052 1.922721 -0.125361 -0.727717 + ZhzAeY6p1y 1.234374 -1.425359 -0.827038 -0.633189 + uWdPsORyUh 0.046738 -0.980445 -1.102965 0.605503 + 3DJA6aN590 -0.091018 -1.684734 -1.100900 0.215947 + 2GBPAzdbMk -2.883405 -1.021071 1.209877 1.633083 + sHadBoyVHw -2.223032 -0.326384 0.258931 0.245517 + + [30 rows x 4 columns] """ df = DataFrame(tm.getSeriesData()) # set some NAs @@ -35,6 +73,25 @@ def bool_frame_with_na(): Fixture for DataFrame of booleans with index of unique strings Columns are ['A', 'B', 'C', 'D']; some entries are missing + + A B C D + zBZxY2IDGd False False False False + IhBWBMWllt False True True True + ctjdvZSR6R True False True True + AVTujptmxb False True False True + G9lrImrSWq False False False True + sFFwdIUfz2 NaN NaN NaN NaN + s15ptEJnRb NaN NaN NaN NaN + ... ... ... ... ... + UW41KkDyZ4 True True False False + l9l6XkOdqV True False False False + X2MeZfzDYA False True False False + xWkIKU7vfX False True False True + QOhL6VmpGU False False False True + 22PwkRJdat False True False False + kfboQ3VeIK True False True False + + [30 rows x 4 columns] """ df = DataFrame(tm.getSeriesData()) > 0 df = df.astype(object) @@ -50,6 +107,25 @@ def int_frame(): Fixture for DataFrame of ints with index of unique strings Columns are ['A', 'B', 'C', 'D'] + + A B C D + vpBeWjM651 1 0 1 0 + 5JyxmrP1En -1 0 0 0 + qEDaoD49U2 -1 1 0 0 + m66TkTfsFe 0 0 0 0 + EHPaNzEUFm -1 0 -1 0 + fpRJCevQhi 2 0 0 0 + OlQvnmfi3Q 0 0 -2 0 + ... .. .. .. .. + uB1FPlz4uP 0 0 0 1 + EcSe6yNzCU 0 0 -1 0 + L50VudaiI8 -1 1 -2 0 + y3bpw4nwIp 0 -1 0 0 + H0RdLLwrCT 1 1 0 0 + rY82K0vMwm 0 0 0 0 + 1OPIUjnkjk 2 0 0 0 + + [30 rows x 4 columns] """ df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()}) # force these all to int64 to avoid platform testing issues @@ -62,6 +138,25 @@ def datetime_frame(): Fixture for DataFrame of floats with DatetimeIndex Columns are ['A', 'B', 'C', 'D'] + + A B C D + 2000-01-03 -1.122153 0.468535 0.122226 1.693711 + 2000-01-04 0.189378 0.486100 0.007864 -1.216052 + 2000-01-05 0.041401 -0.835752 -0.035279 -0.414357 + 2000-01-06 0.430050 0.894352 0.090719 0.036939 + 2000-01-07 -0.620982 -0.668211 -0.706153 1.466335 + 2000-01-10 -0.752633 0.328434 -0.815325 0.699674 + 2000-01-11 -2.236969 0.615737 -0.829076 -1.196106 + ... ... ... ... ... + 2000-02-03 1.642618 -0.579288 0.046005 1.385249 + 2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351 + 2000-02-07 -2.656149 -0.601387 1.410148 0.444150 + 2000-02-08 -1.201881 -1.289040 0.772992 -1.445300 + 2000-02-09 1.377373 0.398619 1.008453 -0.928207 + 2000-02-10 0.473194 -0.636677 0.984058 0.511519 + 2000-02-11 -0.965556 0.408313 -1.312844 -0.381948 + + [30 rows x 4 columns] """ return DataFrame(tm.getTimeSeriesData()) @@ -72,6 +167,25 @@ def float_string_frame(): Fixture for DataFrame of floats and strings with index of unique strings Columns are ['A', 'B', 'C', 'D', 'foo']. + + A B C D foo + w3orJvq07g -1.594062 -1.084273 -1.252457 0.356460 bar + PeukuVdmz2 0.109855 -0.955086 -0.809485 0.409747 bar + ahp2KvwiM8 -1.533729 -0.142519 -0.154666 1.302623 bar + 3WSJ7BUCGd 2.484964 0.213829 0.034778 -2.327831 bar + khdAmufk0U -0.193480 -0.743518 -0.077987 0.153646 bar + LE2DZiFlrE -0.193566 -1.343194 -0.107321 0.959978 bar + HJXSJhVn7b 0.142590 1.257603 -0.659409 -0.223844 bar + ... ... ... ... ... ... 
+ 9a1Vypttgw -1.316394 1.601354 0.173596 1.213196 bar + h5d1gVFbEy 0.609475 1.106738 -0.155271 0.294630 bar + mK9LsTQG92 1.303613 0.857040 -1.019153 0.369468 bar + oOLksd9gKH 0.558219 -0.134491 -0.289869 -0.951033 bar + 9jgoOjKyHg 0.058270 -0.496110 -0.413212 -0.852659 bar + jZLDHclHAO 0.096298 1.267510 0.549206 -0.005235 bar + lR0nxDp1C2 -2.119350 -0.794384 0.544118 0.145849 bar + + [30 rows x 5 columns] """ df = DataFrame(tm.getSeriesData()) df['foo'] = 'bar' @@ -84,6 +198,25 @@ def mixed_float_frame(): Fixture for DataFrame of different float types with index of unique strings Columns are ['A', 'B', 'C', 'D']. + + A B C D + GI7bbDaEZe -0.237908 -0.246225 -0.468506 0.752993 + KGp9mFepzA -1.140809 -0.644046 -1.225586 0.801588 + VeVYLAb1l2 -1.154013 -1.677615 0.690430 -0.003731 + kmPME4WKhO 0.979578 0.998274 -0.776367 0.897607 + CPyopdXTiz 0.048119 -0.257174 0.836426 0.111266 + 0kJZQndAj0 0.274357 -0.281135 -0.344238 0.834541 + tqdwQsaHG8 -0.979716 -0.519897 0.582031 0.144710 + ... ... ... ... ... + 7FhZTWILQj -2.906357 1.261039 -0.780273 -0.537237 + 4pUDPM4eGq -2.042512 -0.464382 -0.382080 1.132612 + B8dUgUzwTi -1.506637 -0.364435 1.087891 0.297653 + hErlVYjVv9 1.477453 -0.495515 -0.713867 1.438427 + 1BKN3o7YLs 0.127535 -0.349812 -0.881836 0.489827 + 9S4Ekn7zga 1.445518 -2.095149 0.031982 0.373204 + xN1dNn6OV6 1.425017 -0.983995 -0.363281 -0.224502 + + [30 rows x 4 columns] """ df = DataFrame(tm.getSeriesData()) df.A = df.A.astype('float32') @@ -99,6 +232,25 @@ def mixed_int_frame(): Fixture for DataFrame of different int types with index of unique strings Columns are ['A', 'B', 'C', 'D']. + + A B C D + mUrCZ67juP 0 1 2 2 + rw99ACYaKS 0 1 0 0 + 7QsEcpaaVU 0 1 1 1 + xkrimI2pcE 0 1 0 0 + dz01SuzoS8 0 1 255 255 + ccQkqOHX75 -1 1 0 0 + DN0iXaoDLd 0 1 0 0 + ... .. .. ... ... + Dfb141wAaQ 1 1 254 254 + IPD8eQOVu5 0 1 0 0 + CcaKulsCmv 0 1 0 0 + rIBa8gu7E5 0 1 0 0 + RP6peZmh5o 0 1 1 1 + NMb9pipQWQ 0 1 0 0 + PqgbJEzjib 0 1 3 3 + + [30 rows x 4 columns] """ df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()}) df.A = df.A.astype('int32') @@ -114,6 +266,11 @@ def timezone_frame(): Fixture for DataFrame of date_range Series with different time zones Columns are ['A', 'B', 'C']; some entries are missing + + A B C + 0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00 + 1 2013-01-02 NaT NaT + 2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00 """ df = DataFrame({'A': date_range('20130101', periods=3), 'B': date_range('20130101', periods=3, @@ -131,6 +288,11 @@ def simple_frame(): Fixture for simple 3x3 DataFrame Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c']. + + one two three + a 1.0 2.0 3.0 + b 4.0 5.0 6.0 + c 7.0 8.0 9.0 """ arr = np.array([[1., 2., 3.], [4., 5., 6.], @@ -147,6 +309,13 @@ def frame_of_index_cols(): Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')]; 'A' & 'B' contain duplicates (but are jointly unique), the rest are unique. 
+
+         A      B  C         D         E  (tuple, as, label)
+    0  foo    one  a  0.608477 -0.012500            -1.664297
+    1  foo    two  b -0.633460  0.249614            -0.364411
+    2  foo  three  c  0.615256  2.154968            -0.834666
+    3  bar    one  d  0.234246  1.085675             0.718445
+    4  bar    two  e  0.533841 -0.005702            -3.533912
     """
     df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
                     'B': ['one', 'two', 'three', 'one', 'two'],

From dbafe6f0cb4c9e5b38c2dc159f461f651382a153 Mon Sep 17 00:00:00 2001
From: Marc Garcia
Date: Sat, 1 Jun 2019 15:12:40 +0100
Subject: [PATCH 07/43] CI/DOC: Building documentation with azure (#26591)

---
 .travis.yml                |  4 +--
 azure-pipelines.yml        | 62 +++++++++++++++++++++++++++++++++-
 ci/deps/travis-36-doc.yaml | 46 ----------------------------
 3 files changed, 63 insertions(+), 49 deletions(-)
 delete mode 100644 ci/deps/travis-36-doc.yaml

diff --git a/.travis.yml b/.travis.yml
index ce8817133a477..90dd904e6cb1e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -51,14 +51,14 @@ matrix:
     # In allow_failures
     - dist: trusty
       env:
-        - JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true
+        - JOB="3.6, doc" ENV_FILE="environment.yml" DOC=true

   allow_failures:
     - dist: trusty
       env:
        - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
     - dist: trusty
       env:
-        - JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true
+        - JOB="3.6, doc" ENV_FILE="environment.yml" DOC=true

 before_install:
   - echo "before_install"

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 17eaee5458af8..9f83917024049 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -15,7 +15,7 @@ jobs:
       name: Windows
       vmImage: vs2017-win2016

-- job: 'Checks_and_doc'
+- job: 'Checks'
   pool:
     vmImage: ubuntu-16.04
   timeoutInMinutes: 90
@@ -116,3 +116,63 @@ jobs:
           fi
       displayName: 'Running benchmarks'
       condition: true
+
+- job: 'Docs'
+  pool:
+    vmImage: ubuntu-16.04
+  timeoutInMinutes: 90
+  steps:
+  - script: |
+      echo '##vso[task.setvariable variable=CONDA_ENV]pandas-dev'
+      echo '##vso[task.setvariable variable=ENV_FILE]environment.yml'
+    displayName: 'Setting environment variables'
+
+  - script: |
+      export PATH=$HOME/miniconda3/bin:$PATH
+      sudo apt-get install -y libc6-dev-i386
+      ci/setup_env.sh
+    displayName: 'Setup environment and build pandas'
+
+  - script: |
+      export PATH=$HOME/miniconda3/bin:$PATH
+      source activate pandas-dev
+      doc/make.py
+    displayName: 'Build documentation'
+
+  - script: |
+      cd doc/build/html
+      git init
+      touch .nojekyll
+      git add --all .
+      git config user.email "pandas-dev@python.org"
+      git config user.name "pandas-docs-bot"
+      git commit -m "pandas documentation in master"
+    displayName: 'Create git repo for docs build'
+    condition : |
+      and(not(eq(variables['Build.Reason'], 'PullRequest')),
+          eq(variables['Build.SourceBranch'], 'refs/heads/master'))
+
+  # For this task to work, the following steps are required:
+  # 1. Go to "Library > Secure files" in the azure-pipelines dashboard: https://dev.azure.com/pandas-dev/pandas/_library?itemType=SecureFiles
+  # 2. Click on "+ Secure file"
+  # 3. Upload the private key (the name of the file must match the one specified in the "sshKeySecureFile" input below, "pandas_docs_key")
+  # 4. Click on the file name after it is created, tick the box "Authorize for use in all pipelines" and save
+  # 5.
The public key specified in "sshPublicKey" is the pair of the uploaded private key, and needs to be specified as a deploy key of the repo where the docs will be pushed: https://github.com/pandas-dev/pandas-dev.github.io/settings/keys + - task: InstallSSHKey@0 + inputs: + hostName: 'github.com' + sshPublicKey: 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDHmz3l/EdqrgNxEUKkwDUuUcLv91unig03pYFGO/DMIgCmPdMG96zAgfnESd837Rm0wSSqylwSzkRJt5MV/TpFlcVifDLDQmUhqCeO8Z6dLl/oe35UKmyYICVwcvQTAaHNnYRpKC5IUlTh0JEtw9fGlnp1Ta7U1ENBLbKdpywczElhZu+hOQ892zqOj3CwA+U2329/d6cd7YnqIKoFN9DWT3kS5K6JE4IoBfQEVekIOs23bKjNLvPoOmi6CroAhu/K8j+NCWQjge5eJf2x/yTnIIP1PlEcXoHIr8io517posIx3TBup+CN8bNS1PpDW3jyD3ttl1uoBudjOQrobNnJeR6Rn67DRkG6IhSwr3BWj8alwUG5mTdZzwV5Pa9KZFdIiqX7NoDGg+itsR39QCn0thK8lGRNSR8KrWC1PSjecwelKBO7uQ7rnk/rkrZdBWR4oEA8YgNH8tirUw5WfOr5a0AIaJicKxGKNdMxZt+zmC+bS7F4YCOGIm9KHa43RrKhoGRhRf9fHHHKUPwFGqtWG4ykcUgoamDOURJyepesBAO3FiRE9rLU6ILbB3yEqqoekborHmAJD5vf7PWItW3Q/YQKuk3kkqRcKnexPyzyyq5lUgTi8CxxZdaASIOu294wjBhhdyHlXEkVTNJ9JKkj/obF+XiIIp0cBDsOXY9hDQ== pandas-dev@python.org' + sshKeySecureFile: 'pandas_docs_key' + displayName: 'Install GitHub ssh deployment key' + condition : | + and(not(eq(variables['Build.Reason'], 'PullRequest')), + eq(variables['Build.SourceBranch'], 'refs/heads/master')) + + - script: | + cd doc/build/html + git remote add origin git@github.com:pandas-dev/pandas-dev.github.io.git + git push origin master -f + displayName: 'Publish docs to GitHub pages' + condition : | + and(not(eq(variables['Build.Reason'], 'PullRequest')), + eq(variables['Build.SourceBranch'], 'refs/heads/master')) diff --git a/ci/deps/travis-36-doc.yaml b/ci/deps/travis-36-doc.yaml deleted file mode 100644 index 9d6cbd82fdc05..0000000000000 --- a/ci/deps/travis-36-doc.yaml +++ /dev/null @@ -1,46 +0,0 @@ -name: pandas-dev -channels: - - defaults - - conda-forge -dependencies: - - beautifulsoup4 - - bottleneck - - cython>=0.28.2 - - fastparquet>=0.2.1 - - gitpython - - html5lib - - hypothesis>=3.58.0 - - ipykernel - - ipython - - ipywidgets - - lxml - - matplotlib - - nbconvert>=5.4.1 - - nbformat - - nbsphinx - - notebook>=5.7.5 - - numexpr - - numpy - - numpydoc - - openpyxl - - pandoc - - pyarrow - - pyqt - - pytables - - python-dateutil - - python-snappy - - python=3.6.* - - pytz - - scipy - - seaborn - - sphinx - - sqlalchemy - - statsmodels - - xarray - - xlrd - - xlsxwriter - - xlwt - # universal - - pytest>=4.0.2 - - pytest-xdist - - isort From eb4b0b5fae97d6a7ef6f83f6993103a80413f55d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 1 Jun 2019 09:35:25 -0500 Subject: [PATCH 08/43] DOC: sparse doc fixups (#26571) --- doc/source/user_guide/sparse.rst | 2 +- doc/source/whatsnew/v0.16.0.rst | 2 ++ doc/source/whatsnew/v0.18.1.rst | 2 ++ doc/source/whatsnew/v0.19.0.rst | 2 ++ doc/source/whatsnew/v0.20.0.rst | 1 + pandas/core/sparse/frame.py | 2 +- pandas/core/sparse/series.py | 2 +- 7 files changed, 10 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst index 8fed29d7a6316..09ed895a847ff 100644 --- a/doc/source/user_guide/sparse.rst +++ b/doc/source/user_guide/sparse.rst @@ -269,7 +269,7 @@ have no replacement. Interaction with scipy.sparse ----------------------------- -Use :meth:`DataFrame.sparse.from_coo` to create a ``DataFrame`` with sparse values from a sparse matrix. +Use :meth:`DataFrame.sparse.from_spmatrix` to create a ``DataFrame`` with sparse values from a sparse matrix. .. 
versionadded:: 0.25.0 diff --git a/doc/source/whatsnew/v0.16.0.rst b/doc/source/whatsnew/v0.16.0.rst index 1e4ec682f0504..2cb09325c9466 100644 --- a/doc/source/whatsnew/v0.16.0.rst +++ b/doc/source/whatsnew/v0.16.0.rst @@ -92,6 +92,7 @@ Interaction with scipy.sparse Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here `). For example, given a SparseSeries with MultiIndex we can convert to a `scipy.sparse.coo_matrix` by specifying the row and column labels as index levels: .. ipython:: python + :okwarning: s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan]) s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0), @@ -121,6 +122,7 @@ The from_coo method is a convenience method for creating a ``SparseSeries`` from a ``scipy.sparse.coo_matrix``: .. ipython:: python + :okwarning: from scipy import sparse A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), diff --git a/doc/source/whatsnew/v0.18.1.rst b/doc/source/whatsnew/v0.18.1.rst index f099ccf284bc2..069395c2e0f36 100644 --- a/doc/source/whatsnew/v0.18.1.rst +++ b/doc/source/whatsnew/v0.18.1.rst @@ -394,6 +394,7 @@ used in the ``pandas`` implementation (:issue:`12644`, :issue:`12638`, :issue:`1 An example of this signature augmentation is illustrated below: .. ipython:: python + :okwarning: sp = pd.SparseDataFrame([1, 2, 3]) sp @@ -409,6 +410,7 @@ Previous behaviour: New behaviour: .. ipython:: python + :okwarning: np.cumsum(sp, axis=0) diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index 29eeb415e2f6d..de29a1eb93709 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -1236,6 +1236,7 @@ Operators now preserve dtypes - Sparse data structure now can preserve ``dtype`` after arithmetic ops (:issue:`13848`) .. ipython:: python + :okwarning: s = pd.SparseSeries([0, 2, 0, 1], fill_value=0, dtype=np.int64) s.dtype @@ -1245,6 +1246,7 @@ Operators now preserve dtypes - Sparse data structure now support ``astype`` to convert internal ``dtype`` (:issue:`13900`) .. ipython:: python + :okwarning: s = pd.SparseSeries([1., 0., 2., 0.], fill_value=0) s diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index 741aa6ca143bb..6a88a5810eca4 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -339,6 +339,7 @@ See the :ref:`documentation ` for more information. (:issue: All sparse formats are supported, but matrices that are not in :mod:`COOrdinate ` format will be converted, copying data as needed. .. ipython:: python + :okwarning: from scipy.sparse import csr_matrix arr = np.random.random(size=(1000, 5)) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index fa3cd781eaf88..bf1cec7571f4d 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -42,7 +42,7 @@ class SparseDataFrame(DataFrame): DataFrame containing sparse floating point data in the form of SparseSeries objects - .. deprectaed:: 0.25.0 + .. deprecated:: 0.25.0 Use a DataFrame with sparse values instead. diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index e4f8579a398dd..3f95acdbfb42c 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -46,7 +46,7 @@ class SparseSeries(Series): """Data structure for labeled, sparse floating point data - .. deprectaed:: 0.25.0 + .. deprecated:: 0.25.0 Use a Series with sparse values instead. 
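For readers following the deprecation notes added in PATCH 08, a sketch of the replacement spelling those notes point to — a regular Series holding sparse values (illustrative, outputs shown as comments):

    import pandas as pd

    # Instead of the deprecated SparseSeries/SparseDataFrame subclasses,
    # hold sparse values inside an ordinary Series:
    s = pd.Series(pd.SparseArray([0, 0, 1, 0], fill_value=0))
    s.dtype            # Sparse[int64, 0]
    s.sparse.density   # fraction of non-fill values, here 0.25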
From 5dedbfa2ba1c770d5b58d4d7dcc0aca2e8b4059d Mon Sep 17 00:00:00 2001
From: nathalier
Date: Sat, 1 Jun 2019 15:45:06 +0100
Subject: [PATCH 09/43] BUG: ignore errors for invalid dates in to_datetime()
 with errors=coerce (#25512) (#26561)

---
 doc/source/whatsnew/v0.25.0.rst              |  1 +
 pandas/core/tools/datetimes.py               |  6 +++---
 pandas/tests/indexes/datetimes/test_tools.py | 19 +++++++++++++++++++
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index ae5b6aafe4c7d..a62cac7a94bbd 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -427,6 +427,7 @@ Datetimelike
 - Bug in :class:`Series` and :class:`DataFrame` repr where ``np.datetime64('NaT')`` and ``np.timedelta64('NaT')`` with ``dtype=object`` would be represented as ``NaN`` (:issue:`25445`)
 - Bug in :func:`to_datetime` which does not replace the invalid argument with ``NaT`` when error is set to coerce (:issue:`26122`)
 - Bug in adding :class:`DateOffset` with nonzero month to :class:`DatetimeIndex` would raise ``ValueError`` (:issue:`26258`)
+- Bug in :func:`to_datetime` which raised an unhandled ``OverflowError`` when called with a mix of invalid dates and ``NaN`` values with ``format='%Y%m%d'`` and ``errors='coerce'`` (:issue:`25512`)

 Timedelta
 ^^^^^^^^^

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 0756bdb3777ec..73119671550a5 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -775,21 +775,21 @@ def calc_with_mask(carg, mask):
     # try intlike / strings that are ints
     try:
         return calc(arg.astype(np.int64))
-    except ValueError:
+    except (ValueError, OverflowError):
         pass

     # a float with actual np.nan
     try:
         carg = arg.astype(np.float64)
         return calc_with_mask(carg, notna(carg))
-    except ValueError:
+    except (ValueError, OverflowError):
         pass

     # string with NaN-like
     try:
         mask = ~algorithms.isin(arg, list(tslib.nat_strings))
         return calc_with_mask(arg, mask)
-    except ValueError:
+    except (ValueError, OverflowError):
         pass

     return None

diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py
index d62d8d1276fec..c507c31ee54dd 100644
--- a/pandas/tests/indexes/datetimes/test_tools.py
+++ b/pandas/tests/indexes/datetimes/test_tools.py
@@ -96,6 +96,25 @@ def test_to_datetime_format_YYYYMMDD(self, cache):
         result = pd.to_datetime(s, format='%Y%m%d', errors='coerce',
                                 cache=cache)
         expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]')
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("input_s, expected", [
+        # NaN before strings with invalid date values
+        [Series(['19801222', np.nan, '20010012', '10019999']),
+         Series([Timestamp('19801222'), np.nan, np.nan, np.nan])],
+        # NaN after strings with invalid date values
+        [Series(['19801222', '20010012', '10019999', np.nan]),
+         Series([Timestamp('19801222'), np.nan, np.nan, np.nan])],
+        # NaN before integers with invalid date values
+        [Series([20190813, np.nan, 20010012, 20019999]),
+         Series([Timestamp('20190813'), np.nan, np.nan, np.nan])],
+        # NaN after integers with invalid date values
+        [Series([20190813, 20010012, np.nan, 20019999]),
+         Series([Timestamp('20190813'), np.nan, np.nan, np.nan])]])
+    def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected):
+        # GH 25512
+        # format='%Y%m%d', errors='coerce'
+        result = pd.to_datetime(input_s, format='%Y%m%d', errors='coerce')
         assert_series_equal(result, expected)

     @pytest.mark.parametrize('cache', [True,
False]) From 3457fb2f7370317f8927e7e5e2b79f5b93357c66 Mon Sep 17 00:00:00 2001 From: Mak Sze Chun Date: Sat, 1 Jun 2019 22:48:37 +0800 Subject: [PATCH 10/43] TST/CLN: Fixturize tests/frame/test_quantile.py (#26556) --- pandas/tests/frame/test_quantile.py | 56 +++++++++++++++++------------ 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index a5771839e0997..9ccbd290923ba 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -3,24 +3,24 @@ import pandas as pd from pandas import DataFrame, Series, Timestamp -from pandas.tests.frame.common import TestData import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal -class TestDataFrameQuantile(TestData): +class TestDataFrameQuantile: - def test_quantile(self): + def test_quantile(self, datetime_frame): from numpy import percentile - q = self.tsframe.quantile(0.1, axis=0) - assert q['A'] == percentile(self.tsframe['A'], 10) - tm.assert_index_equal(q.index, self.tsframe.columns) + df = datetime_frame + q = df.quantile(0.1, axis=0) + assert q['A'] == percentile(df['A'], 10) + tm.assert_index_equal(q.index, df.columns) - q = self.tsframe.quantile(0.9, axis=1) + q = df.quantile(0.9, axis=1) assert (q['2000-01-17'] == - percentile(self.tsframe.loc['2000-01-17'], 90)) - tm.assert_index_equal(q.index, self.tsframe.index) + percentile(df.loc['2000-01-17'], 90)) + tm.assert_index_equal(q.index, df.index) # test degenerate case q = DataFrame({'x': [], 'y': []}).quantile(0.1, axis=0) @@ -99,18 +99,6 @@ def test_quantile_axis_parameter(self): def test_quantile_interpolation(self): # see gh-10174 - from numpy import percentile - - # interpolation = linear (default case) - q = self.tsframe.quantile(0.1, axis=0, interpolation='linear') - assert q['A'] == percentile(self.tsframe['A'], 10) - q = self.intframe.quantile(0.1) - assert q['A'] == percentile(self.intframe['A'], 10) - - # test with and without interpolation keyword - q1 = self.intframe.quantile(0.1) - assert q1['A'] == np.percentile(self.intframe['A'], 10) - tm.assert_series_equal(q, q1) # interpolation method other than default linear df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) @@ -155,6 +143,28 @@ def test_quantile_interpolation(self): index=[.25, .5], columns=['a', 'b', 'c']) assert_frame_equal(result, expected) + def test_quantile_interpolation_datetime(self, datetime_frame): + # see gh-10174 + + # interpolation = linear (default case) + df = datetime_frame + q = df.quantile(0.1, axis=0, interpolation='linear') + assert q['A'] == np.percentile(df['A'], 10) + + def test_quantile_interpolation_int(self, int_frame): + # see gh-10174 + + df = int_frame + # interpolation = linear (default case) + q = df.quantile(0.1) + assert q['A'] == np.percentile(df['A'], 10) + + # test with and without interpolation keyword + # TODO: q1 is not different from q + q1 = df.quantile(0.1) + assert q1['A'] == np.percentile(df['A'], 10) + tm.assert_series_equal(q, q1) + def test_quantile_multi(self): df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=['a', 'b', 'c']) @@ -214,11 +224,11 @@ def test_quantile_datetime(self): # result = df[['a', 'c']].quantile(.5) # result = df[['a', 'c']].quantile([.5]) - def test_quantile_invalid(self): + def test_quantile_invalid(self, datetime_frame): msg = 'percentiles should all be in the interval \\[0, 1\\]' for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: with pytest.raises(ValueError, 
match=msg): - self.tsframe.quantile(invalid) + datetime_frame.quantile(invalid) def test_quantile_box(self): df = DataFrame({'A': [pd.Timestamp('2011-01-01'), From 605476ebb6e42be17196b295a3d3aa97f385896b Mon Sep 17 00:00:00 2001 From: Big Head Date: Sat, 1 Jun 2019 10:51:27 -0400 Subject: [PATCH 11/43] BUG: fix categorical comparison with missing values (#26504 ) (#26514) --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/arrays/categorical.py | 13 +++++--- .../arrays/categorical/test_operators.py | 32 ++++++++++++++++++- 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index a62cac7a94bbd..61182b9fa32f2 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -414,7 +414,7 @@ Categorical ^^^^^^^^^^^ - Bug in :func:`DataFrame.at` and :func:`Series.at` that would raise exception if the index was a :class:`CategoricalIndex` (:issue:`20629`) -- +- Fixed bug in comparison of ordered :class:`Categorical` that contained missing values with a scalar which sometimes incorrectly resulted in True (:issue:`26504`) - Datetimelike diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 89b86c66d7b05..44bb44457bc25 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -89,18 +89,23 @@ def f(self, other): else: other_codes = other._codes - na_mask = (self._codes == -1) | (other_codes == -1) + mask = (self._codes == -1) | (other_codes == -1) f = getattr(self._codes, op) ret = f(other_codes) - if na_mask.any(): + if mask.any(): # In other series, the leads to False, so do that here too - ret[na_mask] = False + ret[mask] = False return ret if is_scalar(other): if other in self.categories: i = self.categories.get_loc(other) - return getattr(self._codes, op)(i) + ret = getattr(self._codes, op)(i) + + # check for NaN in self + mask = (self._codes == -1) + ret[mask] = False + return ret else: if op == '__eq__': return np.repeat(False, len(self)) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index dc6e1a5bc36b3..a443408bf9479 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -1,4 +1,5 @@ import operator +import warnings import numpy as np import pytest @@ -17,7 +18,6 @@ def test_categories_none_comparisons(self): tm.assert_categorical_equal(factor, self.factor) def test_comparisons(self): - result = self.factor[self.factor == 'a'] expected = self.factor[np.asarray(self.factor) == 'a'] tm.assert_categorical_equal(result, expected) @@ -186,6 +186,36 @@ def test_comparison_with_unknown_scalars(self): tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True])) + def test_comparison_of_ordered_categorical_with_nan_to_scalar( + self, compare_operators_no_eq_ne): + # https://github.com/pandas-dev/pandas/issues/26504 + # BUG: fix ordered categorical comparison with missing values (#26504 ) + # and following comparisons with scalars in categories with missing + # values should be evaluated as False + + cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) + scalar = 2 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + expected = getattr(np.array(cat), + compare_operators_no_eq_ne)(scalar) + actual = getattr(cat, compare_operators_no_eq_ne)(scalar) + tm.assert_numpy_array_equal(actual, expected) + + def 
test_comparison_of_ordered_categorical_with_nan_to_listlike( + self, compare_operators_no_eq_ne): + # https://github.com/pandas-dev/pandas/issues/26504 + # and following comparisons of missing values in ordered Categorical + # with listlike should be evaluated as False + + cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) + other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2) + actual = getattr(cat, compare_operators_no_eq_ne)(other) + tm.assert_numpy_array_equal(actual, expected) + @pytest.mark.parametrize('data,reverse,base', [ (list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])] From a69d56f9491d72567c870c8dec874a9dd1ccc027 Mon Sep 17 00:00:00 2001 From: enisnazif Date: Sat, 1 Jun 2019 15:52:35 +0100 Subject: [PATCH 12/43] Fix the output of df.describe on an empty categorical / object column (#26474) --- doc/source/whatsnew/v0.25.0.rst | 28 ++++++++++++++++++++++++++++ pandas/core/arrays/categorical.py | 2 +- pandas/core/generic.py | 6 ++++++ pandas/tests/frame/test_analytics.py | 11 +++++++++++ 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 61182b9fa32f2..ebca80025b9f7 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -253,6 +253,34 @@ are returned. (:issue:`21521`) df.groupby("a").ffill() +``DataFrame`` describe on an empty categorical / object column will return top and freq +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When calling :meth:`DataFrame.describe` with an empty categorical / object +column, the 'top' and 'freq' columns were previously omitted, which was inconsistent with +the output for non-empty columns. Now the 'top' and 'freq' columns will always be included, +with :attr:`numpy.nan` in the case of an empty :class:`DataFrame` (:issue:`26397`) + +.. ipython:: python + + df = pd.DataFrame({"empty_col": pd.Categorical([])}) + df + +*Previous Behavior*: + +.. code-block:: python + + In [3]: df.describe() + Out[3]: + empty_col + count 0 + unique 0 + +*New Behavior*: + +.. 
ipython:: python
+
+    df.describe()

 ``__str__`` methods now call ``__repr__`` rather than vica-versa
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 44bb44457bc25..49dd0041854bc 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1483,7 +1483,7 @@ def value_counts(self, dropna=True):

     if dropna or clean:
         obs = code if clean else code[mask]
-        count = bincount(obs, minlength=ncat or None)
+        count = bincount(obs, minlength=ncat or 0)
     else:
         count = bincount(np.where(mask, code, ncat))
         ix = np.append(ix, -1)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 0596d0ab844ec..7ca2c52e18c41 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -9920,6 +9920,12 @@ def describe_categorical_1d(data):
                 names += ['top', 'freq']
                 result += [top, freq]

+            # If the DataFrame is empty, set 'top' and 'freq' to None
+            # to maintain output shape consistency
+            else:
+                names += ['top', 'freq']
+                result += [None, None]
+
             return pd.Series(result, index=names, name=data.name)

         def describe_1d(data):

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index effe7eb47323d..487ff7932ec5f 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -588,6 +588,16 @@ def test_describe_categorical(self):
         result = df3.describe()
         tm.assert_numpy_array_equal(result["cat"].values, result["s"].values)

+    def test_describe_empty_categorical_column(self):
+        # GH 26397
+        # Ensure the index of an empty categorical DataFrame column
+        # also contains (count, unique, top, freq)
+        df = pd.DataFrame({"empty_col": Categorical([])})
+        result = df.describe()
+        expected = DataFrame({'empty_col': [0, 0, None, None]},
+                             index=['count', 'unique', 'top', 'freq'])
+        tm.assert_frame_equal(result, expected)
+
     def test_describe_categorical_columns(self):
         # GH 11558
         columns = pd.CategoricalIndex(['int1', 'int2', 'obj'],
@@ -608,6 +618,7 @@ def test_describe_categorical_columns(self):
                              index=['count', 'mean', 'std', 'min', '25%',
                                     '50%', '75%', 'max'],
                              columns=exp_columns)
+        tm.assert_frame_equal(result, expected)
         tm.assert_categorical_equal(result.columns.values,
                                     expected.columns.values)

From 210e2dcd43d2055ca1888d07e8f49961ef60ab5e Mon Sep 17 00:00:00 2001
From: Jiang Yue <35633013+jiangyue12392@users.noreply.github.com>
Date: Sat, 1 Jun 2019 22:56:34 +0800
Subject: [PATCH 13/43] BUG: MultiIndex not dropping nan level and invalid
 code value (#26408)

---
 doc/source/whatsnew/v0.25.0.rst             | 37 ++++++++++-
 pandas/core/indexes/multi.py                | 62 ++++++++++++++++---
 .../tests/indexes/multi/test_constructor.py | 41 +++++++++++-
 pandas/tests/indexes/multi/test_missing.py  | 15 +++++
 4 files changed, 143 insertions(+), 12 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index ebca80025b9f7..3275223b159f8 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -119,6 +119,42 @@ is respected in indexing. (:issue:`24076`, :issue:`16785`)

     df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00']

+
+.. _whatsnew_0250.api_breaking.multi_indexing:
+
+
+MultiIndex constructed from levels and codes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Previously, constructing a :class:`MultiIndex` with NaN levels or with code values < -1 was allowed.
+Now, construction with code values < -1 is not allowed, and the codes corresponding
+to NaN levels are reassigned as -1.
(:issue:`19387`) + +.. ipython:: python + + mi1 = pd.MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]], + codes=[[0, -1, 1, 2, 3, 4]]) + mi2 = pd.MultiIndex(levels=[[1, 2]], codes=[[0, -2]]) + +*Previous Behavior*: + +.. code-block:: ipython + + In [1]: mi1 + Out[1]: MultiIndex(levels=[[nan, None, NaT, 128, 2]], + codes=[[0, -1, 1, 2, 3, 4]]) + In [2]: mi2 + Out[2]: MultiIndex(levels=[[1, 2]], + codes=[[0, -2]]) + +*New Behavior*: + +.. ipython:: python + + mi1 + mi2 + + .. _whatsnew_0250.api_breaking.groupby_apply_first_group_once: GroupBy.apply on ``DataFrame`` evaluates first group only once @@ -536,7 +572,6 @@ MultiIndex - Bug in which incorrect exception raised by :class:`Timedelta` when testing the membership of :class:`MultiIndex` (:issue:`24570`) - -- I/O ^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ec2cc70d1a352..9217b388ce86b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -243,11 +243,35 @@ def __new__(cls, levels=None, codes=None, sortorder=None, names=None, result.sortorder = sortorder if verify_integrity: - result._verify_integrity() + new_codes = result._verify_integrity() + result._codes = new_codes + if _set_identity: result._reset_identity() + return result + def _validate_codes(self, level: list, code: list): + """ + Reassign code values as -1 if their corresponding levels are NaN. + + Parameters + ---------- + code : list + Code to reassign. + level : list + Level to check for missing values (NaN, NaT, None). + + Returns + ------- + code : new code where code value = -1 if it corresponds + to a level with missing values (NaN, NaT, None). + """ + null_mask = isna(level) + if np.any(null_mask): + code = np.where(null_mask[code], -1, code) + return code + def _verify_integrity(self, codes=None, levels=None): """ @@ -263,6 +287,11 @@ def _verify_integrity(self, codes=None, levels=None): ValueError If length of levels and codes don't match, if the codes for any level would exceed level bounds, or there are any duplicate levels. + + Returns + ------- + codes : new codes where code value = -1 if it corresponds to a + NaN level. """ # NOTE: Currently does not check, among other things, that cached # nlevels matches nor that sortorder matches actually sortorder. @@ -272,22 +301,33 @@ def _verify_integrity(self, codes=None, levels=None): if len(levels) != len(codes): raise ValueError("Length of levels and codes must match. NOTE:" " this index is in an inconsistent state.") - codes_length = len(self.codes[0]) + codes_length = len(codes[0]) for i, (level, level_codes) in enumerate(zip(levels, codes)): if len(level_codes) != codes_length: raise ValueError("Unequal code lengths: %s" % ([len(code_) for code_ in codes])) if len(level_codes) and level_codes.max() >= len(level): - raise ValueError("On level %d, code max (%d) >= length of" - " level (%d). NOTE: this index is in an" - " inconsistent state" % (i, level_codes.max(), - len(level))) + msg = ("On level {level}, code max ({max_code}) >= length of " + "level ({level_len}). 
NOTE: this index is in an " + "inconsistent state".format( + level=i, max_code=level_codes.max(), + level_len=len(level))) + raise ValueError(msg) + if len(level_codes) and level_codes.min() < -1: + raise ValueError("On level {level}, code value ({code})" + " < -1".format( + level=i, code=level_codes.min())) if not level.is_unique: raise ValueError("Level values must be unique: {values} on " "level {level}".format( values=[value for value in level], level=i)) + codes = [self._validate_codes(level, code) + for level, code in zip(levels, codes)] + new_codes = FrozenList(codes) + return new_codes + @classmethod def from_arrays(cls, arrays, sortorder=None, names=None): """ @@ -586,7 +626,8 @@ def _set_levels(self, levels, level=None, copy=False, validate=True, new_levels = FrozenList(new_levels) if verify_integrity: - self._verify_integrity(levels=new_levels) + new_codes = self._verify_integrity(levels=new_levels) + self._codes = new_codes names = self.names self._levels = new_levels @@ -676,7 +717,6 @@ def labels(self): def _set_codes(self, codes, level=None, copy=False, validate=True, verify_integrity=False): - if validate and level is None and len(codes) != self.nlevels: raise ValueError("Length of codes must match number of levels") if validate and level is not None and len(codes) != len(level): @@ -696,9 +736,10 @@ def _set_codes(self, codes, level=None, copy=False, validate=True, new_codes = FrozenList(new_codes) if verify_integrity: - self._verify_integrity(codes=new_codes) + new_codes = self._verify_integrity(codes=new_codes) self._codes = new_codes + self._tuples = None self._reset_cache() @@ -1763,9 +1804,10 @@ def __setstate__(self, state): self._set_levels([Index(x) for x in levels], validate=False) self._set_codes(codes) + new_codes = self._verify_integrity() + self._set_codes(new_codes) self._set_names(names) self.sortorder = sortorder - self._verify_integrity() self._reset_identity() def __getitem__(self, key): diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 37290bc6eb1c0..7cab05660ac49 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -63,9 +63,10 @@ def test_constructor_mismatched_codes_levels(idx): with pytest.raises(ValueError, match=msg): MultiIndex(levels=levels, codes=codes) - length_error = (r"On level 0, code max \(3\) >= length of level \(1\)\." + length_error = (r"On level 0, code max \(3\) >= length of level \(1\)\." " NOTE: this index is in an inconsistent state") label_error = r"Unequal code lengths: \[4, 2\]" + code_value_error = r"On level 0, code value \(-2\) < -1" # important to check that it's looking at the right thing. 
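    # (editorial annotation, not part of the original test) the three match
    # patterns above guard distinct failure modes: a code value pointing past
    # the end of its level (length_error), code arrays of unequal length
    # across levels (label_error), and the new check for code values below -1
    # (code_value_error).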
    with pytest.raises(ValueError, match=length_error):
@@ -82,6 +83,44 @@ def test_constructor_mismatched_codes_levels(idx):
     with pytest.raises(ValueError, match=label_error):
         idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])

+    # test set_codes with verify_integrity=False
+    # the setting should not raise any value error
+    idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]],
+                         verify_integrity=False)
+
+    # code value smaller than -1
+    with pytest.raises(ValueError, match=code_value_error):
+        MultiIndex(levels=[['a'], ['b']], codes=[[0, -2], [0, 0]])
+
+
+def test_na_levels():
+    # GH26408
+    # test if codes are re-assigned value -1 for levels
+    # with missing values (NaN, NaT, None)
+    result = MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]],
+                        codes=[[0, -1, 1, 2, 3, 4]])
+    expected = MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]],
+                          codes=[[-1, -1, -1, -1, 3, 4]])
+    tm.assert_index_equal(result, expected)
+
+    result = MultiIndex(levels=[[np.nan, 's', pd.NaT, 128, None]],
+                        codes=[[0, -1, 1, 2, 3, 4]])
+    expected = MultiIndex(levels=[[np.nan, 's', pd.NaT, 128, None]],
+                          codes=[[-1, -1, 1, -1, 3, -1]])
+    tm.assert_index_equal(result, expected)
+
+    # verify set_levels and set_codes
+    result = MultiIndex(
+        levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]]).set_levels(
+        [[np.nan, 's', pd.NaT, 128, None]])
+    tm.assert_index_equal(result, expected)
+
+    result = MultiIndex(
+        levels=[[np.nan, 's', pd.NaT, 128, None]],
+        codes=[[1, 2, 2, 2, 2, 2]]).set_codes(
+        [[0, -1, 1, 2, 3, 4]])
+    tm.assert_index_equal(result, expected)
+

 def test_labels_deprecated(idx):
     # GH23752
diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py
index ed90f74d80989..518c12bb20e13 100644
--- a/pandas/tests/indexes/multi/test_missing.py
+++ b/pandas/tests/indexes/multi/test_missing.py
@@ -73,6 +73,21 @@ def test_dropna():
     with pytest.raises(ValueError, match=msg):
         idx.dropna(how='xxx')

+    # GH26408
+    # test if missing values are dropped for a MultiIndex constructed
+    # from codes and values
+    idx = MultiIndex(levels=[[np.nan, None, pd.NaT, "128", 2],
+                             [np.nan, None, pd.NaT, "128", 2]],
+                     codes=[[0, -1, 1, 2, 3, 4],
+                            [0, -1, 3, 3, 3, 4]])
+    expected = MultiIndex.from_arrays([["128", 2], ["128", 2]])
+    tm.assert_index_equal(idx.dropna(), expected)
+    tm.assert_index_equal(idx.dropna(how='any'), expected)
+
+    expected = MultiIndex.from_arrays([[np.nan, np.nan, "128", 2],
+                                       ["128", "128", "128", 2]])
+    tm.assert_index_equal(idx.dropna(how='all'), expected)
+

 def test_nulls(idx):
     # this is really a smoke test for the methods

From a2f9013efc251d64878123fb81e29f73a21e1fc3 Mon Sep 17 00:00:00 2001
From: h-vetinari <33685575+h-vetinari@users.noreply.github.com>
Date: Sat, 1 Jun 2019 17:03:06 +0200
Subject: [PATCH 14/43] API: Series.str-accessor infers dtype (and Index.str
 does not raise on all-NA) (#23167)

---
 doc/source/user_guide/text.rst  |  10 ++
 doc/source/whatsnew/v0.25.0.rst |  40 +++++-
 pandas/core/strings.py          | 214 +++++++++++++++++++++++++-------
 pandas/tests/test_strings.py    |  48 +++----
 4 files changed, 233 insertions(+), 79 deletions(-)

diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst
index f7fdfcf8bf882..87c75e8bcd91f 100644
--- a/doc/source/user_guide/text.rst
+++ b/doc/source/user_guide/text.rst
@@ -70,6 +70,16 @@ and replacing any remaining whitespaces with underscores:
     ``.str`` methods which operate on elements of type ``list`` are not available on such a
     ``Series``.

+.. _text.warn_types:
+
+.. warning::
+
+    Before v0.25.0, the ``.str``-accessor performed only the most rudimentary type checks. Starting with
+    v0.25.0, the type of the Series is inferred and the allowed types (i.e. strings) are enforced more rigorously.
+
+    Generally speaking, the ``.str`` accessor is intended to work only on strings. With very few
+    exceptions, other uses are not supported, and may be disabled at a later point.
+

 Splitting and Replacing Strings
 -------------------------------
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 3275223b159f8..87a8010998bd0 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -231,6 +231,43 @@ returned if all the columns were dummy encoded, and a :class:`DataFrame` otherwi
 Providing any ``SparseSeries`` or ``SparseDataFrame`` to :func:`concat` will
 cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before.

+The ``.str``-accessor performs stricter type checks
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Due to the lack of more fine-grained dtypes, :attr:`Series.str` so far only checked whether the data was
+of ``object`` dtype. :attr:`Series.str` will now infer the dtype of the data *within* the Series; in particular,
+``'bytes'``-only data will raise an exception (except for :meth:`Series.str.decode`, :meth:`Series.str.get`,
+:meth:`Series.str.len`, :meth:`Series.str.slice`), see :issue:`23163`, :issue:`23011`, :issue:`23551`.
+
+*Previous Behaviour*:
+
+.. code-block:: python
+
+   In [1]: s = pd.Series(np.array(['a', 'ba', 'cba'], 'S'), dtype=object)
+
+   In [2]: s
+   Out[2]:
+   0      b'a'
+   1     b'ba'
+   2    b'cba'
+   dtype: object
+
+   In [3]: s.str.startswith(b'a')
+   Out[3]:
+   0     True
+   1    False
+   2    False
+   dtype: bool
+
+*New Behaviour*:
+
+.. ipython:: python
+   :okexcept:
+
+   s = pd.Series(np.array(['a', 'ba', 'cba'], 'S'), dtype=object)
+   s
+   s.str.startswith(b'a')
+
 .. _whatsnew_0250.api_breaking.incompatible_index_unions:

 Incompatible Index Type Unions
@@ -331,7 +368,6 @@ This change is backward compatible for direct usage of Pandas, but if you subcla
 Pandas objects *and* give your subclasses specific ``__str__``/``__repr__`` methods,
 you may have to adjust your ``__str__``/``__repr__`` methods (:issue:`26495`).

-
 .. _whatsnew_0250.api_breaking.deps:

 Increased minimum versions for dependencies
@@ -537,7 +573,7 @@ Conversion
 Strings
 ^^^^^^^

--
+- Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`)
 -
 -

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index ee3796241690d..bd756491abd2f 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -1,4 +1,5 @@
 import codecs
+from functools import wraps
 import re
 import textwrap
 from typing import Dict
@@ -12,8 +13,8 @@
 from pandas.core.dtypes.common import (
     ensure_object, is_bool_dtype, is_categorical_dtype, is_integer,
-    is_list_like, is_object_dtype, is_re, is_scalar, is_string_like)
-from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
+    is_list_like, is_re, is_scalar, is_string_like)
+from pandas.core.dtypes.generic import ABCIndexClass, ABCMultiIndex, ABCSeries
 from pandas.core.dtypes.missing import isna

 from pandas.core.algorithms import take_1d
@@ -1720,12 +1721,78 @@ def str_encode(arr, encoding, errors="strict"):
     return _na_map(f, arr)


-def _noarg_wrapper(f, docstring=None, **kargs):
+def forbid_nonstring_types(forbidden, name=None):
+    """
+    Decorator to forbid specific types for a method of StringMethods. 
+ + For calling `.str.{method}` on a Series or Index, it is necessary to first + initialize the :class:`StringMethods` object, and then call the method. + However, different methods allow different input types, and so this can not + be checked during :meth:`StringMethods.__init__`, but must be done on a + per-method basis. This decorator exists to facilitate this process, and + make it explicit which (inferred) types are disallowed by the method. + + :meth:`StringMethods.__init__` allows the *union* of types its different + methods allow (after skipping NaNs; see :meth:`StringMethods._validate`), + namely: ['string', 'empty', 'bytes', 'mixed', 'mixed-integer']. + + The default string types ['string', 'empty'] are allowed for all methods. + For the additional types ['bytes', 'mixed', 'mixed-integer'], each method + then needs to forbid the types it is not intended for. + + Parameters + ---------- + forbidden : list-of-str or None + List of forbidden non-string types, may be one or more of + `['bytes', 'mixed', 'mixed-integer']`. + name : str, default None + Name of the method to use in the error message. By default, this is + None, in which case the name from the method being wrapped will be + copied. However, for working with further wrappers (like _pat_wrapper + and _noarg_wrapper), it is necessary to specify the name. + + Returns + ------- + func : wrapper + The method to which the decorator is applied, with an added check that + enforces the inferred type to not be in the list of forbidden types. + + Raises + ------ + TypeError + If the inferred type of the underlying data is in `forbidden`. + """ + + # deal with None + forbidden = [] if forbidden is None else forbidden + + allowed_types = {'string', 'empty', 'bytes', + 'mixed', 'mixed-integer'} - set(forbidden) + + def _forbid_nonstring_types(func): + func_name = func.__name__ if name is None else name + + @wraps(func) + def wrapper(self, *args, **kwargs): + if self._inferred_dtype not in allowed_types: + msg = ('Cannot use .str.{name} with values of inferred dtype ' + '{inf_type!r}.'.format(name=func_name, + inf_type=self._inferred_dtype)) + raise TypeError(msg) + return func(self, *args, **kwargs) + wrapper.__name__ = func_name + return wrapper + return _forbid_nonstring_types + + +def _noarg_wrapper(f, name=None, docstring=None, forbidden_types=['bytes'], + **kargs): + @forbid_nonstring_types(forbidden_types, name=name) def wrapper(self): result = _na_map(f, self._parent, **kargs) return self._wrap_result(result) - wrapper.__name__ = f.__name__ + wrapper.__name__ = f.__name__ if name is None else name if docstring is not None: wrapper.__doc__ = docstring else: @@ -1734,22 +1801,26 @@ def wrapper(self): return wrapper -def _pat_wrapper(f, flags=False, na=False, **kwargs): +def _pat_wrapper(f, flags=False, na=False, name=None, + forbidden_types=['bytes'], **kwargs): + @forbid_nonstring_types(forbidden_types, name=name) def wrapper1(self, pat): result = f(self._parent, pat) return self._wrap_result(result) + @forbid_nonstring_types(forbidden_types, name=name) def wrapper2(self, pat, flags=0, **kwargs): result = f(self._parent, pat, flags=flags, **kwargs) return self._wrap_result(result) + @forbid_nonstring_types(forbidden_types, name=name) def wrapper3(self, pat, na=np.nan): result = f(self._parent, pat, na=na) return self._wrap_result(result) wrapper = wrapper3 if na else wrapper2 if flags else wrapper1 - wrapper.__name__ = f.__name__ + wrapper.__name__ = f.__name__ if name is None else name if f.__doc__: wrapper.__doc__ = f.__doc__ 
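# A minimal sketch (editorial addition, not part of the patch) of the
# decorator mechanics defined above. `ToyAccessor` is a hypothetical
# stand-in for StringMethods, which sets `_inferred_dtype` in its
# constructor; the wrapper only ever reads that attribute, so the toy
# class is enough to exercise the check. Assumes `forbid_nonstring_types`
# from above is in scope.

class ToyAccessor:
    def __init__(self, inferred_dtype):
        self._inferred_dtype = inferred_dtype

    @forbid_nonstring_types(['bytes'])
    def upper(self):
        return 'would dispatch to str.upper here'

ToyAccessor('string').upper()  # passes the dtype check, runs the body
try:
    ToyAccessor('bytes').upper()
except TypeError as err:
    # Cannot use .str.upper with values of inferred dtype 'bytes'.
    print(err)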
@@ -1780,7 +1851,7 @@ class StringMethods(NoNewAttributesMixin): """ def __init__(self, data): - self._validate(data) + self._inferred_dtype = self._validate(data) self._is_categorical = is_categorical_dtype(data) # .values.categories works for both Series/Index @@ -1791,38 +1862,44 @@ def __init__(self, data): @staticmethod def _validate(data): - from pandas.core.index import Index - - if (isinstance(data, ABCSeries) and - not ((is_categorical_dtype(data.dtype) and - is_object_dtype(data.values.categories)) or - (is_object_dtype(data.dtype)))): - # it's neither a string series not a categorical series with - # strings inside the categories. - # this really should exclude all series with any non-string values - # (instead of test for object dtype), but that isn't practical for - # performance reasons until we have a str dtype (GH 9343) + """ + Auxiliary function for StringMethods, infers and checks dtype of data. + + This is a "first line of defence" at the creation of the StringMethods- + object (see _make_accessor), and just checks that the dtype is in the + *union* of the allowed types over all string methods below; this + restriction is then refined on a per-method basis using the decorator + @forbid_nonstring_types (more info in the corresponding docstring). + + This really should exclude all series/index with any non-string values, + but that isn't practical for performance reasons until we have a str + dtype (GH 9343 / 13877) + + Parameters + ---------- + data : The content of the Series + + Returns + ------- + dtype : inferred dtype of data + """ + if isinstance(data, ABCMultiIndex): + raise AttributeError('Can only use .str accessor with Index, ' + 'not MultiIndex') + + # see _libs/lib.pyx for list of inferred types + allowed_types = ['string', 'empty', 'bytes', 'mixed', 'mixed-integer'] + + values = getattr(data, 'values', data) # Series / Index + values = getattr(values, 'categories', values) # categorical / normal + + # missing values obfuscate type inference -> skip + inferred_dtype = lib.infer_dtype(values, skipna=True) + + if inferred_dtype not in allowed_types: raise AttributeError("Can only use .str accessor with string " - "values, which use np.object_ dtype in " - "pandas") - elif isinstance(data, Index): - # can't use ABCIndex to exclude non-str - - # see src/inference.pyx which can contain string values - allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer') - if is_categorical_dtype(data.dtype): - inf_type = data.categories.inferred_type - else: - inf_type = data.inferred_type - if inf_type not in allowed_types: - message = ("Can only use .str accessor with string values " - "(i.e. 
inferred_type is 'string', 'unicode' or " - "'mixed')") - raise AttributeError(message) - if data.nlevels > 1: - message = ("Can only use .str accessor with Index, not " - "MultiIndex") - raise AttributeError(message) + "values!") + return inferred_dtype def __getitem__(self, key): if isinstance(key, slice): @@ -2025,12 +2102,13 @@ def _get_series_list(self, others, ignore_index=False): warnings.warn('list-likes other than Series, Index, or ' 'np.ndarray WITHIN another list-like are ' 'deprecated and will be removed in a future ' - 'version.', FutureWarning, stacklevel=3) + 'version.', FutureWarning, stacklevel=4) return (los, join_warn) elif all(not is_list_like(x) for x in others): return ([Series(others, index=idx)], False) raise TypeError(err_msg) + @forbid_nonstring_types(['bytes', 'mixed', 'mixed-integer']) def cat(self, others=None, sep=None, na_rep=None, join=None): """ Concatenate strings in the Series/Index with given separator. @@ -2211,7 +2289,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): "Index/DataFrame in `others`. To enable alignment " "and silence this warning, pass `join='left'|" "'outer'|'inner'|'right'`. The future default will " - "be `join='left'`.", FutureWarning, stacklevel=2) + "be `join='left'`.", FutureWarning, stacklevel=3) # if join is None, _get_series_list already force-aligned indexes join = 'left' if join is None else join @@ -2384,6 +2462,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): @Appender(_shared_docs['str_split'] % { 'side': 'beginning', 'method': 'split'}) + @forbid_nonstring_types(['bytes']) def split(self, pat=None, n=-1, expand=False): result = str_split(self._parent, pat, n=n) return self._wrap_result(result, expand=expand) @@ -2391,6 +2470,7 @@ def split(self, pat=None, n=-1, expand=False): @Appender(_shared_docs['str_split'] % { 'side': 'end', 'method': 'rsplit'}) + @forbid_nonstring_types(['bytes']) def rsplit(self, pat=None, n=-1, expand=False): result = str_rsplit(self._parent, pat, n=n) return self._wrap_result(result, expand=expand) @@ -2485,6 +2565,7 @@ def rsplit(self, pat=None, n=-1, expand=False): '`sep`.' }) @deprecate_kwarg(old_arg_name='pat', new_arg_name='sep') + @forbid_nonstring_types(['bytes']) def partition(self, sep=' ', expand=True): f = lambda x: x.partition(sep) result = _na_map(f, self._parent) @@ -2498,6 +2579,7 @@ def partition(self, sep=' ', expand=True): '`sep`.' 
}) @deprecate_kwarg(old_arg_name='pat', new_arg_name='sep') + @forbid_nonstring_types(['bytes']) def rpartition(self, sep=' ', expand=True): f = lambda x: x.rpartition(sep) result = _na_map(f, self._parent) @@ -2509,33 +2591,39 @@ def get(self, i): return self._wrap_result(result) @copy(str_join) + @forbid_nonstring_types(['bytes']) def join(self, sep): result = str_join(self._parent, sep) return self._wrap_result(result) @copy(str_contains) + @forbid_nonstring_types(['bytes']) def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): result = str_contains(self._parent, pat, case=case, flags=flags, na=na, regex=regex) return self._wrap_result(result, fill_value=na) @copy(str_match) + @forbid_nonstring_types(['bytes']) def match(self, pat, case=True, flags=0, na=np.nan): result = str_match(self._parent, pat, case=case, flags=flags, na=na) return self._wrap_result(result, fill_value=na) @copy(str_replace) + @forbid_nonstring_types(['bytes']) def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): result = str_replace(self._parent, pat, repl, n=n, case=case, flags=flags, regex=regex) return self._wrap_result(result) @copy(str_repeat) + @forbid_nonstring_types(['bytes']) def repeat(self, repeats): result = str_repeat(self._parent, repeats) return self._wrap_result(result) @copy(str_pad) + @forbid_nonstring_types(['bytes']) def pad(self, width, side='left', fillchar=' '): result = str_pad(self._parent, width, side=side, fillchar=fillchar) return self._wrap_result(result) @@ -2559,17 +2647,21 @@ def pad(self, width, side='left', fillchar=' '): @Appender(_shared_docs['str_pad'] % dict(side='left and right', method='center')) + @forbid_nonstring_types(['bytes']) def center(self, width, fillchar=' '): return self.pad(width, side='both', fillchar=fillchar) @Appender(_shared_docs['str_pad'] % dict(side='right', method='ljust')) + @forbid_nonstring_types(['bytes']) def ljust(self, width, fillchar=' '): return self.pad(width, side='right', fillchar=fillchar) @Appender(_shared_docs['str_pad'] % dict(side='left', method='rjust')) + @forbid_nonstring_types(['bytes']) def rjust(self, width, fillchar=' '): return self.pad(width, side='left', fillchar=fillchar) + @forbid_nonstring_types(['bytes']) def zfill(self, width): """ Pad strings in the Series/Index by prepending '0' characters. 
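# A minimal sketch (editorial addition, not part of the patch) of where the
# inferred dtype checked by the decorated methods above comes from:
# `StringMethods._validate` calls `lib.infer_dtype` with `skipna=True`, so
# missing values do not obscure an otherwise all-string column.

import numpy as np
from pandas._libs import lib

lib.infer_dtype(np.array(['a', np.nan, 'b'], dtype=object), skipna=True)
# -> 'string'
lib.infer_dtype(np.array([b'a', b'b'], dtype=object), skipna=True)
# -> 'bytes'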
@@ -2639,16 +2731,19 @@ def slice(self, start=None, stop=None, step=None): return self._wrap_result(result) @copy(str_slice_replace) + @forbid_nonstring_types(['bytes']) def slice_replace(self, start=None, stop=None, repl=None): result = str_slice_replace(self._parent, start, stop, repl) return self._wrap_result(result) @copy(str_decode) def decode(self, encoding, errors="strict"): + # need to allow bytes here result = str_decode(self._parent, encoding, errors) return self._wrap_result(result) @copy(str_encode) + @forbid_nonstring_types(['bytes']) def encode(self, encoding, errors="strict"): result = str_encode(self._parent, encoding, errors) return self._wrap_result(result) @@ -2718,28 +2813,33 @@ def encode(self, encoding, errors="strict"): @Appender(_shared_docs['str_strip'] % dict(side='left and right sides', method='strip')) + @forbid_nonstring_types(['bytes']) def strip(self, to_strip=None): result = str_strip(self._parent, to_strip, side='both') return self._wrap_result(result) @Appender(_shared_docs['str_strip'] % dict(side='left side', method='lstrip')) + @forbid_nonstring_types(['bytes']) def lstrip(self, to_strip=None): result = str_strip(self._parent, to_strip, side='left') return self._wrap_result(result) @Appender(_shared_docs['str_strip'] % dict(side='right side', method='rstrip')) + @forbid_nonstring_types(['bytes']) def rstrip(self, to_strip=None): result = str_strip(self._parent, to_strip, side='right') return self._wrap_result(result) @copy(str_wrap) + @forbid_nonstring_types(['bytes']) def wrap(self, width, **kwargs): result = str_wrap(self._parent, width, **kwargs) return self._wrap_result(result) @copy(str_get_dummies) + @forbid_nonstring_types(['bytes']) def get_dummies(self, sep='|'): # we need to cast to Series of strings as only that has all # methods available for making the dummies... 
@@ -2749,20 +2849,23 @@ def get_dummies(self, sep='|'): name=name, expand=True) @copy(str_translate) + @forbid_nonstring_types(['bytes']) def translate(self, table): result = str_translate(self._parent, table) return self._wrap_result(result) - count = _pat_wrapper(str_count, flags=True) - startswith = _pat_wrapper(str_startswith, na=True) - endswith = _pat_wrapper(str_endswith, na=True) - findall = _pat_wrapper(str_findall, flags=True) + count = _pat_wrapper(str_count, flags=True, name='count') + startswith = _pat_wrapper(str_startswith, na=True, name='startswith') + endswith = _pat_wrapper(str_endswith, na=True, name='endswith') + findall = _pat_wrapper(str_findall, flags=True, name='findall') @copy(str_extract) + @forbid_nonstring_types(['bytes']) def extract(self, pat, flags=0, expand=True): return str_extract(self, pat, flags=flags, expand=expand) @copy(str_extractall) + @forbid_nonstring_types(['bytes']) def extractall(self, pat, flags=0): return str_extractall(self._orig, pat, flags=flags) @@ -2792,6 +2895,7 @@ def extractall(self, pat, flags=0): @Appender(_shared_docs['find'] % dict(side='lowest', method='find', also='rfind : Return highest indexes in each strings.')) + @forbid_nonstring_types(['bytes']) def find(self, sub, start=0, end=None): result = str_find(self._parent, sub, start=start, end=end, side='left') return self._wrap_result(result) @@ -2799,11 +2903,13 @@ def find(self, sub, start=0, end=None): @Appender(_shared_docs['find'] % dict(side='highest', method='rfind', also='find : Return lowest indexes in each strings.')) + @forbid_nonstring_types(['bytes']) def rfind(self, sub, start=0, end=None): result = str_find(self._parent, sub, start=start, end=end, side='right') return self._wrap_result(result) + @forbid_nonstring_types(['bytes']) def normalize(self, form): """ Return the Unicode normal form for the strings in the Series/Index. @@ -2851,6 +2957,7 @@ def normalize(self, form): @Appender(_shared_docs['index'] % dict(side='lowest', similar='find', method='index', also='rindex : Return highest indexes in each strings.')) + @forbid_nonstring_types(['bytes']) def index(self, sub, start=0, end=None): result = str_index(self._parent, sub, start=start, end=end, side='left') @@ -2859,6 +2966,7 @@ def index(self, sub, start=0, end=None): @Appender(_shared_docs['index'] % dict(side='highest', similar='rfind', method='rindex', also='index : Return lowest indexes in each strings.')) + @forbid_nonstring_types(['bytes']) def rindex(self, sub, start=0, end=None): result = str_index(self._parent, sub, start=start, end=end, side='right') @@ -2908,7 +3016,8 @@ def rindex(self, sub, start=0, end=None): 5 3.0 dtype: float64 """) - len = _noarg_wrapper(len, docstring=_shared_docs['len'], dtype=int) + len = _noarg_wrapper(len, docstring=_shared_docs['len'], + forbidden_types=None, dtype=int) _shared_docs['casemethods'] = (""" Convert strings in the Series/Index to %(type)s. @@ -2989,21 +3098,27 @@ def rindex(self, sub, start=0, end=None): _doc_args['casefold'] = dict(type='be casefolded', method='casefold', version='\n .. 
versionadded:: 0.25.0\n') lower = _noarg_wrapper(lambda x: x.lower(), + name='lower', docstring=_shared_docs['casemethods'] % _doc_args['lower']) upper = _noarg_wrapper(lambda x: x.upper(), + name='upper', docstring=_shared_docs['casemethods'] % _doc_args['upper']) title = _noarg_wrapper(lambda x: x.title(), + name='title', docstring=_shared_docs['casemethods'] % _doc_args['title']) capitalize = _noarg_wrapper(lambda x: x.capitalize(), + name='capitalize', docstring=_shared_docs['casemethods'] % _doc_args['capitalize']) swapcase = _noarg_wrapper(lambda x: x.swapcase(), + name='swapcase', docstring=_shared_docs['casemethods'] % _doc_args['swapcase']) casefold = _noarg_wrapper(lambda x: x.casefold(), + name='casefold', docstring=_shared_docs['casemethods'] % _doc_args['casefold']) @@ -3157,30 +3272,39 @@ def rindex(self, sub, start=0, end=None): _doc_args['isnumeric'] = dict(type='numeric', method='isnumeric') _doc_args['isdecimal'] = dict(type='decimal', method='isdecimal') isalnum = _noarg_wrapper(lambda x: x.isalnum(), + name='isalnum', docstring=_shared_docs['ismethods'] % _doc_args['isalnum']) isalpha = _noarg_wrapper(lambda x: x.isalpha(), + name='isalpha', docstring=_shared_docs['ismethods'] % _doc_args['isalpha']) isdigit = _noarg_wrapper(lambda x: x.isdigit(), + name='isdigit', docstring=_shared_docs['ismethods'] % _doc_args['isdigit']) isspace = _noarg_wrapper(lambda x: x.isspace(), + name='isspace', docstring=_shared_docs['ismethods'] % _doc_args['isspace']) islower = _noarg_wrapper(lambda x: x.islower(), + name='islower', docstring=_shared_docs['ismethods'] % _doc_args['islower']) isupper = _noarg_wrapper(lambda x: x.isupper(), + name='isupper', docstring=_shared_docs['ismethods'] % _doc_args['isupper']) istitle = _noarg_wrapper(lambda x: x.istitle(), + name='istitle', docstring=_shared_docs['ismethods'] % _doc_args['istitle']) isnumeric = _noarg_wrapper(lambda x: x.isnumeric(), + name='isnumeric', docstring=_shared_docs['ismethods'] % _doc_args['isnumeric']) isdecimal = _noarg_wrapper(lambda x: x.isdecimal(), + name='isdecimal', docstring=_shared_docs['ismethods'] % _doc_args['isdecimal']) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 2951ca24fa7ff..1ba0ef3918fb7 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -150,6 +150,9 @@ def any_allowed_skipna_inferred_dtype(request): ... inferred_dtype, values = any_allowed_skipna_inferred_dtype ... # will pass ... assert lib.infer_dtype(values, skipna=True) == inferred_dtype + ... + ... # constructor for .str-accessor will also pass + ... 
pd.Series(values).str """ inferred_dtype, values = request.param values = np.array(values, dtype=object) # object dtype to avoid casting @@ -179,20 +182,6 @@ def test_api_per_dtype(self, box, dtype, any_skipna_inferred_dtype): pytest.xfail(reason='Conversion to numpy array fails because ' 'the ._values-attribute is not a numpy array for ' 'PeriodArray/IntervalArray; see GH 23553') - if box == Index and inferred_dtype in ['empty', 'bytes']: - pytest.xfail(reason='Raising too restrictively; ' - 'solved by GH 23167') - if (box == Index and dtype == object - and inferred_dtype in ['boolean', 'date', 'time']): - pytest.xfail(reason='Inferring incorrectly because of NaNs; ' - 'solved by GH 23167') - if (box == Series - and (dtype == object and inferred_dtype not in [ - 'string', 'unicode', 'empty', - 'bytes', 'mixed', 'mixed-integer']) - or (dtype == 'category' - and inferred_dtype in ['decimal', 'boolean', 'time'])): - pytest.xfail(reason='Not raising correctly; solved by GH 23167') types_passing_constructor = ['string', 'unicode', 'empty', 'bytes', 'mixed', 'mixed-integer'] @@ -220,27 +209,21 @@ def test_api_per_method(self, box, dtype, method_name, args, kwargs = any_string_method # TODO: get rid of these xfails - if (method_name not in ['encode', 'decode', 'len'] - and inferred_dtype == 'bytes'): - pytest.xfail(reason='Not raising for "bytes", see GH 23011;' - 'Also: malformed method names, see GH 23551; ' - 'solved by GH 23167') - if (method_name == 'cat' - and inferred_dtype in ['mixed', 'mixed-integer']): - pytest.xfail(reason='Bad error message; should raise better; ' - 'solved by GH 23167') - if box == Index and inferred_dtype in ['empty', 'bytes']: - pytest.xfail(reason='Raising too restrictively; ' - 'solved by GH 23167') - if (box == Index and dtype == object - and inferred_dtype in ['boolean', 'date', 'time']): - pytest.xfail(reason='Inferring incorrectly because of NaNs; ' - 'solved by GH 23167') + if (method_name in ['partition', 'rpartition'] and box == Index + and inferred_dtype == 'empty'): + pytest.xfail(reason='Method cannot deal with empty Index') + if (method_name == 'split' and box == Index and values.size == 0 + and kwargs.get('expand', None) is not None): + pytest.xfail(reason='Split fails on empty Series when expand=True') + if (method_name == 'get_dummies' and box == Index + and inferred_dtype == 'empty' and (dtype == object + or values.size == 0)): + pytest.xfail(reason='Need to fortify get_dummies corner cases') t = box(values, dtype=dtype) # explicit dtype to avoid casting method = getattr(t.str, method_name) - bytes_allowed = method_name in ['encode', 'decode', 'len'] + bytes_allowed = method_name in ['decode', 'get', 'len', 'slice'] # as of v0.23.4, all methods except 'cat' are very lenient with the # allowed data types, just returning NaN for entries that error. 
# This could be changed with an 'errors'-kwarg to the `str`-accessor, @@ -3167,7 +3150,8 @@ def test_str_accessor_no_new_attributes(self): def test_method_on_bytes(self): lhs = Series(np.array(list('abc'), 'S1').astype(object)) rhs = Series(np.array(list('def'), 'S1').astype(object)) - with pytest.raises(TypeError, match="can't concat str to bytes"): + with pytest.raises(TypeError, + match="Cannot use .str.cat with values of.*"): lhs.str.cat(rhs) def test_casefold(self): From 4cd348bbe97f342787c0f3be2370e57695badd0f Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 1 Jun 2019 16:34:57 +0100 Subject: [PATCH 15/43] Changing dev docs ssh key (#26604) --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 9f83917024049..0064d0a932960 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -161,7 +161,7 @@ jobs: - task: InstallSSHKey@0 inputs: hostName: 'github.com' - sshPublicKey: 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDHmz3l/EdqrgNxEUKkwDUuUcLv91unig03pYFGO/DMIgCmPdMG96zAgfnESd837Rm0wSSqylwSzkRJt5MV/TpFlcVifDLDQmUhqCeO8Z6dLl/oe35UKmyYICVwcvQTAaHNnYRpKC5IUlTh0JEtw9fGlnp1Ta7U1ENBLbKdpywczElhZu+hOQ892zqOj3CwA+U2329/d6cd7YnqIKoFN9DWT3kS5K6JE4IoBfQEVekIOs23bKjNLvPoOmi6CroAhu/K8j+NCWQjge5eJf2x/yTnIIP1PlEcXoHIr8io517posIx3TBup+CN8bNS1PpDW3jyD3ttl1uoBudjOQrobNnJeR6Rn67DRkG6IhSwr3BWj8alwUG5mTdZzwV5Pa9KZFdIiqX7NoDGg+itsR39QCn0thK8lGRNSR8KrWC1PSjecwelKBO7uQ7rnk/rkrZdBWR4oEA8YgNH8tirUw5WfOr5a0AIaJicKxGKNdMxZt+zmC+bS7F4YCOGIm9KHa43RrKhoGRhRf9fHHHKUPwFGqtWG4ykcUgoamDOURJyepesBAO3FiRE9rLU6ILbB3yEqqoekborHmAJD5vf7PWItW3Q/YQKuk3kkqRcKnexPyzyyq5lUgTi8CxxZdaASIOu294wjBhhdyHlXEkVTNJ9JKkj/obF+XiIIp0cBDsOXY9hDQ== pandas-dev@python.org' + sshPublicKey: 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDfF0BSddjvZx/z4/2TXsy+RxjwBpgdHkmjtL9WfRHxEw1TchBuEj5vWWcxBNTK+9oVzD/Lca89HAXXrklsfkdAK3LvLfGCxTGpP8t/3CxxFdnSg3EN+4cDGKuDlbeTyzdASdPBOq0GTZjUFekl9ZfFrFJ9SoPpqZ4mmPRPapPrkwTs4xIrBly0eWcISFYgZcG58m65+XQpyyBMbpsO5ZHBBxE8kkWN0yY+gKt5PeeIO82xE+7F+3Qhlc67fKfB4FEitQ5SKrbKyGNNdFtEGcC6CEtD0B0vJxssltQEl5dDWPJP6tH4cIm/J6m28mpSYc5fEBhr75jE4Ybw6NtGgBZEdtFRFlnb91mSiVSjM/HEkV7/xYai+H1Gk+I/8tcl8cf3JCiJSP2glz8bp52+i5it29FUL8ITxdJSo0duUkVm3nZ8cDI6zag+nSSmzdZ1I9Fw7M7RRPHM2zd5+6RskeqamR5lY3Iv+t8Yo8cRX10IiHNF89b+3vI5ZkIKqytrPfrY45jGVMXA6x/whMh94Ac94qm+Do7P3eT/66a1lX0r+UfV6UnfwHE6cZ1ZFX2AzlmSiYMKmTD3hn1GNyHHuvk3Mneanbk4+x+8SjAXIK354zJ8c1Qgk1iEicDvna2IBd94R4tBWjYZ8xH7avmPlhs0HwbjiNOFDc45UXvwIl+D7w== pandas-dev@python.org' sshKeySecureFile: 'pandas_docs_key' displayName: 'Install GitHub ssh deployment key' condition : | From ad7c9e9580cc1e5e18ce0f6b68ec952fbddbb71e Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 1 Jun 2019 17:46:56 +0100 Subject: [PATCH 16/43] CI: Removing doc build in azure (#26609) --- azure-pipelines.yml | 60 --------------------------------------------- 1 file changed, 60 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 0064d0a932960..85325c52e7e6d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -116,63 +116,3 @@ jobs: fi displayName: 'Running benchmarks' condition: true - -- job: 'Docs' - pool: - vmImage: ubuntu-16.04 - timeoutInMinutes: 90 - steps: - - script: | - echo '##vso[task.setvariable variable=CONDA_ENV]pandas-dev' - echo '##vso[task.setvariable variable=ENV_FILE]environment.yml' - displayName: 'Setting environment variables' - - - script: | - export PATH=$HOME/miniconda3/bin:$PATH - sudo apt-get install -y libc6-dev-i386 - ci/setup_env.sh - displayName: 'Setup environment 
and build pandas' - - - script: | - export PATH=$HOME/miniconda3/bin:$PATH - source activate pandas-dev - doc/make.py - displayName: 'Build documentation' - - - script: | - cd doc/build/html - git init - touch .nojekyll - git add --all . - git config user.email "pandas-dev@python.org" - git config user.name "pandas-docs-bot" - git commit -m "pandas documentation in master" - displayName: 'Create git repo for docs build' - condition : | - and(not(eq(variables['Build.Reason'], 'PullRequest')), - eq(variables['Build.SourceBranch'], 'refs/heads/master')) - - # This task to work requires next steps: - # 1. Got to "Library > Secure files" in the azure-pipelines dashboard: https://dev.azure.com/pandas-dev/pandas/_library?itemType=SecureFiles - # 2. Click on "+ Secure file" - # 3. Upload the private key (the name of the file must match with the specified in "sshKeySecureFile" input below, "pandas_docs_key") - # 4. Click on file name after it is created, tick the box "Authorize for use in all pipelines" and save - # 5. The public key specified in "sshPublicKey" is the pair of the uploaded private key, and needs to be specified as a deploy key of the repo where the docs will be pushed: https://github.com/pandas-dev/pandas-dev.github.io/settings/keys - - task: InstallSSHKey@0 - inputs: - hostName: 'github.com' - sshPublicKey: 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDfF0BSddjvZx/z4/2TXsy+RxjwBpgdHkmjtL9WfRHxEw1TchBuEj5vWWcxBNTK+9oVzD/Lca89HAXXrklsfkdAK3LvLfGCxTGpP8t/3CxxFdnSg3EN+4cDGKuDlbeTyzdASdPBOq0GTZjUFekl9ZfFrFJ9SoPpqZ4mmPRPapPrkwTs4xIrBly0eWcISFYgZcG58m65+XQpyyBMbpsO5ZHBBxE8kkWN0yY+gKt5PeeIO82xE+7F+3Qhlc67fKfB4FEitQ5SKrbKyGNNdFtEGcC6CEtD0B0vJxssltQEl5dDWPJP6tH4cIm/J6m28mpSYc5fEBhr75jE4Ybw6NtGgBZEdtFRFlnb91mSiVSjM/HEkV7/xYai+H1Gk+I/8tcl8cf3JCiJSP2glz8bp52+i5it29FUL8ITxdJSo0duUkVm3nZ8cDI6zag+nSSmzdZ1I9Fw7M7RRPHM2zd5+6RskeqamR5lY3Iv+t8Yo8cRX10IiHNF89b+3vI5ZkIKqytrPfrY45jGVMXA6x/whMh94Ac94qm+Do7P3eT/66a1lX0r+UfV6UnfwHE6cZ1ZFX2AzlmSiYMKmTD3hn1GNyHHuvk3Mneanbk4+x+8SjAXIK354zJ8c1Qgk1iEicDvna2IBd94R4tBWjYZ8xH7avmPlhs0HwbjiNOFDc45UXvwIl+D7w== pandas-dev@python.org' - sshKeySecureFile: 'pandas_docs_key' - displayName: 'Install GitHub ssh deployment key' - condition : | - and(not(eq(variables['Build.Reason'], 'PullRequest')), - eq(variables['Build.SourceBranch'], 'refs/heads/master')) - - - script: | - cd doc/build/html - git remote add origin git@github.com:pandas-dev/pandas-dev.github.io.git - git push origin master -f - displayName: 'Publish docs to GitHub pages' - condition : | - and(not(eq(variables['Build.Reason'], 'PullRequest')), - eq(variables['Build.SourceBranch'], 'refs/heads/master')) From 68c6766110b918bf6d75d0b5895e2731b14ca610 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Sat, 1 Jun 2019 17:03:58 +0000 Subject: [PATCH 17/43] PERF: don't call RangeIndex._data unnecessarily (#26565) --- asv_bench/benchmarks/index_object.py | 6 +++++ doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/indexes/range.py | 32 +++++++++++++++++++++++-- pandas/tests/indexes/test_range.py | 36 ++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 896a20bae2069..78fe2ae966896 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -94,6 +94,12 @@ def time_min(self): def time_min_trivial(self): self.idx_inc.min() + def time_get_loc_inc(self): + self.idx_inc.get_loc(900000) + + def time_get_loc_dec(self): + 
+        self.idx_dec.get_loc(100000)
+

 class IndexAppend:

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 87a8010998bd0..1619ba1a45739 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -493,6 +493,7 @@ Performance Improvements

 - Improved performance of :meth:`Series.searchsorted`. The speedup is especially large when the dtype is
   int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`)
 - Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`)
+- Improved performance when slicing :class:`RangeIndex` (:issue:`26565`)
 - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`)
 - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`)
 - Improved performance of :meth:`IntervalIndex.is_monotonic`, :meth:`IntervalIndex.is_monotonic_increasing` and :meth:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`)
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
index ea14a4c789cd3..9401de3346ccd 100644
--- a/pandas/core/indexes/range.py
+++ b/pandas/core/indexes/range.py
@@ -22,6 +22,8 @@
 from pandas.core.indexes.base import Index, _index_shared_docs
 from pandas.core.indexes.numeric import Int64Index

+from pandas.io.formats.printing import pprint_thing
+

 class RangeIndex(Int64Index):
     """
@@ -64,6 +66,8 @@ class RangeIndex(Int64Index):

     _typ = 'rangeindex'
     _engine_type = libindex.Int64Engine
+    # check whether self._data has been called
+    _cached_data = None  # type: np.ndarray

     # --------------------------------------------------------------------
     # Constructors
@@ -164,6 +168,8 @@ def _simple_new(cls, start, stop=None, step=None, name=None,
         for k, v in kwargs.items():
             setattr(result, k, v)

+        result._range = range(result._start, result._stop, result._step)
+
         result._reset_identity()
         return result

@@ -180,9 +186,19 @@ def _constructor(self):
         """ return the class to use for construction """
         return Int64Index

-    @cache_readonly
+    @property
     def _data(self):
-        return np.arange(self._start, self._stop, self._step, dtype=np.int64)
+        """
+        An int array that for performance reasons is created only when needed.
+
+        The constructed array is saved in ``_cached_data``. This allows us to
+        check if the array has been created without accessing ``_data`` and
+        triggering the construction. 
+ """ + if self._cached_data is None: + self._cached_data = np.arange(self._start, self._stop, self._step, + dtype=np.int64) + return self._cached_data @cache_readonly def _int64index(self): @@ -215,6 +231,9 @@ def _format_data(self, name=None): # we are formatting thru the attributes return None + def _format_with_header(self, header, na_rep='NaN', **kwargs): + return header + list(map(pprint_thing, self._range)) + # -------------------------------------------------------------------- @property def start(self): @@ -296,6 +315,15 @@ def is_monotonic_decreasing(self): def has_duplicates(self): return False + @Appender(_index_shared_docs['get_loc']) + def get_loc(self, key, method=None, tolerance=None): + if is_integer(key) and method is None and tolerance is None: + try: + return self._range.index(key) + except ValueError: + raise KeyError(key) + return super().get_loc(key, method=method, tolerance=tolerance) + def tolist(self): return list(range(self._start, self._stop, self._step)) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index b2c330015081c..477a4e527f278 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -241,6 +241,42 @@ def test_view(self): def test_dtype(self): assert self.index.dtype == np.int64 + def test_cached_data(self): + # GH 26565 + # Calling RangeIndex._data caches an int64 array of the same length at + # self._cached_data. This tests whether _cached_data has been set. + idx = RangeIndex(0, 100, 10) + + assert idx._cached_data is None + + repr(idx) + assert idx._cached_data is None + + str(idx) + assert idx._cached_data is None + + idx.get_loc(20) + assert idx._cached_data is None + + df = pd.DataFrame({'a': range(10)}, index=idx) + + df.loc[50] + assert idx._cached_data is None + + with pytest.raises(KeyError): + df.loc[51] + assert idx._cached_data is None + + df.loc[10:50] + assert idx._cached_data is None + + df.iloc[5:10] + assert idx._cached_data is None + + # actually calling data._data + assert isinstance(idx._data, np.ndarray) + assert isinstance(idx._cached_data, np.ndarray) + def test_is_monotonic(self): assert self.index.is_monotonic is True assert self.index.is_monotonic_increasing is True From 1f837331e57f119f9471758f4c8fecaa1e7dc16e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 2 Jun 2019 12:47:34 +0100 Subject: [PATCH 18/43] CI: pin pytest version on Python 3.5 (#26619) --- ci/deps/azure-35-compat.yaml | 2 +- ci/deps/azure-macos-35.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/deps/azure-35-compat.yaml b/ci/deps/azure-35-compat.yaml index d0a48bd3f8b27..e55a4fbdf3fa9 100644 --- a/ci/deps/azure-35-compat.yaml +++ b/ci/deps/azure-35-compat.yaml @@ -26,5 +26,5 @@ dependencies: - pip - pip: # for python 3.5, pytest>=4.0.2 is not available in conda - - pytest>=4.0.2 + - pytest==4.5.0 - html5lib==1.0b2 diff --git a/ci/deps/azure-macos-35.yaml b/ci/deps/azure-macos-35.yaml index 591266348a5f1..00c2051f29760 100644 --- a/ci/deps/azure-macos-35.yaml +++ b/ci/deps/azure-macos-35.yaml @@ -25,7 +25,7 @@ dependencies: - pip: - python-dateutil==2.5.3 # universal - - pytest>=4.0.2 + - pytest==4.5.0 - pytest-xdist - pytest-mock - hypothesis>=3.58.0 From 6fb0be001fce70d9e87ba571a97b55d273d76f4a Mon Sep 17 00:00:00 2001 From: Chuanzhu Xu Date: Sun, 2 Jun 2019 17:09:44 -0400 Subject: [PATCH 19/43] remove outdated gtk package from code (#26590) --- doc/source/install.rst | 1 - doc/source/user_guide/io.rst | 2 +- doc/source/whatsnew/v0.25.0.rst | 1 + 
pandas/core/generic.py | 2 +- pandas/io/clipboard/__init__.py | 21 +++++---------------- pandas/io/clipboard/clipboards.py | 16 ---------------- pandas/io/clipboards.py | 2 +- 7 files changed, 9 insertions(+), 36 deletions(-) diff --git a/doc/source/install.rst b/doc/source/install.rst index b3b5945cc515e..98443ede2e965 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -281,7 +281,6 @@ Optional Dependencies `qtpy `__ (requires PyQt or PySide), `PyQt5 `__, `PyQt4 `__, - `pygtk `__, `xsel `__, or `xclip `__: necessary to use :func:`~pandas.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation. diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 88d8ccbbe036e..4aacb6fa1e278 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3272,7 +3272,7 @@ We can see that we got the same content back, which we had earlier written to th .. note:: - You may need to install xclip or xsel (with gtk, PyQt5, PyQt4 or qtpy) on Linux to use these methods. + You may need to install xclip or xsel (with PyQt5, PyQt4 or qtpy) on Linux to use these methods. .. _io.pickle: diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 1619ba1a45739..f122c73325b7d 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -434,6 +434,7 @@ Other API Changes - The ``arg`` argument in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` has been renamed to ``func`` (:issue:`26089`) - The ``arg`` argument in :meth:`pandas.core.window._Window.aggregate` has been renamed to ``func`` (:issue:`26372`) - Most Pandas classes had a ``__bytes__`` method, which was used for getting a python2-style bytestring representation of the object. This method has been removed as a part of dropping Python2 (:issue:`26447`) +- Removed support of gtk package for clipboards (:issue:`26563`) .. _whatsnew_0250.deprecations: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7ca2c52e18c41..33b0035e74913 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2679,7 +2679,7 @@ def to_clipboard(self, excel=True, sep=None, **kwargs): ----- Requirements for your platform. - - Linux : `xclip`, or `xsel` (with `gtk` or `PyQt4` modules) + - Linux : `xclip`, or `xsel` (with `PyQt4` modules) - Windows : none - OS X : none diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index b76a843e3e7f2..2063978c76c5a 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -18,21 +18,19 @@ On Linux, install xclip or xsel via package manager. For example, in Debian: sudo apt-get install xclip -Otherwise on Linux, you will need the gtk, qtpy or PyQt modules installed. +Otherwise on Linux, you will need the qtpy or PyQt modules installed. qtpy also requires a python-qt-bindings module: PyQt4, PyQt5, PySide, PySide2 -gtk and PyQt4 modules are not available for Python 3, -and this module does not work with PyGObject yet. +This module does not work with PyGObject yet. 
""" __version__ = '1.5.27' import platform import os import subprocess -from .clipboards import (init_osx_clipboard, - init_gtk_clipboard, init_qt_clipboard, - init_xclip_clipboard, init_xsel_clipboard, - init_klipper_clipboard, init_no_clipboard) +from .clipboards import ( + init_osx_clipboard, init_qt_clipboard, init_xclip_clipboard, + init_xsel_clipboard, init_klipper_clipboard, init_no_clipboard) from .windows import init_windows_clipboard # `import qtpy` sys.exit()s if DISPLAY is not in the environment. @@ -60,14 +58,6 @@ def determine_clipboard(): return init_osx_clipboard() if HAS_DISPLAY: # Determine which command/module is installed, if any. - try: - # Check if gtk is installed - import gtk # noqa - except ImportError: - pass - else: - return init_gtk_clipboard() - try: # qtpy is a small abstraction layer that lets you write # applications using a single api call to either PyQt or PySide @@ -104,7 +94,6 @@ def set_clipboard(clipboard): global copy, paste clipboard_types = {'osx': init_osx_clipboard, - 'gtk': init_gtk_clipboard, 'qt': init_qt_clipboard, 'xclip': init_xclip_clipboard, 'xsel': init_xsel_clipboard, diff --git a/pandas/io/clipboard/clipboards.py b/pandas/io/clipboard/clipboards.py index 66e2e35bf0c59..52abdeafb5ecc 100644 --- a/pandas/io/clipboard/clipboards.py +++ b/pandas/io/clipboard/clipboards.py @@ -22,22 +22,6 @@ def paste_osx(): return copy_osx, paste_osx -def init_gtk_clipboard(): - import gtk - - def copy_gtk(text): - global cb - cb = gtk.Clipboard() - cb.set_text(text) - cb.store() - - def paste_gtk(): - clipboardContents = gtk.Clipboard().wait_for_text() - return clipboardContents - - return copy_gtk, paste_gtk - - def init_qt_clipboard(): # $DISPLAY should exist diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index be1256edf7afe..dc30285895dd5 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -91,7 +91,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover Notes ----- Requirements for your platform - - Linux: xclip, or xsel (with gtk or PyQt4 modules) + - Linux: xclip, or xsel (with PyQt4 modules) - Windows: - OS X: """ From a6ad17dde640e026eddadbe3551e15d4e25961ee Mon Sep 17 00:00:00 2001 From: iamshwin <23633545+iamshwin@users.noreply.github.com> Date: Mon, 3 Jun 2019 00:11:48 +0100 Subject: [PATCH 20/43] Tidy documentation about plotting Series histograms (#26624) --- pandas/plotting/_core.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index fed4b0d90983c..3f6a30c4639bc 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2477,8 +2477,6 @@ def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, bin edges are calculated and returned. If bins is a sequence, gives bin edges, including left edge of first bin and right edge of last bin. In this case, bins is returned unmodified. 
- bins : integer, default 10 - Number of histogram bins to be used `**kwds` : keywords To be passed to the actual plotting function From 3a5619531e44f07b5e7a58858e79432b64e0f29d Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 3 Jun 2019 00:13:08 +0100 Subject: [PATCH 21/43] TST/CLN: deduplicate fixture from test_to_latex.py (#26603) --- pandas/conftest.py | 31 ++++++++++++++++++++++++ pandas/tests/frame/conftest.py | 29 ---------------------- pandas/tests/io/formats/test_to_latex.py | 27 +++++++++------------ 3 files changed, 42 insertions(+), 45 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 8f71028f51ab4..09fe8e0829fa1 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -12,6 +12,8 @@ import pandas.util._test_decorators as td import pandas as pd +from pandas import DataFrame +import pandas.util.testing as tm hypothesis.settings.register_profile( "ci", @@ -690,3 +692,32 @@ def tick_classes(request): normalize=st.booleans(), startingMonth=st.integers(min_value=1, max_value=12) )) + + +@pytest.fixture +def float_frame(): + """ + Fixture for DataFrame of floats with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + + A B C D + P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465 + qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901 + tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433 + wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651 + M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938 + QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053 + r78Jwns6dn -0.653707 0.883127 0.682199 0.206159 + ... ... ... ... ... + IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316 + lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999 + qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121 + yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962 + 65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987 + eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871 + xSucinXxuV -1.263557 0.252799 -0.552247 0.400426 + + [30 rows x 4 columns] + """ + return DataFrame(tm.getSeriesData()) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index c451cd58f1497..d8a590bc492a4 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -5,35 +5,6 @@ import pandas.util.testing as tm -@pytest.fixture -def float_frame(): - """ - Fixture for DataFrame of floats with index of unique strings - - Columns are ['A', 'B', 'C', 'D']. - - A B C D - P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465 - qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901 - tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433 - wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651 - M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938 - QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053 - r78Jwns6dn -0.653707 0.883127 0.682199 0.206159 - ... ... ... ... ... 
- IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316 - lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999 - qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121 - yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962 - 65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987 - eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871 - xSucinXxuV -1.263557 0.252799 -0.552247 0.400426 - - [30 rows x 4 columns] - """ - return DataFrame(tm.getSeriesData()) - - @pytest.fixture def float_frame_with_na(): """ diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 5a6511fbd20ee..b9f28ec36d021 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -8,19 +8,14 @@ from pandas.util import testing as tm -@pytest.fixture -def frame(): - return DataFrame(tm.getSeriesData()) - - class TestToLatex: - def test_to_latex_filename(self, frame): + def test_to_latex_filename(self, float_frame): with tm.ensure_clean('test.tex') as path: - frame.to_latex(path) + float_frame.to_latex(path) with open(path, 'r') as f: - assert frame.to_latex() == f.read() + assert float_frame.to_latex() == f.read() # test with utf-8 and encoding option (GH 7061) df = DataFrame([['au\xdfgangen']]) @@ -35,9 +30,9 @@ def test_to_latex_filename(self, frame): with codecs.open(path, 'r', encoding='utf-8') as f: assert df.to_latex() == f.read() - def test_to_latex(self, frame): + def test_to_latex(self, float_frame): # it works! - frame.to_latex() + float_frame.to_latex() df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) withindex_result = df.to_latex() @@ -66,9 +61,9 @@ def test_to_latex(self, frame): assert withoutindex_result == withoutindex_expected - def test_to_latex_format(self, frame): + def test_to_latex_format(self, float_frame): # GH Bug #9402 - frame.to_latex(column_format='ccc') + float_frame.to_latex(column_format='ccc') df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) withindex_result = df.to_latex(column_format='ccc') @@ -389,8 +384,8 @@ def test_to_latex_special_escape(self): """ assert escaped_result == escaped_expected - def test_to_latex_longtable(self, frame): - frame.to_latex(longtable=True) + def test_to_latex_longtable(self, float_frame): + float_frame.to_latex(longtable=True) df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) withindex_result = df.to_latex(longtable=True) @@ -535,9 +530,9 @@ def test_to_latex_specified_header(self): with pytest.raises(ValueError): df.to_latex(header=['A']) - def test_to_latex_decimal(self, frame): + def test_to_latex_decimal(self, float_frame): # GH 12031 - frame.to_latex() + float_frame.to_latex() df = DataFrame({'a': [1.0, 2.1], 'b': ['b1', 'b2']}) withindex_result = df.to_latex(decimal=',') From ee52d0efe8ad1281292e80937662854a44a9da9a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 2 Jun 2019 16:20:15 -0700 Subject: [PATCH 22/43] CLN: Remove convert_objects (#26612) --- doc/source/reference/frame.rst | 1 - doc/source/reference/series.rst | 1 - doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/generic.py | 48 +--------- pandas/tests/series/test_internals.py | 125 -------------------------- 5 files changed, 2 insertions(+), 174 deletions(-) diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index dfa475684c834..b4fb85c028b3e 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -48,7 +48,6 @@ Conversion :toctree: api/ DataFrame.astype - DataFrame.convert_objects DataFrame.infer_objects DataFrame.copy DataFrame.isna diff --git 
a/doc/source/reference/series.rst b/doc/source/reference/series.rst index b406893e3414a..8fccdea979602 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -56,7 +56,6 @@ Conversion Series.astype Series.infer_objects - Series.convert_objects Series.copy Series.bool Series.to_numpy diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index f122c73325b7d..1cbec223008c4 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -483,6 +483,7 @@ Removal of prior version deprecations/changes - Removed the previously deprecated ``TimeGrouper`` (:issue:`16942`) - Removed the previously deprecated ``parse_cols`` keyword in :func:`read_excel` (:issue:`16488`) - Removed the previously deprecated ``pd.options.html.border`` (:issue:`16970`) +- Removed the previously deprecated ``convert_objects`` (:issue:`11221`) .. _whatsnew_0250.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 33b0035e74913..2428bbad7003b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -113,7 +113,7 @@ class NDFrame(PandasObject, SelectionMixin): _internal_names_set = set(_internal_names) # type: Set[str] _accessors = set() # type: Set[str] _deprecations = frozenset([ - 'as_blocks', 'blocks', 'convert_objects', 'is_copy' + 'as_blocks', 'blocks', 'is_copy' ]) # type: FrozenSet[str] _metadata = [] # type: List[str] _is_copy = None @@ -5913,52 +5913,6 @@ def _convert(self, datetime=False, numeric=False, timedelta=False, timedelta=timedelta, coerce=coerce, copy=copy)).__finalize__(self) - def convert_objects(self, convert_dates=True, convert_numeric=False, - convert_timedeltas=True, copy=True): - """ - Attempt to infer better dtype for object columns. - - .. deprecated:: 0.21.0 - - Parameters - ---------- - convert_dates : boolean, default True - If True, convert to date where possible. If 'coerce', force - conversion, with unconvertible values becoming NaT. - convert_numeric : boolean, default False - If True, attempt to coerce to numbers (including strings), with - unconvertible values becoming NaN. - convert_timedeltas : boolean, default True - If True, convert to timedelta where possible. If 'coerce', force - conversion, with unconvertible values becoming NaT. - copy : boolean, default True - If True, return a copy even if no copy is necessary (e.g. no - conversion was done). Note: This is meant for internal use, and - should not be confused with inplace. - - Returns - ------- - converted : same as input object - - See Also - -------- - to_datetime : Convert argument to datetime. - to_timedelta : Convert argument to timedelta. - to_numeric : Convert argument to numeric type. - """ - msg = ("convert_objects is deprecated. To re-infer data dtypes for " - "object columns, use {klass}.infer_objects()\nFor all " - "other conversions use the data-type specific converters " - "pd.to_datetime, pd.to_timedelta and pd.to_numeric." - ).format(klass=self.__class__.__name__) - warnings.warn(msg, FutureWarning, stacklevel=2) - - return self._constructor( - self._data.convert(convert_dates=convert_dates, - convert_numeric=convert_numeric, - convert_timedeltas=convert_timedeltas, - copy=copy)).__finalize__(self) - def infer_objects(self): """ Attempt to infer better dtypes for object columns. 
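# A minimal sketch (editorial addition, not part of the patch) of the
# migration path that the removed method's deprecation message pointed to:
# `infer_objects` for dtype re-inference, plus the type-specific converters
# for forced conversion.

import pandas as pd

df = pd.DataFrame({'a': ['1', '2'], 'b': ['2016-01-01', 'x']}, dtype=object)
df['a'] = pd.to_numeric(df['a'])                    # was convert_numeric=True
df['b'] = pd.to_datetime(df['b'], errors='coerce')  # was convert_dates='coerce'
df = df.infer_objects()                             # re-infer remaining object columns
df.dtypes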
diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index f6f4a2db359f7..29846f10dae33 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -12,131 +12,6 @@ class TestSeriesInternals: - def test_convert_objects(self): - - s = Series([1., 2, 3], index=['a', 'b', 'c']) - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates=False, - convert_numeric=True) - assert_series_equal(result, s) - - # force numeric conversion - r = s.copy().astype('O') - r['a'] = '1' - with tm.assert_produces_warning(FutureWarning): - result = r.convert_objects(convert_dates=False, - convert_numeric=True) - assert_series_equal(result, s) - - r = s.copy().astype('O') - r['a'] = '1.' - with tm.assert_produces_warning(FutureWarning): - result = r.convert_objects(convert_dates=False, - convert_numeric=True) - assert_series_equal(result, s) - - r = s.copy().astype('O') - r['a'] = 'garbled' - expected = s.copy() - expected['a'] = np.nan - with tm.assert_produces_warning(FutureWarning): - result = r.convert_objects(convert_dates=False, - convert_numeric=True) - assert_series_equal(result, expected) - - # GH 4119, not converting a mixed type (e.g.floats and object) - s = Series([1, 'na', 3, 4]) - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_numeric=True) - expected = Series([1, np.nan, 3, 4]) - assert_series_equal(result, expected) - - s = Series([1, '', 3, 4]) - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_numeric=True) - expected = Series([1, np.nan, 3, 4]) - assert_series_equal(result, expected) - - # dates - s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), - datetime(2001, 1, 3, 0, 0)]) - s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), - datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1, - Timestamp('20010104'), '20010105'], - dtype='O') - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates=True, - convert_numeric=False) - expected = Series([Timestamp('20010101'), Timestamp('20010102'), - Timestamp('20010103')], dtype='M8[ns]') - assert_series_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates='coerce', - convert_numeric=False) - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates='coerce', - convert_numeric=True) - assert_series_equal(result, expected) - - expected = Series([Timestamp('20010101'), Timestamp('20010102'), - Timestamp('20010103'), - NaT, NaT, NaT, Timestamp('20010104'), - Timestamp('20010105')], dtype='M8[ns]') - with tm.assert_produces_warning(FutureWarning): - result = s2.convert_objects(convert_dates='coerce', - convert_numeric=False) - assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning): - result = s2.convert_objects(convert_dates='coerce', - convert_numeric=True) - assert_series_equal(result, expected) - - # preserver all-nans (if convert_dates='coerce') - s = Series(['foo', 'bar', 1, 1.0], dtype='O') - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates='coerce', - convert_numeric=False) - expected = Series([NaT] * 2 + [Timestamp(1)] * 2) - assert_series_equal(result, expected) - - # preserver if non-object - s = Series([1], dtype='float32') - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates='coerce', - 
convert_numeric=False) - assert_series_equal(result, s) - - # r = s.copy() - # r[0] = np.nan - # result = r.convert_objects(convert_dates=True,convert_numeric=False) - # assert result.dtype == 'M8[ns]' - - # dateutil parses some single letters into today's value as a date - for x in 'abcdefghijklmnopqrstuvwxyz': - s = Series([x]) - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates='coerce') - assert_series_equal(result, s) - s = Series([x.upper()]) - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates='coerce') - assert_series_equal(result, s) - - def test_convert_objects_preserve_bool(self): - s = Series([1, True, 3, 5], dtype=object) - with tm.assert_produces_warning(FutureWarning): - r = s.convert_objects(convert_numeric=True) - e = Series([1, 1, 3, 5], dtype='i8') - tm.assert_series_equal(r, e) - - def test_convert_objects_preserve_all_bool(self): - s = Series([False, True, False, False], dtype=object) - with tm.assert_produces_warning(FutureWarning): - r = s.convert_objects(convert_numeric=True) - e = Series([False, True, False, False], dtype=bool) - tm.assert_series_equal(r, e) - # GH 10265 def test_convert(self): # Tests: All to nans, coerce, true From 6f9aa6a678fbbf113956badf7db5c8532c04958f Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Mon, 3 Jun 2019 01:34:27 +0200 Subject: [PATCH 23/43] Clean up ufuncs post numpy bump (#26606) --- pandas/core/arrays/sparse.py | 9 --------- pandas/core/sparse/frame.py | 6 ------ pandas/core/sparse/series.py | 20 -------------------- 3 files changed, 35 deletions(-) diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index ecc06db2bd07b..926ed6a829a6d 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -573,7 +573,6 @@ class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin): Whether to explicitly copy the incoming `data` array. 
""" - __array_priority__ = 15 _pandas_ftype = 'sparse' _subtyp = 'sparse_array' # register ABCSparseArray @@ -1639,14 +1638,6 @@ def T(self): # Ufuncs # ------------------------------------------------------------------------ - def __array_wrap__(self, array, context=None): - from pandas.core.dtypes.generic import ABCSparseSeries - - ufunc, inputs, _ = context - inputs = tuple(x.to_dense() if isinstance(x, ABCSparseSeries) else x - for x in inputs) - return self.__array_ufunc__(ufunc, '__call__', *inputs) - _HANDLED_TYPES = (np.ndarray, numbers.Number) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index bf1cec7571f4d..0320da6d9a48d 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -242,12 +242,6 @@ def _init_spmatrix(self, data, index, columns, dtype=None, def to_coo(self): return SparseFrameAccessor(self).to_coo() - def __array_wrap__(self, result): - return self._constructor( - result, index=self.index, columns=self.columns, - default_kind=self._default_kind, - default_fill_value=self._default_fill_value).__finalize__(self) - def __getstate__(self): # pickling return dict(_typ=self._typ, _subtyp=self._subtyp, _data=self._data, diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 3f95acdbfb42c..3814d8bb66635 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -124,26 +124,6 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): fill_value=result.fill_value, copy=False).__finalize__(self) - def __array_wrap__(self, result, context=None): - """ - Gets called prior to a ufunc (and after) - - See SparseArray.__array_wrap__ for detail. - """ - result = self.values.__array_wrap__(result, context=context) - return self._constructor(result, index=self.index, - sparse_index=self.sp_index, - fill_value=result.fill_value, - copy=False).__finalize__(self) - - def __array_finalize__(self, obj): - """ - Gets called after any ufunc or other array operations, necessary - to pass on the index. 
- """ - self.name = getattr(obj, 'name', None) - self.fill_value = getattr(obj, 'fill_value', None) - # unary ops # TODO: See if this can be shared def __pos__(self): From c95be629022fa7339b2d744004feaa1b381cf1ba Mon Sep 17 00:00:00 2001 From: Frank Hoang Date: Sun, 2 Jun 2019 18:42:54 -0500 Subject: [PATCH 24/43] Add more specific error message when user passes incorrect matrix format to from_coo (#26584) --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/sparse/scipy_sparse.py | 11 ++++++++++- pandas/tests/arrays/sparse/test_accessor.py | 10 ++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 1cbec223008c4..461c883f542ab 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -694,7 +694,7 @@ Sparse - Significant speedup in :class:`SparseArray` initialization that benefits most operations, fixing performance regression introduced in v0.20.0 (:issue:`24985`) - Bug in :class:`SparseFrame` constructor where passing ``None`` as the data would cause ``default_fill_value`` to be ignored (:issue:`16807`) - Bug in :class:`SparseDataFrame` when adding a column in which the length of values does not match length of index, ``AssertionError`` is raised instead of raising ``ValueError`` (:issue:`25484`) - +- Introduce a better error message in :meth:`Series.sparse.from_coo` so it returns a ``TypeError`` for inputs that are not coo matrices (:issue:`26554`) Other ^^^^^ diff --git a/pandas/core/sparse/scipy_sparse.py b/pandas/core/sparse/scipy_sparse.py index 7630983421ff9..0dd8958e93c13 100644 --- a/pandas/core/sparse/scipy_sparse.py +++ b/pandas/core/sparse/scipy_sparse.py @@ -130,10 +130,19 @@ def _coo_to_sparse_series(A, dense_index: bool = False, Returns ------- Series or SparseSeries + + Raises + ------ + TypeError if A is not a coo_matrix + """ from pandas import SparseDtype - s = Series(A.data, MultiIndex.from_arrays((A.row, A.col))) + try: + s = Series(A.data, MultiIndex.from_arrays((A.row, A.col))) + except AttributeError: + raise TypeError('Expected coo_matrix. Got {} instead.' + .format(type(A).__name__)) s = s.sort_index() if sparse_series: # TODO(SparseSeries): remove this and the sparse_series keyword. diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py index 370d222c1ab4e..d0a188a8aff3c 100644 --- a/pandas/tests/arrays/sparse/test_accessor.py +++ b/pandas/tests/arrays/sparse/test_accessor.py @@ -119,3 +119,13 @@ def test_series_from_coo(self, dtype, dense_index): ) tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_series_from_coo_incorrect_format_raises(self): + # gh-26554 + import scipy.sparse + m = scipy.sparse.csr_matrix(np.array([[0, 1], [0, 0]])) + with pytest.raises(TypeError, + match='Expected coo_matrix. Got csr_matrix instead.' 
+ ): + pd.Series.sparse.from_coo(m) From 21f49c41c0372b513c82a21c7641f6f3f6abfa16 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 3 Jun 2019 07:35:25 +0200 Subject: [PATCH 25/43] DOC/CI: restore travis CI doc build environment (#26621) --- .travis.yml | 4 ++-- ci/deps/travis-36-doc.yaml | 46 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 ci/deps/travis-36-doc.yaml diff --git a/.travis.yml b/.travis.yml index 90dd904e6cb1e..ce8817133a477 100644 --- a/.travis.yml +++ b/.travis.yml @@ -51,14 +51,14 @@ matrix: # In allow_failures - dist: trusty env: - - JOB="3.6, doc" ENV_FILE="environment.yml" DOC=true + - JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true allow_failures: - dist: trusty env: - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" - dist: trusty env: - - JOB="3.6, doc" ENV_FILE="environment.yml" DOC=true + - JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true before_install: - echo "before_install" diff --git a/ci/deps/travis-36-doc.yaml b/ci/deps/travis-36-doc.yaml new file mode 100644 index 0000000000000..9d6cbd82fdc05 --- /dev/null +++ b/ci/deps/travis-36-doc.yaml @@ -0,0 +1,46 @@ +name: pandas-dev +channels: + - defaults + - conda-forge +dependencies: + - beautifulsoup4 + - bottleneck + - cython>=0.28.2 + - fastparquet>=0.2.1 + - gitpython + - html5lib + - hypothesis>=3.58.0 + - ipykernel + - ipython + - ipywidgets + - lxml + - matplotlib + - nbconvert>=5.4.1 + - nbformat + - nbsphinx + - notebook>=5.7.5 + - numexpr + - numpy + - numpydoc + - openpyxl + - pandoc + - pyarrow + - pyqt + - pytables + - python-dateutil + - python-snappy + - python=3.6.* + - pytz + - scipy + - seaborn + - sphinx + - sqlalchemy + - statsmodels + - xarray + - xlrd + - xlsxwriter + - xlwt + # universal + - pytest>=4.0.2 + - pytest-xdist + - isort From b1e4c55ddce8e0371b4e192a519363c53070489b Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Mon, 3 Jun 2019 13:56:29 +0200 Subject: [PATCH 26/43] TST/API: Forbid str-accessor for 1-level MultiIndex (#26608) --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/tests/test_strings.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 461c883f542ab..0e8cd95084a8d 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -434,6 +434,7 @@ Other API Changes - The ``arg`` argument in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` has been renamed to ``func`` (:issue:`26089`) - The ``arg`` argument in :meth:`pandas.core.window._Window.aggregate` has been renamed to ``func`` (:issue:`26372`) - Most Pandas classes had a ``__bytes__`` method, which was used for getting a python2-style bytestring representation of the object. This method has been removed as a part of dropping Python2 (:issue:`26447`) +- The `.str`-accessor has been disabled for 1-level :class:`MultiIndex`, use :meth:`MultiIndex.to_flat_index` if necessary (:issue:`23679`) - Removed support of gtk package for clipboards (:issue:`26563`) .. 
_whatsnew_0250.deprecations: diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 1ba0ef3918fb7..a1d522930e9aa 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -169,6 +169,14 @@ def test_api(self): assert Series.str is strings.StringMethods assert isinstance(Series(['']).str, strings.StringMethods) + def test_api_mi_raises(self): + # GH 23679 + mi = MultiIndex.from_arrays([['a', 'b', 'c']]) + with pytest.raises(AttributeError, match='Can only use .str accessor ' + 'with Index, not MultiIndex'): + mi.str + assert not hasattr(mi, 'str') + @pytest.mark.parametrize('dtype', [object, 'category']) @pytest.mark.parametrize('box', [Series, Index]) def test_api_per_dtype(self, box, dtype, any_skipna_inferred_dtype): From d5fad240b04c5f7ec21e78350ab46779c7ed7730 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Mon, 3 Jun 2019 22:17:40 +0000 Subject: [PATCH 27/43] Minor doc cleanup because of Panel removal (#26638) --- doc/source/getting_started/basics.rst | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst index 80e334054a986..5ec0094de0a91 100644 --- a/doc/source/getting_started/basics.rst +++ b/doc/source/getting_started/basics.rst @@ -1455,9 +1455,8 @@ Iteration The behavior of basic iteration over pandas objects depends on the type. When iterating over a Series, it is regarded as array-like, and basic iteration -produces the values. Other data structures, like DataFrame, -follow the dict-like convention of iterating over the "keys" of the -objects. +produces the values. DataFrames follow the dict-like convention of iterating +over the "keys" of the objects. In short, basic iteration (``for i in object``) produces: @@ -1537,9 +1536,9 @@ For example: .. ipython:: python - for item, frame in df.iteritems(): - print(item) - print(frame) + for label, ser in df.iteritems(): + print(label) + print(ser) .. _basics.iterrows: From 0ee4317ec6056d90795e0c9169d0b9464a24ebae Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Mon, 3 Jun 2019 19:23:49 -0600 Subject: [PATCH 28/43] DOC: Small whatsnew cleanups (#26643) --- doc/source/whatsnew/v0.25.0.rst | 65 +++++++++++++++++---------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 0e8cd95084a8d..267e34efc946f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -72,7 +72,7 @@ Other Enhancements - :meth:`DataFrame.pivot_table` now accepts an ``observed`` parameter which is passed to underlying calls to :meth:`DataFrame.groupby` to speed up grouping categorical data. (:issue:`24923`) - ``Series.str`` has gained :meth:`Series.str.casefold` method to removes all case distinctions present in a string (:issue:`25405`) - :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`) -- :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`) +- :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behavior of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`) - :meth:`RangeIndex.union` now supports the ``sort`` argument. If ``sort=False`` an unsorted ``Int64Index`` is always returned. 
``sort=None`` is the default and returns a mononotically increasing ``RangeIndex`` if possible or a sorted ``Int64Index`` if not (:issue:`24471`) - :meth:`TimedeltaIndex.intersection` now also supports the ``sort`` keyword (:issue:`24471`) - :meth:`DataFrame.rename` now supports the ``errors`` argument to raise errors when attempting to rename nonexistent keys (:issue:`13473`) @@ -123,11 +123,11 @@ is respected in indexing. (:issue:`24076`, :issue:`16785`) .. _whatsnew_0250.api_breaking.multi_indexing: -MultiIndex constructed from levels and codes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +``MultiIndex`` constructed from levels and codes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Constructing a :class:`MultiIndex` with NaN levels or codes value < -1 was allowed previously. -Now, construction with codes value < -1 is not allowed and NaN levels' corresponding codes +Constructing a :class:`MultiIndex` with ``NaN`` levels or codes value < -1 was allowed previously. +Now, construction with codes value < -1 is not allowed and ``NaN`` levels' corresponding codes would be reassigned as -1. (:issue:`19387`) .. ipython:: python @@ -157,8 +157,8 @@ would be reassigned as -1. (:issue:`19387`) .. _whatsnew_0250.api_breaking.groupby_apply_first_group_once: -GroupBy.apply on ``DataFrame`` evaluates first group only once -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +``GroupBy.apply`` on ``DataFrame`` evaluates first group only once +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The implementation of :meth:`DataFrameGroupBy.apply() ` previously evaluated the supplied function consistently twice on the first group @@ -176,7 +176,7 @@ Now every group is evaluated only a single time. print(group.name) return group -*Previous Behaviour*: +*Previous Behavior*: .. code-block:: python @@ -189,7 +189,7 @@ Now every group is evaluated only a single time. 0 x 1 1 y 2 -*New Behaviour*: +*New Behavior*: .. ipython:: python @@ -239,7 +239,7 @@ of ``object`` dtype. :attr:`Series.str` will now infer the dtype data *within* t ``'bytes'``-only data will raise an exception (except for :meth:`Series.str.decode`, :meth:`Series.str.get`, :meth:`Series.str.len`, :meth:`Series.str.slice`), see :issue:`23163`, :issue:`23011`, :issue:`23551`. -*Previous Behaviour*: +*Previous Behavior*: .. code-block:: python @@ -259,7 +259,7 @@ of ``object`` dtype. :attr:`Series.str` will now infer the dtype data *within* t 2 False dtype: bool -*New Behaviour*: +*New Behavior*: .. ipython:: python :okexcept: @@ -282,6 +282,8 @@ considered commutative, such that ``A.union(B) == B.union(A)`` (:issue:`23525`). *Previous Behavior*: +.. code-block:: python + In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) ... ValueError: can only call with other PeriodIndex-ed objects @@ -310,7 +312,7 @@ are returned. (:issue:`21521`) df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]}) df -*Previous Behaviour*: +*Previous Behavior*: .. code-block:: python @@ -320,7 +322,7 @@ are returned. (:issue:`21521`) 0 x 1 1 y 2 -*New Behaviour*: +*New Behavior*: .. 
ipython:: python @@ -355,7 +357,7 @@ with :attr:`numpy.nan` in the case of an empty :class:`DataFrame` (:issue:`26397 df.describe() -``__str__`` methods now call ``__repr__`` rather than vica-versa +``__str__`` methods now call ``__repr__`` rather than vice versa ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Pandas has until now mostly defined string representations in a Pandas objects's @@ -434,7 +436,7 @@ Other API Changes - The ``arg`` argument in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` has been renamed to ``func`` (:issue:`26089`) - The ``arg`` argument in :meth:`pandas.core.window._Window.aggregate` has been renamed to ``func`` (:issue:`26372`) - Most Pandas classes had a ``__bytes__`` method, which was used for getting a python2-style bytestring representation of the object. This method has been removed as a part of dropping Python2 (:issue:`26447`) -- The `.str`-accessor has been disabled for 1-level :class:`MultiIndex`, use :meth:`MultiIndex.to_flat_index` if necessary (:issue:`23679`) +- The ``.str``-accessor has been disabled for 1-level :class:`MultiIndex`, use :meth:`MultiIndex.to_flat_index` if necessary (:issue:`23679`) - Removed support of gtk package for clipboards (:issue:`26563`) .. _whatsnew_0250.deprecations: @@ -468,7 +470,7 @@ The memory usage of the two approaches is identical. See :ref:`sparse.migration` Other Deprecations ^^^^^^^^^^^^^^^^^^ -- The deprecated ``.ix[]`` indexer now raises a more visible FutureWarning instead of DeprecationWarning (:issue:`26438`). +- The deprecated ``.ix[]`` indexer now raises a more visible ``FutureWarning`` instead of ``DeprecationWarning`` (:issue:`26438`). - Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`) - The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or the :meth:`SparseArray.to_dense` method instead (:issue:`26421`). @@ -499,14 +501,13 @@ Performance Improvements - Improved performance when slicing :class:`RangeIndex` (:issue:`26565`) - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) -- Improved performance of :meth:`IntervalIndex.is_monotonic`, :meth:`IntervalIndex.is_monotonic_increasing` and :meth:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`) +- Improved performance of :attr:`IntervalIndex.is_monotonic`, :attr:`IntervalIndex.is_monotonic_increasing` and :attr:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`) - Improved performance of :meth:`DataFrame.to_csv` when writing datetime dtypes (:issue:`25708`) - Improved performance of :meth:`read_csv` by much faster parsing of ``MM/YYYY`` and ``DD/MM/YYYY`` datetime formats (:issue:`25922`) - Improved performance of nanops for dtypes that cannot store NaNs. 
Speedup is particularly prominent for :meth:`Series.all` and :meth:`Series.any` (:issue:`25070`) - Improved performance of :meth:`Series.map` for dictionary mappers on categorical series by mapping the categories instead of mapping all values (:issue:`23785`) -- Improved performance of :meth:`read_csv` by faster concatenating date columns without extra conversion to string for integer/float zero - and float NaN; by faster checking the string for the possibility of being a date (:issue:`25754`) -- Improved performance of :meth:`IntervalIndex.is_unique` by removing conversion to `MultiIndex` (:issue:`24813`) +- Improved performance of :meth:`read_csv` by faster concatenating date columns without extra conversion to string for integer/float zero and float ``NaN``; by faster checking the string for the possibility of being a date (:issue:`25754`) +- Improved performance of :attr:`IntervalIndex.is_unique` by removing conversion to ``MultiIndex`` (:issue:`24813`) .. _whatsnew_0250.bug_fixes: @@ -518,7 +519,7 @@ Categorical ^^^^^^^^^^^ - Bug in :func:`DataFrame.at` and :func:`Series.at` that would raise exception if the index was a :class:`CategoricalIndex` (:issue:`20629`) -- Fixed bug in comparison of ordered :class:`Categorical` that contained missing values with a scalar which sometimes incorrectly resulted in True (:issue:`26504`) +- Fixed bug in comparison of ordered :class:`Categorical` that contained missing values with a scalar which sometimes incorrectly resulted in ``True`` (:issue:`26504`) - Datetimelike @@ -570,7 +571,7 @@ Numeric Conversion ^^^^^^^^^^ -- Bug in :func:`DataFrame.astype()` when passing a dict of columns and types the `errors` parameter was ignored. (:issue:`25905`) +- Bug in :func:`DataFrame.astype()` when passing a dict of columns and types the ``errors`` parameter was ignored. (:issue:`25905`) - - @@ -597,7 +598,7 @@ Indexing - Bug in which :meth:`DataFrame.append` produced an erroneous warning indicating that a ``KeyError`` will be thrown in the future when the data to be appended contains new columns (:issue:`22252`). - Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`). 
- Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`) -- Allow keyword arguments for callable local reference used in the :method:`DataFrame.query` string (:issue:`26426`) +- Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`) Missing @@ -620,8 +621,8 @@ I/O - Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`) - Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`) - Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`) -- Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to Timestamp, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`) -- Bug in :func:`json_normalize` for ``errors='ignore'`` where missing values in the input data, were filled in resulting ``DataFrame`` with the string "nan" instead of ``numpy.nan`` (:issue:`25468`) +- Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to :class:`Timestamp`, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`) +- Bug in :func:`json_normalize` for ``errors='ignore'`` where missing values in the input data, were filled in resulting ``DataFrame`` with the string ``"nan"`` instead of ``numpy.nan`` (:issue:`25468`) - :meth:`DataFrame.to_html` now raises ``TypeError`` when using an invalid type for the ``classes`` parameter instead of ``AsseertionError`` (:issue:`25608`) - Bug in :meth:`DataFrame.to_string` and :meth:`DataFrame.to_latex` that would lead to incorrect output when the ``header`` keyword is used (:issue:`16718`) - Bug in :func:`read_csv` not properly interpreting the UTF8 encoded filenames on Windows on Python 3.6+ (:issue:`15086`) @@ -644,7 +645,7 @@ Plotting - Fixed bug where :class:`api.extensions.ExtensionArray` could not be used in matplotlib plotting (:issue:`25587`) - Bug in an error message in :meth:`DataFrame.plot`. 
Improved the error message if non-numerics are passed to :meth:`DataFrame.plot` (:issue:`25481`) -- Bug in incorrect ticklabel positions when plotting an index that are non-numeric / non-datetime (:issue:`7612` :issue:`15912` :issue:`22334`) +- Bug in incorrect ticklabel positions when plotting an index that are non-numeric / non-datetime (:issue:`7612`, :issue:`15912`, :issue:`22334`) - Fixed bug causing plots of :class:`PeriodIndex` timeseries to fail if the frequency is a multiple of the frequency rule code (:issue:`14763`) - - @@ -655,7 +656,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.resample.Resampler.agg` with a timezone aware index where ``OverflowError`` would raise when passing a list of functions (:issue:`22660`) - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`) -- Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`) +- Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying an aggregation function to timezone aware data (:issue:`23683`) - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) - Bug in :func:`pandas.core.groupby.GroupBy.size` when grouping only NA values (:issue:`23050`) - Bug in :func:`Series.groupby` where ``observed`` kwarg was previously ignored (:issue:`24880`) @@ -663,11 +664,11 @@ Groupby/Resample/Rolling - Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`) - Ensured that result group order is correct when grouping on an ordered ``Categorical`` and specifying ``observed=True`` (:issue:`25871`, :issue:`25167`) - Bug in :meth:`pandas.core.window.Rolling.min` and :meth:`pandas.core.window.Rolling.max` that caused a memory leak (:issue:`25893`) -- Bug in :meth:`pandas.core.window.Rolling.count` and `pandas.core.window.Expanding.count` was previously ignoring the axis keyword (:issue:`13503`) +- Bug in :meth:`pandas.core.window.Rolling.count` and ``pandas.core.window.Expanding.count`` was previously ignoring the ``axis`` keyword (:issue:`13503`) - Bug in :meth:`pandas.core.groupby.GroupBy.idxmax` and :meth:`pandas.core.groupby.GroupBy.idxmin` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`) - Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`) - Bug in :meth:`pandas.core.groupby.GroupBy.nth` where NA values in the grouping would return incorrect results (:issue:`26011`) -- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise error (:issue:`26208`) +- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise a ``ValueError`` (:issue:`26208`) - Bug in :meth:`pandas.core.frame.DataFrame.groupby` where passing a :class:`pandas.core.groupby.grouper.Grouper` would return incorrect groups when using the ``.groups`` accessor (:issue:`26326`) - Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns. 
(:issue:`26310`) @@ -682,11 +683,11 @@ Reshaping - Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`). - Bug in :func:`merge` where merging with equivalent Categorical dtypes was raising an error (:issue:`22501`) - bug in :class:`DataFrame` instantiating with a dict of iterators or generators (e.g. ``pd.DataFrame({'A': reversed(range(3))})``) raised an error (:issue:`26349`). -- bug in :class:`DataFrame` instantiating with a ``range`` (e.g. ``pd.DataFrame(range(3))``) raised an error (:issue:`26342`). +- Bug in :class:`DataFrame` instantiating with a ``range`` (e.g. ``pd.DataFrame(range(3))``) raised an error (:issue:`26342`). - Bug in :class:`DataFrame` constructor when passing non-empty tuples would cause a segmentation fault (:issue:`25691`) - Bug in :func:`Series.apply` failed when the series is a timezone aware :class:`DatetimeIndex` (:issue:`25959`) - Bug in :func:`pandas.cut` where large bins could incorrectly raise an error due to an integer overflow (:issue:`26045`) -- Bug in :func:`DataFrame.sort_index` where an error is thrown when a multi-indexed DataFrame is sorted on all levels with the initial level sorted last (:issue:`26053`) +- Bug in :func:`DataFrame.sort_index` where an error is thrown when a multi-indexed ``DataFrame`` is sorted on all levels with the initial level sorted last (:issue:`26053`) - Bug in :meth:`Series.nlargest` treats ``True`` as smaller than ``False`` (:issue:`26154`) Sparse @@ -702,7 +703,7 @@ Other - Removed unused C functions from vendored UltraJSON implementation (:issue:`26198`) - Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`). -- Allow :class:`Index` and :class:`RangeIndex` to be passed to numpy ``min`` and ``max`` functions. +- Allow :class:`Index` and :class:`RangeIndex` to be passed to numpy ``min`` and ``max`` functions (:issue:`26125`) .. _whatsnew_0.250.contributors: From da6900e149c8e33090b71e9dba9ad58827318250 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 4 Jun 2019 12:23:42 +0100 Subject: [PATCH 29/43] DOC/CI: Removing Panel specific code from validate_docstrings.py (#26627) --- scripts/validate_docstrings.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 63db50db45a7c..64eaf45376b2f 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -539,14 +539,9 @@ def first_line_ends_in_dot(self): if self.doc: return self.doc.split('\n')[0][-1] == '.' - @property - def deprecated_with_directive(self): - return '.. deprecated:: ' in (self.summary + self.extended_summary) - @property def deprecated(self): - return (self.name.startswith('pandas.Panel') - or self.deprecated_with_directive) + return '.. deprecated:: ' in (self.summary + self.extended_summary) @property def mentioned_private_classes(self): @@ -674,7 +669,7 @@ def get_validation_data(doc): errs.append(error('GL07', correct_sections=', '.join(correct_order))) - if (doc.deprecated_with_directive + if (doc.deprecated and not doc.extended_summary.startswith('.. 
deprecated:: ')): errs.append(error('GL09')) @@ -859,9 +854,9 @@ def validate_all(prefix, ignore_deprecated=False): seen[shared_code_key] = func_name - # functions from introspecting Series, DataFrame and Panel + # functions from introspecting Series and DataFrame api_item_names = set(list(zip(*api_items))[0]) - for class_ in (pandas.Series, pandas.DataFrame, pandas.Panel): + for class_ in (pandas.Series, pandas.DataFrame): for member in inspect.getmembers(class_): func_name = 'pandas.{}.{}'.format(class_.__name__, member[0]) if (not member[0].startswith('_') From dbdd556d9b0b65f3054242dae7001a39f7e3bbc0 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Tue, 4 Jun 2019 23:59:01 +0000 Subject: [PATCH 30/43] Remove NDFrame.select (#26641) --- doc/source/reference/frame.rst | 1 - doc/source/reference/series.rst | 1 - doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/generic.py | 34 ------------------ .../tests/frame/test_axis_select_reindex.py | 35 ------------------- pandas/tests/series/indexing/test_indexing.py | 14 -------- 6 files changed, 1 insertion(+), 85 deletions(-) diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index b4fb85c028b3e..7d5cd5d245631 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -204,7 +204,6 @@ Reindexing / Selection / Label manipulation DataFrame.rename_axis DataFrame.reset_index DataFrame.sample - DataFrame.select DataFrame.set_axis DataFrame.set_index DataFrame.tail diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index 8fccdea979602..79beeb0022307 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -211,7 +211,6 @@ Reindexing / Selection / Label manipulation Series.rename_axis Series.reset_index Series.sample - Series.select Series.set_axis Series.take Series.tail diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 267e34efc946f..4e8af90b85f83 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -487,6 +487,7 @@ Removal of prior version deprecations/changes - Removed the previously deprecated ``parse_cols`` keyword in :func:`read_excel` (:issue:`16488`) - Removed the previously deprecated ``pd.options.html.border`` (:issue:`16970`) - Removed the previously deprecated ``convert_objects`` (:issue:`11221`) +- Removed the previously deprecated ``select`` method of ``DataFrame`` and ``Series`` (:issue:`17633`) .. _whatsnew_0250.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2428bbad7003b..19d093dd29457 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3682,40 +3682,6 @@ class animal locomotion _xs = xs # type: Callable - def select(self, crit, axis=0): - """ - Return data corresponding to axis labels matching criteria. - - .. deprecated:: 0.21.0 - Use df.loc[df.index.map(crit)] to select via labels - - Parameters - ---------- - crit : function - To be called on each index (label). Should return True or False - axis : int - - Returns - ------- - selection : same type as caller - """ - warnings.warn("'select' is deprecated and will be removed in a " - "future release. 
You can use " - ".loc[labels.map(crit)] as a replacement", - FutureWarning, stacklevel=2) - - axis = self._get_axis_number(axis) - axis_name = self._get_axis_name(axis) - axis_values = self._get_axis(axis) - - if len(axis_values) > 0: - new_axis = axis_values[ - np.asarray([bool(crit(label)) for label in axis_values])] - else: - new_axis = axis_values - - return self.reindex(**{axis_name: new_axis}) - def reindex_like(self, other, method=None, copy=True, limit=None, tolerance=None): """ diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index ad6c66c911615..42f98d5c96aa5 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -895,41 +895,6 @@ def test_filter_corner(self): result = empty.filter(like='foo') assert_frame_equal(result, empty) - def test_select(self): - - # deprecated: gh-12410 - f = lambda x: x.weekday() == 2 - index = self.tsframe.index[[f(x) for x in self.tsframe.index]] - expected_weekdays = self.tsframe.reindex(index=index) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = self.tsframe.select(f, axis=0) - assert_frame_equal(result, expected_weekdays) - - result = self.frame.select(lambda x: x in ('B', 'D'), axis=1) - expected = self.frame.reindex(columns=['B', 'D']) - assert_frame_equal(result, expected, check_names=False) - - # replacement - f = lambda x: x.weekday == 2 - result = self.tsframe.loc(axis=0)[f(self.tsframe.index)] - assert_frame_equal(result, expected_weekdays) - - crit = lambda x: x in ['B', 'D'] - result = self.frame.loc(axis=1)[(self.frame.columns.map(crit))] - expected = self.frame.reindex(columns=['B', 'D']) - assert_frame_equal(result, expected, check_names=False) - - # doc example - df = DataFrame({'A': [1, 2, 3]}, index=['foo', 'bar', 'baz']) - - crit = lambda x: x in ['bar', 'baz'] - with tm.assert_produces_warning(FutureWarning): - expected = df.select(crit) - result = df.loc[df.index.map(crit)] - assert_frame_equal(result, expected, check_names=False) - def test_take(self): # homogeneous order = [3, 1, 2, 0] diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 6641311faace2..702e22b6741e4 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -772,20 +772,6 @@ def test_setitem_slice_into_readonly_backing_data(): """ -def test_select(test_data): - # deprecated: gh-12410 - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - n = len(test_data.ts) - result = test_data.ts.select(lambda x: x >= test_data.ts.index[n // 2]) - expected = test_data.ts.reindex(test_data.ts.index[n // 2:]) - assert_series_equal(result, expected) - - result = test_data.ts.select(lambda x: x.weekday() == 2) - expected = test_data.ts[test_data.ts.index.weekday == 2] - assert_series_equal(result, expected) - - def test_pop(): # GH 6600 df = DataFrame({'A': 0, 'B': np.arange(5, dtype='int64'), 'C': 0, }) From 7370c1d29cc89ea067c068318734829ebb681f67 Mon Sep 17 00:00:00 2001 From: Mak Sze Chun Date: Wed, 5 Jun 2019 15:22:08 +0800 Subject: [PATCH 31/43] [TST] Fix test_quantile_interpolation_int (#26633) --- pandas/tests/frame/test_quantile.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 9ccbd290923ba..097477c42d249 100644 --- a/pandas/tests/frame/test_quantile.py +++ 
b/pandas/tests/frame/test_quantile.py @@ -160,8 +160,7 @@ def test_quantile_interpolation_int(self, int_frame): assert q['A'] == np.percentile(df['A'], 10) # test with and without interpolation keyword - # TODO: q1 is not different from q - q1 = df.quantile(0.1) + q1 = df.quantile(0.1, axis=0, interpolation='linear') assert q1['A'] == np.percentile(df['A'], 10) tm.assert_series_equal(q, q1) From 8a1f71490fc60e74090e2b2ea43b9293636369b2 Mon Sep 17 00:00:00 2001 From: shawnbrown Date: Wed, 5 Jun 2019 07:53:40 -0400 Subject: [PATCH 32/43] Update Accessors URL for PdVega package. (#26653) See altair-viz/pdvega@7476a8a26b for details. --- doc/source/ecosystem.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index e232bd2157611..b1a5430752558 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -363,4 +363,5 @@ Library Accessor Classes ============== ========== ========================= .. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest -.. _pdvega: https://jakevdp.github.io/pdvega/ +.. _pdvega: https://altair-viz.github.io/pdvega/ + From b6427263fc2aa154db6e1df203dc8280bdd99ba0 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 5 Jun 2019 13:46:37 +0100 Subject: [PATCH 33/43] DEPS: Adding missing doc dependencies to environment.yml (#26657) --- environment.yml | 7 +++++++ requirements-dev.txt | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/environment.yml b/environment.yml index cf17dc1281ec9..91ea26eef4b61 100644 --- a/environment.yml +++ b/environment.yml @@ -17,10 +17,17 @@ dependencies: - flake8-rst>=0.6.0,<=0.7.0 - gitpython - hypothesis>=3.82 + - ipywidgets - isort - moto - mypy + - nbconvert>=5.4.1 + - nbformat + - notebook>=5.7.5 + - pandoc - pycodestyle + - pyqt + - python-snappy - pytest>=4.0.2 - pytest-mock - sphinx diff --git a/requirements-dev.txt b/requirements-dev.txt index 115a93495c95b..e6085920a9999 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -8,10 +8,17 @@ flake8-comprehensions flake8-rst>=0.6.0,<=0.7.0 gitpython hypothesis>=3.82 +ipywidgets isort moto mypy +nbconvert>=5.4.1 +nbformat +notebook>=5.7.5 +pandoc pycodestyle +pyqt +python-snappy pytest>=4.0.2 pytest-mock sphinx From 5abb8c37394ecb703df3c65393da05bab7a5f8e7 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Wed, 5 Jun 2019 12:50:33 +0000 Subject: [PATCH 34/43] use range in RangeIndex instead of _start etc. (#26581) --- doc/source/whatsnew/v0.25.0.rst | 3 + pandas/core/dtypes/common.py | 29 +++ pandas/core/dtypes/concat.py | 21 +- pandas/core/frame.py | 10 +- pandas/core/indexes/range.py | 304 +++++++++++++---------------- pandas/core/series.py | 6 +- pandas/io/packers.py | 7 +- pandas/tests/indexes/test_range.py | 22 ++- 8 files changed, 202 insertions(+), 200 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 4e8af90b85f83..4018418294963 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -476,6 +476,9 @@ Other Deprecations the :meth:`SparseArray.to_dense` method instead (:issue:`26421`). - The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`) - The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`). 
+- The internal attributes ``_start``, ``_stop`` and ``_step`` attributes of :class:`RangeIndex` have been deprecated. + Use the public attributes :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop` and :attr:`~RangeIndex.step` instead (:issue:`26581`). + .. _whatsnew_0250.prior_deprecations: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index b5cd73a81962b..4029e6f4bfdb5 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1,4 +1,5 @@ """ common type operations """ +from typing import Union import warnings import numpy as np @@ -125,6 +126,34 @@ def ensure_int_or_float(arr: ArrayLike, copy=False) -> np.array: return arr.astype('float64', copy=copy) +def ensure_python_int(value: Union[int, np.integer]) -> int: + """ + Ensure that a value is a python int. + + Parameters + ---------- + value: int or numpy.integer + + Returns + ------- + int + + Raises + ------ + TypeError: if the value isn't an int or can't be converted to one. + """ + if not is_scalar(value): + raise TypeError("Value needs to be a scalar value, was type {}" + .format(type(value))) + msg = "Wrong type {} for value {}" + try: + new_value = int(value) + assert (new_value == value) + except (TypeError, ValueError, AssertionError): + raise TypeError(msg.format(type(value), value)) + return new_value + + def classes(*klasses): """ evaluate if the tipo is a subclass of the klasses """ return lambda tipo: issubclass(tipo, klasses) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index b22ed45642cf6..e2c6fba322be0 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -541,36 +541,37 @@ def _concat_rangeindex_same_dtype(indexes): """ from pandas import Int64Index, RangeIndex - start = step = next = None + start = step = next_ = None # Filter the empty indexes non_empty_indexes = [obj for obj in indexes if len(obj)] for obj in non_empty_indexes: + rng = obj._range # type: range if start is None: # This is set by the first non-empty index - start = obj._start - if step is None and len(obj) > 1: - step = obj._step + start = rng.start + if step is None and len(rng) > 1: + step = rng.step elif step is None: # First non-empty index had only one element - if obj._start == start: + if rng.start == start: return _concat_index_same_dtype(indexes, klass=Int64Index) - step = obj._start - start + step = rng.start - start - non_consecutive = ((step != obj._step and len(obj) > 1) or - (next is not None and obj._start != next)) + non_consecutive = ((step != rng.step and len(rng) > 1) or + (next_ is not None and rng.start != next_)) if non_consecutive: return _concat_index_same_dtype(indexes, klass=Int64Index) if step is not None: - next = obj[-1] + step + next_ = rng[-1] + step if non_empty_indexes: # Get the stop value from "next" or alternatively # from the last non-empty index - stop = non_empty_indexes[-1]._stop if next is None else next + stop = non_empty_indexes[-1].stop if next_ is None else next_ return RangeIndex(start, stop, step) # Here all "indexes" had 0 length, i.e. were empty. 
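With the helper now reading the underlying ``range`` objects instead of the private ``_start``/``_stop``/``_step`` attributes, the observable behavior should be unchanged: concatenating ``RangeIndex`` pieces that line up start-to-stop with a compatible step still yields a ``RangeIndex``, and anything else falls back to ``Int64Index``. A quick sketch (using ``Index.append``, which dispatches through this helper for same-dtype indexes):

.. code-block:: python

    import pandas as pd

    left = pd.RangeIndex(0, 4)    # 0, 1, 2, 3
    right = pd.RangeIndex(4, 8)   # 4, 5, 6, 7 -- continues `left` exactly

    # Consecutive ranges collapse back into a single RangeIndex
    left.append(right)            # RangeIndex(start=0, stop=8, step=1)

    # A gap between the pieces falls back to Int64Index
    left.append(pd.RangeIndex(10, 12))
    # Int64Index([0, 1, 2, 3, 10, 11], dtype='int64')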
diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5957b23535350..48dfa57c47bf6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2282,7 +2282,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, text_col 5 non-null object float_col 5 non-null float64 dtypes: float64(1), int64(1), object(1) - memory usage: 200.0+ bytes + memory usage: 248.0+ bytes Prints a summary of columns count and its dtypes but not per column information: @@ -2292,7 +2292,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, RangeIndex: 5 entries, 0 to 4 Columns: 3 entries, int_col to float_col dtypes: float64(1), int64(1), object(1) - memory usage: 200.0+ bytes + memory usage: 248.0+ bytes Pipe output of DataFrame.info to buffer instead of sys.stdout, get buffer content and writes to a text file: @@ -2494,7 +2494,7 @@ def memory_usage(self, index=True, deep=False): 4 1 1.0 1.0+0.0j 1 True >>> df.memory_usage() - Index 80 + Index 128 int64 40000 float64 40000 complex128 80000 @@ -2513,7 +2513,7 @@ def memory_usage(self, index=True, deep=False): The memory footprint of `object` dtype columns is ignored by default: >>> df.memory_usage(deep=True) - Index 80 + Index 128 int64 40000 float64 40000 complex128 80000 @@ -2525,7 +2525,7 @@ def memory_usage(self, index=True, deep=False): many repeated values. >>> df['object'].astype('category').memory_usage(deep=True) - 5168 + 5216 """ result = Series([c.memory_usage(index=False, deep=deep) for col, c in self.iteritems()], index=self.columns) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 9401de3346ccd..82fd7342c027c 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -12,7 +12,8 @@ from pandas.core.dtypes import concat as _concat from pandas.core.dtypes.common import ( - is_int64_dtype, is_integer, is_scalar, is_timedelta64_dtype) + ensure_python_int, is_int64_dtype, is_integer, is_scalar, + is_timedelta64_dtype) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCSeries, ABCTimedeltaIndex) @@ -65,6 +66,7 @@ class RangeIndex(Int64Index): _typ = 'rangeindex' _engine_type = libindex.Int64Engine + _range = None # type: range # check whether self._data has benn called _cached_data = None # type: np.ndarray @@ -91,39 +93,19 @@ def __new__(cls, start=None, stop=None, step=None, **dict(start._get_data_as_items())) # validate the arguments - def ensure_int(value, field): - msg = ("RangeIndex(...) must be called with integers," - " {value} was passed for {field}") - if not is_scalar(value): - raise TypeError(msg.format(value=type(value).__name__, - field=field)) - try: - new_value = int(value) - assert(new_value == value) - except (TypeError, ValueError, AssertionError): - raise TypeError(msg.format(value=type(value).__name__, - field=field)) + if com._all_none(start, stop, step): + raise TypeError("RangeIndex(...) must be called with integers") - return new_value + start = ensure_python_int(start) if start is not None else 0 - if com._all_none(start, stop, step): - msg = "RangeIndex(...) 
must be called with integers" - raise TypeError(msg) - elif start is None: - start = 0 - else: - start = ensure_int(start, 'start') if stop is None: - stop = start - start = 0 + start, stop = 0, start else: - stop = ensure_int(stop, 'stop') - if step is None: - step = 1 - elif step == 0: + stop = ensure_python_int(stop) + + step = ensure_python_int(step) if step is not None else 1 + if step == 0: raise ValueError("Step must not be zero") - else: - step = ensure_int(step, 'step') return cls._simple_new(start, stop, step, name) @@ -142,7 +124,7 @@ def from_range(cls, data, name=None, dtype=None, **kwargs): 'range, {1} was passed'.format(cls.__name__, repr(data))) start, stop, step = data.start, data.stop, data.step - return RangeIndex(start, stop, step, dtype=dtype, name=name, **kwargs) + return cls(start, stop, step, dtype=dtype, name=name, **kwargs) @classmethod def _simple_new(cls, start, stop=None, step=None, name=None, @@ -156,20 +138,16 @@ def _simple_new(cls, start, stop=None, step=None, name=None, if start is None or not is_integer(start): try: - - return RangeIndex(start, stop, step, name=name, **kwargs) + return cls(start, stop, step, name=name, **kwargs) except TypeError: return Index(start, stop, step, name=name, **kwargs) - result._start = start - result._stop = stop or 0 - result._step = step or 1 + result._range = range(start, stop or 0, step or 1) + result.name = name for k, v in kwargs.items(): setattr(result, k, v) - result._range = range(result._start, result._stop, result._step) - result._reset_identity() return result @@ -196,7 +174,7 @@ def _data(self): triggering the construction. """ if self._cached_data is None: - self._cached_data = np.arange(self._start, self._stop, self._step, + self._cached_data = np.arange(self.start, self.stop, self.step, dtype=np.int64) return self._cached_data @@ -206,9 +184,10 @@ def _int64index(self): def _get_data_as_items(self): """ return a list of tuples of start, stop, step """ - return [('start', self._start), - ('stop', self._stop), - ('step', self._step)] + rng = self._range + return [('start', rng.start), + ('stop', rng.stop), + ('step', rng.step)] def __reduce__(self): d = self._get_attributes_dict() @@ -235,39 +214,79 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs): return header + list(map(pprint_thing, self._range)) # -------------------------------------------------------------------- - @property + _deprecation_message = ("RangeIndex.{} is deprecated and will be " + "removed in a future version. Use RangeIndex.{} " + "instead") + + @cache_readonly def start(self): """ - The value of the `start` parameter (or ``0`` if this was not supplied) + The value of the `start` parameter (``0`` if this was not supplied) """ # GH 25710 - return self._start + return self._range.start @property + def _start(self): + """ + The value of the `start` parameter (``0`` if this was not supplied) + + .. deprecated:: 0.25.0 + Use ``start`` instead. + """ + warnings.warn(self._deprecation_message.format("_start", "start"), + DeprecationWarning, stacklevel=2) + return self.start + + @cache_readonly def stop(self): """ The value of the `stop` parameter """ - # GH 25710 - return self._stop + return self._range.stop @property + def _stop(self): + """ + The value of the `stop` parameter + + .. deprecated:: 0.25.0 + Use ``stop`` instead. 
+ """ + # GH 25710 + warnings.warn(self._deprecation_message.format("_stop", "stop"), + DeprecationWarning, stacklevel=2) + return self.stop + + @cache_readonly def step(self): """ - The value of the `step` parameter (or ``1`` if this was not supplied) + The value of the `step` parameter (``1`` if this was not supplied) """ # GH 25710 - return self._step + return self._range.step + + @property + def _step(self): + """ + The value of the `step` parameter (``1`` if this was not supplied) + + .. deprecated:: 0.25.0 + Use ``step`` instead. + """ + # GH 25710 + warnings.warn(self._deprecation_message.format("_step", "step"), + DeprecationWarning, stacklevel=2) + return self.step @cache_readonly def nbytes(self): """ - Return the number of bytes in the underlying data - On implementations where this is undetermined (PyPy) - assume 24 bytes for each value + Return the number of bytes in the underlying data. """ - return sum(getsizeof(getattr(self, v), 24) for v in - ['_start', '_stop', '_step']) + rng = self._range + return getsizeof(rng) + sum(getsizeof(getattr(rng, attr_name)) + for attr_name in ['start', 'stop', 'step']) def memory_usage(self, deep=False): """ @@ -305,11 +324,11 @@ def is_unique(self): @cache_readonly def is_monotonic_increasing(self): - return self._step > 0 or len(self) <= 1 + return self._range.step > 0 or len(self) <= 1 @cache_readonly def is_monotonic_decreasing(self): - return self._step < 0 or len(self) <= 1 + return self._range.step < 0 or len(self) <= 1 @property def has_duplicates(self): @@ -325,13 +344,13 @@ def get_loc(self, key, method=None, tolerance=None): return super().get_loc(key, method=method, tolerance=tolerance) def tolist(self): - return list(range(self._start, self._stop, self._step)) + return list(self._range) @Appender(_index_shared_docs['_shallow_copy']) def _shallow_copy(self, values=None, **kwargs): if values is None: name = kwargs.get("name", self.name) - return RangeIndex._simple_new( + return self._simple_new( name=name, **dict(self._get_data_as_items())) else: kwargs.setdefault('name', self.name) @@ -342,18 +361,17 @@ def copy(self, name=None, deep=False, dtype=None, **kwargs): self._validate_dtype(dtype) if name is None: name = self.name - return RangeIndex._simple_new( - name=name, **dict(self._get_data_as_items())) + return self.from_range(self._range, name=name) def _minmax(self, meth): no_steps = len(self) - 1 if no_steps == -1: return np.nan - elif ((meth == 'min' and self._step > 0) or - (meth == 'max' and self._step < 0)): - return self._start + elif ((meth == 'min' and self.step > 0) or + (meth == 'max' and self.step < 0)): + return self.start - return self._start + self._step * no_steps + return self.start + self.step * no_steps def min(self, axis=None, skipna=True, *args, **kwargs): """The minimum value of the RangeIndex""" @@ -382,7 +400,7 @@ def argsort(self, *args, **kwargs): """ nv.validate_argsort(args, kwargs) - if self._step > 0: + if self._range.step > 0: return np.arange(len(self)) else: return np.arange(len(self) - 1, -1, -1) @@ -392,15 +410,7 @@ def equals(self, other): Determines if two Index objects contain the same elements. 
""" if isinstance(other, RangeIndex): - ls = len(self) - lo = len(other) - return (ls == lo == 0 or - ls == lo == 1 and - self._start == other._start or - ls == lo and - self._start == other._start and - self._step == other._step) - + return self._range == other._range return super().equals(other) def intersection(self, other, sort=False): @@ -433,39 +443,40 @@ def intersection(self, other, sort=False): return super().intersection(other, sort=sort) if not len(self) or not len(other): - return RangeIndex._simple_new(None) + return self._simple_new(None) - first = self[::-1] if self._step < 0 else self - second = other[::-1] if other._step < 0 else other + first = self._range[::-1] if self.step < 0 else self._range + second = other._range[::-1] if other.step < 0 else other._range # check whether intervals intersect # deals with in- and decreasing ranges - int_low = max(first._start, second._start) - int_high = min(first._stop, second._stop) + int_low = max(first.start, second.start) + int_high = min(first.stop, second.stop) if int_high <= int_low: - return RangeIndex._simple_new(None) + return self._simple_new(None) # Method hint: linear Diophantine equation # solve intersection problem # performance hint: for identical step sizes, could use # cheaper alternative - gcd, s, t = first._extended_gcd(first._step, second._step) + gcd, s, t = self._extended_gcd(first.step, second.step) # check whether element sets intersect - if (first._start - second._start) % gcd: - return RangeIndex._simple_new(None) + if (first.start - second.start) % gcd: + return self._simple_new(None) # calculate parameters for the RangeIndex describing the # intersection disregarding the lower bounds - tmp_start = first._start + (second._start - first._start) * \ - first._step // gcd * s - new_step = first._step * second._step // gcd - new_index = RangeIndex._simple_new(tmp_start, int_high, new_step) + tmp_start = first.start + (second.start - first.start) * \ + first.step // gcd * s + new_step = first.step * second.step // gcd + new_index = self._simple_new(tmp_start, int_high, new_step) # adjust index to limiting interval - new_index._start = new_index._min_fitting_element(int_low) + new_start = new_index._min_fitting_element(int_low) + new_index = self._simple_new(new_start, new_index.stop, new_index.step) - if (self._step < 0 and other._step < 0) is not (new_index._step < 0): + if (self.step < 0 and other.step < 0) is not (new_index.step < 0): new_index = new_index[::-1] if sort is None: new_index = new_index.sort_values() @@ -473,13 +484,13 @@ def intersection(self, other, sort=False): def _min_fitting_element(self, lower_limit): """Returns the smallest element greater than or equal to the limit""" - no_steps = -(-(lower_limit - self._start) // abs(self._step)) - return self._start + abs(self._step) * no_steps + no_steps = -(-(lower_limit - self.start) // abs(self.step)) + return self.start + abs(self.step) * no_steps def _max_fitting_element(self, upper_limit): """Returns the largest element smaller than or equal to the limit""" - no_steps = (upper_limit - self._start) // abs(self._step) - return self._start + abs(self._step) * no_steps + no_steps = (upper_limit - self.start) // abs(self.step) + return self.start + abs(self.step) * no_steps def _extended_gcd(self, a, b): """ @@ -522,16 +533,16 @@ def _union(self, other, sort): return super()._union(other, sort=sort) if isinstance(other, RangeIndex) and sort is None: - start_s, step_s = self._start, self._step - end_s = self._start + self._step * (len(self) - 1) - 
start_o, step_o = other._start, other._step - end_o = other._start + other._step * (len(other) - 1) - if self._step < 0: + start_s, step_s = self.start, self.step + end_s = self.start + self.step * (len(self) - 1) + start_o, step_o = other.start, other.step + end_o = other.start + other.step * (len(other) - 1) + if self.step < 0: start_s, step_s, end_s = end_s, -step_s, start_s - if other._step < 0: + if other.step < 0: start_o, step_o, end_o = end_o, -step_o, start_o if len(self) == 1 and len(other) == 1: - step_s = step_o = abs(self._start - other._start) + step_s = step_o = abs(self.start - other.start) elif len(self) == 1: step_s = step_o elif len(other) == 1: @@ -542,21 +553,23 @@ def _union(self, other, sort): if ((start_s - start_o) % step_s == 0 and (start_s - end_o) <= step_s and (start_o - end_s) <= step_s): - return RangeIndex(start_r, end_r + step_s, step_s) + return self.__class__(start_r, end_r + step_s, step_s) if ((step_s % 2 == 0) and (abs(start_s - start_o) <= step_s / 2) and (abs(end_s - end_o) <= step_s / 2)): - return RangeIndex(start_r, end_r + step_s / 2, step_s / 2) + return self.__class__(start_r, + end_r + step_s / 2, + step_s / 2) elif step_o % step_s == 0: if ((start_o - start_s) % step_s == 0 and (start_o + step_s >= start_s) and (end_o - step_s <= end_s)): - return RangeIndex(start_r, end_r + step_s, step_s) + return self.__class__(start_r, end_r + step_s, step_s) elif step_s % step_o == 0: if ((start_s - start_o) % step_o == 0 and (start_s + step_o >= start_o) and (end_s - step_o <= end_o)): - return RangeIndex(start_r, end_r + step_o, step_o) + return self.__class__(start_r, end_r + step_o, step_o) return self._int64index._union(other, sort=sort) @Appender(_index_shared_docs['join']) @@ -576,7 +589,7 @@ def __len__(self): """ return the length of the RangeIndex """ - return max(0, -(-(self._stop - self._start) // self._step)) + return len(self._range) @property def size(self): @@ -597,59 +610,15 @@ def __getitem__(self, key): n = com.cast_scalar_indexer(key) if n != key: return super_getitem(key) - if n < 0: - n = len(self) + key - if n < 0 or n > len(self) - 1: + try: + return self._range[key] + except IndexError: raise IndexError("index {key} is out of bounds for axis 0 " "with size {size}".format(key=key, size=len(self))) - return self._start + n * self._step - if isinstance(key, slice): - - # This is basically PySlice_GetIndicesEx, but delegation to our - # super routines if we don't have integers - - length = len(self) - - # complete missing slice information - step = 1 if key.step is None else key.step - if key.start is None: - start = length - 1 if step < 0 else 0 - else: - start = key.start - - if start < 0: - start += length - if start < 0: - start = -1 if step < 0 else 0 - if start >= length: - start = length - 1 if step < 0 else length - - if key.stop is None: - stop = -1 if step < 0 else length - else: - stop = key.stop - - if stop < 0: - stop += length - if stop < 0: - stop = -1 - if stop > length: - stop = length - - # delegate non-integer slices - if (start != int(start) or - stop != int(stop) or - step != int(step)): - return super_getitem(key) - - # convert indexes to values - start = self._start + self._step * start - stop = self._start + self._step * stop - step = self._step * step - - return RangeIndex._simple_new(start, stop, step, name=self.name) + new_range = self._range[key] + return self.from_range(new_range, name=self.name) # fall back to Int64Index return super_getitem(key) @@ -660,17 +629,15 @@ def __floordiv__(self, other): if 
is_integer(other) and other != 0: if (len(self) == 0 or - self._start % other == 0 and - self._step % other == 0): - start = self._start // other - step = self._step // other + self.start % other == 0 and + self.step % other == 0): + start = self.start // other + step = self.step // other stop = start + len(self) * step - return RangeIndex._simple_new( - start, stop, step, name=self.name) + return self._simple_new(start, stop, step, name=self.name) if len(self) == 1: - start = self._start // other - return RangeIndex._simple_new( - start, start + 1, 1, name=self.name) + start = self.start // other + return self._simple_new(start, start + 1, 1, name=self.name) return self._int64index // other @classmethod @@ -712,7 +679,7 @@ def _evaluate_numeric_binop(self, other): # apply if we have an override if step: with np.errstate(all='ignore'): - rstep = step(left._step, right) + rstep = step(left.step, right) # we don't have a representable op # so return a base index @@ -720,16 +687,13 @@ def _evaluate_numeric_binop(self, other): raise ValueError else: - rstep = left._step + rstep = left.step with np.errstate(all='ignore'): - rstart = op(left._start, right) - rstop = op(left._stop, right) + rstart = op(left.start, right) + rstop = op(left.stop, right) - result = RangeIndex(rstart, - rstop, - rstep, - **attrs) + result = self.__class__(rstart, rstop, rstep, **attrs) # for compat with numpy / Int64Index # even if we can represent as a RangeIndex, return diff --git a/pandas/core/series.py b/pandas/core/series.py index 8fb6ad3e3ccc5..472d984234275 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4010,7 +4010,7 @@ def memory_usage(self, index=True, deep=False): -------- >>> s = pd.Series(range(3)) >>> s.memory_usage() - 104 + 152 Not including the index gives the size of the rest of the data, which is necessarily smaller: @@ -4024,9 +4024,9 @@ def memory_usage(self, index=True, deep=False): >>> s.values array(['a', 'b'], dtype=object) >>> s.memory_usage() - 96 + 144 >>> s.memory_usage(deep=True) - 212 + 260 """ v = super().memory_usage(deep=deep) if index: diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 1309bd1fef421..ead0fbd263ebf 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -367,9 +367,10 @@ def encode(obj): return {'typ': 'range_index', 'klass': obj.__class__.__name__, 'name': getattr(obj, 'name', None), - 'start': getattr(obj, '_start', None), - 'stop': getattr(obj, '_stop', None), - 'step': getattr(obj, '_step', None)} + 'start': obj._range.start, + 'stop': obj._range.stop, + 'step': obj._range.step, + } elif isinstance(obj, PeriodIndex): return {'typ': 'period_index', 'klass': obj.__class__.__name__, diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 477a4e527f278..bca50186827de 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -51,10 +51,8 @@ def test_constructor(self, args, kwargs, start, stop, step, name): expected = Index(np.arange(start, stop, step, dtype=np.int64), name=name) assert isinstance(result, RangeIndex) - assert result._start == start - assert result._stop == stop - assert result._step == step assert result.name is name + assert result._range == range(start, stop, step) tm.assert_index_equal(result, expected) def test_constructor_invalid_args(self): @@ -169,14 +167,19 @@ def test_start_stop_step_attrs(self, index, start, stop, step): assert index.stop == stop assert index.step == step + def test_deprecated_start_stop_step_attrs(self): + # GH 26581 
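+        # each deprecated attribute is a thin shim that warns and then
+        # forwards to the matching public property, so reading it under
+        # assert_produces_warning covers the whole code path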
+ idx = self.create_index() + for attr_name in ['_start', '_stop', '_step']: + with tm.assert_produces_warning(DeprecationWarning): + getattr(idx, attr_name) + def test_copy(self): i = RangeIndex(5, name='Foo') i_copy = i.copy() assert i_copy is not i assert i_copy.identical(i) - assert i_copy._start == 0 - assert i_copy._stop == 5 - assert i_copy._step == 1 + assert i_copy._range == range(0, 5, 1) assert i_copy.name == 'Foo' def test_repr(self): @@ -243,8 +246,9 @@ def test_dtype(self): def test_cached_data(self): # GH 26565 - # Calling RangeIndex._data caches an int64 array of the same length at - # self._cached_data. This tests whether _cached_data has been set. + # Calling RangeIndex._data caches an int64 array of the same length as + # self at self._cached_data. + # This tests whether _cached_data is being set by various operations. idx = RangeIndex(0, 100, 10) assert idx._cached_data is None @@ -273,7 +277,7 @@ def test_cached_data(self): df.iloc[5:10] assert idx._cached_data is None - # actually calling data._data + # actually calling idx._data assert isinstance(idx._data, np.ndarray) assert isinstance(idx._cached_data, np.ndarray) From b5535dd0262113a8ca18b8bb1e5f0d35898c1c29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Heikkil=C3=A4?= <42970828+mahepe@users.noreply.github.com> Date: Wed, 5 Jun 2019 15:54:34 +0300 Subject: [PATCH 35/43] TST: Test sorting levels not aligned with index (#25775) (#26492) --- pandas/tests/frame/test_sorting.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 246ba943a4509..96aeb608ba3b8 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -227,6 +227,18 @@ def test_stable_descending_multicolumn_sort(self): kind='mergesort') assert_frame_equal(sorted_df, expected) + def test_sort_multi_index(self): + # GH 25775, testing that sorting by index works with a multi-index. 
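+        # level=['b', 'a'] sorts primarily by level 'b' (all zeros here)
+        # and then by level 'a', even though the index stores its levels
+        # in the order a, b, c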
+ df = DataFrame({'a': [3, 1, 2], 'b': [0, 0, 0], + 'c': [0, 1, 2], 'd': list('abc')}) + result = df.set_index(list('abc')).sort_index(level=list('ba')) + + expected = DataFrame({'a': [1, 2, 3], 'b': [0, 0, 0], + 'c': [1, 2, 0], 'd': list('bca')}) + expected = expected.set_index(list('abc')) + + tm.assert_frame_equal(result, expected) + def test_stable_categorial(self): # GH 16793 df = DataFrame({ From d8c2b40c0a55d2db6c5a65f4c921a0004bb6df17 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Wed, 5 Jun 2019 08:59:12 -0400 Subject: [PATCH 36/43] Remove SharedItems from test_excel (#26579) --- pandas/tests/io/test_excel.py | 332 +++++++++++++++++----------------- 1 file changed, 169 insertions(+), 163 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 7693caf3b31d2..b99f0336fa4c5 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -26,13 +26,22 @@ from pandas.io.formats.excel import ExcelFormatter from pandas.io.parsers import read_csv -_seriesd = tm.getSeriesData() -_tsd = tm.getTimeSeriesData() -_frame = DataFrame(_seriesd)[:10] -_frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A'])[:10] -_tsframe = tm.makeTimeDataFrame()[:5] -_mixed_frame = _frame.copy() -_mixed_frame['foo'] = 'bar' + +@pytest.fixture +def frame(float_frame): + return float_frame[:10] + + +@pytest.fixture +def frame2(float_frame): + float_frame = float_frame.copy() + float_frame.columns = ['D', 'C', 'B', 'A'] + return float_frame[:10] + + +@pytest.fixture +def tsframe(): + return tm.makeTimeDataFrame()[:5] @contextlib.contextmanager @@ -49,18 +58,8 @@ def ignore_xlrd_time_clock_warning(): yield -class SharedItems: - - @pytest.fixture(autouse=True) - def setup_method(self, datapath): - self.frame = _frame.copy() - self.frame2 = _frame2.copy() - self.tsframe = _tsframe.copy() - self.mixed_frame = _mixed_frame.copy() - - @td.skip_if_no('xlrd', '1.0.0') -class ReadingTestsBase(SharedItems): +class ReadingTestsBase: # This is based on ExcelWriterBase @pytest.fixture(autouse=True, params=['xlrd', None]) @@ -1055,9 +1054,9 @@ class TestXlrdReader(ReadingTestsBase): """ @td.skip_if_no("xlwt") - def test_read_xlrd_book(self, ext): + def test_read_xlrd_book(self, ext, frame): import xlrd - df = self.frame + df = frame engine = "xlrd" sheet_name = "SheetA" @@ -1075,7 +1074,7 @@ def test_read_xlrd_book(self, ext): tm.assert_frame_equal(df, result) -class _WriterBase(SharedItems): +class _WriterBase: @pytest.fixture(autouse=True) def set_engine_and_path(self, request, merge_cells, engine, ext): @@ -1150,75 +1149,79 @@ def test_excel_sheet_by_name_raise(self, *_): with pytest.raises(xlrd.XLRDError): pd.read_excel(xl, "0") - def test_excel_writer_context_manager(self, *_): + def test_excel_writer_context_manager(self, frame, frame2, *_): with ExcelWriter(self.path) as writer: - self.frame.to_excel(writer, "Data1") - self.frame2.to_excel(writer, "Data2") + frame.to_excel(writer, "Data1") + frame2.to_excel(writer, "Data2") with ExcelFile(self.path) as reader: found_df = pd.read_excel(reader, "Data1", index_col=0) found_df2 = pd.read_excel(reader, "Data2", index_col=0) - tm.assert_frame_equal(found_df, self.frame) - tm.assert_frame_equal(found_df2, self.frame2) + tm.assert_frame_equal(found_df, frame) + tm.assert_frame_equal(found_df2, frame2) - def test_roundtrip(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan + def test_roundtrip(self, merge_cells, engine, ext, frame): + frame = frame.copy() + frame['A'][:5] = nan - self.frame.to_excel(self.path, 
'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', index=False) # test roundtrip - self.frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1') recons = pd.read_excel(self.path, 'test1', index_col=0) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1', index=False) recons = pd.read_excel(self.path, 'test1', index_col=None) - recons.index = self.frame.index - tm.assert_frame_equal(self.frame, recons) + recons.index = frame.index + tm.assert_frame_equal(frame, recons) - self.frame.to_excel(self.path, 'test1', na_rep='NA') + frame.to_excel(self.path, 'test1', na_rep='NA') recons = pd.read_excel( self.path, 'test1', index_col=0, na_values=['NA']) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) # GH 3611 - self.frame.to_excel(self.path, 'test1', na_rep='88') + frame.to_excel(self.path, 'test1', na_rep='88') recons = pd.read_excel( self.path, 'test1', index_col=0, na_values=['88']) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) - self.frame.to_excel(self.path, 'test1', na_rep='88') + frame.to_excel(self.path, 'test1', na_rep='88') recons = pd.read_excel( self.path, 'test1', index_col=0, na_values=[88, 88.0]) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) # GH 6573 - self.frame.to_excel(self.path, 'Sheet1') + frame.to_excel(self.path, 'Sheet1') recons = pd.read_excel(self.path, index_col=0) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) - self.frame.to_excel(self.path, '0') + frame.to_excel(self.path, '0') recons = pd.read_excel(self.path, index_col=0) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) # GH 8825 Pandas Series should provide to_excel method - s = self.frame["A"] + s = frame["A"] s.to_excel(self.path) recons = pd.read_excel(self.path, index_col=0) tm.assert_frame_equal(s.to_frame(), recons) - def test_mixed(self, merge_cells, engine, ext): - self.mixed_frame.to_excel(self.path, 'test1') + def test_mixed(self, merge_cells, engine, ext, frame): + mixed_frame = frame.copy() + mixed_frame['foo'] = 'bar' + + mixed_frame.to_excel(self.path, 'test1') reader = ExcelFile(self.path) recons = pd.read_excel(reader, 'test1', index_col=0) - tm.assert_frame_equal(self.mixed_frame, recons) + tm.assert_frame_equal(mixed_frame, recons) - def test_ts_frame(self, *_): - df = tm.makeTimeDataFrame()[:5] + def test_ts_frame(self, tsframe, *_): + df = tsframe df.to_excel(self.path, "test1") reader = ExcelFile(self.path) @@ -1226,33 +1229,34 @@ def test_ts_frame(self, *_): recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(df, recons) - def test_basics_with_nan(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan - self.frame.to_excel(self.path, 'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + def test_basics_with_nan(self, merge_cells, engine, ext, frame): + frame = frame.copy() + frame['A'][:5] = nan + 
frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', index=False) @pytest.mark.parametrize("np_type", [ np.int8, np.int16, np.int32, np.int64]) def test_int_types(self, merge_cells, engine, ext, np_type): # Test np.int values read come back as int # (rather than float which is Excel's format). - frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), - dtype=np_type) - frame.to_excel(self.path, "test1") + df = DataFrame(np.random.randint(-10, 10, size=(10, 2)), + dtype=np_type) + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) - int_frame = frame.astype(np.int64) + int_frame = df.astype(np.int64) tm.assert_frame_equal(int_frame, recons) recons2 = pd.read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(int_frame, recons2) # Test with convert_float=False comes back as float. - float_frame = frame.astype(float) + float_frame = df.astype(float) recons = pd.read_excel(self.path, "test1", convert_float=False, index_col=0) tm.assert_frame_equal(recons, float_frame, @@ -1263,120 +1267,123 @@ def test_int_types(self, merge_cells, engine, ext, np_type): np.float16, np.float32, np.float64]) def test_float_types(self, merge_cells, engine, ext, np_type): # Test np.float values read come back as float. - frame = DataFrame(np.random.random_sample(10), dtype=np_type) - frame.to_excel(self.path, "test1") + df = DataFrame(np.random.random_sample(10), dtype=np_type) + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) - tm.assert_frame_equal(frame, recons, check_dtype=False) + tm.assert_frame_equal(df, recons, check_dtype=False) @pytest.mark.parametrize("np_type", [np.bool8, np.bool_]) def test_bool_types(self, merge_cells, engine, ext, np_type): # Test np.bool values read come back as float. 
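        # np.bool8 is an alias of np.bool_, so both parametrized cases
        # exercise the same underlying dtype.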
- frame = (DataFrame([1, 0, True, False], dtype=np_type)) - frame.to_excel(self.path, "test1") + df = (DataFrame([1, 0, True, False], dtype=np_type)) + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) - tm.assert_frame_equal(frame, recons) + tm.assert_frame_equal(df, recons) def test_inf_roundtrip(self, *_): - frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) - frame.to_excel(self.path, "test1") + df = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) - tm.assert_frame_equal(frame, recons) + tm.assert_frame_equal(df, recons) - def test_sheets(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan + def test_sheets(self, merge_cells, engine, ext, frame, tsframe): + frame = frame.copy() + frame['A'][:5] = nan - self.frame.to_excel(self.path, 'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', index=False) # Test writing to separate sheets writer = ExcelWriter(self.path) - self.frame.to_excel(writer, 'test1') - self.tsframe.to_excel(writer, 'test2') + frame.to_excel(writer, 'test1') + tsframe.to_excel(writer, 'test2') writer.save() reader = ExcelFile(self.path) recons = pd.read_excel(reader, 'test1', index_col=0) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) recons = pd.read_excel(reader, 'test2', index_col=0) - tm.assert_frame_equal(self.tsframe, recons) + tm.assert_frame_equal(tsframe, recons) assert 2 == len(reader.sheet_names) assert 'test1' == reader.sheet_names[0] assert 'test2' == reader.sheet_names[1] - def test_colaliases(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan + def test_colaliases(self, merge_cells, engine, ext, frame, frame2): + frame = frame.copy() + frame['A'][:5] = nan - self.frame.to_excel(self.path, 'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', index=False) # column aliases col_aliases = Index(['AA', 'X', 'Y', 'Z']) - self.frame2.to_excel(self.path, 'test1', header=col_aliases) + frame2.to_excel(self.path, 'test1', header=col_aliases) reader = ExcelFile(self.path) rs = pd.read_excel(reader, 'test1', index_col=0) - xp = self.frame2.copy() + xp = frame2.copy() xp.columns = col_aliases tm.assert_frame_equal(xp, rs) - def test_roundtrip_indexlabels(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan + def test_roundtrip_indexlabels(self, merge_cells, engine, ext, frame): + frame = frame.copy() + frame['A'][:5] = nan - self.frame.to_excel(self.path, 'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + 
frame.to_excel(self.path, 'test1', index=False) # test index_label - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(self.path, 'test1', - index_label=['test'], - merge_cells=merge_cells) + df = (DataFrame(np.random.randn(10, 2)) >= 0) + df.to_excel(self.path, 'test1', + index_label=['test'], + merge_cells=merge_cells) reader = ExcelFile(self.path) recons = pd.read_excel( reader, 'test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - assert frame.index.names == recons.index.names - - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(self.path, - 'test1', - index_label=['test', 'dummy', 'dummy2'], - merge_cells=merge_cells) + df.index.names = ['test'] + assert df.index.names == recons.index.names + + df = (DataFrame(np.random.randn(10, 2)) >= 0) + df.to_excel(self.path, + 'test1', + index_label=['test', 'dummy', 'dummy2'], + merge_cells=merge_cells) reader = ExcelFile(self.path) recons = pd.read_excel( reader, 'test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - assert frame.index.names == recons.index.names - - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(self.path, - 'test1', - index_label='test', - merge_cells=merge_cells) + df.index.names = ['test'] + assert df.index.names == recons.index.names + + df = (DataFrame(np.random.randn(10, 2)) >= 0) + df.to_excel(self.path, + 'test1', + index_label='test', + merge_cells=merge_cells) reader = ExcelFile(self.path) recons = pd.read_excel( reader, 'test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - tm.assert_frame_equal(frame, recons.astype(bool)) + df.index.names = ['test'] + tm.assert_frame_equal(df, recons.astype(bool)) - self.frame.to_excel(self.path, - 'test1', - columns=['A', 'B', 'C', 'D'], - index=False, merge_cells=merge_cells) + frame.to_excel(self.path, + 'test1', + columns=['A', 'B', 'C', 'D'], + index=False, merge_cells=merge_cells) # take 'A' and 'B' as indexes (same row as cols 'C', 'D') - df = self.frame.copy() + df = frame.copy() df = df.set_index(['A', 'B']) reader = ExcelFile(self.path) @@ -1395,17 +1402,17 @@ def test_excel_roundtrip_indexname(self, merge_cells, engine, ext): tm.assert_frame_equal(result, df) assert result.index.name == 'foo' - def test_excel_roundtrip_datetime(self, merge_cells, *_): + def test_excel_roundtrip_datetime(self, merge_cells, tsframe, *_): # datetime.date, not sure what to test here exactly - tsf = self.tsframe.copy() + tsf = tsframe.copy() - tsf.index = [x.date() for x in self.tsframe.index] + tsf.index = [x.date() for x in tsframe.index] tsf.to_excel(self.path, "test1", merge_cells=merge_cells) reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) - tm.assert_frame_equal(self.tsframe, recons) + tm.assert_frame_equal(tsframe, recons) def test_excel_date_datetime_format(self, merge_cells, engine, ext): # see gh-4133 @@ -1450,14 +1457,14 @@ def test_to_excel_interval_no_labels(self, *_): # see gh-19242 # # Test writing Interval without labels. 
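        # Excel has no native interval type, so the Interval categories
        # produced by pd.cut are expected back in their string form,
        # matching the .astype(str) on the expected frame below.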
- frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - dtype=np.int64) - expected = frame.copy() + df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + dtype=np.int64) + expected = df.copy() - frame["new"] = pd.cut(frame[0], 10) + df["new"] = pd.cut(df[0], 10) expected["new"] = pd.cut(expected[0], 10).astype(str) - frame.to_excel(self.path, "test1") + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) @@ -1467,15 +1474,15 @@ def test_to_excel_interval_labels(self, *_): # see gh-19242 # # Test writing Interval with labels. - frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - dtype=np.int64) - expected = frame.copy() - intervals = pd.cut(frame[0], 10, labels=["A", "B", "C", "D", "E", - "F", "G", "H", "I", "J"]) - frame["new"] = intervals + df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + dtype=np.int64) + expected = df.copy() + intervals = pd.cut(df[0], 10, labels=["A", "B", "C", "D", "E", + "F", "G", "H", "I", "J"]) + df["new"] = intervals expected["new"] = pd.Series(list(intervals)) - frame.to_excel(self.path, "test1") + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) @@ -1485,23 +1492,23 @@ def test_to_excel_timedelta(self, *_): # see gh-19242, gh-9155 # # Test writing timedelta to xls. - frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - columns=["A"], dtype=np.int64) - expected = frame.copy() + df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + columns=["A"], dtype=np.int64) + expected = df.copy() - frame["new"] = frame["A"].apply(lambda x: timedelta(seconds=x)) + df["new"] = df["A"].apply(lambda x: timedelta(seconds=x)) expected["new"] = expected["A"].apply( lambda x: timedelta(seconds=x).total_seconds() / float(86400)) - frame.to_excel(self.path, "test1") + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) - def test_to_excel_periodindex(self, merge_cells, engine, ext): - frame = self.tsframe - xp = frame.resample('M', kind='period').mean() + def test_to_excel_periodindex( + self, merge_cells, engine, ext, tsframe): + xp = tsframe.resample('M', kind='period').mean() xp.to_excel(self.path, 'sht1') @@ -1509,8 +1516,7 @@ def test_to_excel_periodindex(self, merge_cells, engine, ext): rs = pd.read_excel(reader, 'sht1', index_col=0) tm.assert_frame_equal(xp, rs.to_period('M')) - def test_to_excel_multiindex(self, merge_cells, engine, ext): - frame = self.frame + def test_to_excel_multiindex(self, merge_cells, engine, ext, frame): arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) @@ -1526,21 +1532,21 @@ def test_to_excel_multiindex(self, merge_cells, engine, ext): tm.assert_frame_equal(frame, df) # GH13511 - def test_to_excel_multiindex_nan_label(self, merge_cells, engine, ext): - frame = pd.DataFrame({'A': [None, 2, 3], - 'B': [10, 20, 30], - 'C': np.random.sample(3)}) - frame = frame.set_index(['A', 'B']) - - frame.to_excel(self.path, merge_cells=merge_cells) - df = pd.read_excel(self.path, index_col=[0, 1]) - tm.assert_frame_equal(frame, df) + def test_to_excel_multiindex_nan_label( + self, merge_cells, engine, ext): + df = pd.DataFrame({'A': [None, 2, 3], + 'B': [10, 20, 30], + 'C': np.random.sample(3)}) + df = df.set_index(['A', 'B']) + + df.to_excel(self.path, merge_cells=merge_cells) + df1 = pd.read_excel(self.path, 
index_col=[0, 1]) + tm.assert_frame_equal(df, df1) # Test for Issue 11328. If column indices are integers, make # sure they are handled correctly for either setting of # merge_cells - def test_to_excel_multiindex_cols(self, merge_cells, engine, ext): - frame = self.frame + def test_to_excel_multiindex_cols(self, merge_cells, engine, ext, frame): arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) @@ -1563,9 +1569,9 @@ def test_to_excel_multiindex_cols(self, merge_cells, engine, ext): frame.columns = [".".join(map(str, q)) for q in zip(*fm)] tm.assert_frame_equal(frame, df) - def test_to_excel_multiindex_dates(self, merge_cells, engine, ext): + def test_to_excel_multiindex_dates( + self, merge_cells, engine, ext, tsframe): # try multiindex with dates - tsframe = self.tsframe.copy() new_index = [tsframe.index, np.arange(len(tsframe.index))] tsframe.index = MultiIndex.from_arrays(new_index) From 6a37e19db73155e514b8d14a36e0ee53b692609d Mon Sep 17 00:00:00 2001 From: DanielFEvans <41120183+DanielFEvans@users.noreply.github.com> Date: Wed, 5 Jun 2019 19:44:38 +0100 Subject: [PATCH 37/43] ERR: include original error message for missing required dependencies (#26665) --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/__init__.py | 8 +++++--- pandas/tests/test_base.py | 27 +++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 4018418294963..8fd9f07442810 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -82,7 +82,7 @@ Other Enhancements - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`) - :func:`merge_asof` now gives a more clear error message when merge keys are categoricals that are not equal (:issue:`26136`) - :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`) -- +- Error message for missing required imports now includes the original ImportError's text (:issue:`23868`) .. 
_whatsnew_0250.api_breaking: diff --git a/pandas/__init__.py b/pandas/__init__.py index 4c494b4a62e39..11ea3047bb62a 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -10,11 +10,13 @@ try: __import__(dependency) except ImportError as e: - missing_dependencies.append(dependency) + missing_dependencies.append((dependency, e)) if missing_dependencies: - raise ImportError( - "Missing required dependencies {0}".format(missing_dependencies)) + msg = "Unable to import required dependencies:" + for dependency, e in missing_dependencies: + msg += "\n{0}: {1}".format(dependency, str(e)) + raise ImportError(msg) del hard_dependencies, dependency, missing_dependencies # numpy compat diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 3b4f85e680f6e..f8319999682e8 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1,7 +1,9 @@ from datetime import datetime, timedelta +from importlib import reload from io import StringIO import re import sys +from unittest.mock import patch import numpy as np import pytest @@ -1341,3 +1343,28 @@ def test_to_numpy_dtype(as_series): expected = np.array(['2000-01-01T05', '2001-01-01T05'], dtype='M8[ns]') tm.assert_numpy_array_equal(result, expected) + + +@patch("builtins.__import__") +def test_missing_required_dependency(mock_import): + def mock_import_fail(name, *args, **kwargs): + if name == "numpy": + raise ImportError("cannot import name numpy") + elif name == "pytz": + raise ImportError("cannot import name some_dependency") + elif name == "dateutil": + raise ImportError("cannot import name some_other_dependency") + else: + return __import__(name, *args, **kwargs) + + mock_import.side_effect = mock_import_fail + + expected_msg = ( + "Unable to import required dependencies:" + "\nnumpy: cannot import name numpy" + "\npytz: cannot import name some_dependency" + "\ndateutil: cannot import name some_other_dependency" + ) + + with pytest.raises(ImportError, match=expected_msg): + reload(pd) From 5271868402a9cd2b24c2e161bd3a9b677936e459 Mon Sep 17 00:00:00 2001 From: nathalier Date: Wed, 5 Jun 2019 20:06:13 +0100 Subject: [PATCH 38/43] BUG: fix TypeError for invalid integer dates %Y%m%d with errors='ignore' (# GH 26583) (#26585) --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/_libs/tslibs/strptime.pyx | 6 +++--- pandas/tests/indexes/datetimes/test_tools.py | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 8fd9f07442810..02ee275bab364 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -537,6 +537,7 @@ Datetimelike - Bug in :func:`to_datetime` which does not replace the invalid argument with ``NaT`` when error is set to coerce (:issue:`26122`) - Bug in adding :class:`DateOffset` with nonzero month to :class:`DatetimeIndex` would raise ``ValueError`` (:issue:`26258`) - Bug in :func:`to_datetime` which raises unhandled ``OverflowError`` when called with mix of invalid dates and ``NaN`` values with ``format='%Y%m%d'`` and ``error='coerce'`` (:issue:`25512`) +- Bug in :func:`to_datetime` which raises ``TypeError`` for ``format='%Y%m%d'`` when called for invalid integer dates with length >= 6 digits with ``errors='ignore'`` Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index af3d3fa646a12..d93858cff5e05 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -140,13 +140,13 @@ def 
array_strptime(object[:] values, object fmt, iresult[i] = NPY_NAT continue raise ValueError("time data %r does not match " - "format %r (match)" % (values[i], fmt)) + "format %r (match)" % (val, fmt)) if len(val) != found.end(): if is_coerce: iresult[i] = NPY_NAT continue raise ValueError("unconverted data remains: %s" % - values[i][found.end():]) + val[found.end():]) # search else: @@ -156,7 +156,7 @@ def array_strptime(object[:] values, object fmt, iresult[i] = NPY_NAT continue raise ValueError("time data %r does not match format " - "%r (search)" % (values[i], fmt)) + "%r (search)" % (val, fmt)) iso_year = -1 year = 1900 diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index c507c31ee54dd..ea33e563b31be 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -133,6 +133,25 @@ def test_to_datetime_format_integer(self, cache): result = to_datetime(s, format='%Y%m', cache=cache) assert_series_equal(result, expected) + @pytest.mark.parametrize('int_date, expected', [ + # valid date, length == 8 + [20121030, datetime(2012, 10, 30)], + # short valid date, length == 6 + [199934, datetime(1999, 3, 4)], + # long integer date partially parsed to datetime(2012,1,1), length > 8 + [2012010101, 2012010101], + # invalid date partially parsed to datetime(2012,9,9), length == 8 + [20129930, 20129930], + # short integer date partially parsed to datetime(2012,9,9), length < 8 + [2012993, 2012993], + # short invalid date, length == 4 + [2121, 2121]]) + def test_int_to_datetime_format_YYYYMMDD_typeerror(self, int_date, + expected): + # GH 26583 + result = to_datetime(int_date, format='%Y%m%d', errors='ignore') + assert result == expected + @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_format_microsecond(self, cache): From 2cc1ca0287266ee83c1cd6dab473b8f184d5fd36 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 5 Jun 2019 22:30:45 +0200 Subject: [PATCH 39/43] Revert "ERR: include original error message for missing required dependencies (#26665)" This reverts commit 047d32d20640898978dbf6d9855cd6fecbbcf0d5. --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/__init__.py | 8 +++----- pandas/tests/test_base.py | 27 --------------------------- 3 files changed, 4 insertions(+), 33 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 02ee275bab364..1fb9b5ae695a0 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -82,7 +82,7 @@ Other Enhancements - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`) - :func:`merge_asof` now gives a more clear error message when merge keys are categoricals that are not equal (:issue:`26136`) - :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`) -- Error message for missing required imports now includes the original ImportError's text (:issue:`23868`) +- .. 
_whatsnew_0250.api_breaking: diff --git a/pandas/__init__.py b/pandas/__init__.py index 11ea3047bb62a..4c494b4a62e39 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -10,13 +10,11 @@ try: __import__(dependency) except ImportError as e: - missing_dependencies.append((dependency, e)) + missing_dependencies.append(dependency) if missing_dependencies: - msg = "Unable to import required dependencies:" - for dependency, e in missing_dependencies: - msg += "\n{0}: {1}".format(dependency, str(e)) - raise ImportError(msg) + raise ImportError( + "Missing required dependencies {0}".format(missing_dependencies)) del hard_dependencies, dependency, missing_dependencies # numpy compat diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index f8319999682e8..3b4f85e680f6e 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1,9 +1,7 @@ from datetime import datetime, timedelta -from importlib import reload from io import StringIO import re import sys -from unittest.mock import patch import numpy as np import pytest @@ -1343,28 +1341,3 @@ def test_to_numpy_dtype(as_series): expected = np.array(['2000-01-01T05', '2001-01-01T05'], dtype='M8[ns]') tm.assert_numpy_array_equal(result, expected) - - -@patch("builtins.__import__") -def test_missing_required_dependency(mock_import): - def mock_import_fail(name, *args, **kwargs): - if name == "numpy": - raise ImportError("cannot import name numpy") - elif name == "pytz": - raise ImportError("cannot import name some_dependency") - elif name == "dateutil": - raise ImportError("cannot import name some_other_dependency") - else: - return __import__(name, *args, **kwargs) - - mock_import.side_effect = mock_import_fail - - expected_msg = ( - "Unable to import required dependencies:" - "\nnumpy: cannot import name numpy" - "\npytz: cannot import name some_dependency" - "\ndateutil: cannot import name some_other_dependency" - ) - - with pytest.raises(ImportError, match=expected_msg): - reload(pd) From ae50e39a611a337be06109a66c0a23e37e20013e Mon Sep 17 00:00:00 2001 From: AlexTereshenkov <50622389+AlexTereshenkov@users.noreply.github.com> Date: Wed, 5 Jun 2019 22:37:54 +0100 Subject: [PATCH 40/43] Remove redundant check arr_or_dtype is None (#26655) --- pandas/core/dtypes/common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 4029e6f4bfdb5..52011d53d22cd 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1931,8 +1931,6 @@ def _is_dtype_type(arr_or_dtype, condition): if issubclass(arr_or_dtype, ExtensionDtype): arr_or_dtype = arr_or_dtype.type return condition(np.dtype(arr_or_dtype).type) - elif arr_or_dtype is None: - return condition(type(None)) # if we have an array-like if hasattr(arr_or_dtype, 'dtype'): From 077c7c276ab41f4717b2bbe32dd0b2fd17dd9f69 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 5 Jun 2019 16:48:47 -0500 Subject: [PATCH 41/43] filter warning in repr (#26669) --- pandas/core/sparse/frame.py | 5 +++++ pandas/core/sparse/series.py | 10 ++++++---- pandas/tests/sparse/test_format.py | 13 +++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 0320da6d9a48d..67ecbcbea67f9 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -242,6 +242,11 @@ def _init_spmatrix(self, data, index, columns, dtype=None, def to_coo(self): return SparseFrameAccessor(self).to_coo() + def __repr__(self): + with 
warnings.catch_warnings(): + warnings.filterwarnings("ignore", "Sparse") + return super().__repr__() + def __getstate__(self): # pickling return dict(_typ=self._typ, _subtyp=self._subtyp, _data=self._data, diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 3814d8bb66635..3e3bae6444082 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -214,10 +214,12 @@ def as_sparse_array(self, kind=None, fill_value=None, copy=False): fill_value=fill_value, kind=kind, copy=copy) def __repr__(self): - series_rep = Series.__repr__(self) - rep = '{series}\n{index!r}'.format(series=series_rep, - index=self.sp_index) - return rep + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "Sparse") + series_rep = Series.__repr__(self) + rep = '{series}\n{index!r}'.format(series=series_rep, + index=self.sp_index) + return rep def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): diff --git a/pandas/tests/sparse/test_format.py b/pandas/tests/sparse/test_format.py index 37c2acc587cf6..7ed8c48fce333 100644 --- a/pandas/tests/sparse/test_format.py +++ b/pandas/tests/sparse/test_format.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np import pytest @@ -133,3 +135,14 @@ def test_sparse_repr_after_set(self): repr(sdf) tm.assert_sp_frame_equal(sdf, res) + + +def test_repr_no_warning(): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + df = pd.SparseDataFrame({"A": [1, 2]}) + s = df['A'] + + with tm.assert_produces_warning(None): + repr(df) + repr(s) From 52ed9153feda581c85b79a0822f2a44fc00ff55e Mon Sep 17 00:00:00 2001 From: Vaibhav Vishal Date: Fri, 7 Jun 2019 00:06:45 +0530 Subject: [PATCH 42/43] convert DatetimeLikeScalar to TypeVar --- pandas/_typing.py | 3 ++- pandas/core/arrays/datetimelike.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 24ee65645905b..9c059cb610c6e 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -18,6 +18,7 @@ ABCSparseSeries, np.ndarray) ArrayLike = TypeVar('ArrayLike', ABCExtensionArray, np.ndarray) -DatetimeLikeScalar = Type[Union[Period, Timestamp, Timedelta]] +DatetimeLikeScalar = TypeVar('DatetimeLikeScalar', Period, Timestamp, + Timedelta) Dtype = Union[str, np.dtype, ExtensionDtype] FilePathOrBuffer = Union[str, Path, IO[AnyStr]] diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index c32f8642dc2ed..c99c09cdac96c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta import operator -from typing import Any, Sequence, Union, cast +from typing import Any, Sequence, Type, Union, cast import warnings import numpy as np @@ -58,7 +58,7 @@ def _get_attributes_dict(self): return {k: getattr(self, k, None) for k in self._attributes} @property - def _scalar_type(self) -> DatetimeLikeScalar: + def _scalar_type(self) -> Type[DatetimeLikeScalar]: """The scalar associated with this datelike * PeriodArray : Period From 2d3376a07abc1dd443863d25109cf41c3923398b Mon Sep 17 00:00:00 2001 From: Vaibhav Vishal Date: Fri, 7 Jun 2019 00:07:55 +0530 Subject: [PATCH 43/43] remove unused import --- pandas/_typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 9c059cb610c6e..a2bb168c1e2da 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing 
import IO, AnyStr, Type, TypeVar, Union +from typing import IO, AnyStr, TypeVar, Union import numpy as np
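
For reference, the constrained TypeVar used in the last two patches behaves
differently from a Union: each use of the variable must bind to exactly one
of the listed types. A minimal sketch of the pattern, using stand-in classes
for the real pandas scalars:

    from typing import Type, TypeVar

    class Period: ...       # stand-ins for pandas' datetime-like scalars
    class Timestamp: ...
    class Timedelta: ...

    # A constrained TypeVar: every binding resolves to one constraint,
    # whereas Union[Period, Timestamp, Timedelta] would let the three
    # types mix within a single annotation.
    DatetimeLikeScalar = TypeVar('DatetimeLikeScalar', Period, Timestamp,
                                 Timedelta)

    def scalar_type(values) -> Type[DatetimeLikeScalar]:
        # mirrors _scalar_type in pandas/core/arrays/datetimelike.py:
        # a PeriodArray reports Period, a DatetimeArray reports
        # Timestamp, and so on
        ...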