From ee8ba611a8e3b0b081e7a74fe596ec8e0dbc16ab Mon Sep 17 00:00:00 2001 From: alistair Date: Sat, 16 Jun 2018 00:30:42 +0100 Subject: [PATCH 001/113] Fix Timestamp rounding --- pandas/_libs/tslibs/timestamps.pyx | 61 ++++++++++++------- .../tests/scalar/timestamp/test_unary_ops.py | 24 +++++++- 2 files changed, 62 insertions(+), 23 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index ba5ebdab82ddc..b9ea2da2c18d5 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -72,31 +72,50 @@ def round_ns(values, rounder, freq): ------- int or :obj:`ndarray` """ - from pandas.tseries.frequencies import to_offset - unit = to_offset(freq).nanos - if unit < 1000: - # for nano rounding, work with the last 6 digits separately - # due to float precision - buff = 1000000 - r = (buff * (values // buff) + unit * - (rounder((values % buff) * (1 / float(unit)))).astype('i8')) - else: - if unit % 1000 != 0: - msg = 'Precision will be lost using frequency: {}' - warnings.warn(msg.format(freq)) - - # GH19206 - # to deal with round-off when unit is large - if unit >= 1e9: - divisor = 10 ** int(np.log10(unit / 1e7)) + def _round_non_int_multiple(value): + if unit < 1000: + # for nano rounding, work with the last 6 digits separately + # due to float precision + buff = 1000000 + r = (buff * (value // buff) + unit * + (rounder((value % buff) * (1 / float(unit)))).astype('i8')) + else: + if unit % 1000 != 0: + msg = 'Precision will be lost using frequency: {}' + warnings.warn(msg.format(freq)) + + # GH19206 + # to deal with round-off when unit is large + if unit >= 1e9: + divisor = 10 ** int(np.log10(unit / 1e7)) + else: + divisor = 10 + + r = (unit * rounder((value * (divisor / float(unit))) / divisor) + .astype('i8')) + + return r + + # GH21262 If the Timestamp is multiple of the freq str + # then we don't apply _round_non_int_multiple + + def _apply_round(value): + if value % unit == 0: + return value else: - divisor = 10 + return _round_non_int_multiple(value) - r = (unit * rounder((values * (divisor / float(unit))) / divisor) - .astype('i8')) + from pandas.tseries.frequencies import to_offset + unit = to_offset(freq).nanos - return r + if type(values) is int: + if values % unit == 0: + return values + else: + return _round_non_int_multiple(values) + else: + return np.fromiter((_apply_round(item) for item in values), np.int64) # This is PITA. Because we inherit from datetime, which has very specific # construction requirements, we need to do object instantiation in python diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index aecddab8477fc..f37642569167a 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -118,6 +118,27 @@ def test_ceil_floor_edge(self, test_input, rounder, freq, expected): expected = Timestamp(expected) assert result == expected + @pytest.mark.parametrize('test_input, freq, expected', [ + ('2018-01-01 00:02:06', '2s', '2018-01-01 00:02:06'), + ('2018-01-01 00:02:00', '2T', '2018-01-01 00:02:00'), + ('2018-01-01 00:04:00', '4T', '2018-01-01 00:04:00'), + ('2018-01-01 00:15:00', '15T', '2018-01-01 00:15:00'), + ('2018-01-01 00:20:00', '20T', '2018-01-01 00:20:00'), + ('2018-01-01 03:00:00', '3H', '2018-01-01 03:00:00'), + ]) + def test_round_minute_freq(self, test_input, freq, expected): + # Ensure timestamps that shouldnt round dont! 
+ # GH#21262 + dt = Timestamp(test_input) + expected = Timestamp(expected) + + result_ceil = dt.ceil(freq) + assert result_ceil == expected + result_floor = dt.floor(freq) + assert result_floor == expected + result_round = dt.round(freq) + assert result_round == expected + def test_ceil(self): dt = Timestamp('20130101 09:10:11') result = dt.ceil('D') @@ -257,7 +278,6 @@ def test_timestamp(self): if PY3: # datetime.timestamp() converts in the local timezone with tm.set_timezone('UTC'): - # should agree with datetime.timestamp method dt = ts.to_pydatetime() - assert dt.timestamp() == ts.timestamp() + assert dt.timestamp() == ts.timestamp() \ No newline at end of file From 50986c6c6d4eb89cd367a991620f23e88ee0e14b Mon Sep 17 00:00:00 2001 From: alistair Date: Sat, 16 Jun 2018 01:18:34 +0100 Subject: [PATCH 002/113] Pep8 fixes --- pandas/tests/scalar/timestamp/test_unary_ops.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index f37642569167a..4fb4f8f36ab86 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -119,12 +119,12 @@ def test_ceil_floor_edge(self, test_input, rounder, freq, expected): assert result == expected @pytest.mark.parametrize('test_input, freq, expected', [ - ('2018-01-01 00:02:06', '2s', '2018-01-01 00:02:06'), - ('2018-01-01 00:02:00', '2T', '2018-01-01 00:02:00'), - ('2018-01-01 00:04:00', '4T', '2018-01-01 00:04:00'), + ('2018-01-01 00:02:06', '2s', '2018-01-01 00:02:06'), + ('2018-01-01 00:02:00', '2T', '2018-01-01 00:02:00'), + ('2018-01-01 00:04:00', '4T', '2018-01-01 00:04:00'), ('2018-01-01 00:15:00', '15T', '2018-01-01 00:15:00'), ('2018-01-01 00:20:00', '20T', '2018-01-01 00:20:00'), - ('2018-01-01 03:00:00', '3H', '2018-01-01 03:00:00'), + ('2018-01-01 03:00:00', '3H', '2018-01-01 03:00:00'), ]) def test_round_minute_freq(self, test_input, freq, expected): # Ensure timestamps that shouldnt round dont! @@ -280,4 +280,4 @@ def test_timestamp(self): with tm.set_timezone('UTC'): # should agree with datetime.timestamp method dt = ts.to_pydatetime() - assert dt.timestamp() == ts.timestamp() \ No newline at end of file + assert dt.timestamp() == ts.timestamp() From d1c6e6f372e35b4f2f47931a1485902912d41bd7 Mon Sep 17 00:00:00 2001 From: alistair Date: Sun, 17 Jun 2018 14:07:17 +0100 Subject: [PATCH 003/113] Pep8 fixes and add additional test cases --- pandas/_libs/tslibs/timestamps.pyx | 9 +++++---- pandas/tests/indexes/datetimes/test_scalar_compat.py | 4 ++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index b9ea2da2c18d5..0115b90fb2ff6 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -109,14 +109,15 @@ def round_ns(values, rounder, freq): unit = to_offset(freq).nanos if type(values) is int: - if values % unit == 0: - return values - else: - return _round_non_int_multiple(values) + if values % unit == 0: + return values + else: + return _round_non_int_multiple(values) else: return np.fromiter((_apply_round(item) for item in values), np.int64) + # This is PITA. Because we inherit from datetime, which has very specific # construction requirements, we need to do object instantiation in python # (see Timestamp class above). 
This will serve as a C extension type that diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 9180bb0af3af3..441676dd36ea7 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -143,6 +143,10 @@ def test_round(self, tz): ['1823-01-01 00:00:01.000000020']), (['1823-01-01 00:00:01'], 'floor', '1s', ['1823-01-01 00:00:01']), (['1823-01-01 00:00:01'], 'ceil', '1s', ['1823-01-01 00:00:01']), + (['2018-01-01 00:15:00'], 'ceil', '15T', ['2018-01-01 00:15:00']), + (['2018-01-01 00:15:00'], 'floor', '15T', ['2018-01-01 00:15:00']), + (['1823-01-01 03:00:00'], 'ceil', '3H', ['1823-01-01 03:00:00']), + (['1823-01-01 03:00:00'], 'floor', '3H', ['1823-01-01 03:00:00']), (('NaT', '1823-01-01 00:00:01'), 'floor', '1s', ('NaT', '1823-01-01 00:00:01')), (('NaT', '1823-01-01 00:00:01'), 'ceil', '1s', From 03a42b82ed04b9c6a088c1c9eecacb6b457cefcf Mon Sep 17 00:00:00 2001 From: alistair Date: Sun, 17 Jun 2018 15:09:02 +0100 Subject: [PATCH 004/113] Futher test cases --- .../tests/indexes/datetimes/test_scalar_compat.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 441676dd36ea7..801dcb91b124e 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -134,6 +134,21 @@ def test_round(self, tz): ts = '2016-10-17 12:00:00.001501031' DatetimeIndex([ts]).round('1010ns') + def test_no_rounding_occurs(self, tz): + # GH 21262 + rng = date_range(start='2016-01-01', periods=5, + freq='2Min', tz=tz) + + expected_rng = DatetimeIndex([ + Timestamp('2016-01-01 00:00:00', tz=tz, freq='2T'), + Timestamp('2016-01-01 00:02:00', tz=tz, freq='2T'), + Timestamp('2016-01-01 00:04:00', tz=tz, freq='2T'), + Timestamp('2016-01-01 00:06:00', tz=tz, freq='2T'), + Timestamp('2016-01-01 00:08:00', tz=tz, freq='2T'), + ]) + + tm.assert_index_equal(rng.round(freq='2T'), expected_rng) + @pytest.mark.parametrize('test_input, rounder, freq, expected', [ (['2117-01-01 00:00:45'], 'floor', '15s', ['2117-01-01 00:00:45']), (['2117-01-01 00:00:45'], 'ceil', '15s', ['2117-01-01 00:00:45']), From a3020741ae4e854f9f7d10c9af8c089ff52e8fe9 Mon Sep 17 00:00:00 2001 From: alistair Date: Thu, 21 Jun 2018 01:27:16 +0100 Subject: [PATCH 005/113] Refactor timestamp rounding --- pandas/_libs/tslibs/timestamps.pyx | 51 ++++++++++--------- pandas/core/indexes/datetimelike.py | 4 +- .../tests/scalar/timestamp/test_unary_ops.py | 14 +++-- 3 files changed, 34 insertions(+), 35 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 0115b90fb2ff6..66e087568df51 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -58,21 +58,31 @@ cdef inline object create_timestamp_from_ts(int64_t value, return ts_base -def round_ns(values, rounder, freq): +cdef inline round_ns(values, rounder, freq): + """ Applies rounding function at given frequency Parameters ---------- - values : int, :obj:`ndarray` - rounder : function + values : np.array + rounder : function, eg. 
'Ceil', 'Floor', 'round' freq : str, obj Returns ------- - int or :obj:`ndarray` + np.array """ def _round_non_int_multiple(value): + + from pandas.tseries.frequencies import to_offset + unit = to_offset(freq).nanos + + # GH21262 If the Timestamp is multiple of the freq str + # don't apply any rounding + if value % unit == 0: + return value + if unit < 1000: # for nano rounding, work with the last 6 digits separately # due to float precision @@ -96,26 +106,7 @@ def round_ns(values, rounder, freq): return r - # GH21262 If the Timestamp is multiple of the freq str - # then we don't apply _round_non_int_multiple - - def _apply_round(value): - if value % unit == 0: - return value - else: - return _round_non_int_multiple(value) - - from pandas.tseries.frequencies import to_offset - unit = to_offset(freq).nanos - - if type(values) is int: - if values % unit == 0: - return values - else: - return _round_non_int_multiple(values) - - else: - return np.fromiter((_apply_round(item) for item in values), np.int64) + return np.fromiter((_round_non_int_multiple(item) for item in values), np.int64) # This is PITA. Because we inherit from datetime, which has very specific @@ -663,13 +654,23 @@ class Timestamp(_Timestamp): return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) + @staticmethod + def round_values(values, rounder, freq): + """ + DatetimeIndex/PeriodIndex also use Timestamp rounding + """ + return round_ns(values, rounder, freq) + def _round(self, freq, rounder): if self.tz is not None: value = self.tz_localize(None).value else: value = self.value - r = round_ns(value, rounder, freq) + value = np.array([value], dtype=np.int64) + + # Will only ever contain 1 element for timestamp + r = Timestamp.round_values(value, rounder, freq).item() result = Timestamp(r, unit='ns') if self.tz is not None: result = result.tz_localize(self.tz) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index c7cb245263df8..45dceb53c54bb 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -15,7 +15,7 @@ from pandas._libs import lib, iNaT, NaT, Timedelta from pandas._libs.tslibs.period import Period from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds -from pandas._libs.tslibs.timestamps import round_ns +from pandas._libs.tslibs.timestamps import Timestamp from pandas.core.dtypes.common import ( _ensure_int64, @@ -183,7 +183,7 @@ class TimelikeOps(object): def _round(self, freq, rounder): # round the local times values = _ensure_datetimelike_to_i8(self) - result = round_ns(values, rounder, freq) + result = Timestamp.round_values(values, rounder, freq) result = self._maybe_mask_results(result, fill_value=NaT) attribs = self._get_attributes_dict() diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 4fb4f8f36ab86..dbe31ccb11114 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -126,18 +126,16 @@ def test_ceil_floor_edge(self, test_input, rounder, freq, expected): ('2018-01-01 00:20:00', '20T', '2018-01-01 00:20:00'), ('2018-01-01 03:00:00', '3H', '2018-01-01 03:00:00'), ]) - def test_round_minute_freq(self, test_input, freq, expected): + @pytest.mark.parametrize('rounder', ['ceil', 'floor', 'round']) + def test_round_minute_freq(self, test_input, freq, expected, rounder): # Ensure timestamps that shouldnt round dont! 
# GH#21262 + dt = Timestamp(test_input) expected = Timestamp(expected) - - result_ceil = dt.ceil(freq) - assert result_ceil == expected - result_floor = dt.floor(freq) - assert result_floor == expected - result_round = dt.round(freq) - assert result_round == expected + func = getattr(dt, rounder) + result = func(freq) + assert result == expected def test_ceil(self): dt = Timestamp('20130101 09:10:11') From 33335b4300c412c5421e5a63ef33f5e6ffdd4ba0 Mon Sep 17 00:00:00 2001 From: alistair Date: Thu, 21 Jun 2018 01:48:20 +0100 Subject: [PATCH 006/113] Parameterize test cases --- pandas/_libs/tslibs/timestamps.pyx | 9 +-------- pandas/core/indexes/datetimelike.py | 4 ++-- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 66e087568df51..92781c63aeaf7 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -58,7 +58,7 @@ cdef inline object create_timestamp_from_ts(int64_t value, return ts_base -cdef inline round_ns(values, rounder, freq): +def round_ns(values, rounder, freq): """ Applies rounding function at given frequency @@ -654,13 +654,6 @@ class Timestamp(_Timestamp): return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) - @staticmethod - def round_values(values, rounder, freq): - """ - DatetimeIndex/PeriodIndex also use Timestamp rounding - """ - return round_ns(values, rounder, freq) - def _round(self, freq, rounder): if self.tz is not None: value = self.tz_localize(None).value diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 45dceb53c54bb..c7cb245263df8 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -15,7 +15,7 @@ from pandas._libs import lib, iNaT, NaT, Timedelta from pandas._libs.tslibs.period import Period from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds -from pandas._libs.tslibs.timestamps import Timestamp +from pandas._libs.tslibs.timestamps import round_ns from pandas.core.dtypes.common import ( _ensure_int64, @@ -183,7 +183,7 @@ class TimelikeOps(object): def _round(self, freq, rounder): # round the local times values = _ensure_datetimelike_to_i8(self) - result = Timestamp.round_values(values, rounder, freq) + result = round_ns(values, rounder, freq) result = self._maybe_mask_results(result, fill_value=NaT) attribs = self._get_attributes_dict() From 0363a7201a7344948f3f5f344d5110ec626e2198 Mon Sep 17 00:00:00 2001 From: alistair Date: Thu, 21 Jun 2018 01:55:04 +0100 Subject: [PATCH 007/113] Update function error --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 92781c63aeaf7..de852e066685c 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -663,7 +663,7 @@ class Timestamp(_Timestamp): value = np.array([value], dtype=np.int64) # Will only ever contain 1 element for timestamp - r = Timestamp.round_values(value, rounder, freq).item() + r = round_ns(value, rounder, freq).item() result = Timestamp(r, unit='ns') if self.tz is not None: result = result.tz_localize(self.tz) From 7353c2fa05c69a6c12f1506f745f947923ffa6c1 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sun, 24 Jun 2018 23:22:45 +0100 Subject: [PATCH 008/113] Perform manipulation with vectorization --- pandas/_libs/tslibs/timestamps.pyx | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 
deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index de852e066685c..f7d3367d4b0b3 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -65,23 +65,15 @@ def round_ns(values, rounder, freq): Parameters ---------- - values : np.array + values : :obj:`ndarray` rounder : function, eg. 'Ceil', 'Floor', 'round' freq : str, obj Returns ------- - np.array + :obj:`ndarray` """ - def _round_non_int_multiple(value): - - from pandas.tseries.frequencies import to_offset - unit = to_offset(freq).nanos - - # GH21262 If the Timestamp is multiple of the freq str - # don't apply any rounding - if value % unit == 0: - return value + def _round_non_int_multiple(value, unit): if unit < 1000: # for nano rounding, work with the last 6 digits separately @@ -106,7 +98,19 @@ def round_ns(values, rounder, freq): return r - return np.fromiter((_round_non_int_multiple(item) for item in values), np.int64) + from pandas.tseries.frequencies import to_offset + unit = to_offset(freq).nanos + + values = np.copy(values) + + # GH21262 If the Timestamp is multiple of the freq str + # don't apply any rounding + mask = values % unit == 0 + if mask.all(): + return values + values[~mask] = _round_non_int_multiple(values[~mask], unit) + + return values # This is PITA. Because we inherit from datetime, which has very specific @@ -663,7 +667,7 @@ class Timestamp(_Timestamp): value = np.array([value], dtype=np.int64) # Will only ever contain 1 element for timestamp - r = round_ns(value, rounder, freq).item() + r = round_ns(value, rounder, freq)[0] result = Timestamp(r, unit='ns') if self.tz is not None: result = result.tz_localize(self.tz) From 82c7db1d8cb10d4146f322a371efd04e59d6f23d Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Mon, 25 Jun 2018 22:00:53 +0100 Subject: [PATCH 009/113] Lower case doc string --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index f7d3367d4b0b3..d2ce8225443fa 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -66,7 +66,7 @@ def round_ns(values, rounder, freq): Parameters ---------- values : :obj:`ndarray` - rounder : function, eg. 'Ceil', 'Floor', 'round' + rounder : function, eg. 
'ceil', 'floor', 'round' freq : str, obj Returns From 559bb50906841568b7fa6d74bc9cbb93d7cf170a Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Tue, 26 Jun 2018 21:50:14 +0100 Subject: [PATCH 010/113] Update copy function --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d2ce8225443fa..046d5d090876d 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -101,7 +101,7 @@ def round_ns(values, rounder, freq): from pandas.tseries.frequencies import to_offset unit = to_offset(freq).nanos - values = np.copy(values) + values = values.copy() # GH21262 If the Timestamp is multiple of the freq str # don't apply any rounding From bfe0d169db1c49c21cee3a08a41544e173c0b8dd Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Tue, 26 Jun 2018 23:50:35 +0100 Subject: [PATCH 011/113] Refactor to use just one function --- pandas/_libs/tslibs/timestamps.pyx | 50 +++++++++++++----------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 046d5d090876d..7e8e2777c371c 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -73,44 +73,36 @@ def round_ns(values, rounder, freq): ------- :obj:`ndarray` """ - def _round_non_int_multiple(value, unit): - - if unit < 1000: - # for nano rounding, work with the last 6 digits separately - # due to float precision - buff = 1000000 - r = (buff * (value // buff) + unit * - (rounder((value % buff) * (1 / float(unit)))).astype('i8')) - else: - if unit % 1000 != 0: - msg = 'Precision will be lost using frequency: {}' - warnings.warn(msg.format(freq)) - - # GH19206 - # to deal with round-off when unit is large - if unit >= 1e9: - divisor = 10 ** int(np.log10(unit / 1e7)) - else: - divisor = 10 - - r = (unit * rounder((value * (divisor / float(unit))) / divisor) - .astype('i8')) - - return r from pandas.tseries.frequencies import to_offset unit = to_offset(freq).nanos - values = values.copy() - # GH21262 If the Timestamp is multiple of the freq str # don't apply any rounding mask = values % unit == 0 if mask.all(): return values - values[~mask] = _round_non_int_multiple(values[~mask], unit) - - return values + r = values.copy() + + if unit < 1000: + # for nano rounding, work with the last 6 digits separately + # due to float precision + buff = 1000000 + r[~mask] = (buff * (values[~mask] // buff) + unit * + (rounder((values[~mask] % buff) * (1 / float(unit)))).astype('i8')) + else: + if unit % 1000 != 0: + msg = 'Precision will be lost using frequency: {}' + warnings.warn(msg.format(freq)) + # GH19206 + # to deal with round-off when unit is large + if unit >= 1e9: + divisor = 10 ** int(np.log10(unit / 1e7)) + else: + divisor = 10 + r[~mask] = (unit * rounder((values[~mask] * (divisor / float(unit))) / divisor) + .astype('i8')) + return r # This is PITA. 
Because we inherit from datetime, which has very specific From 4249fd7c33f19277b4b2786e38bbfac83aa61b6a Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Wed, 27 Jun 2018 22:24:22 +0100 Subject: [PATCH 012/113] Pep8 --- pandas/_libs/tslibs/timestamps.pyx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 7e8e2777c371c..123ccebf83a56 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -88,8 +88,9 @@ def round_ns(values, rounder, freq): # for nano rounding, work with the last 6 digits separately # due to float precision buff = 1000000 - r[~mask] = (buff * (values[~mask] // buff) + unit * - (rounder((values[~mask] % buff) * (1 / float(unit)))).astype('i8')) + r[~mask] = (buff * (values[~mask] // buff) + + unit * (rounder((values[~mask] % buff) * + (1 / float(unit)))).astype('i8')) else: if unit % 1000 != 0: msg = 'Precision will be lost using frequency: {}' @@ -100,8 +101,9 @@ def round_ns(values, rounder, freq): divisor = 10 ** int(np.log10(unit / 1e7)) else: divisor = 10 - r[~mask] = (unit * rounder((values[~mask] * (divisor / float(unit))) / divisor) - .astype('i8')) + r[~mask] = (unit * rounder((values[~mask] * + (divisor / float(unit))) / divisor) + .astype('i8')) return r From 2abd8e2f94dd961bdc90d4156dea83c9a2501d1c Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Mon, 11 Jun 2018 12:28:54 +0100 Subject: [PATCH 013/113] DOC: MultiIndex Fixes (#21414) --- pandas/core/indexes/multi.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 20805e33bb1d3..75b6be96feb78 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1407,7 +1407,7 @@ def _sort_levels_monotonic(self): This is an *internal* function. - create a new MultiIndex from the current to monotonically sorted + Create a new MultiIndex from the current to monotonically sorted items IN the levels. This does not actually make the entire MultiIndex monotonic, JUST the levels. @@ -1465,8 +1465,8 @@ def _sort_levels_monotonic(self): def remove_unused_levels(self): """ - create a new MultiIndex from the current that removing - unused levels, meaning that they are not expressed in the labels + Create a new MultiIndex from the current that removes + unused levels, meaning that they are not expressed in the labels. The resulting MultiIndex will have the same outward appearance, meaning the same .values and ordering. It will also From 3546c00d937bb4fbf32dc9da771c05b91004128d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 11 Jun 2018 17:14:47 +0200 Subject: [PATCH 014/113] DOC: fix grammar of deprecation message (#21421) --- pandas/util/_decorators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 6b55554cdc941..7d5753d03f4fc 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -140,8 +140,8 @@ def wrapper(*args, **kwargs): if new_arg_name is None and old_arg_value is not None: msg = ( "the '{old_name}' keyword is deprecated and will be " - "removed in a future version " - "please takes steps to stop use of '{old_name}'" + "removed in a future version. 
" + "Please take steps to stop the use of '{old_name}'" ).format(old_name=old_arg_name) warnings.warn(msg, FutureWarning, stacklevel=stacklevel) kwargs[old_arg_name] = old_arg_value From 342fd405396cdb898dde1eb66cc8c040264c3cfa Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 12 Jun 2018 01:05:45 +0100 Subject: [PATCH 015/113] MAINT: Deprecate encoding from stata reader/writer (#21400) Deprecate the encoding parameter from all Stata reading and writing methods and classes. The encoding depends only on the file format and cannot be changed by users. --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/frame.py | 9 +++++---- pandas/io/stata.py | 25 +++++++++++-------------- pandas/tests/io/test_stata.py | 15 +++++---------- 4 files changed, 22 insertions(+), 29 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index de985d4db5fa3..68c1839221508 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -45,7 +45,7 @@ Other API Changes Deprecations ~~~~~~~~~~~~ -- +- :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`). - - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ca572e2e56b6c..0985de3126c5a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -80,7 +80,8 @@ from pandas.compat import PY36 from pandas.compat.numpy import function as nv from pandas.util._decorators import (Appender, Substitution, - rewrite_axis_style_signature) + rewrite_axis_style_signature, + deprecate_kwarg) from pandas.util._validators import (validate_bool_kwarg, validate_axis_style_args) @@ -1764,6 +1765,7 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', startcol=startcol, freeze_panes=freeze_panes, engine=engine) + @deprecate_kwarg(old_arg_name='encoding', new_arg_name=None) def to_stata(self, fname, convert_dates=None, write_index=True, encoding="latin-1", byteorder=None, time_stamp=None, data_label=None, variable_labels=None, version=114, @@ -1869,9 +1871,8 @@ def to_stata(self, fname, convert_dates=None, write_index=True, kwargs['convert_strl'] = convert_strl writer = statawriter(fname, self, convert_dates=convert_dates, - encoding=encoding, byteorder=byteorder, - time_stamp=time_stamp, data_label=data_label, - write_index=write_index, + byteorder=byteorder, time_stamp=time_stamp, + data_label=data_label, write_index=write_index, variable_labels=variable_labels, **kwargs) writer.write_file() diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 8584e1f0e3f14..b2a5bec2a4837 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -33,11 +33,7 @@ from pandas.core.series import Series from pandas.io.common import (get_filepath_or_buffer, BaseIterator, _stringify_path) -from pandas.util._decorators import Appender -from pandas.util._decorators import deprecate_kwarg - -VALID_ENCODINGS = ('ascii', 'us-ascii', 'latin-1', 'latin_1', 'iso-8859-1', - 'iso8859-1', '8859', 'cp819', 'latin', 'latin1', 'L1') +from pandas.util._decorators import Appender, deprecate_kwarg _version_error = ("Version of given Stata file is not 104, 105, 108, " "111 (Stata 7SE), 113 (Stata 8/9), 114 (Stata 10/11), " @@ -169,6 +165,7 @@ @Appender(_read_stata_doc) +@deprecate_kwarg(old_arg_name='encoding', new_arg_name=None) @deprecate_kwarg(old_arg_name='index', new_arg_name='index_col') def read_stata(filepath_or_buffer, 
convert_dates=True, convert_categoricals=True, encoding=None, index_col=None, @@ -952,6 +949,7 @@ def __init__(self): class StataReader(StataParser, BaseIterator): __doc__ = _stata_reader_doc + @deprecate_kwarg(old_arg_name='encoding', new_arg_name=None) @deprecate_kwarg(old_arg_name='index', new_arg_name='index_col') def __init__(self, path_or_buf, convert_dates=True, convert_categoricals=True, index_col=None, @@ -970,7 +968,7 @@ def __init__(self, path_or_buf, convert_dates=True, self._preserve_dtypes = preserve_dtypes self._columns = columns self._order_categoricals = order_categoricals - self._encoding = encoding + self._encoding = None self._chunksize = chunksize # State variables for the file @@ -1962,17 +1960,14 @@ class StataWriter(StataParser): _max_string_length = 244 + @deprecate_kwarg(old_arg_name='encoding', new_arg_name=None) def __init__(self, fname, data, convert_dates=None, write_index=True, encoding="latin-1", byteorder=None, time_stamp=None, data_label=None, variable_labels=None): super(StataWriter, self).__init__() self._convert_dates = {} if convert_dates is None else convert_dates self._write_index = write_index - if encoding is not None: - if encoding not in VALID_ENCODINGS: - raise ValueError('Unknown encoding. Only latin-1 and ascii ' - 'supported.') - self._encoding = encoding + self._encoding = 'latin-1' self._time_stamp = time_stamp self._data_label = data_label self._variable_labels = variable_labels @@ -2731,6 +2726,7 @@ class StataWriter117(StataWriter): _max_string_length = 2045 + @deprecate_kwarg(old_arg_name='encoding', new_arg_name=None) def __init__(self, fname, data, convert_dates=None, write_index=True, encoding="latin-1", byteorder=None, time_stamp=None, data_label=None, variable_labels=None, convert_strl=None): @@ -2738,9 +2734,10 @@ def __init__(self, fname, data, convert_dates=None, write_index=True, self._convert_strl = [] if convert_strl is None else convert_strl[:] super(StataWriter117, self).__init__(fname, data, convert_dates, - write_index, encoding, byteorder, - time_stamp, data_label, - variable_labels) + write_index, byteorder=byteorder, + time_stamp=time_stamp, + data_label=data_label, + variable_labels=variable_labels) self._map = None self._strl_blob = None diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index e5585902a9dd6..bfb72be80400e 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -361,7 +361,8 @@ def test_encoding(self, version): # GH 4626, proper encoding handling raw = read_stata(self.dta_encoding) - encoded = read_stata(self.dta_encoding, encoding="latin-1") + with tm.assert_produces_warning(FutureWarning): + encoded = read_stata(self.dta_encoding, encoding='latin-1') result = encoded.kreis1849[0] expected = raw.kreis1849[0] @@ -369,8 +370,9 @@ def test_encoding(self, version): assert isinstance(result, compat.string_types) with tm.ensure_clean() as path: - encoded.to_stata(path, encoding='latin-1', - write_index=False, version=version) + with tm.assert_produces_warning(FutureWarning): + encoded.to_stata(path, write_index=False, version=version, + encoding='latin-1') reread_encoded = read_stata(path) tm.assert_frame_equal(encoded, reread_encoded) @@ -1349,13 +1351,6 @@ def test_out_of_range_float(self): assert 'ColumnTooBig' in cm.exception assert 'infinity' in cm.exception - def test_invalid_encoding(self): - # GH15723, validate encoding - original = self.read_csv(self.csv3) - with pytest.raises(ValueError): - with tm.ensure_clean() as path: - original.to_stata(path, 
encoding='utf-8') - def test_path_pathlib(self): df = tm.makeDataFrame() df.index.name = 'index' From a1111b38b419caab4253d97dd78a7e125148e911 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 11 Jun 2018 17:15:29 -0700 Subject: [PATCH 016/113] DOC: Add 0.23.2 whatsnew template (#21433) --- doc/source/whatsnew/v0.23.2.txt | 82 +++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 doc/source/whatsnew/v0.23.2.txt diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt new file mode 100644 index 0000000000000..ec2eddcfd4d41 --- /dev/null +++ b/doc/source/whatsnew/v0.23.2.txt @@ -0,0 +1,82 @@ +.. _whatsnew_0232: + +v0.23.2 +------- + +This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes +and bug fixes. We recommend that all users upgrade to this version. + +.. contents:: What's new in v0.23.2 + :local: + :backlinks: none + +.. _whatsnew_0232.enhancements: + +New features +~~~~~~~~~~~~ + + +.. _whatsnew_0232.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- +- + +.. _whatsnew_0232.performance: + +Performance Improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- +- + +Documentation Changes +~~~~~~~~~~~~~~~~~~~~~ + +- +- + +.. _whatsnew_0232.bug_fixes: + +Bug Fixes +~~~~~~~~~ + +- +- + +Conversion +^^^^^^^^^^ + +- +- + +Indexing +^^^^^^^^ + +- +- + +I/O +^^^ + +- +- + +Plotting +^^^^^^^^ + +- +- + +Reshaping +^^^^^^^^^ + +- +- + +Categorical +^^^^^^^^^^^ + +- From 4423dc9d5daa9e9b5f52f60cfe1c23a48ab6bbf3 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 11 Jun 2018 17:16:36 -0700 Subject: [PATCH 017/113] MAINT: More friendly error msg on Index overflow (#21377) * MAINT: More useful error msg on Index overflow Display a more friendly error message when there is an OverflowError during Index construction. Partially addresses gh-15832. * DOC: Clarify how Index.__new__ handles dtype Partially addresses gh-15823. --- pandas/core/indexes/base.py | 12 +++++++++++- pandas/tests/indexes/test_base.py | 7 +++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index dff6b5421d5ab..bf1051332ee19 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -181,6 +181,9 @@ class Index(IndexOpsMixin, PandasObject): ---------- data : array-like (1-dimensional) dtype : NumPy dtype (default: object) + If dtype is None, we find the dtype that best fits the data. + If an actual dtype is provided, we coerce to that dtype if it's safe. + Otherwise, an error will be raised. 
copy : bool Make a copy of input ndarray name : object @@ -306,7 +309,14 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if is_integer_dtype(dtype): inferred = lib.infer_dtype(data) if inferred == 'integer': - data = np.array(data, copy=copy, dtype=dtype) + try: + data = np.array(data, copy=copy, dtype=dtype) + except OverflowError: + # gh-15823: a more user-friendly error message + raise OverflowError( + "the elements provided in the data cannot " + "all be casted to the dtype {dtype}" + .format(dtype=dtype)) elif inferred in ['floating', 'mixed-integer-float']: if isna(data).any(): raise ValueError('cannot convert float ' diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index f9f16dc0ce8b7..c264f5f79e47e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -474,6 +474,13 @@ def test_constructor_nonhashable_name(self, indices): tm.assert_raises_regex(TypeError, message, indices.set_names, names=renamed) + def test_constructor_overflow_int64(self): + # see gh-15832 + msg = ("the elements provided in the data cannot " + "all be casted to the dtype int64") + with tm.assert_raises_regex(OverflowError, msg): + Index([np.iinfo(np.uint64).max - 1], dtype="int64") + def test_view_with_args(self): restricted = ['unicodeIndex', 'strIndex', 'catIndex', 'boolIndex', From 0550ef78be45744075bd12e010114d5706262111 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 12 Jun 2018 09:54:11 +0200 Subject: [PATCH 018/113] DOC: follow 0.23.1 template for 0.23.2 whatsnew (#21435) --- doc/source/whatsnew/v0.23.2.txt | 36 +++++++++++++++------------------ 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index ec2eddcfd4d41..c636e73fbd6c2 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -10,16 +10,11 @@ and bug fixes. We recommend that all users upgrade to this version. :local: :backlinks: none -.. _whatsnew_0232.enhancements: -New features -~~~~~~~~~~~~ +.. _whatsnew_0232.fixed_regressions: - -.. 
_whatsnew_0232.deprecations: - -Deprecations -~~~~~~~~~~~~ +Fixed Regressions +~~~~~~~~~~~~~~~~~ - - @@ -43,40 +38,41 @@ Documentation Changes Bug Fixes ~~~~~~~~~ +**Groupby/Resample/Rolling** + - - -Conversion -^^^^^^^^^^ +**Conversion** + - - -Indexing -^^^^^^^^ +**Indexing** - - -I/O -^^^ +**I/O** - - -Plotting -^^^^^^^^ +**Plotting** - - -Reshaping -^^^^^^^^^ +**Reshaping** - - -Categorical -^^^^^^^^^^^ +**Categorical** + +- + +**Other** - From 57c82220f9a3ada5b20864e83a1874343de43bb2 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 12 Jun 2018 08:57:03 +0100 Subject: [PATCH 019/113] DOC: Loading sphinxcontrib.spelling to sphinx only if it's available (#21397) --- ci/environment-dev.yaml | 1 + ci/requirements_dev.txt | 1 + doc/source/conf.py | 13 +++++++++++-- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/ci/environment-dev.yaml b/ci/environment-dev.yaml index f9f9208519d61..5733857b55dd4 100644 --- a/ci/environment-dev.yaml +++ b/ci/environment-dev.yaml @@ -13,3 +13,4 @@ dependencies: - pytz - setuptools>=24.2.0 - sphinx + - sphinxcontrib-spelling diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt index 3430e778a4573..83ee30b52071d 100644 --- a/ci/requirements_dev.txt +++ b/ci/requirements_dev.txt @@ -9,3 +9,4 @@ python-dateutil>=2.5.0 pytz setuptools>=24.2.0 sphinx +sphinxcontrib-spelling \ No newline at end of file diff --git a/doc/source/conf.py b/doc/source/conf.py index 97081bec863b7..909bd5a80b76e 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -16,9 +16,11 @@ import re import inspect import importlib -from sphinx.ext.autosummary import _import_by_name +import logging import warnings +from sphinx.ext.autosummary import _import_by_name +logger = logging.getLogger(__name__) try: raw_input # Python 2 @@ -73,9 +75,16 @@ 'sphinx.ext.ifconfig', 'sphinx.ext.linkcode', 'nbsphinx', - 'sphinxcontrib.spelling' ] +try: + import sphinxcontrib.spelling +except ImportError as err: + logger.warn(('sphinxcontrib.spelling failed to import with error "{}". ' + '`spellcheck` command is not available.'.format(err))) +else: + extensions.append('sphinxcontrib.spelling') + exclude_patterns = ['**.ipynb_checkpoints'] spelling_word_list_filename = ['spelling_wordlist.txt', 'names_wordlist.txt'] From 8a51d075343dafb073ea2eb2c054e5518686053e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 12 Jun 2018 11:15:12 +0200 Subject: [PATCH 020/113] Fix flake8 in conf.py (#21438) --- doc/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 909bd5a80b76e..5534700f0734a 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -78,7 +78,7 @@ ] try: - import sphinxcontrib.spelling + import sphinxcontrib.spelling # noqa except ImportError as err: logger.warn(('sphinxcontrib.spelling failed to import with error "{}". ' '`spellcheck` command is not available.'.format(err))) From 7f2f340fb43963ec1444326239d98b3faa9b4b13 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Tue, 12 Jun 2018 12:28:37 +0100 Subject: [PATCH 021/113] Doc Fixes (#21415) --- pandas/core/indexing.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 2c40be17ce781..0e4f040253560 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -46,6 +46,15 @@ class _IndexSlice(object): """ Create an object to more easily perform multi-index slicing + See Also + -------- + MultiIndex.remove_unused_levels : New MultiIndex with no unused levels. 
+ + Notes + ----- + See :ref:`Defined Levels ` + for further info on slicing a MultiIndex. + Examples -------- From 56477a032893c307189cbe68e374c1457d6f9f89 Mon Sep 17 00:00:00 2001 From: testvinder Date: Tue, 12 Jun 2018 13:31:10 +0200 Subject: [PATCH 022/113] Reapply all patches by @testvinder against master (#21413) --- pandas/core/reshape/pivot.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 9a2ad5d13d77a..3390451c60c0f 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -446,7 +446,18 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) >>> crosstab(foo, bar) # 'c' and 'f' are not represented in the data, - ... # but they still will be counted in the output + # and will not be shown in the output because + # dropna is True by default. Set 'dropna=False' + # to preserve categories with no data + ... # doctest: +SKIP + col_0 d e + row_0 + a 1 0 + b 0 1 + + >>> crosstab(foo, bar, dropna=False) # 'c' and 'f' are not represented + # in the data, but they still will be counted + # and shown in the output ... # doctest: +SKIP col_0 d e f row_0 From ea922c64a19a703f43c2c5a771db7f535bbdaab4 Mon Sep 17 00:00:00 2001 From: Antti Kaihola Date: Tue, 12 Jun 2018 14:37:01 +0300 Subject: [PATCH 023/113] Two tests didn't properly assert an exception was raised. Fixed. (#21409) --- .../tests/indexes/datetimes/test_indexing.py | 5 ++--- pandas/tests/io/parser/c_parser_only.py | 22 +++++++++---------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index dd192db4b0eb3..8cffa035721b0 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -583,7 +583,6 @@ def test_get_indexer(self): def test_reasonable_keyerror(self): # GH#1062 index = DatetimeIndex(['1/3/2000']) - try: + with pytest.raises(KeyError) as excinfo: index.get_loc('1/1/2000') - except KeyError as e: - assert '2000' in str(e) + assert '2000' in str(excinfo.value) diff --git a/pandas/tests/io/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py index e0422249289b7..9dc7b070f889d 100644 --- a/pandas/tests/io/parser/c_parser_only.py +++ b/pandas/tests/io/parser/c_parser_only.py @@ -23,21 +23,19 @@ class CParserTests(object): - def test_buffer_overflow(self): + @pytest.mark.parametrize( + 'malf', + ['1\r1\r1\r 1\r 1\r', + '1\r1\r1\r 1\r 1\r11\r', + '1\r1\r1\r 1\r 1\r11\r1\r'], + ids=['words pointer', 'stream pointer', 'lines pointer']) + def test_buffer_overflow(self, malf): # see gh-9205: test certain malformed input files that cause # buffer overflows in tokenizer.c - - malfw = "1\r1\r1\r 1\r 1\r" # buffer overflow in words pointer - malfs = "1\r1\r1\r 1\r 1\r11\r" # buffer overflow in stream pointer - malfl = "1\r1\r1\r 1\r 1\r11\r1\r" # buffer overflow in lines pointer - cperr = 'Buffer overflow caught - possible malformed input file.' 
- - for malf in (malfw, malfs, malfl): - try: - self.read_table(StringIO(malf)) - except Exception as err: - assert cperr in str(err) + with pytest.raises(pd.errors.ParserError) as excinfo: + self.read_table(StringIO(malf)) + assert cperr in str(excinfo.value) def test_buffer_rd_bytes(self): # see gh-12098: src->buffer in the C parser can be freed twice leading From a37c3d228c905be6cd7e41e7b57b55aab0753c0d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 12 Jun 2018 11:42:55 -0500 Subject: [PATCH 024/113] DOC: 0.23.1 release (#21446) --- doc/source/release.rst | 49 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index fa03d614ed42c..7bbd4ba43e66f 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -37,10 +37,57 @@ analysis / manipulation tool available in any language. * Binary installers on PyPI: https://pypi.org/project/pandas * Documentation: http://pandas.pydata.org +pandas 0.23.1 +------------- + +**Release date**: June 12, 2018 + +This is a minor release from 0.23.0 and includes a number of bug fixes and +performance improvements. + +See the :ref:`full whatsnew ` for a list of all the changes. + +Thanks +~~~~~~ + +A total of 30 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +* Adam J. Stewart +* Adam Kim + +* Aly Sivji +* Chalmer Lowe + +* Damini Satya + +* Dr. Irv +* Gabe Fernando + +* Giftlin Rajaiah +* Jeff Reback +* Jeremy Schendel + +* Joris Van den Bossche +* Kalyan Gokhale + +* Kevin Sheppard +* Matthew Roeschke +* Max Kanter + +* Ming Li +* Pyry Kovanen + +* Stefano Cianciulli +* Tom Augspurger +* Uddeshya Singh + +* Wenhuan +* William Ayd +* chris-b1 +* gfyoung +* h-vetinari +* nprad + +* ssikdar1 + +* tmnhat2001 +* topper-123 +* zertrin + + pandas 0.23.0 ------------- -**Release date**: May 15, 2017 +**Release date**: May 15, 2018 This is a major release from 0.22.0 and includes a number of API changes, new features, enhancements, and performance improvements along with a large number From 9f9b63644fac430e8e2b541d00aa100a541ac324 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 13 Jun 2018 02:55:53 -0500 Subject: [PATCH 025/113] DOC: Fixed warning in doc build (#21449) --- doc/source/ecosystem.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index 56fea1ccfd9dc..f683fd6892ea5 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -39,7 +39,7 @@ Use pandas DataFrames in your `scikit-learn `__ ML pipeline. `Featuretools `__ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Featuretools is a Python library for automated feature engineering built on top of pandas. It excels at transforming temporal and relational datasets into feature matrices for machine learning using reusable feature engineering "primitives". Users can contribute their own primitives in Python and share them with the rest of the community. 
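The rounding patches earlier in this series (001-012) converge on a vectorized round_ns that first masks out values already sitting on an exact multiple of the frequency, so that ceil/floor/round leave them untouched (GH21262). What follows is a minimal standalone sketch of that mask-based short-circuit, not part of any patch above; the function and variable names (round_to_freq, two_min, ts) are invented for illustration, and it omits the float-precision handling (the buff split for sub-microsecond units and the GH19206 divisor for large units) that the real implementation keeps.

    import numpy as np

    def round_to_freq(values, unit, rounder=np.rint):
        # values: int64 nanoseconds since the epoch; unit: frequency in nanoseconds
        values = np.asarray(values, dtype=np.int64)
        # GH21262: exact multiples of the frequency are returned unchanged
        # instead of being pushed through the floating-point rounding path.
        mask = values % unit == 0
        if mask.all():
            return values
        r = values.copy()
        # Round the remaining values in units of `unit`, then scale back to ns.
        r[~mask] = unit * rounder(values[~mask] / float(unit)).astype('i8')
        return r

    two_min = 120 * 10 ** 9                # 2 minutes in nanoseconds
    ts = np.array([1514764920 * 10 ** 9])  # 2018-01-01 00:02:00 UTC
    # Already a multiple of 2 minutes, so ceil, floor and round all leave it alone.
    for rounder in (np.ceil, np.floor, np.rint):
        assert (round_to_freq(ts, two_min, rounder) == ts).all()

Working on an int64 array with a boolean mask is also what lets the scalar Timestamp._round path reuse the same code: after the refactor patches above it wraps its single value in a one-element array and takes element 0 of the result.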
From c5a11d6a6ca943ae5296b2857fad403b397f2b6a Mon Sep 17 00:00:00 2001 From: Joel Ostblom Date: Wed, 13 Jun 2018 03:57:29 -0400 Subject: [PATCH 026/113] DOC: add favicon to doc pages (#21440) --- doc/source/_static/favicon.ico | Bin 0 -> 3902 bytes doc/source/conf.py | 10 +++++----- 2 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 doc/source/_static/favicon.ico diff --git a/doc/source/_static/favicon.ico b/doc/source/_static/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..d15c4803b62e6dd2f706a5ebe1861fe438f5d98f GIT binary patch literal 3902 zcmeH}Jxg3c6oyApR`3f%m!K?eBQf?Wh`|)vXg~{V3qO)NRjPnkB%oMmVIkVur4U3B z#Si=g5-b(;7x+=Fpl+U5MjU7FF1y@&!D455cJ4W6_PpoLoteAFRPafq4c}?g*=S7C z7}E(U_yY2)%{CZwVtICy@U```-9QrNV8OCTKMEyeDt@T)LKoaZ)?u0J;uDoHQhDJM zT!X8*q*xqHd7-Qs!{ollxuvE`j|$ZprWLqP?a?9Fg&oT_eK&-W)SAt=ZgoCPgS&rp z{Z+pS)AV}?+AGqW1XuG3dl&*Gd z|AG%N0W+5G^u6#AzFD7Qn(GuO?&kQ7-3Y4Ft@{ys01iF>8Fn4yt3AlS>E*b>ZiRWz zpTYhNd!GLk`&l#aA$d;5s)oN_jtlIvW_fPC)e>yJ!!`7WnjYZZqI0Gn_QBn^QSOzr z)sT+pgC;nDxHN;#k1F*1b11U=^j82{s-Ze2&2#d$#;n-K?he$_D5it9RZdY(XtVE&1|1)i*;M m=p8FsAoPD$6`Va9$mGWmd*#gyCLW9_f*Z!XlFi&C-tr%v>Bf=( literal 0 HcmV?d00001 diff --git a/doc/source/conf.py b/doc/source/conf.py index 5534700f0734a..29f947e1144ea 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -213,16 +213,16 @@ # of the sidebar. # html_logo = None -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# html_favicon = None - # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +html_favicon = os.path.join(html_static_path[0], 'favicon.ico') + # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. 
# html_last_updated_fmt = '%b %d, %Y' From bcc6b8c0386e35800400f8e4f0b41df3932c42e0 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Wed, 13 Jun 2018 11:25:58 +0100 Subject: [PATCH 027/113] Fix tests fragile to PATH (#21453) --- pandas/tests/plotting/test_converter.py | 3 ++- pandas/tests/test_downstream.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 47cded19f5300..bb976a1e3e81c 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -1,4 +1,5 @@ import subprocess +import sys import pytest from datetime import datetime, date @@ -27,7 +28,7 @@ def test_register_by_default(self): "import pandas as pd; " "units = dict(matplotlib.units.registry); " "assert pd.Timestamp in units)'") - call = ['python', '-c', code] + call = [sys.executable, '-c', code] assert subprocess.check_call(call) == 0 def test_warns(self): diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index afd7993fefc70..cf98cff97669a 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -3,6 +3,7 @@ Testing that we work in the downstream packages """ import subprocess +import sys import pytest import numpy as np # noqa @@ -57,7 +58,7 @@ def test_xarray(df): def test_oo_optimizable(): # GH 21071 - subprocess.check_call(["python", "-OO", "-c", "import pandas"]) + subprocess.check_call([sys.executable, "-OO", "-c", "import pandas"]) @tm.network From 402905d869184c2ad23cd08869c10a2788ba2efe Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Jun 2018 03:32:54 -0700 Subject: [PATCH 028/113] use ccalendar.get_days_in_month, deprecate tslib.monthrange (#21451) --- pandas/_libs/tslib.pyx | 16 +--------------- pandas/_libs/tslibs/resolution.pyx | 4 ++-- pandas/core/indexes/datetimes.py | 5 +++-- pandas/tests/tseries/offsets/test_offsets.py | 6 ------ pandas/tseries/offsets.py | 4 ++-- setup.py | 1 + 6 files changed, 9 insertions(+), 27 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 0f58cfa761f21..4f73f196b0d9d 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -25,9 +25,7 @@ from tslibs.np_datetime cimport (check_dts_bounds, _string_to_dts, dt64_to_dtstruct, dtstruct_to_dt64, pydatetime_to_dt64, pydate_to_dt64, - get_datetime64_value, - days_per_month_table, - dayofweek, is_leapyear) + get_datetime64_value) from tslibs.np_datetime import OutOfBoundsDatetime from tslibs.parsing import parse_datetime_string @@ -763,18 +761,6 @@ cdef inline bint _parse_today_now(str val, int64_t* iresult): # Some general helper functions -def monthrange(int64_t year, int64_t month): - cdef: - int64_t days - - if month < 1 or month > 12: - raise ValueError("bad month number 0; must be 1-12") - - days = days_per_month_table[is_leapyear(year)][month - 1] - - return (dayofweek(year, month, 1), days) - - cpdef normalize_date(object dt): """ Normalize datetime.datetime value to midnight. 
Returns datetime.date as a diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index d0a9501afe566..2f185f4142a09 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -25,6 +25,7 @@ from fields import build_field_sarray from conversion import tz_convert from conversion cimport tz_convert_utc_to_tzlocal from ccalendar import MONTH_ALIASES, int_to_weekday +from ccalendar cimport get_days_in_month from pandas._libs.properties import cache_readonly from pandas._libs.tslib import Timestamp @@ -487,7 +488,6 @@ class _FrequencyInferer(object): days = self.fields['D'] weekdays = self.index.dayofweek - from calendar import monthrange for y, m, d, wd in zip(years, months, days, weekdays): if calendar_start: @@ -496,7 +496,7 @@ class _FrequencyInferer(object): business_start &= d == 1 or (d <= 3 and wd == 0) if calendar_end or business_end: - _, daysinmonth = monthrange(y, m) + daysinmonth = get_days_in_month(y, m) cal = d == daysinmonth if calendar_end: calendar_end &= cal diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index d44b13172f86d..66622814f172d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -55,6 +55,7 @@ from pandas._libs import (lib, index as libindex, tslib as libts, join as libjoin, Timestamp) from pandas._libs.tslibs import (timezones, conversion, fields, parsing, + ccalendar, resolution as libresolution) # -------- some conversion wrapper functions @@ -1451,14 +1452,14 @@ def _parsed_string_to_bounds(self, reso, parsed): Timestamp(datetime(parsed.year, 12, 31, 23, 59, 59, 999999), tz=self.tz)) elif reso == 'month': - d = libts.monthrange(parsed.year, parsed.month)[1] + d = ccalendar.get_days_in_month(parsed.year, parsed.month) return (Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz), Timestamp(datetime(parsed.year, parsed.month, d, 23, 59, 59, 999999), tz=self.tz)) elif reso == 'quarter': qe = (((parsed.month - 1) + 2) % 12) + 1 # two months ahead - d = libts.monthrange(parsed.year, qe)[1] # at end of month + d = ccalendar.get_days_in_month(parsed.year, qe) # at end of month return (Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz), Timestamp(datetime(parsed.year, qe, d, 23, 59, diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 5369b1a94a956..a1c5a825054ec 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -41,12 +41,6 @@ from .common import assert_offset_equal, assert_onOffset -def test_monthrange(): - import calendar - for y in range(2000, 2013): - for m in range(1, 13): - assert tslib.monthrange(y, m) == calendar.monthrange(y, m) - #### # Misc function tests #### diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index c294110d89ec5..a5a983bf94bb8 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1140,7 +1140,7 @@ def apply(self, other): # shift `other` to self.day_of_month, incrementing `n` if necessary n = liboffsets.roll_convention(other.day, self.n, self.day_of_month) - days_in_month = tslib.monthrange(other.year, other.month)[1] + days_in_month = ccalendar.get_days_in_month(other.year, other.month) # For SemiMonthBegin on other.day == 1 and # SemiMonthEnd on other.day == days_in_month, @@ -1217,7 +1217,7 @@ class SemiMonthEnd(SemiMonthOffset): def onOffset(self, dt): if self.normalize and not _is_normalized(dt): return False - _, days_in_month = 
tslib.monthrange(dt.year, dt.month) + days_in_month = ccalendar.get_days_in_month(dt.year, dt.month) return dt.day in (self.day_of_month, days_in_month) def _apply(self, n, other): diff --git a/setup.py b/setup.py index 90ec8e91a0700..d6890a08b09d0 100755 --- a/setup.py +++ b/setup.py @@ -603,6 +603,7 @@ def pxd(name): 'pyxfile': '_libs/tslibs/resolution', 'pxdfiles': ['_libs/src/util', '_libs/khash', + '_libs/tslibs/ccalendar', '_libs/tslibs/frequencies', '_libs/tslibs/timezones'], 'depends': tseries_depends, From 22d65e17a837e25430bd04873db0036134eb6938 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 13 Jun 2018 03:51:41 -0700 Subject: [PATCH 029/113] BUG: Construct Timestamp with tz correctly near DST border (#21407) --- doc/source/whatsnew/v0.23.2.txt | 4 ++++ pandas/_libs/tslibs/conversion.pyx | 22 ++++--------------- pandas/tests/frame/test_timezones.py | 10 +++++++++ .../indexes/datetimes/test_construction.py | 9 ++++++++ .../indexes/datetimes/test_date_range.py | 14 ++++++++++++ .../tests/scalar/timestamp/test_timestamp.py | 8 +++++++ 6 files changed, 49 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index c636e73fbd6c2..1de44ffeb4160 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -73,6 +73,10 @@ Bug Fixes - +**Timezones** +- Bug in :class:`Timestamp` and :class:`DatetimeIndex` where passing a :class:`Timestamp` localized after a DST transition would return a datetime before the DST transition (:issue:`20854`) +- Bug in comparing :class:`DataFrame`s with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`) + **Other** - diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index f4841e6abb7e8..3cbef82437544 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -347,25 +347,11 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, if tz is not None: tz = maybe_get_tz(tz) - # sort of a temporary hack if ts.tzinfo is not None: - if hasattr(tz, 'normalize') and hasattr(ts.tzinfo, '_utcoffset'): - ts = tz.normalize(ts) - obj.value = pydatetime_to_dt64(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - else: - # tzoffset - try: - tz = ts.astimezone(tz).tzinfo - except: - pass - obj.value = pydatetime_to_dt64(ts, &obj.dts) - ts_offset = get_utcoffset(ts.tzinfo, ts) - obj.value -= int(ts_offset.total_seconds() * 1e9) - tz_offset = get_utcoffset(tz, ts) - obj.value += int(tz_offset.total_seconds() * 1e9) - dt64_to_dtstruct(obj.value, &obj.dts) - obj.tzinfo = tz + # Convert the current timezone to the passed timezone + ts = ts.astimezone(tz) + obj.value = pydatetime_to_dt64(ts, &obj.dts) + obj.tzinfo = ts.tzinfo elif not is_utc(tz): ts = _localize_pydatetime(ts, tz) obj.value = pydatetime_to_dt64(ts, &obj.dts) diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py index fa589a0aa4817..3956968173070 100644 --- a/pandas/tests/frame/test_timezones.py +++ b/pandas/tests/frame/test_timezones.py @@ -133,3 +133,13 @@ def test_frame_reset_index(self, tz): xp = df.index.tz rs = roundtripped.index.tz assert xp == rs + + @pytest.mark.parametrize('tz', [None, 'America/New_York']) + def test_boolean_compare_transpose_tzindex_with_dst(self, tz): + # GH 19970 + idx = date_range('20161101', '20161130', freq='4H', tz=tz) + df = DataFrame({'a': range(len(idx)), 'b': range(len(idx))}, + index=idx) + result = df.T == df.T + expected = 
DataFrame(True, index=list('ab'), columns=idx) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index dae69a86910af..b138b79caac76 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -469,6 +469,15 @@ def test_constructor_with_non_normalized_pytz(self, tz): result = DatetimeIndex(['2010'], tz=non_norm_tz) assert pytz.timezone(tz) is result.tz + def test_constructor_timestamp_near_dst(self): + # GH 20854 + ts = [Timestamp('2016-10-30 03:00:00+0300', tz='Europe/Helsinki'), + Timestamp('2016-10-30 03:00:00+0200', tz='Europe/Helsinki')] + result = DatetimeIndex(ts) + expected = DatetimeIndex([ts[0].to_pydatetime(), + ts[1].to_pydatetime()]) + tm.assert_index_equal(result, expected) + class TestTimeSeries(object): diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 193804b66395b..ec37bbbcb6c02 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -278,6 +278,20 @@ def test_wom_len(self, periods): res = date_range(start='20110101', periods=periods, freq='WOM-1MON') assert len(res) == periods + def test_construct_over_dst(self): + # GH 20854 + pre_dst = Timestamp('2010-11-07 01:00:00').tz_localize('US/Pacific', + ambiguous=True) + pst_dst = Timestamp('2010-11-07 01:00:00').tz_localize('US/Pacific', + ambiguous=False) + expect_data = [Timestamp('2010-11-07 00:00:00', tz='US/Pacific'), + pre_dst, + pst_dst] + expected = DatetimeIndex(expect_data) + result = date_range(start='2010-11-7', periods=3, + freq='H', tz='US/Pacific') + tm.assert_index_equal(result, expected) + class TestGenRangeGeneration(object): diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index ab87d98fca8eb..4689c7bea626f 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -528,6 +528,14 @@ def test_disallow_setting_tz(self, tz): with pytest.raises(AttributeError): ts.tz = tz + @pytest.mark.parametrize('offset', ['+0300', '+0200']) + def test_construct_timestamp_near_dst(self, offset): + # GH 20854 + expected = Timestamp('2016-10-30 03:00:00{}'.format(offset), + tz='Europe/Helsinki') + result = Timestamp(expected, tz='Europe/Helsinki') + assert result == expected + class TestTimestamp(object): From 29083462a24d10df1dd727112fc362a5cf074c15 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Jun 2018 03:54:55 -0700 Subject: [PATCH 030/113] parametrize tests, unify repeated tests (#21405) --- pandas/tests/tseries/offsets/test_offsets.py | 522 +++++++++---------- 1 file changed, 243 insertions(+), 279 deletions(-) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index a1c5a825054ec..8bf0d9f915d04 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -83,6 +83,7 @@ def test_to_m8(): class Base(object): _offset = None + d = Timestamp(datetime(2008, 1, 2)) timezones = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/Asia/Tokyo', 'dateutil/US/Pacific'] @@ -142,6 +143,56 @@ def test_apply_out_of_range(self, tz): # so ignore pass + def test_offsets_compare_equal(self): + # root cause of GH#456: __ne__ was not implemented + if self._offset is None: + return + offset1 = 
self._offset() + offset2 = self._offset() + assert not offset1 != offset2 + assert offset1 == offset2 + + def test_rsub(self): + if self._offset is None or not hasattr(self, "offset2"): + # i.e. skip for TestCommon and YQM subclasses that do not have + # offset2 attr + return + assert self.d - self.offset2 == (-self.offset2).apply(self.d) + + def test_radd(self): + if self._offset is None or not hasattr(self, "offset2"): + # i.e. skip for TestCommon and YQM subclasses that do not have + # offset2 attr + return + assert self.d + self.offset2 == self.offset2 + self.d + + def test_sub(self): + if self._offset is None or not hasattr(self, "offset2"): + # i.e. skip for TestCommon and YQM subclasses that do not have + # offset2 attr + return + off = self.offset2 + with pytest.raises(Exception): + off - self.d + + assert 2 * off - off == off + assert self.d - self.offset2 == self.d + self._offset(-2) + assert self.d - self.offset2 == self.d - (2 * off - off) + + def testMult1(self): + if self._offset is None or not hasattr(self, "offset1"): + # i.e. skip for TestCommon and YQM subclasses that do not have + # offset1 attr + return + assert self.d + 10 * self.offset1 == self.d + self._offset(10) + assert self.d + 5 * self.offset1 == self.d + self._offset(5) + + def testMult2(self): + if self._offset is None: + return + assert self.d + (-5 * self._offset(-10)) == self.d + self._offset(50) + assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6) + class TestCommon(Base): # exected value created by Base._get_offset @@ -509,6 +560,7 @@ def setup_method(self, method): self.d = datetime(2008, 1, 1) self.offset = BDay() + self.offset1 = self.offset self.offset2 = BDay(2) def test_different_normalize_equals(self): @@ -530,7 +582,7 @@ def test_with_offset(self): assert (self.d + offset) == datetime(2008, 1, 2, 2) - def testEQ(self): + def test_eq(self): assert self.offset2 == self.offset2 def test_mul(self): @@ -539,28 +591,9 @@ def test_mul(self): def test_hash(self): assert hash(self.offset2) == hash(self.offset2) - def testCall(self): + def test_call(self): assert self.offset2(self.d) == datetime(2008, 1, 3) - def testRAdd(self): - assert self.d + self.offset2 == self.offset2 + self.d - - def testSub(self): - off = self.offset2 - pytest.raises(Exception, off.__sub__, self.d) - assert 2 * off - off == off - - assert self.d - self.offset2 == self.d + BDay(-2) - - def testRSub(self): - assert self.d - self.offset2 == (-self.offset2).apply(self.d) - - def testMult1(self): - assert self.d + 10 * self.offset == self.d + BDay(10) - - def testMult2(self): - assert self.d + (-5 * BDay(-10)) == self.d + BDay(50) - def testRollback1(self): assert BDay(10).rollback(self.d) == self.d @@ -672,12 +705,6 @@ def test_apply_large_n(self): def test_apply_corner(self): pytest.raises(TypeError, BDay().apply, BMonthEnd()) - def test_offsets_compare_equal(self): - # root cause of #456 - offset1 = BDay() - offset2 = BDay() - assert not offset1 != offset2 - class TestBusinessHour(Base): _offset = BusinessHour @@ -729,7 +756,7 @@ def test_with_offset(self): assert self.d + BusinessHour() * 3 == expected assert self.d + BusinessHour(n=3) == expected - def testEQ(self): + def test_eq(self): for offset in [self.offset1, self.offset2, self.offset3, self.offset4]: assert offset == offset @@ -743,31 +770,22 @@ def test_hash(self): for offset in [self.offset1, self.offset2, self.offset3, self.offset4]: assert hash(offset) == hash(offset) - def testCall(self): + def test_call(self): assert self.offset1(self.d) == 
datetime(2014, 7, 1, 11)
         assert self.offset2(self.d) == datetime(2014, 7, 1, 13)
         assert self.offset3(self.d) == datetime(2014, 6, 30, 17)
         assert self.offset4(self.d) == datetime(2014, 6, 30, 14)
 
-    def testRAdd(self):
-        assert self.d + self.offset2 == self.offset2 + self.d
-
-    def testSub(self):
+    def test_sub(self):
+        # we have to override test_sub here because self.offset2 is not
+        # defined as self._offset(2)
         off = self.offset2
-        pytest.raises(Exception, off.__sub__, self.d)
+        with pytest.raises(Exception):
+            off - self.d
 
         assert 2 * off - off == off
         assert self.d - self.offset2 == self.d + self._offset(-3)
 
-    def testRSub(self):
-        assert self.d - self.offset2 == (-self.offset2).apply(self.d)
-
-    def testMult1(self):
-        assert self.d + 5 * self.offset1 == self.d + self._offset(5)
-
-    def testMult2(self):
-        assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6)
-
     def testRollback1(self):
         assert self.offset1.rollback(self.d) == self.d
         assert self.offset2.rollback(self.d) == self.d
@@ -1317,12 +1335,6 @@ def test_apply_nanoseconds(self):
         for base, expected in compat.iteritems(cases):
             assert_offset_equal(offset, base, expected)
 
-    def test_offsets_compare_equal(self):
-        # root cause of #456
-        offset1 = self._offset()
-        offset2 = self._offset()
-        assert not offset1 != offset2
-
     def test_datetimeindex(self):
         idx1 = DatetimeIndex(start='2014-07-04 15:00', end='2014-07-08 10:00',
                              freq='BH')
@@ -1361,6 +1373,8 @@ def test_datetimeindex(self):
 
 class TestCustomBusinessHour(Base):
     _offset = CustomBusinessHour
+    holidays = ['2014-06-27', datetime(2014, 6, 30),
+                np.datetime64('2014-07-02')]
 
     def setup_method(self, method):
         # 2014 Calendar to check custom holidays
@@ -1371,8 +1385,6 @@ def setup_method(self, method):
         self.d = datetime(2014, 7, 1, 10, 00)
 
         self.offset1 = CustomBusinessHour(weekmask='Tue Wed Thu Fri')
-        self.holidays = ['2014-06-27', datetime(2014, 6, 30),
-                         np.datetime64('2014-07-02')]
         self.offset2 = CustomBusinessHour(holidays=self.holidays)
 
     def test_constructor_errors(self):
@@ -1401,7 +1413,7 @@ def test_with_offset(self):
 
         assert self.d + CustomBusinessHour() * 3 == expected
         assert self.d + CustomBusinessHour(n=3) == expected
 
-    def testEQ(self):
+    def test_eq(self):
         for offset in [self.offset1, self.offset2]:
             assert offset == offset
 
@@ -1418,33 +1430,19 @@ def testEQ(self):
         assert (CustomBusinessHour(holidays=['2014-06-27']) !=
                 CustomBusinessHour(holidays=['2014-06-28']))
 
+    def test_sub(self):
+        # override the Base.test_sub implementation because self.offset2 is
+        # defined differently in this class than the test expects
+        pass
+
     def test_hash(self):
         assert hash(self.offset1) == hash(self.offset1)
         assert hash(self.offset2) == hash(self.offset2)
 
-    def testCall(self):
+    def test_call(self):
         assert self.offset1(self.d) == datetime(2014, 7, 1, 11)
         assert self.offset2(self.d) == datetime(2014, 7, 1, 11)
 
-    def testRAdd(self):
-        assert self.d + self.offset2 == self.offset2 + self.d
-
-    def testSub(self):
-        off = self.offset2
-        pytest.raises(Exception, off.__sub__, self.d)
-        assert 2 * off - off == off
-
-        assert self.d - self.offset2 == self.d - (2 * off - off)
-
-    def testRSub(self):
-        assert self.d - self.offset2 == (-self.offset2).apply(self.d)
-
-    def testMult1(self):
-        assert self.d + 5 * self.offset1 == self.d + self._offset(5)
-
-    def testMult2(self):
-        assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6)
-
     def testRollback1(self):
         assert self.offset1.rollback(self.d) == self.d
         assert self.offset2.rollback(self.d) == self.d
@@ -1484,49 +1482,51 @@ def 
test_roll_date_object(self): result = offset.rollforward(dt) assert result == datetime(2014, 7, 7, 9) - def test_normalize(self): - tests = [] - - tests.append((CustomBusinessHour(normalize=True, - holidays=self.holidays), - {datetime(2014, 7, 1, 8): datetime(2014, 7, 1), - datetime(2014, 7, 1, 17): datetime(2014, 7, 3), - datetime(2014, 7, 1, 16): datetime(2014, 7, 3), - datetime(2014, 7, 1, 23): datetime(2014, 7, 3), - datetime(2014, 7, 1, 0): datetime(2014, 7, 1), - datetime(2014, 7, 4, 15): datetime(2014, 7, 4), - datetime(2014, 7, 4, 15, 59): datetime(2014, 7, 4), - datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7), - datetime(2014, 7, 5, 23): datetime(2014, 7, 7), - datetime(2014, 7, 6, 10): datetime(2014, 7, 7)})) - - tests.append((CustomBusinessHour(-1, normalize=True, - holidays=self.holidays), - {datetime(2014, 7, 1, 8): datetime(2014, 6, 26), - datetime(2014, 7, 1, 17): datetime(2014, 7, 1), - datetime(2014, 7, 1, 16): datetime(2014, 7, 1), - datetime(2014, 7, 1, 10): datetime(2014, 6, 26), - datetime(2014, 7, 1, 0): datetime(2014, 6, 26), - datetime(2014, 7, 7, 10): datetime(2014, 7, 4), - datetime(2014, 7, 7, 10, 1): datetime(2014, 7, 7), - datetime(2014, 7, 5, 23): datetime(2014, 7, 4), - datetime(2014, 7, 6, 10): datetime(2014, 7, 4)})) - - tests.append((CustomBusinessHour(1, normalize=True, start='17:00', - end='04:00', holidays=self.holidays), - {datetime(2014, 7, 1, 8): datetime(2014, 7, 1), - datetime(2014, 7, 1, 17): datetime(2014, 7, 1), - datetime(2014, 7, 1, 23): datetime(2014, 7, 2), - datetime(2014, 7, 2, 2): datetime(2014, 7, 2), - datetime(2014, 7, 2, 3): datetime(2014, 7, 3), - datetime(2014, 7, 4, 23): datetime(2014, 7, 5), - datetime(2014, 7, 5, 2): datetime(2014, 7, 5), - datetime(2014, 7, 7, 2): datetime(2014, 7, 7), - datetime(2014, 7, 7, 17): datetime(2014, 7, 7)})) - - for offset, cases in tests: - for dt, expected in compat.iteritems(cases): - assert offset.apply(dt) == expected + normalize_cases = [] + normalize_cases.append(( + CustomBusinessHour(normalize=True, holidays=holidays), + {datetime(2014, 7, 1, 8): datetime(2014, 7, 1), + datetime(2014, 7, 1, 17): datetime(2014, 7, 3), + datetime(2014, 7, 1, 16): datetime(2014, 7, 3), + datetime(2014, 7, 1, 23): datetime(2014, 7, 3), + datetime(2014, 7, 1, 0): datetime(2014, 7, 1), + datetime(2014, 7, 4, 15): datetime(2014, 7, 4), + datetime(2014, 7, 4, 15, 59): datetime(2014, 7, 4), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7), + datetime(2014, 7, 5, 23): datetime(2014, 7, 7), + datetime(2014, 7, 6, 10): datetime(2014, 7, 7)})) + + normalize_cases.append(( + CustomBusinessHour(-1, normalize=True, holidays=holidays), + {datetime(2014, 7, 1, 8): datetime(2014, 6, 26), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1), + datetime(2014, 7, 1, 10): datetime(2014, 6, 26), + datetime(2014, 7, 1, 0): datetime(2014, 6, 26), + datetime(2014, 7, 7, 10): datetime(2014, 7, 4), + datetime(2014, 7, 7, 10, 1): datetime(2014, 7, 7), + datetime(2014, 7, 5, 23): datetime(2014, 7, 4), + datetime(2014, 7, 6, 10): datetime(2014, 7, 4)})) + + normalize_cases.append(( + CustomBusinessHour(1, normalize=True, + start='17:00', end='04:00', + holidays=holidays), + {datetime(2014, 7, 1, 8): datetime(2014, 7, 1), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1), + datetime(2014, 7, 1, 23): datetime(2014, 7, 2), + datetime(2014, 7, 2, 2): datetime(2014, 7, 2), + datetime(2014, 7, 2, 3): datetime(2014, 7, 3), + datetime(2014, 7, 4, 23): datetime(2014, 7, 5), + datetime(2014, 7, 5, 
2): datetime(2014, 7, 5), + datetime(2014, 7, 7, 2): datetime(2014, 7, 7), + datetime(2014, 7, 7, 17): datetime(2014, 7, 7)})) + + @pytest.mark.parametrize('norm_cases', normalize_cases) + def test_normalize(self, norm_cases): + offset, cases = norm_cases + for dt, expected in compat.iteritems(cases): + assert offset.apply(dt) == expected def test_onOffset(self): tests = [] @@ -1544,75 +1544,75 @@ def test_onOffset(self): for dt, expected in compat.iteritems(cases): assert offset.onOffset(dt) == expected - def test_apply(self): - tests = [] - - tests.append(( - CustomBusinessHour(holidays=self.holidays), - {datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 12), - datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 14), - datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 16), - datetime(2014, 7, 1, 19): datetime(2014, 7, 3, 10), - datetime(2014, 7, 1, 16): datetime(2014, 7, 3, 9), - datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 3, 9, 30, 15), - datetime(2014, 7, 1, 17): datetime(2014, 7, 3, 10), - datetime(2014, 7, 2, 11): datetime(2014, 7, 3, 10), - # out of business hours - datetime(2014, 7, 2, 8): datetime(2014, 7, 3, 10), - datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 10), - datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 10), - datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 10), - # saturday - datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 10), - datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 10), - datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 9, 30), - datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 9, 30, - 30)})) - - tests.append(( - CustomBusinessHour(4, holidays=self.holidays), - {datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 15), - datetime(2014, 7, 1, 13): datetime(2014, 7, 3, 9), - datetime(2014, 7, 1, 15): datetime(2014, 7, 3, 11), - datetime(2014, 7, 1, 16): datetime(2014, 7, 3, 12), - datetime(2014, 7, 1, 17): datetime(2014, 7, 3, 13), - datetime(2014, 7, 2, 11): datetime(2014, 7, 3, 13), - datetime(2014, 7, 2, 8): datetime(2014, 7, 3, 13), - datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 13), - datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 13), - datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 13), - datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 13), - datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 13), - datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 12, 30), - datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 12, 30, - 30)})) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assert_offset_equal(offset, base, expected) - - def test_apply_nanoseconds(self): - tests = [] - - tests.append((CustomBusinessHour(holidays=self.holidays), - {Timestamp('2014-07-01 15:00') + Nano(5): Timestamp( - '2014-07-01 16:00') + Nano(5), - Timestamp('2014-07-01 16:00') + Nano(5): Timestamp( - '2014-07-03 09:00') + Nano(5), - Timestamp('2014-07-01 16:00') - Nano(5): Timestamp( - '2014-07-01 17:00') - Nano(5)})) - - tests.append((CustomBusinessHour(-1, holidays=self.holidays), - {Timestamp('2014-07-01 15:00') + Nano(5): Timestamp( - '2014-07-01 14:00') + Nano(5), - Timestamp('2014-07-01 10:00') + Nano(5): Timestamp( - '2014-07-01 09:00') + Nano(5), - Timestamp('2014-07-01 10:00') - Nano(5): Timestamp( - '2014-06-26 17:00') - Nano(5), })) + apply_cases = [] + apply_cases.append(( + CustomBusinessHour(holidays=holidays), + {datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 12), + datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 16), + datetime(2014, 7, 1, 19): datetime(2014, 7, 3, 10), + 
datetime(2014, 7, 1, 16): datetime(2014, 7, 3, 9), + datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 3, 9, 30, 15), + datetime(2014, 7, 1, 17): datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 11): datetime(2014, 7, 3, 10), + # out of business hours + datetime(2014, 7, 2, 8): datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 10), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 10), + # saturday + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 10), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 10), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 9, 30), + datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 9, 30, 30)})) + + apply_cases.append(( + CustomBusinessHour(4, holidays=holidays), + {datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 15), + datetime(2014, 7, 1, 13): datetime(2014, 7, 3, 9), + datetime(2014, 7, 1, 15): datetime(2014, 7, 3, 11), + datetime(2014, 7, 1, 16): datetime(2014, 7, 3, 12), + datetime(2014, 7, 1, 17): datetime(2014, 7, 3, 13), + datetime(2014, 7, 2, 11): datetime(2014, 7, 3, 13), + datetime(2014, 7, 2, 8): datetime(2014, 7, 3, 13), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 13), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 13), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 13), + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 13), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 13), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 12, 30), + datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 12, 30, 30)})) + + @pytest.mark.parametrize('apply_case', apply_cases) + def test_apply(self, apply_case): + offset, cases = apply_case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assert_offset_equal(offset, base, expected) + nano_cases = [] + nano_cases.append( + (CustomBusinessHour(holidays=holidays), + {Timestamp('2014-07-01 15:00') + Nano(5): + Timestamp('2014-07-01 16:00') + Nano(5), + Timestamp('2014-07-01 16:00') + Nano(5): + Timestamp('2014-07-03 09:00') + Nano(5), + Timestamp('2014-07-01 16:00') - Nano(5): + Timestamp('2014-07-01 17:00') - Nano(5)})) + + nano_cases.append( + (CustomBusinessHour(-1, holidays=holidays), + {Timestamp('2014-07-01 15:00') + Nano(5): + Timestamp('2014-07-01 14:00') + Nano(5), + Timestamp('2014-07-01 10:00') + Nano(5): + Timestamp('2014-07-01 09:00') + Nano(5), + Timestamp('2014-07-01 10:00') - Nano(5): + Timestamp('2014-06-26 17:00') - Nano(5)})) + + @pytest.mark.parametrize('nano_case', nano_cases) + def test_apply_nanoseconds(self, nano_case): + offset, cases = nano_case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) class TestCustomBusinessDay(Base): @@ -1623,6 +1623,7 @@ def setup_method(self, method): self.nd = np_datetime64_compat('2008-01-01 00:00:00Z') self.offset = CDay() + self.offset1 = self.offset self.offset2 = CDay(2) def test_different_normalize_equals(self): @@ -1644,7 +1645,7 @@ def test_with_offset(self): assert (self.d + offset) == datetime(2008, 1, 2, 2) - def testEQ(self): + def test_eq(self): assert self.offset2 == self.offset2 def test_mul(self): @@ -1653,29 +1654,10 @@ def test_mul(self): def test_hash(self): assert hash(self.offset2) == hash(self.offset2) - def testCall(self): + def test_call(self): assert self.offset2(self.d) == datetime(2008, 1, 3) assert self.offset2(self.nd) == datetime(2008, 1, 3) - def testRAdd(self): - 
assert self.d + self.offset2 == self.offset2 + self.d - - def testSub(self): - off = self.offset2 - pytest.raises(Exception, off.__sub__, self.d) - assert 2 * off - off == off - - assert self.d - self.offset2 == self.d + CDay(-2) - - def testRSub(self): - assert self.d - self.offset2 == (-self.offset2).apply(self.d) - - def testMult1(self): - assert self.d + 10 * self.offset == self.d + CDay(10) - - def testMult2(self): - assert self.d + (-5 * CDay(-10)) == self.d + CDay(50) - def testRollback1(self): assert CDay(10).rollback(self.d) == self.d @@ -1783,12 +1765,6 @@ def test_apply_large_n(self): def test_apply_corner(self): pytest.raises(Exception, CDay().apply, BMonthEnd()) - def test_offsets_compare_equal(self): - # root cause of #456 - offset1 = CDay() - offset2 = CDay() - assert not offset1 != offset2 - def test_holidays(self): # Define a TradingDay offset holidays = ['2012-05-01', datetime(2013, 5, 1), @@ -1857,10 +1833,11 @@ class CustomBusinessMonthBase(object): def setup_method(self, method): self.d = datetime(2008, 1, 1) - self.offset = self._object() - self.offset2 = self._object(2) + self.offset = self._offset() + self.offset1 = self.offset + self.offset2 = self._offset(2) - def testEQ(self): + def test_eq(self): assert self.offset2 == self.offset2 def test_mul(self): @@ -1869,47 +1846,23 @@ def test_mul(self): def test_hash(self): assert hash(self.offset2) == hash(self.offset2) - def testRAdd(self): - assert self.d + self.offset2 == self.offset2 + self.d - - def testSub(self): - off = self.offset2 - pytest.raises(Exception, off.__sub__, self.d) - assert 2 * off - off == off - - assert self.d - self.offset2 == self.d + self._object(-2) - - def testRSub(self): - assert self.d - self.offset2 == (-self.offset2).apply(self.d) - - def testMult1(self): - assert self.d + 10 * self.offset == self.d + self._object(10) - - def testMult2(self): - assert self.d + (-5 * self._object(-10)) == self.d + self._object(50) - - def test_offsets_compare_equal(self): - offset1 = self._object() - offset2 = self._object() - assert not offset1 != offset2 - def test_roundtrip_pickle(self): def _check_roundtrip(obj): unpickled = tm.round_trip_pickle(obj) assert unpickled == obj - _check_roundtrip(self._object()) - _check_roundtrip(self._object(2)) - _check_roundtrip(self._object() * 2) + _check_roundtrip(self._offset()) + _check_roundtrip(self._offset(2)) + _check_roundtrip(self._offset() * 2) def test_copy(self): # GH 17452 - off = self._object(weekmask='Mon Wed Fri') + off = self._offset(weekmask='Mon Wed Fri') assert off == off.copy() class TestCustomBusinessMonthEnd(CustomBusinessMonthBase, Base): - _object = CBMonthEnd + _offset = CBMonthEnd def test_different_normalize_equals(self): # equivalent in this special case @@ -2026,7 +1979,7 @@ def test_datetimeindex(self): class TestCustomBusinessMonthBegin(CustomBusinessMonthBase, Base): - _object = CBMonthBegin + _offset = CBMonthBegin def test_different_normalize_equals(self): # equivalent in this special case @@ -2144,6 +2097,9 @@ def test_datetimeindex(self): class TestWeek(Base): _offset = Week + d = Timestamp(datetime(2008, 1, 2)) + offset1 = _offset() + offset2 = _offset(2) def test_repr(self): assert repr(Week(weekday=0)) == "" @@ -2151,9 +2107,11 @@ def test_repr(self): assert repr(Week(n=-2, weekday=0)) == "<-2 * Weeks: weekday=0>" def test_corner(self): - pytest.raises(ValueError, Week, weekday=7) - tm.assert_raises_regex( - ValueError, "Day must be", Week, weekday=-1) + with pytest.raises(ValueError): + Week(weekday=7) + + with 
pytest.raises(ValueError, match="Day must be"): + Week(weekday=-1) def test_isAnchored(self): assert Week(weekday=0).isAnchored() @@ -2198,38 +2156,37 @@ def test_offset(self, case): for base, expected in compat.iteritems(cases): assert_offset_equal(offset, base, expected) - def test_onOffset(self): - for weekday in range(7): - offset = Week(weekday=weekday) - - for day in range(1, 8): - date = datetime(2008, 1, day) + @pytest.mark.parametrize('weekday', range(7)) + def test_onOffset(self, weekday): + offset = Week(weekday=weekday) - if day % 7 == weekday: - expected = True - else: - expected = False - assert_onOffset(offset, date, expected) + for day in range(1, 8): + date = datetime(2008, 1, day) - def test_offsets_compare_equal(self): - # root cause of #456 - offset1 = Week() - offset2 = Week() - assert not offset1 != offset2 + if day % 7 == weekday: + expected = True + else: + expected = False + assert_onOffset(offset, date, expected) class TestWeekOfMonth(Base): _offset = WeekOfMonth + offset1 = _offset() + offset2 = _offset(2) def test_constructor(self): - tm.assert_raises_regex(ValueError, "^Week", WeekOfMonth, - n=1, week=4, weekday=0) - tm.assert_raises_regex(ValueError, "^Week", WeekOfMonth, - n=1, week=-1, weekday=0) - tm.assert_raises_regex(ValueError, "^Day", WeekOfMonth, - n=1, week=0, weekday=-1) - tm.assert_raises_regex(ValueError, "^Day", WeekOfMonth, - n=1, week=0, weekday=7) + with pytest.raises(ValueError, match="^Week"): + WeekOfMonth(n=1, week=4, weekday=0) + + with pytest.raises(ValueError, match="^Week"): + WeekOfMonth(n=1, week=-1, weekday=0) + + with pytest.raises(ValueError, match="^Day"): + WeekOfMonth(n=1, week=0, weekday=-1) + + with pytest.raises(ValueError, match="^Day"): + WeekOfMonth(n=1, week=0, weekday=-7) def test_repr(self): assert (repr(WeekOfMonth(weekday=1, week=2)) == @@ -2316,15 +2273,18 @@ def test_onOffset(self, case): class TestLastWeekOfMonth(Base): _offset = LastWeekOfMonth + offset1 = _offset() + offset2 = _offset(2) def test_constructor(self): - tm.assert_raises_regex(ValueError, "^N cannot be 0", - LastWeekOfMonth, n=0, weekday=1) + with pytest.raises(ValueError, match="^N cannot be 0"): + LastWeekOfMonth(n=0, weekday=1) + + with pytest.raises(ValueError, match="^Day"): + LastWeekOfMonth(n=1, weekday=-1) - tm.assert_raises_regex(ValueError, "^Day", LastWeekOfMonth, n=1, - weekday=-1) - tm.assert_raises_regex( - ValueError, "^Day", LastWeekOfMonth, n=1, weekday=7) + with pytest.raises(ValueError, match="^Day"): + LastWeekOfMonth(n=1, weekday=7) def test_offset(self): # Saturday @@ -2390,6 +2350,8 @@ def test_onOffset(self, case): class TestSemiMonthEnd(Base): _offset = SemiMonthEnd + offset1 = _offset() + offset2 = _offset(2) def test_offset_whole_year(self): dates = (datetime(2007, 12, 31), @@ -2560,6 +2522,8 @@ def test_vectorized_offset_addition(self, klass, assert_func): class TestSemiMonthBegin(Base): _offset = SemiMonthBegin + offset1 = _offset() + offset2 = _offset(2) def test_offset_whole_year(self): dates = (datetime(2007, 12, 15), @@ -2767,9 +2731,9 @@ def test_get_offset_name(self): def test_get_offset(): - with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): + with pytest.raises(ValueError, match=_INVALID_FREQ_ERROR): get_offset('gibberish') - with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): + with pytest.raises(ValueError, match=_INVALID_FREQ_ERROR): get_offset('QS-JAN-B') pairs = [ @@ -2787,7 +2751,7 @@ def test_get_offset(): def test_get_offset_legacy(): pairs = [('w@Sat', Week(weekday=5))] for name, 
expected in pairs: - with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): + with pytest.raises(ValueError, match=_INVALID_FREQ_ERROR): get_offset(name) From 0e05de4d4a5b099ef9c6797e3b1376d521965c67 Mon Sep 17 00:00:00 2001 From: Steve Baker Date: Wed, 13 Jun 2018 05:12:38 -0700 Subject: [PATCH 031/113] DOC: isin() docstring change DataFrame to pd.DataFrame (#21403) --- pandas/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0985de3126c5a..02c86d2f4dcc8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7284,11 +7284,11 @@ def isin(self, values): When ``values`` is a Series or DataFrame: >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) - >>> other = DataFrame({'A': [1, 3, 3, 2], 'B': ['e', 'f', 'f', 'e']}) - >>> df.isin(other) + >>> df2 = pd.DataFrame({'A': [1, 3, 3, 2], 'B': ['e', 'f', 'f', 'e']}) + >>> df.isin(df2) A B 0 True False - 1 False False # Column A in `other` has a 3, but not at index 1. + 1 False False # Column A in `df2` has a 3, but not at index 1. 2 True True """ if isinstance(values, dict): From 159756ebeba7ee3a03855d8d80adef199dc17d80 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 13 Jun 2018 15:24:01 +0200 Subject: [PATCH 032/113] BUG: fix get_indexer_non_unique with CategoricalIndex key (#21457) closes #21448 --- doc/source/whatsnew/v0.23.2.txt | 2 +- pandas/core/indexes/base.py | 3 +++ pandas/core/indexes/category.py | 7 ++++++- pandas/tests/categorical/test_indexing.py | 20 +++++++++++++++++++- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 1de44ffeb4160..3e4326dea2ecc 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -51,7 +51,7 @@ Bug Fixes **Indexing** -- +- Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`) - **I/O** diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bf1051332ee19..d9e4ef7db1158 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -31,6 +31,7 @@ is_dtype_equal, is_dtype_union_equal, is_object_dtype, + is_categorical, is_categorical_dtype, is_interval_dtype, is_period_dtype, @@ -3300,6 +3301,8 @@ def _filter_indexer_tolerance(self, target, indexer, tolerance): @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs) def get_indexer_non_unique(self, target): target = _ensure_index(target) + if is_categorical(target): + target = target.astype(target.dtype.categories.dtype) pself, ptarget = self._maybe_promote(target) if pself is not self or ptarget is not target: return pself.get_indexer_non_unique(ptarget) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 150eca32e229d..587090fa72def 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -598,7 +598,12 @@ def get_indexer_non_unique(self, target): target = ibase._ensure_index(target) if isinstance(target, CategoricalIndex): - target = target.categories + # Indexing on codes is more efficient if categories are the same: + if target.categories is self.categories: + target = target.codes + indexer, missing = self._engine.get_indexer_non_unique(target) + return _ensure_platform_int(indexer), missing + target = target.values codes = self.categories.get_indexer(target) indexer, missing = self._engine.get_indexer_non_unique(codes) diff --git a/pandas/tests/categorical/test_indexing.py 
b/pandas/tests/categorical/test_indexing.py index 9c27b1101e5ca..cf7b5cfa55882 100644 --- a/pandas/tests/categorical/test_indexing.py +++ b/pandas/tests/categorical/test_indexing.py @@ -5,7 +5,7 @@ import numpy as np import pandas.util.testing as tm -from pandas import Categorical, Index, PeriodIndex +from pandas import Categorical, Index, CategoricalIndex, PeriodIndex from pandas.tests.categorical.common import TestCategorical @@ -103,3 +103,21 @@ def f(): s.categories = [1, 2] pytest.raises(ValueError, f) + + # Combinations of sorted/unique: + @pytest.mark.parametrize("idx_values", [[1, 2, 3, 4], [1, 3, 2, 4], + [1, 3, 3, 4], [1, 2, 2, 4]]) + # Combinations of missing/unique + @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]]) + @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex]) + def test_get_indexer_non_unique(self, idx_values, key_values, key_class): + # GH 21448 + key = key_class(key_values, categories=range(1, 5)) + # Test for flat index and CategoricalIndex with same/different cats: + for dtype in None, 'category', key.dtype: + idx = Index(idx_values, dtype=dtype) + expected, exp_miss = idx.get_indexer_non_unique(key_values) + result, res_miss = idx.get_indexer_non_unique(key) + + tm.assert_numpy_array_equal(expected, result) + tm.assert_numpy_array_equal(exp_miss, res_miss) From 70a3d6da3606a55d9921ef26b3af321f3f693575 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Jun 2018 12:59:20 -0700 Subject: [PATCH 033/113] BUG: Fix DateOffset eq to depend on normalize attr (#21404) Partially addresses gh-18854 --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/tests/tseries/offsets/test_offsets.py | 50 +++++++++----------- pandas/tseries/offsets.py | 2 +- 3 files changed, 24 insertions(+), 30 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 68c1839221508..6d5e40d37c8df 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -91,7 +91,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ -- +- Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) - - diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 8bf0d9f915d04..6fd525f02f55c 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -564,11 +564,10 @@ def setup_method(self, method): self.offset2 = BDay(2) def test_different_normalize_equals(self): - # equivalent in this special case - offset = BDay() - offset2 = BDay() - offset2.normalize = True - assert offset == offset2 + # GH#21404 changed __eq__ to return False when `normalize` doesnt match + offset = self._offset() + offset2 = self._offset(normalize=True) + assert offset != offset2 def test_repr(self): assert repr(self.offset) == '' @@ -734,11 +733,10 @@ def test_constructor_errors(self): BusinessHour(start='14:00:05') def test_different_normalize_equals(self): - # equivalent in this special case + # GH#21404 changed __eq__ to return False when `normalize` doesnt match offset = self._offset() - offset2 = self._offset() - offset2.normalize = True - assert offset == offset2 + offset2 = self._offset(normalize=True) + assert offset != offset2 def test_repr(self): assert repr(self.offset1) == '' @@ -1397,11 +1395,10 @@ def test_constructor_errors(self): CustomBusinessHour(start='14:00:05') def test_different_normalize_equals(self): - # equivalent in this special case + # GH#21404 
changed __eq__ to return False when `normalize` doesnt match offset = self._offset() - offset2 = self._offset() - offset2.normalize = True - assert offset == offset2 + offset2 = self._offset(normalize=True) + assert offset != offset2 def test_repr(self): assert repr(self.offset1) == '' @@ -1627,11 +1624,10 @@ def setup_method(self, method): self.offset2 = CDay(2) def test_different_normalize_equals(self): - # equivalent in this special case - offset = CDay() - offset2 = CDay() - offset2.normalize = True - assert offset == offset2 + # GH#21404 changed __eq__ to return False when `normalize` doesnt match + offset = self._offset() + offset2 = self._offset(normalize=True) + assert offset != offset2 def test_repr(self): assert repr(self.offset) == '' @@ -1865,11 +1861,10 @@ class TestCustomBusinessMonthEnd(CustomBusinessMonthBase, Base): _offset = CBMonthEnd def test_different_normalize_equals(self): - # equivalent in this special case - offset = CBMonthEnd() - offset2 = CBMonthEnd() - offset2.normalize = True - assert offset == offset2 + # GH#21404 changed __eq__ to return False when `normalize` doesnt match + offset = self._offset() + offset2 = self._offset(normalize=True) + assert offset != offset2 def test_repr(self): assert repr(self.offset) == '' @@ -1982,11 +1977,10 @@ class TestCustomBusinessMonthBegin(CustomBusinessMonthBase, Base): _offset = CBMonthBegin def test_different_normalize_equals(self): - # equivalent in this special case - offset = CBMonthBegin() - offset2 = CBMonthBegin() - offset2.normalize = True - assert offset == offset2 + # GH#21404 changed __eq__ to return False when `normalize` doesnt match + offset = self._offset() + offset2 = self._offset(normalize=True) + assert offset != offset2 def test_repr(self): assert repr(self.offset) == '' diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index a5a983bf94bb8..99f97d8fc7bc0 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -290,7 +290,7 @@ def _params(self): all_paras = self.__dict__.copy() if 'holidays' in all_paras and not all_paras['holidays']: all_paras.pop('holidays') - exclude = ['kwds', 'name', 'normalize', 'calendar'] + exclude = ['kwds', 'name', 'calendar'] attrs = [(k, v) for k, v in all_paras.items() if (k not in exclude) and (k[0] != '_')] attrs = sorted(set(attrs)) From 7d664d5b977feee64d93a23a37ee6b593af2f06b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 14 Jun 2018 03:05:28 -0700 Subject: [PATCH 034/113] API/BUG: DatetimeIndex correctly localizes integer data (#21216) --- doc/source/whatsnew/v0.24.0.txt | 4 +- pandas/core/indexes/base.py | 4 + pandas/core/indexes/datetimes.py | 82 ++++++++----------- pandas/tests/indexes/datetimes/test_astype.py | 10 +++ .../indexes/datetimes/test_construction.py | 60 +++++++++----- pandas/tests/indexes/test_base.py | 27 +++--- 6 files changed, 105 insertions(+), 82 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 6d5e40d37c8df..c29197725a2b6 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -36,7 +36,7 @@ Datetimelike API Changes Other API Changes ^^^^^^^^^^^^^^^^^ -- +- :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`) - - @@ -92,7 +92,7 @@ Datetimelike ^^^^^^^^^^^^ - Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) -- +- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer 
data correctly (:issue:`20964`) - Timedelta diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d9e4ef7db1158..36345a32a3bf7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1175,6 +1175,10 @@ def astype(self, dtype, copy=True): return CategoricalIndex(self.values, name=self.name, dtype=dtype, copy=copy) try: + if is_datetime64tz_dtype(dtype): + from pandas.core.indexes.datetimes import DatetimeIndex + return DatetimeIndex(self.values, name=self.name, dtype=dtype, + copy=copy) return Index(self.values.astype(dtype, copy=copy), name=self.name, dtype=dtype) except (TypeError, ValueError): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 66622814f172d..e944df7aa83c6 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -395,57 +395,43 @@ def __new__(cls, data=None, # data must be Index or np.ndarray here if not (is_datetime64_dtype(data) or is_datetimetz(data) or - is_integer_dtype(data)): + is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'): data = tools.to_datetime(data, dayfirst=dayfirst, yearfirst=yearfirst) - if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data): - - if isinstance(data, DatetimeIndex): - if tz is None: - tz = data.tz - elif data.tz is None: - data = data.tz_localize(tz, ambiguous=ambiguous) - else: - # the tz's must match - if str(tz) != str(data.tz): - msg = ('data is already tz-aware {0}, unable to ' - 'set specified tz: {1}') - raise TypeError(msg.format(data.tz, tz)) + if isinstance(data, DatetimeIndex): + if tz is None: + tz = data.tz + elif data.tz is None: + data = data.tz_localize(tz, ambiguous=ambiguous) + else: + # the tz's must match + if str(tz) != str(data.tz): + msg = ('data is already tz-aware {0}, unable to ' + 'set specified tz: {1}') + raise TypeError(msg.format(data.tz, tz)) - subarr = data.values + subarr = data.values - if freq is None: - freq = data.freq - verify_integrity = False - else: - if data.dtype != _NS_DTYPE: - subarr = conversion.ensure_datetime64ns(data) - else: - subarr = data + if freq is None: + freq = data.freq + verify_integrity = False + elif issubclass(data.dtype.type, np.datetime64): + if data.dtype != _NS_DTYPE: + data = conversion.ensure_datetime64ns(data) + if tz is not None: + # Convert tz-naive to UTC + tz = timezones.maybe_get_tz(tz) + data = conversion.tz_localize_to_utc(data.view('i8'), tz, + ambiguous=ambiguous) + subarr = data.view(_NS_DTYPE) else: # must be integer dtype otherwise - if isinstance(data, Int64Index): - raise TypeError('cannot convert Int64Index->DatetimeIndex') + # assume this data are epoch timestamps if data.dtype != _INT64_DTYPE: - data = data.astype(np.int64) + data = data.astype(np.int64, copy=False) subarr = data.view(_NS_DTYPE) - if isinstance(subarr, DatetimeIndex): - if tz is None: - tz = subarr.tz - else: - if tz is not None: - tz = timezones.maybe_get_tz(tz) - - if (not isinstance(data, DatetimeIndex) or - getattr(data, 'tz', None) is None): - # Convert tz-naive to UTC - ints = subarr.view('i8') - subarr = conversion.tz_localize_to_utc(ints, tz, - ambiguous=ambiguous) - subarr = subarr.view(_NS_DTYPE) - subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz) if dtype is not None: if not is_dtype_equal(subarr.dtype, dtype): @@ -807,8 +793,9 @@ def _mpl_repr(self): @cache_readonly def _is_dates_only(self): + """Return a boolean if we are only dates (and don't have a timezone)""" from pandas.io.formats.format import _is_dates_only - return 
_is_dates_only(self.values) + return _is_dates_only(self.values) and self.tz is None @property def _formatter_func(self): @@ -1244,7 +1231,7 @@ def join(self, other, how='left', level=None, return_indexers=False, See Index.join """ if (not isinstance(other, DatetimeIndex) and len(other) > 0 and - other.inferred_type not in ('floating', 'mixed-integer', + other.inferred_type not in ('floating', 'integer', 'mixed-integer', 'mixed-integer-float', 'mixed')): try: other = DatetimeIndex(other) @@ -2100,8 +2087,9 @@ def normalize(self): dtype='datetime64[ns, Asia/Calcutta]', freq=None) """ new_values = conversion.date_normalize(self.asi8, self.tz) - return DatetimeIndex(new_values, freq='infer', name=self.name, - tz=self.tz) + return DatetimeIndex(new_values, + freq='infer', + name=self.name).tz_localize(self.tz) @Substitution(klass='DatetimeIndex') @Appender(_shared_docs['searchsorted']) @@ -2182,8 +2170,6 @@ def insert(self, loc, item): try: new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8)) - if self.tz is not None: - new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz) return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz) except (AttributeError, TypeError): @@ -2221,8 +2207,6 @@ def delete(self, loc): if (loc.start in (0, None) or loc.stop in (len(self), None)): freq = self.freq - if self.tz is not None: - new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz) return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz) def tz_convert(self, tz): diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 8acdd301f241a..64b8f48f6a4e1 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -225,6 +225,16 @@ def _check_rng(rng): _check_rng(rng_eastern) _check_rng(rng_utc) + @pytest.mark.parametrize('tz, dtype', [ + ['US/Pacific', 'datetime64[ns, US/Pacific]'], + [None, 'datetime64[ns]']]) + def test_integer_index_astype_datetime(self, tz, dtype): + # GH 20997, 20964 + val = [pd.Timestamp('2018-01-01', tz=tz).value] + result = pd.Index(val).astype(dtype) + expected = pd.DatetimeIndex(['2018-01-01'], tz=tz) + tm.assert_index_equal(result, expected) + class TestToPeriod(object): diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index b138b79caac76..f7682a965c038 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -1,8 +1,10 @@ -import pytest +from datetime import timedelta +from operator import attrgetter +from functools import partial +import pytest import pytz import numpy as np -from datetime import timedelta import pandas as pd from pandas import offsets @@ -26,25 +28,28 @@ def test_construction_caching(self): freq='ns')}) assert df.dttz.dtype.tz.zone == 'US/Eastern' - def test_construction_with_alt(self): - - i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') - i2 = DatetimeIndex(i, dtype=i.dtype) - tm.assert_index_equal(i, i2) - assert i.tz.zone == 'US/Eastern' - - i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz) - tm.assert_index_equal(i, i2) - assert i.tz.zone == 'US/Eastern' - - i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype) - tm.assert_index_equal(i, i2) - assert i.tz.zone == 'US/Eastern' - - i2 = DatetimeIndex( - i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz) - tm.assert_index_equal(i, i2) - assert i.tz.zone == 
'US/Eastern' + @pytest.mark.parametrize('kwargs', [ + {'tz': 'dtype.tz'}, + {'dtype': 'dtype'}, + {'dtype': 'dtype', 'tz': 'dtype.tz'}]) + def test_construction_with_alt(self, kwargs, tz_aware_fixture): + tz = tz_aware_fixture + i = pd.date_range('20130101', periods=5, freq='H', tz=tz) + kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} + result = DatetimeIndex(i, **kwargs) + tm.assert_index_equal(i, result) + + @pytest.mark.parametrize('kwargs', [ + {'tz': 'dtype.tz'}, + {'dtype': 'dtype'}, + {'dtype': 'dtype', 'tz': 'dtype.tz'}]) + def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture): + tz = tz_aware_fixture + i = pd.date_range('20130101', periods=5, freq='H', tz=tz) + kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} + result = DatetimeIndex(i.tz_localize(None).asi8, **kwargs) + expected = i.tz_localize(None).tz_localize('UTC').tz_convert(tz) + tm.assert_index_equal(result, expected) # localize into the provided tz i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC') @@ -478,6 +483,19 @@ def test_constructor_timestamp_near_dst(self): ts[1].to_pydatetime()]) tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('klass', [Index, DatetimeIndex]) + @pytest.mark.parametrize('box', [ + np.array, partial(np.array, dtype=object), list]) + @pytest.mark.parametrize('tz, dtype', [ + ['US/Pacific', 'datetime64[ns, US/Pacific]'], + [None, 'datetime64[ns]']]) + def test_constructor_with_int_tz(self, klass, box, tz, dtype): + # GH 20997, 20964 + ts = Timestamp('2018-01-01', tz=tz) + result = klass(box([ts.value]), dtype=dtype) + expected = klass([ts]) + assert result == expected + class TestTimeSeries(object): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c264f5f79e47e..b8bd218ec25ab 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -402,26 +402,33 @@ def test_constructor_dtypes_to_timedelta(self, cast_index, vals): index = Index(vals) assert isinstance(index, TimedeltaIndex) - @pytest.mark.parametrize("values", [ - # pass values without timezone, as DatetimeIndex localizes it - pd.date_range('2011-01-01', periods=5).values, - pd.date_range('2011-01-01', periods=5).asi8]) + @pytest.mark.parametrize("attr, utc", [ + ['values', False], + ['asi8', True]]) @pytest.mark.parametrize("klass", [pd.Index, pd.DatetimeIndex]) - def test_constructor_dtypes_datetime(self, tz_naive_fixture, values, + def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, utc, klass): - index = pd.date_range('2011-01-01', periods=5, tz=tz_naive_fixture) + # Test constructing with a datetimetz dtype + # .values produces numpy datetimes, so these are considered naive + # .asi8 produces integers, so these are considered epoch timestamps + index = pd.date_range('2011-01-01', periods=5) + arg = getattr(index, attr) + if utc: + index = index.tz_localize('UTC').tz_convert(tz_naive_fixture) + else: + index = index.tz_localize(tz_naive_fixture) dtype = index.dtype - result = klass(values, tz=tz_naive_fixture) + result = klass(arg, tz=tz_naive_fixture) tm.assert_index_equal(result, index) - result = klass(values, dtype=dtype) + result = klass(arg, dtype=dtype) tm.assert_index_equal(result, index) - result = klass(list(values), tz=tz_naive_fixture) + result = klass(list(arg), tz=tz_naive_fixture) tm.assert_index_equal(result, index) - result = klass(list(values), dtype=dtype) + result = klass(list(arg), dtype=dtype) tm.assert_index_equal(result, index) 
@pytest.mark.parametrize("attr", ['values', 'asi8']) From 7463576ca94f6ec9f74b1e28ee96600972ed12d3 Mon Sep 17 00:00:00 2001 From: Wenhuan Date: Thu, 14 Jun 2018 18:08:57 +0800 Subject: [PATCH 035/113] PERF: improve performance of groupby rank (#21237) (#21285) --- asv_bench/benchmarks/groupby.py | 21 +++++- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/_libs/groupby_helper.pxi.in | 103 ++++++++++++++--------------- 3 files changed, 71 insertions(+), 54 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 7777322071957..0725bbeb6c36d 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -5,7 +5,7 @@ import numpy as np from pandas import (DataFrame, Series, MultiIndex, date_range, period_range, - TimeGrouper, Categorical) + TimeGrouper, Categorical, Timestamp) import pandas.util.testing as tm from .pandas_vb_common import setup # noqa @@ -385,6 +385,25 @@ def time_dtype_as_field(self, dtype, method, application): self.as_field_method() +class RankWithTies(object): + # GH 21237 + goal_time = 0.2 + param_names = ['dtype', 'tie_method'] + params = [['float64', 'float32', 'int64', 'datetime64'], + ['first', 'average', 'dense', 'min', 'max']] + + def setup(self, dtype, tie_method): + N = 10**4 + if dtype == 'datetime64': + data = np.array([Timestamp("2011/01/01")] * N, dtype=dtype) + else: + data = np.array([1] * N, dtype=dtype) + self.df = DataFrame({'values': data, 'key': ['foo'] * N}) + + def time_rank_ties(self, dtype, tie_method): + self.df.groupby('key').rank(method=tie_method) + + class Float32(object): # GH 13335 goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index c29197725a2b6..f079f151808cc 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -65,6 +65,7 @@ Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Improved performance of :func:`Series.describe` in case of numeric dtpyes (:issue:`21274`) +- Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`) - .. _whatsnew_0240.docs: diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index b3e9b7c9e69ee..0062a6c8d31ab 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -429,7 +429,8 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, is_datetimelike : bool, default False unused in this method but provided for call compatibility with other Cython transformations - ties_method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' + ties_method : {'average', 'min', 'max', 'first', 'dense'}, default + 'average' * average: average rank of group * min: lowest rank in group * max: highest rank in group @@ -514,26 +515,22 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, dups += 1 sum_ranks += i - grp_start + 1 - # if keep_na, check for missing values and assign back - # to the result where appropriate - - if keep_na and mask[_as[i]]: - grp_na_count += 1 - out[_as[i], 0] = nan - else: - # this implementation is inefficient because it will - # continue overwriting previously encountered dups - # i.e. 
if 5 duplicated values are encountered it will - # write to the result as follows (assumes avg tiebreaker): - # 1 - # .5 .5 - # .33 .33 .33 - # .25 .25 .25 .25 - # .2 .2 .2 .2 .2 - # - # could potentially be optimized to only write to the - # result once the last duplicate value is encountered - if tiebreak == TIEBREAK_AVERAGE: + # Update out only when there is a transition of values or labels. + # When a new value or group is encountered, go back #dups steps( + # the number of occurrence of current value) and assign the ranks + # based on the the starting index of the current group (grp_start) + # and the current index + if (i == N - 1 or + (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or + (mask[_as[i]] ^ mask[_as[i+1]]) or + (labels[_as[i]] != labels[_as[i+1]])): + # if keep_na, check for missing values and assign back + # to the result where appropriate + if keep_na and mask[_as[i]]: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = nan + grp_na_count = dups + elif tiebreak == TIEBREAK_AVERAGE: for j in range(i - dups + 1, i + 1): out[_as[j], 0] = sum_ranks / dups elif tiebreak == TIEBREAK_MIN: @@ -552,38 +549,38 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, for j in range(i - dups + 1, i + 1): out[_as[j], 0] = grp_vals_seen - # look forward to the next value (using the sorting in _as) - # if the value does not equal the current value then we need to - # reset the dups and sum_ranks, knowing that a new value is coming - # up. the conditional also needs to handle nan equality and the - # end of iteration - if (i == N - 1 or - (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or - (mask[_as[i]] ^ mask[_as[i+1]])): - dups = sum_ranks = 0 - val_start = i - grp_vals_seen += 1 - grp_tie_count +=1 - - # Similar to the previous conditional, check now if we are moving - # to a new group. If so, keep track of the index where the new - # group occurs, so the tiebreaker calculations can decrement that - # from their position. fill in the size of each group encountered - # (used by pct calculations later). also be sure to reset any of - # the items helping to calculate dups - if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]: - if tiebreak != TIEBREAK_DENSE: - for j in range(grp_start, i + 1): - grp_sizes[_as[j], 0] = i - grp_start + 1 - grp_na_count - else: - for j in range(grp_start, i + 1): - grp_sizes[_as[j], 0] = (grp_tie_count - - (grp_na_count > 0)) - dups = sum_ranks = 0 - grp_na_count = 0 - grp_tie_count = 0 - grp_start = i + 1 - grp_vals_seen = 1 + # look forward to the next value (using the sorting in _as) + # if the value does not equal the current value then we need to + # reset the dups and sum_ranks, knowing that a new value is + # coming up. the conditional also needs to handle nan equality + # and the end of iteration + if (i == N - 1 or + (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or + (mask[_as[i]] ^ mask[_as[i+1]])): + dups = sum_ranks = 0 + grp_vals_seen += 1 + grp_tie_count += 1 + + # Similar to the previous conditional, check now if we are + # moving to a new group. If so, keep track of the index where + # the new group occurs, so the tiebreaker calculations can + # decrement that from their position. fill in the size of each + # group encountered (used by pct calculations later). 
also be + # sure to reset any of the items helping to calculate dups + if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]: + if tiebreak != TIEBREAK_DENSE: + for j in range(grp_start, i + 1): + grp_sizes[_as[j], 0] = (i - grp_start + 1 - + grp_na_count) + else: + for j in range(grp_start, i + 1): + grp_sizes[_as[j], 0] = (grp_tie_count - + (grp_na_count > 0)) + dups = sum_ranks = 0 + grp_na_count = 0 + grp_tie_count = 0 + grp_start = i + 1 + grp_vals_seen = 1 if pct: for i in range(N): From f5043003e87d0aa2af5ea5f490cd9246005bd263 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 14 Jun 2018 03:12:01 -0700 Subject: [PATCH 036/113] PERF: typing and cdefs for tslibs.resolution (#21452) --- pandas/_libs/tslibs/resolution.pyx | 71 ++++++++++++++++++------------ 1 file changed, 42 insertions(+), 29 deletions(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 2f185f4142a09..210b201cd08ea 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -5,7 +5,7 @@ from cython cimport Py_ssize_t import numpy as np cimport numpy as cnp -from numpy cimport ndarray, int64_t +from numpy cimport ndarray, int64_t, int32_t cnp.import_array() from util cimport is_string_object, get_nat @@ -45,12 +45,12 @@ cdef int RESO_MIN = 4 cdef int RESO_HR = 5 cdef int RESO_DAY = 6 -_ONE_MICRO = 1000L -_ONE_MILLI = _ONE_MICRO * 1000 -_ONE_SECOND = _ONE_MILLI * 1000 -_ONE_MINUTE = 60 * _ONE_SECOND -_ONE_HOUR = 60 * _ONE_MINUTE -_ONE_DAY = 24 * _ONE_HOUR +_ONE_MICRO = 1000L +_ONE_MILLI = (_ONE_MICRO * 1000) +_ONE_SECOND = (_ONE_MILLI * 1000) +_ONE_MINUTE = (60 * _ONE_SECOND) +_ONE_HOUR = (60 * _ONE_MINUTE) +_ONE_DAY = (24 * _ONE_HOUR) # ---------------------------------------------------------------------- @@ -350,7 +350,7 @@ class Resolution(object): # TODO: this is non performant logic here (and duplicative) and this # simply should call unique_1d directly # plus no reason to depend on khash directly -cdef unique_deltas(ndarray[int64_t] arr): +cdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr): cdef: Py_ssize_t i, n = len(arr) int64_t val @@ -374,21 +374,27 @@ cdef unique_deltas(ndarray[int64_t] arr): return result -def _is_multiple(us, mult): +cdef inline bint _is_multiple(int64_t us, int64_t mult): return us % mult == 0 -def _maybe_add_count(base, count): +cdef inline str _maybe_add_count(str base, int64_t count): if count != 1: - return '{count}{base}'.format(count=int(count), base=base) + return '{count}{base}'.format(count=count, base=base) else: return base -class _FrequencyInferer(object): +cdef class _FrequencyInferer(object): """ Not sure if I can avoid the state machine here """ + cdef public: + object index + object values + bint warn + bint is_monotonic + dict _cache def __init__(self, index, warn=True): self.index = index @@ -476,16 +482,23 @@ class _FrequencyInferer(object): def rep_stamp(self): return Timestamp(self.values[0]) - def month_position_check(self): + cdef month_position_check(self): # TODO: cythonize this, very slow - calendar_end = True - business_end = True - calendar_start = True - business_start = True - - years = self.fields['Y'] - months = self.fields['M'] - days = self.fields['D'] + cdef: + int32_t daysinmonth, y, m, d + bint calendar_end = True + bint business_end = True + bint calendar_start = True + bint business_start = True + bint cal + int32_t[:] years + int32_t[:] months + int32_t[:] days + + fields = self.fields + years = fields['Y'] + months = fields['M'] + days = fields['D'] weekdays 
= self.index.dayofweek for y, m, d, wd in zip(years, months, days, weekdays): @@ -525,7 +538,7 @@ class _FrequencyInferer(object): def ydiffs(self): return unique_deltas(self.fields['Y'].astype('i8')) - def _infer_daily_rule(self): + cdef _infer_daily_rule(self): annual_rule = self._get_annual_rule() if annual_rule: nyears = self.ydiffs[0] @@ -562,7 +575,7 @@ class _FrequencyInferer(object): if wom_rule: return wom_rule - def _get_annual_rule(self): + cdef _get_annual_rule(self): if len(self.ydiffs) > 1: return None @@ -573,7 +586,7 @@ class _FrequencyInferer(object): return {'cs': 'AS', 'bs': 'BAS', 'ce': 'A', 'be': 'BA'}.get(pos_check) - def _get_quarterly_rule(self): + cdef _get_quarterly_rule(self): if len(self.mdiffs) > 1: return None @@ -584,14 +597,14 @@ class _FrequencyInferer(object): return {'cs': 'QS', 'bs': 'BQS', 'ce': 'Q', 'be': 'BQ'}.get(pos_check) - def _get_monthly_rule(self): + cdef _get_monthly_rule(self): if len(self.mdiffs) > 1: return None pos_check = self.month_position_check() return {'cs': 'MS', 'bs': 'BMS', 'ce': 'M', 'be': 'BM'}.get(pos_check) - def _is_business_daily(self): + cdef bint _is_business_daily(self): # quick check: cannot be business daily if self.day_deltas != [1, 3]: return False @@ -604,7 +617,7 @@ class _FrequencyInferer(object): return np.all(((weekdays == 0) & (shifts == 3)) | ((weekdays > 0) & (weekdays <= 4) & (shifts == 1))) - def _get_wom_rule(self): + cdef _get_wom_rule(self): # wdiffs = unique(np.diff(self.index.week)) # We also need -47, -49, -48 to catch index spanning year boundary # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all(): @@ -627,9 +640,9 @@ class _FrequencyInferer(object): return 'WOM-{week}{weekday}'.format(week=week, weekday=wd) -class _TimedeltaFrequencyInferer(_FrequencyInferer): +cdef class _TimedeltaFrequencyInferer(_FrequencyInferer): - def _infer_daily_rule(self): + cdef _infer_daily_rule(self): if self.is_unique: days = self.deltas[0] / _ONE_DAY if days % 7 == 0: From 07381bb71fc14c63431e4a19cd5fc019692ad510 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 14 Jun 2018 03:18:23 -0700 Subject: [PATCH 037/113] disallow normalize=True with Tick classes (#21427) --- doc/source/whatsnew/v0.24.0.txt | 35 ++++++++++++++++++++ pandas/tests/tseries/offsets/test_offsets.py | 22 ++++++++++-- pandas/tseries/offsets.py | 4 ++- 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index f079f151808cc..cae05446c00e6 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -24,6 +24,41 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _whatsnew_0240.api.datetimelike.normalize + +Tick DateOffset Normalize Restrictions +-------------------------------------- + +Creating a ``Tick`` object (:class:``Day``, :class:``Hour``, :class:``Minute``, +:class:``Second``, :class:``Milli``, :class:``Micro``, :class:``Nano``) with +`normalize=True` is no longer supported. This prevents unexpected behavior +where addition could fail to be monotone or associative. (:issue:`21427`) + +.. ipython:: python + + ts = pd.Timestamp('2018-06-11 18:01:14') + ts + tic = pd.offsets.Hour(n=2, normalize=True) + tic + +Previous Behavior: + +.. code-block:: ipython + + In [4]: ts + tic + Out [4]: Timestamp('2018-06-11 00:00:00') + + In [5]: ts + tic + tic + tic == ts + (tic + tic + tic) + Out [5]: False + +Current Behavior: + +.. 
ipython:: python + + tic = pd.offsets.Hour(n=2) + ts + tic + tic + tic == ts + (tic + tic + tic) + + .. _whatsnew_0240.api.datetimelike: Datetimelike API Changes diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 6fd525f02f55c..5dd2a199405bf 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -28,7 +28,7 @@ YearEnd, Day, QuarterEnd, BusinessMonthEnd, FY5253, Nano, Easter, FY5253Quarter, - LastWeekOfMonth) + LastWeekOfMonth, Tick) from pandas.core.tools.datetimes import format, ole2datetime import pandas.tseries.offsets as offsets from pandas.io.pickle import read_pickle @@ -270,6 +270,11 @@ def test_offset_freqstr(self, offset_types): def _check_offsetfunc_works(self, offset, funcname, dt, expected, normalize=False): + + if normalize and issubclass(offset, Tick): + # normalize=True disallowed for Tick subclasses GH#21427 + return + offset_s = self._get_offset(offset, normalize=normalize) func = getattr(offset_s, funcname) @@ -458,6 +463,9 @@ def test_onOffset(self, offset_types): assert offset_s.onOffset(dt) # when normalize=True, onOffset checks time is 00:00:00 + if issubclass(offset_types, Tick): + # normalize=True disallowed for Tick subclasses GH#21427 + return offset_n = self._get_offset(offset_types, normalize=True) assert not offset_n.onOffset(dt) @@ -485,7 +493,9 @@ def test_add(self, offset_types, tz): assert isinstance(result, Timestamp) assert result == expected_localize - # normalize=True + # normalize=True, disallowed for Tick subclasses GH#21427 + if issubclass(offset_types, Tick): + return offset_s = self._get_offset(offset_types, normalize=True) expected = Timestamp(expected.date()) @@ -3092,6 +3102,14 @@ def test_require_integers(offset_types): cls(n=1.5) +def test_tick_normalize_raises(tick_classes): + # check that trying to create a Tick object with normalize=True raises + # GH#21427 + cls = tick_classes + with pytest.raises(ValueError): + cls(n=3, normalize=True) + + def test_weeks_onoffset(): # GH#18510 Week with weekday = None, normalize = False should always # be onOffset diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 99f97d8fc7bc0..2f4989f26b394 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2217,8 +2217,10 @@ class Tick(SingleConstructorOffset): _attributes = frozenset(['n', 'normalize']) def __init__(self, n=1, normalize=False): - # TODO: do Tick classes with normalize=True make sense? 
self.n = self._validate_n(n) + if normalize: + raise ValueError("Tick offset with `normalize=True` are not " + "allowed.") # GH#21427 self.normalize = normalize __gt__ = _tick_comp(operator.gt) From e2fb27adb691a0191b9bc459604e17cc1c367480 Mon Sep 17 00:00:00 2001 From: Kalyan Gokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Thu, 14 Jun 2018 15:53:14 +0530 Subject: [PATCH 038/113] CLN: Comparison methods for MultiIndex should have consistent behaviour for all nlevels (GH21149) (#21195) --- doc/source/whatsnew/v0.23.2.txt | 1 + pandas/core/indexes/base.py | 3 ++- pandas/tests/indexes/test_multi.py | 17 +++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 3e4326dea2ecc..0d3f9cb8dd3b6 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -52,6 +52,7 @@ Bug Fixes **Indexing** - Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`) +- Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`) - **I/O** diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 36345a32a3bf7..4b32e5d4f5654 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -91,7 +91,8 @@ def cmp_method(self, other): if needs_i8_conversion(self) and needs_i8_conversion(other): return self._evaluate_compare(other, op) - if is_object_dtype(self) and self.nlevels == 1: + from .multi import MultiIndex + if is_object_dtype(self) and not isinstance(self, MultiIndex): # don't pass MultiIndex with np.errstate(all='ignore'): result = ops._comp_method_OBJECT_ARRAY(op, self.values, other) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 0ab3447909d9b..ab53002ee1587 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -3307,3 +3307,20 @@ def test_duplicate_multiindex_labels(self): with pytest.raises(ValueError): ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], inplace=True) + + def test_multiindex_compare(self): + # GH 21149 + # Ensure comparison operations for MultiIndex with nlevels == 1 + # behave consistently with those for MultiIndex with nlevels > 1 + + midx = pd.MultiIndex.from_product([[0, 1]]) + + # Equality self-test: MultiIndex object vs self + expected = pd.Series([True, True]) + result = pd.Series(midx == midx) + tm.assert_series_equal(result, expected) + + # Greater than comparison: MultiIndex object vs self + expected = pd.Series([False, False]) + result = pd.Series(midx > midx) + tm.assert_series_equal(result, expected) From f57e0eb854f1a4d2c2de61cf65331a6dac507adf Mon Sep 17 00:00:00 2001 From: topper-123 Date: Thu, 14 Jun 2018 11:38:23 +0100 Subject: [PATCH 039/113] PERF: Add __contains__ to CategoricalIndex (#21369) --- asv_bench/benchmarks/categoricals.py | 13 +++++++++++++ doc/source/whatsnew/v0.23.2.txt | 4 +++- pandas/core/indexes/category.py | 28 ++++++++++++++++++++-------- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 5464e7cba22c3..48f42621d183d 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -193,3 +193,16 @@ def time_categorical_series_is_monotonic_increasing(self): def time_categorical_series_is_monotonic_decreasing(self): 
self.s.is_monotonic_decreasing + + +class Contains(object): + + goal_time = 0.2 + + def setup(self): + N = 10**5 + self.ci = tm.makeCategoricalIndex(N) + self.cat = self.ci.categories[0] + + def time_contains(self): + self.cat in self.ci diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 0d3f9cb8dd3b6..79a4c3da2ffa4 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -24,7 +24,9 @@ Fixed Regressions Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- +- Improved performance of membership checks in :class:`CategoricalIndex` + (i.e. ``x in ci``-style checks are much faster). :meth:`CategoricalIndex.contains` + is likewise much faster (:issue:`21369`) - Documentation Changes diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 587090fa72def..7f2860a963423 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -325,19 +325,31 @@ def _reverse_indexer(self): def __contains__(self, key): hash(key) - if self.categories._defer_to_indexing: - return key in self.categories + if isna(key): # if key is a NaN, check if any NaN is in self. + return self.isna().any() + + # is key in self.categories? Then get its location. + # If not (i.e. KeyError), it logically can't be in self either + try: + loc = self.categories.get_loc(key) + except KeyError: + return False - return key in self.values + # loc is the location of key in self.categories, but also the value + # for key in self.codes and in self._engine. key may be in categories, + # but still not in self, check this. Example: + # 'b' in CategoricalIndex(['a'], categories=['a', 'b']) # False + if is_scalar(loc): + return loc in self._engine + else: + # if self.categories is IntervalIndex, loc is an array + # check if any scalar of the array is in self._engine + return any(loc_ in self._engine for loc_ in loc) @Appender(_index_shared_docs['contains'] % _index_doc_kwargs) def contains(self, key): hash(key) - - if self.categories._defer_to_indexing: - return self.categories.contains(key) - - return key in self.values + return key in self def __array__(self, dtype=None): """ the array interface, return my values """ From 0921273dde9d489d30bce199ee7ed8f1078cedd3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 15 Jun 2018 05:50:05 -0700 Subject: [PATCH 040/113] CLN: Index imports and 0.23.1 whatsnew (#21490) --- doc/source/whatsnew/v0.23.1.txt | 8 ++++---- pandas/core/indexes/base.py | 14 ++++++-------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index db25bcf8113f5..af4eeffd87d01 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -97,8 +97,8 @@ Bug Fixes **Data-type specific** -- Bug in :meth:`Series.str.replace()` where the method throws `TypeError` on Python 3.5.2 (:issue: `21078`) -- Bug in :class:`Timedelta`: where passing a float with a unit would prematurely round the float precision (:issue: `14156`) +- Bug in :meth:`Series.str.replace()` where the method throws `TypeError` on Python 3.5.2 (:issue:`21078`) +- Bug in :class:`Timedelta` where passing a float with a unit would prematurely round the float precision (:issue:`14156`) - Bug in :func:`pandas.testing.assert_index_equal` which raised ``AssertionError`` incorrectly, when comparing two :class:`CategoricalIndex` objects with param ``check_categorical=False`` (:issue:`19776`) **Sparse** @@ -110,12 +110,12 @@ Bug Fixes - Bug in 
:meth:`Series.reset_index` where appropriate error was not raised with an invalid level name (:issue:`20925`) - Bug in :func:`interval_range` when ``start``/``periods`` or ``end``/``periods`` are specified with float ``start`` or ``end`` (:issue:`21161`) - Bug in :meth:`MultiIndex.set_names` where error raised for a ``MultiIndex`` with ``nlevels == 1`` (:issue:`21149`) -- Bug in :class:`IntervalIndex` constructors where creating an ``IntervalIndex`` from categorical data was not fully supported (:issue:`21243`, issue:`21253`) +- Bug in :class:`IntervalIndex` constructors where creating an ``IntervalIndex`` from categorical data was not fully supported (:issue:`21243`, :issue:`21253`) - Bug in :meth:`MultiIndex.sort_index` which was not guaranteed to sort correctly with ``level=1``; this was also causing data misalignment in particular :meth:`DataFrame.stack` operations (:issue:`20994`, :issue:`20945`, :issue:`21052`) **Plotting** -- New keywords (sharex, sharey) to turn on/off sharing of x/y-axis by subplots generated with pandas.DataFrame().groupby().boxplot() (:issue: `20968`) +- New keywords (sharex, sharey) to turn on/off sharing of x/y-axis by subplots generated with pandas.DataFrame().groupby().boxplot() (:issue:`20968`) **I/O** diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4b32e5d4f5654..6a56278b0da49 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -283,7 +283,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if (is_datetime64_any_dtype(data) or (dtype is not None and is_datetime64_any_dtype(dtype)) or 'tz' in kwargs): - from pandas.core.indexes.datetimes import DatetimeIndex + from pandas import DatetimeIndex result = DatetimeIndex(data, copy=copy, name=name, dtype=dtype, **kwargs) if dtype is not None and is_dtype_equal(_o_dtype, dtype): @@ -293,7 +293,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif (is_timedelta64_dtype(data) or (dtype is not None and is_timedelta64_dtype(dtype))): - from pandas.core.indexes.timedeltas import TimedeltaIndex + from pandas import TimedeltaIndex result = TimedeltaIndex(data, copy=copy, name=name, **kwargs) if dtype is not None and _o_dtype == dtype: return Index(result.to_pytimedelta(), dtype=_o_dtype) @@ -404,8 +404,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if (lib.is_datetime_with_singletz_array(subarr) or 'tz' in kwargs): # only when subarr has the same tz - from pandas.core.indexes.datetimes import ( - DatetimeIndex) + from pandas import DatetimeIndex try: return DatetimeIndex(subarr, copy=copy, name=name, **kwargs) @@ -413,8 +412,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, pass elif inferred.startswith('timedelta'): - from pandas.core.indexes.timedeltas import ( - TimedeltaIndex) + from pandas import TimedeltaIndex return TimedeltaIndex(subarr, copy=copy, name=name, **kwargs) elif inferred == 'period': @@ -1177,7 +1175,7 @@ def astype(self, dtype, copy=True): copy=copy) try: if is_datetime64tz_dtype(dtype): - from pandas.core.indexes.datetimes import DatetimeIndex + from pandas import DatetimeIndex return DatetimeIndex(self.values, name=self.name, dtype=dtype, copy=copy) return Index(self.values.astype(dtype, copy=copy), name=self.name, @@ -3333,7 +3331,7 @@ def get_indexer_for(self, target, **kwargs): def _maybe_promote(self, other): # A hack, but it works - from pandas.core.indexes.datetimes import DatetimeIndex + from pandas import DatetimeIndex if self.inferred_type == 'date' and 
isinstance(other, DatetimeIndex): return DatetimeIndex(self), other elif self.inferred_type == 'boolean': From d2de5070f9d8d0722a90cb328b98afc40d7c5018 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Fri, 15 Jun 2018 13:51:18 +0100 Subject: [PATCH 041/113] improve speed of nans in CategoricalIndex (#21493) --- pandas/core/indexes/category.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 7f2860a963423..0093d4940751e 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -326,7 +326,7 @@ def __contains__(self, key): hash(key) if isna(key): # if key is a NaN, check if any NaN is in self. - return self.isna().any() + return self.hasnans # is key in self.categories? Then get its location. # If not (i.e. KeyError), it logically can't be in self either From aa5e1f135dec74102fc168b8fb5a71c88badb418 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 15 Jun 2018 10:19:04 -0700 Subject: [PATCH 042/113] perf improvements in tslibs.period (#21447) --- pandas/_libs/src/util.pxd | 15 +++++++++++++++ pandas/_libs/tslib.pyx | 2 +- pandas/_libs/tslibs/period.pyx | 20 ++++++++++++-------- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/src/util.pxd b/pandas/_libs/src/util.pxd index d8249ec130f4d..2c1876fad95d2 100644 --- a/pandas/_libs/src/util.pxd +++ b/pandas/_libs/src/util.pxd @@ -161,3 +161,18 @@ cdef inline bint _checknull(object val): cdef inline bint is_period_object(object val): return getattr(val, '_typ', '_typ') == 'period' + + +cdef inline bint is_offset_object(object val): + """ + Check if an object is a DateOffset object. + + Parameters + ---------- + val : object + + Returns + ------- + is_date_offset : bool + """ + return getattr(val, '_typ', None) == "dateoffset" diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 4f73f196b0d9d..6588b5476e2b9 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -125,7 +125,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, elif box == "datetime": func_create = create_datetime_from_ts else: - raise ValueError("box must be one of 'datetime', 'date', 'time' or" + + raise ValueError("box must be one of 'datetime', 'date', 'time' or" " 'timestamp'") if tz is not None: diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 008747c0a9e78..cc2fb6e0617cb 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -19,7 +19,8 @@ from pandas.compat import PY2 cimport cython -from cpython.datetime cimport PyDateTime_Check, PyDateTime_IMPORT +from cpython.datetime cimport (PyDateTime_Check, PyDelta_Check, + PyDateTime_IMPORT) # import datetime C API PyDateTime_IMPORT @@ -1058,18 +1059,21 @@ cdef class _Period(object): return hash((self.ordinal, self.freqstr)) def _add_delta(self, other): - if isinstance(other, (timedelta, np.timedelta64, offsets.Tick)): + cdef: + int64_t nanos, offset_nanos + + if (PyDelta_Check(other) or util.is_timedelta64_object(other) or + isinstance(other, offsets.Tick)): offset = frequencies.to_offset(self.freq.rule_code) if isinstance(offset, offsets.Tick): nanos = delta_to_nanoseconds(other) offset_nanos = delta_to_nanoseconds(offset) - if nanos % offset_nanos == 0: ordinal = self.ordinal + (nanos // offset_nanos) return Period(ordinal=ordinal, freq=self.freq) msg = 'Input cannot be converted to Period(freq={0})' raise IncompatibleFrequency(msg.format(self.freqstr)) - elif isinstance(other, 
offsets.DateOffset): + elif util.is_offset_object(other): freqstr = other.rule_code base = get_base_alias(freqstr) if base == self.freq.rule_code: @@ -1082,8 +1086,8 @@ cdef class _Period(object): def __add__(self, other): if is_period_object(self): - if isinstance(other, (timedelta, np.timedelta64, - offsets.DateOffset)): + if (PyDelta_Check(other) or util.is_timedelta64_object(other) or + util.is_offset_object(other)): return self._add_delta(other) elif other is NaT: return NaT @@ -1109,8 +1113,8 @@ cdef class _Period(object): def __sub__(self, other): if is_period_object(self): - if isinstance(other, (timedelta, np.timedelta64, - offsets.DateOffset)): + if (PyDelta_Check(other) or util.is_timedelta64_object(other) or + util.is_offset_object(other)): neg_other = -other return self + neg_other elif util.is_integer_object(other): From 676ae5950de3bee2aa5916e4db8618a87476cd2f Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Fri, 15 Jun 2018 11:21:36 -0600 Subject: [PATCH 043/113] BUG: Fix Series.nlargest for integer boundary values (#21432) --- doc/source/whatsnew/v0.23.2.txt | 1 + pandas/conftest.py | 71 ++++++++++++++++++++++++ pandas/core/algorithms.py | 5 +- pandas/tests/frame/test_analytics.py | 78 +++++++++++++-------------- pandas/tests/series/test_analytics.py | 35 ++++++++++++ 5 files changed, 147 insertions(+), 43 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 79a4c3da2ffa4..b8d865195cddd 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -82,4 +82,5 @@ Bug Fixes **Other** +- Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`) - diff --git a/pandas/conftest.py b/pandas/conftest.py index d5f399c7cd63d..9d806a91f37f7 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -129,6 +129,14 @@ def join_type(request): return request.param +@pytest.fixture(params=['nlargest', 'nsmallest']) +def nselect_method(request): + """ + Fixture for trying all nselect methods + """ + return request.param + + @pytest.fixture(params=[None, np.nan, pd.NaT, float('nan'), np.float('NaN')]) def nulls_fixture(request): """ @@ -170,3 +178,66 @@ def string_dtype(request): * 'U' """ return request.param + + +@pytest.fixture(params=["float32", "float64"]) +def float_dtype(request): + """ + Parameterized fixture for float dtypes. + + * float32 + * float64 + """ + + return request.param + + +UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"] +SIGNED_INT_DTYPES = ["int8", "int16", "int32", "int64"] +ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES + + +@pytest.fixture(params=SIGNED_INT_DTYPES) +def sint_dtype(request): + """ + Parameterized fixture for signed integer dtypes. + + * int8 + * int16 + * int32 + * int64 + """ + + return request.param + + +@pytest.fixture(params=UNSIGNED_INT_DTYPES) +def uint_dtype(request): + """ + Parameterized fixture for unsigned integer dtypes. + + * uint8 + * uint16 + * uint32 + * uint64 + """ + + return request.param + + +@pytest.fixture(params=ALL_INT_DTYPES) +def any_int_dtype(request): + """ + Parameterized fixture for any integer dtypes. 
+ + * int8 + * uint8 + * int16 + * uint16 + * int32 + * uint32 + * int64 + * uint64 + """ + + return request.param diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b33c10da7813e..9e34b8eb55ccb 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1133,9 +1133,12 @@ def compute(self, method): return dropped[slc].sort_values(ascending=ascending).head(n) # fast method - arr, _, _ = _ensure_data(dropped.values) + arr, pandas_dtype, _ = _ensure_data(dropped.values) if method == 'nlargest': arr = -arr + if is_integer_dtype(pandas_dtype): + # GH 21426: ensure reverse ordering at boundaries + arr -= 1 if self.keep == 'last': arr = arr[::-1] diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index b8f1acc2aa679..6dc24ed856017 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -12,7 +12,7 @@ from numpy.random import randn import numpy as np -from pandas.compat import lrange, product, PY35 +from pandas.compat import lrange, PY35 from pandas import (compat, isna, notna, DataFrame, Series, MultiIndex, date_range, Timestamp, Categorical, _np_version_under1p12, _np_version_under1p15, @@ -2260,54 +2260,49 @@ class TestNLargestNSmallest(object): # ---------------------------------------------------------------------- # Top / bottom - @pytest.mark.parametrize( - 'method, n, order', - product(['nsmallest', 'nlargest'], range(1, 11), - [['a'], - ['c'], - ['a', 'b'], - ['a', 'c'], - ['b', 'a'], - ['b', 'c'], - ['a', 'b', 'c'], - ['c', 'a', 'b'], - ['c', 'b', 'a'], - ['b', 'c', 'a'], - ['b', 'a', 'c'], - - # dups! - ['b', 'c', 'c'], - - ])) - def test_n(self, df_strings, method, n, order): + @pytest.mark.parametrize('order', [ + ['a'], + ['c'], + ['a', 'b'], + ['a', 'c'], + ['b', 'a'], + ['b', 'c'], + ['a', 'b', 'c'], + ['c', 'a', 'b'], + ['c', 'b', 'a'], + ['b', 'c', 'a'], + ['b', 'a', 'c'], + + # dups! 
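        # [Illustrative aside, not part of the patch] The ``arr -= 1`` added
        # in algorithms.py above works because, combined with ``arr = -arr``,
        # the transform becomes a bitwise NOT (``-x - 1 == ~x``), which
        # reverses integer order without the overflow that plain negation
        # hits at the dtype minimum. A quick sketch:
        #
        #   import numpy as np
        #   x = np.array([-128, 0, 127], dtype=np.int8)
        #   -x       # -> [-128, 0, -127]  (the minimum overflows in place)
        #   -x - 1   # -> [127, -1, -128]  (clean, order-reversing mapping)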
+ ['b', 'c', 'c']]) + @pytest.mark.parametrize('n', range(1, 11)) + def test_n(self, df_strings, nselect_method, n, order): # GH10393 df = df_strings if 'b' in order: error_msg = self.dtype_error_msg_template.format( - column='b', method=method, dtype='object') + column='b', method=nselect_method, dtype='object') with tm.assert_raises_regex(TypeError, error_msg): - getattr(df, method)(n, order) + getattr(df, nselect_method)(n, order) else: - ascending = method == 'nsmallest' - result = getattr(df, method)(n, order) + ascending = nselect_method == 'nsmallest' + result = getattr(df, nselect_method)(n, order) expected = df.sort_values(order, ascending=ascending).head(n) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( - 'method, columns', - product(['nsmallest', 'nlargest'], - product(['group'], ['category_string', 'string']) - )) - def test_n_error(self, df_main_dtypes, method, columns): + @pytest.mark.parametrize('columns', [ + ('group', 'category_string'), ('group', 'string')]) + def test_n_error(self, df_main_dtypes, nselect_method, columns): df = df_main_dtypes + col = columns[1] error_msg = self.dtype_error_msg_template.format( - column=columns[1], method=method, dtype=df[columns[1]].dtype) + column=col, method=nselect_method, dtype=df[col].dtype) # escape some characters that may be in the repr error_msg = (error_msg.replace('(', '\\(').replace(")", "\\)") .replace("[", "\\[").replace("]", "\\]")) with tm.assert_raises_regex(TypeError, error_msg): - getattr(df, method)(2, columns) + getattr(df, nselect_method)(2, columns) def test_n_all_dtypes(self, df_main_dtypes): df = df_main_dtypes @@ -2328,15 +2323,14 @@ def test_n_identical_values(self): expected = pd.DataFrame({'a': [1] * 3, 'b': [1, 2, 3]}) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( - 'n, order', - product([1, 2, 3, 4, 5], - [['a', 'b', 'c'], - ['c', 'b', 'a'], - ['a'], - ['b'], - ['a', 'b'], - ['c', 'b']])) + @pytest.mark.parametrize('order', [ + ['a', 'b', 'c'], + ['c', 'b', 'a'], + ['a'], + ['b'], + ['a', 'b'], + ['c', 'b']]) + @pytest.mark.parametrize('n', range(1, 6)) def test_n_duplicate_index(self, df_duplicates, n, order): # GH 13412 diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index aba472f2ce8f9..b9c7b837b8b81 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1944,6 +1944,15 @@ def test_mode_sortwarning(self): tm.assert_series_equal(result, expected) +def assert_check_nselect_boundary(vals, dtype, method): + # helper function for 'test_boundary_{dtype}' tests + s = Series(vals, dtype=dtype) + result = getattr(s, method)(3) + expected_idxr = [0, 1, 2] if method == 'nsmallest' else [3, 2, 1] + expected = s.loc[expected_idxr] + tm.assert_series_equal(result, expected) + + class TestNLargestNSmallest(object): @pytest.mark.parametrize( @@ -2028,6 +2037,32 @@ def test_n(self, n): expected = s.sort_values().head(n) assert_series_equal(result, expected) + def test_boundary_integer(self, nselect_method, any_int_dtype): + # GH 21426 + dtype_info = np.iinfo(any_int_dtype) + min_val, max_val = dtype_info.min, dtype_info.max + vals = [min_val, min_val + 1, max_val - 1, max_val] + assert_check_nselect_boundary(vals, any_int_dtype, nselect_method) + + def test_boundary_float(self, nselect_method, float_dtype): + # GH 21426 + dtype_info = np.finfo(float_dtype) + min_val, max_val = dtype_info.min, dtype_info.max + min_2nd, max_2nd = np.nextafter( + [min_val, max_val], 0, dtype=float_dtype) + vals = 
[min_val, min_2nd, max_2nd, max_val] + assert_check_nselect_boundary(vals, float_dtype, nselect_method) + + @pytest.mark.parametrize('dtype', ['datetime64[ns]', 'timedelta64[ns]']) + def test_boundary_datetimelike(self, nselect_method, dtype): + # GH 21426 + # use int64 bounds and +1 to min_val since true minimum is NaT + # (include min_val/NaT at end to maintain same expected_idxr) + dtype_info = np.iinfo('int64') + min_val, max_val = dtype_info.min, dtype_info.max + vals = [min_val + 1, min_val + 2, max_val - 1, max_val, min_val] + assert_check_nselect_boundary(vals, dtype, nselect_method) + class TestCategoricalSeriesAnalytics(object): From 8ca172d52368914fffa85d4df04fdd06b2db2c6a Mon Sep 17 00:00:00 2001 From: Uddeshya Singh Date: Fri, 15 Jun 2018 22:57:19 +0530 Subject: [PATCH 044/113] Removing SimpleMock test from pandas.util.testing (#21482) --- pandas/util/testing.py | 53 ------------------------------------------ 1 file changed, 53 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 233eba6490937..d26a2116fb3ce 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2263,59 +2263,6 @@ def wrapper(*args, **kwargs): with_connectivity_check = network -class SimpleMock(object): - - """ - Poor man's mocking object - - Note: only works for new-style classes, assumes __getattribute__ exists. - - >>> a = type("Duck",(),{}) - >>> a.attr1,a.attr2 ="fizz","buzz" - >>> b = SimpleMock(a,"attr1","bar") - >>> b.attr1 == "bar" and b.attr2 == "buzz" - True - >>> a.attr1 == "fizz" and a.attr2 == "buzz" - True - """ - - def __init__(self, obj, *args, **kwds): - assert(len(args) % 2 == 0) - attrs = kwds.get("attrs", {}) - for k, v in zip(args[::2], args[1::2]): - # dict comprehensions break 2.6 - attrs[k] = v - self.attrs = attrs - self.obj = obj - - def __getattribute__(self, name): - attrs = object.__getattribute__(self, "attrs") - obj = object.__getattribute__(self, "obj") - return attrs.get(name, type(obj).__getattribute__(obj, name)) - - -@contextmanager -def stdin_encoding(encoding=None): - """ - Context manager for running bits of code while emulating an arbitrary - stdin encoding. 
- - >>> import sys - >>> _encoding = sys.stdin.encoding - >>> with stdin_encoding('AES'): sys.stdin.encoding - 'AES' - >>> sys.stdin.encoding==_encoding - True - - """ - import sys - - _stdin = sys.stdin - sys.stdin = SimpleMock(sys.stdin, "encoding", encoding) - yield - sys.stdin = _stdin - - def assert_raises_regex(_exception, _regexp, _callable=None, *args, **kwargs): r""" From 28780c27eff197cf0897bf643e9f26c81e8d117e Mon Sep 17 00:00:00 2001 From: Uddeshya Singh Date: Fri, 15 Jun 2018 23:03:34 +0530 Subject: [PATCH 045/113] TST: adding test cases for verifying correct values shown by pivot_table() #21378 (#21393) --- pandas/tests/reshape/test_pivot.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 3ec60d50f2792..ca95dde1a20c9 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -161,6 +161,24 @@ def test_pivot_with_non_observable_dropna(self, dropna): tm.assert_frame_equal(result, expected) + # gh-21378 + df = pd.DataFrame( + {'A': pd.Categorical(['left', 'low', 'high', 'low', 'high'], + categories=['low', 'high', 'left'], + ordered=True), + 'B': range(5)}) + + result = df.pivot_table(index='A', values='B', dropna=dropna) + expected = pd.DataFrame( + {'B': [2, 3, 0]}, + index=pd.Index( + pd.Categorical.from_codes([0, 1, 2], + categories=['low', 'high', 'left'], + ordered=True), + name='A')) + + tm.assert_frame_equal(result, expected) + def test_pass_array(self): result = self.data.pivot_table( 'D', index=self.data.A, columns=self.data.C) From a231fb28616b04401ae36efe38ed7ee2759c9c23 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Mon, 18 Jun 2018 23:42:59 +0200 Subject: [PATCH 046/113] PERF: remove useless overrides (#21523) closes #21522 --- doc/source/whatsnew/v0.23.2.txt | 1 + pandas/core/indexes/multi.py | 12 ------------ 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index b8d865195cddd..f7e170cca039e 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -27,6 +27,7 @@ Performance Improvements - Improved performance of membership checks in :class:`CategoricalIndex` (i.e. ``x in ci``-style checks are much faster). :meth:`CategoricalIndex.contains` is likewise much faster (:issue:`21369`) +- Improved performance of :meth:`MultiIndex.is_unique` (:issue:`21522`) - Documentation Changes diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 75b6be96feb78..ab23a80acdaae 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -852,14 +852,6 @@ def _has_complex_internals(self): # to disable groupby tricks return True - @cache_readonly - def is_monotonic(self): - """ - return if the index is monotonic increasing (only equal or - increasing) values. 
- """ - return self.is_monotonic_increasing - @cache_readonly def is_monotonic_increasing(self): """ @@ -887,10 +879,6 @@ def is_monotonic_decreasing(self): # monotonic decreasing if and only if reverse is monotonic increasing return self[::-1].is_monotonic_increasing - @cache_readonly - def is_unique(self): - return not self.duplicated().any() - @cache_readonly def _have_mixed_levels(self): """ return a boolean list indicated if we have mixed levels """ From 5f44af0b3949b8be00a73b6cba2da9d8cc730e9d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 18 Jun 2018 15:34:28 -0700 Subject: [PATCH 047/113] TST: Add unit tests for older timezone issues (#21491) --- doc/source/whatsnew/v0.23.2.txt | 5 ++++ .../indexes/datetimes/test_arithmetic.py | 24 ++++++++++++++++++- .../tests/scalar/timestamp/test_timestamp.py | 6 +++++ .../tests/scalar/timestamp/test_unary_ops.py | 7 ++++++ .../tests/series/indexing/test_alter_index.py | 9 +++++++ pandas/tests/test_resample.py | 11 +++++++++ 6 files changed, 61 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index f7e170cca039e..70a5dd5817c3c 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -80,6 +80,11 @@ Bug Fixes **Timezones** - Bug in :class:`Timestamp` and :class:`DatetimeIndex` where passing a :class:`Timestamp` localized after a DST transition would return a datetime before the DST transition (:issue:`20854`) - Bug in comparing :class:`DataFrame`s with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`) +- Bug in :meth:`DatetimeIndex.shift` where an ``AssertionError`` would raise when shifting across DST (:issue:`8616`) +- Bug in :class:`Timestamp` constructor where passing an invalid timezone offset designator (``Z``) would not raise a ``ValueError``(:issue:`8910`) +- Bug in :meth:`Timestamp.replace` where replacing at a DST boundary would retain an incorrect offset (:issue:`7825`) +- Bug in :meth:`DatetimeIndex.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`) +- Bug in :meth:`DatetimeIndex.resample` when downsampling across a DST boundary (:issue:`8531`) **Other** diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index eff2872a1cff3..0649083a440df 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -4,7 +4,7 @@ import operator import pytest - +import pytz import numpy as np import pandas as pd @@ -476,6 +476,28 @@ def test_dti_shift_localized(self, tzstr): result = dr_tz.shift(1, '10T') assert result.tz == dr_tz.tz + def test_dti_shift_across_dst(self): + # GH 8616 + idx = date_range('2013-11-03', tz='America/Chicago', + periods=7, freq='H') + s = Series(index=idx[:-1]) + result = s.shift(freq='H') + expected = Series(index=idx[1:]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('shift, result_time', [ + [0, '2014-11-14 00:00:00'], + [-1, '2014-11-13 23:00:00'], + [1, '2014-11-14 01:00:00']]) + def test_dti_shift_near_midnight(self, shift, result_time): + # GH 8616 + dt = datetime(2014, 11, 14, 0) + dt_est = pytz.timezone('EST').localize(dt) + s = Series(data=[1], index=[dt_est]) + result = s.shift(shift, freq='H') + expected = Series(1, index=DatetimeIndex([result_time], tz='EST')) + tm.assert_series_equal(result, expected) + # ------------------------------------------------------------- # Binary 
operations DatetimeIndex and timedelta-like diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 4689c7bea626f..8dc9903b7356d 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -420,6 +420,12 @@ def test_constructor_nanosecond(self, result): expected = expected + Timedelta(nanoseconds=1) assert result == expected + @pytest.mark.parametrize('z', ['Z0', 'Z00']) + def test_constructor_invalid_Z0_isostring(self, z): + # GH 8910 + with pytest.raises(ValueError): + Timestamp('2014-11-02 01:00{}'.format(z)) + @pytest.mark.parametrize('arg', ['year', 'month', 'day', 'hour', 'minute', 'second', 'microsecond', 'nanosecond']) def test_invalid_date_kwarg_with_string_input(self, arg): diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index dbe31ccb11114..b02fef707a6fe 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -257,6 +257,13 @@ def test_replace_across_dst(self, tz, normalize): ts2b = normalize(ts2) assert ts2 == ts2b + def test_replace_dst_border(self): + # Gh 7825 + t = Timestamp('2013-11-3', tz='America/Chicago') + result = t.replace(hour=3) + expected = Timestamp('2013-11-3 03:00:00', tz='America/Chicago') + assert result == expected + # -------------------------------------------------------------- @td.skip_if_windows diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 999ed5f26daee..bcd5a64402c33 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -453,6 +453,15 @@ def test_reindex_fill_value(): assert_series_equal(result, expected) +def test_reindex_datetimeindexes_tz_naive_and_aware(): + # GH 8306 + idx = date_range('20131101', tz='America/Chicago', periods=7) + newidx = date_range('20131103', periods=10, freq='H') + s = Series(range(7), index=idx) + with pytest.raises(TypeError): + s.reindex(newidx, method='ffill') + + def test_rename(): # GH 17407 s = Series(range(1, 6), index=pd.Index(range(2, 7), name='IntIndex')) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index c1257cce9a9a4..6f0ad0535c6b4 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2084,6 +2084,17 @@ def test_resample_dst_anchor(self): freq='D', tz='Europe/Paris')), 'D Frequency') + def test_downsample_across_dst(self): + # GH 8531 + tz = pytz.timezone('Europe/Berlin') + dt = datetime(2014, 10, 26) + dates = date_range(tz.localize(dt), periods=4, freq='2H') + result = Series(5, index=dates).resample('H').mean() + expected = Series([5., np.nan] * 3 + [5.], + index=date_range(tz.localize(dt), periods=7, + freq='H')) + tm.assert_series_equal(result, expected) + def test_resample_with_nat(self): # GH 13020 index = DatetimeIndex([pd.NaT, From 1427b690f2f3de336e6cc5e9cdcdd006d468322d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 18 Jun 2018 17:39:39 -0500 Subject: [PATCH 048/113] BUG: Timedelta.__bool__ (#21485) Closes #21484 --- doc/source/whatsnew/v0.23.2.txt | 9 ++++++--- pandas/_libs/tslibs/timedeltas.pyx | 3 +++ pandas/tests/scalar/timedelta/test_timedelta.py | 14 ++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 70a5dd5817c3c..48efc02480e67 100644 --- 
a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -46,10 +46,13 @@ Bug Fixes - - -**Conversion** +**Timedelta** +- Bug in :class:`Timedelta` where non-zero timedeltas shorter than 1 microsecond were considered False (:issue:`21484`) -- +**Conversion** + +- Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`) - **Indexing** @@ -78,6 +81,7 @@ Bug Fixes - **Timezones** + - Bug in :class:`Timestamp` and :class:`DatetimeIndex` where passing a :class:`Timestamp` localized after a DST transition would return a datetime before the DST transition (:issue:`20854`) - Bug in comparing :class:`DataFrame`s with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`) - Bug in :meth:`DatetimeIndex.shift` where an ``AssertionError`` would raise when shifting across DST (:issue:`8616`) @@ -88,5 +92,4 @@ Bug Fixes **Other** -- Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`) - diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 87dc371195b5b..f68dc421a1ee9 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -899,6 +899,9 @@ cdef class _Timedelta(timedelta): def __str__(self): return self._repr_base(format='long') + def __bool__(self): + return self.value != 0 + def isoformat(self): """ Format Timedelta as ISO 8601 Duration like diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 205fdf49d3e91..6472bd4245622 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -588,3 +588,17 @@ def test_components(self): result = s.dt.components assert not result.iloc[0].isna().all() assert result.iloc[1].isna().all() + + +@pytest.mark.parametrize('value, expected', [ + (Timedelta('10S'), True), + (Timedelta('-10S'), True), + (Timedelta(10, unit='ns'), True), + (Timedelta(0, unit='ns'), False), + (Timedelta(-10, unit='ns'), True), + (Timedelta(None), True), + (pd.NaT, True), +]) +def test_truthiness(value, expected): + # https://github.com/pandas-dev/pandas/issues/21484 + assert bool(value) is expected From 2741967a8917769d77c0486cea9b57436f9e17bb Mon Sep 17 00:00:00 2001 From: David Krych Date: Mon, 18 Jun 2018 18:43:27 -0400 Subject: [PATCH 049/113] BUG: Fix Index construction when given empty generator (#21470). 
(#21481) --- doc/source/whatsnew/v0.23.2.txt | 3 ++- pandas/core/arrays/categorical.py | 5 ++--- pandas/core/indexes/base.py | 10 ++++++---- pandas/tests/indexes/test_base.py | 19 +++++++++++-------- 4 files changed, 21 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 48efc02480e67..94669c5b02410 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -52,8 +52,9 @@ Bug Fixes **Conversion** +- Bug in constructing :class:`Index` with an iterator or generator (:issue:`21470`) - Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`) -- + **Indexing** diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d466198b648ef..e22b0d626a218 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -3,7 +3,6 @@ import numpy as np from warnings import warn import textwrap -import types from pandas import compat from pandas.compat import u, lzip @@ -28,7 +27,7 @@ is_categorical, is_categorical_dtype, is_list_like, is_sequence, - is_scalar, + is_scalar, is_iterator, is_dict_like) from pandas.core.algorithms import factorize, take_1d, unique1d, take @@ -2483,7 +2482,7 @@ def _convert_to_list_like(list_like): if isinstance(list_like, list): return list_like if (is_sequence(list_like) or isinstance(list_like, tuple) or - isinstance(list_like, types.GeneratorType)): + is_iterator(list_like)): return list(list_like) elif is_scalar(list_like): return [list_like] diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6a56278b0da49..27cc368a696e3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -428,12 +428,14 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif data is None or is_scalar(data): cls._scalar_data_error(data) else: - if tupleize_cols and is_list_like(data) and data: + if tupleize_cols and is_list_like(data): + # GH21470: convert iterable to list before determining if empty if is_iterator(data): data = list(data) - # we must be all tuples, otherwise don't construct - # 10697 - if all(isinstance(e, tuple) for e in data): + + if data and all(isinstance(e, tuple) for e in data): + # we must be all tuples, otherwise don't construct + # 10697 from .multi import MultiIndex return MultiIndex.from_tuples( data, names=name or kwargs.get('names')) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index b8bd218ec25ab..1d8a958c3413f 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -445,21 +445,24 @@ def test_constructor_dtypes_timedelta(self, attr, klass): result = klass(list(values), dtype=dtype) tm.assert_index_equal(result, index) - def test_constructor_empty_gen(self): - skip_index_keys = ["repeats", "periodIndex", "rangeIndex", - "tuples"] - for key, index in self.generate_index_types(skip_index_keys): - empty = index.__class__([]) - assert isinstance(empty, index.__class__) - assert not len(empty) + @pytest.mark.parametrize("value", [[], iter([]), (x for x in [])]) + @pytest.mark.parametrize("klass", + [Index, Float64Index, Int64Index, UInt64Index, + CategoricalIndex, DatetimeIndex, TimedeltaIndex]) + def test_constructor_empty(self, value, klass): + empty = klass(value) + assert isinstance(empty, klass) + assert not len(empty) @pytest.mark.parametrize("empty,klass", [ (PeriodIndex([], freq='B'), PeriodIndex), + (PeriodIndex(iter([]), 
freq='B'), PeriodIndex), + (PeriodIndex((x for x in []), freq='B'), PeriodIndex), (RangeIndex(step=1), pd.RangeIndex), (MultiIndex(levels=[[1, 2], ['blue', 'red']], labels=[[], []]), MultiIndex) ]) - def test_constructor_empty(self, empty, klass): + def test_constructor_empty_special(self, empty, klass): assert isinstance(empty, klass) assert not len(empty) From 34b77d12b198f2dd41f5f2699cc5a7d266d8924a Mon Sep 17 00:00:00 2001 From: Ming Li <14131823+minggli@users.noreply.github.com> Date: Mon, 18 Jun 2018 23:45:25 +0100 Subject: [PATCH 050/113] BUG/REG: file-handle object handled incorrectly in to_csv (#21478) --- doc/source/whatsnew/v0.23.2.txt | 2 +- pandas/core/frame.py | 3 +- pandas/core/series.py | 3 +- pandas/io/common.py | 4 +++ pandas/io/formats/csvs.py | 59 ++++++++++++++++++++----------- pandas/tests/frame/test_to_csv.py | 16 +++++---- pandas/tests/series/test_io.py | 18 +++++----- pandas/tests/test_common.py | 34 +++++++++++++----- 8 files changed, 91 insertions(+), 48 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 94669c5b02410..67c7ce150132a 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -16,7 +16,7 @@ and bug fixes. We recommend that all users upgrade to this version. Fixed Regressions ~~~~~~~~~~~~~~~~~ -- +- Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) - .. _whatsnew_0232.performance: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 02c86d2f4dcc8..a5dfbcc2a3142 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1690,7 +1690,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. compression : string, optional A string representing the compression to use in the output file. - Allowed values are 'gzip', 'bz2', 'zip', 'xz'. + Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only + used when the first argument is a filename. line_terminator : string, default ``'\n'`` The newline character or character sequence to use in the output file diff --git a/pandas/core/series.py b/pandas/core/series.py index 0450f28087f66..23c4bbe082f28 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3790,7 +3790,8 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='', non-ascii, for python versions prior to 3 compression : string, optional A string representing the compression to use in the output file. - Allowed values are 'gzip', 'bz2', 'zip', 'xz'. + Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only + used when the first argument is a filename. date_format: string, default None Format string for datetime objects. decimal: string, default '.' 
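[Illustrative sketch, not part of the patch] The docstring changes above make
explicit that ``compression`` only takes effect when ``to_csv`` is given a
path; the ``csvs.py`` change below emits a ``RuntimeWarning`` and writes
uncompressed output when a file handle is passed instead. Assuming a writable
local path:

    import pandas as pd

    df = pd.DataFrame({'x': [1, 2, 3]})
    df.to_csv('out.csv.gz', compression='gzip')    # compressed on disk
    with open('out.csv', 'w') as fh:
        df.to_csv(fh, compression='gzip')          # warns; writes plain text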
diff --git a/pandas/io/common.py b/pandas/io/common.py index a492b7c0b8e8e..ac9077f2db50e 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -445,6 +445,10 @@ def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs): def write(self, data): super(BytesZipFile, self).writestr(self.filename, data) + @property + def closed(self): + return self.fp is None + class MMapWrapper(BaseIterator): """ diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 7f660e2644fa4..60518f596e9af 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -5,11 +5,13 @@ from __future__ import print_function +import warnings + import csv as csvlib +from zipfile import ZipFile import numpy as np from pandas.core.dtypes.missing import notna -from pandas.core.dtypes.inference import is_file_like from pandas.core.index import Index, MultiIndex from pandas import compat from pandas.compat import (StringIO, range, zip) @@ -128,19 +130,31 @@ def save(self): else: encoding = self.encoding - # PR 21300 uses string buffer to receive csv writing and dump into - # file-like output with compression as option. GH 21241, 21118 - f = StringIO() - if not is_file_like(self.path_or_buf): - # path_or_buf is path - path_or_buf = self.path_or_buf - elif hasattr(self.path_or_buf, 'name'): - # path_or_buf is file handle - path_or_buf = self.path_or_buf.name - else: - # path_or_buf is file-like IO objects. + # GH 21227 internal compression is not used when file-like passed. + if self.compression and hasattr(self.path_or_buf, 'write'): + msg = ("compression has no effect when passing file-like " + "object as input.") + warnings.warn(msg, RuntimeWarning, stacklevel=2) + + # when zip compression is called. + is_zip = isinstance(self.path_or_buf, ZipFile) or ( + not hasattr(self.path_or_buf, 'write') + and self.compression == 'zip') + + if is_zip: + # zipfile doesn't support writing string to archive. uses string + # buffer to receive csv writing and dump into zip compression + # file handle. GH 21241, 21118 + f = StringIO() + close = False + elif hasattr(self.path_or_buf, 'write'): f = self.path_or_buf - path_or_buf = None + close = False + else: + f, handles = _get_handle(self.path_or_buf, self.mode, + encoding=encoding, + compression=self.compression) + close = True try: writer_kwargs = dict(lineterminator=self.line_terminator, @@ -157,13 +171,18 @@ def save(self): self._save() finally: - # GH 17778 handles zip compression for byte strings separately. - buf = f.getvalue() - if path_or_buf: - f, handles = _get_handle(path_or_buf, self.mode, - encoding=encoding, - compression=self.compression) - f.write(buf) + if is_zip: + # GH 17778 handles zip compression separately. 
+ buf = f.getvalue() + if hasattr(self.path_or_buf, 'write'): + self.path_or_buf.write(buf) + else: + f, handles = _get_handle(self.path_or_buf, self.mode, + encoding=encoding, + compression=self.compression) + f.write(buf) + close = True + if close: f.close() for _fh in handles: _fh.close() diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 60dc336a85388..3ad25ae73109e 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -9,6 +9,7 @@ import numpy as np from pandas.compat import (lmap, range, lrange, StringIO, u) +from pandas.io.common import _get_handle import pandas.core.common as com from pandas.errors import ParserError from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp, @@ -935,18 +936,19 @@ def test_to_csv_compression(self, df, encoding, compression): with ensure_clean() as filename: df.to_csv(filename, compression=compression, encoding=encoding) - # test the round trip - to_csv -> read_csv result = read_csv(filename, compression=compression, index_col=0, encoding=encoding) + assert_frame_equal(df, result) - with open(filename, 'w') as fh: - df.to_csv(fh, compression=compression, encoding=encoding) - - result_fh = read_csv(filename, compression=compression, - index_col=0, encoding=encoding) + # test the round trip using file handle - to_csv -> read_csv + f, _handles = _get_handle(filename, 'w', compression=compression, + encoding=encoding) + with f: + df.to_csv(f, encoding=encoding) + result = pd.read_csv(filename, compression=compression, + encoding=encoding, index_col=0, squeeze=True) assert_frame_equal(df, result) - assert_frame_equal(df, result_fh) # explicitly make sure file is compressed with tm.decompress_file(filename, compression) as fh: diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index f98962685ad9a..814d794d45c18 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -11,6 +11,7 @@ from pandas import Series, DataFrame from pandas.compat import StringIO, u +from pandas.io.common import _get_handle from pandas.util.testing import (assert_series_equal, assert_almost_equal, assert_frame_equal, ensure_clean) import pandas.util.testing as tm @@ -151,20 +152,19 @@ def test_to_csv_compression(self, s, encoding, compression): s.to_csv(filename, compression=compression, encoding=encoding, header=True) - # test the round trip - to_csv -> read_csv result = pd.read_csv(filename, compression=compression, encoding=encoding, index_col=0, squeeze=True) + assert_series_equal(s, result) - with open(filename, 'w') as fh: - s.to_csv(fh, compression=compression, encoding=encoding, - header=True) - - result_fh = pd.read_csv(filename, compression=compression, - encoding=encoding, index_col=0, - squeeze=True) + # test the round trip using file handle - to_csv -> read_csv + f, _handles = _get_handle(filename, 'w', compression=compression, + encoding=encoding) + with f: + s.to_csv(f, encoding=encoding, header=True) + result = pd.read_csv(filename, compression=compression, + encoding=encoding, index_col=0, squeeze=True) assert_series_equal(s, result) - assert_series_equal(s, result_fh) # explicitly ensure file was compressed with tm.decompress_file(filename, compression) as fh: diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 7034e9ac2e0c8..ef5f13bfa504a 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -11,6 +11,7 @@ from pandas.compat import range, lmap import pandas.core.common as com from 
pandas.core import ops +from pandas.io.common import _get_handle import pandas.util.testing as tm @@ -246,19 +247,34 @@ def test_compression_size(obj, method, compression_only): [12.32112, 123123.2, 321321.2]], columns=['X', 'Y', 'Z']), Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) -@pytest.mark.parametrize('method', ['to_csv']) +@pytest.mark.parametrize('method', ['to_csv', 'to_json']) def test_compression_size_fh(obj, method, compression_only): with tm.ensure_clean() as filename: - with open(filename, 'w') as fh: - getattr(obj, method)(fh, compression=compression_only) - assert not fh.closed - assert fh.closed + f, _handles = _get_handle(filename, 'w', compression=compression_only) + with f: + getattr(obj, method)(f) + assert not f.closed + assert f.closed compressed = os.path.getsize(filename) with tm.ensure_clean() as filename: - with open(filename, 'w') as fh: - getattr(obj, method)(fh, compression=None) - assert not fh.closed - assert fh.closed + f, _handles = _get_handle(filename, 'w', compression=None) + with f: + getattr(obj, method)(f) + assert not f.closed + assert f.closed uncompressed = os.path.getsize(filename) assert uncompressed > compressed + + +# GH 21227 +def test_compression_warning(compression_only): + df = DataFrame(100 * [[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + columns=['X', 'Y', 'Z']) + with tm.ensure_clean() as filename: + f, _handles = _get_handle(filename, 'w', compression=compression_only) + with tm.assert_produces_warning(RuntimeWarning, + check_stacklevel=False): + with f: + df.to_csv(f, compression=compression_only) From e788e47d6ff8fd3403a8d5411f8fad61509a75da Mon Sep 17 00:00:00 2001 From: William Ayd Date: Mon, 18 Jun 2018 18:03:14 -0700 Subject: [PATCH 051/113] Append Mode for ExcelWriter with openpyxl (#21251) --- doc/source/whatsnew/v0.24.0.txt | 2 ++ pandas/io/excel.py | 51 +++++++++++++++++++++++---------- pandas/tests/io/test_excel.py | 39 +++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index cae05446c00e6..c23ed006ff637 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -8,6 +8,8 @@ v0.24.0 New features ~~~~~~~~~~~~ +- ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`) + .. _whatsnew_0240.enhancements.other: Other Enhancements diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 5608c29637447..e86d33742b266 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -804,6 +804,10 @@ class ExcelWriter(object): datetime_format : string, default None Format string for datetime objects written into Excel files (e.g. 'YYYY-MM-DD HH:MM:SS') + mode : {'w' or 'a'}, default 'w' + File mode to use (write or append). + + .. 
versionadded:: 0.24.0 Notes ----- @@ -897,7 +901,8 @@ def save(self): pass def __init__(self, path, engine=None, - date_format=None, datetime_format=None, **engine_kwargs): + date_format=None, datetime_format=None, mode='w', + **engine_kwargs): # validate that this engine can handle the extension if isinstance(path, string_types): ext = os.path.splitext(path)[-1] @@ -919,6 +924,8 @@ def __init__(self, path, engine=None, else: self.datetime_format = datetime_format + self.mode = mode + def __fspath__(self): return _stringify_path(self.path) @@ -993,23 +1000,27 @@ class _OpenpyxlWriter(ExcelWriter): engine = 'openpyxl' supported_extensions = ('.xlsx', '.xlsm') - def __init__(self, path, engine=None, **engine_kwargs): + def __init__(self, path, engine=None, mode='w', **engine_kwargs): # Use the openpyxl module as the Excel writer. from openpyxl.workbook import Workbook - super(_OpenpyxlWriter, self).__init__(path, **engine_kwargs) + super(_OpenpyxlWriter, self).__init__(path, mode=mode, **engine_kwargs) - # Create workbook object with default optimized_write=True. - self.book = Workbook() + if self.mode == 'a': # Load from existing workbook + from openpyxl import load_workbook + book = load_workbook(self.path) + self.book = book + else: + # Create workbook object with default optimized_write=True. + self.book = Workbook() - # Openpyxl 1.6.1 adds a dummy sheet. We remove it. - if self.book.worksheets: - try: - self.book.remove(self.book.worksheets[0]) - except AttributeError: + if self.book.worksheets: + try: + self.book.remove(self.book.worksheets[0]) + except AttributeError: - # compat - self.book.remove_sheet(self.book.worksheets[0]) + # compat - for openpyxl <= 2.4 + self.book.remove_sheet(self.book.worksheets[0]) def save(self): """ @@ -1443,11 +1454,16 @@ class _XlwtWriter(ExcelWriter): engine = 'xlwt' supported_extensions = ('.xls',) - def __init__(self, path, engine=None, encoding=None, **engine_kwargs): + def __init__(self, path, engine=None, encoding=None, mode='w', + **engine_kwargs): # Use the xlwt module as the Excel writer. import xlwt engine_kwargs['engine'] = engine - super(_XlwtWriter, self).__init__(path, **engine_kwargs) + + if mode == 'a': + raise ValueError('Append mode is not supported with xlwt!') + + super(_XlwtWriter, self).__init__(path, mode=mode, **engine_kwargs) if encoding is None: encoding = 'ascii' @@ -1713,13 +1729,18 @@ class _XlsxWriter(ExcelWriter): supported_extensions = ('.xlsx',) def __init__(self, path, engine=None, - date_format=None, datetime_format=None, **engine_kwargs): + date_format=None, datetime_format=None, mode='w', + **engine_kwargs): # Use the xlsxwriter module as the Excel writer. 
import xlsxwriter + if mode == 'a': + raise ValueError('Append mode is not supported with xlsxwriter!') + super(_XlsxWriter, self).__init__(path, engine=engine, date_format=date_format, datetime_format=datetime_format, + mode=mode, **engine_kwargs) self.book = xlsxwriter.Workbook(path, **engine_kwargs) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 05423474f330a..2a225e6fe6a45 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -2006,6 +2006,31 @@ def test_write_cells_merge_styled(self, merge_cells, ext, engine): assert xcell_b1.font == openpyxl_sty_merged assert xcell_a2.font == openpyxl_sty_merged + @pytest.mark.parametrize("mode,expected", [ + ('w', ['baz']), ('a', ['foo', 'bar', 'baz'])]) + def test_write_append_mode(self, merge_cells, ext, engine, mode, expected): + import openpyxl + df = DataFrame([1], columns=['baz']) + + with ensure_clean(ext) as f: + wb = openpyxl.Workbook() + wb.worksheets[0].title = 'foo' + wb.worksheets[0]['A1'].value = 'foo' + wb.create_sheet('bar') + wb.worksheets[1]['A1'].value = 'bar' + wb.save(f) + + writer = ExcelWriter(f, engine=engine, mode=mode) + df.to_excel(writer, sheet_name='baz', index=False) + writer.save() + + wb2 = openpyxl.load_workbook(f) + result = [sheet.title for sheet in wb2.worksheets] + assert result == expected + + for index, cell_value in enumerate(expected): + assert wb2.worksheets[index]['A1'].value == cell_value + @td.skip_if_no('xlwt') @pytest.mark.parametrize("merge_cells,ext,engine", [ @@ -2060,6 +2085,13 @@ def test_to_excel_styleconverter(self, merge_cells, ext, engine): assert xlwt.Alignment.HORZ_CENTER == xls_style.alignment.horz assert xlwt.Alignment.VERT_TOP == xls_style.alignment.vert + def test_write_append_mode_raises(self, merge_cells, ext, engine): + msg = "Append mode is not supported with xlwt!" + + with ensure_clean(ext) as f: + with tm.assert_raises_regex(ValueError, msg): + ExcelWriter(f, engine=engine, mode='a') + @td.skip_if_no('xlsxwriter') @pytest.mark.parametrize("merge_cells,ext,engine", [ @@ -2111,6 +2143,13 @@ def test_column_format(self, merge_cells, ext, engine): assert read_num_format == num_format + def test_write_append_mode_raises(self, merge_cells, ext, engine): + msg = "Append mode is not supported with xlsxwriter!" + + with ensure_clean(ext) as f: + with tm.assert_raises_regex(ValueError, msg): + ExcelWriter(f, engine=engine, mode='a') + class TestExcelWriterEngineTests(object): From fb165558e00db6b667b06aeef9ebc53a6589e67f Mon Sep 17 00:00:00 2001 From: topper-123 Date: Tue, 19 Jun 2018 09:20:12 +0100 Subject: [PATCH 052/113] DOC: Improve code example for Index.get_indexer (#21511) --- pandas/core/indexes/base.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 27cc368a696e3..490fd872125ff 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3148,17 +3148,22 @@ def droplevel(self, level=0): .. versionadded:: 0.21.0 (list-like tolerance) - Examples - -------- - >>> indexer = index.get_indexer(new_index) - >>> new_values = cur_values.take(indexer) - Returns ------- indexer : ndarray of int Integers from 0 to n - 1 indicating that the index at these positions matches the corresponding target values. Missing values in the target are marked by -1. 
+ + Examples + -------- + >>> index = pd.Index(['c', 'a', 'b']) + >>> index.get_indexer(['a', 'b', 'x']) + array([ 1, 2, -1]) + + Notice that the return value is an array of locations in ``index`` + and ``x`` is marked by -1, as it is not in ``index``. + """ @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) From 84533b12517a33fa400d8a92cac914907c446f7a Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Tue, 19 Jun 2018 03:30:47 -0500 Subject: [PATCH 053/113] DOC: remove grammar duplication in groupby docs (#21534) --- doc/source/groupby.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 1c4c3f93726a9..47d53c82b86f3 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -680,8 +680,7 @@ match the shape of the input array. data_range = lambda x: x.max() - x.min() ts.groupby(key).transform(data_range) -Alternatively the built-in methods can be could be used to produce the same -outputs +Alternatively, the built-in methods could be used to produce the same outputs. .. ipython:: python From 8cbcafd5031d446de1ab3b1fdd56e133dfa3d8cf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 19 Jun 2018 04:12:52 -0700 Subject: [PATCH 054/113] remove daytime attr, move getstate and setstate to base class (#21533) --- pandas/_libs/tslibs/offsets.pyx | 39 ++++++++++++++++++++++++++++++++ pandas/tseries/offsets.py | 40 --------------------------------- 2 files changed, 39 insertions(+), 40 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 8caf9ea0e0389..3ca9bb307da9c 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -379,6 +379,45 @@ class _BaseOffset(object): 'got {n}'.format(n=n)) return nint + def __setstate__(self, state): + """Reconstruct an instance from a pickled state""" + if 'offset' in state: + # Older (<0.22.0) versions have offset attribute instead of _offset + if '_offset' in state: # pragma: no cover + raise AssertionError('Unexpected key `_offset`') + state['_offset'] = state.pop('offset') + state['kwds']['offset'] = state['_offset'] + + if '_offset' in state and not isinstance(state['_offset'], timedelta): + # relativedelta, we need to populate using its kwds + offset = state['_offset'] + odict = offset.__dict__ + kwds = {key: odict[key] for key in odict if odict[key]} + state.update(kwds) + + self.__dict__ = state + if 'weekmask' in state and 'holidays' in state: + calendar, holidays = _get_calendar(weekmask=self.weekmask, + holidays=self.holidays, + calendar=None) + self.calendar = calendar + self.holidays = holidays + + def __getstate__(self): + """Return a pickleable state""" + state = self.__dict__.copy() + + # we don't want to actually pickle the calendar object + # as its a np.busyday; we recreate on deserilization + if 'calendar' in state: + del state['calendar'] + try: + state['kwds'].pop('calendar') + except KeyError: + pass + + return state + class BaseOffset(_BaseOffset): # Here we add __rfoo__ methods that don't play well with cdef classes diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 2f4989f26b394..ffa2c0a5e3211 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -423,30 +423,6 @@ def _offset_str(self): def nanos(self): raise ValueError("{name} is a non-fixed frequency".format(name=self)) - def __setstate__(self, state): - """Reconstruct an instance from a pickled state""" - if 'offset' in state: - # Older (<0.22.0) versions have offset 
attribute instead of _offset - if '_offset' in state: # pragma: no cover - raise AssertionError('Unexpected key `_offset`') - state['_offset'] = state.pop('offset') - state['kwds']['offset'] = state['_offset'] - - if '_offset' in state and not isinstance(state['_offset'], timedelta): - # relativedelta, we need to populate using its kwds - offset = state['_offset'] - odict = offset.__dict__ - kwds = {key: odict[key] for key in odict if odict[key]} - state.update(kwds) - - self.__dict__ = state - if 'weekmask' in state and 'holidays' in state: - calendar, holidays = _get_calendar(weekmask=self.weekmask, - holidays=self.holidays, - calendar=None) - self.calendar = calendar - self.holidays = holidays - class SingleConstructorOffset(DateOffset): @classmethod @@ -494,21 +470,6 @@ def _repr_attrs(self): out += ': ' + ', '.join(attrs) return out - def __getstate__(self): - """Return a pickleable state""" - state = self.__dict__.copy() - - # we don't want to actually pickle the calendar object - # as its a np.busyday; we recreate on deserilization - if 'calendar' in state: - del state['calendar'] - try: - state['kwds'].pop('calendar') - except KeyError: - pass - - return state - class BusinessDay(BusinessMixin, SingleConstructorOffset): """ @@ -690,7 +651,6 @@ def _get_business_hours_by_sec(self): until = datetime(2014, 4, 1, self.end.hour, self.end.minute) return (until - dtstart).total_seconds() else: - self.daytime = False dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute) until = datetime(2014, 4, 2, self.end.hour, self.end.minute) return (until - dtstart).total_seconds() From c53e001beffb56eca21b3245330e2a1068bf7007 Mon Sep 17 00:00:00 2001 From: Jacopo Rota Date: Tue, 19 Jun 2018 13:26:48 +0200 Subject: [PATCH 055/113] BUG: Handle read_csv corner case (#21176) Closes gh-21141 --- doc/source/whatsnew/v0.23.2.txt | 1 + pandas/io/parsers.py | 12 +++++++++++- pandas/tests/io/parser/common.py | 15 +++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 67c7ce150132a..0f2c9c4756987 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -64,6 +64,7 @@ Bug Fixes **I/O** +- Bug in :func:`read_csv` that caused it to incorrectly raise an error when ``nrows=0``, ``low_memory=True``, and ``index_col`` was not ``None`` (:issue:`21141`) - - diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 2c8f98732c92f..65df2bffb4abf 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3209,12 +3209,22 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None): col = columns[k] if is_integer(k) else k dtype[col] = v - if index_col is None or index_col is False: + # Even though we have no data, the "index" of the empty DataFrame + # could for example still be an empty MultiIndex. Thus, we need to + # check whether we have any index columns specified, via either: + # + # 1) index_col (column indices) + # 2) index_names (column names) + # + # Both must be non-null to ensure a successful construction. Otherwise, + # we have to create a generic emtpy Index. 
+ if (index_col is None or index_col is False) or index_names is None: index = Index([]) else: data = [Series([], dtype=dtype[name]) for name in index_names] index = _ensure_index_from_sequences(data, names=index_names) index_col.sort() + for i, n in enumerate(index_col): columns.pop(n - i) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 2b7ff1f5a9879..b39122e5e7906 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -238,6 +238,21 @@ def test_csv_mixed_type(self): out = self.read_csv(StringIO(data)) tm.assert_frame_equal(out, expected) + def test_read_csv_low_memory_no_rows_with_index(self): + if self.engine == "c" and not self.low_memory: + pytest.skip("This is a low-memory specific test") + + # see gh-21141 + data = """A,B,C +1,1,1,2 +2,2,3,4 +3,3,4,5 +""" + out = self.read_csv(StringIO(data), low_memory=True, + index_col=0, nrows=0) + expected = DataFrame(columns=["A", "B", "C"]) + tm.assert_frame_equal(out, expected) + def test_read_csv_dataframe(self): df = self.read_csv(self.csv1, index_col=0, parse_dates=True) df2 = self.read_table(self.csv1, sep=',', index_col=0, From 34d74cec1a3c9ed0a119769832dea3206ed41a85 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 19 Jun 2018 22:36:55 +0200 Subject: [PATCH 056/113] De-duplicate code for indexing with list-likes of keys (#21503) --- pandas/core/frame.py | 3 +- pandas/core/indexes/base.py | 2 +- pandas/core/indexing.py | 214 +++++++++++++++++++----------------- 3 files changed, 116 insertions(+), 103 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a5dfbcc2a3142..74bb2abc27c4b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2724,7 +2724,8 @@ def _getitem_array(self, key): indexer = key.nonzero()[0] return self._take(indexer, axis=0) else: - indexer = self.loc._convert_to_indexer(key, axis=1) + indexer = self.loc._convert_to_indexer(key, axis=1, + raise_missing=True) return self._take(indexer, axis=1) def _getitem_multilevel(self, key): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 490fd872125ff..577b715ca9998 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3634,7 +3634,7 @@ def _reindex_non_unique(self, target): else: # need to retake to have the same size as the indexer - indexer[~check] = 0 + indexer[~check] = -1 # reset the new indexer to account for the new size new_indexer = np.arange(len(self.take(indexer))) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0e4f040253560..d5e81105dd323 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -688,7 +688,8 @@ def _align_series(self, indexer, ser, multiindex_indexer=False): if isinstance(indexer, tuple): # flatten np.ndarray indexers - ravel = lambda i: i.ravel() if isinstance(i, np.ndarray) else i + def ravel(i): + return i.ravel() if isinstance(i, np.ndarray) else i indexer = tuple(map(ravel, indexer)) aligners = [not com.is_null_slice(idx) for idx in indexer] @@ -925,33 +926,10 @@ def _multi_take(self, tup): """ create the reindex map for our objects, raise the _exception if we can't create the indexer """ - try: - o = self.obj - d = {} - for key, axis in zip(tup, o._AXIS_ORDERS): - ax = o._get_axis(axis) - # Have the index compute an indexer or return None - # if it cannot handle: - indexer, keyarr = ax._convert_listlike_indexer(key, - kind=self.name) - # We only act on all found values: - if indexer is not None and (indexer != -1).all(): - 
self._validate_read_indexer(key, indexer, axis) - d[axis] = (ax[indexer], indexer) - continue - - # If we are trying to get actual keys from empty Series, we - # patiently wait for a KeyError later on - otherwise, convert - if len(ax) or not len(key): - key = self._convert_for_reindex(key, axis) - indexer = ax.get_indexer_for(key) - keyarr = ax.reindex(keyarr)[0] - self._validate_read_indexer(keyarr, indexer, - o._get_axis_number(axis)) - d[axis] = (keyarr, indexer) - return o._reindex_with_indexers(d, copy=True, allow_dups=True) - except (KeyError, IndexingError) as detail: - raise self._exception(detail) + o = self.obj + d = {axis: self._get_listlike_indexer(key, axis) + for (key, axis) in zip(tup, o._AXIS_ORDERS)} + return o._reindex_with_indexers(d, copy=True, allow_dups=True) def _convert_for_reindex(self, key, axis=None): return key @@ -1124,7 +1102,88 @@ def _getitem_axis(self, key, axis=None): return self._get_label(key, axis=axis) + def _get_listlike_indexer(self, key, axis, raise_missing=False): + """ + Transform a list-like of keys into a new index and an indexer. + + Parameters + ---------- + key : list-like + Target labels + axis: int + Dimension on which the indexing is being made + raise_missing: bool + Whether to raise a KeyError if some labels are not found. Will be + removed in the future, and then this method will always behave as + if raise_missing=True. + + Raises + ------ + KeyError + If at least one key was requested but none was found, and + raise_missing=True. + + Returns + ------- + keyarr: Index + New index (coinciding with 'key' if the axis is unique) + values : array-like + An indexer for the return object; -1 denotes keys not found + """ + o = self.obj + ax = o._get_axis(axis) + + # Have the index compute an indexer or return None + # if it cannot handle: + indexer, keyarr = ax._convert_listlike_indexer(key, + kind=self.name) + # We only act on all found values: + if indexer is not None and (indexer != -1).all(): + self._validate_read_indexer(key, indexer, axis, + raise_missing=raise_missing) + return ax[indexer], indexer + + if ax.is_unique: + # If we are trying to get actual keys from empty Series, we + # patiently wait for a KeyError later on - otherwise, convert + if len(ax) or not len(key): + key = self._convert_for_reindex(key, axis) + indexer = ax.get_indexer_for(key) + keyarr = ax.reindex(keyarr)[0] + else: + keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr) + + self._validate_read_indexer(keyarr, indexer, + o._get_axis_number(axis), + raise_missing=raise_missing) + return keyarr, indexer + def _getitem_iterable(self, key, axis=None): + """ + Index current object with an an iterable key (which can be a boolean + indexer, or a collection of keys). + + Parameters + ---------- + key : iterable + Target labels, or boolean indexer + axis: int, default None + Dimension on which the indexing is being made + + Raises + ------ + KeyError + If no key was found. Will change in the future to raise if not all + keys were found. + IndexingError + If the boolean indexer is unalignable with the object being + indexed. 
+ + Returns + ------- + scalar, DataFrame, or Series: indexed value(s), + """ + if axis is None: axis = self.axis or 0 @@ -1133,54 +1192,18 @@ def _getitem_iterable(self, key, axis=None): labels = self.obj._get_axis(axis) if com.is_bool_indexer(key): + # A boolean indexer key = check_bool_indexer(labels, key) inds, = key.nonzero() return self.obj._take(inds, axis=axis) else: - # Have the index compute an indexer or return None - # if it cannot handle; we only act on all found values - indexer, keyarr = labels._convert_listlike_indexer( - key, kind=self.name) - if indexer is not None and (indexer != -1).all(): - self._validate_read_indexer(key, indexer, axis) - return self.obj.take(indexer, axis=axis) - - ax = self.obj._get_axis(axis) - # existing labels are unique and indexer are unique - if labels.is_unique and Index(keyarr).is_unique: - indexer = ax.get_indexer_for(key) - self._validate_read_indexer(key, indexer, axis) - - d = {axis: [ax.reindex(keyarr)[0], indexer]} - return self.obj._reindex_with_indexers(d, copy=True, - allow_dups=True) - - # existing labels are non-unique - else: - - # reindex with the specified axis - if axis + 1 > self.obj.ndim: - raise AssertionError("invalid indexing error with " - "non-unique index") - - new_target, indexer, new_indexer = labels._reindex_non_unique( - keyarr) - - if new_indexer is not None: - result = self.obj._take(indexer[indexer != -1], axis=axis) - - self._validate_read_indexer(key, new_indexer, axis) - result = result._reindex_with_indexers( - {axis: [new_target, new_indexer]}, - copy=True, allow_dups=True) + # A collection of keys + keyarr, indexer = self._get_listlike_indexer(key, axis, + raise_missing=False) + return self.obj._reindex_with_indexers({axis: [keyarr, indexer]}, + copy=True, allow_dups=True) - else: - self._validate_read_indexer(key, indexer, axis) - result = self.obj._take(indexer, axis=axis) - - return result - - def _validate_read_indexer(self, key, indexer, axis): + def _validate_read_indexer(self, key, indexer, axis, raise_missing=False): """ Check that indexer can be used to return a result (e.g. at least one element was found, unless the list of keys was actually empty). @@ -1193,11 +1216,16 @@ def _validate_read_indexer(self, key, indexer, axis): Indices corresponding to the key (with -1 indicating not found) axis: int Dimension on which the indexing is being made + raise_missing: bool + Whether to raise a KeyError if some labels are not found. Will be + removed in the future, and then this method will always behave as + if raise_missing=True. Raises ------ KeyError - If at least one key was requested none was found. + If at least one key was requested but none was found, and + raise_missing=True. """ ax = self.obj._get_axis(axis) @@ -1214,6 +1242,12 @@ def _validate_read_indexer(self, key, indexer, axis): u"None of [{key}] are in the [{axis}]".format( key=key, axis=self.obj._get_axis_name(axis))) + # We (temporarily) allow for some missing keys with .loc, except in + # some cases (e.g. 
setting) in which "raise_missing" will be False + if not(self.name == 'loc' and not raise_missing): + not_found = list(set(key) - set(ax)) + raise KeyError("{} not in index".format(not_found)) + # we skip the warning on Categorical/Interval # as this check is actually done (check for # non-missing values), but a bit later in the @@ -1229,9 +1263,10 @@ def _validate_read_indexer(self, key, indexer, axis): if not (ax.is_categorical() or ax.is_interval()): warnings.warn(_missing_key_warning, - FutureWarning, stacklevel=5) + FutureWarning, stacklevel=6) - def _convert_to_indexer(self, obj, axis=None, is_setter=False): + def _convert_to_indexer(self, obj, axis=None, is_setter=False, + raise_missing=False): """ Convert indexing key into something we can use to do actual fancy indexing on an ndarray @@ -1310,33 +1345,10 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False): inds, = obj.nonzero() return inds else: - - # Have the index compute an indexer or return None - # if it cannot handle - indexer, objarr = labels._convert_listlike_indexer( - obj, kind=self.name) - if indexer is not None: - return indexer - - # unique index - if labels.is_unique: - indexer = check = labels.get_indexer(objarr) - - # non-unique (dups) - else: - (indexer, - missing) = labels.get_indexer_non_unique(objarr) - # 'indexer' has dupes, create 'check' using 'missing' - check = np.zeros(len(objarr), dtype=np.intp) - check[missing] = -1 - - mask = check == -1 - if mask.any(): - raise KeyError('{mask} not in index' - .format(mask=objarr[mask])) - - return com._values_from_object(indexer) - + # When setting, missing keys are not allowed, even with .loc: + kwargs = {'raise_missing': True if is_setter else + raise_missing} + return self._get_listlike_indexer(obj, axis, **kwargs)[1] else: try: return labels.get_loc(obj) From a10216dac1903dbca00c65387401bafeb4500480 Mon Sep 17 00:00:00 2001 From: Wil Tan Date: Wed, 20 Jun 2018 19:50:21 +1000 Subject: [PATCH 057/113] Update "See Also" section of pandas/core/generic.py (#21550) --- pandas/core/generic.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 32f64b1d3e05c..555108a5d9349 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5175,8 +5175,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, -------- pandas.to_datetime : Convert argument to datetime. pandas.to_timedelta : Convert argument to timedelta. - pandas.to_numeric : Return a fixed frequency timedelta index, - with day as the default. + pandas.to_numeric : Convert argument to numeric type. Returns ------- @@ -5210,7 +5209,7 @@ def infer_objects(self): -------- pandas.to_datetime : Convert argument to datetime. pandas.to_timedelta : Convert argument to timedelta. - pandas.to_numeric : Convert argument to numeric typeR + pandas.to_numeric : Convert argument to numeric type. 
Returns ------- From 4e6a11f62bb170700ab42af032b2522398f0ebe7 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 20 Jun 2018 11:14:25 +0100 Subject: [PATCH 058/113] Fixing documentation lists indentation (#21519) --- doc/source/api.rst | 6 +- doc/source/basics.rst | 43 ++- doc/source/categorical.rst | 10 +- doc/source/comparison_with_r.rst | 10 +- doc/source/computation.rst | 46 +-- doc/source/contributing.rst | 70 ++--- doc/source/contributing_docstring.rst | 76 ++--- doc/source/developer.rst | 14 +- doc/source/dsintro.rst | 26 +- doc/source/ecosystem.rst | 16 +- doc/source/enhancingperf.rst | 42 +-- doc/source/extending.rst | 6 +- doc/source/gotchas.rst | 4 +- doc/source/groupby.rst | 54 ++-- doc/source/indexing.rst | 82 +++--- doc/source/install.rst | 14 +- doc/source/internals.rst | 38 +-- doc/source/io.rst | 404 +++++++++++++------------- doc/source/merging.rst | 72 ++--- doc/source/options.rst | 8 +- doc/source/overview.rst | 26 +- doc/source/reshaping.rst | 48 +-- doc/source/sparse.rst | 6 +- doc/source/timeseries.rst | 40 +-- doc/source/tutorials.rst | 164 +++++------ doc/source/visualization.rst | 6 +- 26 files changed, 665 insertions(+), 666 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 4faec93490fde..f2c00d5d12031 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1200,9 +1200,9 @@ Attributes and underlying data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ **Axes** - * **items**: axis 0; each item corresponds to a DataFrame contained inside - * **major_axis**: axis 1; the index (rows) of each of the DataFrames - * **minor_axis**: axis 2; the columns of each of the DataFrames +* **items**: axis 0; each item corresponds to a DataFrame contained inside +* **major_axis**: axis 1; the index (rows) of each of the DataFrames +* **minor_axis**: axis 2; the columns of each of the DataFrames .. autosummary:: :toctree: generated/ diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 74f1d80c6fd3d..c460b19640f46 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -50,9 +50,8 @@ Attributes and the raw ndarray(s) pandas objects have a number of attributes enabling you to access the metadata - * **shape**: gives the axis dimensions of the object, consistent with ndarray - * Axis labels - +* **shape**: gives the axis dimensions of the object, consistent with ndarray +* Axis labels * **Series**: *index* (only axis) * **DataFrame**: *index* (rows) and *columns* * **Panel**: *items*, *major_axis*, and *minor_axis* @@ -131,9 +130,9 @@ Flexible binary operations With binary operations between pandas data structures, there are two key points of interest: - * Broadcasting behavior between higher- (e.g. DataFrame) and - lower-dimensional (e.g. Series) objects. - * Missing data in computations. +* Broadcasting behavior between higher- (e.g. DataFrame) and + lower-dimensional (e.g. Series) objects. +* Missing data in computations. We will demonstrate how to manage these issues independently, though they can be handled simultaneously. @@ -462,10 +461,10 @@ produce an object of the same size. 
Generally speaking, these methods take an **axis** argument, just like *ndarray.{sum, std, ...}*, but the axis can be specified by name or integer: - - **Series**: no axis argument needed - - **DataFrame**: "index" (axis=0, default), "columns" (axis=1) - - **Panel**: "items" (axis=0), "major" (axis=1, default), "minor" - (axis=2) +* **Series**: no axis argument needed +* **DataFrame**: "index" (axis=0, default), "columns" (axis=1) +* **Panel**: "items" (axis=0), "major" (axis=1, default), "minor" + (axis=2) For example: @@ -1187,11 +1186,11 @@ It is used to implement nearly all other features relying on label-alignment functionality. To *reindex* means to conform the data to match a given set of labels along a particular axis. This accomplishes several things: - * Reorders the existing data to match a new set of labels - * Inserts missing value (NA) markers in label locations where no data for - that label existed - * If specified, **fill** data for missing labels using logic (highly relevant - to working with time series data) +* Reorders the existing data to match a new set of labels +* Inserts missing value (NA) markers in label locations where no data for + that label existed +* If specified, **fill** data for missing labels using logic (highly relevant + to working with time series data) Here is a simple example: @@ -1911,10 +1910,10 @@ the axis indexes, since they are immutable) and returns a new object. Note that **it is seldom necessary to copy objects**. For example, there are only a handful of ways to alter a DataFrame *in-place*: - * Inserting, deleting, or modifying a column. - * Assigning to the ``index`` or ``columns`` attributes. - * For homogeneous data, directly modifying the values via the ``values`` - attribute or advanced indexing. +* Inserting, deleting, or modifying a column. +* Assigning to the ``index`` or ``columns`` attributes. +* For homogeneous data, directly modifying the values via the ``values`` + attribute or advanced indexing. To be clear, no pandas method has the side effect of modifying your data; almost every method returns a new object, leaving the original object @@ -2112,14 +2111,14 @@ Because the data was transposed the original inference stored all columns as obj The following functions are available for one dimensional object arrays or scalars to perform hard conversion of objects to a specified type: -- :meth:`~pandas.to_numeric` (conversion to numeric dtypes) +* :meth:`~pandas.to_numeric` (conversion to numeric dtypes) .. ipython:: python m = ['1.1', 2, 3] pd.to_numeric(m) -- :meth:`~pandas.to_datetime` (conversion to datetime objects) +* :meth:`~pandas.to_datetime` (conversion to datetime objects) .. ipython:: python @@ -2127,7 +2126,7 @@ hard conversion of objects to a specified type: m = ['2016-07-09', datetime.datetime(2016, 3, 2)] pd.to_datetime(m) -- :meth:`~pandas.to_timedelta` (conversion to timedelta objects) +* :meth:`~pandas.to_timedelta` (conversion to timedelta objects) .. ipython:: python diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index c6827f67a390b..acab9de905540 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -542,11 +542,11 @@ Comparisons Comparing categorical data with other objects is possible in three cases: - * Comparing equality (``==`` and ``!=``) to a list-like object (list, Series, array, - ...) of the same length as the categorical data. 
- * All comparisons (``==``, ``!=``, ``>``, ``>=``, ``<``, and ``<=``) of categorical data to - another categorical Series, when ``ordered==True`` and the `categories` are the same. - * All comparisons of a categorical data to a scalar. +* Comparing equality (``==`` and ``!=``) to a list-like object (list, Series, array, + ...) of the same length as the categorical data. +* All comparisons (``==``, ``!=``, ``>``, ``>=``, ``<``, and ``<=``) of categorical data to + another categorical Series, when ``ordered==True`` and the `categories` are the same. +* All comparisons of a categorical data to a scalar. All other comparisons, especially "non-equality" comparisons of two categoricals with different categories or a categorical with any list-like object, will raise a ``TypeError``. diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst index a7586f623a160..eecacde8ad14e 100644 --- a/doc/source/comparison_with_r.rst +++ b/doc/source/comparison_with_r.rst @@ -18,11 +18,11 @@ was started to provide a more detailed look at the `R language party libraries as they relate to ``pandas``. In comparisons with R and CRAN libraries, we care about the following things: - - **Functionality / flexibility**: what can/cannot be done with each tool - - **Performance**: how fast are operations. Hard numbers/benchmarks are - preferable - - **Ease-of-use**: Is one tool easier/harder to use (you may have to be - the judge of this, given side-by-side code comparisons) +* **Functionality / flexibility**: what can/cannot be done with each tool +* **Performance**: how fast are operations. Hard numbers/benchmarks are + preferable +* **Ease-of-use**: Is one tool easier/harder to use (you may have to be + the judge of this, given side-by-side code comparisons) This page is also here to offer a bit of a translation guide for users of these R packages. diff --git a/doc/source/computation.rst b/doc/source/computation.rst index ff06c369e1897..5e7b8be5f8af0 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -344,20 +344,20 @@ The weights used in the window are specified by the ``win_type`` keyword. The list of recognized types are the `scipy.signal window functions `__: -- ``boxcar`` -- ``triang`` -- ``blackman`` -- ``hamming`` -- ``bartlett`` -- ``parzen`` -- ``bohman`` -- ``blackmanharris`` -- ``nuttall`` -- ``barthann`` -- ``kaiser`` (needs beta) -- ``gaussian`` (needs std) -- ``general_gaussian`` (needs power, width) -- ``slepian`` (needs width). +* ``boxcar`` +* ``triang`` +* ``blackman`` +* ``hamming`` +* ``bartlett`` +* ``parzen`` +* ``bohman`` +* ``blackmanharris`` +* ``nuttall`` +* ``barthann`` +* ``kaiser`` (needs beta) +* ``gaussian`` (needs std) +* ``general_gaussian`` (needs power, width) +* ``slepian`` (needs width). .. ipython:: python @@ -537,10 +537,10 @@ Binary Window Functions two ``Series`` or any combination of ``DataFrame/Series`` or ``DataFrame/DataFrame``. Here is the behavior in each case: -- two ``Series``: compute the statistic for the pairing. -- ``DataFrame/Series``: compute the statistics for each column of the DataFrame +* two ``Series``: compute the statistic for the pairing. +* ``DataFrame/Series``: compute the statistics for each column of the DataFrame with the passed Series, thus returning a DataFrame. -- ``DataFrame/DataFrame``: by default compute the statistic for matching column +* ``DataFrame/DataFrame``: by default compute the statistic for matching column names, returning a DataFrame. 
If the keyword argument ``pairwise=True`` is passed then computes the statistic for each pair of columns, returning a ``MultiIndexed DataFrame`` whose ``index`` are the dates in question (see :ref:`the next section @@ -741,10 +741,10 @@ Aside from not having a ``window`` parameter, these functions have the same interfaces as their ``.rolling`` counterparts. Like above, the parameters they all accept are: -- ``min_periods``: threshold of non-null data points to require. Defaults to +* ``min_periods``: threshold of non-null data points to require. Defaults to minimum needed to compute statistic. No ``NaNs`` will be output once ``min_periods`` non-null data points have been seen. -- ``center``: boolean, whether to set the labels at the center (default is False). +* ``center``: boolean, whether to set the labels at the center (default is False). .. _stats.moments.expanding.note: .. note:: @@ -903,12 +903,12 @@ of an EW moment: One must specify precisely one of **span**, **center of mass**, **half-life** and **alpha** to the EW functions: -- **Span** corresponds to what is commonly called an "N-day EW moving average". -- **Center of mass** has a more physical interpretation and can be thought of +* **Span** corresponds to what is commonly called an "N-day EW moving average". +* **Center of mass** has a more physical interpretation and can be thought of in terms of span: :math:`c = (s - 1) / 2`. -- **Half-life** is the period of time for the exponential weight to reduce to +* **Half-life** is the period of time for the exponential weight to reduce to one half. -- **Alpha** specifies the smoothing factor directly. +* **Alpha** specifies the smoothing factor directly. Here is an example for a univariate time series: diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 6ae93ba46fa5c..ff06d024740bf 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -138,11 +138,11 @@ steps; you only need to install the compiler. For Windows developers, the following links may be helpful. -- https://blogs.msdn.microsoft.com/pythonengineering/2016/04/11/unable-to-find-vcvarsall-bat/ -- https://github.com/conda/conda-recipes/wiki/Building-from-Source-on-Windows-32-bit-and-64-bit -- https://cowboyprogrammer.org/building-python-wheels-for-windows/ -- https://blog.ionelmc.ro/2014/12/21/compiling-python-extensions-on-windows/ -- https://support.enthought.com/hc/en-us/articles/204469260-Building-Python-extensions-with-Canopy +* https://blogs.msdn.microsoft.com/pythonengineering/2016/04/11/unable-to-find-vcvarsall-bat/ +* https://github.com/conda/conda-recipes/wiki/Building-from-Source-on-Windows-32-bit-and-64-bit +* https://cowboyprogrammer.org/building-python-wheels-for-windows/ +* https://blog.ionelmc.ro/2014/12/21/compiling-python-extensions-on-windows/ +* https://support.enthought.com/hc/en-us/articles/204469260-Building-Python-extensions-with-Canopy Let us know if you have any difficulties by opening an issue or reaching out on `Gitter`_. 
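The computation.rst hunks earlier in this patch only re-indent the bullet lists describing the exponentially weighted parameters (``span``, ``com``, ``halflife``, ``alpha``), which keep the relationship ``c = (s - 1) / 2`` between span and center of mass. A minimal sketch of that equivalence (the series below is invented purely for illustration)::

    import numpy as np
    import pandas as pd

    s = pd.Series(np.arange(10, dtype=float))

    # span=5 corresponds to com=(5 - 1) / 2 == 2, so both parameterizations
    # should produce the same exponentially weighted mean.
    assert np.allclose(s.ewm(span=5).mean(), s.ewm(com=2).mean())

Exactly one of the four keywords must be supplied to ``.ewm()``; the sketch just shows that two of them can describe the same decay.
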
@@ -155,11 +155,11 @@ Creating a Python Environment Now that you have a C compiler, create an isolated pandas development environment: -- Install either `Anaconda `_ or `miniconda +* Install either `Anaconda `_ or `miniconda `_ -- Make sure your conda is up to date (``conda update conda``) -- Make sure that you have :ref:`cloned the repository ` -- ``cd`` to the *pandas* source directory +* Make sure your conda is up to date (``conda update conda``) +* Make sure that you have :ref:`cloned the repository ` +* ``cd`` to the *pandas* source directory We'll now kick off a three-step process: @@ -286,7 +286,7 @@ complex changes to the documentation as well. Some other important things to know about the docs: -- The *pandas* documentation consists of two parts: the docstrings in the code +* The *pandas* documentation consists of two parts: the docstrings in the code itself and the docs in this folder ``pandas/doc/``. The docstrings provide a clear explanation of the usage of the individual @@ -294,7 +294,7 @@ Some other important things to know about the docs: overviews per topic together with some other information (what's new, installation, etc). -- The docstrings follow a pandas convention, based on the **Numpy Docstring +* The docstrings follow a pandas convention, based on the **Numpy Docstring Standard**. Follow the :ref:`pandas docstring guide ` for detailed instructions on how to write a correct docstring. @@ -303,7 +303,7 @@ Some other important things to know about the docs: contributing_docstring.rst -- The tutorials make heavy use of the `ipython directive +* The tutorials make heavy use of the `ipython directive `_ sphinx extension. This directive lets you put code in the documentation which will be run during the doc build. For example:: @@ -324,7 +324,7 @@ Some other important things to know about the docs: doc build. This approach means that code examples will always be up to date, but it does make the doc building a bit more complex. -- Our API documentation in ``doc/source/api.rst`` houses the auto-generated +* Our API documentation in ``doc/source/api.rst`` houses the auto-generated documentation from the docstrings. For classes, there are a few subtleties around controlling which methods and attributes have pages auto-generated. @@ -488,8 +488,8 @@ standard. Google provides an open source style checker called ``cpplint``, but w use a fork of it that can be found `here `__. Here are *some* of the more common ``cpplint`` issues: - - we restrict line-length to 80 characters to promote readability - - every header file must include a header guard to avoid name collisions if re-included +* we restrict line-length to 80 characters to promote readability +* every header file must include a header guard to avoid name collisions if re-included :ref:`Continuous Integration ` will run the `cpplint `_ tool @@ -536,8 +536,8 @@ Python (PEP8) There are several tools to ensure you abide by this standard. Here are *some* of the more common ``PEP8`` issues: - - we restrict line-length to 79 characters to promote readability - - passing arguments should have spaces after commas, e.g. ``foo(arg1, arg2, kw1='bar')`` +* we restrict line-length to 79 characters to promote readability +* passing arguments should have spaces after commas, e.g. ``foo(arg1, arg2, kw1='bar')`` :ref:`Continuous Integration ` will run the `flake8 `_ tool @@ -715,14 +715,14 @@ Using ``pytest`` Here is an example of a self-contained set of tests that illustrate multiple features that we like to use. 
-- functional style: tests are like ``test_*`` and *only* take arguments that are either fixtures or parameters -- ``pytest.mark`` can be used to set metadata on test functions, e.g. ``skip`` or ``xfail``. -- using ``parametrize``: allow testing of multiple cases -- to set a mark on a parameter, ``pytest.param(..., marks=...)`` syntax should be used -- ``fixture``, code for object construction, on a per-test basis -- using bare ``assert`` for scalars and truth-testing -- ``tm.assert_series_equal`` (and its counter part ``tm.assert_frame_equal``), for pandas object comparisons. -- the typical pattern of constructing an ``expected`` and comparing versus the ``result`` +* functional style: tests are like ``test_*`` and *only* take arguments that are either fixtures or parameters +* ``pytest.mark`` can be used to set metadata on test functions, e.g. ``skip`` or ``xfail``. +* using ``parametrize``: allow testing of multiple cases +* to set a mark on a parameter, ``pytest.param(..., marks=...)`` syntax should be used +* ``fixture``, code for object construction, on a per-test basis +* using bare ``assert`` for scalars and truth-testing +* ``tm.assert_series_equal`` (and its counter part ``tm.assert_frame_equal``), for pandas object comparisons. +* the typical pattern of constructing an ``expected`` and comparing versus the ``result`` We would name this file ``test_cool_feature.py`` and put in an appropriate place in the ``pandas/tests/`` structure. @@ -969,21 +969,21 @@ Finally, commit your changes to your local repository with an explanatory messag uses a convention for commit message prefixes and layout. Here are some common prefixes along with general guidelines for when to use them: - * ENH: Enhancement, new functionality - * BUG: Bug fix - * DOC: Additions/updates to documentation - * TST: Additions/updates to tests - * BLD: Updates to the build process/scripts - * PERF: Performance improvement - * CLN: Code cleanup +* ENH: Enhancement, new functionality +* BUG: Bug fix +* DOC: Additions/updates to documentation +* TST: Additions/updates to tests +* BLD: Updates to the build process/scripts +* PERF: Performance improvement +* CLN: Code cleanup The following defines how a commit message should be structured. Please reference the relevant GitHub issues in your commit message using GH1234 or #1234. Either style is fine, but the former is generally preferred: - * a subject line with `< 80` chars. - * One blank line. - * Optionally, a commit message body. +* a subject line with `< 80` chars. +* One blank line. +* Optionally, a commit message body. Now you can commit your changes in your local repository:: diff --git a/doc/source/contributing_docstring.rst b/doc/source/contributing_docstring.rst index 4dec2a23facca..afb554aeffbc3 100644 --- a/doc/source/contributing_docstring.rst +++ b/doc/source/contributing_docstring.rst @@ -68,7 +68,7 @@ As PEP-257 is quite open, and some other standards exist on top of it. In the case of pandas, the numpy docstring convention is followed. The conventions is explained in this document: -- `numpydoc docstring guide `_ +* `numpydoc docstring guide `_ (which is based in the original `Guide to NumPy/SciPy documentation `_) @@ -78,9 +78,9 @@ The standard uses reStructuredText (reST). reStructuredText is a markup language that allows encoding styles in plain text files. 
Documentation about reStructuredText can be found in: -- `Sphinx reStructuredText primer `_ -- `Quick reStructuredText reference `_ -- `Full reStructuredText specification `_ +* `Sphinx reStructuredText primer `_ +* `Quick reStructuredText reference `_ +* `Full reStructuredText specification `_ Pandas has some helpers for sharing docstrings between related classes, see :ref:`docstring.sharing`. @@ -107,12 +107,12 @@ In rare occasions reST styles like bold text or italics will be used in docstrings, but is it common to have inline code, which is presented between backticks. It is considered inline code: -- The name of a parameter -- Python code, a module, function, built-in, type, literal... (e.g. ``os``, +* The name of a parameter +* Python code, a module, function, built-in, type, literal... (e.g. ``os``, ``list``, ``numpy.abs``, ``datetime.date``, ``True``) -- A pandas class (in the form ``:class:`pandas.Series```) -- A pandas method (in the form ``:meth:`pandas.Series.sum```) -- A pandas function (in the form ``:func:`pandas.to_datetime```) +* A pandas class (in the form ``:class:`pandas.Series```) +* A pandas method (in the form ``:meth:`pandas.Series.sum```) +* A pandas function (in the form ``:func:`pandas.to_datetime```) .. note:: To display only the last component of the linked class, method or @@ -352,71 +352,71 @@ When specifying the parameter types, Python built-in data types can be used directly (the Python type is preferred to the more verbose string, integer, boolean, etc): -- int -- float -- str -- bool +* int +* float +* str +* bool For complex types, define the subtypes. For `dict` and `tuple`, as more than one type is present, we use the brackets to help read the type (curly brackets for `dict` and normal brackets for `tuple`): -- list of int -- dict of {str : int} -- tuple of (str, int, int) -- tuple of (str,) -- set of str +* list of int +* dict of {str : int} +* tuple of (str, int, int) +* tuple of (str,) +* set of str In case where there are just a set of values allowed, list them in curly brackets and separated by commas (followed by a space). If the values are ordinal and they have an order, list them in this order. Otherwise, list the default value first, if there is one: -- {0, 10, 25} -- {'simple', 'advanced'} -- {'low', 'medium', 'high'} -- {'cat', 'dog', 'bird'} +* {0, 10, 25} +* {'simple', 'advanced'} +* {'low', 'medium', 'high'} +* {'cat', 'dog', 'bird'} If the type is defined in a Python module, the module must be specified: -- datetime.date -- datetime.datetime -- decimal.Decimal +* datetime.date +* datetime.datetime +* decimal.Decimal If the type is in a package, the module must be also specified: -- numpy.ndarray -- scipy.sparse.coo_matrix +* numpy.ndarray +* scipy.sparse.coo_matrix If the type is a pandas type, also specify pandas except for Series and DataFrame: -- Series -- DataFrame -- pandas.Index -- pandas.Categorical -- pandas.SparseArray +* Series +* DataFrame +* pandas.Index +* pandas.Categorical +* pandas.SparseArray If the exact type is not relevant, but must be compatible with a numpy array, array-like can be specified. 
If Any type that can be iterated is accepted, iterable can be used: -- array-like -- iterable +* array-like +* iterable If more than one type is accepted, separate them by commas, except the last two types, that need to be separated by the word 'or': -- int or float -- float, decimal.Decimal or None -- str or list of str +* int or float +* float, decimal.Decimal or None +* str or list of str If ``None`` is one of the accepted values, it always needs to be the last in the list. For axis, the convention is to use something like: -- axis : {0 or 'index', 1 or 'columns', None}, default None +* axis : {0 or 'index', 1 or 'columns', None}, default None .. _docstring.returns: diff --git a/doc/source/developer.rst b/doc/source/developer.rst index b8bb2b2fcbe2f..f76af394abc48 100644 --- a/doc/source/developer.rst +++ b/doc/source/developer.rst @@ -81,20 +81,20 @@ The ``metadata`` field is ``None`` except for: omitted it is assumed to be nanoseconds. * ``categorical``: ``{'num_categories': K, 'ordered': is_ordered, 'type': $TYPE}`` - * Here ``'type'`` is optional, and can be a nested pandas type specification - here (but not categorical) + * Here ``'type'`` is optional, and can be a nested pandas type specification + here (but not categorical) * ``unicode``: ``{'encoding': encoding}`` - * The encoding is optional, and if not present is UTF-8 + * The encoding is optional, and if not present is UTF-8 * ``object``: ``{'encoding': encoding}``. Objects can be serialized and stored in ``BYTE_ARRAY`` Parquet columns. The encoding can be one of: - * ``'pickle'`` - * ``'msgpack'`` - * ``'bson'`` - * ``'json'`` + * ``'pickle'`` + * ``'msgpack'`` + * ``'bson'`` + * ``'json'`` * ``timedelta``: ``{'unit': 'ns'}``. The ``'unit'`` is optional, and if omitted it is assumed to be nanoseconds. This metadata is optional altogether diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 4d8e7979060f4..efa52a6f7cfe2 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -51,9 +51,9 @@ labels are collectively referred to as the **index**. The basic method to create Here, ``data`` can be many different things: - - a Python dict - - an ndarray - - a scalar value (like 5) +* a Python dict +* an ndarray +* a scalar value (like 5) The passed **index** is a list of axis labels. Thus, this separates into a few cases depending on what **data is**: @@ -246,12 +246,12 @@ potentially different types. You can think of it like a spreadsheet or SQL table, or a dict of Series objects. It is generally the most commonly used pandas object. Like Series, DataFrame accepts many different kinds of input: - - Dict of 1D ndarrays, lists, dicts, or Series - - 2-D numpy.ndarray - - `Structured or record - `__ ndarray - - A ``Series`` - - Another ``DataFrame`` +* Dict of 1D ndarrays, lists, dicts, or Series +* 2-D numpy.ndarray +* `Structured or record + `__ ndarray +* A ``Series`` +* Another ``DataFrame`` Along with the data, you can optionally pass **index** (row labels) and **columns** (column labels) arguments. If you pass an index and / or columns, @@ -842,10 +842,10 @@ econometric analysis of panel data. 
However, for the strict purposes of slicing and dicing a collection of DataFrame objects, you may find the axis names slightly arbitrary: - - **items**: axis 0, each item corresponds to a DataFrame contained inside - - **major_axis**: axis 1, it is the **index** (rows) of each of the - DataFrames - - **minor_axis**: axis 2, it is the **columns** of each of the DataFrames +* **items**: axis 0, each item corresponds to a DataFrame contained inside +* **major_axis**: axis 1, it is the **index** (rows) of each of the + DataFrames +* **minor_axis**: axis 2, it is the **columns** of each of the DataFrames Construction of Panels works about like you would expect: diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index f683fd6892ea5..4e15f9069de67 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -159,14 +159,14 @@ See more in the `pandas-datareader docs `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst index 979d025111df1..8f8a9fe3e50e0 100644 --- a/doc/source/enhancingperf.rst +++ b/doc/source/enhancingperf.rst @@ -461,15 +461,15 @@ Supported Syntax These operations are supported by :func:`pandas.eval`: -- Arithmetic operations except for the left shift (``<<``) and right shift +* Arithmetic operations except for the left shift (``<<``) and right shift (``>>``) operators, e.g., ``df + 2 * pi / s ** 4 % 42 - the_golden_ratio`` -- Comparison operations, including chained comparisons, e.g., ``2 < df < df2`` -- Boolean operations, e.g., ``df < df2 and df3 < df4 or not df_bool`` -- ``list`` and ``tuple`` literals, e.g., ``[1, 2]`` or ``(1, 2)`` -- Attribute access, e.g., ``df.a`` -- Subscript expressions, e.g., ``df[0]`` -- Simple variable evaluation, e.g., ``pd.eval('df')`` (this is not very useful) -- Math functions: `sin`, `cos`, `exp`, `log`, `expm1`, `log1p`, +* Comparison operations, including chained comparisons, e.g., ``2 < df < df2`` +* Boolean operations, e.g., ``df < df2 and df3 < df4 or not df_bool`` +* ``list`` and ``tuple`` literals, e.g., ``[1, 2]`` or ``(1, 2)`` +* Attribute access, e.g., ``df.a`` +* Subscript expressions, e.g., ``df[0]`` +* Simple variable evaluation, e.g., ``pd.eval('df')`` (this is not very useful) +* Math functions: `sin`, `cos`, `exp`, `log`, `expm1`, `log1p`, `sqrt`, `sinh`, `cosh`, `tanh`, `arcsin`, `arccos`, `arctan`, `arccosh`, `arcsinh`, `arctanh`, `abs` and `arctan2`. @@ -477,22 +477,22 @@ This Python syntax is **not** allowed: * Expressions - - Function calls other than math functions. - - ``is``/``is not`` operations - - ``if`` expressions - - ``lambda`` expressions - - ``list``/``set``/``dict`` comprehensions - - Literal ``dict`` and ``set`` expressions - - ``yield`` expressions - - Generator expressions - - Boolean expressions consisting of only scalar values + * Function calls other than math functions. + * ``is``/``is not`` operations + * ``if`` expressions + * ``lambda`` expressions + * ``list``/``set``/``dict`` comprehensions + * Literal ``dict`` and ``set`` expressions + * ``yield`` expressions + * Generator expressions + * Boolean expressions consisting of only scalar values * Statements - - Neither `simple `__ - nor `compound `__ - statements are allowed. This includes things like ``for``, ``while``, and - ``if``. + * Neither `simple `__ + nor `compound `__ + statements are allowed. This includes things like ``for``, ``while``, and + ``if``. 
diff --git a/doc/source/extending.rst b/doc/source/extending.rst index 431c69bc0b6b5..8018d35770924 100644 --- a/doc/source/extending.rst +++ b/doc/source/extending.rst @@ -167,9 +167,9 @@ you can retain subclasses through ``pandas`` data manipulations. There are 3 constructor properties to be defined: -- ``_constructor``: Used when a manipulation result has the same dimensions as the original. -- ``_constructor_sliced``: Used when a manipulation result has one lower dimension(s) as the original, such as ``DataFrame`` single columns slicing. -- ``_constructor_expanddim``: Used when a manipulation result has one higher dimension as the original, such as ``Series.to_frame()`` and ``DataFrame.to_panel()``. +* ``_constructor``: Used when a manipulation result has the same dimensions as the original. +* ``_constructor_sliced``: Used when a manipulation result has one lower dimension(s) as the original, such as ``DataFrame`` single columns slicing. +* ``_constructor_expanddim``: Used when a manipulation result has one higher dimension as the original, such as ``Series.to_frame()`` and ``DataFrame.to_panel()``. Following table shows how ``pandas`` data structures define constructor properties by default. diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index b7042ef390018..79e312ca12833 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -193,9 +193,9 @@ Choice of ``NA`` representation For lack of ``NA`` (missing) support from the ground up in NumPy and Python in general, we were given the difficult choice between either: -- A *masked array* solution: an array of data and an array of boolean values +* A *masked array* solution: an array of data and an array of boolean values indicating whether a value is there or is missing. -- Using a special sentinel value, bit pattern, or set of sentinel values to +* Using a special sentinel value, bit pattern, or set of sentinel values to denote ``NA`` across the dtypes. For many reasons we chose the latter. After years of production use it has diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 47d53c82b86f3..45e449d081fb0 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -22,36 +22,36 @@ Group By: split-apply-combine By "group by" we are referring to a process involving one or more of the following steps: - - **Splitting** the data into groups based on some criteria. - - **Applying** a function to each group independently. - - **Combining** the results into a data structure. +* **Splitting** the data into groups based on some criteria. +* **Applying** a function to each group independently. +* **Combining** the results into a data structure. Out of these, the split step is the most straightforward. In fact, in many situations we may wish to split the data set into groups and do something with those groups. In the apply step, we might wish to one of the following: - - **Aggregation**: compute a summary statistic (or statistics) for each - group. Some examples: +* **Aggregation**: compute a summary statistic (or statistics) for each + group. Some examples: - - Compute group sums or means. - - Compute group sizes / counts. + * Compute group sums or means. + * Compute group sizes / counts. - - **Transformation**: perform some group-specific computations and return a - like-indexed object. Some examples: +* **Transformation**: perform some group-specific computations and return a + like-indexed object. Some examples: - - Standardize data (zscore) within a group. 
- - Filling NAs within groups with a value derived from each group. + * Standardize data (zscore) within a group. + * Filling NAs within groups with a value derived from each group. - - **Filtration**: discard some groups, according to a group-wise computation - that evaluates True or False. Some examples: +* **Filtration**: discard some groups, according to a group-wise computation + that evaluates True or False. Some examples: - - Discard data that belongs to groups with only a few members. - - Filter out data based on the group sum or mean. + * Discard data that belongs to groups with only a few members. + * Filter out data based on the group sum or mean. - - Some combination of the above: GroupBy will examine the results of the apply - step and try to return a sensibly combined result if it doesn't fit into - either of the above two categories. +* Some combination of the above: GroupBy will examine the results of the apply + step and try to return a sensibly combined result if it doesn't fit into + either of the above two categories. Since the set of object instance methods on pandas data structures are generally rich and expressive, we often simply want to invoke, say, a DataFrame function @@ -88,15 +88,15 @@ object (more on what the GroupBy object is later), you may do the following: The mapping can be specified many different ways: - - A Python function, to be called on each of the axis labels. - - A list or NumPy array of the same length as the selected axis. - - A dict or ``Series``, providing a ``label -> group name`` mapping. - - For ``DataFrame`` objects, a string indicating a column to be used to group. - Of course ``df.groupby('A')`` is just syntactic sugar for - ``df.groupby(df['A'])``, but it makes life simpler. - - For ``DataFrame`` objects, a string indicating an index level to be used to - group. - - A list of any of the above things. +* A Python function, to be called on each of the axis labels. +* A list or NumPy array of the same length as the selected axis. +* A dict or ``Series``, providing a ``label -> group name`` mapping. +* For ``DataFrame`` objects, a string indicating a column to be used to group. + Of course ``df.groupby('A')`` is just syntactic sugar for + ``df.groupby(df['A'])``, but it makes life simpler. +* For ``DataFrame`` objects, a string indicating an index level to be used to + group. +* A list of any of the above things. Collectively we refer to the grouping objects as the **keys**. For example, consider the following ``DataFrame``: diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 2b9fcf874ef22..1c63acce6e3fa 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -17,10 +17,10 @@ Indexing and Selecting Data The axis labeling information in pandas objects serves many purposes: - - Identifies data (i.e. provides *metadata*) using known indicators, - important for analysis, visualization, and interactive console display. - - Enables automatic and explicit data alignment. - - Allows intuitive getting and setting of subsets of the data set. +* Identifies data (i.e. provides *metadata*) using known indicators, + important for analysis, visualization, and interactive console display. +* Enables automatic and explicit data alignment. +* Allows intuitive getting and setting of subsets of the data set. In this section, we will focus on the final point: namely, how to slice, dice, and generally get and set subsets of pandas objects. 
The primary focus will be @@ -62,37 +62,37 @@ Object selection has had a number of user-requested additions in order to support more explicit location based indexing. Pandas now supports three types of multi-axis indexing. -- ``.loc`` is primarily label based, but may also be used with a boolean array. ``.loc`` will raise ``KeyError`` when the items are not found. Allowed inputs are: +* ``.loc`` is primarily label based, but may also be used with a boolean array. ``.loc`` will raise ``KeyError`` when the items are not found. Allowed inputs are: - - A single label, e.g. ``5`` or ``'a'`` (Note that ``5`` is interpreted as a - *label* of the index. This use is **not** an integer position along the - index.). - - A list or array of labels ``['a', 'b', 'c']``. - - A slice object with labels ``'a':'f'`` (Note that contrary to usual python - slices, **both** the start and the stop are included, when present in the - index! See :ref:`Slicing with labels - `.). - - A boolean array - - A ``callable`` function with one argument (the calling Series, DataFrame or Panel) and - that returns valid output for indexing (one of the above). + * A single label, e.g. ``5`` or ``'a'`` (Note that ``5`` is interpreted as a + *label* of the index. This use is **not** an integer position along the + index.). + * A list or array of labels ``['a', 'b', 'c']``. + * A slice object with labels ``'a':'f'`` (Note that contrary to usual python + slices, **both** the start and the stop are included, when present in the + index! See :ref:`Slicing with labels + `.). + * A boolean array + * A ``callable`` function with one argument (the calling Series, DataFrame or Panel) and + that returns valid output for indexing (one of the above). .. versionadded:: 0.18.1 See more at :ref:`Selection by Label `. -- ``.iloc`` is primarily integer position based (from ``0`` to +* ``.iloc`` is primarily integer position based (from ``0`` to ``length-1`` of the axis), but may also be used with a boolean array. ``.iloc`` will raise ``IndexError`` if a requested indexer is out-of-bounds, except *slice* indexers which allow out-of-bounds indexing. (this conforms with Python/NumPy *slice* semantics). Allowed inputs are: - - An integer e.g. ``5``. - - A list or array of integers ``[4, 3, 0]``. - - A slice object with ints ``1:7``. - - A boolean array. - - A ``callable`` function with one argument (the calling Series, DataFrame or Panel) and - that returns valid output for indexing (one of the above). + * An integer e.g. ``5``. + * A list or array of integers ``[4, 3, 0]``. + * A slice object with ints ``1:7``. + * A boolean array. + * A ``callable`` function with one argument (the calling Series, DataFrame or Panel) and + that returns valid output for indexing (one of the above). .. versionadded:: 0.18.1 @@ -100,7 +100,7 @@ of multi-axis indexing. :ref:`Advanced Indexing ` and :ref:`Advanced Hierarchical `. -- ``.loc``, ``.iloc``, and also ``[]`` indexing can accept a ``callable`` as indexer. See more at :ref:`Selection By Callable `. +* ``.loc``, ``.iloc``, and also ``[]`` indexing can accept a ``callable`` as indexer. See more at :ref:`Selection By Callable `. Getting values from an object with multi-axes selection uses the following notation (using ``.loc`` as an example, but the following applies to ``.iloc`` as @@ -343,14 +343,14 @@ Integers are valid labels, but they refer to the label **and not the position**. The ``.loc`` attribute is the primary access method. The following are valid inputs: -- A single label, e.g. 
``5`` or ``'a'`` (Note that ``5`` is interpreted as a *label* of the index. This use is **not** an integer position along the index.). -- A list or array of labels ``['a', 'b', 'c']``. -- A slice object with labels ``'a':'f'`` (Note that contrary to usual python +* A single label, e.g. ``5`` or ``'a'`` (Note that ``5`` is interpreted as a *label* of the index. This use is **not** an integer position along the index.). +* A list or array of labels ``['a', 'b', 'c']``. +* A slice object with labels ``'a':'f'`` (Note that contrary to usual python slices, **both** the start and the stop are included, when present in the index! See :ref:`Slicing with labels `.). -- A boolean array. -- A ``callable``, see :ref:`Selection By Callable `. +* A boolean array. +* A ``callable``, see :ref:`Selection By Callable `. .. ipython:: python @@ -445,11 +445,11 @@ Pandas provides a suite of methods in order to get **purely integer based indexi The ``.iloc`` attribute is the primary access method. The following are valid inputs: -- An integer e.g. ``5``. -- A list or array of integers ``[4, 3, 0]``. -- A slice object with ints ``1:7``. -- A boolean array. -- A ``callable``, see :ref:`Selection By Callable `. +* An integer e.g. ``5``. +* A list or array of integers ``[4, 3, 0]``. +* A slice object with ints ``1:7``. +* A boolean array. +* A ``callable``, see :ref:`Selection By Callable `. .. ipython:: python @@ -599,8 +599,8 @@ bit of user confusion over the years. The recommended methods of indexing are: -- ``.loc`` if you want to *label* index. -- ``.iloc`` if you want to *positionally* index. +* ``.loc`` if you want to *label* index. +* ``.iloc`` if you want to *positionally* index. .. ipython:: python @@ -1455,15 +1455,15 @@ If you want to identify and remove duplicate rows in a DataFrame, there are two methods that will help: ``duplicated`` and ``drop_duplicates``. Each takes as an argument the columns to use to identify duplicated rows. -- ``duplicated`` returns a boolean vector whose length is the number of rows, and which indicates whether a row is duplicated. -- ``drop_duplicates`` removes duplicate rows. +* ``duplicated`` returns a boolean vector whose length is the number of rows, and which indicates whether a row is duplicated. +* ``drop_duplicates`` removes duplicate rows. By default, the first observed row of a duplicate set is considered unique, but each method has a ``keep`` parameter to specify targets to be kept. -- ``keep='first'`` (default): mark / drop duplicates except for the first occurrence. -- ``keep='last'``: mark / drop duplicates except for the last occurrence. -- ``keep=False``: mark / drop all duplicates. +* ``keep='first'`` (default): mark / drop duplicates except for the first occurrence. +* ``keep='last'``: mark / drop duplicates except for the last occurrence. +* ``keep=False``: mark / drop all duplicates. .. ipython:: python diff --git a/doc/source/install.rst b/doc/source/install.rst index e655136904920..87d1b63914635 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -261,17 +261,17 @@ Optional Dependencies * `Apache Parquet `__, either `pyarrow `__ (>= 0.4.1) or `fastparquet `__ (>= 0.0.6) for parquet-based storage. The `snappy `__ and `brotli `__ are available for compression support. * `SQLAlchemy `__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. 
Some common drivers are: - * `psycopg2 `__: for PostgreSQL - * `pymysql `__: for MySQL. - * `SQLite `__: for SQLite, this is included in Python's standard library by default. + * `psycopg2 `__: for PostgreSQL + * `pymysql `__: for MySQL. + * `SQLite `__: for SQLite, this is included in Python's standard library by default. * `matplotlib `__: for plotting, Version 1.4.3 or higher. * For Excel I/O: - * `xlrd/xlwt `__: Excel reading (xlrd) and writing (xlwt) - * `openpyxl `__: openpyxl version 2.4.0 - for writing .xlsx files (xlrd >= 0.9.0) - * `XlsxWriter `__: Alternative Excel writer + * `xlrd/xlwt `__: Excel reading (xlrd) and writing (xlwt) + * `openpyxl `__: openpyxl version 2.4.0 + for writing .xlsx files (xlrd >= 0.9.0) + * `XlsxWriter `__: Alternative Excel writer * `Jinja2 `__: Template engine for conditional HTML formatting. * `s3fs `__: necessary for Amazon S3 access (s3fs >= 0.0.7). diff --git a/doc/source/internals.rst b/doc/source/internals.rst index caf5790fb24c6..fce99fc633440 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -24,23 +24,23 @@ Indexing In pandas there are a few objects implemented which can serve as valid containers for the axis labels: -- ``Index``: the generic "ordered set" object, an ndarray of object dtype +* ``Index``: the generic "ordered set" object, an ndarray of object dtype assuming nothing about its contents. The labels must be hashable (and likely immutable) and unique. Populates a dict of label to location in Cython to do ``O(1)`` lookups. -- ``Int64Index``: a version of ``Index`` highly optimized for 64-bit integer +* ``Int64Index``: a version of ``Index`` highly optimized for 64-bit integer data, such as time stamps -- ``Float64Index``: a version of ``Index`` highly optimized for 64-bit float data -- ``MultiIndex``: the standard hierarchical index object -- ``DatetimeIndex``: An Index object with ``Timestamp`` boxed elements (impl are the int64 values) -- ``TimedeltaIndex``: An Index object with ``Timedelta`` boxed elements (impl are the in64 values) -- ``PeriodIndex``: An Index object with Period elements +* ``Float64Index``: a version of ``Index`` highly optimized for 64-bit float data +* ``MultiIndex``: the standard hierarchical index object +* ``DatetimeIndex``: An Index object with ``Timestamp`` boxed elements (impl are the int64 values) +* ``TimedeltaIndex``: An Index object with ``Timedelta`` boxed elements (impl are the in64 values) +* ``PeriodIndex``: An Index object with Period elements There are functions that make the creation of a regular index easy: -- ``date_range``: fixed frequency date range generated from a time rule or +* ``date_range``: fixed frequency date range generated from a time rule or DateOffset. An ndarray of Python datetime objects -- ``period_range``: fixed frequency date range generated from a time rule or +* ``period_range``: fixed frequency date range generated from a time rule or DateOffset. 
An ndarray of ``Period`` objects, representing timespans The motivation for having an ``Index`` class in the first place was to enable @@ -52,22 +52,22 @@ From an internal implementation point of view, the relevant methods that an ``Index`` must define are one or more of the following (depending on how incompatible the new object internals are with the ``Index`` functions): -- ``get_loc``: returns an "indexer" (an integer, or in some cases a +* ``get_loc``: returns an "indexer" (an integer, or in some cases a slice object) for a label -- ``slice_locs``: returns the "range" to slice between two labels -- ``get_indexer``: Computes the indexing vector for reindexing / data +* ``slice_locs``: returns the "range" to slice between two labels +* ``get_indexer``: Computes the indexing vector for reindexing / data alignment purposes. See the source / docstrings for more on this -- ``get_indexer_non_unique``: Computes the indexing vector for reindexing / data +* ``get_indexer_non_unique``: Computes the indexing vector for reindexing / data alignment purposes when the index is non-unique. See the source / docstrings for more on this -- ``reindex``: Does any pre-conversion of the input index then calls +* ``reindex``: Does any pre-conversion of the input index then calls ``get_indexer`` -- ``union``, ``intersection``: computes the union or intersection of two +* ``union``, ``intersection``: computes the union or intersection of two Index objects -- ``insert``: Inserts a new label into an Index, yielding a new object -- ``delete``: Delete a label, yielding a new object -- ``drop``: Deletes a set of labels -- ``take``: Analogous to ndarray.take +* ``insert``: Inserts a new label into an Index, yielding a new object +* ``delete``: Delete a label, yielding a new object +* ``drop``: Deletes a set of labels +* ``take``: Analogous to ndarray.take MultiIndex ~~~~~~~~~~ diff --git a/doc/source/io.rst b/doc/source/io.rst index 658b9ff15783d..ae6c4f12f04f7 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -252,12 +252,12 @@ Datetime Handling +++++++++++++++++ parse_dates : boolean or list of ints or names or list of lists or dict, default ``False``. - - If ``True`` -> try parsing the index. - - If ``[1, 2, 3]`` -> try parsing columns 1, 2, 3 each as a separate date + * If ``True`` -> try parsing the index. + * If ``[1, 2, 3]`` -> try parsing columns 1, 2, 3 each as a separate date column. - - If ``[[1, 3]]`` -> combine columns 1 and 3 and parse as a single date + * If ``[[1, 3]]`` -> combine columns 1 and 3 and parse as a single date column. - - If ``{'foo': [1, 3]}`` -> parse columns 1, 3 as date and call result 'foo'. + * If ``{'foo': [1, 3]}`` -> parse columns 1, 3 as date and call result 'foo'. A fast-path exists for iso8601-formatted dates. infer_datetime_format : boolean, default ``False`` If ``True`` and parse_dates is enabled for a column, attempt to infer the @@ -961,12 +961,12 @@ negative consequences if enabled. Here are some examples of datetime strings that can be guessed (All representing December 30th, 2011 at 00:00:00): -- "20111230" -- "2011/12/30" -- "20111230 00:00:00" -- "12/30/2011 00:00:00" -- "30/Dec/2011 00:00:00" -- "30/December/2011 00:00:00" +* "20111230" +* "2011/12/30" +* "20111230 00:00:00" +* "12/30/2011 00:00:00" +* "30/Dec/2011 00:00:00" +* "30/December/2011 00:00:00" Note that ``infer_datetime_format`` is sensitive to ``dayfirst``. With ``dayfirst=True``, it will guess "01/12/2011" to be December 1st. 
With @@ -1303,16 +1303,16 @@ with data files that have known and fixed column widths. The function parameters to ``read_fwf`` are largely the same as `read_csv` with two extra parameters, and a different usage of the ``delimiter`` parameter: - - ``colspecs``: A list of pairs (tuples) giving the extents of the - fixed-width fields of each line as half-open intervals (i.e., [from, to[ ). - String value 'infer' can be used to instruct the parser to try detecting - the column specifications from the first 100 rows of the data. Default - behavior, if not specified, is to infer. - - ``widths``: A list of field widths which can be used instead of 'colspecs' - if the intervals are contiguous. - - ``delimiter``: Characters to consider as filler characters in the fixed-width file. - Can be used to specify the filler character of the fields - if it is not spaces (e.g., '~'). +* ``colspecs``: A list of pairs (tuples) giving the extents of the + fixed-width fields of each line as half-open intervals (i.e., [from, to[ ). + String value 'infer' can be used to instruct the parser to try detecting + the column specifications from the first 100 rows of the data. Default + behavior, if not specified, is to infer. +* ``widths``: A list of field widths which can be used instead of 'colspecs' + if the intervals are contiguous. +* ``delimiter``: Characters to consider as filler characters in the fixed-width file. + Can be used to specify the filler character of the fields + if it is not spaces (e.g., '~'). .. ipython:: python :suppress: @@ -1566,9 +1566,9 @@ possible pandas uses the C parser (specified as ``engine='c'``), but may fall back to Python if C-unsupported options are specified. Currently, C-unsupported options include: -- ``sep`` other than a single character (e.g. regex separators) -- ``skipfooter`` -- ``sep=None`` with ``delim_whitespace=False`` +* ``sep`` other than a single character (e.g. regex separators) +* ``skipfooter`` +* ``sep=None`` with ``delim_whitespace=False`` Specifying any of the above options will produce a ``ParserWarning`` unless the python engine is selected explicitly using ``engine='python'``. @@ -1602,29 +1602,29 @@ The ``Series`` and ``DataFrame`` objects have an instance method ``to_csv`` whic allows storing the contents of the object as a comma-separated-values file. The function takes a number of arguments. Only the first is required. - - ``path_or_buf``: A string path to the file to write or a StringIO - - ``sep`` : Field delimiter for the output file (default ",") - - ``na_rep``: A string representation of a missing value (default '') - - ``float_format``: Format string for floating point numbers - - ``cols``: Columns to write (default None) - - ``header``: Whether to write out the column names (default True) - - ``index``: whether to write row (index) names (default True) - - ``index_label``: Column label(s) for index column(s) if desired. If None - (default), and `header` and `index` are True, then the index names are - used. (A sequence should be given if the ``DataFrame`` uses MultiIndex). - - ``mode`` : Python write mode, default 'w' - - ``encoding``: a string representing the encoding to use if the contents are - non-ASCII, for Python versions prior to 3 - - ``line_terminator``: Character sequence denoting line end (default '\\n') - - ``quoting``: Set quoting rules as in csv module (default csv.QUOTE_MINIMAL). 
Note that if you have set a `float_format` then floats are converted to strings and csv.QUOTE_NONNUMERIC will treat them as non-numeric - - ``quotechar``: Character used to quote fields (default '"') - - ``doublequote``: Control quoting of ``quotechar`` in fields (default True) - - ``escapechar``: Character used to escape ``sep`` and ``quotechar`` when - appropriate (default None) - - ``chunksize``: Number of rows to write at a time - - ``tupleize_cols``: If False (default), write as a list of tuples, otherwise - write in an expanded line format suitable for ``read_csv`` - - ``date_format``: Format string for datetime objects +* ``path_or_buf``: A string path to the file to write or a StringIO +* ``sep`` : Field delimiter for the output file (default ",") +* ``na_rep``: A string representation of a missing value (default '') +* ``float_format``: Format string for floating point numbers +* ``cols``: Columns to write (default None) +* ``header``: Whether to write out the column names (default True) +* ``index``: whether to write row (index) names (default True) +* ``index_label``: Column label(s) for index column(s) if desired. If None + (default), and `header` and `index` are True, then the index names are + used. (A sequence should be given if the ``DataFrame`` uses MultiIndex). +* ``mode`` : Python write mode, default 'w' +* ``encoding``: a string representing the encoding to use if the contents are + non-ASCII, for Python versions prior to 3 +* ``line_terminator``: Character sequence denoting line end (default '\\n') +* ``quoting``: Set quoting rules as in csv module (default csv.QUOTE_MINIMAL). Note that if you have set a `float_format` then floats are converted to strings and csv.QUOTE_NONNUMERIC will treat them as non-numeric +* ``quotechar``: Character used to quote fields (default '"') +* ``doublequote``: Control quoting of ``quotechar`` in fields (default True) +* ``escapechar``: Character used to escape ``sep`` and ``quotechar`` when + appropriate (default None) +* ``chunksize``: Number of rows to write at a time +* ``tupleize_cols``: If False (default), write as a list of tuples, otherwise + write in an expanded line format suitable for ``read_csv`` +* ``date_format``: Format string for datetime objects Writing a formatted string ++++++++++++++++++++++++++ @@ -1634,22 +1634,22 @@ Writing a formatted string The ``DataFrame`` object has an instance method ``to_string`` which allows control over the string representation of the object. All arguments are optional: - - ``buf`` default None, for example a StringIO object - - ``columns`` default None, which columns to write - - ``col_space`` default None, minimum width of each column. - - ``na_rep`` default ``NaN``, representation of NA value - - ``formatters`` default None, a dictionary (by column) of functions each of - which takes a single argument and returns a formatted string - - ``float_format`` default None, a function which takes a single (float) - argument and returns a formatted string; to be applied to floats in the - ``DataFrame``. - - ``sparsify`` default True, set to False for a ``DataFrame`` with a hierarchical - index to print every MultiIndex key at each row. 
- - ``index_names`` default True, will print the names of the indices - - ``index`` default True, will print the index (ie, row labels) - - ``header`` default True, will print the column labels - - ``justify`` default ``left``, will print column headers left- or - right-justified +* ``buf`` default None, for example a StringIO object +* ``columns`` default None, which columns to write +* ``col_space`` default None, minimum width of each column. +* ``na_rep`` default ``NaN``, representation of NA value +* ``formatters`` default None, a dictionary (by column) of functions each of + which takes a single argument and returns a formatted string +* ``float_format`` default None, a function which takes a single (float) + argument and returns a formatted string; to be applied to floats in the + ``DataFrame``. +* ``sparsify`` default True, set to False for a ``DataFrame`` with a hierarchical + index to print every MultiIndex key at each row. +* ``index_names`` default True, will print the names of the indices +* ``index`` default True, will print the index (ie, row labels) +* ``header`` default True, will print the column labels +* ``justify`` default ``left``, will print column headers left- or + right-justified The ``Series`` object also has a ``to_string`` method, but with only the ``buf``, ``na_rep``, ``float_format`` arguments. There is also a ``length`` argument @@ -1670,17 +1670,17 @@ Writing JSON A ``Series`` or ``DataFrame`` can be converted to a valid JSON string. Use ``to_json`` with optional parameters: -- ``path_or_buf`` : the pathname or buffer to write the output +* ``path_or_buf`` : the pathname or buffer to write the output This can be ``None`` in which case a JSON string is returned -- ``orient`` : +* ``orient`` : ``Series``: - - default is ``index`` - - allowed values are {``split``, ``records``, ``index``} + * default is ``index`` + * allowed values are {``split``, ``records``, ``index``} ``DataFrame``: - - default is ``columns`` - - allowed values are {``split``, ``records``, ``index``, ``columns``, ``values``, ``table``} + * default is ``columns`` + * allowed values are {``split``, ``records``, ``index``, ``columns``, ``values``, ``table``} The format of the JSON string @@ -1694,12 +1694,12 @@ with optional parameters: ``columns``; dict like {column -> {index -> value}} ``values``; just the values array -- ``date_format`` : string, type of date conversion, 'epoch' for timestamp, 'iso' for ISO8601. -- ``double_precision`` : The number of decimal places to use when encoding floating point values, default 10. -- ``force_ascii`` : force encoded string to be ASCII, default True. -- ``date_unit`` : The time unit to encode to, governs timestamp and ISO8601 precision. One of 's', 'ms', 'us' or 'ns' for seconds, milliseconds, microseconds and nanoseconds respectively. Default 'ms'. -- ``default_handler`` : The handler to call if an object cannot otherwise be converted to a suitable format for JSON. Takes a single argument, which is the object to convert, and returns a serializable object. -- ``lines`` : If ``records`` orient, then will write each record per line as json. +* ``date_format`` : string, type of date conversion, 'epoch' for timestamp, 'iso' for ISO8601. +* ``double_precision`` : The number of decimal places to use when encoding floating point values, default 10. +* ``force_ascii`` : force encoded string to be ASCII, default True. +* ``date_unit`` : The time unit to encode to, governs timestamp and ISO8601 precision. 
One of 's', 'ms', 'us' or 'ns' for seconds, milliseconds, microseconds and nanoseconds respectively. Default 'ms'. +* ``default_handler`` : The handler to call if an object cannot otherwise be converted to a suitable format for JSON. Takes a single argument, which is the object to convert, and returns a serializable object. +* ``lines`` : If ``records`` orient, then will write each record per line as json. Note ``NaN``'s, ``NaT``'s and ``None`` will be converted to ``null`` and ``datetime`` objects will be converted based on the ``date_format`` and ``date_unit`` parameters. @@ -1818,19 +1818,19 @@ Fallback Behavior If the JSON serializer cannot handle the container contents directly it will fall back in the following manner: -- if the dtype is unsupported (e.g. ``np.complex``) then the ``default_handler``, if provided, will be called +* if the dtype is unsupported (e.g. ``np.complex``) then the ``default_handler``, if provided, will be called for each value, otherwise an exception is raised. -- if an object is unsupported it will attempt the following: +* if an object is unsupported it will attempt the following: - * check if the object has defined a ``toDict`` method and call it. - A ``toDict`` method should return a ``dict`` which will then be JSON serialized. + * check if the object has defined a ``toDict`` method and call it. + A ``toDict`` method should return a ``dict`` which will then be JSON serialized. - * invoke the ``default_handler`` if one was provided. + * invoke the ``default_handler`` if one was provided. - * convert the object to a ``dict`` by traversing its contents. However this will often fail - with an ``OverflowError`` or give unexpected results. + * convert the object to a ``dict`` by traversing its contents. However this will often fail + with an ``OverflowError`` or give unexpected results. In general the best approach for unsupported objects or dtypes is to provide a ``default_handler``. For example: @@ -1856,20 +1856,20 @@ Reading a JSON string to pandas object can take a number of parameters. The parser will try to parse a ``DataFrame`` if ``typ`` is not supplied or is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series`` -- ``filepath_or_buffer`` : a **VALID** JSON string or file handle / StringIO. The string could be +* ``filepath_or_buffer`` : a **VALID** JSON string or file handle / StringIO. The string could be a URL. Valid URL schemes include http, ftp, S3, and file. For file URLs, a host is expected. For instance, a local file could be file ://localhost/path/to/table.json -- ``typ`` : type of object to recover (series or frame), default 'frame' -- ``orient`` : +* ``typ`` : type of object to recover (series or frame), default 'frame' +* ``orient`` : Series : - - default is ``index`` - - allowed values are {``split``, ``records``, ``index``} + * default is ``index`` + * allowed values are {``split``, ``records``, ``index``} DataFrame - - default is ``columns`` - - allowed values are {``split``, ``records``, ``index``, ``columns``, ``values``, ``table``} + * default is ``columns`` + * allowed values are {``split``, ``records``, ``index``, ``columns``, ``values``, ``table``} The format of the JSON string @@ -1885,20 +1885,20 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series`` ``table``; adhering to the JSON `Table Schema`_ -- ``dtype`` : if True, infer dtypes, if a dict of column to dtype, then use those, if ``False``, then don't infer dtypes at all, default is True, apply only to the data. 
-- ``convert_axes`` : boolean, try to convert the axes to the proper dtypes, default is ``True`` -- ``convert_dates`` : a list of columns to parse for dates; If ``True``, then try to parse date-like columns, default is ``True``. -- ``keep_default_dates`` : boolean, default ``True``. If parsing dates, then parse the default date-like columns. -- ``numpy`` : direct decoding to NumPy arrays. default is ``False``; +* ``dtype`` : if True, infer dtypes, if a dict of column to dtype, then use those, if ``False``, then don't infer dtypes at all, default is True, apply only to the data. +* ``convert_axes`` : boolean, try to convert the axes to the proper dtypes, default is ``True`` +* ``convert_dates`` : a list of columns to parse for dates; If ``True``, then try to parse date-like columns, default is ``True``. +* ``keep_default_dates`` : boolean, default ``True``. If parsing dates, then parse the default date-like columns. +* ``numpy`` : direct decoding to NumPy arrays. default is ``False``; Supports numeric data only, although labels may be non-numeric. Also note that the JSON ordering **MUST** be the same for each term if ``numpy=True``. -- ``precise_float`` : boolean, default ``False``. Set to enable usage of higher precision (strtod) function when decoding string to double values. Default (``False``) is to use fast but less precise builtin functionality. -- ``date_unit`` : string, the timestamp unit to detect if converting dates. Default +* ``precise_float`` : boolean, default ``False``. Set to enable usage of higher precision (strtod) function when decoding string to double values. Default (``False``) is to use fast but less precise builtin functionality. +* ``date_unit`` : string, the timestamp unit to detect if converting dates. Default None. By default the timestamp precision will be detected, if this is not desired then pass one of 's', 'ms', 'us' or 'ns' to force timestamp precision to seconds, milliseconds, microseconds or nanoseconds respectively. -- ``lines`` : reads file as one json object per line. -- ``encoding`` : The encoding to use to decode py3 bytes. -- ``chunksize`` : when used in combination with ``lines=True``, return a JsonReader which reads in ``chunksize`` lines per iteration. +* ``lines`` : reads file as one json object per line. +* ``encoding`` : The encoding to use to decode py3 bytes. +* ``chunksize`` : when used in combination with ``lines=True``, return a JsonReader which reads in ``chunksize`` lines per iteration. The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is not parseable. @@ -2175,10 +2175,10 @@ object str A few notes on the generated table schema: -- The ``schema`` object contains a ``pandas_version`` field. This contains +* The ``schema`` object contains a ``pandas_version`` field. This contains the version of pandas' dialect of the schema, and will be incremented with each revision. -- All dates are converted to UTC when serializing. Even timezone naive values, +* All dates are converted to UTC when serializing. Even timezone naive values, which are treated as UTC with an offset of 0. .. ipython:: python @@ -2187,7 +2187,7 @@ A few notes on the generated table schema: s = pd.Series(pd.date_range('2016', periods=4)) build_table_schema(s) -- datetimes with a timezone (before serializing), include an additional field +* datetimes with a timezone (before serializing), include an additional field ``tz`` with the time zone name (e.g. ``'US/Central'``). .. 
ipython:: python @@ -2196,7 +2196,7 @@ A few notes on the generated table schema: tz='US/Central')) build_table_schema(s_tz) -- Periods are converted to timestamps before serialization, and so have the +* Periods are converted to timestamps before serialization, and so have the same behavior of being converted to UTC. In addition, periods will contain and additional field ``freq`` with the period's frequency, e.g. ``'A-DEC'``. @@ -2206,7 +2206,7 @@ A few notes on the generated table schema: periods=4)) build_table_schema(s_per) -- Categoricals use the ``any`` type and an ``enum`` constraint listing +* Categoricals use the ``any`` type and an ``enum`` constraint listing the set of possible values. Additionally, an ``ordered`` field is included: .. ipython:: python @@ -2214,7 +2214,7 @@ A few notes on the generated table schema: s_cat = pd.Series(pd.Categorical(['a', 'b', 'a'])) build_table_schema(s_cat) -- A ``primaryKey`` field, containing an array of labels, is included +* A ``primaryKey`` field, containing an array of labels, is included *if the index is unique*: .. ipython:: python @@ -2222,7 +2222,7 @@ A few notes on the generated table schema: s_dupe = pd.Series([1, 2], index=[1, 1]) build_table_schema(s_dupe) -- The ``primaryKey`` behavior is the same with MultiIndexes, but in this +* The ``primaryKey`` behavior is the same with MultiIndexes, but in this case the ``primaryKey`` is an array: .. ipython:: python @@ -2231,15 +2231,15 @@ A few notes on the generated table schema: (0, 1)])) build_table_schema(s_multi) -- The default naming roughly follows these rules: +* The default naming roughly follows these rules: - + For series, the ``object.name`` is used. If that's none, then the - name is ``values`` - + For ``DataFrames``, the stringified version of the column name is used - + For ``Index`` (not ``MultiIndex``), ``index.name`` is used, with a - fallback to ``index`` if that is None. - + For ``MultiIndex``, ``mi.names`` is used. If any level has no name, - then ``level_`` is used. + * For series, the ``object.name`` is used. If that's none, then the + name is ``values`` + * For ``DataFrames``, the stringified version of the column name is used + * For ``Index`` (not ``MultiIndex``), ``index.name`` is used, with a + fallback to ``index`` if that is None. + * For ``MultiIndex``, ``mi.names`` is used. If any level has no name, + then ``level_`` is used. .. versionadded:: 0.23.0 @@ -2601,55 +2601,55 @@ parse HTML tables in the top-level pandas io function ``read_html``. **Issues with** |lxml|_ - * Benefits +* Benefits - * |lxml|_ is very fast. + * |lxml|_ is very fast. - * |lxml|_ requires Cython to install correctly. + * |lxml|_ requires Cython to install correctly. - * Drawbacks +* Drawbacks - * |lxml|_ does *not* make any guarantees about the results of its parse - *unless* it is given |svm|_. + * |lxml|_ does *not* make any guarantees about the results of its parse + *unless* it is given |svm|_. - * In light of the above, we have chosen to allow you, the user, to use the - |lxml|_ backend, but **this backend will use** |html5lib|_ if |lxml|_ - fails to parse + * In light of the above, we have chosen to allow you, the user, to use the + |lxml|_ backend, but **this backend will use** |html5lib|_ if |lxml|_ + fails to parse - * It is therefore *highly recommended* that you install both - |BeautifulSoup4|_ and |html5lib|_, so that you will still get a valid - result (provided everything else is valid) even if |lxml|_ fails. 
+ * It is therefore *highly recommended* that you install both + |BeautifulSoup4|_ and |html5lib|_, so that you will still get a valid + result (provided everything else is valid) even if |lxml|_ fails. **Issues with** |BeautifulSoup4|_ **using** |lxml|_ **as a backend** - * The above issues hold here as well since |BeautifulSoup4|_ is essentially - just a wrapper around a parser backend. +* The above issues hold here as well since |BeautifulSoup4|_ is essentially + just a wrapper around a parser backend. **Issues with** |BeautifulSoup4|_ **using** |html5lib|_ **as a backend** - * Benefits +* Benefits - * |html5lib|_ is far more lenient than |lxml|_ and consequently deals - with *real-life markup* in a much saner way rather than just, e.g., - dropping an element without notifying you. + * |html5lib|_ is far more lenient than |lxml|_ and consequently deals + with *real-life markup* in a much saner way rather than just, e.g., + dropping an element without notifying you. - * |html5lib|_ *generates valid HTML5 markup from invalid markup - automatically*. This is extremely important for parsing HTML tables, - since it guarantees a valid document. However, that does NOT mean that - it is "correct", since the process of fixing markup does not have a - single definition. + * |html5lib|_ *generates valid HTML5 markup from invalid markup + automatically*. This is extremely important for parsing HTML tables, + since it guarantees a valid document. However, that does NOT mean that + it is "correct", since the process of fixing markup does not have a + single definition. - * |html5lib|_ is pure Python and requires no additional build steps beyond - its own installation. + * |html5lib|_ is pure Python and requires no additional build steps beyond + its own installation. - * Drawbacks +* Drawbacks - * The biggest drawback to using |html5lib|_ is that it is slow as - molasses. However consider the fact that many tables on the web are not - big enough for the parsing algorithm runtime to matter. It is more - likely that the bottleneck will be in the process of reading the raw - text from the URL over the web, i.e., IO (input-output). For very large - tables, this might not be true. + * The biggest drawback to using |html5lib|_ is that it is slow as + molasses. However consider the fact that many tables on the web are not + big enough for the parsing algorithm runtime to matter. It is more + likely that the bottleneck will be in the process of reading the raw + text from the URL over the web, i.e., IO (input-output). For very large + tables, this might not be true. .. |svm| replace:: **strictly valid markup** @@ -2753,13 +2753,13 @@ Specifying Sheets .. note :: An ExcelFile's attribute ``sheet_names`` provides access to a list of sheets. -- The arguments ``sheet_name`` allows specifying the sheet or sheets to read. -- The default value for ``sheet_name`` is 0, indicating to read the first sheet -- Pass a string to refer to the name of a particular sheet in the workbook. -- Pass an integer to refer to the index of a sheet. Indices follow Python +* The arguments ``sheet_name`` allows specifying the sheet or sheets to read. +* The default value for ``sheet_name`` is 0, indicating to read the first sheet +* Pass a string to refer to the name of a particular sheet in the workbook. +* Pass an integer to refer to the index of a sheet. Indices follow Python convention, beginning at 0. -- Pass a list of either strings or integers, to return a dictionary of specified sheets. 
-- Pass a ``None`` to return a dictionary of all available sheets. +* Pass a list of either strings or integers, to return a dictionary of specified sheets. +* Pass a ``None`` to return a dictionary of all available sheets. .. code-block:: python @@ -3030,9 +3030,9 @@ files if `Xlsxwriter`_ is not available. To specify which writer you want to use, you can pass an engine keyword argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: -- ``openpyxl``: version 2.4 or higher is required -- ``xlsxwriter`` -- ``xlwt`` +* ``openpyxl``: version 2.4 or higher is required +* ``xlsxwriter`` +* ``xlwt`` .. code-block:: python @@ -3055,8 +3055,8 @@ Style and Formatting The look and feel of Excel worksheets created from pandas can be modified using the following parameters on the ``DataFrame``'s ``to_excel`` method. -- ``float_format`` : Format string for floating point numbers (default ``None``). -- ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will freeze the first row and first column (default ``None``). +* ``float_format`` : Format string for floating point numbers (default ``None``). +* ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will freeze the first row and first column (default ``None``). @@ -3654,10 +3654,10 @@ data. A query is specified using the ``Term`` class under the hood, as a boolean expression. -- ``index`` and ``columns`` are supported indexers of a ``DataFrames``. -- ``major_axis``, ``minor_axis``, and ``items`` are supported indexers of +* ``index`` and ``columns`` are supported indexers of a ``DataFrames``. +* ``major_axis``, ``minor_axis``, and ``items`` are supported indexers of the Panel. -- if ``data_columns`` are specified, these can be used as additional indexers. +* if ``data_columns`` are specified, these can be used as additional indexers. Valid comparison operators are: @@ -3665,9 +3665,9 @@ Valid comparison operators are: Valid boolean expressions are combined with: -- ``|`` : or -- ``&`` : and -- ``(`` and ``)`` : for grouping +* ``|`` : or +* ``&`` : and +* ``(`` and ``)`` : for grouping These rules are similar to how boolean expressions are used in pandas for indexing. @@ -3680,16 +3680,16 @@ These rules are similar to how boolean expressions are used in pandas for indexi The following are valid expressions: -- ``'index >= date'`` -- ``"columns = ['A', 'D']"`` -- ``"columns in ['A', 'D']"`` -- ``'columns = A'`` -- ``'columns == A'`` -- ``"~(columns = ['A', 'B'])"`` -- ``'index > df.index[3] & string = "bar"'`` -- ``'(index > df.index[3] & index <= df.index[6]) | string = "bar"'`` -- ``"ts >= Timestamp('2012-02-01')"`` -- ``"major_axis>=20130101"`` +* ``'index >= date'`` +* ``"columns = ['A', 'D']"`` +* ``"columns in ['A', 'D']"`` +* ``'columns = A'`` +* ``'columns == A'`` +* ``"~(columns = ['A', 'B'])"`` +* ``'index > df.index[3] & string = "bar"'`` +* ``'(index > df.index[3] & index <= df.index[6]) | string = "bar"'`` +* ``"ts >= Timestamp('2012-02-01')"`` +* ``"major_axis>=20130101"`` The ``indexers`` are on the left-hand side of the sub-expression: @@ -3697,11 +3697,11 @@ The ``indexers`` are on the left-hand side of the sub-expression: The right-hand side of the sub-expression (after a comparison operator) can be: -- functions that will be evaluated, e.g. ``Timestamp('2012-02-01')`` -- strings, e.g. ``"bar"`` -- date-like, e.g. 
``20130101``, or ``"20130101"`` -- lists, e.g. ``"['A', 'B']"`` -- variables that are defined in the local names space, e.g. ``date`` +* functions that will be evaluated, e.g. ``Timestamp('2012-02-01')`` +* strings, e.g. ``"bar"`` +* date-like, e.g. ``20130101``, or ``"20130101"`` +* lists, e.g. ``"['A', 'B']"`` +* variables that are defined in the local names space, e.g. ``date`` .. note:: @@ -4080,15 +4080,15 @@ simple use case. You store panel-type data, with dates in the ``major_axis`` and ids in the ``minor_axis``. The data is then interleaved like this: -- date_1 - - id_1 - - id_2 - - . - - id_n -- date_2 - - id_1 - - . - - id_n +* date_1 + * id_1 + * id_2 + * . + * id_n +* date_2 + * id_1 + * . + * id_n It should be clear that a delete operation on the ``major_axis`` will be fairly quick, as one chunk is removed, then the following data moved. On @@ -4216,12 +4216,12 @@ Caveats need to serialize these operations in a single thread in a single process. You will corrupt your data otherwise. See the (:issue:`2397`) for more information. -- If you use locks to manage write access between multiple processes, you +* If you use locks to manage write access between multiple processes, you may want to use :py:func:`~os.fsync` before releasing write locks. For convenience you can use ``store.flush(fsync=True)`` to do this for you. -- Once a ``table`` is created its items (Panel) / columns (DataFrame) +* Once a ``table`` is created its items (Panel) / columns (DataFrame) are fixed; only exactly the same columns can be appended -- Be aware that timezones (e.g., ``pytz.timezone('US/Eastern')``) +* Be aware that timezones (e.g., ``pytz.timezone('US/Eastern')``) are not necessarily equal across timezone versions. So if data is localized to a specific timezone in the HDFStore using one version of a timezone library and that data is updated with another version, the data @@ -4438,21 +4438,21 @@ Now you can import the ``DataFrame`` into R: Performance ''''''''''' -- ``tables`` format come with a writing performance penalty as compared to +* ``tables`` format come with a writing performance penalty as compared to ``fixed`` stores. The benefit is the ability to append/delete and query (potentially very large amounts of data). Write times are generally longer as compared with regular stores. Query times can be quite fast, especially on an indexed axis. -- You can pass ``chunksize=`` to ``append``, specifying the +* You can pass ``chunksize=`` to ``append``, specifying the write chunksize (default is 50000). This will significantly lower your memory usage on writing. -- You can pass ``expectedrows=`` to the first ``append``, +* You can pass ``expectedrows=`` to the first ``append``, to set the TOTAL number of expected rows that ``PyTables`` will expected. This will optimize read/write performance. -- Duplicate rows can be written to tables, but are filtered out in +* Duplicate rows can be written to tables, but are filtered out in selection (with the last items being selected; thus a table is unique on major, minor pairs) -- A ``PerformanceWarning`` will be raised if you are attempting to +* A ``PerformanceWarning`` will be raised if you are attempting to store types that will be pickled by PyTables (rather than stored as endemic types). See `Here `__ @@ -4482,14 +4482,14 @@ dtypes, including extension dtypes such as categorical and datetime with tz. Several caveats. 
-- This is a newer library, and the format, though stable, is not guaranteed to be backward compatible +* This is a newer library, and the format, though stable, is not guaranteed to be backward compatible to the earlier versions. -- The format will NOT write an ``Index``, or ``MultiIndex`` for the +* The format will NOT write an ``Index``, or ``MultiIndex`` for the ``DataFrame`` and will raise an error if a non-default one is provided. You can ``.reset_index()`` to store the index or ``.reset_index(drop=True)`` to ignore it. -- Duplicate column names and non-string columns names are not supported -- Non supported types include ``Period`` and actual Python object types. These will raise a helpful error message +* Duplicate column names and non-string columns names are not supported +* Non supported types include ``Period`` and actual Python object types. These will raise a helpful error message on an attempt at serialization. See the `Full Documentation `__. @@ -4550,10 +4550,10 @@ dtypes, including extension dtypes such as datetime with tz. Several caveats. -- Duplicate column names and non-string columns names are not supported. -- Index level names, if specified, must be strings. -- Categorical dtypes can be serialized to parquet, but will de-serialize as ``object`` dtype. -- Non supported types include ``Period`` and actual Python object types. These will raise a helpful error message +* Duplicate column names and non-string columns names are not supported. +* Index level names, if specified, must be strings. +* Categorical dtypes can be serialized to parquet, but will de-serialize as ``object`` dtype. +* Non supported types include ``Period`` and actual Python object types. These will raise a helpful error message on an attempt at serialization. You can specify an ``engine`` to direct the serialization. This can be one of ``pyarrow``, or ``fastparquet``, or ``auto``. diff --git a/doc/source/merging.rst b/doc/source/merging.rst index 45944ba56d4e7..b2cb388e3cd03 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -81,33 +81,33 @@ some configurable handling of "what to do with the other axes": keys=None, levels=None, names=None, verify_integrity=False, copy=True) -- ``objs`` : a sequence or mapping of Series, DataFrame, or Panel objects. If a +* ``objs`` : a sequence or mapping of Series, DataFrame, or Panel objects. If a dict is passed, the sorted keys will be used as the `keys` argument, unless it is passed, in which case the values will be selected (see below). Any None objects will be dropped silently unless they are all None in which case a ValueError will be raised. -- ``axis`` : {0, 1, ...}, default 0. The axis to concatenate along. -- ``join`` : {'inner', 'outer'}, default 'outer'. How to handle indexes on +* ``axis`` : {0, 1, ...}, default 0. The axis to concatenate along. +* ``join`` : {'inner', 'outer'}, default 'outer'. How to handle indexes on other axis(es). Outer for union and inner for intersection. -- ``ignore_index`` : boolean, default False. If True, do not use the index +* ``ignore_index`` : boolean, default False. If True, do not use the index values on the concatenation axis. The resulting axis will be labeled 0, ..., n - 1. This is useful if you are concatenating objects where the concatenation axis does not have meaningful indexing information. Note the index values on the other axes are still respected in the join. -- ``join_axes`` : list of Index objects. Specific indexes to use for the other +* ``join_axes`` : list of Index objects. 
Specific indexes to use for the other n - 1 axes instead of performing inner/outer set logic. -- ``keys`` : sequence, default None. Construct hierarchical index using the +* ``keys`` : sequence, default None. Construct hierarchical index using the passed keys as the outermost level. If multiple levels passed, should contain tuples. -- ``levels`` : list of sequences, default None. Specific levels (unique values) +* ``levels`` : list of sequences, default None. Specific levels (unique values) to use for constructing a MultiIndex. Otherwise they will be inferred from the keys. -- ``names`` : list, default None. Names for the levels in the resulting +* ``names`` : list, default None. Names for the levels in the resulting hierarchical index. -- ``verify_integrity`` : boolean, default False. Check whether the new +* ``verify_integrity`` : boolean, default False. Check whether the new concatenated axis contains duplicates. This can be very expensive relative to the actual data concatenation. -- ``copy`` : boolean, default True. If False, do not copy data unnecessarily. +* ``copy`` : boolean, default True. If False, do not copy data unnecessarily. Without a little bit of context many of these arguments don't make much sense. Let's revisit the above example. Suppose we wanted to associate specific keys @@ -156,10 +156,10 @@ When gluing together multiple DataFrames, you have a choice of how to handle the other axes (other than the one being concatenated). This can be done in the following three ways: -- Take the union of them all, ``join='outer'``. This is the default +* Take the union of them all, ``join='outer'``. This is the default option as it results in zero information loss. -- Take the intersection, ``join='inner'``. -- Use a specific index, as passed to the ``join_axes`` argument. +* Take the intersection, ``join='inner'``. +* Use a specific index, as passed to the ``join_axes`` argument. Here is an example of each of these methods. First, the default ``join='outer'`` behavior: @@ -531,52 +531,52 @@ all standard database join operations between ``DataFrame`` objects: suffixes=('_x', '_y'), copy=True, indicator=False, validate=None) -- ``left``: A DataFrame object. -- ``right``: Another DataFrame object. -- ``on``: Column or index level names to join on. Must be found in both the left +* ``left``: A DataFrame object. +* ``right``: Another DataFrame object. +* ``on``: Column or index level names to join on. Must be found in both the left and right DataFrame objects. If not passed and ``left_index`` and ``right_index`` are ``False``, the intersection of the columns in the DataFrames will be inferred to be the join keys. -- ``left_on``: Columns or index levels from the left DataFrame to use as +* ``left_on``: Columns or index levels from the left DataFrame to use as keys. Can either be column names, index level names, or arrays with length equal to the length of the DataFrame. -- ``right_on``: Columns or index levels from the right DataFrame to use as +* ``right_on``: Columns or index levels from the right DataFrame to use as keys. Can either be column names, index level names, or arrays with length equal to the length of the DataFrame. -- ``left_index``: If ``True``, use the index (row labels) from the left +* ``left_index``: If ``True``, use the index (row labels) from the left DataFrame as its join key(s). In the case of a DataFrame with a MultiIndex (hierarchical), the number of levels must match the number of join keys from the right DataFrame. 
-- ``right_index``: Same usage as ``left_index`` for the right DataFrame -- ``how``: One of ``'left'``, ``'right'``, ``'outer'``, ``'inner'``. Defaults +* ``right_index``: Same usage as ``left_index`` for the right DataFrame +* ``how``: One of ``'left'``, ``'right'``, ``'outer'``, ``'inner'``. Defaults to ``inner``. See below for more detailed description of each method. -- ``sort``: Sort the result DataFrame by the join keys in lexicographical +* ``sort``: Sort the result DataFrame by the join keys in lexicographical order. Defaults to ``True``, setting to ``False`` will improve performance substantially in many cases. -- ``suffixes``: A tuple of string suffixes to apply to overlapping +* ``suffixes``: A tuple of string suffixes to apply to overlapping columns. Defaults to ``('_x', '_y')``. -- ``copy``: Always copy data (default ``True``) from the passed DataFrame +* ``copy``: Always copy data (default ``True``) from the passed DataFrame objects, even when reindexing is not necessary. Cannot be avoided in many cases but may improve performance / memory usage. The cases where copying can be avoided are somewhat pathological but this option is provided nonetheless. -- ``indicator``: Add a column to the output DataFrame called ``_merge`` +* ``indicator``: Add a column to the output DataFrame called ``_merge`` with information on the source of each row. ``_merge`` is Categorical-type and takes on a value of ``left_only`` for observations whose merge key only appears in ``'left'`` DataFrame, ``right_only`` for observations whose merge key only appears in ``'right'`` DataFrame, and ``both`` if the observation's merge key is found in both. -- ``validate`` : string, default None. +* ``validate`` : string, default None. If specified, checks if merge is of specified type. - * "one_to_one" or "1:1": checks if merge keys are unique in both - left and right datasets. - * "one_to_many" or "1:m": checks if merge keys are unique in left - dataset. - * "many_to_one" or "m:1": checks if merge keys are unique in right - dataset. - * "many_to_many" or "m:m": allowed, but does not result in checks. + * "one_to_one" or "1:1": checks if merge keys are unique in both + left and right datasets. + * "one_to_many" or "1:m": checks if merge keys are unique in left + dataset. + * "many_to_one" or "m:1": checks if merge keys are unique in right + dataset. + * "many_to_many" or "m:m": allowed, but does not result in checks. .. versionadded:: 0.21.0 @@ -605,11 +605,11 @@ terminology used to describe join operations between two SQL-table like structures (``DataFrame`` objects). There are several cases to consider which are very important to understand: -- **one-to-one** joins: for example when joining two ``DataFrame`` objects on +* **one-to-one** joins: for example when joining two ``DataFrame`` objects on their indexes (which must contain unique values). -- **many-to-one** joins: for example when joining an index (unique) to one or +* **many-to-one** joins: for example when joining an index (unique) to one or more columns in a different ``DataFrame``. -- **many-to-many** joins: joining columns on columns. +* **many-to-many** joins: joining columns on columns. .. 
note:: diff --git a/doc/source/options.rst b/doc/source/options.rst index 697cc0682e39a..cbe0264f442bc 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -31,10 +31,10 @@ You can get/set options directly as attributes of the top-level ``options`` attr The API is composed of 5 relevant functions, available directly from the ``pandas`` namespace: -- :func:`~pandas.get_option` / :func:`~pandas.set_option` - get/set the value of a single option. -- :func:`~pandas.reset_option` - reset one or more options to their default value. -- :func:`~pandas.describe_option` - print the descriptions of one or more options. -- :func:`~pandas.option_context` - execute a codeblock with a set of options +* :func:`~pandas.get_option` / :func:`~pandas.set_option` - get/set the value of a single option. +* :func:`~pandas.reset_option` - reset one or more options to their default value. +* :func:`~pandas.describe_option` - print the descriptions of one or more options. +* :func:`~pandas.option_context` - execute a codeblock with a set of options that revert to prior settings after execution. **Note:** Developers can check out `pandas/core/config.py `_ for more information. diff --git a/doc/source/overview.rst b/doc/source/overview.rst index f86b1c67e6843..6ba9501ba0b5e 100644 --- a/doc/source/overview.rst +++ b/doc/source/overview.rst @@ -12,19 +12,19 @@ programming language. :mod:`pandas` consists of the following elements: - * A set of labeled array data structures, the primary of which are - Series and DataFrame. - * Index objects enabling both simple axis indexing and multi-level / - hierarchical axis indexing. - * An integrated group by engine for aggregating and transforming data sets. - * Date range generation (date_range) and custom date offsets enabling the - implementation of customized frequencies. - * Input/Output tools: loading tabular data from flat files (CSV, delimited, - Excel 2003), and saving and loading pandas objects from the fast and - efficient PyTables/HDF5 format. - * Memory-efficient "sparse" versions of the standard data structures for storing - data that is mostly missing or mostly constant (some fixed value). - * Moving window statistics (rolling mean, rolling standard deviation, etc.). +* A set of labeled array data structures, the primary of which are + Series and DataFrame. +* Index objects enabling both simple axis indexing and multi-level / + hierarchical axis indexing. +* An integrated group by engine for aggregating and transforming data sets. +* Date range generation (date_range) and custom date offsets enabling the + implementation of customized frequencies. +* Input/Output tools: loading tabular data from flat files (CSV, delimited, + Excel 2003), and saving and loading pandas objects from the fast and + efficient PyTables/HDF5 format. +* Memory-efficient "sparse" versions of the standard data structures for storing + data that is mostly missing or mostly constant (some fixed value). +* Moving window statistics (rolling mean, rolling standard deviation, etc.). Data Structures --------------- diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 250a1808e496e..88b7114cf4101 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -106,12 +106,12 @@ Closely related to the :meth:`~DataFrame.pivot` method are the related ``MultiIndex`` objects (see the section on :ref:`hierarchical indexing `). 
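Before the detailed description of ``stack`` and ``unstack`` that follows, a minimal sketch of the round trip (the frame is invented purely for illustration):

.. code-block:: python

    import pandas as pd

    df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["x", "y"])

    stacked = df.stack()   # column labels pivot into a new inner index level
    stacked.unstack()      # pivots that level back out, recovering df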
Here are essentially what these methods do: - - ``stack``: "pivot" a level of the (possibly hierarchical) column labels, - returning a ``DataFrame`` with an index with a new inner-most level of row - labels. - - ``unstack``: (inverse operation of ``stack``) "pivot" a level of the - (possibly hierarchical) row index to the column axis, producing a reshaped - ``DataFrame`` with a new inner-most level of column labels. +* ``stack``: "pivot" a level of the (possibly hierarchical) column labels, + returning a ``DataFrame`` with an index with a new inner-most level of row + labels. +* ``unstack``: (inverse operation of ``stack``) "pivot" a level of the + (possibly hierarchical) row index to the column axis, producing a reshaped + ``DataFrame`` with a new inner-most level of column labels. .. image:: _static/reshaping_unstack.png @@ -132,8 +132,8 @@ from the hierarchical indexing section: The ``stack`` function "compresses" a level in the ``DataFrame``'s columns to produce either: - - A ``Series``, in the case of a simple column Index. - - A ``DataFrame``, in the case of a ``MultiIndex`` in the columns. +* A ``Series``, in the case of a simple column Index. +* A ``DataFrame``, in the case of a ``MultiIndex`` in the columns. If the columns have a ``MultiIndex``, you can choose which level to stack. The stacked level becomes the new lowest level in a ``MultiIndex`` on the columns: @@ -351,13 +351,13 @@ strategies. It takes a number of arguments: -- ``data``: a DataFrame object. -- ``values``: a column or a list of columns to aggregate. -- ``index``: a column, Grouper, array which has the same length as data, or list of them. +* ``data``: a DataFrame object. +* ``values``: a column or a list of columns to aggregate. +* ``index``: a column, Grouper, array which has the same length as data, or list of them. Keys to group by on the pivot table index. If an array is passed, it is being used as the same manner as column values. -- ``columns``: a column, Grouper, array which has the same length as data, or list of them. +* ``columns``: a column, Grouper, array which has the same length as data, or list of them. Keys to group by on the pivot table column. If an array is passed, it is being used as the same manner as column values. -- ``aggfunc``: function to use for aggregation, defaulting to ``numpy.mean``. +* ``aggfunc``: function to use for aggregation, defaulting to ``numpy.mean``. Consider a data set like this: @@ -431,17 +431,17 @@ unless an array of values and an aggregation function are passed. It takes a number of arguments -- ``index``: array-like, values to group by in the rows. -- ``columns``: array-like, values to group by in the columns. -- ``values``: array-like, optional, array of values to aggregate according to +* ``index``: array-like, values to group by in the rows. +* ``columns``: array-like, values to group by in the columns. +* ``values``: array-like, optional, array of values to aggregate according to the factors. -- ``aggfunc``: function, optional, If no values array is passed, computes a +* ``aggfunc``: function, optional, If no values array is passed, computes a frequency table. -- ``rownames``: sequence, default ``None``, must match number of row arrays passed. -- ``colnames``: sequence, default ``None``, if passed, must match number of column +* ``rownames``: sequence, default ``None``, must match number of row arrays passed. +* ``colnames``: sequence, default ``None``, if passed, must match number of column arrays passed. 
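As a rough sketch of how the ``crosstab`` arguments listed here combine (the arrays are invented for the example; ``margins`` and ``normalize`` are described next):

.. code-block:: python

    import pandas as pd

    a = ["foo", "foo", "bar", "bar"]
    b = ["one", "two", "one", "two"]

    # with no values/aggfunc passed, crosstab builds a frequency table
    pd.crosstab(index=a, columns=b, rownames=["a"], colnames=["b"])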
-- ``margins``: boolean, default ``False``, Add row/column margins (subtotals) -- ``normalize``: boolean, {'all', 'index', 'columns'}, or {0,1}, default ``False``. +* ``margins``: boolean, default ``False``, Add row/column margins (subtotals) +* ``normalize``: boolean, {'all', 'index', 'columns'}, or {0,1}, default ``False``. Normalize by dividing all values by the sum of values. @@ -615,10 +615,10 @@ As with the ``Series`` version, you can pass values for the ``prefix`` and ``prefix_sep``. By default the column name is used as the prefix, and '_' as the prefix separator. You can specify ``prefix`` and ``prefix_sep`` in 3 ways: -- string: Use the same value for ``prefix`` or ``prefix_sep`` for each column +* string: Use the same value for ``prefix`` or ``prefix_sep`` for each column to be encoded. -- list: Must be the same length as the number of columns being encoded. -- dict: Mapping column name to prefix. +* list: Must be the same length as the number of columns being encoded. +* dict: Mapping column name to prefix. .. ipython:: python diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst index 260d8aa32ef52..2bb99dd1822b6 100644 --- a/doc/source/sparse.rst +++ b/doc/source/sparse.rst @@ -104,9 +104,9 @@ Sparse data should have the same dtype as its dense representation. Currently, ``float64``, ``int64`` and ``bool`` dtypes are supported. Depending on the original dtype, ``fill_value`` default changes: -- ``float64``: ``np.nan`` -- ``int64``: ``0`` -- ``bool``: ``False`` +* ``float64``: ``np.nan`` +* ``int64``: ``0`` +* ``bool``: ``False`` .. ipython:: python diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index ded54d2d355f1..ba58d65b00714 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -28,11 +28,11 @@ a tremendous amount of new functionality for manipulating time series data. In working with time series data, we will frequently seek to: - - generate sequences of fixed-frequency dates and time spans - - conform or convert time series to a particular frequency - - compute "relative" dates based on various non-standard time increments - (e.g. 5 business days before the last business day of the year), or "roll" - dates forward or backward +* generate sequences of fixed-frequency dates and time spans +* conform or convert time series to a particular frequency +* compute "relative" dates based on various non-standard time increments + (e.g. 5 business days before the last business day of the year), or "roll" + dates forward or backward pandas provides a relatively compact and self-contained set of tools for performing the above tasks. @@ -226,8 +226,8 @@ You can pass only the columns that you need to assemble. ``pd.to_datetime`` looks for standard designations of the datetime component in the column names, including: -- required: ``year``, ``month``, ``day`` -- optional: ``hour``, ``minute``, ``second``, ``millisecond``, ``microsecond``, ``nanosecond`` +* required: ``year``, ``month``, ``day`` +* optional: ``hour``, ``minute``, ``second``, ``millisecond``, ``microsecond``, ``nanosecond`` Invalid Data ~~~~~~~~~~~~ @@ -463,14 +463,14 @@ Indexing One of the main uses for ``DatetimeIndex`` is as an index for pandas objects. The ``DatetimeIndex`` class contains many time series related optimizations: - - A large range of dates for various offsets are pre-computed and cached - under the hood in order to make generating subsequent date ranges very fast - (just have to grab a slice). 
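For instance, assembling datetimes from the component columns mentioned above might look like the following (a hypothetical frame, shown only as a sketch):

.. code-block:: python

    import pandas as pd

    df = pd.DataFrame({"year": [2018, 2018],
                       "month": [1, 2],
                       "day": [4, 5],
                       "hour": [10, 11]})

    # the required year/month/day columns plus an optional hour component
    pd.to_datetime(df)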
- - Fast shifting using the ``shift`` and ``tshift`` method on pandas objects. - - Unioning of overlapping ``DatetimeIndex`` objects with the same frequency is - very fast (important for fast data alignment). - - Quick access to date fields via properties such as ``year``, ``month``, etc. - - Regularization functions like ``snap`` and very fast ``asof`` logic. +* A large range of dates for various offsets are pre-computed and cached + under the hood in order to make generating subsequent date ranges very fast + (just have to grab a slice). +* Fast shifting using the ``shift`` and ``tshift`` method on pandas objects. +* Unioning of overlapping ``DatetimeIndex`` objects with the same frequency is + very fast (important for fast data alignment). +* Quick access to date fields via properties such as ``year``, ``month``, etc. +* Regularization functions like ``snap`` and very fast ``asof`` logic. ``DatetimeIndex`` objects have all the basic functionality of regular ``Index`` objects, and a smorgasbord of advanced time series specific methods for easy @@ -797,11 +797,11 @@ We could have done the same thing with ``DateOffset``: The key features of a ``DateOffset`` object are: -- It can be added / subtracted to/from a datetime object to obtain a +* It can be added / subtracted to/from a datetime object to obtain a shifted date. -- It can be multiplied by an integer (positive or negative) so that the +* It can be multiplied by an integer (positive or negative) so that the increment will be applied multiple times. -- It has :meth:`~pandas.DateOffset.rollforward` and +* It has :meth:`~pandas.DateOffset.rollforward` and :meth:`~pandas.DateOffset.rollback` methods for moving a date forward or backward to the next or previous "offset date". @@ -2064,9 +2064,9 @@ To supply the time zone, you can use the ``tz`` keyword to ``date_range`` and other functions. Dateutil time zone strings are distinguished from ``pytz`` time zones by starting with ``dateutil/``. -- In ``pytz`` you can find a list of common (and less common) time zones using +* In ``pytz`` you can find a list of common (and less common) time zones using ``from pytz import common_timezones, all_timezones``. -- ``dateutil`` uses the OS timezones so there isn't a fixed list available. For +* ``dateutil`` uses the OS timezones so there isn't a fixed list available. For common zones, the names are the same as ``pytz``. .. ipython:: python diff --git a/doc/source/tutorials.rst b/doc/source/tutorials.rst index 895fe595de205..381031fa128e6 100644 --- a/doc/source/tutorials.rst +++ b/doc/source/tutorials.rst @@ -28,33 +28,33 @@ repository `_. To run the examples in th clone the GitHub repository and get IPython Notebook running. See `How to use this cookbook `_. -- `A quick tour of the IPython Notebook: `_ +* `A quick tour of the IPython Notebook: `_ Shows off IPython's awesome tab completion and magic functions. -- `Chapter 1: `_ +* `Chapter 1: `_ Reading your data into pandas is pretty much the easiest thing. Even when the encoding is wrong! -- `Chapter 2: `_ +* `Chapter 2: `_ It's not totally obvious how to select data from a pandas dataframe. Here we explain the basics (how to take slices and get columns) -- `Chapter 3: `_ +* `Chapter 3: `_ Here we get into serious slicing and dicing and learn how to filter dataframes in complicated ways, really fast. -- `Chapter 4: `_ +* `Chapter 4: `_ Groupby/aggregate is seriously my favorite thing about pandas and I use it all the time. You should probably read this. 
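To ground the ``DateOffset`` behaviour described earlier (addition, integer multiplication, and the roll methods), a brief sketch using an arbitrary date:

.. code-block:: python

    import pandas as pd
    from pandas.tseries.offsets import BMonthEnd

    ts = pd.Timestamp("2018-01-05")

    ts + 2 * pd.DateOffset(months=1)   # shift forward by two months
    BMonthEnd().rollforward(ts)        # roll to the next business month end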
-- `Chapter 5: `_ +* `Chapter 5: `_ Here you get to find out if it's cold in Montreal in the winter (spoiler: yes). Web scraping with pandas is fun! Here we combine dataframes. -- `Chapter 6: `_ +* `Chapter 6: `_ Strings with pandas are great. It has all these vectorized string operations and they're the best. We will turn a bunch of strings containing "Snow" into vectors of numbers in a trice. -- `Chapter 7: `_ +* `Chapter 7: `_ Cleaning up messy data is never a joy, but with pandas it's easier. -- `Chapter 8: `_ +* `Chapter 8: `_ Parsing Unix timestamps is confusing at first but it turns out to be really easy. -- `Chapter 9: `_ +* `Chapter 9: `_ Reading data from SQL databases. @@ -63,54 +63,54 @@ Lessons for new pandas users For more resources, please visit the main `repository `__. -- `01 - Lesson: `_ - - Importing libraries - - Creating data sets - - Creating data frames - - Reading from CSV - - Exporting to CSV - - Finding maximums - - Plotting data +* `01 - Lesson: `_ + * Importing libraries + * Creating data sets + * Creating data frames + * Reading from CSV + * Exporting to CSV + * Finding maximums + * Plotting data -- `02 - Lesson: `_ - - Reading from TXT - - Exporting to TXT - - Selecting top/bottom records - - Descriptive statistics - - Grouping/sorting data +* `02 - Lesson: `_ + * Reading from TXT + * Exporting to TXT + * Selecting top/bottom records + * Descriptive statistics + * Grouping/sorting data -- `03 - Lesson: `_ - - Creating functions - - Reading from EXCEL - - Exporting to EXCEL - - Outliers - - Lambda functions - - Slice and dice data +* `03 - Lesson: `_ + * Creating functions + * Reading from EXCEL + * Exporting to EXCEL + * Outliers + * Lambda functions + * Slice and dice data -- `04 - Lesson: `_ - - Adding/deleting columns - - Index operations +* `04 - Lesson: `_ + * Adding/deleting columns + * Index operations -- `05 - Lesson: `_ - - Stack/Unstack/Transpose functions +* `05 - Lesson: `_ + * Stack/Unstack/Transpose functions -- `06 - Lesson: `_ - - GroupBy function +* `06 - Lesson: `_ + * GroupBy function -- `07 - Lesson: `_ - - Ways to calculate outliers +* `07 - Lesson: `_ + * Ways to calculate outliers -- `08 - Lesson: `_ - - Read from Microsoft SQL databases +* `08 - Lesson: `_ + * Read from Microsoft SQL databases -- `09 - Lesson: `_ - - Export to CSV/EXCEL/TXT +* `09 - Lesson: `_ + * Export to CSV/EXCEL/TXT -- `10 - Lesson: `_ - - Converting between different kinds of formats +* `10 - Lesson: `_ + * Converting between different kinds of formats -- `11 - Lesson: `_ - - Combining data from various sources +* `11 - Lesson: `_ + * Combining data from various sources Practical data analysis with Python @@ -119,13 +119,13 @@ Practical data analysis with Python This `guide `_ is a comprehensive introduction to the data analysis process using the Python data ecosystem and an interesting open dataset. There are four sections covering selected topics as follows: -- `Munging Data `_ +* `Munging Data `_ -- `Aggregating Data `_ +* `Aggregating Data `_ -- `Visualizing Data `_ +* `Visualizing Data `_ -- `Time Series `_ +* `Time Series `_ .. _tutorial-exercises-new-users: @@ -134,25 +134,25 @@ Exercises for new users Practice your skills with real data sets and exercises. For more resources, please visit the main `repository `__. 
-- `01 - Getting & Knowing Your Data `_ +* `01 - Getting & Knowing Your Data `_ -- `02 - Filtering & Sorting `_ +* `02 - Filtering & Sorting `_ -- `03 - Grouping `_ +* `03 - Grouping `_ -- `04 - Apply `_ +* `04 - Apply `_ -- `05 - Merge `_ +* `05 - Merge `_ -- `06 - Stats `_ +* `06 - Stats `_ -- `07 - Visualization `_ +* `07 - Visualization `_ -- `08 - Creating Series and DataFrames `_ +* `08 - Creating Series and DataFrames `_ -- `09 - Time Series `_ +* `09 - Time Series `_ -- `10 - Deleting `_ +* `10 - Deleting `_ .. _tutorial-modern: @@ -164,29 +164,29 @@ Tutorial series written in 2016 by The source may be found in the GitHub repository `TomAugspurger/effective-pandas `_. -- `Modern Pandas `_ -- `Method Chaining `_ -- `Indexes `_ -- `Performance `_ -- `Tidy Data `_ -- `Visualization `_ -- `Timeseries `_ +* `Modern Pandas `_ +* `Method Chaining `_ +* `Indexes `_ +* `Performance `_ +* `Tidy Data `_ +* `Visualization `_ +* `Timeseries `_ Excel charts with pandas, vincent and xlsxwriter ------------------------------------------------ -- `Using Pandas and XlsxWriter to create Excel charts `_ +* `Using Pandas and XlsxWriter to create Excel charts `_ Video Tutorials --------------- -- `Pandas From The Ground Up `_ +* `Pandas From The Ground Up `_ (2015) (2:24) `GitHub repo `__ -- `Introduction Into Pandas `_ +* `Introduction Into Pandas `_ (2016) (1:28) `GitHub repo `__ -- `Pandas: .head() to .tail() `_ +* `Pandas: .head() to .tail() `_ (2016) (1:26) `GitHub repo `__ @@ -194,12 +194,12 @@ Video Tutorials Various Tutorials ----------------- -- `Wes McKinney's (pandas BDFL) blog `_ -- `Statistical analysis made easy in Python with SciPy and pandas DataFrames, by Randal Olson `_ -- `Statistical Data Analysis in Python, tutorial videos, by Christopher Fonnesbeck from SciPy 2013 `_ -- `Financial analysis in Python, by Thomas Wiecki `_ -- `Intro to pandas data structures, by Greg Reda `_ -- `Pandas and Python: Top 10, by Manish Amde `_ -- `Pandas Tutorial, by Mikhail Semeniuk `_ -- `Pandas DataFrames Tutorial, by Karlijn Willems `_ -- `A concise tutorial with real life examples `_ +* `Wes McKinney's (pandas BDFL) blog `_ +* `Statistical analysis made easy in Python with SciPy and pandas DataFrames, by Randal Olson `_ +* `Statistical Data Analysis in Python, tutorial videos, by Christopher Fonnesbeck from SciPy 2013 `_ +* `Financial analysis in Python, by Thomas Wiecki `_ +* `Intro to pandas data structures, by Greg Reda `_ +* `Pandas and Python: Top 10, by Manish Amde `_ +* `Pandas Tutorial, by Mikhail Semeniuk `_ +* `Pandas DataFrames Tutorial, by Karlijn Willems `_ +* `A concise tutorial with real life examples `_ diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 17197b805e86a..569a6fb7b7a0d 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -1381,9 +1381,9 @@ Plotting with error bars is supported in :meth:`DataFrame.plot` and :meth:`Serie Horizontal and vertical error bars can be supplied to the ``xerr`` and ``yerr`` keyword arguments to :meth:`~DataFrame.plot()`. The error values can be specified using a variety of formats: -- As a :class:`DataFrame` or ``dict`` of errors with column names matching the ``columns`` attribute of the plotting :class:`DataFrame` or matching the ``name`` attribute of the :class:`Series`. -- As a ``str`` indicating which of the columns of plotting :class:`DataFrame` contain the error values. -- As raw values (``list``, ``tuple``, or ``np.ndarray``). 
Must be the same length as the plotting :class:`DataFrame`/:class:`Series`. +* As a :class:`DataFrame` or ``dict`` of errors with column names matching the ``columns`` attribute of the plotting :class:`DataFrame` or matching the ``name`` attribute of the :class:`Series`. +* As a ``str`` indicating which of the columns of plotting :class:`DataFrame` contain the error values. +* As raw values (``list``, ``tuple``, or ``np.ndarray``). Must be the same length as the plotting :class:`DataFrame`/:class:`Series`. Asymmetrical error bars are also supported, however raw error values must be provided in this case. For a ``M`` length :class:`Series`, a ``Mx2`` array should be provided indicating lower and upper (or left and right) errors. For a ``MxN`` :class:`DataFrame`, asymmetrical errors should be in a ``Mx2xN`` array. From ca9ce8d70465c02ee1593f95971d1bf433b058b3 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Wed, 20 Jun 2018 11:29:49 +0100 Subject: [PATCH 059/113] PERF: add method Categorical.__contains__ (#21508) --- asv_bench/benchmarks/categoricals.py | 10 ++-- doc/source/whatsnew/v0.23.2.txt | 2 +- pandas/core/arrays/categorical.py | 59 ++++++++++++++++++++++ pandas/core/indexes/category.py | 29 ++--------- pandas/tests/categorical/test_operators.py | 17 +++++++ 5 files changed, 88 insertions(+), 29 deletions(-) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 48f42621d183d..73e3933122628 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -202,7 +202,11 @@ class Contains(object): def setup(self): N = 10**5 self.ci = tm.makeCategoricalIndex(N) - self.cat = self.ci.categories[0] + self.c = self.ci.values + self.key = self.ci.categories[0] - def time_contains(self): - self.cat in self.ci + def time_categorical_index_contains(self): + self.key in self.ci + + def time_categorical_contains(self): + self.key in self.c diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 0f2c9c4756987..5454dc9eca360 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -26,7 +26,7 @@ Performance Improvements - Improved performance of membership checks in :class:`CategoricalIndex` (i.e. ``x in ci``-style checks are much faster). :meth:`CategoricalIndex.contains` - is likewise much faster (:issue:`21369`) + is likewise much faster (:issue:`21369`, :issue:`21508`) - Improved performance of :meth:`MultiIndex.is_unique` (:issue:`21522`) - diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index e22b0d626a218..7b3cce0f2585d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -157,6 +157,57 @@ def _maybe_to_categorical(array): return array +def contains(cat, key, container): + """ + Helper for membership check for ``key`` in ``cat``. + + This is a helper method for :method:`__contains__` + and :class:`CategoricalIndex.__contains__`. + + Returns True if ``key`` is in ``cat.categories`` and the + location of ``key`` in ``categories`` is in ``container``. + + Parameters + ---------- + cat : :class:`Categorical`or :class:`categoricalIndex` + key : a hashable object + The key to check membership for. + container : Container (e.g. list-like or mapping) + The container to check for membership in. + + Returns + ------- + is_in : bool + True if ``key`` is in ``self.categories`` and location of + ``key`` in ``categories`` is in ``container``, else False. 
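A rough sketch of the membership semantics this helper encodes (actual values are what count, not merely declared categories; the example data is invented):

.. code-block:: python

    import numpy as np
    import pandas as pd

    c = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c'])

    'a' in c       # True: 'a' occurs as a value
    'b' in c       # False: 'b' is only a declared category
    np.nan in c    # False: no missing values are present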
+ + Notes + ----- + This method does not check for NaN values. Do that separately + before calling this method. + """ + hash(key) + + # get location of key in categories. + # If a KeyError, the key isn't in categories, so logically + # can't be in container either. + try: + loc = cat.categories.get_loc(key) + except KeyError: + return False + + # loc is the location of key in categories, but also the *value* + # for key in container. So, `key` may be in categories, + # but still not in `container`. Example ('b' in categories, + # but not in values): + # 'b' in Categorical(['a'], categories=['a', 'b']) # False + if is_scalar(loc): + return loc in container + else: + # if categories is an IntervalIndex, loc is an array. + return any(loc_ in container for loc_ in loc) + + _codes_doc = """The category codes of this categorical. Level codes are an array if integer which are the positions of the real @@ -1846,6 +1897,14 @@ def __iter__(self): """Returns an Iterator over the values of this Categorical.""" return iter(self.get_values().tolist()) + def __contains__(self, key): + """Returns True if `key` is in this Categorical.""" + # if key is a NaN, check if any NaN is in self. + if isna(key): + return self.isna().any() + + return contains(self, key, container=self._codes) + def _tidy_repr(self, max_vals=10, footer=True): """ a short repr displaying only max_vals and an optional (but default footer) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 0093d4940751e..fc669074758da 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -24,6 +24,7 @@ import pandas.core.common as com import pandas.core.missing as missing import pandas.core.indexes.base as ibase +from pandas.core.arrays.categorical import Categorical, contains _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update(dict(target_klass='CategoricalIndex')) @@ -125,7 +126,6 @@ def _create_from_codes(self, codes, categories=None, ordered=None, CategoricalIndex """ - from pandas.core.arrays import Categorical if categories is None: categories = self.categories if ordered is None: @@ -162,7 +162,6 @@ def _create_categorical(self, data, categories=None, ordered=None, if not isinstance(data, ABCCategorical): if ordered is None and dtype is None: ordered = False - from pandas.core.arrays import Categorical data = Categorical(data, categories=categories, ordered=ordered, dtype=dtype) else: @@ -323,32 +322,14 @@ def _reverse_indexer(self): @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs) def __contains__(self, key): - hash(key) - - if isna(key): # if key is a NaN, check if any NaN is in self. + # if key is a NaN, check if any NaN is in self. + if isna(key): return self.hasnans - # is key in self.categories? Then get its location. - # If not (i.e. KeyError), it logically can't be in self either - try: - loc = self.categories.get_loc(key) - except KeyError: - return False - - # loc is the location of key in self.categories, but also the value - # for key in self.codes and in self._engine. key may be in categories, - # but still not in self, check this. 
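Missing values take a separate path from the codes-based check; roughly, and with invented data:

.. code-block:: python

    import numpy as np
    import pandas as pd

    np.nan in pd.Categorical(['a'], categories=['a', 'b'])          # False: no NaN present
    np.nan in pd.Categorical(['a', np.nan], categories=['a', 'b'])  # True

    ci = pd.CategoricalIndex(['a', np.nan], categories=['a', 'b'])
    np.nan in ci                                                    # True, via ci.hasnans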
Example: - # 'b' in CategoricalIndex(['a'], categories=['a', 'b']) # False - if is_scalar(loc): - return loc in self._engine - else: - # if self.categories is IntervalIndex, loc is an array - # check if any scalar of the array is in self._engine - return any(loc_ in self._engine for loc_ in loc) + return contains(self, key, container=self._engine) @Appender(_index_shared_docs['contains'] % _index_doc_kwargs) def contains(self, key): - hash(key) return key in self def __array__(self, dtype=None): @@ -479,7 +460,6 @@ def where(self, cond, other=None): other = self._na_value values = np.where(cond, self.values, other) - from pandas.core.arrays import Categorical cat = Categorical(values, categories=self.categories, ordered=self.ordered) @@ -862,7 +842,6 @@ def _delegate_method(self, name, *args, **kwargs): def _add_accessors(cls): """ add in Categorical accessor methods """ - from pandas.core.arrays import Categorical CategoricalIndex._add_delegate_accessors( delegate=Categorical, accessors=["rename_categories", "reorder_categories", diff --git a/pandas/tests/categorical/test_operators.py b/pandas/tests/categorical/test_operators.py index fa8bb817616e4..a26de32d7446c 100644 --- a/pandas/tests/categorical/test_operators.py +++ b/pandas/tests/categorical/test_operators.py @@ -291,3 +291,20 @@ def test_numeric_like_ops(self): # invalid ufunc pytest.raises(TypeError, lambda: np.log(s)) + + def test_contains(self): + # GH21508 + c = pd.Categorical(list('aabbca'), categories=list('cab')) + + assert 'b' in c + assert 'z' not in c + assert np.nan not in c + with pytest.raises(TypeError): + assert [1] in c + + # assert codes NOT in index + assert 0 not in c + assert 1 not in c + + c = pd.Categorical(list('aabbca') + [np.nan], categories=list('cab')) + assert np.nan in c From 6289c76085bab7f2d713b7c6acc21dd329f6dfbe Mon Sep 17 00:00:00 2001 From: Kalyan Gokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Wed, 20 Jun 2018 16:03:07 +0530 Subject: [PATCH 060/113] REGR: Fixes first_valid_index when DataFrame or Series has duplicate row index (GH21441) (#21497) --- doc/source/whatsnew/v0.23.2.txt | 3 ++- pandas/core/generic.py | 23 +++++++++++------------ pandas/tests/frame/test_timeseries.py | 15 ++++++++++++++- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 5454dc9eca360..5b3e607956f7a 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -17,7 +17,8 @@ Fixed Regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) -- +- Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) +- .. 
_whatsnew_0232.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 555108a5d9349..1780e359164e2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8968,18 +8968,17 @@ def _find_valid_index(self, how): is_valid = is_valid.any(1) # reduce axis 1 if how == 'first': - # First valid value case - i = is_valid.idxmax() - if not is_valid[i]: - return None - return i - - elif how == 'last': - # Last valid value case - i = is_valid.values[::-1].argmax() - if not is_valid.iat[len(self) - i - 1]: - return None - return self.index[len(self) - i - 1] + idxpos = is_valid.values[::].argmax() + + if how == 'last': + idxpos = len(self) - 1 - is_valid.values[::-1].argmax() + + chk_notna = is_valid.iat[idxpos] + idx = self.index[idxpos] + + if not chk_notna: + return None + return idx @Appender(_shared_docs['valid_index'] % {'position': 'first', 'klass': 'NDFrame'}) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 90fbc6e628369..fb9bd74d9876d 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -506,7 +506,15 @@ def test_asfreq_fillvalue(self): actual_series = ts.asfreq(freq='1S', fill_value=9.0) assert_series_equal(expected_series, actual_series) - def test_first_last_valid(self): + @pytest.mark.parametrize("data,idx,expected_first,expected_last", [ + ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2), + ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2), + ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd'), + ({'A': [1, np.nan, 3]}, [1, 1, 2], 1, 2), + ({'A': [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2), + ({'A': [1, np.nan, 3]}, [1, 2, 2], 1, 2)]) + def test_first_last_valid(self, data, idx, + expected_first, expected_last): N = len(self.frame.index) mat = randn(N) mat[:5] = nan @@ -539,6 +547,11 @@ def test_first_last_valid(self): assert frame.first_valid_index().freq == frame.index.freq assert frame.last_valid_index().freq == frame.index.freq + # GH 21441 + df = DataFrame(data, index=idx) + assert expected_first == df.first_valid_index() + assert expected_last == df.last_valid_index() + def test_first_subset(self): ts = tm.makeTimeDataFrame(freq='12h') result = ts.first('10d') From b19219df541a9b6a3cbcf4608751218f79ee0e89 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 20 Jun 2018 03:35:09 -0700 Subject: [PATCH 061/113] API/BUG: Raise when int-dtype coercions fail (#21456) Closes gh-15832. --- doc/source/whatsnew/v0.24.0.txt | 28 ++++++++- pandas/core/dtypes/cast.py | 72 ++++++++++++++++++++++++ pandas/core/indexes/base.py | 17 +++--- pandas/core/series.py | 8 ++- pandas/tests/generic/test_generic.py | 10 ++-- pandas/tests/indexes/test_base.py | 9 ++- pandas/tests/indexes/test_numeric.py | 20 +++++++ pandas/tests/io/test_pytables.py | 2 +- pandas/tests/series/test_constructors.py | 26 +++++++-- 9 files changed, 170 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index c23ed006ff637..15c5cc97b8426 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -26,7 +26,7 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. _whatsnew_0240.api.datetimelike.normalize +.. _whatsnew_0240.api.datetimelike.normalize: Tick DateOffset Normalize Restrictions -------------------------------------- @@ -73,6 +73,32 @@ Datetimelike API Changes Other API Changes ^^^^^^^^^^^^^^^^^ +.. 
_whatsnew_0240.api.other.incompatibilities: + +Series and Index Data-Dtype Incompatibilities +--------------------------------------------- + +``Series`` and ``Index`` constructors now raise when the +data is incompatible with a passed ``dtype=`` (:issue:`15832`) + +Previous Behavior: + +.. code-block:: ipython + + In [4]: pd.Series([-1], dtype="uint64") + Out [4]: + 0 18446744073709551615 + dtype: uint64 + +Current Behavior: + +.. code-block:: ipython + + In [4]: pd.Series([-1], dtype="uint64") + Out [4]: + ... + OverflowError: Trying to coerce negative values to unsigned integers + - :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`) - - diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index ebc7a13234a98..65328dfc7347e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -20,6 +20,7 @@ is_dtype_equal, is_float_dtype, is_complex_dtype, is_integer_dtype, + is_unsigned_integer_dtype, is_datetime_or_timedelta_dtype, is_bool_dtype, is_scalar, is_string_dtype, _string_dtypes, @@ -1269,3 +1270,74 @@ def construct_1d_ndarray_preserving_na(values, dtype=None, copy=False): subarr = subarr2 return subarr + + +def maybe_cast_to_integer_array(arr, dtype, copy=False): + """ + Takes any dtype and returns the casted version, raising for when data is + incompatible with integer/unsigned integer dtypes. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + arr : array-like + The array to cast. + dtype : str, np.dtype + The integer dtype to cast the array to. + copy: boolean, default False + Whether to make a copy of the array before returning. + + Returns + ------- + int_arr : ndarray + An array of integer or unsigned integer dtype + + Raises + ------ + OverflowError : the dtype is incompatible with the data + ValueError : loss of precision has occurred during casting + + Examples + -------- + If you try to coerce negative values to unsigned integers, it raises: + + >>> Series([-1], dtype="uint64") + Traceback (most recent call last): + ... + OverflowError: Trying to coerce negative values to unsigned integers + + Also, if you try to coerce float values to integers, it raises: + + >>> Series([1, 2, 3.5], dtype="int64") + Traceback (most recent call last): + ... + ValueError: Trying to coerce float values to integers + """ + + try: + if not hasattr(arr, "astype"): + casted = np.array(arr, dtype=dtype, copy=copy) + else: + casted = arr.astype(dtype, copy=copy) + except OverflowError: + raise OverflowError("The elements provided in the data cannot all be " + "casted to the dtype {dtype}".format(dtype=dtype)) + + if np.array_equal(arr, casted): + return casted + + # We do this casting to allow for proper + # data and dtype checking. + # + # We didn't do this earlier because NumPy + # doesn't handle `uint64` correctly. 
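A sketch of the stricter ``Series`` constructor behaviour this change introduces (exception messages paraphrased in the comments):

.. code-block:: python

    import pandas as pd

    pd.Series([1, 2, 3], dtype="uint64")       # unchanged: the values fit the dtype

    try:
        pd.Series([-1], dtype="uint64")        # negative values cannot be coerced
    except OverflowError as err:
        print(err)

    try:
        pd.Series([1, 2, 3.5], dtype="int64")  # silent truncation is no longer allowed
    except ValueError as err:
        print(err)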
+ arr = np.asarray(arr) + + if is_unsigned_integer_dtype(dtype) and (arr < 0).any(): + raise OverflowError("Trying to coerce negative values " + "to unsigned integers") + + if is_integer_dtype(dtype) and (is_float_dtype(arr) or + is_object_dtype(arr)): + raise ValueError("Trying to coerce float values to integers") diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 577b715ca9998..ac33ffad762cd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -21,6 +21,7 @@ ABCPeriodIndex, ABCTimedeltaIndex, ABCDateOffset) from pandas.core.dtypes.missing import isna, array_equivalent +from pandas.core.dtypes.cast import maybe_cast_to_integer_array from pandas.core.dtypes.common import ( _ensure_int64, _ensure_object, @@ -311,19 +312,16 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if is_integer_dtype(dtype): inferred = lib.infer_dtype(data) if inferred == 'integer': - try: - data = np.array(data, copy=copy, dtype=dtype) - except OverflowError: - # gh-15823: a more user-friendly error message - raise OverflowError( - "the elements provided in the data cannot " - "all be casted to the dtype {dtype}" - .format(dtype=dtype)) + data = maybe_cast_to_integer_array(data, dtype, + copy=copy) elif inferred in ['floating', 'mixed-integer-float']: if isna(data).any(): raise ValueError('cannot convert float ' 'NaN to integer') + if inferred == "mixed-integer-float": + data = maybe_cast_to_integer_array(data, dtype) + # If we are actually all equal to integers, # then coerce to integer. try: @@ -352,7 +350,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, except (TypeError, ValueError) as e: msg = str(e) - if 'cannot convert float' in msg: + if ("cannot convert float" in msg or + "Trying to coerce float values to integer" in msg): raise # maybe coerce to a sub-class diff --git a/pandas/core/series.py b/pandas/core/series.py index 23c4bbe082f28..2f762dff4aeab 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -41,7 +41,8 @@ maybe_cast_to_datetime, maybe_castable, construct_1d_arraylike_from_scalar, construct_1d_ndarray_preserving_na, - construct_1d_object_array_from_listlike) + construct_1d_object_array_from_listlike, + maybe_cast_to_integer_array) from pandas.core.dtypes.missing import ( isna, notna, @@ -4068,6 +4069,11 @@ def _try_cast(arr, take_fast_path): return arr try: + # gh-15832: Check if we are requesting a numeric dype and + # that we can convert the data to the requested dtype. + if is_float_dtype(dtype) or is_integer_dtype(dtype): + subarr = maybe_cast_to_integer_array(arr, dtype) + subarr = maybe_cast_to_datetime(arr, dtype) # Take care in creating object arrays (but iterators are not # supported): diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 311c71f734945..533bff0384ad9 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -199,11 +199,11 @@ def test_downcast(self): self._compare(result, expected) def test_constructor_compound_dtypes(self): - # GH 5191 - # compound dtypes should raise not-implementederror + # see gh-5191 + # Compound dtypes should raise NotImplementedError. 
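The ``Index`` constructor gains the same guards; roughly, with invented data:

.. code-block:: python

    import pandas as pd

    pd.Index([1, 2, 3], dtype="int64")         # fine: the data is already integral

    try:
        pd.Index([1, 2, 3.5], dtype="int64")   # lossy float-to-int coercion now raises
    except ValueError as err:
        print(err)

    try:
        pd.Index([-1], dtype="uint64")         # negative values cannot become unsigned
    except OverflowError as err:
        print(err)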
def f(dtype): - return self._construct(shape=3, dtype=dtype) + return self._construct(shape=3, value=1, dtype=dtype) pytest.raises(NotImplementedError, f, [("A", "datetime64[h]"), ("B", "str"), @@ -534,14 +534,14 @@ def test_truncate_out_of_bounds(self): # small shape = [int(2e3)] + ([1] * (self._ndim - 1)) - small = self._construct(shape, dtype='int8') + small = self._construct(shape, dtype='int8', value=1) self._compare(small.truncate(), small) self._compare(small.truncate(before=0, after=3e3), small) self._compare(small.truncate(before=-1, after=2e3), small) # big shape = [int(2e6)] + ([1] * (self._ndim - 1)) - big = self._construct(shape, dtype='int8') + big = self._construct(shape, dtype='int8', value=1) self._compare(big.truncate(), big) self._compare(big.truncate(before=0, after=3e6), big) self._compare(big.truncate(before=-1, after=2e6), big) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 1d8a958c3413f..daba56e0c1e29 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -486,11 +486,18 @@ def test_constructor_nonhashable_name(self, indices): def test_constructor_overflow_int64(self): # see gh-15832 - msg = ("the elements provided in the data cannot " + msg = ("The elements provided in the data cannot " "all be casted to the dtype int64") with tm.assert_raises_regex(OverflowError, msg): Index([np.iinfo(np.uint64).max - 1], dtype="int64") + @pytest.mark.xfail(reason="see gh-21311: Index " + "doesn't enforce dtype argument") + def test_constructor_cast(self): + msg = "could not convert string to float" + with tm.assert_raises_regex(ValueError, msg): + Index(["a", "b", "c"], dtype=float) + def test_view_with_args(self): restricted = ['unicodeIndex', 'strIndex', 'catIndex', 'boolIndex', diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 49322d9b7abd6..166af4c89877d 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -451,6 +451,18 @@ def test_astype(self): i = Float64Index([0, 1.1, np.NAN]) pytest.raises(ValueError, lambda: i.astype(dtype)) + def test_type_coercion_fail(self, any_int_dtype): + # see gh-15832 + msg = "Trying to coerce float values to integers" + with tm.assert_raises_regex(ValueError, msg): + Index([1, 2, 3.5], dtype=any_int_dtype) + + def test_type_coercion_valid(self, float_dtype): + # There is no Float32Index, so we always + # generate Float64Index. 
+ i = Index([1, 2, 3.5], dtype=float_dtype) + tm.assert_index_equal(i, Index([1, 2, 3.5])) + def test_equals_numeric(self): i = Float64Index([1.0, 2.0]) @@ -862,6 +874,14 @@ def test_constructor_corner(self): with tm.assert_raises_regex(TypeError, 'casting'): Int64Index(arr_with_floats) + def test_constructor_coercion_signed_to_unsigned(self, uint_dtype): + + # see gh-15832 + msg = "Trying to coerce negative values to unsigned integers" + + with tm.assert_raises_regex(OverflowError, msg): + Index([-1], dtype=uint_dtype) + def test_coerce_list(self): # coerce things arr = Index([1, 2, 3, 4]) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index d590cfd6b6c64..f96e7eeb40ea2 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2047,7 +2047,7 @@ def test_table_values_dtypes_roundtrip(self): assert df1.dtypes[0] == 'float32' # check with mixed dtypes - df1 = DataFrame(dict((c, Series(np.random.randn(5), dtype=c)) + df1 = DataFrame(dict((c, Series(np.random.randint(5), dtype=c)) for c in ['float32', 'float64', 'int32', 'int64', 'int16', 'int8'])) df1['string'] = 'foo' diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 906d2aacd5586..27cfec0dbf20d 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -542,12 +542,30 @@ def test_constructor_pass_nan_nat(self): tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp) def test_constructor_cast(self): - pytest.raises(ValueError, Series, ['a', 'b', 'c'], dtype=float) + msg = "could not convert string to float" + with tm.assert_raises_regex(ValueError, msg): + Series(["a", "b", "c"], dtype=float) + + def test_constructor_unsigned_dtype_overflow(self, uint_dtype): + # see gh-15832 + msg = 'Trying to coerce negative values to unsigned integers' + with tm.assert_raises_regex(OverflowError, msg): + Series([-1], dtype=uint_dtype) + + def test_constructor_coerce_float_fail(self, any_int_dtype): + # see gh-15832 + msg = "Trying to coerce float values to integers" + with tm.assert_raises_regex(ValueError, msg): + Series([1, 2, 3.5], dtype=any_int_dtype) + + def test_constructor_coerce_float_valid(self, float_dtype): + s = Series([1, 2, 3.5], dtype=float_dtype) + expected = Series([1, 2, 3.5]).astype(float_dtype) + assert_series_equal(s, expected) - def test_constructor_dtype_nocast(self): - # 1572 + def test_constructor_dtype_no_cast(self): + # see gh-1572 s = Series([1, 2, 3]) - s2 = Series(s, dtype=np.int64) s2[1] = 5 From 3814d0c3f77868809e2b343310a1534ea4b87b1d Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Wed, 20 Jun 2018 18:09:15 -0600 Subject: [PATCH 062/113] DOC: Add documentation for freq='infer' option of DatetimeIndex and TimedeltaIndex constructors (#21566) --- doc/source/timedeltas.rst | 7 +++++++ doc/source/timeseries.rst | 13 +++++++++++++ pandas/core/indexes/datetimes.py | 5 ++++- pandas/core/indexes/timedeltas.py | 5 ++++- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index 745810704f665..e602e45784f4a 100644 --- a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -363,6 +363,13 @@ or ``np.timedelta64`` objects. 
Passing ``np.nan/pd.NaT/nat`` will represent miss pd.TimedeltaIndex(['1 days', '1 days, 00:00:05', np.timedelta64(2,'D'), datetime.timedelta(days=2,seconds=2)]) +The string 'infer' can be passed in order to set the frequency of the index as the +inferred frequency upon creation: + +.. ipython:: python + + pd.TimedeltaIndex(['0 days', '10 days', '20 days'], freq='infer') + Generating Ranges of Time Deltas ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index ba58d65b00714..11157264304b0 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -185,6 +185,19 @@ options like ``dayfirst`` or ``format``, so use ``to_datetime`` if these are req pd.Timestamp('2010/11/12') +You can also use the ``DatetimeIndex`` constructor directly: + +.. ipython:: python + + pd.DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05']) + +The string 'infer' can be passed in order to set the frequency of the index as the +inferred frequency upon creation: + +.. ipython:: python + + pd.DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], freq='infer') + Providing a Format Argument ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e944df7aa83c6..9515d41080f87 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -187,7 +187,10 @@ class DatetimeIndex(DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, copy : bool Make a copy of input ndarray freq : string or pandas offset object, optional - One of pandas date offset strings or corresponding objects + One of pandas date offset strings or corresponding objects. The string + 'infer' can be passed in order to set the frequency of the index as the + inferred frequency upon creation + start : starting value, datetime-like, optional If data is None, start is used as the start point in generating regular timestamp data. diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 9707d19953418..e90e1264638b0 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -107,7 +107,10 @@ class TimedeltaIndex(DatetimeIndexOpsMixin, TimelikeOps, Int64Index): Optional timedelta-like data to construct index with unit: unit of the arg (D,h,m,s,ms,us,ns) denote the unit, optional which is an integer/float number - freq: a frequency for the index, optional + freq : string or pandas offset object, optional + One of pandas date offset strings or corresponding objects. 
The string + 'infer' can be passed in order to set the frequency of the index as the + inferred frequency upon creation copy : bool Make a copy of input ndarray start : starting value, timedelta-like, optional From ea205c0c32c8e857a1e60fe2cd093627a9153ff0 Mon Sep 17 00:00:00 2001 From: Michael Odintsov Date: Thu, 21 Jun 2018 05:54:23 +0300 Subject: [PATCH 063/113] BUG: Fix group index calculation to prevent hitting maximum recursion depth (#21541) --- doc/source/whatsnew/v0.23.2.txt | 1 + pandas/core/sorting.py | 29 ++++++++++++++++------------ pandas/tests/frame/test_analytics.py | 17 ++++++++++++++++ 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 5b3e607956f7a..9271f58947f95 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -61,6 +61,7 @@ Bug Fixes - Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`) - Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`) +- Bug in :func:`DataFrame.duplicated` with a large number of columns causing a 'maximum recursion depth exceeded' (:issue:`21524`). - **I/O** diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index e550976d1deeb..212f44e55c489 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -52,7 +52,21 @@ def _int64_cut_off(shape): return i return len(shape) - def loop(labels, shape): + def maybe_lift(lab, size): + # promote nan values (assigned -1 label in lab array) + # so that all output values are non-negative + return (lab + 1, size + 1) if (lab == -1).any() else (lab, size) + + labels = map(_ensure_int64, labels) + if not xnull: + labels, shape = map(list, zip(*map(maybe_lift, labels, shape))) + + labels = list(labels) + shape = list(shape) + + # Iteratively process all the labels in chunks sized so less + # than _INT64_MAX unique int ids will be required for each chunk + while True: # how many levels can be done without overflow: nlev = _int64_cut_off(shape) @@ -74,7 +88,7 @@ def loop(labels, shape): out[mask] = -1 if nlev == len(shape): # all levels done! - return out + break # compress what has been done so far in order to avoid overflow # to retain lexical ranks, obs_ids should be sorted @@ -83,16 +97,7 @@ def loop(labels, shape): labels = [comp_ids] + labels[nlev:] shape = [len(obs_ids)] + shape[nlev:] - return loop(labels, shape) - - def maybe_lift(lab, size): # pormote nan values - return (lab + 1, size + 1) if (lab == -1).any() else (lab, size) - - labels = map(_ensure_int64, labels) - if not xnull: - labels, shape = map(list, zip(*map(maybe_lift, labels, shape))) - - return loop(list(labels), list(shape)) + return out def get_compressed_ids(labels, sizes): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 6dc24ed856017..12ebdbe0fd3c7 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1527,6 +1527,23 @@ def test_duplicated_with_misspelled_column_name(self, subset): with pytest.raises(KeyError): df.drop_duplicates(subset) + @pytest.mark.slow + def test_duplicated_do_not_fail_on_wide_dataframes(self): + # gh-21524 + # Given the wide dataframe with a lot of columns + # with different (important!) 
values + data = {'col_{0:02d}'.format(i): np.random.randint(0, 1000, 30000) + for i in range(100)} + df = pd.DataFrame(data).T + result = df.duplicated() + + # Then duplicates produce the bool pd.Series as a result + # and don't fail during calculation. + # Actual values doesn't matter here, though usually + # it's all False in this case + assert isinstance(result, pd.Series) + assert result.dtype == np.bool + def test_drop_duplicates_with_duplicate_column_names(self): # GH17836 df = DataFrame([ From fd8d6bcd82bfbaccc8b22ec0ecf7e049ff35fa93 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 21 Jun 2018 09:13:01 +0100 Subject: [PATCH 064/113] BUG: Fix passing empty label to df drop (#21515) Closes #21494 --- doc/source/whatsnew/v0.23.2.txt | 1 + pandas/core/generic.py | 21 ++-- pandas/core/indexes/base.py | 4 +- pandas/core/indexes/multi.py | 1 - .../tests/frame/test_axis_select_reindex.py | 15 +++ .../tests/series/indexing/test_alter_index.py | 106 ++++++++++++------ 6 files changed, 98 insertions(+), 50 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 9271f58947f95..cae0d1a754d89 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -61,6 +61,7 @@ Bug Fixes - Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`) - Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`) +- Bug in :meth:`DataFrame.drop` behaviour is not consistent for unique and non-unique indexes (:issue:`21494`) - Bug in :func:`DataFrame.duplicated` with a large number of columns causing a 'maximum recursion depth exceeded' (:issue:`21524`). - diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1780e359164e2..9902da4094404 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3129,7 +3129,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): """ axis = self._get_axis_number(axis) axis_name = self._get_axis_name(axis) - axis, axis_ = self._get_axis(axis), axis + axis = self._get_axis(axis) if axis.is_unique: if level is not None: @@ -3138,24 +3138,25 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): new_axis = axis.drop(labels, level=level, errors=errors) else: new_axis = axis.drop(labels, errors=errors) - dropped = self.reindex(**{axis_name: new_axis}) - try: - dropped.axes[axis_].set_names(axis.names, inplace=True) - except AttributeError: - pass - result = dropped + result = self.reindex(**{axis_name: new_axis}) + # Case for non-unique axis else: labels = _ensure_object(com._index_labels_to_array(labels)) if level is not None: if not isinstance(axis, MultiIndex): raise AssertionError('axis must be a MultiIndex') indexer = ~axis.get_level_values(level).isin(labels) + + # GH 18561 MultiIndex.drop should raise if label is absent + if errors == 'raise' and indexer.all(): + raise KeyError('{} not found in axis'.format(labels)) else: indexer = ~axis.isin(labels) - - if errors == 'raise' and indexer.all(): - raise KeyError('{} not found in axis'.format(labels)) + # Check if label doesn't exist along axis + labels_missing = (axis.get_indexer_for(labels) == -1).any() + if errors == 'raise' and labels_missing: + raise KeyError('{} not found in axis'.format(labels)) slicer = [slice(None)] * self.ndim slicer[self._get_axis_number(axis_name)] = indexer diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 
ac33ffad762cd..4f140a6e77b2f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4341,7 +4341,7 @@ def drop(self, labels, errors='raise'): Raises ------ KeyError - If none of the labels are found in the selected axis + If not all of the labels are found in the selected axis """ arr_dtype = 'object' if self.dtype == 'object' else None labels = com._index_labels_to_array(labels, dtype=arr_dtype) @@ -4350,7 +4350,7 @@ def drop(self, labels, errors='raise'): if mask.any(): if errors != 'ignore': raise KeyError( - 'labels %s not contained in axis' % labels[mask]) + '{} not found in axis'.format(labels[mask])) indexer = indexer[~mask] return self.delete(indexer) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ab23a80acdaae..61b50f139dd10 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1707,7 +1707,6 @@ def drop(self, labels, level=None, errors='raise'): if errors != 'ignore': raise ValueError('labels %s not contained in axis' % labels[mask]) - indexer = indexer[~mask] except Exception: pass diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 28e82f7585850..0e0d6598f5101 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -1151,3 +1151,18 @@ def test_raise_on_drop_duplicate_index(self, actual): expected_no_err = actual.T.drop('c', axis=1, level=level, errors='ignore') assert_frame_equal(expected_no_err.T, actual) + + @pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 2]]) + @pytest.mark.parametrize('drop_labels', [[], [1], [2]]) + def test_drop_empty_list(self, index, drop_labels): + # GH 21494 + expected_index = [i for i in index if i not in drop_labels] + frame = pd.DataFrame(index=index).drop(drop_labels) + tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index)) + + @pytest.mark.parametrize('index', [[1, 2, 3], [1, 2, 2]]) + @pytest.mark.parametrize('drop_labels', [[1, 4], [4, 5]]) + def test_drop_non_empty_list(self, index, drop_labels): + # GH 21494 + with tm.assert_raises_regex(KeyError, 'not found in axis'): + pd.DataFrame(index=index).drop(drop_labels) diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index bcd5a64402c33..561d6a9b42508 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -472,54 +472,86 @@ def test_rename(): assert result.name == expected.name -def test_drop(): - # unique - s = Series([1, 2], index=['one', 'two']) - expected = Series([1], index=['one']) - result = s.drop(['two']) - assert_series_equal(result, expected) - result = s.drop('two', axis='rows') - assert_series_equal(result, expected) - - # non-unique - # GH 5248 - s = Series([1, 1, 2], index=['one', 'two', 'one']) - expected = Series([1, 2], index=['one', 'one']) - result = s.drop(['two'], axis=0) - assert_series_equal(result, expected) - result = s.drop('two') - assert_series_equal(result, expected) - - expected = Series([1], index=['two']) - result = s.drop(['one']) - assert_series_equal(result, expected) - result = s.drop('one') - assert_series_equal(result, expected) +@pytest.mark.parametrize( + 'data, index, drop_labels,' + ' axis, expected_data, expected_index', + [ + # Unique Index + ([1, 2], ['one', 'two'], ['two'], + 0, [1], ['one']), + ([1, 2], ['one', 'two'], ['two'], + 'rows', [1], ['one']), + ([1, 1, 2], ['one', 'two', 'one'], ['two'], + 0, [1, 
2], ['one', 'one']), + + # GH 5248 Non-Unique Index + ([1, 1, 2], ['one', 'two', 'one'], 'two', + 0, [1, 2], ['one', 'one']), + ([1, 1, 2], ['one', 'two', 'one'], ['one'], + 0, [1], ['two']), + ([1, 1, 2], ['one', 'two', 'one'], 'one', + 0, [1], ['two'])]) +def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels, + expected_data, expected_index): + + s = Series(data=data, index=index) + result = s.drop(drop_labels, axis=axis) + expected = Series(data=expected_data, index=expected_index) + tm.assert_series_equal(result, expected) - # single string/tuple-like - s = Series(range(3), index=list('abc')) - pytest.raises(KeyError, s.drop, 'bc') - pytest.raises(KeyError, s.drop, ('a',)) +@pytest.mark.parametrize( + 'data, index, drop_labels,' + ' axis, error_type, error_desc', + [ + # single string/tuple-like + (range(3), list('abc'), 'bc', + 0, KeyError, 'not found in axis'), + + # bad axis + (range(3), list('abc'), ('a',), + 0, KeyError, 'not found in axis'), + (range(3), list('abc'), 'one', + 'columns', ValueError, 'No axis named columns')]) +def test_drop_exception_raised(data, index, drop_labels, + axis, error_type, error_desc): + + with tm.assert_raises_regex(error_type, error_desc): + Series(data, index=index).drop(drop_labels, axis=axis) + + +def test_drop_with_ignore_errors(): # errors='ignore' s = Series(range(3), index=list('abc')) result = s.drop('bc', errors='ignore') - assert_series_equal(result, s) + tm.assert_series_equal(result, s) result = s.drop(['a', 'd'], errors='ignore') expected = s.iloc[1:] - assert_series_equal(result, expected) - - # bad axis - pytest.raises(ValueError, s.drop, 'one', axis='columns') + tm.assert_series_equal(result, expected) # GH 8522 s = Series([2, 3], index=[True, False]) assert s.index.is_object() result = s.drop(True) expected = Series([3], index=[False]) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) + - # GH 16877 - s = Series([2, 3], index=[0, 1]) - with tm.assert_raises_regex(KeyError, 'not contained in axis'): - s.drop([False, True]) +@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 3]]) +@pytest.mark.parametrize('drop_labels', [[], [1], [3]]) +def test_drop_empty_list(index, drop_labels): + # GH 21494 + expected_index = [i for i in index if i not in drop_labels] + series = pd.Series(index=index).drop(drop_labels) + tm.assert_series_equal(series, pd.Series(index=expected_index)) + + +@pytest.mark.parametrize('data, index, drop_labels', [ + (None, [1, 2, 3], [1, 4]), + (None, [1, 2, 2], [1, 4]), + ([2, 3], [0, 1], [False, True]) +]) +def test_drop_non_empty_list(data, index, drop_labels): + # GH 21494 and GH 16877 + with tm.assert_raises_regex(KeyError, 'not found in axis'): + pd.Series(data=data, index=index).drop(drop_labels) From f3a89f319d34c4155d71ee180de61e8a7800d8d1 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Thu, 21 Jun 2018 11:42:41 +0200 Subject: [PATCH 065/113] ERR: Raise a simpler backtrace for missing key (#21558) --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/indexing.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 15c5cc97b8426..a9c49b7476fa6 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -197,7 +197,7 @@ Strings Indexing ^^^^^^^^ -- +- The traceback from a ``KeyError`` when asking ``.loc`` for a single missing label is now shorter and more clear (:issue:`21557`) - - diff --git a/pandas/core/indexing.py 
b/pandas/core/indexing.py index d5e81105dd323..38b6aaa2230fb 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1807,8 +1807,6 @@ def error(): try: key = self._convert_scalar_indexer(key, axis) - if not ax.contains(key): - error() except TypeError as e: # python 3 type errors should be raised @@ -1818,6 +1816,9 @@ def error(): except: error() + if not ax.contains(key): + error() + def _is_scalar_access(self, key): # this is a shortcut accessor to both .loc and .iloc # that provide the equivalent access of .at and .iat From eb47287453dc33bc745485504c999d4bc39beb64 Mon Sep 17 00:00:00 2001 From: Andrew Spott Date: Thu, 21 Jun 2018 03:47:12 -0600 Subject: [PATCH 066/113] Fixed HDFSTore.groups() performance. (#21543) --- doc/source/whatsnew/v0.23.2.txt | 5 ++++- pandas/io/pytables.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index cae0d1a754d89..54ddea9a25254 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -18,7 +18,7 @@ Fixed Regressions - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) - Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) -- +- .. _whatsnew_0232.performance: @@ -28,6 +28,9 @@ Performance Improvements - Improved performance of membership checks in :class:`CategoricalIndex` (i.e. ``x in ci``-style checks are much faster). :meth:`CategoricalIndex.contains` is likewise much faster (:issue:`21369`, :issue:`21508`) +- Improved performance of :meth:`HDFStore.groups` (and dependent functions like + :meth:`~HDFStore.keys`. (i.e. ``x in store`` checks are much faster) + (:issue:`21372`) - Improved performance of :meth:`MultiIndex.is_unique` (:issue:`21522`) - diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index aa39e341792c7..aad387e0cdd58 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1098,7 +1098,7 @@ def groups(self): _tables() self._check_if_open() return [ - g for g in self._handle.walk_nodes() + g for g in self._handle.walk_groups() if (not isinstance(g, _table_mod.link.Link) and (getattr(g._v_attrs, 'pandas_type', None) or getattr(g, 'table', None) or From c815e0b626db9b6056ce85ccd5065506ac402039 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Thu, 21 Jun 2018 11:10:31 +0100 Subject: [PATCH 067/113] DOC: Fixing spaces around backticks, and linting (#21570) --- ci/lint.sh | 8 ++++++++ doc/source/merging.rst | 8 ++++---- doc/source/release.rst | 4 ++-- doc/source/reshaping.rst | 2 +- doc/source/timeseries.rst | 4 ++-- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index 2cbf6f7ae52a9..9bcee55e1344c 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -174,6 +174,14 @@ if [ "$LINT" ]; then fi echo "Check for old-style classes DONE" + echo "Check for backticks incorrectly rendering because of missing spaces" + grep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/ + + if [ $? 
= "0" ]; then + RET=1 + fi + echo "Check for backticks incorrectly rendering because of missing spaces DONE" + else echo "NOT Linting" fi diff --git a/doc/source/merging.rst b/doc/source/merging.rst index b2cb388e3cd03..2eb5962ead986 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -279,9 +279,9 @@ need to be: Ignoring indexes on the concatenation axis ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -For ``DataFrame``s which don't have a meaningful index, you may wish to append -them and ignore the fact that they may have overlapping indexes. To do this, use -the ``ignore_index`` argument: +For ``DataFrame`` objects which don't have a meaningful index, you may wish +to append them and ignore the fact that they may have overlapping indexes. To +do this, use the ``ignore_index`` argument: .. ipython:: python @@ -314,7 +314,7 @@ This is also a valid argument to :meth:`DataFrame.append`: Concatenating with mixed ndims ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -You can concatenate a mix of ``Series`` and ``DataFrame``s. The +You can concatenate a mix of ``Series`` and ``DataFrame`` objects. The ``Series`` will be transformed to ``DataFrame`` with the column name as the name of the ``Series``. diff --git a/doc/source/release.rst b/doc/source/release.rst index 7bbd4ba43e66f..16fe896d9f58f 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -2641,7 +2641,7 @@ Improvements to existing features option it is no longer possible to round trip Excel files with merged MultiIndex and Hierarchical Rows. Set the ``merge_cells`` to ``False`` to restore the previous behaviour. (:issue:`5254`) -- The FRED DataReader now accepts multiple series (:issue`3413`) +- The FRED DataReader now accepts multiple series (:issue:`3413`) - StataWriter adjusts variable names to Stata's limitations (:issue:`5709`) API Changes @@ -2837,7 +2837,7 @@ API Changes copy through chained assignment is detected, settable via option ``mode.chained_assignment`` - test the list of ``NA`` values in the csv parser. add ``N/A``, ``#NA`` as independent default na values (:issue:`5521`) -- The refactoring involving``Series`` deriving from ``NDFrame`` breaks ``rpy2<=2.3.8``. an Issue +- The refactoring involving ``Series`` deriving from ``NDFrame`` breaks ``rpy2<=2.3.8``. an Issue has been opened against rpy2 and a workaround is detailed in :issue:`5698`. Thanks @JanSchulz. - ``Series.argmin`` and ``Series.argmax`` are now aliased to ``Series.idxmin`` and ``Series.idxmax``. These return the *index* of the min or max element respectively. Prior to 0.13.0 these would return diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 88b7114cf4101..7d9925d800441 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -654,7 +654,7 @@ When a column contains only one level, it will be omitted in the result. pd.get_dummies(df, drop_first=True) By default new columns will have ``np.uint8`` dtype. -To choose another dtype, use the``dtype`` argument: +To choose another dtype, use the ``dtype`` argument: .. 
ipython:: python diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 11157264304b0..9e01296d9c9c7 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -2169,8 +2169,8 @@ still considered to be equal even if they are in different time zones: rng_berlin[5] rng_eastern[5] == rng_berlin[5] -Like ``Series``, ``DataFrame``, and ``DatetimeIndex``, ``Timestamp``s can be converted to other -time zones using ``tz_convert``: +Like ``Series``, ``DataFrame``, and ``DatetimeIndex``; ``Timestamp`` objects +can be converted to other time zones using ``tz_convert``: .. ipython:: python From 91c9ec3b6741f88d29769fa3cd1c352db09a0119 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 21 Jun 2018 03:18:53 -0700 Subject: [PATCH 068/113] fix hashing string-casting error (#21187) --- doc/source/whatsnew/v0.23.2.txt | 1 + pandas/_libs/hashing.pyx | 7 ++----- pandas/tests/series/test_repr.py | 30 ++++++++++++++++++++++++++++++ pandas/util/testing.py | 22 ++++++++++++++++++++++ 4 files changed, 55 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 54ddea9a25254..c781f45715bd4 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -86,6 +86,7 @@ Bug Fixes **Categorical** +- Bug in rendering :class:`Series` with ``Categorical`` dtype in rare conditions under Python 2.7 (:issue:`21002`) - **Timezones** diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index c6f182ac5003f..4489847518a1d 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -8,8 +8,7 @@ import numpy as np from numpy cimport ndarray, uint8_t, uint32_t, uint64_t from util cimport _checknull -from cpython cimport (PyString_Check, - PyBytes_Check, +from cpython cimport (PyBytes_Check, PyUnicode_Check) from libc.stdlib cimport malloc, free @@ -62,9 +61,7 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'): cdef list datas = [] for i in range(n): val = arr[i] - if PyString_Check(val): - data = val.encode(encoding) - elif PyBytes_Check(val): + if PyBytes_Check(val): data = val elif PyUnicode_Check(val): data = val.encode(encoding) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 97236f028b1c4..730c2b7865f1f 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -11,6 +11,7 @@ from pandas import (Index, Series, DataFrame, date_range, option_context, Categorical, period_range, timedelta_range) from pandas.core.index import MultiIndex +from pandas.core.base import StringMixin from pandas.compat import lrange, range, u from pandas import compat @@ -202,6 +203,35 @@ def test_latex_repr(self): class TestCategoricalRepr(object): + def test_categorical_repr_unicode(self): + # GH#21002 if len(index) > 60, sys.getdefaultencoding()=='ascii', + # and we are working in PY2, then rendering a Categorical could raise + # UnicodeDecodeError by trying to decode when it shouldn't + + class County(StringMixin): + name = u'San Sebastián' + state = u'PR' + + def __unicode__(self): + return self.name + u', ' + self.state + + cat = pd.Categorical([County() for n in range(61)]) + idx = pd.Index(cat) + ser = idx.to_series() + + if compat.PY3: + # no reloading of sys, just check that the default (utf8) works + # as expected + repr(ser) + str(ser) + + else: + # set sys.defaultencoding to ascii, then change it back after + # the test + with tm.set_defaultencoding('ascii'): + repr(ser) + str(ser) + def 
test_categorical_repr(self): a = Series(Categorical([1, 2, 3, 4])) exp = u("0 1\n1 2\n2 3\n3 4\n" + diff --git a/pandas/util/testing.py b/pandas/util/testing.py index d26a2116fb3ce..b9e53dfc80020 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -553,6 +553,28 @@ def _valid_locales(locales, normalize): # Stdout / stderr decorators +@contextmanager +def set_defaultencoding(encoding): + """ + Set default encoding (as given by sys.getdefaultencoding()) to the given + encoding; restore on exit. + + Parameters + ---------- + encoding : str + """ + if not PY2: + raise ValueError("set_defaultencoding context is only available " + "in Python 2.") + orig = sys.getdefaultencoding() + reload(sys) # noqa:F821 + sys.setdefaultencoding(encoding) + try: + yield + finally: + sys.setdefaultencoding(orig) + + def capture_stdout(f): """ Decorator to capture stdout in a buffer so that it can be checked From c019f6dc1c639e0c0630cb855252d93c6e23fe6d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 21 Jun 2018 03:24:20 -0700 Subject: [PATCH 069/113] make DateOffset immutable (#21341) --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/_libs/tslibs/offsets.pyx | 16 ++- pandas/tests/tseries/offsets/test_offsets.py | 8 ++ pandas/tseries/offsets.py | 118 +++++++++---------- 4 files changed, 75 insertions(+), 69 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index a9c49b7476fa6..fd34424dedc52 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -67,6 +67,7 @@ Datetimelike API Changes ^^^^^^^^^^^^^^^^^^^^^^^^ - For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with non-``None`` ``freq`` attribute, addition or subtraction of integer-dtyped array or ``Index`` will return an object of the same class (:issue:`19959`) +- :class:`DateOffset` objects are now immutable. Attempting to alter one of these will now raise ``AttributeError`` (:issue:`21341`) .. 
_whatsnew_0240.api.other: @@ -176,7 +177,6 @@ Timezones Offsets ^^^^^^^ -- - - diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 3ca9bb307da9c..a9ef9166e4d33 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -304,6 +304,15 @@ class _BaseOffset(object): _day_opt = None _attributes = frozenset(['n', 'normalize']) + def __init__(self, n=1, normalize=False): + n = self._validate_n(n) + object.__setattr__(self, "n", n) + object.__setattr__(self, "normalize", normalize) + object.__setattr__(self, "_cache", {}) + + def __setattr__(self, name, value): + raise AttributeError("DateOffset objects are immutable.") + @property def kwds(self): # for backwards-compatibility @@ -395,13 +404,14 @@ class _BaseOffset(object): kwds = {key: odict[key] for key in odict if odict[key]} state.update(kwds) - self.__dict__ = state + self.__dict__.update(state) + if 'weekmask' in state and 'holidays' in state: calendar, holidays = _get_calendar(weekmask=self.weekmask, holidays=self.holidays, calendar=None) - self.calendar = calendar - self.holidays = holidays + object.__setattr__(self, "calendar", calendar) + object.__setattr__(self, "holidays", holidays) def __getstate__(self): """Return a pickleable state""" diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 5dd2a199405bf..66cb9baeb9357 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -234,6 +234,14 @@ class TestCommon(Base): 'Nano': Timestamp(np_datetime64_compat( '2011-01-01T09:00:00.000000001Z'))} + def test_immutable(self, offset_types): + # GH#21341 check that __setattr__ raises + offset = self._get_offset(offset_types) + with pytest.raises(AttributeError): + offset.normalize = True + with pytest.raises(AttributeError): + offset.n = 91 + def test_return_type(self, offset_types): offset = self._get_offset(offset_types) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index ffa2c0a5e3211..da8fdb4d79e34 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -23,7 +23,6 @@ ApplyTypeError, as_datetime, _is_normalized, _get_calendar, _to_dt64, - _determine_offset, apply_index_wraps, roll_yearday, shift_month, @@ -192,11 +191,14 @@ def __add__(date): normalize = False def __init__(self, n=1, normalize=False, **kwds): - self.n = self._validate_n(n) - self.normalize = normalize + BaseOffset.__init__(self, n, normalize) - self._offset, self._use_relativedelta = _determine_offset(kwds) - self.__dict__.update(kwds) + off, use_rd = liboffsets._determine_offset(kwds) + object.__setattr__(self, "_offset", off) + object.__setattr__(self, "_use_relativedelta", use_rd) + for key in kwds: + val = kwds[key] + object.__setattr__(self, key, val) @apply_wraps def apply(self, other): @@ -446,9 +448,9 @@ def __init__(self, weekmask, holidays, calendar): # following two attributes. 
See DateOffset._params() # holidays, weekmask - self.weekmask = weekmask - self.holidays = holidays - self.calendar = calendar + object.__setattr__(self, "weekmask", weekmask) + object.__setattr__(self, "holidays", holidays) + object.__setattr__(self, "calendar", calendar) class BusinessMixin(object): @@ -480,9 +482,8 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset): _attributes = frozenset(['n', 'normalize', 'offset']) def __init__(self, n=1, normalize=False, offset=timedelta(0)): - self.n = self._validate_n(n) - self.normalize = normalize - self._offset = offset + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "_offset", offset) def _offset_str(self): def get_str(td): @@ -578,9 +579,11 @@ class BusinessHourMixin(BusinessMixin): def __init__(self, start='09:00', end='17:00', offset=timedelta(0)): # must be validated here to equality check - self.start = liboffsets._validate_business_time(start) - self.end = liboffsets._validate_business_time(end) - self._offset = offset + start = liboffsets._validate_business_time(start) + object.__setattr__(self, "start", start) + end = liboffsets._validate_business_time(end) + object.__setattr__(self, "end", end) + object.__setattr__(self, "_offset", offset) @cache_readonly def next_bday(self): @@ -807,8 +810,7 @@ class BusinessHour(BusinessHourMixin, SingleConstructorOffset): def __init__(self, n=1, normalize=False, start='09:00', end='17:00', offset=timedelta(0)): - self.n = self._validate_n(n) - self.normalize = normalize + BaseOffset.__init__(self, n, normalize) super(BusinessHour, self).__init__(start=start, end=end, offset=offset) @@ -837,9 +839,8 @@ class CustomBusinessDay(_CustomMixin, BusinessDay): def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', holidays=None, calendar=None, offset=timedelta(0)): - self.n = self._validate_n(n) - self.normalize = normalize - self._offset = offset + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "_offset", offset) _CustomMixin.__init__(self, weekmask, holidays, calendar) @@ -898,9 +899,8 @@ class CustomBusinessHour(_CustomMixin, BusinessHourMixin, def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', holidays=None, calendar=None, start='09:00', end='17:00', offset=timedelta(0)): - self.n = self._validate_n(n) - self.normalize = normalize - self._offset = offset + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "_offset", offset) _CustomMixin.__init__(self, weekmask, holidays, calendar) BusinessHourMixin.__init__(self, start=start, end=end, offset=offset) @@ -914,9 +914,7 @@ class MonthOffset(SingleConstructorOffset): _adjust_dst = True _attributes = frozenset(['n', 'normalize']) - def __init__(self, n=1, normalize=False): - self.n = self._validate_n(n) - self.normalize = normalize + __init__ = BaseOffset.__init__ @property def name(self): @@ -995,9 +993,8 @@ class _CustomBusinessMonth(_CustomMixin, BusinessMixin, MonthOffset): def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', holidays=None, calendar=None, offset=timedelta(0)): - self.n = self._validate_n(n) - self.normalize = normalize - self._offset = offset + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "_offset", offset) _CustomMixin.__init__(self, weekmask, holidays, calendar) @@ -1074,18 +1071,18 @@ class SemiMonthOffset(DateOffset): _attributes = frozenset(['n', 'normalize', 'day_of_month']) def __init__(self, n=1, normalize=False, day_of_month=None): + BaseOffset.__init__(self, n, normalize) + if 
day_of_month is None: - self.day_of_month = self._default_day_of_month + object.__setattr__(self, "day_of_month", + self._default_day_of_month) else: - self.day_of_month = int(day_of_month) + object.__setattr__(self, "day_of_month", int(day_of_month)) if not self._min_day_of_month <= self.day_of_month <= 27: msg = 'day_of_month must be {min}<=day_of_month<=27, got {day}' raise ValueError(msg.format(min=self._min_day_of_month, day=self.day_of_month)) - self.n = self._validate_n(n) - self.normalize = normalize - @classmethod def _from_name(cls, suffix=None): return cls(day_of_month=suffix) @@ -1291,9 +1288,8 @@ class Week(DateOffset): _attributes = frozenset(['n', 'normalize', 'weekday']) def __init__(self, n=1, normalize=False, weekday=None): - self.n = self._validate_n(n) - self.normalize = normalize - self.weekday = weekday + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "weekday", weekday) if self.weekday is not None: if self.weekday < 0 or self.weekday > 6: @@ -1421,10 +1417,9 @@ class WeekOfMonth(_WeekOfMonthMixin, DateOffset): _attributes = frozenset(['n', 'normalize', 'week', 'weekday']) def __init__(self, n=1, normalize=False, week=0, weekday=0): - self.n = self._validate_n(n) - self.normalize = normalize - self.weekday = weekday - self.week = week + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "weekday", weekday) + object.__setattr__(self, "week", week) if self.weekday < 0 or self.weekday > 6: raise ValueError('Day must be 0<=day<=6, got {day}' @@ -1493,9 +1488,8 @@ class LastWeekOfMonth(_WeekOfMonthMixin, DateOffset): _attributes = frozenset(['n', 'normalize', 'weekday']) def __init__(self, n=1, normalize=False, weekday=0): - self.n = self._validate_n(n) - self.normalize = normalize - self.weekday = weekday + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "weekday", weekday) if self.n == 0: raise ValueError('N cannot be 0') @@ -1553,11 +1547,11 @@ class QuarterOffset(DateOffset): # startingMonth vs month attr names are resolved def __init__(self, n=1, normalize=False, startingMonth=None): - self.n = self._validate_n(n) - self.normalize = normalize + BaseOffset.__init__(self, n, normalize) + if startingMonth is None: startingMonth = self._default_startingMonth - self.startingMonth = startingMonth + object.__setattr__(self, "startingMonth", startingMonth) def isAnchored(self): return (self.n == 1 and self.startingMonth is not None) @@ -1679,11 +1673,10 @@ def onOffset(self, dt): return dt.month == self.month and dt.day == self._get_offset_day(dt) def __init__(self, n=1, normalize=False, month=None): - self.n = self._validate_n(n) - self.normalize = normalize + BaseOffset.__init__(self, n, normalize) month = month if month is not None else self._default_month - self.month = month + object.__setattr__(self, "month", month) if self.month < 1 or self.month > 12: raise ValueError('Month must go from 1 to 12') @@ -1776,12 +1769,11 @@ class FY5253(DateOffset): def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1, variation="nearest"): - self.n = self._validate_n(n) - self.normalize = normalize - self.startingMonth = startingMonth - self.weekday = weekday + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "startingMonth", startingMonth) + object.__setattr__(self, "weekday", weekday) - self.variation = variation + object.__setattr__(self, "variation", variation) if self.n == 0: raise ValueError('N cannot be 0') @@ -1976,13 +1968,12 @@ class FY5253Quarter(DateOffset): def __init__(self, n=1, 
normalize=False, weekday=0, startingMonth=1, qtr_with_extra_week=1, variation="nearest"): - self.n = self._validate_n(n) - self.normalize = normalize + BaseOffset.__init__(self, n, normalize) - self.weekday = weekday - self.startingMonth = startingMonth - self.qtr_with_extra_week = qtr_with_extra_week - self.variation = variation + object.__setattr__(self, "startingMonth", startingMonth) + object.__setattr__(self, "weekday", weekday) + object.__setattr__(self, "qtr_with_extra_week", qtr_with_extra_week) + object.__setattr__(self, "variation", variation) if self.n == 0: raise ValueError('N cannot be 0') @@ -2129,9 +2120,7 @@ class Easter(DateOffset): _adjust_dst = True _attributes = frozenset(['n', 'normalize']) - def __init__(self, n=1, normalize=False): - self.n = self._validate_n(n) - self.normalize = normalize + __init__ = BaseOffset.__init__ @apply_wraps def apply(self, other): @@ -2177,11 +2166,10 @@ class Tick(SingleConstructorOffset): _attributes = frozenset(['n', 'normalize']) def __init__(self, n=1, normalize=False): - self.n = self._validate_n(n) + BaseOffset.__init__(self, n, normalize) if normalize: raise ValueError("Tick offset with `normalize=True` are not " "allowed.") # GH#21427 - self.normalize = normalize __gt__ = _tick_comp(operator.gt) __ge__ = _tick_comp(operator.ge) From c03ed3857715d5f43750f3c142c5dd000da59d27 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Thu, 21 Jun 2018 15:05:43 +0200 Subject: [PATCH 070/113] REF: multi_take is now able to tackle all list-like (non-bool) cases (#21569) --- pandas/core/indexing.py | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 38b6aaa2230fb..1f9fe5f947d0c 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -902,30 +902,45 @@ def _getitem_tuple(self, tup): return retval def _multi_take_opportunity(self, tup): - from pandas.core.generic import NDFrame + """ + Check whether there is the possibility to use ``_multi_take``. + Currently the limit is that all axes being indexed must be indexed with + list-likes. - # ugly hack for GH #836 - if not isinstance(self.obj, NDFrame): - return False + Parameters + ---------- + tup : tuple + Tuple of indexers, one per axis + Returns + ------- + boolean: Whether the current indexing can be passed through _multi_take + """ if not all(is_list_like_indexer(x) for x in tup): return False # just too complicated - for indexer, ax in zip(tup, self.obj._data.axes): - if isinstance(ax, MultiIndex): - return False - elif com.is_bool_indexer(indexer): - return False - elif not ax.is_unique: - return False + if any(com.is_bool_indexer(x) for x in tup): + return False return True def _multi_take(self, tup): - """ create the reindex map for our objects, raise the _exception if we - can't create the indexer """ + Create the indexers for the passed tuple of keys, and execute the take + operation. This allows the take operation to be executed all at once - + rather than once for each dimension - improving efficiency. 
+ + Parameters + ---------- + tup : tuple + Tuple of indexers, one per axis + + Returns + ------- + values: same type as the object being indexed + """ + # GH 836 o = self.obj d = {axis: self._get_listlike_indexer(key, axis) for (key, axis) in zip(tup, o._AXIS_ORDERS)} From 4668dba95cf905cd449e5cadcdbaad75e3afe220 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 22 Jun 2018 02:58:40 -0700 Subject: [PATCH 071/113] TST: Use int fixtures in test_construction.py (#21588) Partially addresses gh-21500. --- pandas/tests/indexes/datetimes/test_construction.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index f7682a965c038..ae98510951845 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -524,14 +524,13 @@ def test_dti_constructor_years_only(self, tz_naive_fixture): (rng3, expected3), (rng4, expected4)]: tm.assert_index_equal(rng, expected) - @pytest.mark.parametrize('dtype', [np.int64, np.int32, np.int16, np.int8]) - def test_dti_constructor_small_int(self, dtype): - # GH 13721 + def test_dti_constructor_small_int(self, any_int_dtype): + # see gh-13721 exp = DatetimeIndex(['1970-01-01 00:00:00.00000000', '1970-01-01 00:00:00.00000001', '1970-01-01 00:00:00.00000002']) - arr = np.array([0, 10, 20], dtype=dtype) + arr = np.array([0, 10, 20], dtype=any_int_dtype) tm.assert_index_equal(DatetimeIndex(arr), exp) def test_ctor_str_intraday(self): From 07e161dca7dbe211efb5df50ff4ba5a0edfbc576 Mon Sep 17 00:00:00 2001 From: Uddeshya Singh Date: Fri, 22 Jun 2018 15:35:45 +0530 Subject: [PATCH 072/113] DOC: Adding clarification on return dtype of to_numeric (#21585) --- pandas/core/tools/numeric.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index c584e29f682dd..ebe135dfb184c 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -16,6 +16,10 @@ def to_numeric(arg, errors='raise', downcast=None): """ Convert argument to a numeric type. + The default return dtype is `float64` or `int64` + depending on the data supplied. Use the `downcast` parameter + to obtain other dtypes. + Parameters ---------- arg : list, tuple, 1-d array, or Series From 15b040c220909c2106c1cbdecbc39ec2955fe19c Mon Sep 17 00:00:00 2001 From: Uddeshya Singh Date: Fri, 22 Jun 2018 15:37:07 +0530 Subject: [PATCH 073/113] Update v0.24.0.txt (#21586) --- doc/source/whatsnew/v0.24.0.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index fd34424dedc52..4bfae7de01b8f 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -31,8 +31,8 @@ Backwards incompatible API changes Tick DateOffset Normalize Restrictions -------------------------------------- -Creating a ``Tick`` object (:class:``Day``, :class:``Hour``, :class:``Minute``, -:class:``Second``, :class:``Milli``, :class:``Micro``, :class:``Nano``) with +Creating a ``Tick`` object (:class:`Day`, :class:`Hour`, :class:`Minute`, +:class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano`) with `normalize=True` is no longer supported. This prevents unexpected behavior where addition could fail to be monotone or associative. 
(:issue:`21427`) From b21efccbce9f66663932faec1c80e0d4bd4d20f5 Mon Sep 17 00:00:00 2001 From: Mitch Negus <21086604+mitchnegus@users.noreply.github.com> Date: Fri, 22 Jun 2018 03:11:20 -0700 Subject: [PATCH 074/113] clarifying regex pipe behavior (#21589) --- pandas/core/strings.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 9632df46d3bbf..08239ae4dae20 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -335,11 +335,11 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True): 4 False dtype: bool - Returning 'house' and 'parrot' within same string. + Returning 'house' or 'dog' when either expression occurs in a string. - >>> s1.str.contains('house|parrot', regex=True) + >>> s1.str.contains('house|dog', regex=True) 0 False - 1 False + 1 True 2 True 3 False 4 NaN From a7d5bb73643855fc2c7990b8ec13961b9d5d1d72 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 22 Jun 2018 03:21:13 -0700 Subject: [PATCH 075/113] DOC: Note assert_almost_equal impl. detail (#21580) Note the hard-coded switch between absolute and relative tolerance during checking. Closes gh-21528. --- pandas/util/testing.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index b9e53dfc80020..675dd94d49750 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -224,9 +224,15 @@ def assert_almost_equal(left, right, check_exact=False, check_dtype: bool, default True check dtype if both a and b are the same type check_less_precise : bool or int, default False - Specify comparison precision. Only used when check_exact is False. + Specify comparison precision. Only used when `check_exact` is False. 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare + If int, then specify the digits to compare. + + When comparing two numbers, if the first number has magnitude less + than 1e-5, we compare the two numbers directly and check whether + they are equivalent within the specified precision. Otherwise, we + compare the **ratio** of the second number to the first number and + check whether it is equivalent to 1 within the specified precision. """ if isinstance(left, pd.Index): return assert_index_equal(left, right, check_exact=check_exact, From 8944d3510699c8f229b6c4d574c529d27fce777f Mon Sep 17 00:00:00 2001 From: Alex Strick van Linschoten Date: Fri, 22 Jun 2018 11:33:13 +0100 Subject: [PATCH 076/113] DOC: update the Series.any / Dataframe.any docstring (#21579) --- pandas/core/generic.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9902da4094404..04ba0b5de3f7f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -463,7 +463,7 @@ def ndim(self): See Also -------- - ndarray.ndim + ndarray.ndim : Number of array dimensions. Examples -------- @@ -487,7 +487,7 @@ def size(self): See Also -------- - ndarray.size + ndarray.size : Number of elements in the array. Examples -------- @@ -9420,7 +9420,11 @@ def _doc_parms(cls): _any_see_also = """\ See Also -------- -pandas.DataFrame.all : Return whether all elements are True. +numpy.any : Numpy version of this method. +Series.any : Return whether any element is True. +Series.all : Return whether all elements are True. +DataFrame.any : Return whether any element is True over requested axis. 
+DataFrame.all : Return whether all elements are True over requested axis. """ _any_desc = """\ From 828008426a026377e156acd693b25acd59153fb6 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 22 Jun 2018 09:52:20 -0700 Subject: [PATCH 077/113] TST: Clean up tests in test_take.py (#21591) Utilizes pytest's fixtures to cleanup the code significantly. --- pandas/tests/test_take.py | 525 +++++++++++++++++--------------------- 1 file changed, 239 insertions(+), 286 deletions(-) diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index 9ab147edb8d1b..ade847923c083 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -10,315 +10,268 @@ from pandas._libs.tslib import iNaT +@pytest.fixture(params=[True, False]) +def writeable(request): + return request.param + + +# Check that take_nd works both with writeable arrays +# (in which case fast typed memory-views implementation) +# and read-only arrays alike. +@pytest.fixture(params=[ + (np.float64, True), + (np.float32, True), + (np.uint64, False), + (np.uint32, False), + (np.uint16, False), + (np.uint8, False), + (np.int64, False), + (np.int32, False), + (np.int16, False), + (np.int8, False), + (np.object_, True), + (np.bool, False), +]) +def dtype_can_hold_na(request): + return request.param + + +@pytest.fixture(params=[ + (np.int8, np.int16(127), np.int8), + (np.int8, np.int16(128), np.int16), + (np.int32, 1, np.int32), + (np.int32, 2.0, np.float64), + (np.int32, 3.0 + 4.0j, np.complex128), + (np.int32, True, np.object_), + (np.int32, "", np.object_), + (np.float64, 1, np.float64), + (np.float64, 2.0, np.float64), + (np.float64, 3.0 + 4.0j, np.complex128), + (np.float64, True, np.object_), + (np.float64, "", np.object_), + (np.complex128, 1, np.complex128), + (np.complex128, 2.0, np.complex128), + (np.complex128, 3.0 + 4.0j, np.complex128), + (np.complex128, True, np.object_), + (np.complex128, "", np.object_), + (np.bool_, 1, np.object_), + (np.bool_, 2.0, np.object_), + (np.bool_, 3.0 + 4.0j, np.object_), + (np.bool_, True, np.bool_), + (np.bool_, '', np.object_), +]) +def dtype_fill_out_dtype(request): + return request.param + + class TestTake(object): - # standard incompatible fill error + # Standard incompatible fill error. 
fill_error = re.compile("Incompatible type for fill_value") - def test_1d_with_out(self): - def _test_dtype(dtype, can_hold_na, writeable=True): - data = np.random.randint(0, 2, 4).astype(dtype) - data.flags.writeable = writeable + def test_1d_with_out(self, dtype_can_hold_na, writeable): + dtype, can_hold_na = dtype_can_hold_na + + data = np.random.randint(0, 2, 4).astype(dtype) + data.flags.writeable = writeable + + indexer = [2, 1, 0, 1] + out = np.empty(4, dtype=dtype) + algos.take_1d(data, indexer, out=out) - indexer = [2, 1, 0, 1] - out = np.empty(4, dtype=dtype) + expected = data.take(indexer) + tm.assert_almost_equal(out, expected) + + indexer = [2, 1, 0, -1] + out = np.empty(4, dtype=dtype) + + if can_hold_na: algos.take_1d(data, indexer, out=out) expected = data.take(indexer) + expected[3] = np.nan tm.assert_almost_equal(out, expected) - - indexer = [2, 1, 0, -1] - out = np.empty(4, dtype=dtype) - if can_hold_na: + else: + with tm.assert_raises_regex(TypeError, self.fill_error): algos.take_1d(data, indexer, out=out) - expected = data.take(indexer) - expected[3] = np.nan - tm.assert_almost_equal(out, expected) - else: - with tm.assert_raises_regex(TypeError, self.fill_error): - algos.take_1d(data, indexer, out=out) - # no exception o/w - data.take(indexer, out=out) - - for writeable in [True, False]: - # Check that take_nd works both with writeable arrays (in which - # case fast typed memoryviews implementation) and read-only - # arrays alike. - _test_dtype(np.float64, True, writeable=writeable) - _test_dtype(np.float32, True, writeable=writeable) - _test_dtype(np.uint64, False, writeable=writeable) - _test_dtype(np.uint32, False, writeable=writeable) - _test_dtype(np.uint16, False, writeable=writeable) - _test_dtype(np.uint8, False, writeable=writeable) - _test_dtype(np.int64, False, writeable=writeable) - _test_dtype(np.int32, False, writeable=writeable) - _test_dtype(np.int16, False, writeable=writeable) - _test_dtype(np.int8, False, writeable=writeable) - _test_dtype(np.object_, True, writeable=writeable) - _test_dtype(np.bool, False, writeable=writeable) - - def test_1d_fill_nonna(self): - def _test_dtype(dtype, fill_value, out_dtype): - data = np.random.randint(0, 2, 4).astype(dtype) - - indexer = [2, 1, 0, -1] - - result = algos.take_1d(data, indexer, fill_value=fill_value) - assert ((result[[0, 1, 2]] == data[[2, 1, 0]]).all()) - assert (result[3] == fill_value) - assert (result.dtype == out_dtype) - - indexer = [2, 1, 0, 1] - - result = algos.take_1d(data, indexer, fill_value=fill_value) - assert ((result[[0, 1, 2, 3]] == data[indexer]).all()) - assert (result.dtype == dtype) - - _test_dtype(np.int8, np.int16(127), np.int8) - _test_dtype(np.int8, np.int16(128), np.int16) - _test_dtype(np.int32, 1, np.int32) - _test_dtype(np.int32, 2.0, np.float64) - _test_dtype(np.int32, 3.0 + 4.0j, np.complex128) - _test_dtype(np.int32, True, np.object_) - _test_dtype(np.int32, '', np.object_) - _test_dtype(np.float64, 1, np.float64) - _test_dtype(np.float64, 2.0, np.float64) - _test_dtype(np.float64, 3.0 + 4.0j, np.complex128) - _test_dtype(np.float64, True, np.object_) - _test_dtype(np.float64, '', np.object_) - _test_dtype(np.complex128, 1, np.complex128) - _test_dtype(np.complex128, 2.0, np.complex128) - _test_dtype(np.complex128, 3.0 + 4.0j, np.complex128) - _test_dtype(np.complex128, True, np.object_) - _test_dtype(np.complex128, '', np.object_) - _test_dtype(np.bool_, 1, np.object_) - _test_dtype(np.bool_, 2.0, np.object_) - _test_dtype(np.bool_, 3.0 + 4.0j, np.object_) - 
_test_dtype(np.bool_, True, np.bool_) - _test_dtype(np.bool_, '', np.object_) - - def test_2d_with_out(self): - def _test_dtype(dtype, can_hold_na, writeable=True): - data = np.random.randint(0, 2, (5, 3)).astype(dtype) - data.flags.writeable = writeable - - indexer = [2, 1, 0, 1] - out0 = np.empty((4, 3), dtype=dtype) - out1 = np.empty((5, 4), dtype=dtype) + + # No Exception otherwise. + data.take(indexer, out=out) + + def test_1d_fill_nonna(self, dtype_fill_out_dtype): + dtype, fill_value, out_dtype = dtype_fill_out_dtype + data = np.random.randint(0, 2, 4).astype(dtype) + indexer = [2, 1, 0, -1] + + result = algos.take_1d(data, indexer, fill_value=fill_value) + assert ((result[[0, 1, 2]] == data[[2, 1, 0]]).all()) + assert (result[3] == fill_value) + assert (result.dtype == out_dtype) + + indexer = [2, 1, 0, 1] + + result = algos.take_1d(data, indexer, fill_value=fill_value) + assert ((result[[0, 1, 2, 3]] == data[indexer]).all()) + assert (result.dtype == dtype) + + def test_2d_with_out(self, dtype_can_hold_na, writeable): + dtype, can_hold_na = dtype_can_hold_na + + data = np.random.randint(0, 2, (5, 3)).astype(dtype) + data.flags.writeable = writeable + + indexer = [2, 1, 0, 1] + out0 = np.empty((4, 3), dtype=dtype) + out1 = np.empty((5, 4), dtype=dtype) + algos.take_nd(data, indexer, out=out0, axis=0) + algos.take_nd(data, indexer, out=out1, axis=1) + + expected0 = data.take(indexer, axis=0) + expected1 = data.take(indexer, axis=1) + tm.assert_almost_equal(out0, expected0) + tm.assert_almost_equal(out1, expected1) + + indexer = [2, 1, 0, -1] + out0 = np.empty((4, 3), dtype=dtype) + out1 = np.empty((5, 4), dtype=dtype) + + if can_hold_na: algos.take_nd(data, indexer, out=out0, axis=0) algos.take_nd(data, indexer, out=out1, axis=1) + expected0 = data.take(indexer, axis=0) expected1 = data.take(indexer, axis=1) + expected0[3, :] = np.nan + expected1[:, 3] = np.nan + tm.assert_almost_equal(out0, expected0) tm.assert_almost_equal(out1, expected1) - - indexer = [2, 1, 0, -1] - out0 = np.empty((4, 3), dtype=dtype) - out1 = np.empty((5, 4), dtype=dtype) - if can_hold_na: - algos.take_nd(data, indexer, out=out0, axis=0) - algos.take_nd(data, indexer, out=out1, axis=1) - expected0 = data.take(indexer, axis=0) - expected1 = data.take(indexer, axis=1) - expected0[3, :] = np.nan - expected1[:, 3] = np.nan - tm.assert_almost_equal(out0, expected0) - tm.assert_almost_equal(out1, expected1) - else: - for i, out in enumerate([out0, out1]): - with tm.assert_raises_regex(TypeError, - self.fill_error): - algos.take_nd(data, indexer, out=out, axis=i) - # no exception o/w - data.take(indexer, out=out, axis=i) - - for writeable in [True, False]: - # Check that take_nd works both with writeable arrays (in which - # case fast typed memoryviews implementation) and read-only - # arrays alike. 
- _test_dtype(np.float64, True, writeable=writeable) - _test_dtype(np.float32, True, writeable=writeable) - _test_dtype(np.uint64, False, writeable=writeable) - _test_dtype(np.uint32, False, writeable=writeable) - _test_dtype(np.uint16, False, writeable=writeable) - _test_dtype(np.uint8, False, writeable=writeable) - _test_dtype(np.int64, False, writeable=writeable) - _test_dtype(np.int32, False, writeable=writeable) - _test_dtype(np.int16, False, writeable=writeable) - _test_dtype(np.int8, False, writeable=writeable) - _test_dtype(np.object_, True, writeable=writeable) - _test_dtype(np.bool, False, writeable=writeable) - - def test_2d_fill_nonna(self): - def _test_dtype(dtype, fill_value, out_dtype): - data = np.random.randint(0, 2, (5, 3)).astype(dtype) - - indexer = [2, 1, 0, -1] - - result = algos.take_nd(data, indexer, axis=0, - fill_value=fill_value) - assert ((result[[0, 1, 2], :] == data[[2, 1, 0], :]).all()) - assert ((result[3, :] == fill_value).all()) - assert (result.dtype == out_dtype) - - result = algos.take_nd(data, indexer, axis=1, - fill_value=fill_value) - assert ((result[:, [0, 1, 2]] == data[:, [2, 1, 0]]).all()) - assert ((result[:, 3] == fill_value).all()) - assert (result.dtype == out_dtype) - - indexer = [2, 1, 0, 1] - - result = algos.take_nd(data, indexer, axis=0, - fill_value=fill_value) - assert ((result[[0, 1, 2, 3], :] == data[indexer, :]).all()) - assert (result.dtype == dtype) - - result = algos.take_nd(data, indexer, axis=1, - fill_value=fill_value) - assert ((result[:, [0, 1, 2, 3]] == data[:, indexer]).all()) - assert (result.dtype == dtype) - - _test_dtype(np.int8, np.int16(127), np.int8) - _test_dtype(np.int8, np.int16(128), np.int16) - _test_dtype(np.int32, 1, np.int32) - _test_dtype(np.int32, 2.0, np.float64) - _test_dtype(np.int32, 3.0 + 4.0j, np.complex128) - _test_dtype(np.int32, True, np.object_) - _test_dtype(np.int32, '', np.object_) - _test_dtype(np.float64, 1, np.float64) - _test_dtype(np.float64, 2.0, np.float64) - _test_dtype(np.float64, 3.0 + 4.0j, np.complex128) - _test_dtype(np.float64, True, np.object_) - _test_dtype(np.float64, '', np.object_) - _test_dtype(np.complex128, 1, np.complex128) - _test_dtype(np.complex128, 2.0, np.complex128) - _test_dtype(np.complex128, 3.0 + 4.0j, np.complex128) - _test_dtype(np.complex128, True, np.object_) - _test_dtype(np.complex128, '', np.object_) - _test_dtype(np.bool_, 1, np.object_) - _test_dtype(np.bool_, 2.0, np.object_) - _test_dtype(np.bool_, 3.0 + 4.0j, np.object_) - _test_dtype(np.bool_, True, np.bool_) - _test_dtype(np.bool_, '', np.object_) - - def test_3d_with_out(self): - def _test_dtype(dtype, can_hold_na): - data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype) - - indexer = [2, 1, 0, 1] - out0 = np.empty((4, 4, 3), dtype=dtype) - out1 = np.empty((5, 4, 3), dtype=dtype) - out2 = np.empty((5, 4, 4), dtype=dtype) + else: + for i, out in enumerate([out0, out1]): + with tm.assert_raises_regex(TypeError, + self.fill_error): + algos.take_nd(data, indexer, out=out, axis=i) + + # No Exception otherwise. 
+ data.take(indexer, out=out, axis=i) + + def test_2d_fill_nonna(self, dtype_fill_out_dtype): + dtype, fill_value, out_dtype = dtype_fill_out_dtype + data = np.random.randint(0, 2, (5, 3)).astype(dtype) + indexer = [2, 1, 0, -1] + + result = algos.take_nd(data, indexer, axis=0, + fill_value=fill_value) + assert ((result[[0, 1, 2], :] == data[[2, 1, 0], :]).all()) + assert ((result[3, :] == fill_value).all()) + assert (result.dtype == out_dtype) + + result = algos.take_nd(data, indexer, axis=1, + fill_value=fill_value) + assert ((result[:, [0, 1, 2]] == data[:, [2, 1, 0]]).all()) + assert ((result[:, 3] == fill_value).all()) + assert (result.dtype == out_dtype) + + indexer = [2, 1, 0, 1] + result = algos.take_nd(data, indexer, axis=0, + fill_value=fill_value) + assert ((result[[0, 1, 2, 3], :] == data[indexer, :]).all()) + assert (result.dtype == dtype) + + result = algos.take_nd(data, indexer, axis=1, + fill_value=fill_value) + assert ((result[:, [0, 1, 2, 3]] == data[:, indexer]).all()) + assert (result.dtype == dtype) + + def test_3d_with_out(self, dtype_can_hold_na): + dtype, can_hold_na = dtype_can_hold_na + + data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype) + indexer = [2, 1, 0, 1] + + out0 = np.empty((4, 4, 3), dtype=dtype) + out1 = np.empty((5, 4, 3), dtype=dtype) + out2 = np.empty((5, 4, 4), dtype=dtype) + + algos.take_nd(data, indexer, out=out0, axis=0) + algos.take_nd(data, indexer, out=out1, axis=1) + algos.take_nd(data, indexer, out=out2, axis=2) + + expected0 = data.take(indexer, axis=0) + expected1 = data.take(indexer, axis=1) + expected2 = data.take(indexer, axis=2) + + tm.assert_almost_equal(out0, expected0) + tm.assert_almost_equal(out1, expected1) + tm.assert_almost_equal(out2, expected2) + + indexer = [2, 1, 0, -1] + out0 = np.empty((4, 4, 3), dtype=dtype) + out1 = np.empty((5, 4, 3), dtype=dtype) + out2 = np.empty((5, 4, 4), dtype=dtype) + + if can_hold_na: algos.take_nd(data, indexer, out=out0, axis=0) algos.take_nd(data, indexer, out=out1, axis=1) algos.take_nd(data, indexer, out=out2, axis=2) + expected0 = data.take(indexer, axis=0) expected1 = data.take(indexer, axis=1) expected2 = data.take(indexer, axis=2) + + expected0[3, :, :] = np.nan + expected1[:, 3, :] = np.nan + expected2[:, :, 3] = np.nan + tm.assert_almost_equal(out0, expected0) tm.assert_almost_equal(out1, expected1) tm.assert_almost_equal(out2, expected2) - - indexer = [2, 1, 0, -1] - out0 = np.empty((4, 4, 3), dtype=dtype) - out1 = np.empty((5, 4, 3), dtype=dtype) - out2 = np.empty((5, 4, 4), dtype=dtype) - if can_hold_na: - algos.take_nd(data, indexer, out=out0, axis=0) - algos.take_nd(data, indexer, out=out1, axis=1) - algos.take_nd(data, indexer, out=out2, axis=2) - expected0 = data.take(indexer, axis=0) - expected1 = data.take(indexer, axis=1) - expected2 = data.take(indexer, axis=2) - expected0[3, :, :] = np.nan - expected1[:, 3, :] = np.nan - expected2[:, :, 3] = np.nan - tm.assert_almost_equal(out0, expected0) - tm.assert_almost_equal(out1, expected1) - tm.assert_almost_equal(out2, expected2) - else: - for i, out in enumerate([out0, out1, out2]): - with tm.assert_raises_regex(TypeError, - self.fill_error): - algos.take_nd(data, indexer, out=out, axis=i) - # no exception o/w - data.take(indexer, out=out, axis=i) - - _test_dtype(np.float64, True) - _test_dtype(np.float32, True) - _test_dtype(np.uint64, False) - _test_dtype(np.uint32, False) - _test_dtype(np.uint16, False) - _test_dtype(np.uint8, False) - _test_dtype(np.int64, False) - _test_dtype(np.int32, False) - _test_dtype(np.int16, 
False) - _test_dtype(np.int8, False) - _test_dtype(np.object_, True) - _test_dtype(np.bool, False) - - def test_3d_fill_nonna(self): - def _test_dtype(dtype, fill_value, out_dtype): - data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype) - - indexer = [2, 1, 0, -1] - - result = algos.take_nd(data, indexer, axis=0, - fill_value=fill_value) - assert ((result[[0, 1, 2], :, :] == data[[2, 1, 0], :, :]).all()) - assert ((result[3, :, :] == fill_value).all()) - assert (result.dtype == out_dtype) - - result = algos.take_nd(data, indexer, axis=1, - fill_value=fill_value) - assert ((result[:, [0, 1, 2], :] == data[:, [2, 1, 0], :]).all()) - assert ((result[:, 3, :] == fill_value).all()) - assert (result.dtype == out_dtype) - - result = algos.take_nd(data, indexer, axis=2, - fill_value=fill_value) - assert ((result[:, :, [0, 1, 2]] == data[:, :, [2, 1, 0]]).all()) - assert ((result[:, :, 3] == fill_value).all()) - assert (result.dtype == out_dtype) - - indexer = [2, 1, 0, 1] - - result = algos.take_nd(data, indexer, axis=0, - fill_value=fill_value) - assert ((result[[0, 1, 2, 3], :, :] == data[indexer, :, :]).all()) - assert (result.dtype == dtype) - - result = algos.take_nd(data, indexer, axis=1, - fill_value=fill_value) - assert ((result[:, [0, 1, 2, 3], :] == data[:, indexer, :]).all()) - assert (result.dtype == dtype) - - result = algos.take_nd(data, indexer, axis=2, - fill_value=fill_value) - assert ((result[:, :, [0, 1, 2, 3]] == data[:, :, indexer]).all()) - assert (result.dtype == dtype) - - _test_dtype(np.int8, np.int16(127), np.int8) - _test_dtype(np.int8, np.int16(128), np.int16) - _test_dtype(np.int32, 1, np.int32) - _test_dtype(np.int32, 2.0, np.float64) - _test_dtype(np.int32, 3.0 + 4.0j, np.complex128) - _test_dtype(np.int32, True, np.object_) - _test_dtype(np.int32, '', np.object_) - _test_dtype(np.float64, 1, np.float64) - _test_dtype(np.float64, 2.0, np.float64) - _test_dtype(np.float64, 3.0 + 4.0j, np.complex128) - _test_dtype(np.float64, True, np.object_) - _test_dtype(np.float64, '', np.object_) - _test_dtype(np.complex128, 1, np.complex128) - _test_dtype(np.complex128, 2.0, np.complex128) - _test_dtype(np.complex128, 3.0 + 4.0j, np.complex128) - _test_dtype(np.complex128, True, np.object_) - _test_dtype(np.complex128, '', np.object_) - _test_dtype(np.bool_, 1, np.object_) - _test_dtype(np.bool_, 2.0, np.object_) - _test_dtype(np.bool_, 3.0 + 4.0j, np.object_) - _test_dtype(np.bool_, True, np.bool_) - _test_dtype(np.bool_, '', np.object_) + else: + for i, out in enumerate([out0, out1, out2]): + with tm.assert_raises_regex(TypeError, + self.fill_error): + algos.take_nd(data, indexer, out=out, axis=i) + + # No Exception otherwise. 
+ data.take(indexer, out=out, axis=i) + + def test_3d_fill_nonna(self, dtype_fill_out_dtype): + dtype, fill_value, out_dtype = dtype_fill_out_dtype + + data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype) + indexer = [2, 1, 0, -1] + + result = algos.take_nd(data, indexer, axis=0, + fill_value=fill_value) + assert ((result[[0, 1, 2], :, :] == data[[2, 1, 0], :, :]).all()) + assert ((result[3, :, :] == fill_value).all()) + assert (result.dtype == out_dtype) + + result = algos.take_nd(data, indexer, axis=1, + fill_value=fill_value) + assert ((result[:, [0, 1, 2], :] == data[:, [2, 1, 0], :]).all()) + assert ((result[:, 3, :] == fill_value).all()) + assert (result.dtype == out_dtype) + + result = algos.take_nd(data, indexer, axis=2, + fill_value=fill_value) + assert ((result[:, :, [0, 1, 2]] == data[:, :, [2, 1, 0]]).all()) + assert ((result[:, :, 3] == fill_value).all()) + assert (result.dtype == out_dtype) + + indexer = [2, 1, 0, 1] + result = algos.take_nd(data, indexer, axis=0, + fill_value=fill_value) + assert ((result[[0, 1, 2, 3], :, :] == data[indexer, :, :]).all()) + assert (result.dtype == dtype) + + result = algos.take_nd(data, indexer, axis=1, + fill_value=fill_value) + assert ((result[:, [0, 1, 2, 3], :] == data[:, indexer, :]).all()) + assert (result.dtype == dtype) + + result = algos.take_nd(data, indexer, axis=2, + fill_value=fill_value) + assert ((result[:, :, [0, 1, 2, 3]] == data[:, :, indexer]).all()) + assert (result.dtype == dtype) def test_1d_other_dtypes(self): arr = np.random.randn(10).astype(np.float32) From 5f956cea01fefe7ba53a5b57b819c7ad98db5c9f Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Fri, 22 Jun 2018 16:45:26 -0600 Subject: [PATCH 078/113] TST: Add interval closed fixture to top-level conftest (#21595) --- pandas/conftest.py | 8 ++++++++ pandas/tests/indexes/interval/test_construction.py | 5 ----- pandas/tests/indexes/interval/test_interval.py | 5 ----- pandas/tests/indexes/interval/test_interval_range.py | 5 ----- pandas/tests/indexes/interval/test_interval_tree.py | 5 ----- pandas/tests/indexing/interval/test_interval.py | 5 +---- 6 files changed, 9 insertions(+), 24 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 9d806a91f37f7..d6b18db4e71f2 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -137,6 +137,14 @@ def nselect_method(request): return request.param +@pytest.fixture(params=['left', 'right', 'both', 'neither']) +def closed(request): + """ + Fixture for trying all interval closed parameters + """ + return request.param + + @pytest.fixture(params=[None, np.nan, pd.NaT, float('nan'), np.float('NaN')]) def nulls_fixture(request): """ diff --git a/pandas/tests/indexes/interval/test_construction.py b/pandas/tests/indexes/interval/test_construction.py index b1711c3444586..ac946a3421e53 100644 --- a/pandas/tests/indexes/interval/test_construction.py +++ b/pandas/tests/indexes/interval/test_construction.py @@ -14,11 +14,6 @@ import pandas.util.testing as tm -@pytest.fixture(params=['left', 'right', 'both', 'neither']) -def closed(request): - return request.param - - @pytest.fixture(params=[None, 'foo']) def name(request): return request.param diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 9920809a18a24..6a7330f8cfb68 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -12,11 +12,6 @@ import pandas as pd -@pytest.fixture(scope='class', params=['left', 'right', 'both', 'neither']) -def 
closed(request): - return request.param - - @pytest.fixture(scope='class', params=[None, 'foo']) def name(request): return request.param diff --git a/pandas/tests/indexes/interval/test_interval_range.py b/pandas/tests/indexes/interval/test_interval_range.py index 29fe2b0185662..447856e7e9d51 100644 --- a/pandas/tests/indexes/interval/test_interval_range.py +++ b/pandas/tests/indexes/interval/test_interval_range.py @@ -11,11 +11,6 @@ import pandas.util.testing as tm -@pytest.fixture(scope='class', params=['left', 'right', 'both', 'neither']) -def closed(request): - return request.param - - @pytest.fixture(scope='class', params=[None, 'foo']) def name(request): return request.param diff --git a/pandas/tests/indexes/interval/test_interval_tree.py b/pandas/tests/indexes/interval/test_interval_tree.py index 056d3e1087a2e..5f248bf7725e5 100644 --- a/pandas/tests/indexes/interval/test_interval_tree.py +++ b/pandas/tests/indexes/interval/test_interval_tree.py @@ -7,11 +7,6 @@ import pandas.util.testing as tm -@pytest.fixture(scope='class', params=['left', 'right', 'both', 'neither']) -def closed(request): - return request.param - - @pytest.fixture( scope='class', params=['int32', 'int64', 'float32', 'float64', 'uint64']) def dtype(request): diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index 233fbd2c8d7be..f2f59159032a2 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -3,7 +3,6 @@ import pandas as pd from pandas import Series, DataFrame, IntervalIndex, Interval -from pandas.compat import product import pandas.util.testing as tm @@ -51,9 +50,7 @@ def test_getitem_with_scalar(self): tm.assert_series_equal(expected, s[s >= 2]) # TODO: check this behavior is consistent with test_interval_new.py - @pytest.mark.parametrize('direction, closed', - product(('increasing', 'decreasing'), - ('left', 'right', 'neither', 'both'))) + @pytest.mark.parametrize('direction', ['increasing', 'decreasing']) def test_nonoverlapping_monotonic(self, direction, closed): tpls = [(0, 1), (2, 3), (4, 5)] if direction == 'decreasing': From 19b78ca10df56e90082aad1f84347476f5dc4248 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 22 Jun 2018 15:57:40 -0700 Subject: [PATCH 079/113] cache DateOffset attrs now that they are immutable (#21582) --- asv_bench/benchmarks/period.py | 5 +++++ doc/source/whatsnew/v0.24.0.txt | 1 + pandas/_libs/tslibs/offsets.pyx | 3 +++ pandas/tseries/offsets.py | 28 ++++++++++++---------------- 4 files changed, 21 insertions(+), 16 deletions(-) diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index 897a3338c164c..c34f9a737473e 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -64,6 +64,11 @@ def setup(self): def time_setitem_period_column(self): self.df['col'] = self.rng + def time_set_index(self): + # GH#21582 limited by comparisons of Period objects + self.df['col2'] = self.rng + self.df.set_index('col2', append=True) + class Algorithms(object): diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 4bfae7de01b8f..5f05bbdfdb948 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -130,6 +130,7 @@ Performance Improvements - Improved performance of :func:`Series.describe` in case of numeric dtpyes (:issue:`21274`) - Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`) +- Improved 
performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`) - .. _whatsnew_0240.docs: diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index a9ef9166e4d33..63add06db17b4 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -404,6 +404,9 @@ class _BaseOffset(object): kwds = {key: odict[key] for key in odict if odict[key]} state.update(kwds) + if '_cache' not in state: + state['_cache'] = {} + self.__dict__.update(state) if 'weekmask' in state and 'holidays' in state: diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index da8fdb4d79e34..a3f82c1a0902e 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -288,6 +288,7 @@ def isAnchored(self): # if there were a canonical docstring for what isAnchored means. return (self.n == 1) + @cache_readonly def _params(self): all_paras = self.__dict__.copy() if 'holidays' in all_paras and not all_paras['holidays']: @@ -322,8 +323,6 @@ def name(self): return self.rule_code def __eq__(self, other): - if other is None: - return False if isinstance(other, compat.string_types): from pandas.tseries.frequencies import to_offset @@ -333,13 +332,13 @@ def __eq__(self, other): if not isinstance(other, DateOffset): return False - return self._params() == other._params() + return self._params == other._params def __ne__(self, other): return not self == other def __hash__(self): - return hash(self._params()) + return hash(self._params) def __add__(self, other): if isinstance(other, (ABCDatetimeIndex, ABCSeries)): @@ -397,7 +396,7 @@ def _prefix(self): def rule_code(self): return self._prefix - @property + @cache_readonly def freqstr(self): try: code = self.rule_code @@ -601,7 +600,7 @@ def next_bday(self): else: return BusinessDay(n=nb_offset) - # TODO: Cache this once offsets are immutable + @cache_readonly def _get_daytime_flag(self): if self.start == self.end: raise ValueError('start and end must not be the same') @@ -643,12 +642,12 @@ def _prev_opening_time(self, other): return datetime(other.year, other.month, other.day, self.start.hour, self.start.minute) - # TODO: cache this once offsets are immutable + @cache_readonly def _get_business_hours_by_sec(self): """ Return business hours in a day by seconds. 
""" - if self._get_daytime_flag(): + if self._get_daytime_flag: # create dummy datetime to calculate businesshours in a day dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute) until = datetime(2014, 4, 1, self.end.hour, self.end.minute) @@ -662,7 +661,7 @@ def _get_business_hours_by_sec(self): def rollback(self, dt): """Roll provided date backward to next offset only if not on offset""" if not self.onOffset(dt): - businesshours = self._get_business_hours_by_sec() + businesshours = self._get_business_hours_by_sec if self.n >= 0: dt = self._prev_opening_time( dt) + timedelta(seconds=businesshours) @@ -683,9 +682,8 @@ def rollforward(self, dt): @apply_wraps def apply(self, other): - # calculate here because offset is not immutable - daytime = self._get_daytime_flag() - businesshours = self._get_business_hours_by_sec() + daytime = self._get_daytime_flag + businesshours = self._get_business_hours_by_sec bhdelta = timedelta(seconds=businesshours) if isinstance(other, datetime): @@ -766,7 +764,7 @@ def onOffset(self, dt): dt.minute, dt.second, dt.microsecond) # Valid BH can be on the different BusinessDay during midnight # Distinguish by the time spent from previous opening time - businesshours = self._get_business_hours_by_sec() + businesshours = self._get_business_hours_by_sec return self._onOffset(dt, businesshours) def _onOffset(self, dt, businesshours): @@ -2203,13 +2201,12 @@ def __eq__(self, other): if isinstance(other, Tick): return self.delta == other.delta else: - # TODO: Are there cases where this should raise TypeError? return False # This is identical to DateOffset.__hash__, but has to be redefined here # for Python 3, because we've redefined __eq__. def __hash__(self): - return hash(self._params()) + return hash(self._params) def __ne__(self, other): if isinstance(other, compat.string_types): @@ -2220,7 +2217,6 @@ def __ne__(self, other): if isinstance(other, Tick): return self.delta != other.delta else: - # TODO: Are there cases where this should raise TypeError? 
return True @property From 303a27974a1f553ae73775464cb8c5864fa28098 Mon Sep 17 00:00:00 2001 From: Ming Li <14131823+minggli@users.noreply.github.com> Date: Fri, 22 Jun 2018 23:59:27 +0100 Subject: [PATCH 080/113] BUG: Series dot product __rmatmul__ doesn't allow matrix vector multiplication (#21578) --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/series.py | 2 +- pandas/tests/series/test_analytics.py | 21 ++++++++++++++++++++- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 5f05bbdfdb948..00d358ad8b522 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -184,7 +184,7 @@ Offsets Numeric ^^^^^^^ -- +- Bug in :class:`Series` ``__rmatmul__`` doesn't support matrix vector multiplication (:issue:`21530`) - - diff --git a/pandas/core/series.py b/pandas/core/series.py index 2f762dff4aeab..a608db806d20b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2066,7 +2066,7 @@ def __matmul__(self, other): def __rmatmul__(self, other): """ Matrix multiplication using binary `@` operator in Python>=3.5 """ - return self.dot(other) + return self.dot(np.transpose(other)) @Substitution(klass='Series') @Appender(base._shared_docs['searchsorted']) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index b9c7b837b8b81..36342b5ba4ee1 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -849,11 +849,30 @@ def test_matmul(self): expected = np.dot(a.values, a.values) assert_almost_equal(result, expected) - # np.array @ Series (__rmatmul__) + # GH 21530 + # vector (1D np.array) @ Series (__rmatmul__) result = operator.matmul(a.values, a) expected = np.dot(a.values, a.values) assert_almost_equal(result, expected) + # GH 21530 + # vector (1D list) @ Series (__rmatmul__) + result = operator.matmul(a.values.tolist(), a) + expected = np.dot(a.values, a.values) + assert_almost_equal(result, expected) + + # GH 21530 + # matrix (2D np.array) @ Series (__rmatmul__) + result = operator.matmul(b.T.values, a) + expected = np.dot(b.T.values, a.values) + assert_almost_equal(result, expected) + + # GH 21530 + # matrix (2D nested lists) @ Series (__rmatmul__) + result = operator.matmul(b.T.values.tolist(), a) + expected = np.dot(b.T.values, a.values) + assert_almost_equal(result, expected) + # mixed dtype DataFrame @ Series a['p'] = int(a.p) result = operator.matmul(b.T, a) From a4798c3abce3040ea50966561251f31f62f3706c Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sat, 23 Jun 2018 00:01:39 +0100 Subject: [PATCH 081/113] BUG: first/last lose timezone in groupby with as_index=False (#21573) --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/groupby/groupby.py | 2 +- pandas/tests/groupby/test_nth.py | 61 +++++++++++++++++++++++++++++++- 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 00d358ad8b522..90fc579ae69e5 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -226,7 +226,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` with ``as_index=False`` leading to the loss of timezone information (:issue:`15884`) - - diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 3bc59157055ce..0bbdfbbe52ac4 100644 --- a/pandas/core/groupby/groupby.py +++ 
b/pandas/core/groupby/groupby.py @@ -4740,7 +4740,7 @@ def _wrap_transformed_output(self, output, names=None): def _wrap_agged_blocks(self, items, blocks): if not self.as_index: - index = np.arange(blocks[0].values.shape[1]) + index = np.arange(blocks[0].values.shape[-1]) mgr = BlockManager(blocks, [items, index]) result = DataFrame(mgr) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index a32ba9ad76f14..a1b748cd50e8f 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -1,11 +1,12 @@ import numpy as np import pandas as pd -from pandas import DataFrame, MultiIndex, Index, Series, isna +from pandas import DataFrame, MultiIndex, Index, Series, isna, Timestamp from pandas.compat import lrange from pandas.util.testing import ( assert_frame_equal, assert_produces_warning, assert_series_equal) +import pytest def test_first_last_nth(df): @@ -219,6 +220,64 @@ def test_nth_multi_index(three_group): assert_frame_equal(result, expected) +@pytest.mark.parametrize('data, expected_first, expected_last', [ + ({'id': ['A'], + 'time': Timestamp('2012-02-01 14:00:00', + tz='US/Central'), + 'foo': [1]}, + {'id': ['A'], + 'time': Timestamp('2012-02-01 14:00:00', + tz='US/Central'), + 'foo': [1]}, + {'id': ['A'], + 'time': Timestamp('2012-02-01 14:00:00', + tz='US/Central'), + 'foo': [1]}), + ({'id': ['A', 'B', 'A'], + 'time': [Timestamp('2012-01-01 13:00:00', + tz='America/New_York'), + Timestamp('2012-02-01 14:00:00', + tz='US/Central'), + Timestamp('2012-03-01 12:00:00', + tz='Europe/London')], + 'foo': [1, 2, 3]}, + {'id': ['A', 'B'], + 'time': [Timestamp('2012-01-01 13:00:00', + tz='America/New_York'), + Timestamp('2012-02-01 14:00:00', + tz='US/Central')], + 'foo': [1, 2]}, + {'id': ['A', 'B'], + 'time': [Timestamp('2012-03-01 12:00:00', + tz='Europe/London'), + Timestamp('2012-02-01 14:00:00', + tz='US/Central')], + 'foo': [3, 2]}) +]) +def test_first_last_tz(data, expected_first, expected_last): + # GH15884 + # Test that the timezone is retained when calling first + # or last on groupby with as_index=False + + df = DataFrame(data) + + result = df.groupby('id', as_index=False).first() + expected = DataFrame(expected_first) + cols = ['id', 'time', 'foo'] + assert_frame_equal(result[cols], expected[cols]) + + result = df.groupby('id', as_index=False)['time'].first() + assert_frame_equal(result, expected[['id', 'time']]) + + result = df.groupby('id', as_index=False).last() + expected = DataFrame(expected_last) + cols = ['id', 'time', 'foo'] + assert_frame_equal(result[cols], expected[cols]) + + result = df.groupby('id', as_index=False)['time'].last() + assert_frame_equal(result, expected[['id', 'time']]) + + def test_nth_multi_index_as_expected(): # PR 9090, related to issue 8979 # test nth on MultiIndex From f901431f6faf1d829cfd33969bc4813f9add8505 Mon Sep 17 00:00:00 2001 From: Jacopo Rota Date: Sat, 23 Jun 2018 01:04:38 +0200 Subject: [PATCH 082/113] add test case when to_csv argument is sys.stdout (#21572) --- pandas/tests/io/formats/test_to_csv.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index dfa3751bff57a..36c4ae547ad4e 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -285,3 +285,18 @@ def test_to_csv_string_array_utf8(self): df.to_csv(path, encoding='utf-8') with open(path, 'r') as f: assert f.read() == expected_utf8 + + @tm.capture_stdout + def test_to_csv_stdout_file(self): 
+ # GH 21561 + df = pd.DataFrame([['foo', 'bar'], ['baz', 'qux']], + columns=['name_1', 'name_2']) + expected_ascii = '''\ +,name_1,name_2 +0,foo,bar +1,baz,qux +''' + df.to_csv(sys.stdout, encoding='ascii') + output = sys.stdout.getvalue() + assert output == expected_ascii + assert not sys.stdout.closed From 8794ef2c44f8659ace718ad5d93fd4943f708116 Mon Sep 17 00:00:00 2001 From: Vu Le Date: Sat, 23 Jun 2018 06:07:21 +0700 Subject: [PATCH 083/113] BUG: Fix json_normalize throwing TypeError (#21536) (#21540) --- doc/source/whatsnew/v0.23.2.txt | 2 +- pandas/io/json/normalize.py | 8 +++++++- pandas/tests/io/json/test_normalize.py | 6 ++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index c781f45715bd4..ff872cfc6b3ef 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -71,7 +71,7 @@ Bug Fixes **I/O** - Bug in :func:`read_csv` that caused it to incorrectly raise an error when ``nrows=0``, ``low_memory=True``, and ``index_col`` was not ``None`` (:issue:`21141`) -- +- Bug in :func:`json_normalize` when formatting the ``record_prefix`` with integer columns (:issue:`21536`) - **Plotting** diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index b845a43b9ca9e..2004a24c2ec5a 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -170,6 +170,11 @@ def json_normalize(data, record_path=None, meta=None, 3 Summit 1234 John Kasich Ohio OH 4 Cuyahoga 1337 John Kasich Ohio OH + >>> data = {'A': [1, 2]} + >>> json_normalize(data, 'A', record_prefix='Prefix.') + Prefix.0 + 0 1 + 1 2 """ def _pull_field(js, spec): result = js @@ -259,7 +264,8 @@ def _recursive_extract(data, path, seen_meta, level=0): result = DataFrame(records) if record_prefix is not None: - result.rename(columns=lambda x: record_prefix + x, inplace=True) + result = result.rename( + columns=lambda x: "{p}{c}".format(p=record_prefix, c=x)) # Data types, a problem for k, v in compat.iteritems(meta_vals): diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 395c2c90767d3..200a853c48900 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -123,6 +123,12 @@ def test_simple_normalize_with_separator(self, deep_nested): 'country', 'states_name']).sort_values() assert result.columns.sort_values().equals(expected) + def test_value_array_record_prefix(self): + # GH 21536 + result = json_normalize({'A': [1, 2]}, 'A', record_prefix='Prefix.') + expected = DataFrame([[1], [2]], columns=['Prefix.0']) + tm.assert_frame_equal(result, expected) + def test_more_deeply_nested(self, deep_nested): result = json_normalize(deep_nested, ['states', 'cities'], From 6eb0341173759e12499ec1ebe763dcdea4ab3a0d Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sat, 23 Jun 2018 00:36:44 +0100 Subject: [PATCH 084/113] DOC: updated the Series.str.rsplit and Series.str.split docstrings (#21026) --- pandas/core/strings.py | 244 +++++++++++++++++++++-------------------- 1 file changed, 123 insertions(+), 121 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 08239ae4dae20..b27cfdfe3f1bd 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1343,108 +1343,7 @@ def str_pad(arr, width, side='left', fillchar=' '): def str_split(arr, pat=None, n=None): - """ - Split strings around given separator/delimiter. 
- - Split each string in the caller's values by given - pattern, propagating NaN values. Equivalent to :meth:`str.split`. - - Parameters - ---------- - pat : str, optional - String or regular expression to split on. - If not specified, split on whitespace. - n : int, default -1 (all) - Limit number of splits in output. - ``None``, 0 and -1 will be interpreted as return all splits. - expand : bool, default False - Expand the split strings into separate columns. - - * If ``True``, return DataFrame/MultiIndex expanding dimensionality. - * If ``False``, return Series/Index, containing lists of strings. - Returns - ------- - Series, Index, DataFrame or MultiIndex - Type matches caller unless ``expand=True`` (see Notes). - - Notes - ----- - The handling of the `n` keyword depends on the number of found splits: - - - If found splits > `n`, make first `n` splits only - - If found splits <= `n`, make all splits - - If for a certain row the number of found splits < `n`, - append `None` for padding up to `n` if ``expand=True`` - - If using ``expand=True``, Series and Index callers return DataFrame and - MultiIndex objects, respectively. - - See Also - -------- - str.split : Standard library version of this method. - Series.str.get_dummies : Split each string into dummy variables. - Series.str.partition : Split string on a separator, returning - the before, separator, and after components. - - Examples - -------- - >>> s = pd.Series(["this is good text", "but this is even better"]) - - By default, split will return an object of the same size - having lists containing the split elements - - >>> s.str.split() - 0 [this, is, good, text] - 1 [but, this, is, even, better] - dtype: object - >>> s.str.split("random") - 0 [this is good text] - 1 [but this is even better] - dtype: object - - When using ``expand=True``, the split elements will expand out into - separate columns. - - For Series object, output return type is DataFrame. - - >>> s.str.split(expand=True) - 0 1 2 3 4 - 0 this is good text None - 1 but this is even better - >>> s.str.split(" is ", expand=True) - 0 1 - 0 this good text - 1 but this even better - - For Index object, output return type is MultiIndex. - - >>> i = pd.Index(["ba 100 001", "ba 101 002", "ba 102 003"]) - >>> i.str.split(expand=True) - MultiIndex(levels=[['ba'], ['100', '101', '102'], ['001', '002', '003']], - labels=[[0, 0, 0], [0, 1, 2], [0, 1, 2]]) - - Parameter `n` can be used to limit the number of splits in the output. - - >>> s.str.split("is", n=1) - 0 [th, is good text] - 1 [but th, is even better] - dtype: object - >>> s.str.split("is", n=1, expand=True) - 0 1 - 0 th is good text - 1 but th is even better - - If NaN is present, it is propagated throughout the columns - during the split. - - >>> s = pd.Series(["this is good text", "but this is even better", np.nan]) - >>> s.str.split(n=3, expand=True) - 0 1 2 3 - 0 this is good text - 1 but this is even better - 2 NaN NaN NaN NaN - """ if pat is None: if n is None or n == 0: n = -1 @@ -1464,25 +1363,7 @@ def str_split(arr, pat=None, n=None): def str_rsplit(arr, pat=None, n=None): - """ - Split each string in the Series/Index by the given delimiter - string, starting at the end of the string and working to the front. - Equivalent to :meth:`str.rsplit`. - Parameters - ---------- - pat : string, default None - Separator to split on. 
If None, splits on whitespace - n : int, default -1 (all) - None, 0 and -1 will be interpreted as return all splits - expand : bool, default False - * If True, return DataFrame/MultiIndex expanding dimensionality. - * If False, return Series/Index. - - Returns - ------- - split : Series/Index or DataFrame/MultiIndex of objects - """ if n is None or n == 0: n = -1 f = lambda x: x.rsplit(pat, n) @@ -2325,12 +2206,133 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): res = Series(res, index=data.index, name=self._orig.name) return res - @copy(str_split) + _shared_docs['str_split'] = (""" + Split strings around given separator/delimiter. + + Splits the string in the Series/Index from the %(side)s, + at the specified delimiter string. Equivalent to :meth:`str.%(method)s`. + + Parameters + ---------- + pat : str, optional + String or regular expression to split on. + If not specified, split on whitespace. + n : int, default -1 (all) + Limit number of splits in output. + ``None``, 0 and -1 will be interpreted as return all splits. + expand : bool, default False + Expand the splitted strings into separate columns. + + * If ``True``, return DataFrame/MultiIndex expanding dimensionality. + * If ``False``, return Series/Index, containing lists of strings. + + Returns + ------- + Series, Index, DataFrame or MultiIndex + Type matches caller unless ``expand=True`` (see Notes). + + See Also + -------- + Series.str.split : Split strings around given separator/delimiter. + Series.str.rsplit : Splits string around given separator/delimiter, + starting from the right. + Series.str.join : Join lists contained as elements in the Series/Index + with passed delimiter. + str.split : Standard library version for split. + str.rsplit : Standard library version for rsplit. + + Notes + ----- + The handling of the `n` keyword depends on the number of found splits: + + - If found splits > `n`, make first `n` splits only + - If found splits <= `n`, make all splits + - If for a certain row the number of found splits < `n`, + append `None` for padding up to `n` if ``expand=True`` + + If using ``expand=True``, Series and Index callers return DataFrame and + MultiIndex objects, respectively. + + Examples + -------- + >>> s = pd.Series(["this is a regular sentence", + "https://docs.python.org/3/tutorial/index.html", np.nan]) + + In the default setting, the string is split by whitespace. + + >>> s.str.split() + 0 [this, is, a, regular, sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + Without the `n` parameter, the outputs of `rsplit` and `split` + are identical. + + >>> s.str.rsplit() + 0 [this, is, a, regular, sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + The `n` parameter can be used to limit the number of splits on the + delimiter. The outputs of `split` and `rsplit` are different. + + >>> s.str.split(n=2) + 0 [this, is, a regular sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + >>> s.str.rsplit(n=2) + 0 [this is a, regular, sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + The `pat` parameter can be used to split by other characters. + + >>> s.str.split(pat = "/") + 0 [this is a regular sentence] + 1 [https:, , docs.python.org, 3, tutorial, index... + 2 NaN + dtype: object + + When using ``expand=True``, the split elements will expand out into + separate columns. If NaN is present, it is propagated throughout + the columns during the split. 
+ + >>> s.str.split(expand=True) + 0 1 2 3 + 0 this is a regular + 1 https://docs.python.org/3/tutorial/index.html None None None + 2 NaN NaN NaN NaN \ + + 4 + 0 sentence + 1 None + 2 NaN + + For slightly more complex use cases like splitting the html document name + from a url, a combination of parameter settings can be used. + + >>> s.str.rsplit("/", n=1, expand=True) + 0 1 + 0 this is a regular sentence None + 1 https://docs.python.org/3/tutorial index.html + 2 NaN NaN + """) + + @Appender(_shared_docs['str_split'] % { + 'side': 'beginning', + 'method': 'split'}) def split(self, pat=None, n=-1, expand=False): result = str_split(self._data, pat, n=n) return self._wrap_result(result, expand=expand) - @copy(str_rsplit) + @Appender(_shared_docs['str_split'] % { + 'side': 'end', + 'method': 'rsplit'}) def rsplit(self, pat=None, n=-1, expand=False): result = str_rsplit(self._data, pat, n=n) return self._wrap_result(result, expand=expand) From 289f3adaaa91a8cf491b11fd72ddda4852a23611 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Sat, 23 Jun 2018 05:27:58 -0600 Subject: [PATCH 085/113] TST: Use multiple instances of parametrize instead of product (#21602) --- pandas/tests/dtypes/test_dtypes.py | 14 +-- pandas/tests/frame/test_rank.py | 124 +++++++++++----------- pandas/tests/groupby/test_function.py | 7 +- pandas/tests/groupby/test_whitelist.py | 12 +-- pandas/tests/reshape/test_concat.py | 11 +- pandas/tests/sparse/series/test_series.py | 20 ++-- pandas/tests/test_multilevel.py | 80 +++++++------- pandas/tests/test_resample.py | 98 +++++++++-------- pandas/tests/test_window.py | 7 +- 9 files changed, 185 insertions(+), 188 deletions(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index cc833af03ae66..eee53a2fcac6a 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -2,8 +2,6 @@ import re import pytest -from itertools import product - import numpy as np import pandas as pd from pandas import ( @@ -233,12 +231,14 @@ def test_dst(self): assert is_datetimetz(s2) assert s1.dtype == s2.dtype - def test_parser(self): + @pytest.mark.parametrize('tz', ['UTC', 'US/Eastern']) + @pytest.mark.parametrize('constructor', ['M8', 'datetime64']) + def test_parser(self, tz, constructor): # pr #11245 - for tz, constructor in product(('UTC', 'US/Eastern'), - ('M8', 'datetime64')): - assert (DatetimeTZDtype('%s[ns, %s]' % (constructor, tz)) == - DatetimeTZDtype('ns', tz)) + dtz_str = '{con}[ns, {tz}]'.format(con=constructor, tz=tz) + result = DatetimeTZDtype(dtz_str) + expected = DatetimeTZDtype('ns', tz) + assert result == expected def test_empty(self): dt = DatetimeTZDtype() diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py index b8ba408b54715..a1210f1ed54e4 100644 --- a/pandas/tests/frame/test_rank.py +++ b/pandas/tests/frame/test_rank.py @@ -10,7 +10,6 @@ from pandas.util.testing import assert_frame_equal from pandas.tests.frame.common import TestData from pandas import Series, DataFrame -from pandas.compat import product class TestRank(TestData): @@ -26,6 +25,13 @@ class TestRank(TestData): 'dense': np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]), } + @pytest.fixture(params=['average', 'min', 'max', 'first', 'dense']) + def method(self, request): + """ + Fixture for trying all rank methods + """ + return request.param + def test_rank(self): rankdata = pytest.importorskip('scipy.stats.rankdata') @@ -217,34 +223,35 @@ def test_rank_methods_frame(self): expected = expected.astype('float64') 
tm.assert_frame_equal(result, expected) - def test_rank_descending(self): - dtypes = ['O', 'f8', 'i8'] + @pytest.mark.parametrize('dtype', ['O', 'f8', 'i8']) + def test_rank_descending(self, method, dtype): - for dtype, method in product(dtypes, self.results): - if 'i' in dtype: - df = self.df.dropna() - else: - df = self.df.astype(dtype) + if 'i' in dtype: + df = self.df.dropna() + else: + df = self.df.astype(dtype) - res = df.rank(ascending=False) - expected = (df.max() - df).rank() - assert_frame_equal(res, expected) + res = df.rank(ascending=False) + expected = (df.max() - df).rank() + assert_frame_equal(res, expected) - if method == 'first' and dtype == 'O': - continue + if method == 'first' and dtype == 'O': + return - expected = (df.max() - df).rank(method=method) + expected = (df.max() - df).rank(method=method) - if dtype != 'O': - res2 = df.rank(method=method, ascending=False, - numeric_only=True) - assert_frame_equal(res2, expected) + if dtype != 'O': + res2 = df.rank(method=method, ascending=False, + numeric_only=True) + assert_frame_equal(res2, expected) - res3 = df.rank(method=method, ascending=False, - numeric_only=False) - assert_frame_equal(res3, expected) + res3 = df.rank(method=method, ascending=False, + numeric_only=False) + assert_frame_equal(res3, expected) - def test_rank_2d_tie_methods(self): + @pytest.mark.parametrize('axis', [0, 1]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_rank_2d_tie_methods(self, method, axis, dtype): df = self.df def _check2d(df, expected, method='average', axis=0): @@ -257,43 +264,38 @@ def _check2d(df, expected, method='average', axis=0): result = df.rank(method=method, axis=axis) assert_frame_equal(result, exp_df) - dtypes = [None, object] disabled = set([(object, 'first')]) - results = self.results - - for method, axis, dtype in product(results, [0, 1], dtypes): - if (dtype, method) in disabled: - continue - frame = df if dtype is None else df.astype(dtype) - _check2d(frame, results[method], method=method, axis=axis) - - -@pytest.mark.parametrize( - "method,exp", [("dense", - [[1., 1., 1.], - [1., 0.5, 2. / 3], - [1., 0.5, 1. / 3]]), - ("min", - [[1. / 3, 1., 1.], - [1. / 3, 1. / 3, 2. / 3], - [1. / 3, 1. / 3, 1. / 3]]), - ("max", - [[1., 1., 1.], - [1., 2. / 3, 2. / 3], - [1., 2. / 3, 1. / 3]]), - ("average", - [[2. / 3, 1., 1.], - [2. / 3, 0.5, 2. / 3], - [2. / 3, 0.5, 1. / 3]]), - ("first", - [[1. / 3, 1., 1.], - [2. / 3, 1. / 3, 2. / 3], - [3. / 3, 2. / 3, 1. / 3]])]) -def test_rank_pct_true(method, exp): - # see gh-15630. - - df = DataFrame([[2012, 66, 3], [2012, 65, 2], [2012, 65, 1]]) - result = df.rank(method=method, pct=True) - - expected = DataFrame(exp) - tm.assert_frame_equal(result, expected) + if (dtype, method) in disabled: + return + frame = df if dtype is None else df.astype(dtype) + _check2d(frame, self.results[method], method=method, axis=axis) + + @pytest.mark.parametrize( + "method,exp", [("dense", + [[1., 1., 1.], + [1., 0.5, 2. / 3], + [1., 0.5, 1. / 3]]), + ("min", + [[1. / 3, 1., 1.], + [1. / 3, 1. / 3, 2. / 3], + [1. / 3, 1. / 3, 1. / 3]]), + ("max", + [[1., 1., 1.], + [1., 2. / 3, 2. / 3], + [1., 2. / 3, 1. / 3]]), + ("average", + [[2. / 3, 1., 1.], + [2. / 3, 0.5, 2. / 3], + [2. / 3, 0.5, 1. / 3]]), + ("first", + [[1. / 3, 1., 1.], + [2. / 3, 1. / 3, 2. / 3], + [3. / 3, 2. / 3, 1. / 3]])]) + def test_rank_pct_true(self, method, exp): + # see gh-15630. 
+ + df = DataFrame([[2012, 66, 3], [2012, 65, 2], [2012, 65, 1]]) + result = df.rank(method=method, pct=True) + + expected = DataFrame(exp) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index f1d678db4ff7f..9df362a8e132f 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -778,9 +778,10 @@ def test_frame_describe_unstacked_format(): # nunique # -------------------------------- -@pytest.mark.parametrize("n, m", cart_product(10 ** np.arange(2, 6), - (10, 100, 1000))) -@pytest.mark.parametrize("sort, dropna", cart_product((False, True), repeat=2)) +@pytest.mark.parametrize('n', 10 ** np.arange(2, 6)) +@pytest.mark.parametrize('m', [10, 100, 1000]) +@pytest.mark.parametrize('sort', [False, True]) +@pytest.mark.parametrize('dropna', [False, True]) def test_series_groupby_nunique(n, m, sort, dropna): def check_nunique(df, keys, as_index=True): diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 8d6e074881cbb..f4a58b9cbe61b 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -8,7 +8,6 @@ import numpy as np from pandas import DataFrame, Series, compat, date_range, Index, MultiIndex from pandas.util import testing as tm -from pandas.compat import lrange, product AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', 'mad', 'std', 'var', 'sem'] @@ -175,12 +174,11 @@ def raw_frame(): return raw_frame -@pytest.mark.parametrize( - "op, level, axis, skipna, sort", - product(AGG_FUNCTIONS, - lrange(2), lrange(2), - [True, False], - [True, False])) +@pytest.mark.parametrize('op', AGG_FUNCTIONS) +@pytest.mark.parametrize('level', [0, 1]) +@pytest.mark.parametrize('axis', [0, 1]) +@pytest.mark.parametrize('skipna', [True, False]) +@pytest.mark.parametrize('sort', [True, False]) def test_regression_whitelist_methods( raw_frame, op, level, axis, skipna, sort): diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index dea305d4b3fee..8d819f9926abb 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1,5 +1,5 @@ from warnings import catch_warnings -from itertools import combinations, product +from itertools import combinations import datetime as dt import dateutil @@ -941,10 +941,11 @@ def test_append_different_columns_types(self, df_columns, series_index): columns=combined_columns) assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "index_can_append, index_cannot_append_with_other", - product(indexes_can_append, indexes_cannot_append_with_other), - ids=lambda x: x.__class__.__name__) + @pytest.mark.parametrize('index_can_append', indexes_can_append, + ids=lambda x: x.__class__.__name__) + @pytest.mark.parametrize('index_cannot_append_with_other', + indexes_cannot_append_with_other, + ids=lambda x: x.__class__.__name__) def test_append_different_columns_types_raises( self, index_can_append, index_cannot_append_with_other): # GH18359 diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py index eb63c87820070..921c30234660f 100644 --- a/pandas/tests/sparse/series/test_series.py +++ b/pandas/tests/sparse/series/test_series.py @@ -23,8 +23,6 @@ from pandas.core.sparse.api import SparseSeries from pandas.tests.series.test_api import SharedWithSparse -from itertools import product - def _test_data1(): # nan-based @@ -985,16 +983,16 @@ def 
test_combine_first(self): tm.assert_sp_series_equal(result, result2) tm.assert_sp_series_equal(result, expected) - @pytest.mark.parametrize('deep,fill_values', [([True, False], - [0, 1, np.nan, None])]) - def test_memory_usage_deep(self, deep, fill_values): - for deep, fill_value in product(deep, fill_values): - sparse_series = SparseSeries(fill_values, fill_value=fill_value) - dense_series = Series(fill_values) - sparse_usage = sparse_series.memory_usage(deep=deep) - dense_usage = dense_series.memory_usage(deep=deep) + @pytest.mark.parametrize('deep', [True, False]) + @pytest.mark.parametrize('fill_value', [0, 1, np.nan, None]) + def test_memory_usage_deep(self, deep, fill_value): + values = [0, 1, np.nan, None] + sparse_series = SparseSeries(values, fill_value=fill_value) + dense_series = Series(values) + sparse_usage = sparse_series.memory_usage(deep=deep) + dense_usage = dense_series.memory_usage(deep=deep) - assert sparse_usage < dense_usage + assert sparse_usage < dense_usage class TestSparseHandlingMultiIndexes(object): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 79e05c90a21b0..3caee2b44c579 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -20,6 +20,9 @@ import pandas as pd import pandas._libs.index as _index +AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', 'mad', + 'std', 'var', 'sem'] + class Base(object): @@ -1389,60 +1392,57 @@ def test_count(self): pytest.raises(KeyError, series.count, 'x') pytest.raises(KeyError, frame.count, level='x') - AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', - 'mad', 'std', 'var', 'sem'] - + @pytest.mark.parametrize('op', AGG_FUNCTIONS) + @pytest.mark.parametrize('level', [0, 1]) + @pytest.mark.parametrize('skipna', [True, False]) @pytest.mark.parametrize('sort', [True, False]) - def test_series_group_min_max(self, sort): + def test_series_group_min_max(self, op, level, skipna, sort): # GH 17537 - for op, level, skipna in cart_product(self.AGG_FUNCTIONS, lrange(2), - [False, True]): - grouped = self.series.groupby(level=level, sort=sort) - aggf = lambda x: getattr(x, op)(skipna=skipna) - # skipna=True - leftside = grouped.agg(aggf) - rightside = getattr(self.series, op)(level=level, skipna=skipna) - if sort: - rightside = rightside.sort_index(level=level) - tm.assert_series_equal(leftside, rightside) - + grouped = self.series.groupby(level=level, sort=sort) + # skipna=True + leftside = grouped.agg(lambda x: getattr(x, op)(skipna=skipna)) + rightside = getattr(self.series, op)(level=level, skipna=skipna) + if sort: + rightside = rightside.sort_index(level=level) + tm.assert_series_equal(leftside, rightside) + + @pytest.mark.parametrize('op', AGG_FUNCTIONS) + @pytest.mark.parametrize('level', [0, 1]) + @pytest.mark.parametrize('axis', [0, 1]) + @pytest.mark.parametrize('skipna', [True, False]) @pytest.mark.parametrize('sort', [True, False]) - def test_frame_group_ops(self, sort): + def test_frame_group_ops(self, op, level, axis, skipna, sort): # GH 17537 self.frame.iloc[1, [1, 2]] = np.nan self.frame.iloc[7, [0, 1]] = np.nan - for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, - lrange(2), lrange(2), - [False, True]): - - if axis == 0: - frame = self.frame - else: - frame = self.frame.T + if axis == 0: + frame = self.frame + else: + frame = self.frame.T - grouped = frame.groupby(level=level, axis=axis, sort=sort) + grouped = frame.groupby(level=level, axis=axis, sort=sort) - pieces = [] + pieces = [] - def aggf(x): 
- pieces.append(x) - return getattr(x, op)(skipna=skipna, axis=axis) + def aggf(x): + pieces.append(x) + return getattr(x, op)(skipna=skipna, axis=axis) - leftside = grouped.agg(aggf) - rightside = getattr(frame, op)(level=level, axis=axis, - skipna=skipna) - if sort: - rightside = rightside.sort_index(level=level, axis=axis) - frame = frame.sort_index(level=level, axis=axis) + leftside = grouped.agg(aggf) + rightside = getattr(frame, op)(level=level, axis=axis, + skipna=skipna) + if sort: + rightside = rightside.sort_index(level=level, axis=axis) + frame = frame.sort_index(level=level, axis=axis) - # for good measure, groupby detail - level_index = frame._get_axis(axis).levels[level] + # for good measure, groupby detail + level_index = frame._get_axis(axis).levels[level] - tm.assert_index_equal(leftside._get_axis(axis), level_index) - tm.assert_index_equal(rightside._get_axis(axis), level_index) + tm.assert_index_equal(leftside._get_axis(axis), level_index) + tm.assert_index_equal(rightside._get_axis(axis), level_index) - tm.assert_frame_equal(leftside, rightside) + tm.assert_frame_equal(leftside, rightside) def test_stat_op_corner(self): obj = Series([10.0], index=MultiIndex.from_tuples([(2, 3)])) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 6f0ad0535c6b4..60f23309b11d9 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -17,7 +17,7 @@ from pandas import (Series, DataFrame, Panel, Index, isna, notna, Timestamp) -from pandas.compat import range, lrange, zip, product, OrderedDict +from pandas.compat import range, lrange, zip, OrderedDict from pandas.errors import UnsupportedFunctionCall from pandas.core.groupby.groupby import DataError import pandas.core.common as com @@ -1951,30 +1951,32 @@ def test_resample_nunique_with_date_gap(self): assert_series_equal(results[0], results[2]) assert_series_equal(results[0], results[3]) - def test_resample_group_info(self): # GH10914 - for n, k in product((10000, 100000), (10, 100, 1000)): - dr = date_range(start='2015-08-27', periods=n // 10, freq='T') - ts = Series(np.random.randint(0, n // k, n).astype('int64'), - index=np.random.choice(dr, n)) + @pytest.mark.parametrize('n', [10000, 100000]) + @pytest.mark.parametrize('k', [10, 100, 1000]) + def test_resample_group_info(self, n, k): + # GH10914 + dr = date_range(start='2015-08-27', periods=n // 10, freq='T') + ts = Series(np.random.randint(0, n // k, n).astype('int64'), + index=np.random.choice(dr, n)) - left = ts.resample('30T').nunique() - ix = date_range(start=ts.index.min(), end=ts.index.max(), - freq='30T') + left = ts.resample('30T').nunique() + ix = date_range(start=ts.index.min(), end=ts.index.max(), + freq='30T') - vals = ts.values - bins = np.searchsorted(ix.values, ts.index, side='right') + vals = ts.values + bins = np.searchsorted(ix.values, ts.index, side='right') - sorter = np.lexsort((vals, bins)) - vals, bins = vals[sorter], bins[sorter] + sorter = np.lexsort((vals, bins)) + vals, bins = vals[sorter], bins[sorter] - mask = np.r_[True, vals[1:] != vals[:-1]] - mask |= np.r_[True, bins[1:] != bins[:-1]] + mask = np.r_[True, vals[1:] != vals[:-1]] + mask |= np.r_[True, bins[1:] != bins[:-1]] - arr = np.bincount(bins[mask] - 1, - minlength=len(ix)).astype('int64', copy=False) - right = Series(arr, index=ix) + arr = np.bincount(bins[mask] - 1, + minlength=len(ix)).astype('int64', copy=False) + right = Series(arr, index=ix) - assert_series_equal(left, right) + assert_series_equal(left, right) def test_resample_size(self): n 
= 10000 @@ -2323,28 +2325,25 @@ def test_annual_upsample(self): method='ffill') assert_series_equal(result, expected) - def test_quarterly_upsample(self): - targets = ['D', 'B', 'M'] - - for month in MONTHS: - ts = _simple_pts('1/1/1990', '12/31/1995', freq='Q-%s' % month) - - for targ, conv in product(targets, ['start', 'end']): - result = ts.resample(targ, convention=conv).ffill() - expected = result.to_timestamp(targ, how=conv) - expected = expected.asfreq(targ, 'ffill').to_period() - assert_series_equal(result, expected) - - def test_monthly_upsample(self): - targets = ['D', 'B'] + @pytest.mark.parametrize('month', MONTHS) + @pytest.mark.parametrize('target', ['D', 'B', 'M']) + @pytest.mark.parametrize('convention', ['start', 'end']) + def test_quarterly_upsample(self, month, target, convention): + freq = 'Q-{month}'.format(month=month) + ts = _simple_pts('1/1/1990', '12/31/1995', freq=freq) + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = expected.asfreq(target, 'ffill').to_period() + assert_series_equal(result, expected) + @pytest.mark.parametrize('target', ['D', 'B']) + @pytest.mark.parametrize('convention', ['start', 'end']) + def test_monthly_upsample(self, target, convention): ts = _simple_pts('1/1/1990', '12/31/1995', freq='M') - - for targ, conv in product(targets, ['start', 'end']): - result = ts.resample(targ, convention=conv).ffill() - expected = result.to_timestamp(targ, how=conv) - expected = expected.asfreq(targ, 'ffill').to_period() - assert_series_equal(result, expected) + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = expected.asfreq(target, 'ffill').to_period() + assert_series_equal(result, expected) def test_resample_basic(self): # GH3609 @@ -2455,17 +2454,16 @@ def test_fill_method_and_how_upsample(self): both = s.resample('M').ffill().resample('M').last().astype('int64') assert_series_equal(last, both) - def test_weekly_upsample(self): - targets = ['D', 'B'] - - for day in DAYS: - ts = _simple_pts('1/1/1990', '12/31/1995', freq='W-%s' % day) - - for targ, conv in product(targets, ['start', 'end']): - result = ts.resample(targ, convention=conv).ffill() - expected = result.to_timestamp(targ, how=conv) - expected = expected.asfreq(targ, 'ffill').to_period() - assert_series_equal(result, expected) + @pytest.mark.parametrize('day', DAYS) + @pytest.mark.parametrize('target', ['D', 'B']) + @pytest.mark.parametrize('convention', ['start', 'end']) + def test_weekly_upsample(self, day, target, convention): + freq = 'W-{day}'.format(day=day) + ts = _simple_pts('1/1/1990', '12/31/1995', freq=freq) + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = expected.asfreq(target, 'ffill').to_period() + assert_series_equal(result, expected) def test_resample_to_timestamps(self): ts = _simple_pts('1/1/1990', '12/31/1995', freq='M') diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index cfd88f41f855e..78d1fa84cc5db 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -2105,10 +2105,9 @@ def _non_null_values(x): (mean_x * mean_y)) @pytest.mark.slow - @pytest.mark.parametrize( - 'min_periods, adjust, ignore_na', product([0, 1, 2, 3, 4], - [True, False], - [False, True])) + @pytest.mark.parametrize('min_periods', [0, 1, 2, 3, 4]) + @pytest.mark.parametrize('adjust', [True, False]) + 
@pytest.mark.parametrize('ignore_na', [True, False]) def test_ewm_consistency(self, min_periods, adjust, ignore_na): def _weights(s, com, adjust, ignore_na): if isinstance(s, DataFrame): From a65ea90cc8ad974f556b76ee229c58cdd051c49d Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sat, 23 Jun 2018 12:12:18 -0400 Subject: [PATCH 086/113] MyPy cleanup and absolute imports in pandas.core.dtypes.common (#21008) --- pandas/core/arrays/base.py | 2 +- pandas/core/base.py | 2 +- pandas/core/dtypes/common.py | 24 +++++++++++++----------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index ce87c0a8b0c5a..30949ca6d1d6b 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -195,13 +195,13 @@ def __setitem__(self, key, value): ) def __len__(self): + # type: () -> int """Length of this array Returns ------- length : int """ - # type: () -> int raise AbstractMethodError(self) def __iter__(self): diff --git a/pandas/core/base.py b/pandas/core/base.py index c331ead8d2fef..6625a3bbe97d7 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -788,6 +788,7 @@ def base(self): @property def _ndarray_values(self): + # type: () -> np.ndarray """The data as an ndarray, possibly losing information. The expectation is that this is cheap to compute, and is primarily @@ -795,7 +796,6 @@ def _ndarray_values(self): - categorical -> codes """ - # type: () -> np.ndarray if is_extension_array_dtype(self): return self.values._ndarray_values return self.values diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index c45838e6040a9..05f82c67ddb8b 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -5,17 +5,19 @@ PY3, PY36) from pandas._libs import algos, lib from pandas._libs.tslibs import conversion -from .dtypes import (CategoricalDtype, CategoricalDtypeType, - DatetimeTZDtype, DatetimeTZDtypeType, - PeriodDtype, PeriodDtypeType, - IntervalDtype, IntervalDtypeType, - ExtensionDtype, PandasExtensionDtype) -from .generic import (ABCCategorical, ABCPeriodIndex, - ABCDatetimeIndex, ABCSeries, - ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex, - ABCIndexClass, ABCDateOffset) -from .inference import is_string_like, is_list_like -from .inference import * # noqa +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, + DatetimeTZDtypeType, PeriodDtype, PeriodDtypeType, IntervalDtype, + IntervalDtypeType, ExtensionDtype, PandasExtensionDtype) +from pandas.core.dtypes.generic import ( + ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, + ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex, ABCIndexClass, + ABCDateOffset) +from pandas.core.dtypes.inference import ( # noqa:F401 + is_bool, is_integer, is_hashable, is_iterator, is_float, + is_dict_like, is_scalar, is_string_like, is_list_like, is_number, + is_file_like, is_re, is_re_compilable, is_sequence, is_nested_list_like, + is_named_tuple, is_array_like, is_decimal, is_complex, is_interval) _POSSIBLY_CAST_DTYPES = set([np.dtype(t).name From 13a3d5abc479426558a53f4a27e84b4365880ca0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 25 Jun 2018 03:56:55 -0700 Subject: [PATCH 087/113] remove unused cimport (#21619) --- pandas/_libs/hashtable_class_helper.pxi.in | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index b92eb0e651276..4d2b6f845eb71 100644 --- 
a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -4,8 +4,6 @@ Template for each `dtype` helper function for hashtable WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -from missing cimport is_null_datetimelike - #---------------------------------------------------------------------- # VectorData From afdcac1411f697c8c0cd4b879dc5b3eef9cd8c26 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 25 Jun 2018 09:57:43 -0500 Subject: [PATCH 088/113] CI: Test against Python 3.7 (#21604) --- .travis.yml | 5 +++++ ci/travis-37.yaml | 14 ++++++++++++++ doc/source/install.rst | 2 +- doc/source/whatsnew/v0.23.2.txt | 6 ++++++ pandas/compat/__init__.py | 9 +++++---- pandas/tests/tseries/offsets/test_offsets.py | 10 ++++++++-- setup.py | 1 + 7 files changed, 40 insertions(+), 7 deletions(-) create mode 100644 ci/travis-37.yaml diff --git a/.travis.yml b/.travis.yml index 4e25380a7d941..2d2a0bc019c80 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,6 +35,11 @@ matrix: language: generic env: - JOB="3.5, OSX" ENV_FILE="ci/travis-35-osx.yaml" TEST_ARGS="--skip-slow --skip-network" + + - dist: trusty + env: + - JOB="3.7" ENV_FILE="ci/travis-37.yaml" TEST_ARGS="--skip-slow --skip-network" + - dist: trusty env: - JOB="2.7, locale, slow, old NumPy" ENV_FILE="ci/travis-27-locale.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true diff --git a/ci/travis-37.yaml b/ci/travis-37.yaml new file mode 100644 index 0000000000000..8b255c9e6ec72 --- /dev/null +++ b/ci/travis-37.yaml @@ -0,0 +1,14 @@ +name: pandas +channels: + - defaults + - conda-forge + - c3i_test +dependencies: + - python=3.7 + - cython + - numpy + - python-dateutil + - nomkl + - pytz + - pytest + - pytest-xdist diff --git a/doc/source/install.rst b/doc/source/install.rst index 87d1b63914635..fa6b9f4fc7f4d 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -43,7 +43,7 @@ For more information, see the `Python 3 statement`_ and the `Porting to Python 3 Python version support ---------------------- -Officially Python 2.7, 3.5, and 3.6. +Officially Python 2.7, 3.5, 3.6, and 3.7. Installing pandas ----------------- diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index ff872cfc6b3ef..d163ad8564efb 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -6,6 +6,12 @@ v0.23.2 This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes and bug fixes. We recommend that all users upgrade to this version. +.. note:: + + Pandas 0.23.2 is first pandas release that's compatible with + Python 3.7 (:issue:`20552`) + + .. 
contents:: What's new in v0.23.2
    :local:
    :backlinks: none
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index 5ae22694d0da7..28a55133e68aa 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -40,10 +40,11 @@ from collections import namedtuple
 PY2 = sys.version_info[0] == 2
-PY3 = (sys.version_info[0] >= 3)
-PY35 = (sys.version_info >= (3, 5))
-PY36 = (sys.version_info >= (3, 6))
-PYPY = (platform.python_implementation() == 'PyPy')
+PY3 = sys.version_info[0] >= 3
+PY35 = sys.version_info >= (3, 5)
+PY36 = sys.version_info >= (3, 6)
+PY37 = sys.version_info >= (3, 7)
+PYPY = platform.python_implementation() == 'PyPy'
 
 try:
     import __builtin__ as builtins
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
index 66cb9baeb9357..74bc08ee9649b 100644
--- a/pandas/tests/tseries/offsets/test_offsets.py
+++ b/pandas/tests/tseries/offsets/test_offsets.py
@@ -591,7 +591,10 @@ def test_repr(self):
         assert repr(self.offset) == '<BusinessDay>'
         assert repr(self.offset2) == '<2 * BusinessDays>'
 
-        expected = '<BusinessDay: offset=datetime.timedelta(1)>'
+        if compat.PY37:
+            expected = '<BusinessDay: offset=datetime.timedelta(days=1)>'
+        else:
+            expected = '<BusinessDay: offset=datetime.timedelta(1)>'
         assert repr(self.offset + timedelta(1)) == expected
 
     def test_with_offset(self):
@@ -1651,7 +1654,10 @@ def test_repr(self):
         assert repr(self.offset) == '<CustomBusinessDay>'
         assert repr(self.offset2) == '<2 * CustomBusinessDays>'
 
-        expected = '<CustomBusinessDay: offset=datetime.timedelta(1)>'
+        if compat.PY37:
+            expected = '<CustomBusinessDay: offset=datetime.timedelta(days=1)>'
+        else:
+            expected = '<CustomBusinessDay: offset=datetime.timedelta(1)>'
         assert repr(self.offset + timedelta(1)) == expected
 
     def test_with_offset(self):
diff --git a/setup.py b/setup.py
index d6890a08b09d0..dd026bd611727 100755
--- a/setup.py
+++ b/setup.py
@@ -217,6 +217,7 @@ def build_extensions(self):
     'Programming Language :: Python :: 2.7',
     'Programming Language :: Python :: 3.5',
     'Programming Language :: Python :: 3.6',
+    'Programming Language :: Python :: 3.7',
     'Programming Language :: Cython',
     'Topic :: Scientific/Engineering']

From 9f6c1540f63a92efb8880fe220216cc0d5474c22 Mon Sep 17 00:00:00 2001
From: aberres
Date: Tue, 26 Jun 2018 00:22:23 +0200
Subject: [PATCH 089/113] DOC: Do no use 'type' as first word when specifying a return type (#21622) (#21623)

---
 pandas/core/frame.py | 2 +-
 pandas/core/generic.py | 28 ++++++++++++++--------------
 pandas/core/groupby/groupby.py | 2 +-
 pandas/core/sparse/series.py | 2 +-
 pandas/core/window.py | 4 ++--
 pandas/io/packers.py | 2 +-
 pandas/io/pickle.py | 2 +-
 pandas/io/pytables.py | 2 +-
 8 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 74bb2abc27c4b..34d3eb0a6db73 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4675,7 +4675,7 @@ def swaplevel(self, i=-2, j=-1, axis=0):
         Returns
         -------
-        swapped : type of caller (new object)
+        swapped : same type as caller (new object)
 
         .. versionchanged:: 0.18.1
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 04ba0b5de3f7f..4efdd3812accd 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -800,7 +800,7 @@ def swaplevel(self, i=-2, j=-1, axis=0):
         Returns
         -------
-        swapped : type of caller (new object)
+        swapped : same type as caller (new object)
 
         .. 
versionchanged:: 0.18.1 @@ -1073,7 +1073,7 @@ def _set_axis_name(self, name, axis=0, inplace=False): Returns ------- - renamed : type of caller or None if inplace=True + renamed : same type as caller or None if inplace=True See Also -------- @@ -2468,7 +2468,7 @@ def get(self, key, default=None): Returns ------- - value : type of items contained in object + value : same type as items contained in object """ try: return self[key] @@ -2768,7 +2768,7 @@ def __delitem__(self, key): Returns ------- - taken : type of caller + taken : same type as caller An array-like containing the elements taken from the object. See Also @@ -2824,7 +2824,7 @@ def _take(self, indices, axis=0, is_copy=True): Returns ------- - taken : type of caller + taken : same type as caller An array-like containing the elements taken from the object. See Also @@ -3033,7 +3033,7 @@ def select(self, crit, axis=0): Returns ------- - selection : type of caller + selection : same type as caller """ warnings.warn("'select' is deprecated and will be removed in a " "future release. You can use " @@ -3924,7 +3924,7 @@ def head(self, n=5): Returns ------- - obj_head : type of caller + obj_head : same type as caller The first `n` rows of the caller object. See Also @@ -4447,7 +4447,7 @@ def _consolidate(self, inplace=False): Returns ------- - consolidated : type of caller + consolidated : same type as caller """ inplace = validate_bool_kwarg(inplace, 'inplace') if inplace: @@ -4916,7 +4916,7 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): Returns ------- - casted : type of caller + casted : same type as caller Examples -------- @@ -6691,7 +6691,7 @@ def asfreq(self, freq, method=None, how=None, normalize=False, Returns ------- - converted : type of caller + converted : same type as caller Examples -------- @@ -6772,7 +6772,7 @@ def at_time(self, time, asof=False): Returns ------- - values_at_time : type of caller + values_at_time : same type as caller Examples -------- @@ -6826,7 +6826,7 @@ def between_time(self, start_time, end_time, include_start=True, Returns ------- - values_between_time : type of caller + values_between_time : same type as caller Examples -------- @@ -7145,7 +7145,7 @@ def first(self, offset): Returns ------- - subset : type of caller + subset : same type as caller See Also -------- @@ -7209,7 +7209,7 @@ def last(self, offset): Returns ------- - subset : type of caller + subset : same type as caller See Also -------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 0bbdfbbe52ac4..c69d7f43de8ea 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -867,7 +867,7 @@ def get_group(self, name, obj=None): Returns ------- - group : type of obj + group : same type as obj """ if obj is None: obj = self._selected_obj diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 714cd09a27294..09d958059d355 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -398,7 +398,7 @@ def abs(self): Returns ------- - abs: type of caller + abs: same type as caller """ return self._constructor(np.abs(self.values), index=self.index).__finalize__(self) diff --git a/pandas/core/window.py b/pandas/core/window.py index 9d0f9dc4f75f9..f089e402261db 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -665,7 +665,7 @@ def _apply_window(self, mean=True, **kwargs): Returns ------- - y : type of input argument + y : same type as input argument """ window = self._prep_window(**kwargs) @@ -2139,7 +2139,7 @@ def 
_apply(self, func, **kwargs): Returns ------- - y : type of input argument + y : same type as input argument """ blocks, obj, index = self._create_blocks() diff --git a/pandas/io/packers.py b/pandas/io/packers.py index f9b1d1574d45c..03a5e8528f72d 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -178,7 +178,7 @@ def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs): Returns ------- - obj : type of object stored in file + obj : same type as object stored in file """ path_or_buf, _, _, should_close = get_filepath_or_buffer(path_or_buf) diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index d27735fbca318..d347d76c33e0f 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -103,7 +103,7 @@ def read_pickle(path, compression='infer'): Returns ------- - unpickled : type of object stored in file + unpickled : same type as object stored in file See Also -------- diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index aad387e0cdd58..580c7923017e5 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -687,7 +687,7 @@ def get(self, key): Returns ------- - obj : type of object stored in file + obj : same type as object stored in file """ group = self.get_node(key) if group is None: From c19017b465f4ef8e681535fcf6c84bb3217179b1 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Mon, 25 Jun 2018 23:24:30 +0100 Subject: [PATCH 090/113] CLN: make CategoricalIndex._create_categorical a classmethod (#21618) --- pandas/core/indexes/base.py | 3 ++- pandas/core/indexes/category.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4f140a6e77b2f..122f8662abb61 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1130,7 +1130,8 @@ def to_frame(self, index=True): """ from pandas import DataFrame - result = DataFrame(self._shallow_copy(), columns=[self.name or 0]) + name = self.name or 0 + result = DataFrame({name: self.values.copy()}) if index: result.index = self diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index fc669074758da..a2efe2c49c747 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -85,11 +85,11 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None, name = data.name if isinstance(data, ABCCategorical): - data = cls._create_categorical(cls, data, categories, ordered, + data = cls._create_categorical(data, categories, ordered, dtype) elif isinstance(data, CategoricalIndex): data = data._data - data = cls._create_categorical(cls, data, categories, ordered, + data = cls._create_categorical(data, categories, ordered, dtype) else: @@ -99,7 +99,7 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None, if data is not None or categories is None: cls._scalar_data_error(data) data = [] - data = cls._create_categorical(cls, data, categories, ordered, + data = cls._create_categorical(data, categories, ordered, dtype) if copy: @@ -136,8 +136,8 @@ def _create_from_codes(self, codes, categories=None, ordered=None, ordered=self.ordered) return CategoricalIndex(cat, name=name) - @staticmethod - def _create_categorical(self, data, categories=None, ordered=None, + @classmethod + def _create_categorical(cls, data, categories=None, ordered=None, dtype=None): """ *this is an internal non-public method* @@ -155,7 +155,7 @@ def _create_categorical(self, data, categories=None, ordered=None, ------- Categorical """ - if 
(isinstance(data, (ABCSeries, type(self))) and + if (isinstance(data, (cls, ABCSeries)) and is_categorical_dtype(data)): data = data.values @@ -179,7 +179,7 @@ def _simple_new(cls, values, name=None, categories=None, ordered=None, dtype=None, **kwargs): result = object.__new__(cls) - values = cls._create_categorical(cls, values, categories, ordered, + values = cls._create_categorical(values, categories, ordered, dtype=dtype) result._data = values result.name = name @@ -236,7 +236,7 @@ def _is_dtype_compat(self, other): if not is_list_like(values): values = [values] other = CategoricalIndex(self._create_categorical( - self, other, categories=self.categories, ordered=self.ordered)) + other, categories=self.categories, ordered=self.ordered)) if not other.isin(values).all(): raise TypeError("cannot append a non-category item to a " "CategoricalIndex") @@ -798,7 +798,7 @@ def _evaluate_compare(self, other): other = other._values elif isinstance(other, Index): other = self._create_categorical( - self, other._values, categories=self.categories, + other._values, categories=self.categories, ordered=self.ordered) if isinstance(other, (ABCCategorical, np.ndarray, From 5e4882ebfa31488fd431b6a2b7e3413afd0c6708 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 26 Jun 2018 00:29:57 +0200 Subject: [PATCH 091/113] PERF: do not check for label presence preventively (#21594) closes #21593 --- doc/source/whatsnew/v0.24.0.txt | 3 +- pandas/core/indexing.py | 36 +++---------------- .../indexes/datetimes/test_partial_slicing.py | 8 +++-- pandas/tests/indexing/test_multiindex.py | 9 ++++- 4 files changed, 20 insertions(+), 36 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 90fc579ae69e5..a63276efc5b7c 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -17,7 +17,7 @@ Other Enhancements - :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` (:issue:`13486`) - :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether NaN/NaT values should be considered (:issue:`17534`) - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`) -- :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`) +- :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`) .. 
_whatsnew_0240.api_breaking: @@ -199,6 +199,7 @@ Indexing ^^^^^^^^ - The traceback from a ``KeyError`` when asking ``.loc`` for a single missing label is now shorter and more clear (:issue:`21557`) +- When ``.ix`` is asked for a missing integer label in a :class:`MultiIndex` with a first level of integer type, it now raises a ``KeyError`` - consistently with the case of a flat :class:`Int64Index` - rather than falling back to positional indexing (:issue:`21593`) - - diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1f9fe5f947d0c..a69313a2d4a43 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -13,7 +13,6 @@ is_iterator, is_scalar, is_sparse, - _is_unorderable_exception, _ensure_platform_int) from pandas.core.dtypes.missing import isna, _infer_fill_value from pandas.errors import AbstractMethodError @@ -139,10 +138,7 @@ def _get_label(self, label, axis=None): # as its basically direct indexing # but will fail when the index is not present # see GH5667 - try: - return self.obj._xs(label, axis=axis) - except: - return self.obj[label] + return self.obj._xs(label, axis=axis) elif isinstance(label, tuple) and isinstance(label[axis], slice): raise IndexingError('no slices here, handle elsewhere') @@ -1797,9 +1793,8 @@ class _LocIndexer(_LocationIndexer): @Appender(_NDFrameIndexer._validate_key.__doc__) def _validate_key(self, key, axis): - ax = self.obj._get_axis(axis) - # valid for a label where all labels are in the index + # valid for a collection of labels (we check their presence later) # slice of labels (where start-end in labels) # slice of integers (only if in the labels) # boolean @@ -1807,32 +1802,11 @@ def _validate_key(self, key, axis): if isinstance(key, slice): return - elif com.is_bool_indexer(key): + if com.is_bool_indexer(key): return - elif not is_list_like_indexer(key): - - def error(): - if isna(key): - raise TypeError("cannot use label indexing with a null " - "key") - raise KeyError(u"the label [{key}] is not in the [{axis}]" - .format(key=key, - axis=self.obj._get_axis_name(axis))) - - try: - key = self._convert_scalar_indexer(key, axis) - except TypeError as e: - - # python 3 type errors should be raised - if _is_unorderable_exception(e): - error() - raise - except: - error() - - if not ax.contains(key): - error() + if not is_list_like_indexer(key): + self._convert_scalar_indexer(key, axis) def _is_scalar_access(self, key): # this is a shortcut accessor to both .loc and .iloc diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 4580d9fff31d5..e1e80e50e31f0 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -11,6 +11,8 @@ date_range, Index, Timedelta, Timestamp) from pandas.util import testing as tm +from pandas.core.indexing import IndexingError + class TestSlicing(object): def test_dti_slicing(self): @@ -313,12 +315,12 @@ def test_partial_slicing_with_multiindex(self): result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1', 'ABC')] tm.assert_series_equal(result, expected) - # this is a KeyError as we don't do partial string selection on - # multi-levels + # this is an IndexingError as we don't do partial string selection on + # multi-levels. 
def f(): df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')] - pytest.raises(KeyError, f) + pytest.raises(IndexingError, f) # GH 4294 # partial slice on a series mi diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index 43656a392e582..d2c4c8f5e149b 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -230,7 +230,8 @@ def test_iloc_getitem_multiindex(self): # corner column rs = mi_int.iloc[2, 2] with catch_warnings(record=True): - xp = mi_int.ix[:, 2].ix[2] + # First level is int - so use .loc rather than .ix (GH 21593) + xp = mi_int.loc[(8, 12), (4, 10)] assert rs == xp # this is basically regular indexing @@ -278,6 +279,12 @@ def test_loc_multiindex(self): xp = mi_int.ix[4] tm.assert_frame_equal(rs, xp) + # missing label + pytest.raises(KeyError, lambda: mi_int.loc[2]) + with catch_warnings(record=True): + # GH 21593 + pytest.raises(KeyError, lambda: mi_int.ix[2]) + def test_getitem_partial_int(self): # GH 12416 # with single item From b3443daadd2f3971f33dddd93f2b38dcaa1a9748 Mon Sep 17 00:00:00 2001 From: Ming Li <14131823+minggli@users.noreply.github.com> Date: Mon, 25 Jun 2018 23:31:08 +0100 Subject: [PATCH 092/113] TST: Refactor test_maybe_match_name and test_hash_pandas_object (#21600) --- pandas/tests/test_common.py | 36 +++++------------- pandas/tests/util/test_hashing.py | 62 +++++++++++++++---------------- 2 files changed, 39 insertions(+), 59 deletions(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index ef5f13bfa504a..61f838eeeeb30 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -25,7 +25,6 @@ def test_mut_exclusive(): def test_get_callable_name(): - from functools import partial getname = com._get_callable_name def fn(x): @@ -154,8 +153,7 @@ def test_random_state(): # Check with random state object state2 = npr.RandomState(10) - assert (com._random_state(state2).uniform() == - npr.RandomState(10).uniform()) + assert com._random_state(state2).uniform() == npr.RandomState(10).uniform() # check with no arg random state assert com._random_state() is np.random @@ -168,29 +166,15 @@ def test_random_state(): com._random_state(5.5) -def test_maybe_match_name(): - - matched = ops._maybe_match_name( - Series([1], name='x'), Series( - [2], name='x')) - assert (matched == 'x') - - matched = ops._maybe_match_name( - Series([1], name='x'), Series( - [2], name='y')) - assert (matched is None) - - matched = ops._maybe_match_name(Series([1]), Series([2], name='x')) - assert (matched is None) - - matched = ops._maybe_match_name(Series([1], name='x'), Series([2])) - assert (matched is None) - - matched = ops._maybe_match_name(Series([1], name='x'), [2]) - assert (matched == 'x') - - matched = ops._maybe_match_name([1], Series([2], name='y')) - assert (matched == 'y') +@pytest.mark.parametrize('left, right, expected', [ + (Series([1], name='x'), Series([2], name='x'), 'x'), + (Series([1], name='x'), Series([2], name='y'), None), + (Series([1]), Series([2], name='x'), None), + (Series([1], name='x'), Series([2]), None), + (Series([1], name='x'), [2], 'x'), + ([1], Series([2], name='y'), 'y')]) +def test_maybe_match_name(left, right, expected): + assert ops._maybe_match_name(left, right) == expected def test_dict_compat(): diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index fe8d75539879e..82b870c156cc8 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -142,39 +142,35 @@ def 
test_multiindex_objects(self): tm.assert_numpy_array_equal(np.sort(result), np.sort(expected)) - def test_hash_pandas_object(self): - - for obj in [Series([1, 2, 3]), - Series([1.0, 1.5, 3.2]), - Series([1.0, 1.5, np.nan]), - Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]), - Series(['a', 'b', 'c']), - Series(['a', np.nan, 'c']), - Series(['a', None, 'c']), - Series([True, False, True]), - Series(), - Index([1, 2, 3]), - Index([True, False, True]), - DataFrame({'x': ['a', 'b', 'c'], 'y': [1, 2, 3]}), - DataFrame(), - tm.makeMissingDataframe(), - tm.makeMixedDataFrame(), - tm.makeTimeDataFrame(), - tm.makeTimeSeries(), - tm.makeTimedeltaIndex(), - tm.makePeriodIndex(), - Series(tm.makePeriodIndex()), - Series(pd.date_range('20130101', - periods=3, tz='US/Eastern')), - MultiIndex.from_product( - [range(5), - ['foo', 'bar', 'baz'], - pd.date_range('20130101', periods=2)]), - MultiIndex.from_product( - [pd.CategoricalIndex(list('aabc')), - range(3)])]: - self.check_equal(obj) - self.check_not_equal_with_index(obj) + @pytest.mark.parametrize('obj', [ + Series([1, 2, 3]), + Series([1.0, 1.5, 3.2]), + Series([1.0, 1.5, np.nan]), + Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]), + Series(['a', 'b', 'c']), + Series(['a', np.nan, 'c']), + Series(['a', None, 'c']), + Series([True, False, True]), + Series(), + Index([1, 2, 3]), + Index([True, False, True]), + DataFrame({'x': ['a', 'b', 'c'], 'y': [1, 2, 3]}), + DataFrame(), + tm.makeMissingDataframe(), + tm.makeMixedDataFrame(), + tm.makeTimeDataFrame(), + tm.makeTimeSeries(), + tm.makeTimedeltaIndex(), + tm.makePeriodIndex(), + Series(tm.makePeriodIndex()), + Series(pd.date_range('20130101', periods=3, tz='US/Eastern')), + MultiIndex.from_product([range(5), ['foo', 'bar', 'baz'], + pd.date_range('20130101', periods=2)]), + MultiIndex.from_product([pd.CategoricalIndex(list('aabc')), range(3)]) + ]) + def test_hash_pandas_object(self, obj): + self.check_equal(obj) + self.check_not_equal_with_index(obj) def test_hash_pandas_object2(self): for name, s in self.df.iteritems(): From ebeccfce0318252ddec15b21df5e26013a77baf5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 26 Jun 2018 02:34:15 -0500 Subject: [PATCH 093/113] API/COMPAT: support axis=None for logical reduction (reduce over all axes) (#21486) * Compat with NumPy 1.15 logical func * Accepts axis=None as reduce all dims --- doc/source/whatsnew/v0.23.2.txt | 30 +++++++ pandas/core/frame.py | 22 ++++- pandas/core/generic.py | 44 ++++++---- pandas/core/panel.py | 17 +++- pandas/core/series.py | 3 +- pandas/tests/frame/test_analytics.py | 119 +++++++++++++++++++++++++-- pandas/tests/test_panel.py | 7 ++ pandas/util/_test_decorators.py | 4 + 8 files changed, 215 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index d163ad8564efb..c5de6f0a61720 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -16,6 +16,36 @@ and bug fixes. We recommend that all users upgrade to this version. :local: :backlinks: none +.. _whatsnew_0232.enhancements: + +Logical Reductions over Entire DataFrame +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`DataFrame.all` and :meth:`DataFrame.any` now accept ``axis=None`` to reduce over all axes to a scalar (:issue:`19976`) + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2], "B": [True, False]}) + df.all(axis=None) + + +This also provides compatibility with NumPy 1.15, which now dispatches to ``DataFrame.all``. 
+With NumPy 1.15 and pandas 0.23.1 or earlier, :func:`numpy.all` will no longer reduce over every axis: + +.. code-block:: python + + >>> # NumPy 1.15, pandas 0.23.1 + >>> np.any(pd.DataFrame({"A": [False], "B": [False]})) + A False + B False + dtype: bool + +With pandas 0.23.2, that will correctly return False, as it did with NumPy < 1.15. + +.. ipython:: python + + np.any(pd.DataFrame({"A": [False], "B": [False]})) + .. _whatsnew_0232.fixed_regressions: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 34d3eb0a6db73..0bf5acf14294a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6846,13 +6846,18 @@ def _count_level(self, level, axis=0, numeric_only=False): def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): - axis = self._get_axis_number(axis) + if axis is None and filter_type == 'bool': + labels = None + constructor = None + else: + # TODO: Make other agg func handle axis=None properly + axis = self._get_axis_number(axis) + labels = self._get_agg_axis(axis) + constructor = self._constructor def f(x): return op(x, axis=axis, skipna=skipna, **kwds) - labels = self._get_agg_axis(axis) - # exclude timedelta/datetime unless we are uniform types if axis == 1 and self._is_mixed_type and self._is_datelike_mixed_type: numeric_only = True @@ -6861,6 +6866,13 @@ def f(x): try: values = self.values result = f(values) + + if (filter_type == 'bool' and is_object_dtype(values) and + axis is None): + # work around https://github.com/numpy/numpy/issues/10489 + # TODO: combine with hasattr(result, 'dtype') further down + # hard since we don't have `values` down there. + result = np.bool_(result) except Exception as e: # try by-column first @@ -6927,7 +6939,9 @@ def f(x): if axis == 0: result = coerce_to_dtypes(result, self.dtypes) - return Series(result, index=labels) + if constructor is not None: + result = Series(result, index=labels) + return result def nunique(self, axis=0, dropna=True): """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4efdd3812accd..8fa79a130d1f8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8728,6 +8728,8 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, return rs def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs): + if axis is None: + raise ValueError("Must specify 'axis' when aggregating by level.") grouped = self.groupby(level=level, axis=axis, sort=False) if hasattr(grouped, name) and skipna: return getattr(grouped, name)(**kwargs) @@ -9054,8 +9056,15 @@ def _doc_parms(cls): Parameters ---------- -axis : int, default 0 - Select the axis which can be 0 for indices and 1 for columns. +axis : {0 or 'index', 1 or 'columns', None}, default 0 + Indicate which axis or axes should be reduced. + + * 0 / 'index' : reduce the index, return a Series whose index is the + original column labels. + * 1 / 'columns' : reduce the columns, return a Series whose index is the + original index. + * None : reduce all axes, return a scalar. + skipna : boolean, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA. @@ -9077,9 +9086,9 @@ def _doc_parms(cls): %(examples)s""" _all_doc = """\ -Return whether all elements are True over series or dataframe axis. +Return whether all elements are True, potentially over an axis. 
-Returns True if all elements within a series or along a dataframe +Returns True if all elements within a series or along a Dataframe axis are non-zero, not-empty or not-False.""" _all_examples = """\ @@ -9092,7 +9101,7 @@ def _doc_parms(cls): >>> pd.Series([True, False]).all() False -Dataframes +DataFrames Create a dataframe from a dictionary. @@ -9109,12 +9118,17 @@ def _doc_parms(cls): col2 False dtype: bool -Adding axis=1 argument will check if row-wise values all return True. +Specify ``axis='columns'`` to check if row-wise values all return True. ->>> df.all(axis=1) +>>> df.all(axis='columns') 0 True 1 False dtype: bool + +Or ``axis=None`` for whether every value is True. + +>>> df.all(axis=None) +False """ _all_see_also = """\ @@ -9484,6 +9498,11 @@ def _doc_parms(cls): 1 False dtype: bool +Aggregating over the entire DataFrame with ``axis=None``. + +>>> df.any(axis=None) +True + `any` for an empty DataFrame is an empty Series. >>> pd.DataFrame([]).any() @@ -9654,22 +9673,17 @@ def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f, @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr, examples=examples, see_also=see_also) @Appender(_bool_doc) - def logical_func(self, axis=None, bool_only=None, skipna=None, level=None, + def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): nv.validate_logical_func(tuple(), kwargs, fname=name) - if skipna is None: - skipna = True - if axis is None: - axis = self._stat_axis_number if level is not None: if bool_only is not None: raise NotImplementedError("Option bool_only is not " "implemented with option level.") return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) - return self._reduce(f, axis=axis, skipna=skipna, - numeric_only=bool_only, filter_type='bool', - name=name) + return self._reduce(f, name, axis=axis, skipna=skipna, + numeric_only=bool_only, filter_type='bool') return set_function_name(logical_func, name, cls) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index c4aa471b8b944..4f7400ad8388b 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1143,13 +1143,26 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, raise NotImplementedError('Panel.{0} does not implement ' 'numeric_only.'.format(name)) - axis_name = self._get_axis_name(axis) - axis_number = self._get_axis_number(axis_name) + if axis is None and filter_type == 'bool': + # labels = None + # constructor = None + axis_number = None + axis_name = None + else: + # TODO: Make other agg func handle axis=None properly + axis = self._get_axis_number(axis) + # labels = self._get_agg_axis(axis) + # constructor = self._constructor + axis_name = self._get_axis_name(axis) + axis_number = self._get_axis_number(axis_name) + f = lambda x: op(x, axis=axis_number, skipna=skipna, **kwds) with np.errstate(all='ignore'): result = f(self.values) + if axis is None and filter_type == 'bool': + return np.bool_(result) axes = self._get_plane_axes(axis_name) if result.ndim == 2 and axis_name != self._info_axis_name: result = result.T diff --git a/pandas/core/series.py b/pandas/core/series.py index a608db806d20b..cdb901d18767c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3241,7 +3241,8 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, delegate = self._values if isinstance(delegate, np.ndarray): # Validate that 'axis' is consistent with Series's single axis. 
- self._get_axis_number(axis) + if axis is not None: + self._get_axis_number(axis) if numeric_only: raise NotImplementedError('Series.{0} does not implement ' 'numeric_only.'.format(name)) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 12ebdbe0fd3c7..84873659ac931 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -15,7 +15,7 @@ from pandas.compat import lrange, PY35 from pandas import (compat, isna, notna, DataFrame, Series, MultiIndex, date_range, Timestamp, Categorical, - _np_version_under1p12, _np_version_under1p15, + _np_version_under1p12, to_datetime, to_timedelta) import pandas as pd import pandas.core.nanops as nanops @@ -1159,11 +1159,35 @@ def test_any_all(self): self._check_bool_op('any', np.any, has_skipna=True, has_bool_only=True) self._check_bool_op('all', np.all, has_skipna=True, has_bool_only=True) - df = DataFrame(randn(10, 4)) > 0 - df.any(1) - df.all(1) - df.any(1, bool_only=True) - df.all(1, bool_only=True) + def test_any_all_extra(self): + df = DataFrame({ + 'A': [True, False, False], + 'B': [True, True, False], + 'C': [True, True, True], + }, index=['a', 'b', 'c']) + result = df[['A', 'B']].any(1) + expected = Series([True, True, False], index=['a', 'b', 'c']) + tm.assert_series_equal(result, expected) + + result = df[['A', 'B']].any(1, bool_only=True) + tm.assert_series_equal(result, expected) + + result = df.all(1) + expected = Series([True, False, False], index=['a', 'b', 'c']) + tm.assert_series_equal(result, expected) + + result = df.all(1, bool_only=True) + tm.assert_series_equal(result, expected) + + # Axis is None + result = df.all(axis=None).item() + assert result is False + + result = df.any(axis=None).item() + assert result is True + + result = df[['C']].all(axis=None).item() + assert result is True # skip pathological failure cases # class CantNonzero(object): @@ -1185,6 +1209,86 @@ def test_any_all(self): # df.any(1, bool_only=True) # df.all(1, bool_only=True) + @pytest.mark.parametrize('func, data, expected', [ + (np.any, {}, False), + (np.all, {}, True), + (np.any, {'A': []}, False), + (np.all, {'A': []}, True), + (np.any, {'A': [False, False]}, False), + (np.all, {'A': [False, False]}, False), + (np.any, {'A': [True, False]}, True), + (np.all, {'A': [True, False]}, False), + (np.any, {'A': [True, True]}, True), + (np.all, {'A': [True, True]}, True), + + (np.any, {'A': [False], 'B': [False]}, False), + (np.all, {'A': [False], 'B': [False]}, False), + + (np.any, {'A': [False, False], 'B': [False, True]}, True), + (np.all, {'A': [False, False], 'B': [False, True]}, False), + + # other types + (np.all, {'A': pd.Series([0.0, 1.0], dtype='float')}, False), + (np.any, {'A': pd.Series([0.0, 1.0], dtype='float')}, True), + (np.all, {'A': pd.Series([0, 1], dtype=int)}, False), + (np.any, {'A': pd.Series([0, 1], dtype=int)}, True), + pytest.param(np.all, {'A': pd.Series([0, 1], dtype='M8[ns]')}, False, + marks=[td.skip_if_np_lt_115]), + pytest.param(np.any, {'A': pd.Series([0, 1], dtype='M8[ns]')}, True, + marks=[td.skip_if_np_lt_115]), + pytest.param(np.all, {'A': pd.Series([1, 2], dtype='M8[ns]')}, True, + marks=[td.skip_if_np_lt_115]), + pytest.param(np.any, {'A': pd.Series([1, 2], dtype='M8[ns]')}, True, + marks=[td.skip_if_np_lt_115]), + pytest.param(np.all, {'A': pd.Series([0, 1], dtype='m8[ns]')}, False, + marks=[td.skip_if_np_lt_115]), + pytest.param(np.any, {'A': pd.Series([0, 1], dtype='m8[ns]')}, True, + marks=[td.skip_if_np_lt_115]), + pytest.param(np.all, 
{'A': pd.Series([1, 2], dtype='m8[ns]')}, True, + marks=[td.skip_if_np_lt_115]), + pytest.param(np.any, {'A': pd.Series([1, 2], dtype='m8[ns]')}, True, + marks=[td.skip_if_np_lt_115]), + (np.all, {'A': pd.Series([0, 1], dtype='category')}, False), + (np.any, {'A': pd.Series([0, 1], dtype='category')}, True), + (np.all, {'A': pd.Series([1, 2], dtype='category')}, True), + (np.any, {'A': pd.Series([1, 2], dtype='category')}, True), + + # # Mix + # GH-21484 + # (np.all, {'A': pd.Series([10, 20], dtype='M8[ns]'), + # 'B': pd.Series([10, 20], dtype='m8[ns]')}, True), + ]) + def test_any_all_np_func(self, func, data, expected): + # https://github.com/pandas-dev/pandas/issues/19976 + data = DataFrame(data) + result = func(data) + assert isinstance(result, np.bool_) + assert result.item() is expected + + # method version + result = getattr(DataFrame(data), func.__name__)(axis=None) + assert isinstance(result, np.bool_) + assert result.item() is expected + + def test_any_all_object(self): + # https://github.com/pandas-dev/pandas/issues/19976 + result = np.all(DataFrame(columns=['a', 'b'])).item() + assert result is True + + result = np.any(DataFrame(columns=['a', 'b'])).item() + assert result is False + + @pytest.mark.parametrize('method', ['any', 'all']) + def test_any_all_level_axis_none_raises(self, method): + df = DataFrame( + {"A": 1}, + index=MultiIndex.from_product([['A', 'B'], ['a', 'b']], + names=['out', 'in']) + ) + xpr = "Must specify 'axis' when aggregating by level." + with tm.assert_raises_regex(ValueError, xpr): + getattr(df, method)(axis=None, level='out') + def _check_bool_op(self, name, alternative, frame=None, has_skipna=True, has_bool_only=False): if frame is None: @@ -2091,9 +2195,6 @@ def test_clip_against_list_like(self, inplace, lower, axis, res): result = original tm.assert_frame_equal(result, expected, check_exact=True) - @pytest.mark.xfail( - not _np_version_under1p15, - reason="failing under numpy-dev gh-19976") @pytest.mark.parametrize("axis", [0, 1, None]) def test_clip_against_frame(self, axis): df = DataFrame(np.random.randn(1000, 2)) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index d95a2ad2d7f76..2f8bc228cf86e 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -2707,3 +2707,10 @@ def test_panel_index(): np.repeat([1, 2, 3], 4)], names=['time', 'panel']) tm.assert_index_equal(index, expected) + + +def test_panel_np_all(): + with catch_warnings(record=True): + wp = Panel({"A": DataFrame({'b': [1, 2]})}) + result = np.all(wp) + assert result == np.bool_(True) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 89d90258f58e0..27c24e3a68079 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -30,6 +30,7 @@ def test_foo(): from pandas.compat import (is_platform_windows, is_platform_32bit, PY3, import_lzma) +from pandas.compat.numpy import _np_version_under1p15 from pandas.core.computation.expressions import (_USE_NUMEXPR, _NUMEXPR_INSTALLED) @@ -160,6 +161,9 @@ def decorated_func(func): skip_if_no_mpl = pytest.mark.skipif(_skip_if_no_mpl(), reason="Missing matplotlib dependency") + +skip_if_np_lt_115 = pytest.mark.skipif(_np_version_under1p15, + reason="NumPy 1.15 or greater required") skip_if_mpl = pytest.mark.skipif(not _skip_if_no_mpl(), reason="matplotlib is present") skip_if_mpl_1_5 = pytest.mark.skipif(_skip_if_mpl_1_5(), From db233f371e790d3f96b643fbdbdb985f6753409a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 26 
Jun 2018 00:44:05 -0700 Subject: [PATCH 094/113] DOC: Move tz cleanup whatsnew entries to v0.24 (#21631) --- doc/source/whatsnew/v0.23.2.txt | 6 +----- doc/source/whatsnew/v0.24.0.txt | 6 +++++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index c5de6f0a61720..a603bf9f7e9e0 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -129,11 +129,7 @@ Bug Fixes - Bug in :class:`Timestamp` and :class:`DatetimeIndex` where passing a :class:`Timestamp` localized after a DST transition would return a datetime before the DST transition (:issue:`20854`) - Bug in comparing :class:`DataFrame`s with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`) -- Bug in :meth:`DatetimeIndex.shift` where an ``AssertionError`` would raise when shifting across DST (:issue:`8616`) -- Bug in :class:`Timestamp` constructor where passing an invalid timezone offset designator (``Z``) would not raise a ``ValueError``(:issue:`8910`) -- Bug in :meth:`Timestamp.replace` where replacing at a DST boundary would retain an incorrect offset (:issue:`7825`) -- Bug in :meth:`DatetimeIndex.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`) -- Bug in :meth:`DatetimeIndex.resample` when downsampling across a DST boundary (:issue:`8531`) + **Other** diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index a63276efc5b7c..8e38171e93bc2 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -159,7 +159,11 @@ Datetimelike - Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) - Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) -- +- Bug in :meth:`DatetimeIndex.shift` where an ``AssertionError`` would raise when shifting across DST (:issue:`8616`) +- Bug in :class:`Timestamp` constructor where passing an invalid timezone offset designator (``Z``) would not raise a ``ValueError``(:issue:`8910`) +- Bug in :meth:`Timestamp.replace` where replacing at a DST boundary would retain an incorrect offset (:issue:`7825`) +- Bug in :meth:`DatetimeIndex.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`) +- Bug in :meth:`DatetimeIndex.resample` when downsampling across a DST boundary (:issue:`8531`) Timedelta ^^^^^^^^^ From 79d982aae39b552e3919d801d509ba1cf81f240b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 26 Jun 2018 12:01:10 +0200 Subject: [PATCH 095/113] DOC: fixup old whatsnew for dtype coercing change (#21456) (#21634) --- doc/source/whatsnew/v0.11.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.11.0.txt b/doc/source/whatsnew/v0.11.0.txt index 3c9cfda49aebd..f39e6c9ff459b 100644 --- a/doc/source/whatsnew/v0.11.0.txt +++ b/doc/source/whatsnew/v0.11.0.txt @@ -76,7 +76,7 @@ Numeric dtypes will propagate and can coexist in DataFrames. 
If a dtype is passe df1.dtypes df2 = DataFrame(dict( A = Series(randn(8),dtype='float16'), B = Series(randn(8)), - C = Series(randn(8),dtype='uint8') )) + C = Series(range(8),dtype='uint8') )) df2 df2.dtypes From 367ce07dfd4e085033d26d7776a027d65890dc44 Mon Sep 17 00:00:00 2001 From: Kalyan Gokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Tue, 26 Jun 2018 15:37:54 +0530 Subject: [PATCH 096/113] DEPR: MultiIndex.to_hierarchical (#21613) --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/indexes/multi.py | 6 +++++- pandas/core/panel.py | 12 ++++++++---- pandas/tests/indexes/test_multi.py | 11 +++++++---- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 8e38171e93bc2..fd92958930e55 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -110,7 +110,7 @@ Deprecations ~~~~~~~~~~~~ - :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`). -- +- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`) - .. _whatsnew_0240.prior_deprecations: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 61b50f139dd10..f9f3041bef073 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -189,7 +189,6 @@ class MultiIndex(Index): from_product set_levels set_labels - to_hierarchical to_frame is_lexsorted sortlevel @@ -1182,6 +1181,8 @@ def to_frame(self, index=True): def to_hierarchical(self, n_repeat, n_shuffle=1): """ + .. deprecated:: 0.24.0 + Return a MultiIndex reshaped to conform to the shapes given by n_repeat and n_shuffle. 
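The ``panel.py`` hunk below inlines the same reshaping that ``to_hierarchical`` performed, so code losing the method can reproduce it directly from ``levels``/``labels``. A rough standalone sketch of that replacement, assuming the pre-0.24 ``MultiIndex`` constructor that still accepts ``labels`` (the helper name is illustrative, not part of the patch):

    import numpy as np
    from pandas import MultiIndex

    def repeat_and_shuffle(idx, n_repeat, n_shuffle=1):
        # same label manipulation as Panel.to_frame's construct_multi_parts
        labels = [np.repeat(x, n_repeat) for x in idx.labels]
        # assumes each label array length is divisible by n_shuffle
        labels = [x.reshape(n_shuffle, -1).ravel(order='F') for x in labels]
        return MultiIndex(levels=idx.levels, labels=labels, names=idx.names)
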
@@ -1216,6 +1217,9 @@ def to_hierarchical(self, n_repeat, n_shuffle=1): # Assumes that each label is divisible by n_shuffle labels = [x.reshape(n_shuffle, -1).ravel(order='F') for x in labels] names = self.names + warnings.warn("Method .to_hierarchical is deprecated and will " + "be removed in a future version", + FutureWarning, stacklevel=2) return MultiIndex(levels=levels, labels=labels, names=names) @property diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 4f7400ad8388b..e012819812f6b 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -948,10 +948,14 @@ def to_frame(self, filter_observations=True): data[item] = self[item].values.ravel()[selector] def construct_multi_parts(idx, n_repeat, n_shuffle=1): - axis_idx = idx.to_hierarchical(n_repeat, n_shuffle) - labels = [x[selector] for x in axis_idx.labels] - levels = axis_idx.levels - names = axis_idx.names + # Replicates and shuffles MultiIndex, returns individual attributes + labels = [np.repeat(x, n_repeat) for x in idx.labels] + # Assumes that each label is divisible by n_shuffle + labels = [x.reshape(n_shuffle, -1).ravel(order='F') + for x in labels] + labels = [x[selector] for x in labels] + levels = idx.levels + names = idx.names return labels, levels, names def construct_index_parts(idx, major=True): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index ab53002ee1587..362f917e74972 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1673,9 +1673,11 @@ def test_to_frame(self): tm.assert_frame_equal(result, expected) def test_to_hierarchical(self): + # GH21613 index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( 2, 'two')]) - result = index.to_hierarchical(3) + with tm.assert_produces_warning(FutureWarning): + result = index.to_hierarchical(3) expected = MultiIndex(levels=[[1, 2], ['one', 'two']], labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) @@ -1683,7 +1685,8 @@ def test_to_hierarchical(self): assert result.names == index.names # K > 1 - result = index.to_hierarchical(3, 2) + with tm.assert_produces_warning(FutureWarning): + result = index.to_hierarchical(3, 2) expected = MultiIndex(levels=[[1, 2], ['one', 'two']], labels=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]]) @@ -1694,8 +1697,8 @@ def test_to_hierarchical(self): index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'), (2, 'a'), (2, 'b')], names=['N1', 'N2']) - - result = index.to_hierarchical(2) + with tm.assert_produces_warning(FutureWarning): + result = index.to_hierarchical(2) expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'), (1, 'b'), (1, 'b'), (2, 'a'), (2, 'a'), From 8db9303ac3effe1dded9c6bf9012ed171864620b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 26 Jun 2018 08:26:21 -0400 Subject: [PATCH 097/113] TST: xfail flaky 3.7 test, xref #21636 (#21637) --- pandas/tests/groupby/test_categorical.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index e0793b8e1bd64..0fec6a8f96a24 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd +from pandas.compat import PY37 from pandas import (Index, MultiIndex, CategoricalIndex, DataFrame, Categorical, Series, qcut) from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -205,6 +206,7 @@ def test_level_get_group(observed): 
assert_frame_equal(result, expected) +@pytest.mark.xfail(PY37, reason="flaky on 3.7, xref gh-21636") @pytest.mark.parametrize('ordered', [True, False]) def test_apply(ordered): # GH 10138 From e8f5ede0946c236ef42e16096a5b8091f8d4b774 Mon Sep 17 00:00:00 2001 From: Brett Naul Date: Tue, 26 Jun 2018 07:59:23 -0700 Subject: [PATCH 098/113] [ENH] Add read support for Google Cloud Storage (#20729) * Google Cloud Storage support using gcsfs --- ci/appveyor-27.yaml | 1 + ci/check_imports.py | 1 + ci/circle-36-locale_slow.yaml | 1 + ci/requirements-optional-conda.txt | 1 + ci/requirements-optional-pip.txt | 1 + ci/travis-27.yaml | 1 + ci/travis-36.yaml | 1 + doc/source/install.rst | 1 + doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/conftest.py | 16 +++++++++ pandas/io/common.py | 19 +++++++++-- pandas/io/excel.py | 2 +- pandas/io/gcs.py | 16 +++++++++ pandas/io/json/json.py | 6 ++-- pandas/tests/dtypes/test_inference.py | 9 ++--- pandas/tests/io/parser/common.py | 9 ++--- pandas/tests/io/test_gcs.py | 47 +++++++++++++++++++++++++++ pandas/util/_print_versions.py | 1 + 18 files changed, 116 insertions(+), 19 deletions(-) create mode 100644 pandas/io/gcs.py create mode 100644 pandas/tests/io/test_gcs.py diff --git a/ci/appveyor-27.yaml b/ci/appveyor-27.yaml index cfc6a796bd77e..10511ac0e00ca 100644 --- a/ci/appveyor-27.yaml +++ b/ci/appveyor-27.yaml @@ -6,6 +6,7 @@ dependencies: - beautifulsoup4 - bottleneck - dateutil + - gcsfs - html5lib - jinja2=2.8 - lxml diff --git a/ci/check_imports.py b/ci/check_imports.py index d6f24ebcc4d3e..3f09290f8c375 100644 --- a/ci/check_imports.py +++ b/ci/check_imports.py @@ -5,6 +5,7 @@ blacklist = { 'bs4', + 'gcsfs', 'html5lib', 'ipython', 'jinja2' diff --git a/ci/circle-36-locale_slow.yaml b/ci/circle-36-locale_slow.yaml index cc852c1e2aeeb..f44e98e1ee09d 100644 --- a/ci/circle-36-locale_slow.yaml +++ b/ci/circle-36-locale_slow.yaml @@ -5,6 +5,7 @@ channels: dependencies: - beautifulsoup4 - cython + - gcsfs - html5lib - ipython - jinja2 diff --git a/ci/requirements-optional-conda.txt b/ci/requirements-optional-conda.txt index e8cfcdf80f2e8..9e4e8e99b5205 100644 --- a/ci/requirements-optional-conda.txt +++ b/ci/requirements-optional-conda.txt @@ -3,6 +3,7 @@ blosc bottleneck fastparquet feather-format +gcsfs html5lib ipython>=5.6.0 ipykernel diff --git a/ci/requirements-optional-pip.txt b/ci/requirements-optional-pip.txt index 877c52fa0b4fd..3cce3f5339883 100644 --- a/ci/requirements-optional-pip.txt +++ b/ci/requirements-optional-pip.txt @@ -5,6 +5,7 @@ blosc bottleneck fastparquet feather-format +gcsfs html5lib ipython>=5.6.0 ipykernel diff --git a/ci/travis-27.yaml b/ci/travis-27.yaml index 22b993a2da886..482b888b88062 100644 --- a/ci/travis-27.yaml +++ b/ci/travis-27.yaml @@ -9,6 +9,7 @@ dependencies: - fastparquet - feather-format - flake8=3.4.1 + - gcsfs - html5lib - ipython - jemalloc=4.5.0.post diff --git a/ci/travis-36.yaml b/ci/travis-36.yaml index 006276ba1a65f..ff4f1a4a86f99 100644 --- a/ci/travis-36.yaml +++ b/ci/travis-36.yaml @@ -8,6 +8,7 @@ dependencies: - dask - fastparquet - feather-format + - gcsfs - geopandas - html5lib - ipython diff --git a/doc/source/install.rst b/doc/source/install.rst index fa6b9f4fc7f4d..a8c5194124829 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -276,6 +276,7 @@ Optional Dependencies * `Jinja2 `__: Template engine for conditional HTML formatting. * `s3fs `__: necessary for Amazon S3 access (s3fs >= 0.0.7). 
* `blosc `__: for msgpack compression using ``blosc`` +* `gcsfs `__: necessary for Google Cloud Storage access (gcsfs >= 0.1.0). * One of `qtpy `__ (requires PyQt or PySide), `PyQt5 `__, diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index fd92958930e55..72e7373d0dd33 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -18,7 +18,7 @@ Other Enhancements - :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether NaN/NaT values should be considered (:issue:`17534`) - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`) - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`) - +- Added support for reading from Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`) .. _whatsnew_0240.api_breaking: diff --git a/pandas/conftest.py b/pandas/conftest.py index d6b18db4e71f2..b4a599758417c 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1,3 +1,5 @@ +import importlib + import pytest import numpy as np @@ -249,3 +251,17 @@ def any_int_dtype(request): """ return request.param + + +@pytest.fixture +def mock(): + """ + Fixture providing the 'mock' module. + + Uses 'unittest.mock' for Python 3. Attempts to import the 3rd party 'mock' + package for Python 2, skipping if not present. + """ + if PY3: + return importlib.import_module("unittest.mock") + else: + return pytest.importorskip("mock") diff --git a/pandas/io/common.py b/pandas/io/common.py index ac9077f2db50e..6d579fc8a8a09 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -88,7 +88,7 @@ def _is_url(url): """ try: return parse_url(url).scheme in _VALID_URLS - except: + except Exception: return False @@ -165,7 +165,15 @@ def is_s3_url(url): """Check for an s3, s3n, or s3a url""" try: return parse_url(url).scheme in ['s3', 's3n', 's3a'] - except: # noqa + except Exception: + return False + + +def is_gcs_url(url): + """Check for a gcs url""" + try: + return parse_url(url).scheme in ['gcs', 'gs'] + except Exception: return False @@ -208,6 +216,13 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, compression=compression, mode=mode) + if is_gcs_url(filepath_or_buffer): + from pandas.io import gcs + return gcs.get_filepath_or_buffer(filepath_or_buffer, + encoding=encoding, + compression=compression, + mode=mode) + if isinstance(filepath_or_buffer, (compat.string_types, compat.binary_type, mmap.mmap)): diff --git a/pandas/io/excel.py b/pandas/io/excel.py index e86d33742b266..793a95ffb0ee7 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -46,7 +46,7 @@ io : string, path object (pathlib.Path or py._path.local.LocalPath), file-like object, pandas ExcelFile, or xlrd workbook. The string could be a URL. Valid URL schemes include http, ftp, s3, - and file. For file URLs, a host is expected. For instance, a local + gcs, and file. For file URLs, a host is expected. 
For instance, a local file could be file://localhost/path/to/workbook.xlsx sheet_name : string, int, mixed list of strings/ints, or None, default 0 diff --git a/pandas/io/gcs.py b/pandas/io/gcs.py new file mode 100644 index 0000000000000..aa1cb648f05d1 --- /dev/null +++ b/pandas/io/gcs.py @@ -0,0 +1,16 @@ +""" GCS support for remote file interactivity """ +try: + import gcsfs +except ImportError: + raise ImportError("The gcsfs library is required to handle GCS files") + + +def get_filepath_or_buffer(filepath_or_buffer, encoding=None, + compression=None, mode=None): + + if mode is None: + mode = 'rb' + + fs = gcsfs.GCSFileSystem() + filepath_or_buffer = fs.open(filepath_or_buffer, mode) + return filepath_or_buffer, None, compression, True diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 1627b2f4d3ec3..9992be521d61f 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -231,9 +231,9 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, Parameters ---------- path_or_buf : a valid JSON string or file-like, default: None - The string could be a URL. Valid URL schemes include http, ftp, s3, and - file. For file URLs, a host is expected. For instance, a local file - could be ``file://localhost/path/to/table.json`` + The string could be a URL. Valid URL schemes include http, ftp, s3, + gcs, and file. For file URLs, a host is expected. For instance, a local + file could be ``file://localhost/path/to/table.json`` orient : string, Indication of expected JSON string format. diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index b4f5d67530fbd..65527ac1b278f 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -20,7 +20,7 @@ DatetimeIndex, TimedeltaIndex, Timestamp, Panel, Period, Categorical, isna, Interval, DateOffset) -from pandas.compat import u, PY2, PY3, StringIO, lrange +from pandas.compat import u, PY2, StringIO, lrange from pandas.core.dtypes import inference from pandas.core.dtypes.common import ( is_timedelta64_dtype, @@ -128,7 +128,7 @@ def test_is_dict_like_fails(ll): assert not inference.is_dict_like(ll) -def test_is_file_like(): +def test_is_file_like(mock): class MockFile(object): pass @@ -166,10 +166,7 @@ class MockFile(object): # Iterator but no read / write attributes data = [1, 2, 3] assert not is_file(data) - - if PY3: - from unittest import mock - assert not is_file(mock.Mock()) + assert not is_file(mock.Mock()) @pytest.mark.parametrize( diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index b39122e5e7906..6e1d3575a1481 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -1546,7 +1546,7 @@ def test_file_handles(self): assert not m.closed m.close() - def test_invalid_file_buffer(self): + def test_invalid_file_buffer(self, mock): # see gh-15337 class InvalidBuffer(object): @@ -1577,11 +1577,8 @@ def seek(self, pos, whence=0): tm.assert_frame_equal(result, expected) - if PY3: - from unittest import mock - - with tm.assert_raises_regex(ValueError, msg): - self.read_csv(mock.Mock()) + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(mock.Mock()) @tm.capture_stderr def test_skip_bad_lines(self): diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py new file mode 100644 index 0000000000000..251c93df0733d --- /dev/null +++ b/pandas/tests/io/test_gcs.py @@ -0,0 +1,47 @@ +import numpy as np +import pytest + +from pandas import DataFrame, date_range, read_csv 
+from pandas.compat import StringIO +from pandas.io.common import is_gcs_url +from pandas.util import _test_decorators as td +from pandas.util.testing import assert_frame_equal + + +def test_is_gcs_url(): + assert is_gcs_url("gcs://pandas/somethingelse.com") + assert is_gcs_url("gs://pandas/somethingelse.com") + assert not is_gcs_url("s3://pandas/somethingelse.com") + + +@td.skip_if_no('gcsfs') +def test_read_csv_gcs(mock): + df1 = DataFrame({'int': [1, 3], 'float': [2.0, np.nan], 'str': ['t', 's'], + 'dt': date_range('2018-06-18', periods=2)}) + with mock.patch('gcsfs.GCSFileSystem') as MockFileSystem: + instance = MockFileSystem.return_value + instance.open.return_value = StringIO(df1.to_csv(index=False)) + df2 = read_csv('gs://test/test.csv', parse_dates=['dt']) + + assert_frame_equal(df1, df2) + + +@td.skip_if_no('gcsfs') +def test_gcs_get_filepath_or_buffer(mock): + df1 = DataFrame({'int': [1, 3], 'float': [2.0, np.nan], 'str': ['t', 's'], + 'dt': date_range('2018-06-18', periods=2)}) + with mock.patch('pandas.io.gcs.get_filepath_or_buffer') as MockGetFilepath: + MockGetFilepath.return_value = (StringIO(df1.to_csv(index=False)), + None, None, False) + df2 = read_csv('gs://test/test.csv', parse_dates=['dt']) + + assert_frame_equal(df1, df2) + assert MockGetFilepath.called + + +@pytest.mark.skipif(td.safe_import('gcsfs'), + reason='Only check when gcsfs not installed') +def test_gcs_not_present_exception(): + with pytest.raises(ImportError) as e: + read_csv('gs://test/test.csv') + assert 'gcsfs library is required' in str(e.value) diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py index 83c1433bf5c39..01198fc541e0c 100644 --- a/pandas/util/_print_versions.py +++ b/pandas/util/_print_versions.py @@ -96,6 +96,7 @@ def show_versions(as_json=False): ("fastparquet", lambda mod: mod.__version__), ("pandas_gbq", lambda mod: mod.__version__), ("pandas_datareader", lambda mod: mod.__version__), + ("gcsfs", lambda mod: mod.__version__), ] deps_blob = list() From 58a1a08554bf222968f78a5cff2a7257c36178f8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 26 Jun 2018 10:02:17 -0500 Subject: [PATCH 099/113] PKG: Exclude data test files. 
(#19535) --- MANIFEST.in | 34 ++++--- ci/script_single.sh | 8 +- doc/source/whatsnew/v0.23.2.txt | 5 + pandas/conftest.py | 41 +++++++++ pandas/tests/indexes/test_multi.py | 8 +- pandas/tests/io/conftest.py | 21 ++--- pandas/tests/io/formats/test_format.py | 4 +- pandas/tests/io/json/test_compression.py | 6 +- pandas/tests/io/json/test_pandas.py | 8 +- pandas/tests/io/parser/common.py | 25 +++-- pandas/tests/io/parser/compression.py | 4 +- pandas/tests/io/parser/dtypes.py | 6 +- pandas/tests/io/parser/test_network.py | 53 +++++------ pandas/tests/io/parser/test_parsers.py | 6 +- pandas/tests/io/parser/test_textreader.py | 5 +- pandas/tests/io/sas/test_sas7bdat.py | 43 ++++----- pandas/tests/io/sas/test_xport.py | 6 +- pandas/tests/io/test_common.py | 54 +++++------ pandas/tests/io/test_excel.py | 12 +-- pandas/tests/io/test_html.py | 92 +++++++++++-------- pandas/tests/io/test_packers.py | 51 +++++----- pandas/tests/io/test_pickle.py | 38 ++++---- pandas/tests/io/test_pytables.py | 23 +++-- pandas/tests/io/test_sql.py | 63 +++++++------ pandas/tests/io/test_stata.py | 9 +- pandas/tests/plotting/common.py | 5 - pandas/tests/plotting/test_deprecated.py | 5 +- pandas/tests/plotting/test_misc.py | 16 ++-- pandas/tests/reshape/merge/test_merge_asof.py | 33 +++---- pandas/tests/reshape/test_tile.py | 6 +- pandas/tests/tseries/offsets/test_offsets.py | 16 ++-- pandas/tests/util/test_testing.py | 13 +++ pandas/util/_test_decorators.py | 1 - pandas/util/testing.py | 10 -- setup.cfg | 3 +- setup.py | 6 +- 36 files changed, 392 insertions(+), 347 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 9773019c6e6e0..b417b8890fa24 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,27 +3,39 @@ include LICENSE include RELEASE.md include README.md include setup.py -include pyproject.toml graft doc prune doc/build +graft LICENSES + graft pandas -global-exclude *.so -global-exclude *.pyd +global-exclude *.bz2 +global-exclude *.csv +global-exclude *.dta +global-exclude *.gz +global-exclude *.h5 +global-exclude *.html +global-exclude *.json +global-exclude *.msgpack +global-exclude *.pickle +global-exclude *.png global-exclude *.pyc +global-exclude *.pyd +global-exclude *.sas7bdat +global-exclude *.so +global-exclude *.xls +global-exclude *.xlsm +global-exclude *.xlsx +global-exclude *.xpt +global-exclude *.xz +global-exclude *.zip global-exclude *~ -global-exclude \#* -global-exclude .git* global-exclude .DS_Store -global-exclude *.png +global-exclude .git* +global-exclude \#* -# include examples/data/* -# recursive-include examples *.py -# recursive-include doc/source * -# recursive-include doc/sphinxext * -# recursive-include LICENSES * include versioneer.py include pandas/_version.py include pandas/io/formats/templates/*.tpl diff --git a/ci/script_single.sh b/ci/script_single.sh index f376c920ac71b..60e2fbb33ee5d 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -25,12 +25,12 @@ if [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" elif [ "$COVERAGE" ]; then - echo pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas - pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + echo pytest -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + pytest -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml 
$TEST_ARGS pandas else - echo pytest -m "single" -r xX --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas - pytest -m "single" -r xX --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas # TODO: doctest + echo pytest -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas + pytest -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas # TODO: doctest fi diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index a603bf9f7e9e0..a41a6c31b0678 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -76,6 +76,11 @@ Documentation Changes - - +Build Changes +------------- + +- The source and binary distributions no longer include test data files, resulting in smaller download sizes. Tests relying on these data files will be skipped when using ``pandas.test()``. (:issue:`19320`) + .. _whatsnew_0232.bug_fixes: Bug Fixes diff --git a/pandas/conftest.py b/pandas/conftest.py index b4a599758417c..82d860b091b82 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1,7 +1,9 @@ +import os import importlib import pytest +import pandas import numpy as np import pandas as pd from pandas.compat import PY3 @@ -17,6 +19,8 @@ def pytest_addoption(parser): help="run high memory tests") parser.addoption("--only-slow", action="store_true", help="run only slow tests") + parser.addoption("--strict-data-files", action="store_true", + help="Fail if a test is skipped for missing data file.") def pytest_runtest_setup(item): @@ -131,6 +135,43 @@ def join_type(request): return request.param +@pytest.fixture +def datapath(request): + """Get the path to a data file. + + Parameters + ---------- + path : str + Path to the file, relative to ``pandas/tests/`` + + Returns + ------- + path : path including ``pandas/tests``. + + Raises + ------ + ValueError + If the path doesn't exist and the --strict-data-files option is set. + """ + def deco(*args): + path = os.path.join('pandas', 'tests', *args) + if not os.path.exists(path): + if request.config.getoption("--strict-data-files"): + msg = "Could not find file {} and --strict-data-files is set." + raise ValueError(msg.format(path)) + else: + msg = "Could not find {}." 
+ pytest.skip(msg.format(path)) + return path + return deco + + +@pytest.fixture +def iris(datapath): + """The iris dataset as a DataFrame.""" + return pandas.read_csv(datapath('data', 'iris.csv')) + + @pytest.fixture(params=['nlargest', 'nsmallest']) def nselect_method(request): """ diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 362f917e74972..c925c4c403960 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1182,12 +1182,12 @@ def test_iter(self): ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] assert result == expected - def test_legacy_pickle(self): + def test_legacy_pickle(self, datapath): if PY3: pytest.skip("testing for legacy pickles not " "support on py3") - path = tm.get_data_path('multiindex_v1.pickle') + path = datapath('indexes', 'data', 'multiindex_v1.pickle') obj = pd.read_pickle(path) obj2 = MultiIndex.from_tuples(obj.values) @@ -1203,10 +1203,10 @@ def test_legacy_pickle(self): assert_almost_equal(res, exp) assert_almost_equal(exp, exp2) - def test_legacy_v2_unpickle(self): + def test_legacy_v2_unpickle(self, datapath): # 0.7.3 -> 0.8.0 format manage - path = tm.get_data_path('mindex_073.pickle') + path = datapath('indexes', 'data', 'mindex_073.pickle') obj = pd.read_pickle(path) obj2 = MultiIndex.from_tuples(obj.values) diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index 8deb51e190bab..7623587803b41 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -1,32 +1,23 @@ -import os - import pytest from pandas.io.parsers import read_table -from pandas.util import testing as tm - - -@pytest.fixture -def parser_data(request): - return os.path.join(tm.get_data_path(), '..', 'parser', 'data') @pytest.fixture -def tips_file(parser_data): +def tips_file(datapath): """Path to the tips dataset""" - return os.path.join(parser_data, 'tips.csv') + return datapath('io', 'parser', 'data', 'tips.csv') @pytest.fixture -def jsonl_file(parser_data): +def jsonl_file(datapath): """Path a JSONL dataset""" - return os.path.join(parser_data, 'items.jsonl') + return datapath('io', 'parser', 'data', 'items.jsonl') @pytest.fixture -def salaries_table(parser_data): +def salaries_table(datapath): """DataFrame with the salaries dataset""" - path = os.path.join(parser_data, 'salaries.csv') - return read_table(path) + return read_table(datapath('io', 'parser', 'data', 'salaries.csv')) @pytest.fixture diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index f221df93dd412..63b7cb3459069 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -916,8 +916,8 @@ def test_unicode_problem_decoding_as_ascii(self): dm = DataFrame({u('c/\u03c3'): Series({'test': np.nan})}) compat.text_type(dm.to_string()) - def test_string_repr_encoding(self): - filepath = tm.get_data_path('unicode_series.csv') + def test_string_repr_encoding(self, datapath): + filepath = datapath('io', 'formats', 'data', 'unicode_series.csv') df = pd.read_csv(filepath, header=None, encoding='latin1') repr(df) repr(df[1]) diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index c9074ca49e5be..05ceace20f5a4 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -21,11 +21,11 @@ def test_compression_roundtrip(compression): assert_frame_equal(df, pd.read_json(result)) -def test_read_zipped_json(): - uncompressed_path = 
tm.get_data_path("tsframe_v012.json") +def test_read_zipped_json(datapath): + uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json") uncompressed_df = pd.read_json(uncompressed_path) - compressed_path = tm.get_data_path("tsframe_v012.json.zip") + compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip") compressed_df = pd.read_json(compressed_path, compression='zip') assert_frame_equal(uncompressed_df, compressed_df) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 7e497c395266f..bcbac4400c953 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -37,8 +37,9 @@ class TestPandasContainer(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(scope="function", autouse=True) + def setup(self, datapath): + self.dirpath = datapath("io", "json", "data") self.ts = tm.makeTimeSeries() self.ts.name = 'ts' @@ -59,7 +60,8 @@ def setup_method(self, method): self.mixed_frame = _mixed_frame.copy() self.categorical = _cat_frame.copy() - def teardown_method(self, method): + yield + del self.dirpath del self.ts diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 6e1d3575a1481..9e871d27f0ce8 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -77,7 +77,7 @@ def test_read_csv(self): else: prefix = u("file://") - fname = prefix + compat.text_type(self.csv1) + fname = prefix + compat.text_type(os.path.abspath(self.csv1)) self.read_csv(fname, index_col=0, parse_dates=True) def test_1000_sep(self): @@ -651,21 +651,19 @@ def test_read_csv_parse_simple_list(self): tm.assert_frame_equal(df, expected) @tm.network - def test_url(self): + def test_url(self, datapath): # HTTP(S) url = ('https://raw.github.com/pandas-dev/pandas/master/' 'pandas/tests/io/parser/data/salaries.csv') url_table = self.read_table(url) - dirpath = tm.get_data_path() - localtable = os.path.join(dirpath, 'salaries.csv') + localtable = datapath('io', 'parser', 'data', 'salaries.csv') local_table = self.read_table(localtable) tm.assert_frame_equal(url_table, local_table) # TODO: ftp testing @pytest.mark.slow - def test_file(self): - dirpath = tm.get_data_path() - localtable = os.path.join(dirpath, 'salaries.csv') + def test_file(self, datapath): + localtable = datapath('io', 'parser', 'data', 'salaries.csv') local_table = self.read_table(localtable) try: @@ -755,8 +753,8 @@ def test_utf16_bom_skiprows(self): tm.assert_frame_equal(result, expected) - def test_utf16_example(self): - path = tm.get_data_path('utf16_ex.txt') + def test_utf16_example(self, datapath): + path = datapath('io', 'parser', 'data', 'utf16_ex.txt') # it works! 
and is the right length result = self.read_table(path, encoding='utf-16') @@ -767,8 +765,8 @@ def test_utf16_example(self): result = self.read_table(buf, encoding='utf-16') assert len(result) == 50 - def test_unicode_encoding(self): - pth = tm.get_data_path('unicode_series.csv') + def test_unicode_encoding(self, datapath): + pth = datapath('io', 'parser', 'data', 'unicode_series.csv') result = self.read_csv(pth, header=None, encoding='latin-1') result = result.set_index(0) @@ -1513,10 +1511,9 @@ def test_internal_eof_byte_to_file(self): result = self.read_csv(path) tm.assert_frame_equal(result, expected) - def test_sub_character(self): + def test_sub_character(self, datapath): # see gh-16893 - dirpath = tm.get_data_path() - filename = os.path.join(dirpath, "sub_char.csv") + filename = datapath('io', 'parser', 'data', 'sub_char.csv') expected = DataFrame([[1, 2, 3]], columns=["a", "\x1ab", "c"]) result = self.read_csv(filename) diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py index e84db66561c49..e4950af19ea95 100644 --- a/pandas/tests/io/parser/compression.py +++ b/pandas/tests/io/parser/compression.py @@ -120,9 +120,9 @@ def test_read_csv_infer_compression(self): tm.assert_frame_equal(expected, df) - def test_read_csv_compressed_utf16_example(self): + def test_read_csv_compressed_utf16_example(self, datapath): # GH18071 - path = tm.get_data_path('utf16_ex_small.zip') + path = datapath('io', 'parser', 'data', 'utf16_ex_small.zip') result = self.read_csv(path, encoding='utf-16', compression='zip', sep='\t') diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py index b91ce04673e29..8060ebf2fbcd4 100644 --- a/pandas/tests/io/parser/dtypes.py +++ b/pandas/tests/io/parser/dtypes.py @@ -125,9 +125,9 @@ def test_categorical_dtype_high_cardinality_numeric(self): np.sort(actual.a.cat.categories), ordered=True) tm.assert_frame_equal(actual, expected) - def test_categorical_dtype_encoding(self): + def test_categorical_dtype_encoding(self, datapath): # GH 10153 - pth = tm.get_data_path('unicode_series.csv') + pth = datapath('io', 'parser', 'data', 'unicode_series.csv') encoding = 'latin-1' expected = self.read_csv(pth, header=None, encoding=encoding) expected[1] = Categorical(expected[1]) @@ -135,7 +135,7 @@ def test_categorical_dtype_encoding(self): dtype={1: 'category'}) tm.assert_frame_equal(actual, expected) - pth = tm.get_data_path('utf16_ex.txt') + pth = datapath('io', 'parser', 'data', 'utf16_ex.txt') encoding = 'utf-16' expected = self.read_table(pth, encoding=encoding) expected = expected.apply(Categorical) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index fdf45f307e953..e2243b8087a5b 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -48,10 +48,16 @@ def check_compressed_urls(salaries_table, compression, extension, mode, tm.assert_frame_equal(url_table, salaries_table) +@pytest.fixture +def tips_df(datapath): + """DataFrame with the tips dataset.""" + return read_csv(datapath('io', 'parser', 'data', 'tips.csv')) + + @pytest.mark.usefixtures("s3_resource") class TestS3(object): - def test_parse_public_s3_bucket(self): + def test_parse_public_s3_bucket(self, tips_df): pytest.importorskip('s3fs') # more of an integration test due to the not-public contents portion # can probably mock this though. 
@@ -60,45 +66,40 @@ def test_parse_public_s3_bucket(self): ext, compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) # Read public file from bucket with not-public contents df = read_csv('s3://cant_get_it/tips.csv') assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) - def test_parse_public_s3n_bucket(self): + def test_parse_public_s3n_bucket(self, tips_df): # Read from AWS s3 as "s3n" URL df = read_csv('s3n://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) - def test_parse_public_s3a_bucket(self): + def test_parse_public_s3a_bucket(self, tips_df): # Read from AWS s3 as "s3a" URL df = read_csv('s3a://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) - def test_parse_public_s3_bucket_nrows(self): + def test_parse_public_s3_bucket_nrows(self, tips_df): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, nrows=10, compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) - def test_parse_public_s3_bucket_chunked(self): + def test_parse_public_s3_bucket_chunked(self, tips_df): # Read with a chunksize chunksize = 5 - local_tips = read_csv(tm.get_data_path('tips.csv')) for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df_reader = read_csv('s3://pandas-test/tips.csv' + ext, chunksize=chunksize, compression=comp) @@ -109,14 +110,13 @@ def test_parse_public_s3_bucket_chunked(self): df = df_reader.get_chunk() assert isinstance(df, DataFrame) assert not df.empty - true_df = local_tips.iloc[ + true_df = tips_df.iloc[ chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - def test_parse_public_s3_bucket_chunked_python(self): + def test_parse_public_s3_bucket_chunked_python(self, tips_df): # Read with a chunksize using the Python parser chunksize = 5 - local_tips = read_csv(tm.get_data_path('tips.csv')) for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df_reader = read_csv('s3://pandas-test/tips.csv' + ext, chunksize=chunksize, compression=comp, @@ -127,36 +127,33 @@ def test_parse_public_s3_bucket_chunked_python(self): df = df_reader.get_chunk() assert isinstance(df, DataFrame) assert not df.empty - true_df = local_tips.iloc[ + true_df = tips_df.iloc[ chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - def test_parse_public_s3_bucket_python(self): + def test_parse_public_s3_bucket_python(self, tips_df): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) - def test_infer_s3_compression(self): + def test_infer_s3_compression(self, tips_df): for ext in ['', '.gz', '.bz2']: df = 
read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression='infer') assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) - def test_parse_public_s3_bucket_nrows_python(self): + def test_parse_public_s3_bucket_nrows_python(self, tips_df): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', nrows=10, compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) def test_s3_fails(self): with pytest.raises(IOError): diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py index 7717102b64fc5..b6f13039641a2 100644 --- a/pandas/tests/io/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import os +import pytest import pandas.util.testing as tm from pandas import read_csv, read_table, DataFrame @@ -45,8 +46,9 @@ def read_table(self, *args, **kwargs): def float_precision_choices(self): raise com.AbstractMethodError(self) - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath('io', 'parser', 'data') self.csv1 = os.path.join(self.dirpath, 'test1.csv') self.csv2 = os.path.join(self.dirpath, 'test2.csv') self.xls1 = os.path.join(self.dirpath, 'test.xls') diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index e8d9d8b52164b..c7026e3e0fc88 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -28,8 +28,9 @@ class TestTextReader(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath('io', 'parser', 'data') self.csv1 = os.path.join(self.dirpath, 'test1.csv') self.csv2 = os.path.join(self.dirpath, 'test2.csv') self.xls1 = os.path.join(self.dirpath, 'test.xls') diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index b80263021c269..101ee3e619f5b 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -11,8 +11,9 @@ class TestSAS7BDAT(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "sas", "data") self.data = [] self.test_ix = [list(range(1, 16)), [16]] for j in 1, 2: @@ -123,9 +124,8 @@ def test_iterator_read_too_much(self): rdr.close() -def test_encoding_options(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "test1.sas7bdat") +def test_encoding_options(datapath): + fname = datapath("io", "sas", "data", "test1.sas7bdat") df1 = pd.read_sas(fname) df2 = pd.read_sas(fname, encoding='utf-8') for col in df1.columns: @@ -143,43 +143,39 @@ def test_encoding_options(): assert(x == y.decode()) -def test_productsales(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "productsales.sas7bdat") +def test_productsales(datapath): + fname = datapath("io", "sas", "data", "productsales.sas7bdat") df = pd.read_sas(fname, encoding='utf-8') - fname = os.path.join(dirpath, "productsales.csv") + fname = datapath("io", "sas", "data", 
"productsales.csv") df0 = pd.read_csv(fname, parse_dates=['MONTH']) vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"] df0[vn] = df0[vn].astype(np.float64) tm.assert_frame_equal(df, df0) -def test_12659(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "test_12659.sas7bdat") +def test_12659(datapath): + fname = datapath("io", "sas", "data", "test_12659.sas7bdat") df = pd.read_sas(fname) - fname = os.path.join(dirpath, "test_12659.csv") + fname = datapath("io", "sas", "data", "test_12659.csv") df0 = pd.read_csv(fname) df0 = df0.astype(np.float64) tm.assert_frame_equal(df, df0) -def test_airline(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "airline.sas7bdat") +def test_airline(datapath): + fname = datapath("io", "sas", "data", "airline.sas7bdat") df = pd.read_sas(fname) - fname = os.path.join(dirpath, "airline.csv") + fname = datapath("io", "sas", "data", "airline.csv") df0 = pd.read_csv(fname) df0 = df0.astype(np.float64) tm.assert_frame_equal(df, df0, check_exact=False) -def test_date_time(): +def test_date_time(datapath): # Support of different SAS date/datetime formats (PR #15871) - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "datetime.sas7bdat") + fname = datapath("io", "sas", "data", "datetime.sas7bdat") df = pd.read_sas(fname) - fname = os.path.join(dirpath, "datetime.csv") + fname = datapath("io", "sas", "data", "datetime.csv") df0 = pd.read_csv(fname, parse_dates=['Date1', 'Date2', 'DateTime', 'DateTimeHi', 'Taiw']) # GH 19732: Timestamps imported from sas will incur floating point errors @@ -187,9 +183,8 @@ def test_date_time(): tm.assert_frame_equal(df, df0) -def test_zero_variables(): +def test_zero_variables(datapath): # Check if the SAS file has zero variables (PR #18184) - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "zero_variables.sas7bdat") + fname = datapath("io", "sas", "data", "zero_variables.sas7bdat") with pytest.raises(EmptyDataError): pd.read_sas(fname) diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index de31c3e36a8d5..6e5b2ab067aa5 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -1,3 +1,4 @@ +import pytest import pandas as pd import pandas.util.testing as tm from pandas.io.sas.sasreader import read_sas @@ -18,8 +19,9 @@ def numeric_as_float(data): class TestXport(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "sas", "data") self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt") self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt") self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt") diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index a89156db38ae3..5c9739be73393 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -149,27 +149,22 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): reader(path) @pytest.mark.parametrize('reader, module, path', [ - (pd.read_csv, 'os', os.path.join(HERE, 'data', 'iris.csv')), - (pd.read_table, 'os', os.path.join(HERE, 'data', 'iris.csv')), - (pd.read_fwf, 'os', os.path.join(HERE, 'data', - 'fixed_width_format.txt')), - (pd.read_excel, 'xlrd', os.path.join(HERE, 'data', 'test1.xlsx')), - (pd.read_feather, 'feather', os.path.join(HERE, 'data', - 'feather-0_3_1.feather')), - (pd.read_hdf, 'tables', os.path.join(HERE, 'data', 'legacy_hdf', - 'datetimetz_object.h5')), - 
(pd.read_stata, 'os', os.path.join(HERE, 'data', 'stata10_115.dta')), - (pd.read_sas, 'os', os.path.join(HERE, 'sas', 'data', - 'test1.sas7bdat')), - (pd.read_json, 'os', os.path.join(HERE, 'json', 'data', - 'tsframe_v012.json')), - (pd.read_msgpack, 'os', os.path.join(HERE, 'msgpack', 'data', - 'frame.mp')), - (pd.read_pickle, 'os', os.path.join(HERE, 'data', - 'categorical_0_14_1.pickle')), + (pd.read_csv, 'os', ('io', 'data', 'iris.csv')), + (pd.read_table, 'os', ('io', 'data', 'iris.csv')), + (pd.read_fwf, 'os', ('io', 'data', 'fixed_width_format.txt')), + (pd.read_excel, 'xlrd', ('io', 'data', 'test1.xlsx')), + (pd.read_feather, 'feather', ('io', 'data', 'feather-0_3_1.feather')), + (pd.read_hdf, 'tables', ('io', 'data', 'legacy_hdf', + 'datetimetz_object.h5')), + (pd.read_stata, 'os', ('io', 'data', 'stata10_115.dta')), + (pd.read_sas, 'os', ('io', 'sas', 'data', 'test1.sas7bdat')), + (pd.read_json, 'os', ('io', 'json', 'data', 'tsframe_v012.json')), + (pd.read_msgpack, 'os', ('io', 'msgpack', 'data', 'frame.mp')), + (pd.read_pickle, 'os', ('io', 'data', 'categorical_0_14_1.pickle')), ]) - def test_read_fspath_all(self, reader, module, path): + def test_read_fspath_all(self, reader, module, path, datapath): pytest.importorskip(module) + path = datapath(*path) mypath = CustomFSPath(path) result = reader(mypath) @@ -232,13 +227,14 @@ def test_write_fspath_hdf5(self): tm.assert_frame_equal(result, expected) -class TestMMapWrapper(object): +@pytest.fixture +def mmap_file(datapath): + return datapath('io', 'data', 'test_mmap.csv') + - def setup_method(self, method): - self.mmap_file = os.path.join(tm.get_data_path(), - 'test_mmap.csv') +class TestMMapWrapper(object): - def test_constructor_bad_file(self): + def test_constructor_bad_file(self, mmap_file): non_file = StringIO('I am not a file') non_file.fileno = lambda: -1 @@ -252,15 +248,15 @@ def test_constructor_bad_file(self): tm.assert_raises_regex(err, msg, common.MMapWrapper, non_file) - target = open(self.mmap_file, 'r') + target = open(mmap_file, 'r') target.close() msg = "I/O operation on closed file" tm.assert_raises_regex( ValueError, msg, common.MMapWrapper, target) - def test_get_attr(self): - with open(self.mmap_file, 'r') as target: + def test_get_attr(self, mmap_file): + with open(mmap_file, 'r') as target: wrapper = common.MMapWrapper(target) attrs = dir(wrapper.mmap) @@ -273,8 +269,8 @@ def test_get_attr(self): assert not hasattr(wrapper, 'foo') - def test_next(self): - with open(self.mmap_file, 'r') as target: + def test_next(self, mmap_file): + with open(mmap_file, 'r') as target: wrapper = common.MMapWrapper(target) lines = target.readlines() diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 2a225e6fe6a45..1fda56dbff772 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -39,8 +39,9 @@ @td.skip_if_no('xlrd', '0.9') class SharedItems(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "data") self.frame = _frame.copy() self.frame2 = _frame2.copy() self.tsframe = _tsframe.copy() @@ -49,7 +50,6 @@ def setup_method(self, method): def get_csv_refdf(self, basename): """ Obtain the reference data from read_csv with the Python engine. 
- Test data path is defined by pandas.util.testing.get_data_path() Parameters ---------- @@ -68,8 +68,7 @@ def get_csv_refdf(self, basename): def get_excelfile(self, basename, ext): """ - Return test data ExcelFile instance. Test data path is defined by - pandas.util.testing.get_data_path() + Return test data ExcelFile instance. Parameters ---------- @@ -86,8 +85,7 @@ def get_excelfile(self, basename, ext): def get_exceldf(self, basename, ext, *args, **kwds): """ - Return test data DataFrame. Test data path is defined by - pandas.util.testing.get_data_path() + Return test data DataFrame. Parameters ---------- diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index a56946b82b027..9c6a8de7ed446 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1,6 +1,5 @@ from __future__ import print_function -import glob import os import re import threading @@ -25,8 +24,18 @@ import pandas.util._test_decorators as td from pandas.util.testing import makeCustomDataframe as mkdf, network +HERE = os.path.dirname(__file__) -DATA_PATH = tm.get_data_path() + +@pytest.fixture(params=[ + 'chinese_utf-16.html', + 'chinese_utf-32.html', + 'chinese_utf-8.html', + 'letz_latin1.html', +]) +def html_encoding_file(request, datapath): + """Parametrized fixture for HTML encoding test filenames.""" + return datapath('io', 'data', 'html_encoding', request.param) def assert_framelist_equal(list1, list2, *args, **kwargs): @@ -44,11 +53,11 @@ def assert_framelist_equal(list1, list2, *args, **kwargs): @td.skip_if_no('bs4') -def test_bs4_version_fails(monkeypatch): +def test_bs4_version_fails(monkeypatch, datapath): import bs4 monkeypatch.setattr(bs4, '__version__', '4.2') with tm.assert_raises_regex(ValueError, "minimum version"): - read_html(os.path.join(DATA_PATH, "spam.html"), flavor='bs4') + read_html(datapath("io", "data", "spam.html"), flavor='bs4') def test_invalid_flavor(): @@ -59,8 +68,8 @@ def test_invalid_flavor(): @td.skip_if_no('bs4') @td.skip_if_no('lxml') -def test_same_ordering(): - filename = os.path.join(DATA_PATH, 'valid_markup.html') +def test_same_ordering(datapath): + filename = datapath('io', 'data', 'valid_markup.html') dfs_lxml = read_html(filename, index_col=0, flavor=['lxml']) dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4']) assert_framelist_equal(dfs_lxml, dfs_bs4) @@ -72,11 +81,14 @@ def test_same_ordering(): pytest.param('lxml', marks=pytest.mark.skipif( not td.safe_import('lxml'), reason='No lxml'))], scope="class") class TestReadHtml(object): - spam_data = os.path.join(DATA_PATH, 'spam.html') - spam_data_kwargs = {} - if PY3: - spam_data_kwargs['encoding'] = 'UTF-8' - banklist_data = os.path.join(DATA_PATH, 'banklist.html') + + @pytest.fixture(autouse=True) + def set_files(self, datapath): + self.spam_data = datapath('io', 'data', 'spam.html') + self.spam_data_kwargs = {} + if PY3: + self.spam_data_kwargs['encoding'] = 'UTF-8' + self.banklist_data = datapath("io", "data", "banklist.html") @pytest.fixture(autouse=True, scope="function") def set_defaults(self, flavor, request): @@ -272,7 +284,8 @@ def test_invalid_url(self): @pytest.mark.slow def test_file_url(self): url = self.banklist_data - dfs = self.read_html(file_path_to_url(url), 'First', + dfs = self.read_html(file_path_to_url(os.path.abspath(url)), + 'First', attrs={'id': 'table'}) assert isinstance(dfs, list) for df in dfs: @@ -326,7 +339,7 @@ def test_multiindex_header_index_skiprows(self): @pytest.mark.slow def test_regex_idempotency(self): url = self.banklist_data - dfs = 
self.read_html(file_path_to_url(url), + dfs = self.read_html(file_path_to_url(os.path.abspath(url)), match=re.compile(re.compile('Florida')), attrs={'id': 'table'}) assert isinstance(dfs, list) @@ -352,9 +365,9 @@ def test_python_docs_table(self): assert sorted(zz) == sorted(['Repo', 'What']) @pytest.mark.slow - def test_thousands_macau_stats(self): + def test_thousands_macau_stats(self, datapath): all_non_nan_table_index = -2 - macau_data = os.path.join(DATA_PATH, 'macau.html') + macau_data = datapath("io", "data", "macau.html") dfs = self.read_html(macau_data, index_col=0, attrs={'class': 'style1'}) df = dfs[all_non_nan_table_index] @@ -362,9 +375,9 @@ def test_thousands_macau_stats(self): assert not any(s.isna().any() for _, s in df.iteritems()) @pytest.mark.slow - def test_thousands_macau_index_col(self): + def test_thousands_macau_index_col(self, datapath): all_non_nan_table_index = -2 - macau_data = os.path.join(DATA_PATH, 'macau.html') + macau_data = datapath('io', 'data', 'macau.html') dfs = self.read_html(macau_data, index_col=0, header=0) df = dfs[all_non_nan_table_index] @@ -518,8 +531,8 @@ def test_countries_municipalities(self): res2 = self.read_html(data2, header=0) assert_framelist_equal(res1, res2) - def test_nyse_wsj_commas_table(self): - data = os.path.join(DATA_PATH, 'nyse_wsj.html') + def test_nyse_wsj_commas_table(self, datapath): + data = datapath('io', 'data', 'nyse_wsj.html') df = self.read_html(data, index_col=0, header=0, attrs={'class': 'mdcTable'})[0] @@ -530,7 +543,7 @@ def test_nyse_wsj_commas_table(self): tm.assert_index_equal(df.columns, columns) @pytest.mark.slow - def test_banklist_header(self): + def test_banklist_header(self, datapath): from pandas.io.html import _remove_whitespace def try_remove_ws(x): @@ -541,7 +554,7 @@ def try_remove_ws(x): df = self.read_html(self.banklist_data, 'Metcalf', attrs={'id': 'table'})[0] - ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'), + ground_truth = read_csv(datapath('io', 'data', 'banklist.csv'), converters={'Updated Date': Timestamp, 'Closing Date': Timestamp}) assert df.shape == ground_truth.shape @@ -658,19 +671,19 @@ def test_parse_dates_combine(self): newdf = DataFrame({'datetime': raw_dates}) tm.assert_frame_equal(newdf, res[0]) - def test_computer_sales_page(self): - data = os.path.join(DATA_PATH, 'computer_sales_page.html') + def test_computer_sales_page(self, datapath): + data = datapath('io', 'data', 'computer_sales_page.html') with tm.assert_raises_regex(ParserError, r"Passed header=\[0,1\] are " r"too many rows for this " r"multi_index of columns"): self.read_html(data, header=[0, 1]) - data = os.path.join(DATA_PATH, 'computer_sales_page.html') + data = datapath('io', 'data', 'computer_sales_page.html') assert self.read_html(data, header=[1, 2]) - def test_wikipedia_states_table(self): - data = os.path.join(DATA_PATH, 'wikipedia_states.html') + def test_wikipedia_states_table(self, datapath): + data = datapath('io', 'data', 'wikipedia_states.html') assert os.path.isfile(data), '%r is not a file' % data assert os.path.getsize(data), '%r is an empty file' % data result = self.read_html(data, 'Arizona', header=1)[0] @@ -784,15 +797,15 @@ def test_multiple_header_rows(self): html_df = read_html(html, )[0] tm.assert_frame_equal(expected_df, html_df) - def test_works_on_valid_markup(self): - filename = os.path.join(DATA_PATH, 'valid_markup.html') + def test_works_on_valid_markup(self, datapath): + filename = datapath('io', 'data', 'valid_markup.html') dfs = self.read_html(filename, index_col=0) 
assert isinstance(dfs, list) assert isinstance(dfs[0], DataFrame) @pytest.mark.slow - def test_fallback_success(self): - banklist_data = os.path.join(DATA_PATH, 'banklist.html') + def test_fallback_success(self, datapath): + banklist_data = datapath('io', 'data', 'banklist.html') self.read_html(banklist_data, '.*Water.*', flavor=['lxml', 'html5lib']) def test_to_html_timestamp(self): @@ -835,22 +848,23 @@ def test_displayed_only(self, displayed_only, exp0, exp1): else: assert len(dfs) == 1 # Should not parse hidden table - @pytest.mark.parametrize("f", glob.glob( - os.path.join(DATA_PATH, 'html_encoding', '*.html'))) - def test_encode(self, f): - _, encoding = os.path.splitext(os.path.basename(f))[0].split('_') + def test_encode(self, html_encoding_file): + _, encoding = os.path.splitext( + os.path.basename(html_encoding_file) + )[0].split('_') try: - with open(f, 'rb') as fobj: + with open(html_encoding_file, 'rb') as fobj: from_string = self.read_html(fobj.read(), encoding=encoding, index_col=0).pop() - with open(f, 'rb') as fobj: + with open(html_encoding_file, 'rb') as fobj: from_file_like = self.read_html(BytesIO(fobj.read()), encoding=encoding, index_col=0).pop() - from_filename = self.read_html(f, encoding=encoding, + from_filename = self.read_html(html_encoding_file, + encoding=encoding, index_col=0).pop() tm.assert_frame_equal(from_string, from_file_like) tm.assert_frame_equal(from_string, from_filename) @@ -906,7 +920,7 @@ def seekable(self): assert self.read_html(bad) @pytest.mark.slow - def test_importcheck_thread_safety(self): + def test_importcheck_thread_safety(self, datapath): # see gh-16928 class ErrorThread(threading.Thread): @@ -921,7 +935,7 @@ def run(self): # force import check by reinitalising global vars in html.py reload(pandas.io.html) - filename = os.path.join(DATA_PATH, 'valid_markup.html') + filename = datapath('io', 'data', 'valid_markup.html') helper_thread1 = ErrorThread(target=self.read_html, args=(filename,)) helper_thread2 = ErrorThread(target=self.read_html, args=(filename,)) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 0b1c1ca178762..412e218f95c6f 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -3,6 +3,7 @@ from warnings import catch_warnings import os import datetime +import glob import numpy as np from distutils.version import LooseVersion @@ -836,13 +837,13 @@ def test_default_encoding(self): assert_frame_equal(result, frame) -def legacy_packers_versions(): - # yield the packers versions - path = tm.get_data_path('legacy_msgpack') - for v in os.listdir(path): - p = os.path.join(path, v) - if os.path.isdir(p): - yield v +files = glob.glob(os.path.join(os.path.dirname(__file__), "data", + "legacy_msgpack", "*", "*.msgpack")) + + +@pytest.fixture(params=files) +def legacy_packer(request, datapath): + return datapath(request.param) class TestMsgpack(object): @@ -919,24 +920,20 @@ def compare_frame_dt_mixed_tzs(self, result, expected, typ, version): else: tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize('version', legacy_packers_versions()) def test_msgpacks_legacy(self, current_packers_data, all_packers_data, - version): - - pth = tm.get_data_path('legacy_msgpack/{0}'.format(version)) - n = 0 - for f in os.listdir(pth): - # GH12142 0.17 files packed in P2 can't be read in P3 - if (compat.PY3 and version.startswith('0.17.') and - f.split('.')[-4][-1] == '2'): - continue - vf = os.path.join(pth, f) - try: - with catch_warnings(record=True): - 
self.compare(current_packers_data, all_packers_data, - vf, version) - except ImportError: - # blosc not installed - continue - n += 1 - assert n > 0, 'Msgpack files are not tested' + legacy_packer, datapath): + + version = os.path.basename(os.path.dirname(legacy_packer)) + + # GH12142 0.17 files packed in P2 can't be read in P3 + if (compat.PY3 and version.startswith('0.17.') and + legacy_packer.split('.')[-4][-1] == '2'): + msg = "Files packed in Py2 can't be read in Py3 ({})" + pytest.skip(msg.format(version)) + try: + with catch_warnings(record=True): + self.compare(current_packers_data, all_packers_data, + legacy_packer, version) + except ImportError: + # blosc not installed + pass diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index fbe2174e603e2..45cbbd43cd6a8 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -12,7 +12,7 @@ 3. Move the created pickle to "data/legacy_pickle/" directory. """ - +import glob import pytest from warnings import catch_warnings @@ -184,27 +184,25 @@ def compare_sp_frame_float(result, expected, typ, version): tm.assert_sp_frame_equal(result, expected) +files = glob.glob(os.path.join(os.path.dirname(__file__), "data", + "legacy_pickle", "*", "*.pickle")) + + +@pytest.fixture(params=files) +def legacy_pickle(request, datapath): + return datapath(request.param) + + # --------------------- # tests # --------------------- -def legacy_pickle_versions(): - # yield the pickle versions - path = tm.get_data_path('legacy_pickle') - for v in os.listdir(path): - p = os.path.join(path, v) - if os.path.isdir(p): - for f in os.listdir(p): - yield (v, f) - - -@pytest.mark.parametrize('version, f', legacy_pickle_versions()) -def test_pickles(current_pickle_data, version, f): +def test_pickles(current_pickle_data, legacy_pickle): if not is_platform_little_endian(): pytest.skip("known failure on non-little endian") - vf = tm.get_data_path('legacy_pickle/{}/{}'.format(version, f)) + version = os.path.basename(os.path.dirname(legacy_pickle)) with catch_warnings(record=True): - compare(current_pickle_data, vf, version) + compare(current_pickle_data, legacy_pickle, version) def test_round_trip_current(current_pickle_data): @@ -260,12 +258,11 @@ def python_unpickler(path): compare_element(result, expected, typ) -def test_pickle_v0_14_1(): +def test_pickle_v0_14_1(datapath): cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False, categories=['a', 'b', 'c', 'd']) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_14_1.pickle') + pickle_path = datapath('io', 'data', 'categorical_0_14_1.pickle') # This code was executed once on v0.14.1 to generate the pickle: # # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], @@ -275,14 +272,13 @@ def test_pickle_v0_14_1(): tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) -def test_pickle_v0_15_2(): +def test_pickle_v0_15_2(datapath): # ordered -> _ordered # GH 9347 cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False, categories=['a', 'b', 'c', 'd']) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_15_2.pickle') + pickle_path = datapath('io', 'data', 'categorical_0_15_2.pickle') # This code was executed once on v0.15.2 to generate the pickle: # # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index f96e7eeb40ea2..b95df3840b6c5 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ 
-4449,28 +4449,27 @@ def f(): store.select('df') tm.assert_raises_regex(ClosedFileError, 'file is not open', f) - def test_pytables_native_read(self): - + def test_pytables_native_read(self, datapath): with ensure_clean_store( - tm.get_data_path('legacy_hdf/pytables_native.h5'), + datapath('io', 'data', 'legacy_hdf/pytables_native.h5'), mode='r') as store: d2 = store['detector/readout'] assert isinstance(d2, DataFrame) @pytest.mark.skipif(PY35 and is_platform_windows(), reason="native2 read fails oddly on windows / 3.5") - def test_pytables_native2_read(self): + def test_pytables_native2_read(self, datapath): with ensure_clean_store( - tm.get_data_path('legacy_hdf/pytables_native2.h5'), + datapath('io', 'data', 'legacy_hdf', 'pytables_native2.h5'), mode='r') as store: str(store) d1 = store['detector'] assert isinstance(d1, DataFrame) - def test_legacy_table_read(self): + def test_legacy_table_read(self, datapath): # legacy table types with ensure_clean_store( - tm.get_data_path('legacy_hdf/legacy_table.h5'), + datapath('io', 'data', 'legacy_hdf', 'legacy_table.h5'), mode='r') as store: with catch_warnings(record=True): @@ -5117,7 +5116,7 @@ def test_fspath(self): with pd.HDFStore(path) as store: assert os.fspath(store) == str(path) - def test_read_py2_hdf_file_in_py3(self): + def test_read_py2_hdf_file_in_py3(self, datapath): # GH 16781 # tests reading a PeriodIndex DataFrame written in Python2 in Python3 @@ -5132,8 +5131,8 @@ def test_read_py2_hdf_file_in_py3(self): ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')) with ensure_clean_store( - tm.get_data_path( - 'legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5'), + datapath('io', 'data', 'legacy_hdf', + 'periodindex_0.20.1_x86_64_darwin_2.7.13.h5'), mode='r') as store: result = store['p'] assert_frame_equal(result, expected) @@ -5530,14 +5529,14 @@ def test_store_timezone(self): assert_frame_equal(result, df) - def test_legacy_datetimetz_object(self): + def test_legacy_datetimetz_object(self, datapath): # legacy from < 0.17.0 # 8260 expected = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'), B=Timestamp('20130603', tz='CET')), index=range(5)) with ensure_clean_store( - tm.get_data_path('legacy_hdf/datetimetz_object.h5'), + datapath('io', 'data', 'legacy_hdf', 'datetimetz_object.h5'), mode='r') as store: result = store['df'] assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index f3ab74d37a2bc..f8f742c5980ac 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -22,7 +22,6 @@ import pytest import sqlite3 import csv -import os import warnings import numpy as np @@ -184,9 +183,11 @@ class MixInBase(object): def teardown_method(self, method): - for tbl in self._get_all_tables(): - self.drop_table(tbl) - self._close_conn() + # if setup fails, there may not be a connection to close. 
+ if hasattr(self, 'conn'): + for tbl in self._get_all_tables(): + self.drop_table(tbl) + self._close_conn() class MySQLMixIn(MixInBase): @@ -253,9 +254,9 @@ def _get_exec(self): else: return self.conn.cursor() - def _load_iris_data(self): + def _load_iris_data(self, datapath): import io - iris_csv_file = os.path.join(tm.get_data_path(), 'iris.csv') + iris_csv_file = datapath('io', 'data', 'iris.csv') self.drop_table('iris') self._get_exec().execute(SQL_STRINGS['create_iris'][self.flavor]) @@ -503,9 +504,10 @@ class _TestSQLApi(PandasSQLTest): flavor = 'sqlite' mode = None - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): self.conn = self.connect() - self._load_iris_data() + self._load_iris_data(datapath) self._load_iris_view() self._load_test1_data() self._load_test2_data() @@ -1025,8 +1027,9 @@ class _EngineToConnMixin(object): A mixin that causes setup_connect to create a conn rather than an engine. """ - def setup_method(self, method): - super(_EngineToConnMixin, self).setup_method(method) + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + super(_EngineToConnMixin, self).setup_method(datapath) engine = self.conn conn = engine.connect() self.__tx = conn.begin() @@ -1034,12 +1037,14 @@ def setup_method(self, method): self.__engine = engine self.conn = conn - def teardown_method(self, method): + yield + self.__tx.rollback() self.conn.close() self.conn = self.__engine self.pandasSQL = sql.SQLDatabase(self.__engine) - super(_EngineToConnMixin, self).teardown_method(method) + # XXX: + # super(_EngineToConnMixin, self).teardown_method(method) @pytest.mark.single @@ -1136,7 +1141,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): """ flavor = None - @classmethod + @pytest.fixture(autouse=True, scope='class') def setup_class(cls): cls.setup_import() cls.setup_driver() @@ -1149,10 +1154,11 @@ def setup_class(cls): msg = "{0} - can't connect to {1} server".format(cls, cls.flavor) pytest.skip(msg) - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): self.setup_connect() - self._load_iris_data() + self._load_iris_data(datapath) self._load_raw_sql() self._load_test1_data() @@ -1920,11 +1926,12 @@ class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest): def connect(cls): return sqlite3.connect(':memory:') - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): self.conn = self.connect() self.pandasSQL = sql.SQLiteDatabase(self.conn) - self._load_iris_data() + self._load_iris_data(datapath) self._load_test1_data() @@ -2135,8 +2142,9 @@ def _skip_if_no_pymysql(): @pytest.mark.single class TestXSQLite(SQLiteMixIn): - def setup_method(self, method): - self.method = method + @pytest.fixture(autouse=True) + def setup_method(self, request, datapath): + self.method = request.function self.conn = sqlite3.connect(':memory:') def test_basic(self): @@ -2215,8 +2223,7 @@ def test_execute_fail(self): with pytest.raises(Exception): sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) - @tm.capture_stdout - def test_execute_closed_connection(self): + def test_execute_closed_connection(self, request, datapath): create_sql = """ CREATE TABLE test ( @@ -2236,7 +2243,7 @@ def test_execute_closed_connection(self): tquery("select * from test", con=self.conn) # Initialize connection again (needed for tearDown) - self.setup_method(self.method) + self.setup_method(request, datapath) def test_na_roundtrip(self): pass @@ -2341,7 +2348,7 @@ def 
clean_up(test_table_to_drop): "if SQLAlchemy is not installed") class TestXMySQL(MySQLMixIn): - @classmethod + @pytest.fixture(autouse=True, scope='class') def setup_class(cls): _skip_if_no_pymysql() @@ -2370,7 +2377,8 @@ def setup_class(cls): "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, request, datapath): _skip_if_no_pymysql() import pymysql try: @@ -2396,7 +2404,7 @@ def setup_method(self, method): "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") - self.method = method + self.method = request.function def test_basic(self): _skip_if_no_pymysql() @@ -2501,8 +2509,7 @@ def test_execute_fail(self): with pytest.raises(Exception): sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) - @tm.capture_stdout - def test_execute_closed_connection(self): + def test_execute_closed_connection(self, request, datapath): _skip_if_no_pymysql() drop_sql = "DROP TABLE IF EXISTS test" create_sql = """ @@ -2525,7 +2532,7 @@ def test_execute_closed_connection(self): tquery("select * from test", con=self.conn) # Initialize connection again (needed for tearDown) - self.setup_method(self.method) + self.setup_method(request, datapath) def test_na_roundtrip(self): _skip_if_no_pymysql() diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index bfb72be80400e..cfe47cae7e5e1 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -25,8 +25,8 @@ @pytest.fixture -def dirpath(): - return tm.get_data_path() +def dirpath(datapath): + return datapath("io", "data") @pytest.fixture @@ -39,8 +39,9 @@ def parsed_114(dirpath): class TestStata(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "data") self.dta1_114 = os.path.join(self.dirpath, 'stata1_114.dta') self.dta1_117 = os.path.join(self.dirpath, 'stata1_117.dta') diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index f65791329f2f1..09687dd97bd43 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -74,11 +74,6 @@ def setup_method(self, method): else: self.default_figsize = (8.0, 6.0) self.default_tick_position = 'left' if self.mpl_ge_2_0_0 else 'default' - # common test data - from pandas import read_csv - base = os.path.join(os.path.dirname(curpath()), os.pardir) - path = os.path.join(base, 'tests', 'data', 'iris.csv') - self.iris = read_csv(path) n = 100 with tm.RNGContext(42): diff --git a/pandas/tests/plotting/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py index 2c2d371921d2f..a45b17ec98261 100644 --- a/pandas/tests/plotting/test_deprecated.py +++ b/pandas/tests/plotting/test_deprecated.py @@ -46,10 +46,9 @@ def test_boxplot_deprecated(self): by='indic') @pytest.mark.slow - def test_radviz_deprecated(self): - df = self.iris + def test_radviz_deprecated(self, iris): with tm.assert_produces_warning(FutureWarning): - plotting.radviz(frame=df, class_column='Name') + plotting.radviz(frame=iris, class_column='Name') @pytest.mark.slow def test_plot_params(self): diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index c82c939584dc7..0473610ea2f8f 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -100,11 +100,11 @@ def 
test_scatter_matrix_axis(self): axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) @pytest.mark.slow - def test_andrews_curves(self): + def test_andrews_curves(self, iris): from pandas.plotting import andrews_curves from matplotlib import cm - df = self.iris + df = iris _check_plot_works(andrews_curves, frame=df, class_column='Name') @@ -165,11 +165,11 @@ def test_andrews_curves(self): andrews_curves(data=df, class_column='Name') @pytest.mark.slow - def test_parallel_coordinates(self): + def test_parallel_coordinates(self, iris): from pandas.plotting import parallel_coordinates from matplotlib import cm - df = self.iris + df = iris ax = _check_plot_works(parallel_coordinates, frame=df, class_column='Name') @@ -234,11 +234,11 @@ def test_parallel_coordinates_with_sorted_labels(self): assert prev[1] < nxt[1] and prev[0] < nxt[0] @pytest.mark.slow - def test_radviz(self): + def test_radviz(self, iris): from pandas.plotting import radviz from matplotlib import cm - df = self.iris + df = iris _check_plot_works(radviz, frame=df, class_column='Name') rgba = ('#556270', '#4ECDC4', '#C7F464') @@ -272,8 +272,8 @@ def test_radviz(self): self._check_colors(handles, facecolors=colors) @pytest.mark.slow - def test_subplot_titles(self): - df = self.iris.drop('Name', axis=1).head() + def test_subplot_titles(self, iris): + df = iris.drop('Name', axis=1).head() # Use the column names as the subplot titles title = list(df.columns) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index cebbcc41c3e17..59b53cd23010e 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1,4 +1,3 @@ -import os import pytest import pytz @@ -13,8 +12,8 @@ class TestAsOfMerge(object): - def read_data(self, name, dedupe=False): - path = os.path.join(tm.get_data_path(), name) + def read_data(self, datapath, name, dedupe=False): + path = datapath('reshape', 'merge', 'data', name) x = read_csv(path) if dedupe: x = (x.drop_duplicates(['time', 'ticker'], keep='last') @@ -23,15 +22,17 @@ def read_data(self, name, dedupe=False): x.time = to_datetime(x.time) return x - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): - self.trades = self.read_data('trades.csv') - self.quotes = self.read_data('quotes.csv', dedupe=True) - self.asof = self.read_data('asof.csv') - self.tolerance = self.read_data('tolerance.csv') - self.allow_exact_matches = self.read_data('allow_exact_matches.csv') + self.trades = self.read_data(datapath, 'trades.csv') + self.quotes = self.read_data(datapath, 'quotes.csv', dedupe=True) + self.asof = self.read_data(datapath, 'asof.csv') + self.tolerance = self.read_data(datapath, 'tolerance.csv') + self.allow_exact_matches = self.read_data(datapath, + 'allow_exact_matches.csv') self.allow_exact_matches_and_tolerance = self.read_data( - 'allow_exact_matches_and_tolerance.csv') + datapath, 'allow_exact_matches_and_tolerance.csv') def test_examples1(self): """ doc-string examples """ @@ -423,11 +424,11 @@ def test_multiby_indexed(self): pd.merge_asof(left, right, left_index=True, right_index=True, left_by=['k1', 'k2'], right_by=['k1']) - def test_basic2(self): + def test_basic2(self, datapath): - expected = self.read_data('asof2.csv') - trades = self.read_data('trades2.csv') - quotes = self.read_data('quotes2.csv', dedupe=True) + expected = self.read_data(datapath, 'asof2.csv') + trades = self.read_data(datapath, 'trades2.csv') + quotes = self.read_data(datapath, 
'quotes2.csv', dedupe=True) result = merge_asof(trades, quotes, on='time', @@ -467,14 +468,14 @@ def test_valid_join_keys(self): merge_asof(trades, quotes, by='ticker') - def test_with_duplicates(self): + def test_with_duplicates(self, datapath): q = pd.concat([self.quotes, self.quotes]).sort_values( ['time', 'ticker']).reset_index(drop=True) result = merge_asof(self.trades, q, on='time', by='ticker') - expected = self.read_data('asof.csv') + expected = self.read_data(datapath, 'asof.csv') assert_frame_equal(result, expected) def test_with_duplicates_no_on(self): diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 5ea27f9e34e1c..807fb2530603a 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -282,10 +282,10 @@ def test_round_frac(self): result = tmod._round_frac(0.000123456, precision=2) assert result == 0.00012 - def test_qcut_binning_issues(self): + def test_qcut_binning_issues(self, datapath): # #1978, 1979 - path = os.path.join(tm.get_data_path(), 'cut_data.csv') - arr = np.loadtxt(path) + cut_file = datapath(os.path.join('reshape', 'data', 'cut_data.csv')) + arr = np.loadtxt(cut_file) result = qcut(arr, 20) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 74bc08ee9649b..b93a0206479ca 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -1,4 +1,3 @@ -import os from distutils.version import LooseVersion from datetime import date, datetime, timedelta @@ -518,14 +517,15 @@ def test_add(self, offset_types, tz): assert isinstance(result, Timestamp) assert result == expected_localize - def test_pickle_v0_15_2(self): + def test_pickle_v0_15_2(self, datapath): offsets = {'DateOffset': DateOffset(years=1), 'MonthBegin': MonthBegin(1), 'Day': Day(1), 'YearBegin': YearBegin(1), 'Week': Week(1)} - pickle_path = os.path.join(tm.get_data_path(), - 'dateoffset_0_15_2.pickle') + + pickle_path = datapath('tseries', 'offsets', 'data', + 'dateoffset_0_15_2.pickle') # This code was executed once on v0.15.2 to generate the pickle: # with open(pickle_path, 'wb') as f: pickle.dump(offsets, f) # @@ -1838,12 +1838,10 @@ def _check_roundtrip(obj): _check_roundtrip(self.offset2) _check_roundtrip(self.offset * 2) - def test_pickle_compat_0_14_1(self): + def test_pickle_compat_0_14_1(self, datapath): hdays = [datetime(2013, 1, 1) for ele in range(4)] - - pth = tm.get_data_path() - - cday0_14_1 = read_pickle(os.path.join(pth, 'cday-0.14.1.pickle')) + pth = datapath('tseries', 'offsets', 'data', 'cday-0.14.1.pickle') + cday0_14_1 = read_pickle(pth) cday = CDay(holidays=hdays) assert cday == cday0_14_1 diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index ab7c4fb528452..4d34987e14f75 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import os import pandas as pd import pytest import numpy as np @@ -841,3 +842,15 @@ def test_locale(self): # GH9744 locales = tm.get_locales() assert len(locales) >= 1 + + +def test_datapath_missing(datapath, request): + if not request.config.getoption("--strict-data-files"): + pytest.skip("Need to set '--strict-data-files'") + + with pytest.raises(ValueError): + datapath('not_a_file') + + result = datapath('data', 'iris.csv') + expected = os.path.join('pandas', 'tests', 'data', 'iris.csv') + assert result == expected diff --git a/pandas/util/_test_decorators.py 
b/pandas/util/_test_decorators.py index 27c24e3a68079..c6ab24403d58d 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -23,7 +23,6 @@ def test_foo(): For more information, refer to the ``pytest`` documentation on ``skipif``. """ - import pytest import locale from distutils.version import LooseVersion diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 675dd94d49750..a5afcb6915034 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -6,7 +6,6 @@ import sys import tempfile import warnings -import inspect import os import subprocess import locale @@ -757,15 +756,6 @@ def ensure_clean(filename=None, return_filelike=False): print("Exception on removing file: {error}".format(error=e)) -def get_data_path(f=''): - """Return the path of a data file, these are relative to the current test - directory. - """ - # get our callers file - _, filename, _, _, _, _ = inspect.getouterframes(inspect.currentframe())[1] - base_dir = os.path.abspath(os.path.dirname(filename)) - return os.path.join(base_dir, 'data', f) - # ----------------------------------------------------------------------------- # Comparators diff --git a/setup.cfg b/setup.cfg index 6d9657737a8bd..9ec967c25e225 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,4 +32,5 @@ markers = slow: mark a test as slow network: mark a test as network high_memory: mark a test as a high-memory only -doctest_optionflags= NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL +addopts = --strict-data-files +doctest_optionflags= NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL \ No newline at end of file diff --git a/setup.py b/setup.py index dd026bd611727..0fd008612b5bd 100755 --- a/setup.py +++ b/setup.py @@ -735,11 +735,7 @@ def pxd(name): maintainer=AUTHOR, version=versioneer.get_version(), packages=find_packages(include=['pandas', 'pandas.*']), - package_data={'': ['data/*', 'templates/*', '_libs/*.dll'], - 'pandas.tests.io': ['data/legacy_hdf/*.h5', - 'data/legacy_pickle/*/*.pickle', - 'data/legacy_msgpack/*/*.msgpack', - 'data/html_encoding/*.html']}, + package_data={'': ['templates/*', '_libs/*.dll']}, ext_modules=extensions, maintainer_email=EMAIL, description=DESCRIPTION, From c6660f6b5b0ad2b11832fadf098388d67b6726c9 Mon Sep 17 00:00:00 2001 From: Benjamin Grove Date: Tue, 26 Jun 2018 13:26:39 -0700 Subject: [PATCH 100/113] DOC: fix typo in cookbook.rst (#21635) Removing the semicolon delimiter at the end of the modified line of code allows the line's output to be displayed. --- doc/source/cookbook.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index fdc3b38cfdebc..f6fa9e9f86143 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -132,7 +132,7 @@ Building Criteria .. ipython:: python - newseries = df.loc[(df['BBB'] > 25) | (df['CCC'] >= -40), 'AAA']; newseries; + newseries = df.loc[(df['BBB'] > 25) | (df['CCC'] >= -40), 'AAA']; newseries ...or (with assignment modifies the DataFrame.) 
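The cookbook fix above hinges on IPython display semantics: inside an ``.. ipython:: python`` block, as in a live IPython session, a statement that ends with a semicolon has its output suppressed, so the published docs showed nothing for that line. The short sketch below illustrates the behavior; the frame values are invented for the illustration and the snippet is not part of any patch in this series.

    import pandas as pd

    # Values made up for this sketch, in the spirit of the cookbook example.
    df = pd.DataFrame({'AAA': [4, 5, 6, 7],
                       'BBB': [10, 20, 30, 40],
                       'CCC': [100, 50, -30, -50]})

    # In IPython/Jupyter, the trailing ';' suppresses the repr of the result:
    df.loc[(df['BBB'] > 25) | (df['CCC'] >= -40), 'AAA'];

    # Without the trailing ';', the selected Series is echoed back:
    df.loc[(df['BBB'] > 25) | (df['CCC'] >= -40), 'AAA']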
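Returning to the ``datapath`` fixture and ``--strict-data-files`` option added to ``pandas/conftest.py`` earlier in this series: the converted tests all follow the same pattern, sketched below with a hypothetical test that does not appear in any patch. By default a missing data file causes the test to be skipped; running ``pytest --strict-data-files`` turns that skip into a ``ValueError``.

    import pandas as pd

    # Hypothetical test, for illustration only; ``datapath`` is the fixture
    # defined in pandas/conftest.py above.
    def test_read_iris_from_datapath(datapath):
        # datapath joins its arguments onto 'pandas/tests', so this resolves
        # to 'pandas/tests/io/data/iris.csv'.
        path = datapath('io', 'data', 'iris.csv')
        df = pd.read_csv(path)
        assert not df.empty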
From 7555378d64672a36e44205c62ca9d8f4f52aa0de Mon Sep 17 00:00:00 2001 From: topper-123 Date: Tue, 26 Jun 2018 22:30:42 +0100 Subject: [PATCH 101/113] DOC: minor correction to v0.23.2.txt (#21644) --- doc/source/whatsnew/v0.23.2.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index a41a6c31b0678..9c4b408a1d24b 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -61,8 +61,8 @@ Fixed Regressions Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- Improved performance of membership checks in :class:`CategoricalIndex` - (i.e. ``x in ci``-style checks are much faster). :meth:`CategoricalIndex.contains` +- Improved performance of membership checks in :class:`Categorical` and :class:`CategoricalIndex` + (i.e. ``x in cat``-style checks are much faster). :meth:`CategoricalIndex.contains` is likewise much faster (:issue:`21369`, :issue:`21508`) - Improved performance of :meth:`HDFStore.groups` (and dependent functions like :meth:`~HDFStore.keys`. (i.e. ``x in store`` checks are much faster) From 45cfa62f17e87f34eae630cb5b68658eef37a45e Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 <36486871+david-liu-brattle-1@users.noreply.github.com> Date: Tue, 26 Jun 2018 18:19:41 -0400 Subject: [PATCH 102/113] Cleanup clipboard tests (#21163) --- pandas/tests/io/test_clipboard.py | 196 ++++++++++++++++++++---------- 1 file changed, 129 insertions(+), 67 deletions(-) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 98c0effabec84..80fddd50fc9a8 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -9,10 +9,11 @@ from pandas import DataFrame from pandas import read_clipboard from pandas import get_option +from pandas.compat import PY2 from pandas.util import testing as tm from pandas.util.testing import makeCustomDataframe as mkdf from pandas.io.clipboard.exceptions import PyperclipException -from pandas.io.clipboard import clipboard_set +from pandas.io.clipboard import clipboard_set, clipboard_get try: @@ -22,73 +23,134 @@ _DEPS_INSTALLED = 0 +def build_kwargs(sep, excel): + kwargs = {} + if excel != 'default': + kwargs['excel'] = excel + if sep != 'default': + kwargs['sep'] = sep + return kwargs + + +@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii', + 'colwidth', 'mixed', 'float', 'int']) +def df(request): + data_type = request.param + + if data_type == 'delims': + return pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'], + 'b': ['hi\'j', 'k\'\'lm']}) + elif data_type == 'utf8': + return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'], + 'b': ['øπ∆˚¬', 'œ∑´®']}) + elif data_type == 'string': + return mkdf(5, 3, c_idx_type='s', r_idx_type='i', + c_idx_names=[None], r_idx_names=[None]) + elif data_type == 'long': + max_rows = get_option('display.max_rows') + return mkdf(max_rows + 1, 3, + data_gen_f=lambda *args: randint(2), + c_idx_type='s', r_idx_type='i', + c_idx_names=[None], r_idx_names=[None]) + elif data_type == 'nonascii': + return pd.DataFrame({'en': 'in English'.split(), + 'es': 'en español'.split()}) + elif data_type == 'colwidth': + _cw = get_option('display.max_colwidth') + 1 + return mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw, + c_idx_type='s', r_idx_type='i', + c_idx_names=[None], r_idx_names=[None]) + elif data_type == 'mixed': + return DataFrame({'a': np.arange(1.0, 6.0) + 0.01, + 'b': np.arange(1, 6), + 'c': list('abcde')}) + elif data_type == 'float': + return mkdf(5, 3, 
data_gen_f=lambda r, c: float(r) + 0.01, + c_idx_type='s', r_idx_type='i', + c_idx_names=[None], r_idx_names=[None]) + elif data_type == 'int': + return mkdf(5, 3, data_gen_f=lambda *args: randint(2), + c_idx_type='s', r_idx_type='i', + c_idx_names=[None], r_idx_names=[None]) + else: + raise ValueError + + @pytest.mark.single @pytest.mark.skipif(not _DEPS_INSTALLED, reason="clipboard primitives not installed") class TestClipboard(object): - - @classmethod - def setup_class(cls): - cls.data = {} - cls.data['string'] = mkdf(5, 3, c_idx_type='s', r_idx_type='i', - c_idx_names=[None], r_idx_names=[None]) - cls.data['int'] = mkdf(5, 3, data_gen_f=lambda *args: randint(2), - c_idx_type='s', r_idx_type='i', - c_idx_names=[None], r_idx_names=[None]) - cls.data['float'] = mkdf(5, 3, - data_gen_f=lambda r, c: float(r) + 0.01, - c_idx_type='s', r_idx_type='i', - c_idx_names=[None], r_idx_names=[None]) - cls.data['mixed'] = DataFrame({'a': np.arange(1.0, 6.0) + 0.01, - 'b': np.arange(1, 6), - 'c': list('abcde')}) - - # Test columns exceeding "max_colwidth" (GH8305) - _cw = get_option('display.max_colwidth') + 1 - cls.data['colwidth'] = mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw, - c_idx_type='s', r_idx_type='i', - c_idx_names=[None], r_idx_names=[None]) - # Test GH-5346 - max_rows = get_option('display.max_rows') - cls.data['longdf'] = mkdf(max_rows + 1, 3, - data_gen_f=lambda *args: randint(2), - c_idx_type='s', r_idx_type='i', - c_idx_names=[None], r_idx_names=[None]) - # Test for non-ascii text: GH9263 - cls.data['nonascii'] = pd.DataFrame({'en': 'in English'.split(), - 'es': 'en español'.split()}) - # unicode round trip test for GH 13747, GH 12529 - cls.data['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'], - 'b': ['øπ∆˚¬', 'œ∑´®']}) - cls.data_types = list(cls.data.keys()) - - @classmethod - def teardown_class(cls): - del cls.data_types, cls.data - - def check_round_trip_frame(self, data_type, excel=None, sep=None, + def check_round_trip_frame(self, data, excel=None, sep=None, encoding=None): - data = self.data[data_type] data.to_clipboard(excel=excel, sep=sep, encoding=encoding) - if sep is not None: - result = read_clipboard(sep=sep, index_col=0, encoding=encoding) - else: - result = read_clipboard(encoding=encoding) + result = read_clipboard(sep=sep or '\t', index_col=0, + encoding=encoding) tm.assert_frame_equal(data, result, check_dtype=False) - def test_round_trip_frame_sep(self): - for dt in self.data_types: - self.check_round_trip_frame(dt, sep=',') - self.check_round_trip_frame(dt, sep=r'\s+') - self.check_round_trip_frame(dt, sep='|') - - def test_round_trip_frame_string(self): - for dt in self.data_types: - self.check_round_trip_frame(dt, excel=False) - - def test_round_trip_frame(self): - for dt in self.data_types: - self.check_round_trip_frame(dt) + # Test that default arguments copy as tab delimited + @pytest.mark.xfail(reason='to_clipboard defaults to space delim. ' + 'Issue in #21104, Fixed in #21111') + def test_round_trip_frame(self, df): + self.check_round_trip_frame(df) + + # Test that explicit delimiters are respected + @pytest.mark.parametrize('sep', ['\t', ',', '|']) + def test_round_trip_frame_sep(self, df, sep): + self.check_round_trip_frame(df, sep=sep) + + # Test white space separator + @pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes " + "aren't handled correctly in default c engine. 
Fixed " + "in #21111 by defaulting to python engine for " + "whitespace separator") + def test_round_trip_frame_string(self, df): + df.to_clipboard(excel=False, sep=None) + result = read_clipboard() + assert df.to_string() == result.to_string() + assert df.shape == result.shape + + # Two character separator is not supported in to_clipboard + # Test that multi-character separators are not silently passed + @pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111") + def test_excel_sep_warning(self, df): + with tm.assert_produces_warning(): + df.to_clipboard(excel=True, sep=r'\t') + + # Separator is ignored when excel=False and should produce a warning + @pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111") + def test_copy_delim_warning(self, df): + with tm.assert_produces_warning(): + df.to_clipboard(excel=False, sep='\t') + + # Tests that the default behavior of to_clipboard is tab + # delimited and excel="True" + @pytest.mark.xfail(reason="to_clipboard defaults to space delim. Issue in " + "#21104, Fixed in #21111") + @pytest.mark.parametrize('sep', ['\t', None, 'default']) + @pytest.mark.parametrize('excel', [True, None, 'default']) + def test_clipboard_copy_tabs_default(self, sep, excel, df): + kwargs = build_kwargs(sep, excel) + df.to_clipboard(**kwargs) + if PY2: + # to_clipboard copies unicode, to_csv produces bytes. This is + # expected behavior + assert clipboard_get().encode('utf-8') == df.to_csv(sep='\t') + else: + assert clipboard_get() == df.to_csv(sep='\t') + + # Tests reading of white space separated tables + @pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes " + "aren't handled correctly. in default c engine. Fixed " + "in #21111 by defaulting to python engine for " + "whitespace separator") + @pytest.mark.parametrize('sep', [None, 'default']) + @pytest.mark.parametrize('excel', [False]) + def test_clipboard_copy_strings(self, sep, excel, df): + kwargs = build_kwargs(sep, excel) + df.to_clipboard(**kwargs) + result = read_clipboard(sep=r'\s+') + assert result.to_string() == df.to_string() + assert df.shape == result.shape def test_read_clipboard_infer_excel(self): # gh-19010: avoid warnings @@ -124,15 +186,15 @@ def test_read_clipboard_infer_excel(self): tm.assert_frame_equal(res, exp) - def test_invalid_encoding(self): + def test_invalid_encoding(self, df): # test case for testing invalid encoding - data = self.data['string'] with pytest.raises(ValueError): - data.to_clipboard(encoding='ascii') + df.to_clipboard(encoding='ascii') with pytest.raises(NotImplementedError): pd.read_clipboard(encoding='ascii') - def test_round_trip_valid_encodings(self): - for enc in ['UTF-8', 'utf-8', 'utf8']: - for dt in self.data_types: - self.check_round_trip_frame(dt, encoding=enc) + @pytest.mark.xfail(reason='to_clipboard defaults to space delim. ' + 'Issue in #21104, Fixed in #21111') + @pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8']) + def test_round_trip_valid_encodings(self, enc, df): + self.check_round_trip_frame(df, encoding=enc) From d746bee41816d57d266e7e41340a1e2cd4b7fe7c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 26 Jun 2018 15:25:57 -0700 Subject: [PATCH 103/113] ENH: Update to_gbq and read_gbq to pandas-gbq 0.5.0 (#21628) * Add link to Pandas-GBQ 0.5.0 in what's new. * Remove unnecessary sleep in GBQ tests. 
Closes https://github.com/pydata/pandas-gbq/issues/177 Closes #21627 --- doc/source/whatsnew/v0.24.0.txt | 5 ++ pandas/core/frame.py | 59 +++++++++++----------- pandas/io/gbq.py | 86 ++++++++++++++++++--------------- pandas/tests/io/test_gbq.py | 15 +++--- 4 files changed, 92 insertions(+), 73 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 72e7373d0dd33..60c3e4df8d129 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -19,6 +19,11 @@ Other Enhancements - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`) - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`) - Added support for reading from Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`) +- :func:`to_gbq` and :func:`read_gbq` signature and documentation updated to + reflect changes from the `Pandas-GBQ library version 0.5.0 + `__. + (:issue:`21627`) + .. _whatsnew_0240.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0bf5acf14294a..b553cfdc72c92 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1102,37 +1102,27 @@ def to_dict(self, orient='dict', into=dict): else: raise ValueError("orient '{o}' not understood".format(o=orient)) - def to_gbq(self, destination_table, project_id, chunksize=None, - verbose=None, reauth=False, if_exists='fail', private_key=None, - auth_local_webserver=False, table_schema=None): + def to_gbq(self, destination_table, project_id=None, chunksize=None, + reauth=False, if_exists='fail', private_key=None, + auth_local_webserver=False, table_schema=None, location=None, + progress_bar=True, verbose=None): """ Write a DataFrame to a Google BigQuery table. This function requires the `pandas-gbq package `__. - Authentication to the Google BigQuery service is via OAuth 2.0. - - - If ``private_key`` is provided, the library loads the JSON service - account credentials and uses those to authenticate. - - - If no ``private_key`` is provided, the library tries `application - default credentials`_. - - .. _application default credentials: - https://cloud.google.com/docs/authentication/production#providing_credentials_to_your_application - - - If application default credentials are not found or cannot be used - with BigQuery, the library authenticates with user account - credentials. In this case, you will be asked to grant permissions - for product name 'pandas GBQ'. + See the `How to authenticate with Google BigQuery + `__ + guide for authentication instructions. Parameters ---------- destination_table : str - Name of table to be written, in the form 'dataset.tablename'. - project_id : str - Google BigQuery Account project ID. + Name of table to be written, in the form ``dataset.tablename``. + project_id : str, optional + Google BigQuery Account project ID. Optional when available from + the environment. chunksize : int, optional Number of rows to be inserted in each chunk from the dataframe. Set to ``None`` to load the whole dataframe at once. @@ -1170,8 +1160,21 @@ def to_gbq(self, destination_table, project_id, chunksize=None, BigQuery API documentation on available names of a field. *New in version 0.3.1 of pandas-gbq*. - verbose : boolean, deprecated - *Deprecated in Pandas-GBQ 0.4.0.* Use the `logging module + location : str, optional + Location where the load job should run. 
See the `BigQuery locations + documentation + `__ for a + list of available locations. The location must match that of the + target dataset. + + *New in version 0.5.0 of pandas-gbq*. + progress_bar : bool, default True + Use the library `tqdm` to show the progress bar for the upload, + chunk by chunk. + + *New in version 0.5.0 of pandas-gbq*. + verbose : bool, deprecated + Deprecated in Pandas-GBQ 0.4.0. Use the `logging module to adjust verbosity instead `__. @@ -1182,10 +1185,12 @@ def to_gbq(self, destination_table, project_id, chunksize=None, """ from pandas.io import gbq return gbq.to_gbq( - self, destination_table, project_id, chunksize=chunksize, - verbose=verbose, reauth=reauth, if_exists=if_exists, - private_key=private_key, auth_local_webserver=auth_local_webserver, - table_schema=table_schema) + self, destination_table, project_id=project_id, + chunksize=chunksize, reauth=reauth, + if_exists=if_exists, private_key=private_key, + auth_local_webserver=auth_local_webserver, + table_schema=table_schema, location=location, + progress_bar=progress_bar, verbose=verbose) @classmethod def from_records(cls, data, index=None, exclude=None, columns=None, diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index c7c16598ee432..87a0e4d5d1747 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -22,34 +22,26 @@ def _try_import(): def read_gbq(query, project_id=None, index_col=None, col_order=None, - reauth=False, verbose=None, private_key=None, dialect='legacy', - **kwargs): + reauth=False, private_key=None, auth_local_webserver=False, + dialect='legacy', location=None, configuration=None, + verbose=None): """ Load data from Google BigQuery. This function requires the `pandas-gbq package `__. - Authentication to the Google BigQuery service is via OAuth 2.0. - - - If "private_key" is not provided: - - By default "application default credentials" are used. - - If default application credentials are not found or are restrictive, - user account credentials are used. In this case, you will be asked to - grant permissions for product name 'pandas GBQ'. - - - If "private_key" is provided: - - Service account credentials will be used to authenticate. + See the `How to authenticate with Google BigQuery + `__ + guide for authentication instructions. Parameters ---------- query : str SQL-Like Query to return data values. - project_id : str - Google BigQuery Account project ID. + project_id : str, optional + Google BigQuery Account project ID. Optional when available from + the environment. index_col : str, optional Name of result column to use for index in results DataFrame. col_order : list(str), optional @@ -62,6 +54,16 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, Service account private key in JSON format. Can be file path or string contents. This is useful for remote server authentication (eg. Jupyter/IPython notebook on remote host). + auth_local_webserver : boolean, default False + Use the `local webserver flow`_ instead of the `console flow`_ + when getting user credentials. + + .. _local webserver flow: + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + .. _console flow: + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + + *New in version 0.2.0 of pandas-gbq*. dialect : str, default 'legacy' SQL syntax dialect to use. 
Value can be one of: @@ -74,19 +76,26 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, compliant with the SQL 2011 standard. For more information see `BigQuery Standard SQL Reference `__. - verbose : boolean, deprecated - *Deprecated in Pandas-GBQ 0.4.0.* Use the `logging module - to adjust verbosity instead - `__. - kwargs : dict - Arbitrary keyword arguments. - configuration (dict): query config parameters for job processing. + location : str, optional + Location where the query job should run. See the `BigQuery locations + documentation + `__ for a + list of available locations. The location must match that of any + datasets used in the query. + + *New in version 0.5.0 of pandas-gbq*. + configuration : dict, optional + Query config parameters for job processing. For example: configuration = {'query': {'useQueryCache': False}} - For more information see `BigQuery SQL Reference - `__ + For more information see `BigQuery REST API Reference + `__. + verbose : None, deprecated + Deprecated in Pandas-GBQ 0.4.0. Use the `logging module + to adjust verbosity instead + `__. Returns ------- @@ -100,20 +109,21 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, """ pandas_gbq = _try_import() return pandas_gbq.read_gbq( - query, project_id=project_id, - index_col=index_col, col_order=col_order, - reauth=reauth, verbose=verbose, - private_key=private_key, - dialect=dialect, - **kwargs) + query, project_id=project_id, index_col=index_col, + col_order=col_order, reauth=reauth, verbose=verbose, + private_key=private_key, auth_local_webserver=auth_local_webserver, + dialect=dialect, location=location, configuration=configuration) -def to_gbq(dataframe, destination_table, project_id, chunksize=None, +def to_gbq(dataframe, destination_table, project_id=None, chunksize=None, verbose=None, reauth=False, if_exists='fail', private_key=None, - auth_local_webserver=False, table_schema=None): + auth_local_webserver=False, table_schema=None, location=None, + progress_bar=True): pandas_gbq = _try_import() return pandas_gbq.to_gbq( - dataframe, destination_table, project_id, chunksize=chunksize, - verbose=verbose, reauth=reauth, if_exists=if_exists, - private_key=private_key, auth_local_webserver=auth_local_webserver, - table_schema=table_schema) + dataframe, destination_table, project_id=project_id, + chunksize=chunksize, verbose=verbose, reauth=reauth, + if_exists=if_exists, private_key=private_key, + auth_local_webserver=auth_local_webserver, + table_schema=table_schema, location=location, + progress_bar=progress_bar) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 58a84ad4d47f8..dc6c319bb3366 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -2,7 +2,6 @@ from datetime import datetime import pytz import platform -from time import sleep import os import numpy as np @@ -48,16 +47,18 @@ def _in_travis_environment(): def _get_project_id(): if _in_travis_environment(): return os.environ.get('GBQ_PROJECT_ID') - else: - return PROJECT_ID + return PROJECT_ID or os.environ.get('GBQ_PROJECT_ID') def _get_private_key_path(): if _in_travis_environment(): return os.path.join(*[os.environ.get('TRAVIS_BUILD_DIR'), 'ci', 'travis_gbq.json']) - else: - return PRIVATE_KEY_JSON_PATH + + private_key_path = PRIVATE_KEY_JSON_PATH + if not private_key_path: + private_key_path = os.environ.get('GBQ_GOOGLE_APPLICATION_CREDENTIALS') + return private_key_path def clean_gbq_environment(private_key=None): @@ -123,11 +124,9 @@ def 
test_roundtrip(self): test_size = 20001 df = make_mixed_dataframe_v2(test_size) - df.to_gbq(destination_table, _get_project_id(), chunksize=10000, + df.to_gbq(destination_table, _get_project_id(), chunksize=None, private_key=_get_private_key_path()) - sleep(30) # <- Curses Google!!! - result = pd.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(destination_table), project_id=_get_project_id(), From 476717ce0e6a195b5474cf247c6f610800062975 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 26 Jun 2018 15:27:29 -0700 Subject: [PATCH 104/113] More speedups for Period comparisons (#21606) --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/_libs/tslibs/offsets.pyx | 44 +++++++++++++++++++++++++++++++++ pandas/tseries/offsets.py | 31 +---------------------- 3 files changed, 46 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 60c3e4df8d129..5e757987d4518 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -135,7 +135,7 @@ Performance Improvements - Improved performance of :func:`Series.describe` in case of numeric dtpyes (:issue:`21274`) - Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`) -- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`) +- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`,:issue:`21606`) - .. _whatsnew_0240.docs: diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 63add06db17b4..b4b27b99bdb30 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -88,6 +88,15 @@ for _d in DAYS: # --------------------------------------------------------------------- # Misc Helpers +cdef to_offset(object obj): + """ + Wrap pandas.tseries.frequencies.to_offset to keep centralize runtime + imports + """ + from pandas.tseries.frequencies import to_offset + return to_offset(obj) + + def as_datetime(obj): f = getattr(obj, 'to_pydatetime', None) if f is not None: @@ -313,6 +322,41 @@ class _BaseOffset(object): def __setattr__(self, name, value): raise AttributeError("DateOffset objects are immutable.") + def __eq__(self, other): + if is_string_object(other): + other = to_offset(other) + + try: + return self._params == other._params + except AttributeError: + # other is not a DateOffset object + return False + + return self._params == other._params + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._params) + + @property + def _params(self): + """ + Returns a tuple containing all of the attributes needed to evaluate + equality between two DateOffset objects. 
+ """ + # NB: non-cython subclasses override property with cache_readonly + all_paras = self.__dict__.copy() + if 'holidays' in all_paras and not all_paras['holidays']: + all_paras.pop('holidays') + exclude = ['kwds', 'name', 'calendar'] + attrs = [(k, v) for k, v in all_paras.items() + if (k not in exclude) and (k[0] != '_')] + attrs = sorted(set(attrs)) + params = tuple([str(self.__class__)] + attrs) + return params + @property def kwds(self): # for backwards-compatibility diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index a3f82c1a0902e..1cfd3f476f8ab 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -182,6 +182,7 @@ def __add__(date): Since 0 is a bit weird, we suggest avoiding its use. """ + _params = cache_readonly(BaseOffset._params.fget) _use_relativedelta = False _adjust_dst = False _attributes = frozenset(['n', 'normalize'] + @@ -288,18 +289,6 @@ def isAnchored(self): # if there were a canonical docstring for what isAnchored means. return (self.n == 1) - @cache_readonly - def _params(self): - all_paras = self.__dict__.copy() - if 'holidays' in all_paras and not all_paras['holidays']: - all_paras.pop('holidays') - exclude = ['kwds', 'name', 'calendar'] - attrs = [(k, v) for k, v in all_paras.items() - if (k not in exclude) and (k[0] != '_')] - attrs = sorted(set(attrs)) - params = tuple([str(self.__class__)] + attrs) - return params - # TODO: Combine this with BusinessMixin version by defining a whitelisted # set of attributes on each object rather than the existing behavior of # iterating over internal ``__dict__`` @@ -322,24 +311,6 @@ def _repr_attrs(self): def name(self): return self.rule_code - def __eq__(self, other): - - if isinstance(other, compat.string_types): - from pandas.tseries.frequencies import to_offset - - other = to_offset(other) - - if not isinstance(other, DateOffset): - return False - - return self._params == other._params - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash(self._params) - def __add__(self, other): if isinstance(other, (ABCDatetimeIndex, ABCSeries)): return other + self From 001dc78fb1c774564732cb38b362038b3f6968d8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 26 Jun 2018 15:29:47 -0700 Subject: [PATCH 105/113] use ccalendar instead of np_datetime (#21549) --- pandas/_libs/tslibs/ccalendar.pxd | 2 +- pandas/_libs/tslibs/np_datetime.pxd | 4 ---- pandas/_libs/tslibs/offsets.pyx | 10 ++-------- pandas/_libs/tslibs/period.pyx | 3 +-- setup.py | 1 + 5 files changed, 5 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd index 42473a97a7150..04fb6eaf49c84 100644 --- a/pandas/_libs/tslibs/ccalendar.pxd +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -6,7 +6,7 @@ from cython cimport Py_ssize_t from numpy cimport int64_t, int32_t -cdef int dayofweek(int y, int m, int m) nogil +cdef int dayofweek(int y, int m, int d) nogil cdef bint is_leapyear(int64_t year) nogil cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil cpdef int32_t get_week_of_year(int year, int month, int day) nogil diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 33b8b32bcf2dc..1a0baa8271643 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -54,10 +54,6 @@ cdef extern from "../src/datetime/np_datetime.h": PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *result) nogil - int days_per_month_table[2][12] - int dayofweek(int y, int m, int d) 
nogil - int is_leapyear(int64_t year) nogil - cdef int reverse_ops[6] diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index b4b27b99bdb30..841db80cf094e 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -18,12 +18,12 @@ cnp.import_array() from util cimport is_string_object, is_integer_object from ccalendar import MONTHS, DAYS +from ccalendar cimport get_days_in_month, dayofweek from conversion cimport tz_convert_single, pydt_to_i8 from frequencies cimport get_freq_code from nattype cimport NPY_NAT from np_datetime cimport (pandas_datetimestruct, - dtstruct_to_dt64, dt64_to_dtstruct, - is_leapyear, days_per_month_table, dayofweek) + dtstruct_to_dt64, dt64_to_dtstruct) # --------------------------------------------------------------------- # Constants @@ -494,12 +494,6 @@ class BaseOffset(_BaseOffset): # ---------------------------------------------------------------------- # RelativeDelta Arithmetic -@cython.wraparound(False) -@cython.boundscheck(False) -cdef inline int get_days_in_month(int year, int month) nogil: - return days_per_month_table[is_leapyear(year)][month - 1] - - cdef inline int year_add_months(pandas_datetimestruct dts, int months) nogil: """new year number after shifting pandas_datetimestruct number of months""" return dts.year + (dts.month + months - 1) / 12 diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index cc2fb6e0617cb..49208056f88fe 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -45,9 +45,8 @@ from timezones cimport is_utc, is_tzlocal, get_utcoffset, get_dst_info from timedeltas cimport delta_to_nanoseconds cimport ccalendar -from ccalendar cimport dayofweek, get_day_of_year +from ccalendar cimport dayofweek, get_day_of_year, is_leapyear from ccalendar import MONTH_NUMBERS -from ccalendar cimport is_leapyear from conversion cimport tz_convert_utc_to_tzlocal from frequencies cimport (get_freq_code, get_base_alias, get_to_timestamp_base, get_freq_str, diff --git a/setup.py b/setup.py index 0fd008612b5bd..621655dd05dbc 100755 --- a/setup.py +++ b/setup.py @@ -592,6 +592,7 @@ def pxd(name): '_libs.tslibs.offsets': { 'pyxfile': '_libs/tslibs/offsets', 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/ccalendar', '_libs/tslibs/conversion', '_libs/tslibs/frequencies', '_libs/tslibs/nattype'], From 59286daee4083a4b1760e4d04365576fca52f8bd Mon Sep 17 00:00:00 2001 From: Stephen Pascoe Date: Sun, 30 Aug 2015 08:58:08 +0100 Subject: [PATCH 106/113] ENH: Function to walk the group hierarchy of a PyTables HDF5 file. closes #10143 --- doc/source/api.rst | 1 + doc/source/io.rst | 19 ++++++++++++ doc/source/whatsnew/v0.24.0.txt | 3 +- pandas/io/pytables.py | 47 +++++++++++++++++++++++++++++ pandas/tests/io/test_pytables.py | 51 ++++++++++++++++++++++++++++++++ 5 files changed, 120 insertions(+), 1 deletion(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index f2c00d5d12031..8dc5d0e9fc023 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -100,6 +100,7 @@ HDFStore: PyTables (HDF5) HDFStore.select HDFStore.info HDFStore.keys + HDFStore.walk Feather ~~~~~~~ diff --git a/doc/source/io.rst b/doc/source/io.rst index ae6c4f12f04f7..9fe578524c8e0 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3554,6 +3554,25 @@ everything in the sub-store and **below**, so be *careful*. 
store.remove('food') store + +You can walk through the group hierarchy using the ``walk`` method which +will yield a tuple for each group key along with the relative keys of its contents. + +.. versionadded:: 0.24.0 + + +.. ipython:: python + + for (path, subgroups, subkeys) in store.walk(): + for subgroup in subgroups: + print('GROUP: {}/{}'.format(path, subgroup)) + for subkey in subkeys: + key = '/'.join([path, subkey]) + print('KEY: {}'.format(key)) + print(store.get(key)) + + + .. warning:: Hierarchical keys cannot be retrieved as dotted (attribute) access as described above for items stored under the root node. diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 5e757987d4518..abf574ae109fd 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -23,7 +23,8 @@ Other Enhancements reflect changes from the `Pandas-GBQ library version 0.5.0 `__. (:issue:`21627`) - +- New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of an HDF5 file (:issue:`10932`) +- .. _whatsnew_0240.api_breaking: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 580c7923017e5..f93ad425b2c6a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1106,6 +1106,53 @@ def groups(self): g._v_name != u('table')))) ] + def walk(self, where="/"): + """ Walk the pytables group hierarchy for pandas objects + + This generator will yield the group path, subgroups and pandas object + names for each group. + Any non-pandas PyTables objects that are not a group will be ignored. + + The `where` group itself is listed first (preorder), then each of its + child groups (following an alphanumerical order) is also traversed, + following the same procedure. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + where : str, optional + Group where to start walking. + If not supplied, the root group is used. 
+ + Yields + ------ + path : str + Full path to a group (without trailing '/') + groups : list of str + names of the groups contained in `path` + leaves : list of str + names of the pandas objects contained in `path` + + """ + _tables() + self._check_if_open() + for g in self._handle.walk_groups(where): + if getattr(g._v_attrs, 'pandas_type', None) is not None: + continue + + groups = [] + leaves = [] + for child in g._v_children.values(): + pandas_type = getattr(child._v_attrs, 'pandas_type', None) + if pandas_type is None: + if isinstance(child, _table_mod.group.Group): + groups.append(child._v_name) + else: + leaves.append(child._v_name) + + yield (g._v_pathname.rstrip('/'), groups, leaves) + def get_node(self, key): """ return the node with the key or None if it does not exist """ self._check_if_open() diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index b95df3840b6c5..29063b64221c1 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -635,6 +635,57 @@ def test_get(self): pytest.raises(KeyError, store.get, 'b') + @pytest.mark.parametrize('where, expected', [ + ('/', { + '': ({'first_group', 'second_group'}, set()), + '/first_group': (set(), {'df1', 'df2'}), + '/second_group': ({'third_group'}, {'df3', 's1'}), + '/second_group/third_group': (set(), {'df4'}), + }), + ('/second_group', { + '/second_group': ({'third_group'}, {'df3', 's1'}), + '/second_group/third_group': (set(), {'df4'}), + }) + ]) + def test_walk(self, where, expected): + # GH10143 + objs = { + 'df1': pd.DataFrame([1, 2, 3]), + 'df2': pd.DataFrame([4, 5, 6]), + 'df3': pd.DataFrame([6, 7, 8]), + 'df4': pd.DataFrame([9, 10, 11]), + 's1': pd.Series([10, 9, 8]), + # Next 3 items aren't pandas objects and should be ignored + 'a1': np.array([[1, 2, 3], [4, 5, 6]]), + 'tb1': np.array([(1, 2, 3), (4, 5, 6)], dtype='i,i,i'), + 'tb2': np.array([(7, 8, 9), (10, 11, 12)], dtype='i,i,i') + } + + with ensure_clean_store('walk_groups.hdf', mode='w') as store: + store.put('/first_group/df1', objs['df1']) + store.put('/first_group/df2', objs['df2']) + store.put('/second_group/df3', objs['df3']) + store.put('/second_group/s1', objs['s1']) + store.put('/second_group/third_group/df4', objs['df4']) + # Create non-pandas objects + store._handle.create_array('/first_group', 'a1', objs['a1']) + store._handle.create_table('/first_group', 'tb1', obj=objs['tb1']) + store._handle.create_table('/second_group', 'tb2', obj=objs['tb2']) + + assert len(list(store.walk(where=where))) == len(expected) + for path, groups, leaves in store.walk(where=where): + assert path in expected + expected_groups, expected_frames = expected[path] + assert expected_groups == set(groups) + assert expected_frames == set(leaves) + for leaf in leaves: + frame_path = '/'.join([path, leaf]) + obj = store.get(frame_path) + if 'df' in leaf: + tm.assert_frame_equal(obj, objs[leaf]) + else: + tm.assert_series_equal(obj, objs[leaf]) + def test_getattr(self): with ensure_clean_store(self.path) as store: From dad12525b3786327c1230d88d7e583f0df8bbc9d Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Wed, 27 Jun 2018 03:57:55 -0600 Subject: [PATCH 107/113] DOC: Fix versionadded directive typos in IntervalIndex (#21649) --- pandas/core/indexes/interval.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index eb9d7efc06c27..23a655b9a51ee 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ 
-160,7 +160,7 @@ class IntervalIndex(IntervalMixin, Index): dtype : dtype or None, default None If None, dtype will be inferred - ..versionadded:: 0.23.0 + .. versionadded:: 0.23.0 Attributes ---------- @@ -438,7 +438,7 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False, dtype : dtype or None, default None If None, dtype will be inferred - ..versionadded:: 0.23.0 + .. versionadded:: 0.23.0 Examples -------- @@ -568,7 +568,7 @@ def from_intervals(cls, data, closed=None, name=None, copy=False, dtype : dtype or None, default None If None, dtype will be inferred - ..versionadded:: 0.23.0 + .. versionadded:: 0.23.0 Examples -------- @@ -619,7 +619,7 @@ def from_tuples(cls, data, closed='right', name=None, copy=False, dtype : dtype or None, default None If None, dtype will be inferred - ..versionadded:: 0.23.0 + .. versionadded:: 0.23.0 Examples -------- @@ -671,7 +671,7 @@ def to_tuples(self, na_tuple=True): Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA value itself if False, ``nan``. - ..versionadded:: 0.23.0 + .. versionadded:: 0.23.0 Examples -------- From b3b047eaa7b8e30675fa8ac33b4b34848b3c4b8c Mon Sep 17 00:00:00 2001 From: LeakedMemory Date: Wed, 27 Jun 2018 06:23:22 -0500 Subject: [PATCH 108/113] TST: Use absolute path for datapath (#21647) --- pandas/conftest.py | 4 +++- pandas/tests/util/test_testing.py | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 82d860b091b82..5b9c162a0a022 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -153,8 +153,10 @@ def datapath(request): ValueError If the path doesn't exist and the --strict-data-files option is set. """ + BASE_PATH = os.path.join(os.path.dirname(__file__), 'tests') + def deco(*args): - path = os.path.join('pandas', 'tests', *args) + path = os.path.join(BASE_PATH, *args) if not os.path.exists(path): if request.config.getoption("--strict-data-files"): msg = "Could not find file {} and --strict-data-files is set." diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index 4d34987e14f75..95ea4658212e9 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -852,5 +852,10 @@ def test_datapath_missing(datapath, request): datapath('not_a_file') result = datapath('data', 'iris.csv') - expected = os.path.join('pandas', 'tests', 'data', 'iris.csv') + expected = os.path.join( + os.path.dirname(os.path.dirname(__file__)), + 'data', + 'iris.csv' + ) + assert result == expected From 242ccbcf14b705f4e6d3513e952e8a18c4acb798 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phan=20Taljaard?= Date: Wed, 27 Jun 2018 13:59:59 +0200 Subject: [PATCH 109/113] DOC: update DataFrame.dropna's axis argument docs (#21652) --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b553cfdc72c92..42a68de52a3c4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4186,6 +4186,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, .. deprecated:: 0.23.0 Pass tuple or list to drop on multiple axes. + Only a single axis is allowed. 
how : {'any', 'all'}, default 'any' Determine if row or column is removed from DataFrame, when we have From 8cbfcbff06bf5a74b9c928e42e64d9eedcfe5b3c Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Wed, 27 Jun 2018 09:29:15 -0600 Subject: [PATCH 110/113] BUG: Let IntervalIndex constructor override inferred closed (#21584) --- doc/source/whatsnew/v0.24.0.txt | 7 +++++ pandas/_libs/interval.pyx | 19 +++++++++++--- pandas/core/indexes/interval.py | 14 +++------- .../indexes/interval/test_construction.py | 26 ++++++++++++++----- 4 files changed, 44 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index abf574ae109fd..406ca9ba045c9 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -205,6 +205,13 @@ Strings - - +Interval +^^^^^^^^ + +- Bug in the :class:`IntervalIndex` constructor where the ``closed`` parameter did not always override the inferred ``closed`` (:issue:`19370`) +- +- + Indexing ^^^^^^^^ diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 5dbf509fda65e..fbb7265a17f8b 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -335,11 +335,17 @@ cdef class Interval(IntervalMixin): @cython.wraparound(False) @cython.boundscheck(False) -cpdef intervals_to_interval_bounds(ndarray intervals): +cpdef intervals_to_interval_bounds(ndarray intervals, + bint validate_closed=True): """ Parameters ---------- - intervals: ndarray object array of Intervals / nulls + intervals : ndarray + object array of Intervals / nulls + + validate_closed: boolean, default True + boolean indicating if all intervals must be closed on the same side. + Mismatching closed will raise if True, else return None for closed. Returns ------- @@ -353,6 +359,7 @@ cpdef intervals_to_interval_bounds(ndarray intervals): object closed = None, interval int64_t n = len(intervals) ndarray left, right + bint seen_closed = False left = np.empty(n, dtype=intervals.dtype) right = np.empty(n, dtype=intervals.dtype) @@ -370,10 +377,14 @@ cpdef intervals_to_interval_bounds(ndarray intervals): left[i] = interval.left right[i] = interval.right - if closed is None: + if not seen_closed: + seen_closed = True closed = interval.closed elif closed != interval.closed: - raise ValueError('intervals must all be closed on the same side') + closed = None + if validate_closed: + msg = 'intervals must all be closed on the same side' + raise ValueError(msg) return left, right, closed diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 23a655b9a51ee..80619c7beb28c 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -233,7 +233,7 @@ def __new__(cls, data, closed=None, dtype=None, copy=False, if isinstance(data, IntervalIndex): left = data.left right = data.right - closed = data.closed + closed = closed or data.closed else: # don't allow scalars @@ -241,16 +241,8 @@ def __new__(cls, data, closed=None, dtype=None, copy=False, cls._scalar_data_error(data) data = maybe_convert_platform_interval(data) - left, right, infer_closed = intervals_to_interval_bounds(data) - - if (com._all_not_none(closed, infer_closed) and - closed != infer_closed): - # GH 18421 - msg = ("conflicting values for closed: constructor got " - "'{closed}', inferred from data '{infer_closed}'" - .format(closed=closed, infer_closed=infer_closed)) - raise ValueError(msg) - + left, right, infer_closed = intervals_to_interval_bounds( + data, validate_closed=closed is None) closed = closed or 
infer_closed return cls._simple_new(left, right, closed, name, copy=copy, diff --git a/pandas/tests/indexes/interval/test_construction.py b/pandas/tests/indexes/interval/test_construction.py index ac946a3421e53..3745f79d7d65d 100644 --- a/pandas/tests/indexes/interval/test_construction.py +++ b/pandas/tests/indexes/interval/test_construction.py @@ -312,13 +312,7 @@ def test_generic_errors(self, constructor): pass def test_constructor_errors(self, constructor): - # mismatched closed inferred from intervals vs constructor. - ivs = [Interval(0, 1, closed='both'), Interval(1, 2, closed='both')] - msg = 'conflicting values for closed' - with tm.assert_raises_regex(ValueError, msg): - constructor(ivs, closed='neither') - - # mismatched closed within intervals + # mismatched closed within intervals with no constructor override ivs = [Interval(0, 1, closed='right'), Interval(2, 3, closed='left')] msg = 'intervals must all be closed on the same side' with tm.assert_raises_regex(ValueError, msg): @@ -336,6 +330,24 @@ def test_constructor_errors(self, constructor): with tm.assert_raises_regex(TypeError, msg): constructor([0, 1]) + @pytest.mark.parametrize('data, closed', [ + ([], 'both'), + ([np.nan, np.nan], 'neither'), + ([Interval(0, 3, closed='neither'), + Interval(2, 5, closed='neither')], 'left'), + ([Interval(0, 3, closed='left'), + Interval(2, 5, closed='right')], 'neither'), + (IntervalIndex.from_breaks(range(5), closed='both'), 'right')]) + def test_override_inferred_closed(self, constructor, data, closed): + # GH 19370 + if isinstance(data, IntervalIndex): + tuples = data.to_tuples() + else: + tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data] + expected = IntervalIndex.from_tuples(tuples, closed=closed) + result = constructor(data, closed=closed) + tm.assert_index_equal(result, expected) + class TestFromIntervals(TestClassConstructors): """ From d07e61b5b3542a9a86868b155b946f87b4ad31c9 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 28 Jun 2018 03:11:48 -0700 Subject: [PATCH 111/113] TST: Use fixtures in dtypes/test_cast.py (#21661) --- pandas/conftest.py | 12 ++ pandas/tests/dtypes/test_cast.py | 249 ++++++++++++++++--------------- 2 files changed, 138 insertions(+), 123 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 5b9c162a0a022..803b3add97052 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -245,6 +245,18 @@ def float_dtype(request): return request.param +@pytest.fixture(params=["complex64", "complex128"]) +def complex_dtype(request): + """ + Parameterized fixture for complex dtypes. + + * complex64 + * complex128 + """ + + return request.param + + UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"] SIGNED_INT_DTYPES = ["int8", "int16", "int32", "int64"] ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index 4a19682e2c558..0d6382424ccf5 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -36,7 +36,7 @@ class TestMaybeDowncast(object): - def test_downcast_conv(self): + def test_downcast(self): # test downcasting arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]) @@ -53,33 +53,34 @@ def test_downcast_conv(self): expected = np.array([8, 8, 8, 8, 9], dtype=np.int64) tm.assert_numpy_array_equal(result, expected) - # GH16875 coercing of bools + # see gh-16875: coercing of booleans. 
ser = Series([True, True, False]) result = maybe_downcast_to_dtype(ser, np.dtype(np.float64)) expected = ser tm.assert_series_equal(result, expected) - # conversions - + @pytest.mark.parametrize("dtype", [np.float64, object, np.int64]) + def test_downcast_conversion_no_nan(self, dtype): expected = np.array([1, 2]) - for dtype in [np.float64, object, np.int64]: - arr = np.array([1.0, 2.0], dtype=dtype) - result = maybe_downcast_to_dtype(arr, 'infer') - tm.assert_almost_equal(result, expected, check_dtype=False) - - for dtype in [np.float64, object]: - expected = np.array([1.0, 2.0, np.nan], dtype=dtype) - arr = np.array([1.0, 2.0, np.nan], dtype=dtype) - result = maybe_downcast_to_dtype(arr, 'infer') - tm.assert_almost_equal(result, expected) - - # empties - for dtype in [np.int32, np.float64, np.float32, np.bool_, - np.int64, object]: - arr = np.array([], dtype=dtype) - result = maybe_downcast_to_dtype(arr, 'int64') - tm.assert_almost_equal(result, np.array([], dtype=np.int64)) - assert result.dtype == np.int64 + arr = np.array([1.0, 2.0], dtype=dtype) + + result = maybe_downcast_to_dtype(arr, "infer") + tm.assert_almost_equal(result, expected, check_dtype=False) + + @pytest.mark.parametrize("dtype", [np.float64, object]) + def test_downcast_conversion_nan(self, dtype): + expected = np.array([1.0, 2.0, np.nan], dtype=dtype) + arr = np.array([1.0, 2.0, np.nan], dtype=dtype) + + result = maybe_downcast_to_dtype(arr, "infer") + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize("dtype", [np.int32, np.float64, np.float32, + np.bool_, np.int64, object]) + def test_downcast_conversion_empty(self, dtype): + arr = np.array([], dtype=dtype) + result = maybe_downcast_to_dtype(arr, "int64") + tm.assert_numpy_array_equal(result, np.array([], dtype=np.int64)) def test_datetimelikes_nan(self): arr = np.array([1, 2, np.nan]) @@ -104,66 +105,71 @@ def test_datetime_with_timezone(self): class TestInferDtype(object): - def testinfer_dtype_from_scalar(self): - # Test that infer_dtype_from_scalar is returning correct dtype for int - # and float. + def test_infer_dtype_from_int_scalar(self, any_int_dtype): + # Test that infer_dtype_from_scalar is + # returning correct dtype for int and float. 
+ data = np.dtype(any_int_dtype).type(12) + dtype, val = infer_dtype_from_scalar(data) + assert dtype == type(data) + + def test_infer_dtype_from_float_scalar(self, float_dtype): + float_dtype = np.dtype(float_dtype).type + data = float_dtype(12) - for dtypec in [np.uint8, np.int8, np.uint16, np.int16, np.uint32, - np.int32, np.uint64, np.int64]: - data = dtypec(12) - dtype, val = infer_dtype_from_scalar(data) - assert dtype == type(data) + dtype, val = infer_dtype_from_scalar(data) + assert dtype == float_dtype + def test_infer_dtype_from_python_scalar(self): data = 12 dtype, val = infer_dtype_from_scalar(data) assert dtype == np.int64 - for dtypec in [np.float16, np.float32, np.float64]: - data = dtypec(12) - dtype, val = infer_dtype_from_scalar(data) - assert dtype == dtypec - data = np.float(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == np.float64 - for data in [True, False]: - dtype, val = infer_dtype_from_scalar(data) - assert dtype == np.bool_ + @pytest.mark.parametrize("bool_val", [True, False]) + def test_infer_dtype_from_boolean(self, bool_val): + dtype, val = infer_dtype_from_scalar(bool_val) + assert dtype == np.bool_ - for data in [np.complex64(1), np.complex128(1)]: - dtype, val = infer_dtype_from_scalar(data) - assert dtype == np.complex_ + def test_infer_dtype_from_complex(self, complex_dtype): + data = np.dtype(complex_dtype).type(1) + dtype, val = infer_dtype_from_scalar(data) + assert dtype == np.complex_ - for data in [np.datetime64(1, 'ns'), Timestamp(1), - datetime(2000, 1, 1, 0, 0)]: - dtype, val = infer_dtype_from_scalar(data) - assert dtype == 'M8[ns]' + @pytest.mark.parametrize("data", [np.datetime64(1, "ns"), Timestamp(1), + datetime(2000, 1, 1, 0, 0)]) + def test_infer_dtype_from_datetime(self, data): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == "M8[ns]" - for data in [np.timedelta64(1, 'ns'), Timedelta(1), - timedelta(1)]: - dtype, val = infer_dtype_from_scalar(data) - assert dtype == 'm8[ns]' + @pytest.mark.parametrize("data", [np.timedelta64(1, "ns"), Timedelta(1), + timedelta(1)]) + def test_infer_dtype_from_timedelta(self, data): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == "m8[ns]" - for freq in ['M', 'D']: - p = Period('2011-01-01', freq=freq) - dtype, val = infer_dtype_from_scalar(p, pandas_dtype=True) - assert dtype == 'period[{0}]'.format(freq) - assert val == p.ordinal + @pytest.mark.parametrize("freq", ["M", "D"]) + def test_infer_dtype_from_period(self, freq): + p = Period("2011-01-01", freq=freq) + dtype, val = infer_dtype_from_scalar(p, pandas_dtype=True) - dtype, val = infer_dtype_from_scalar(p) - dtype == np.object_ - assert val == p + assert dtype == "period[{0}]".format(freq) + assert val == p.ordinal - # misc - for data in [date(2000, 1, 1), - Timestamp(1, tz='US/Eastern'), 'foo']: + dtype, val = infer_dtype_from_scalar(p) + assert dtype == np.object_ + assert val == p - dtype, val = infer_dtype_from_scalar(data) - assert dtype == np.object_ + @pytest.mark.parametrize("data", [date(2000, 1, 1), "foo", + Timestamp(1, tz="US/Eastern")]) + def test_infer_dtype_misc(self, data): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == np.object_ @pytest.mark.parametrize('tz', ['UTC', 'US/Eastern', 'Asia/Tokyo']) - def testinfer_from_scalar_tz(self, tz): + def test_infer_from_scalar_tz(self, tz): dt = Timestamp(1, tz=tz) dtype, val = infer_dtype_from_scalar(dt, pandas_dtype=True) assert dtype == 'datetime64[ns, {0}]'.format(tz) @@ -173,7 +179,7 @@ def testinfer_from_scalar_tz(self, tz): 
assert dtype == np.object_ assert val == dt - def testinfer_dtype_from_scalar_errors(self): + def test_infer_dtype_from_scalar_errors(self): with pytest.raises(ValueError): infer_dtype_from_scalar(np.array([1])) @@ -329,66 +335,63 @@ def test_maybe_convert_objects_copy(self): class TestCommonTypes(object): - def test_numpy_dtypes(self): - # (source_types, destination_type) - testcases = ( - # identity - ((np.int64,), np.int64), - ((np.uint64,), np.uint64), - ((np.float32,), np.float32), - ((np.object,), np.object), - - # into ints - ((np.int16, np.int64), np.int64), - ((np.int32, np.uint32), np.int64), - ((np.uint16, np.uint64), np.uint64), - - # into floats - ((np.float16, np.float32), np.float32), - ((np.float16, np.int16), np.float32), - ((np.float32, np.int16), np.float32), - ((np.uint64, np.int64), np.float64), - ((np.int16, np.float64), np.float64), - ((np.float16, np.int64), np.float64), - - # into others - ((np.complex128, np.int32), np.complex128), - ((np.object, np.float32), np.object), - ((np.object, np.int16), np.object), - - # bool with int - ((np.dtype('bool'), np.int64), np.object), - ((np.dtype('bool'), np.int32), np.object), - ((np.dtype('bool'), np.int16), np.object), - ((np.dtype('bool'), np.int8), np.object), - ((np.dtype('bool'), np.uint64), np.object), - ((np.dtype('bool'), np.uint32), np.object), - ((np.dtype('bool'), np.uint16), np.object), - ((np.dtype('bool'), np.uint8), np.object), - - # bool with float - ((np.dtype('bool'), np.float64), np.object), - ((np.dtype('bool'), np.float32), np.object), - - ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ns]')), - np.dtype('datetime64[ns]')), - ((np.dtype('timedelta64[ns]'), np.dtype('timedelta64[ns]')), - np.dtype('timedelta64[ns]')), - - ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ms]')), - np.dtype('datetime64[ns]')), - ((np.dtype('timedelta64[ms]'), np.dtype('timedelta64[ns]')), - np.dtype('timedelta64[ns]')), - - ((np.dtype('datetime64[ns]'), np.dtype('timedelta64[ns]')), - np.object), - ((np.dtype('datetime64[ns]'), np.int64), np.object) - ) - for src, common in testcases: - assert find_common_type(src) == common - + @pytest.mark.parametrize("source_dtypes,expected_common_dtype", [ + ((np.int64,), np.int64), + ((np.uint64,), np.uint64), + ((np.float32,), np.float32), + ((np.object,), np.object), + + # into ints + ((np.int16, np.int64), np.int64), + ((np.int32, np.uint32), np.int64), + ((np.uint16, np.uint64), np.uint64), + + # into floats + ((np.float16, np.float32), np.float32), + ((np.float16, np.int16), np.float32), + ((np.float32, np.int16), np.float32), + ((np.uint64, np.int64), np.float64), + ((np.int16, np.float64), np.float64), + ((np.float16, np.int64), np.float64), + + # into others + ((np.complex128, np.int32), np.complex128), + ((np.object, np.float32), np.object), + ((np.object, np.int16), np.object), + + # bool with int + ((np.dtype('bool'), np.int64), np.object), + ((np.dtype('bool'), np.int32), np.object), + ((np.dtype('bool'), np.int16), np.object), + ((np.dtype('bool'), np.int8), np.object), + ((np.dtype('bool'), np.uint64), np.object), + ((np.dtype('bool'), np.uint32), np.object), + ((np.dtype('bool'), np.uint16), np.object), + ((np.dtype('bool'), np.uint8), np.object), + + # bool with float + ((np.dtype('bool'), np.float64), np.object), + ((np.dtype('bool'), np.float32), np.object), + + ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ns]')), + np.dtype('datetime64[ns]')), + ((np.dtype('timedelta64[ns]'), np.dtype('timedelta64[ns]')), + np.dtype('timedelta64[ns]')), + + 
((np.dtype('datetime64[ns]'), np.dtype('datetime64[ms]')), + np.dtype('datetime64[ns]')), + ((np.dtype('timedelta64[ms]'), np.dtype('timedelta64[ns]')), + np.dtype('timedelta64[ns]')), + + ((np.dtype('datetime64[ns]'), np.dtype('timedelta64[ns]')), + np.object), + ((np.dtype('datetime64[ns]'), np.int64), np.object) + ]) + def test_numpy_dtypes(self, source_dtypes, expected_common_dtype): + assert find_common_type(source_dtypes) == expected_common_dtype + + def test_raises_empty_input(self): with pytest.raises(ValueError): - # empty find_common_type([]) def test_categorical_dtype(self): From 0829063acb06b326d645ce094b029b8a1f4dcb3f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 28 Jun 2018 03:22:59 -0700 Subject: [PATCH 112/113] TST: Clean old timezone issues PT2 (#21612) --- doc/source/whatsnew/v0.24.0.txt | 24 +++++++++++-------- pandas/conftest.py | 17 +++++++++++++ pandas/tests/frame/test_indexing.py | 10 ++++++++ .../indexes/datetimes/test_arithmetic.py | 4 ++++ .../indexes/datetimes/test_date_range.py | 9 +++++++ pandas/tests/indexing/test_datetime.py | 14 +++++++++++ .../tests/scalar/timestamp/test_timestamp.py | 8 +++++++ pandas/tests/series/test_constructors.py | 8 +++++++ pandas/tests/series/test_replace.py | 7 ++++++ pandas/tests/series/test_timezones.py | 8 +++++++ 10 files changed, 99 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 406ca9ba045c9..1105acda067d3 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -164,12 +164,6 @@ Datetimelike ^^^^^^^^^^^^ - Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) -- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) -- Bug in :meth:`DatetimeIndex.shift` where an ``AssertionError`` would raise when shifting across DST (:issue:`8616`) -- Bug in :class:`Timestamp` constructor where passing an invalid timezone offset designator (``Z``) would not raise a ``ValueError``(:issue:`8910`) -- Bug in :meth:`Timestamp.replace` where replacing at a DST boundary would retain an incorrect offset (:issue:`7825`) -- Bug in :meth:`DatetimeIndex.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`) -- Bug in :meth:`DatetimeIndex.resample` when downsampling across a DST boundary (:issue:`8531`) Timedelta ^^^^^^^^^ @@ -181,9 +175,15 @@ Timedelta Timezones ^^^^^^^^^ -- -- -- +- Bug in :meth:`DatetimeIndex.shift` where an ``AssertionError`` would raise when shifting across DST (:issue:`8616`) +- Bug in :class:`Timestamp` constructor where passing an invalid timezone offset designator (``Z``) would not raise a ``ValueError``(:issue:`8910`) +- Bug in :meth:`Timestamp.replace` where replacing at a DST boundary would retain an incorrect offset (:issue:`7825`) +- Bug in :meth:`Series.replace` with ``datetime64[ns, tz]`` data when replacing ``NaT`` (:issue:`11792`) +- Bug in :class:`Timestamp` when passing different string date formats with a timezone offset would produce different timezone offsets (:issue:`12064`) +- Bug when comparing a tz-naive :class:`Timestamp` to a tz-aware :class:`DatetimeIndex` which would coerce the :class:`DatetimeIndex` to tz-naive (:issue:`12601`) +- Bug in :meth:`Series.truncate` with a tz-aware :class:`DatetimeIndex` which would cause a core dump (:issue:`9243`) +- Bug in :class:`Series` constructor which would coerce tz-aware and tz-naive 
:class:`Timestamp`s to tz-aware (:issue:`13051`) +- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) Offsets ^^^^^^^ @@ -217,7 +217,10 @@ Indexing - The traceback from a ``KeyError`` when asking ``.loc`` for a single missing label is now shorter and more clear (:issue:`21557`) - When ``.ix`` is asked for a missing integer label in a :class:`MultiIndex` with a first level of integer type, it now raises a ``KeyError`` - consistently with the case of a flat :class:`Int64Index` - rather than falling back to positional indexing (:issue:`21593`) -- +- Bug in :meth:`DatetimeIndex.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`) +- Bug in :class:`DataFrame` when setting values with ``.loc`` and a timezone aware :class:`DatetimeIndex` (:issue:`11365`) +- Bug when indexing :class:`DatetimeIndex` with nanosecond resolution dates and timezones (:issue:`11679`) + - MultiIndex @@ -245,6 +248,7 @@ Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` with ``as_index=False`` leading to the loss of timezone information (:issue:`15884`) +- Bug in :meth:`DatetimeIndex.resample` when downsampling across a DST boundary (:issue:`8531`) - - diff --git a/pandas/conftest.py b/pandas/conftest.py index 803b3add97052..ae08e0817de29 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -320,3 +320,20 @@ def mock(): return importlib.import_module("unittest.mock") else: return pytest.importorskip("mock") + + +@pytest.fixture(params=['__eq__', '__ne__', '__le__', + '__lt__', '__ge__', '__gt__']) +def all_compare_operators(request): + """ + Fixture for dunder names for common compare operations + + * >= + * > + * == + * != + * < + * <= + """ + + return request.param diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index be37e696ea0a3..c7aaf900b17fa 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2248,6 +2248,16 @@ def test_setitem_datetimelike_with_inference(self): index=list('ABCDEFGH')) assert_series_equal(result, expected) + @pytest.mark.parametrize('idxer', ['var', ['var']]) + def test_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): + # GH 11365 + tz = tz_naive_fixture + idx = date_range(start='2015-07-12', periods=3, freq='H', tz=tz) + expected = DataFrame(1.2, index=idx, columns=['var']) + result = DataFrame(index=idx, columns=['var']) + result.loc[:, idxer] = expected + tm.assert_frame_equal(result, expected) + def test_at_time_between_time_datetimeindex(self): index = date_range("2012-01-01", "2012-01-05", freq='30min') df = DataFrame(randn(len(index), 5), index=index) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index 0649083a440df..ff31ffee13217 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -276,6 +276,10 @@ def test_comparison_tzawareness_compat(self, op): with pytest.raises(TypeError): op(dz, ts) + # GH 12601: Check comparison against Timestamps and DatetimeIndex + with pytest.raises(TypeError): + op(ts, dz) + @pytest.mark.parametrize('op', [operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le]) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index ec37bbbcb6c02..47d4d15420f1d 100644 
--- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -292,6 +292,15 @@ def test_construct_over_dst(self): freq='H', tz='US/Pacific') tm.assert_index_equal(result, expected) + def test_construct_with_different_start_end_string_format(self): + # GH 12064 + result = date_range('2013-01-01 00:00:00+09:00', + '2013/01/01 02:00:00+09:00', freq='H') + expected = DatetimeIndex([Timestamp('2013-01-01 00:00:00+09:00'), + Timestamp('2013-01-01 01:00:00+09:00'), + Timestamp('2013-01-01 02:00:00+09:00')]) + tm.assert_index_equal(result, expected) + class TestGenRangeGeneration(object): diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index a5c12e4152c90..751372380d262 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -252,3 +252,17 @@ def test_series_partial_set_period(self): check_stacklevel=False): result = ser.loc[keys] tm.assert_series_equal(result, exp) + + def test_nanosecond_getitem_setitem_with_tz(self): + # GH 11679 + data = ['2016-06-28 08:30:00.123456789'] + index = pd.DatetimeIndex(data, dtype='datetime64[ns, America/Chicago]') + df = DataFrame({'a': [10]}, index=index) + result = df.loc[df.index[0]] + expected = Series(10, index=['a'], name=df.index[0]) + tm.assert_series_equal(result, expected) + + result = df.copy() + result.loc[df.index[0], 'a'] = -1 + expected = DataFrame(-1, index=index, columns=['a']) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 8dc9903b7356d..5272059163a07 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -542,6 +542,14 @@ def test_construct_timestamp_near_dst(self, offset): result = Timestamp(expected, tz='Europe/Helsinki') assert result == expected + @pytest.mark.parametrize('arg', [ + '2013/01/01 00:00:00+09:00', '2013-01-01 00:00:00+09:00']) + def test_construct_with_different_string_format(self, arg): + # GH 12064 + result = Timestamp(arg) + expected = Timestamp(datetime(2013, 1, 1), tz=pytz.FixedOffset(540)) + assert result == expected + class TestTimestamp(object): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 27cfec0dbf20d..fe224436c52e6 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1185,3 +1185,11 @@ def test_constructor_range_dtype(self, dtype): expected = Series([0, 1, 2, 3, 4], dtype=dtype or 'int64') result = Series(range(5), dtype=dtype) tm.assert_series_equal(result, expected) + + def test_constructor_tz_mixed_data(self): + # GH 13051 + dt_list = [Timestamp('2016-05-01 02:03:37'), + Timestamp('2016-04-30 19:03:37-0700', tz='US/Pacific')] + result = Series(dt_list) + expected = Series(dt_list, dtype=object) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 2c07d87865f53..a3b92798879f5 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -108,6 +108,13 @@ def test_replace_gh5319(self): pd.Timestamp('20120101')) tm.assert_series_equal(result, expected) + # GH 11792: Test with replacing NaT in a list with tz data + ts = pd.Timestamp('2015/01/01', tz='UTC') + s = pd.Series([pd.NaT, pd.Timestamp('2015/01/01', tz='UTC')]) + result = s.replace([np.nan, pd.NaT], pd.Timestamp.min) + expected 
= pd.Series([pd.Timestamp.min, ts], dtype=object) + tm.assert_series_equal(expected, result) + def test_replace_with_single_list(self): ser = pd.Series([0, 1, 2, 3, 4]) result = ser.replace([1, 2, 3]) diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index b54645d04bd1a..f2433163352ac 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -300,3 +300,11 @@ def test_getitem_pydatetime_tz(self, tzstr): dt = datetime(2012, 12, 24, 17, 0) time_datetime = tslib._localize_pydatetime(dt, tz) assert ts[time_pandas] == ts[time_datetime] + + def test_series_truncate_datetimeindex_tz(self): + # GH 9243 + idx = date_range('4/1/2005', '4/30/2005', freq='D', tz='US/Pacific') + s = Series(range(len(idx)), index=idx) + result = s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4)) + expected = Series([1, 2, 3], index=idx[1:4]) + tm.assert_series_equal(result, expected) From da3b9031cd1ad48ae026710a7109403962eaba59 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 28 Jun 2018 20:55:45 +0100 Subject: [PATCH 113/113] Whatsnew Timestamp bug --- doc/source/whatsnew/v0.23.2.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 9c4b408a1d24b..8c36d51a5fd16 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -54,7 +54,7 @@ Fixed Regressions - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) - Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) -- +- Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`) .. _whatsnew_0232.performance:
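A minimal sketch of the behaviour the whatsnew entry above describes, assuming a pandas build that includes the patches in this series (GH 21262): a Timestamp that already sits on an exact multiple of the rounding frequency is returned unchanged by round/floor/ceil, while off-boundary values still round as before.

    import pandas as pd

    # On-boundary timestamp: an exact multiple of the 15-minute frequency,
    # so round/floor/ceil should all return it unchanged after this fix.
    ts = pd.Timestamp('2018-01-01 00:15:00')
    assert ts.floor('15T') == ts
    assert ts.ceil('15T') == ts
    assert ts.round('15T') == ts

    # Off-boundary timestamp: still rounds to the nearest multiple of the
    # frequency (here 2 minutes), exactly as before the fix.
    assert (pd.Timestamp('2018-01-01 00:02:06').round('2T') ==
            pd.Timestamp('2018-01-01 00:02:00'))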