From e41387dd7c64e0a2032f8aad12a3c00ae50e3165 Mon Sep 17 00:00:00 2001 From: yanglinlee Date: Fri, 24 May 2019 11:45:59 -0400 Subject: [PATCH 01/34] BUG: None comparison evaluates to True #26504 --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/arrays/categorical.py | 10 +++++++++- pandas/tests/arrays/categorical/test_operators.py | 13 +++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 91b70334dc9bc..df7f952d4180a 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -305,7 +305,7 @@ Categorical ^^^^^^^^^^^ - Bug in :func:`DataFrame.at` and :func:`Series.at` that would raise exception if the index was a :class:`CategoricalIndex` (:issue:`20629`) -- +- Bug in :func:`_cat_compare_op` that would evaluate comparison with None to True (:issue:`26504`) - Datetimelike diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d25ccd1b158be..eeacb88f33f55 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -100,7 +100,15 @@ def f(self, other): if is_scalar(other): if other in self.categories: i = self.categories.get_loc(other) - return getattr(self._codes, op)(i) + f = getattr(self._codes, op) + ret = f(i) + + # check for NaN in self + na_mask = (self._codes == -1) + if na_mask.any(): + # In other series, this leads to False, so do that here too + ret[na_mask] = False + return ret else: if op == '__eq__': return np.repeat(False, len(self)) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index dc6e1a5bc36b3..b323cb2b6a7c3 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -186,6 +186,19 @@ def test_comparison_with_unknown_scalars(self): tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True])) + def 
test_comparison_with_known_scalars(self): + # https://github.com/pandas-dev/pandas/issues/26504 + # and following comparisons with scalars in categories with None should + # be evaluated as False + + cat1 = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) + cat2 = Categorical([None, 1, 2, 3], categories=[1, 2, 3], ordered=True) + + tm.assert_numpy_array_equal(cat1 <= 2, + np.array([True, True, False, False])) + tm.assert_numpy_array_equal(cat2 <= 2, + np.array([False, True, True, False])) + @pytest.mark.parametrize('data,reverse,base', [ (list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])] From 9af03ce19c8e2184afc62340138881fb7c804dfd Mon Sep 17 00:00:00 2001 From: ArtinSarraf Date: Tue, 21 May 2019 16:41:44 -0400 Subject: [PATCH 02/34] ENH - Index set operation modifications to address issue #23525 (#23538) --- doc/source/whatsnew/v0.25.0.rst | 27 +++++ pandas/core/indexes/base.py | 104 +++++++++++++++--- pandas/core/indexes/datetimes.py | 34 +----- pandas/core/indexes/interval.py | 26 ++--- pandas/core/indexes/numeric.py | 8 ++ pandas/core/indexes/period.py | 12 +- pandas/core/indexes/range.py | 10 +- pandas/core/indexes/timedeltas.py | 21 +--- pandas/tests/indexes/common.py | 24 +--- pandas/tests/indexes/conftest.py | 36 +++--- .../tests/indexes/datetimes/test_datetime.py | 6 +- pandas/tests/indexes/datetimes/test_setops.py | 22 +++- .../tests/indexes/datetimes/test_timezones.py | 5 +- .../tests/indexes/interval/test_interval.py | 13 ++- pandas/tests/indexes/period/test_setops.py | 4 - pandas/tests/indexes/test_base.py | 2 + pandas/tests/indexes/test_setops.py | 76 +++++++++++++ pandas/tests/reshape/test_concat.py | 23 ++-- pandas/tests/series/test_combine_concat.py | 1 + pandas/tests/series/test_missing.py | 14 ++- pandas/tests/series/test_operators.py | 36 +++++- 21 files changed, 343 insertions(+), 161 deletions(-) create mode 100644 pandas/tests/indexes/test_setops.py diff --git 
a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index df7f952d4180a..0c69d1a4ce013 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -154,6 +154,33 @@ returned if all the columns were dummy encoded, and a :class:`DataFrame` otherwi Providing any ``SparseSeries`` or ``SparseDataFrame`` to :func:`concat` will cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before. +.. _whatsnew_0250.api_breaking.incompatible_index_unions: + +Incompatible Index Type Unions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When performing :func:`Index.union` operations between objects of incompatible dtypes, +the result will be a base :class:`Index` of dtype ``object``. This behavior holds true for +unions between :class:`Index` objects that previously would have been prohibited. The dtype +of empty :class:`Index` objects will now be evaluated before performing union operations +rather than simply returning the other :class:`Index` object. :func:`Index.union` can now be +considered commutative, such that ``A.union(B) == B.union(A)`` (:issue:`23525`). + +*Previous Behavior*: + + In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) + ... + ValueError: can only call with other PeriodIndex-ed objects + + In [2]: pd.Index([], dtype=object).union(pd.Index([1, 2, 3])) + Out[2]: Int64Index([1, 2, 3], dtype='int64') + +*New Behavior*: + +.. 
ipython:: python + + pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) + pd.Index([], dtype=object).union(pd.Index([1, 2, 3])) ``DataFrame`` groupby ffill/bfill no longer return group labels ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index dd3717813ce3f..eff7ff2c9f347 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -20,11 +20,10 @@ ensure_categorical, ensure_int64, ensure_object, ensure_platform_int, is_bool, is_bool_dtype, is_categorical, is_categorical_dtype, is_datetime64_any_dtype, is_datetime64tz_dtype, is_dtype_equal, - is_dtype_union_equal, is_extension_array_dtype, is_float, is_float_dtype, - is_hashable, is_integer, is_integer_dtype, is_interval_dtype, is_iterator, - is_list_like, is_object_dtype, is_period_dtype, is_scalar, - is_signed_integer_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype, - pandas_dtype) + is_extension_array_dtype, is_float, is_float_dtype, is_hashable, + is_integer, is_integer_dtype, is_interval_dtype, is_iterator, is_list_like, + is_object_dtype, is_period_dtype, is_scalar, is_signed_integer_dtype, + is_timedelta64_dtype, is_unsigned_integer_dtype, pandas_dtype) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ( ABCDataFrame, ABCDateOffset, ABCDatetimeArray, ABCIndexClass, @@ -2262,6 +2261,47 @@ def _get_reconciled_name_object(self, other): return self._shallow_copy(name=name) return self + def _union_incompatible_dtypes(self, other, sort): + """ + Casts this and other index to object dtype to allow the formation + of a union between incompatible types. + + Parameters + ---------- + other : Index or array-like + sort : False or None, default False + Whether to sort the resulting index. + + * False : do not sort the result. + * None : sort the result, except when `self` and `other` are equal + or when the values cannot be compared. 
+ + Returns + ------- + Index + """ + this = self.astype(object, copy=False) + # cast to Index for when `other` is list-like + other = Index(other).astype(object, copy=False) + return Index.union(this, other, sort=sort).astype(object, copy=False) + + def _is_compatible_with_other(self, other): + """ + Check whether this and the other dtype are compatible with each other. + Meaning a union can be formed between them without needing to be cast + to dtype object. + + Parameters + ---------- + other : Index or array-like + + Returns + ------- + bool + """ + return (type(self) is type(other) + and is_dtype_equal(self.dtype, other.dtype)) + def _validate_sort_keyword(self, sort): if sort not in [None, False]: raise ValueError("The 'sort' keyword only takes the values of " @@ -2271,6 +2311,11 @@ def union(self, other, sort=None): """ Form the union of two Index objects. + If the Index objects are incompatible, both Index objects will be + cast to dtype('object') first. + + .. versionchanged:: 0.25.0 + Parameters ---------- other : Index or array-like @@ -2300,30 +2345,54 @@ def union(self, other, sort=None): Examples -------- + Union matching dtypes + >>> idx1 = pd.Index([1, 2, 3, 4]) >>> idx2 = pd.Index([3, 4, 5, 6]) >>> idx1.union(idx2) Int64Index([1, 2, 3, 4, 5, 6], dtype='int64') + + Union mismatched dtypes + + >>> idx1 = pd.Index(['a', 'b', 'c', 'd']) + >>> idx2 = pd.Index([1, 2, 3, 4]) + >>> idx1.union(idx2) + Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object') """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) - other = ensure_index(other) - if len(other) == 0 or self.equals(other): + if not self._is_compatible_with_other(other): + return self._union_incompatible_dtypes(other, sort=sort) + + return self._union(other, sort=sort) + + def _union(self, other, sort): + """ + Specific union logic should go here. In subclasses, union behavior + should be overwritten here rather than in `self.union`. 
+ + Parameters + ---------- + other : Index or array-like + sort : False or None, default False + Whether to sort the resulting index. + + * False : do not sort the result. + * None : sort the result, except when `self` and `other` are equal + or when the values cannot be compared. + + Returns + ------- + Index + """ + + if not len(other) or self.equals(other): return self._get_reconciled_name_object(other) - if len(self) == 0: + if not len(self): return other._get_reconciled_name_object(self) - # TODO: is_dtype_union_equal is a hack around - # 1. buggy set ops with duplicates (GH #13432) - # 2. CategoricalIndex lacking setops (GH #10186) - # Once those are fixed, this workaround can be removed - if not is_dtype_union_equal(self.dtype, other.dtype): - this = self.astype('O') - other = other.astype('O') - return this.union(other, sort=sort) - # TODO(EA): setops-refactor, clean all this up if is_period_dtype(self) or is_datetime64tz_dtype(self): lvals = self._ndarray_values @@ -2370,6 +2439,7 @@ def union(self, other, sort=None): def _wrap_setop_result(self, other, result): return self._constructor(result, name=get_op_result_name(self, other)) + # TODO: standardize return type of non-union setops type(self vs other) def intersection(self, other, sort=False): """ Form the intersection of two Index objects. diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 9c735a5598f4a..7fd537fb9989a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -451,35 +451,9 @@ def _formatter_func(self): # -------------------------------------------------------------------- # Set Operation Methods - def union(self, other, sort=None): - """ - Specialized union for DatetimeIndex objects. If combine - overlapping ranges with the same DateOffset, will be much - faster than Index.union - - Parameters - ---------- - other : DatetimeIndex or array-like - sort : bool or None, default None - Whether to sort the resulting Index. 
- - * None : Sort the result, except when - - 1. `self` and `other` are equal. - 2. `self` or `other` has length 0. - 3. Some values in `self` or `other` cannot be compared. - A RuntimeWarning is issued in this case. - - * False : do not sort the result - - .. versionadded:: 0.25.0 - - Returns - ------- - y : Index or DatetimeIndex - """ - self._validate_sort_keyword(sort) - self._assert_can_do_setop(other) + def _union(self, other, sort): + if not len(other) or self.equals(other) or not len(self): + return super()._union(other, sort=sort) if len(other) == 0 or self.equals(other) or len(self) == 0: return super().union(other, sort=sort) @@ -495,7 +469,7 @@ def union(self, other, sort=None): if this._can_fast_union(other): return this._fast_union(other, sort=sort) else: - result = Index.union(this, other, sort=sort) + result = Index._union(this, other, sort=sort) if isinstance(result, DatetimeIndex): # TODO: we shouldn't be setting attributes like this; # in all the tests this equality already holds diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index a3dbf2e03957b..87216dcc7b957 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -964,19 +964,6 @@ def insert(self, loc, item): new_right = self.right.insert(loc, right_insert) return self._shallow_copy(new_left, new_right) - def _as_like_interval_index(self, other): - self._assert_can_do_setop(other) - other = ensure_index(other) - if not isinstance(other, IntervalIndex): - msg = ('the other index needs to be an IntervalIndex too, but ' - 'was type {}').format(other.__class__.__name__) - raise TypeError(msg) - elif self.closed != other.closed: - msg = ('can only do set operations between two IntervalIndex ' - 'objects that are closed on the same side') - raise ValueError(msg) - return other - def _concat_same_dtype(self, to_concat, name): """ assert that we all have the same .closed @@ -1092,7 +1079,17 @@ def overlaps(self, other): def _setop(op_name, 
sort=None): def func(self, other, sort=sort): - other = self._as_like_interval_index(other) + self._assert_can_do_setop(other) + other = ensure_index(other) + if not isinstance(other, IntervalIndex): + result = getattr(self.astype(object), op_name)(other) + if op_name in ('difference',): + result = result.astype(self.dtype) + return result + elif self.closed != other.closed: + msg = ('can only do set operations between two IntervalIndex ' + 'objects that are closed on the same side') + raise ValueError(msg) # GH 19016: ensure set op will not return a prohibited dtype subtypes = [self.dtype.subtype, other.dtype.subtype] @@ -1114,6 +1111,7 @@ def func(self, other, sort=sort): return type(self).from_tuples(result, closed=self.closed, name=result_name) + return func @property diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index a11f34cbdcceb..b6c8ba588f9d6 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -9,6 +9,7 @@ is_bool, is_bool_dtype, is_dtype_equal, is_extension_array_dtype, is_float, is_integer_dtype, is_scalar, needs_i8_conversion, pandas_dtype) import pandas.core.dtypes.concat as _concat +from pandas.core.dtypes.generic import ABCInt64Index, ABCRangeIndex from pandas.core.dtypes.missing import isna from pandas.core import algorithms @@ -221,6 +222,13 @@ def _assert_safe_casting(cls, data, subarr): raise TypeError('Unsafe NumPy casting, you must ' 'explicitly cast') + def _is_compatible_with_other(self, other): + return ( + super()._is_compatible_with_other(other) + or all(isinstance(type(obj), (ABCInt64Index, ABCRangeIndex)) + for obj in [self, other]) + ) + Int64Index._add_numeric_methods() Int64Index._add_logical_methods() diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index ed08de54ad6f2..044951ceda502 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -791,6 +791,11 @@ def join(self, other, how='left', level=None, 
return_indexers=False, """ self._assert_can_do_setop(other) + if not isinstance(other, PeriodIndex): + return self.astype(object).join(other, how=how, level=level, + return_indexers=return_indexers, + sort=sort) + result = Int64Index.join(self, other, how=how, level=level, return_indexers=return_indexers, sort=sort) @@ -807,10 +812,9 @@ def intersection(self, other, sort=False): def _assert_can_do_setop(self, other): super()._assert_can_do_setop(other) - if not isinstance(other, PeriodIndex): - raise ValueError('can only call with other PeriodIndex-ed objects') - - if self.freq != other.freq: + # *Can't* use PeriodIndexes of different freqs + # *Can* use PeriodIndex/DatetimeIndex + if isinstance(other, PeriodIndex) and self.freq != other.freq: msg = DIFFERENT_FREQ.format(cls=type(self).__name__, own_freq=self.freqstr, other_freq=other.freqstr) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 160e6284d3c59..ea14a4c789cd3 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -470,7 +470,7 @@ def _extended_gcd(self, a, b): old_t, t = t, old_t - quotient * t return old_r, old_s, old_t - def union(self, other, sort=None): + def _union(self, other, sort): """ Form the union of two Index objects and sorts if possible @@ -490,9 +490,8 @@ def union(self, other, sort=None): ------- union : Index """ - self._assert_can_do_setop(other) - if len(other) == 0 or self.equals(other) or len(self) == 0: - return super().union(other, sort=sort) + if not len(other) or self.equals(other) or not len(self): + return super()._union(other, sort=sort) if isinstance(other, RangeIndex) and sort is None: start_s, step_s = self._start, self._step @@ -530,8 +529,7 @@ def union(self, other, sort=None): (start_s + step_o >= start_o) and (end_s - step_o <= end_o)): return RangeIndex(start_r, end_r + step_o, step_o) - - return self._int64index.union(other, sort=sort) + return self._int64index._union(other, sort=sort) 
@Appender(_index_shared_docs['join']) def join(self, other, how='left', level=None, return_indexers=False, diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 5e62c2ef881e9..6ae17e62b49c6 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -329,24 +329,9 @@ def astype(self, dtype, copy=True): return Index(result.astype('i8'), name=self.name) return DatetimeIndexOpsMixin.astype(self, dtype, copy=copy) - def union(self, other): - """ - Specialized union for TimedeltaIndex objects. If combine - overlapping ranges with the same DateOffset, will be much - faster than Index.union - - Parameters - ---------- - other : TimedeltaIndex or array-like - - Returns - ------- - y : Index or TimedeltaIndex - """ - self._assert_can_do_setop(other) - + def _union(self, other, sort): if len(other) == 0 or self.equals(other) or len(self) == 0: - return super().union(other) + return super()._union(other, sort=sort) if not isinstance(other, TimedeltaIndex): try: @@ -358,7 +343,7 @@ def union(self, other): if this._can_fast_union(other): return this._fast_union(other) else: - result = Index.union(this, other) + result = Index._union(this, other, sort=sort) if isinstance(result, TimedeltaIndex): if result.freq is None: result.freq = to_offset(result.inferred_freq) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 71d1e686f5c02..674f600bc8693 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -447,11 +447,7 @@ def test_intersection_base(self): cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with pytest.raises(ValueError, match=msg): - first.intersection(case) - elif isinstance(idx, CategoricalIndex): + if isinstance(idx, CategoricalIndex): pass else: result = first.intersection(case) @@ -474,11 +470,7 @@ def 
test_union_base(self): cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with pytest.raises(ValueError, match=msg): - first.union(case) - elif isinstance(idx, CategoricalIndex): + if isinstance(idx, CategoricalIndex): pass else: result = first.union(case) @@ -506,11 +498,7 @@ def test_difference_base(self, sort): cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with pytest.raises(ValueError, match=msg): - first.difference(case, sort) - elif isinstance(idx, CategoricalIndex): + if isinstance(idx, CategoricalIndex): pass elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): assert result.__class__ == answer.__class__ @@ -540,11 +528,7 @@ def test_symmetric_difference(self): cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with pytest.raises(ValueError, match=msg): - first.symmetric_difference(case) - elif isinstance(idx, CategoricalIndex): + if isinstance(idx, CategoricalIndex): pass else: result = first.symmetric_difference(case) diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index 632d5b2875a5a..83f1f22b158b1 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -5,23 +5,25 @@ from pandas.core.indexes.api import Index, MultiIndex import pandas.util.testing as tm - -@pytest.fixture(params=[tm.makeUnicodeIndex(100), - tm.makeStringIndex(100), - tm.makeDateIndex(100), - tm.makePeriodIndex(100), - tm.makeTimedeltaIndex(100), - tm.makeIntIndex(100), - tm.makeUIntIndex(100), - tm.makeRangeIndex(100), - tm.makeFloatIndex(100), - Index([True, False]), - tm.makeCategoricalIndex(100), - Index([]), - 
MultiIndex.from_tuples(zip( - ['foo', 'bar', 'baz'], [1, 2, 3])), - Index([0, 0, 1, 1, 2, 2])], - ids=lambda x: type(x).__name__) +indices_list = [tm.makeUnicodeIndex(100), + tm.makeStringIndex(100), + tm.makeDateIndex(100), + tm.makePeriodIndex(100), + tm.makeTimedeltaIndex(100), + tm.makeIntIndex(100), + tm.makeUIntIndex(100), + tm.makeRangeIndex(100), + tm.makeFloatIndex(100), + Index([True, False]), + tm.makeCategoricalIndex(100), + tm.makeIntervalIndex(100), + Index([]), + MultiIndex.from_tuples(zip( + ['foo', 'bar', 'baz'], [1, 2, 3])), + Index([0, 0, 1, 1, 2, 2])] + + +@pytest.fixture(params=indices_list, ids=lambda x: type(x).__name__) def indices(request): return request.param diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index efa6d006bad6f..01649cb4646de 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -298,9 +298,9 @@ def test_join_with_period_index(self, join_type): c_idx_type='p', r_idx_type='dt') s = df.iloc[:5, 0] - msg = 'can only call with other PeriodIndex-ed objects' - with pytest.raises(ValueError, match=msg): - df.columns.join(s.index, how=join_type) + expected = df.columns.astype('O').join(s.index, how=join_type) + result = df.columns.join(s.index, how=join_type) + tm.assert_index_equal(expected, result) def test_factorize(self): idx1 = DatetimeIndex(['2014-01', '2014-01', '2014-02', '2014-02', diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 45a3a64216cab..fd666f3d56c9d 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -29,11 +29,20 @@ def test_union2(self, sort): union = first.union(second, sort=sort) tm.assert_index_equal(union, everything) + @pytest.mark.parametrize("box", [np.array, Series, list]) + @pytest.mark.parametrize("sort", [None, False]) + def test_union3(self, sort, 
box): + everything = tm.makeDateIndex(10) + first = everything[:5] + second = everything[5:] + # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.union(case, sort=sort) - tm.assert_index_equal(result, everything) + expected = first.astype('O').union( + pd.Index(second.values, dtype='O') + ).astype('O') + case = box(second.values) + result = first.union(case, sort=sort) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize("tz", tz) @pytest.mark.parametrize("sort", [None, False]) @@ -303,11 +312,12 @@ def test_datetimeindex_union_join_empty(self, sort): empty = Index([]) result = dti.union(empty, sort=sort) - assert isinstance(result, DatetimeIndex) - assert result is result + expected = dti.astype('O') + tm.assert_index_equal(result, expected) result = dti.join(empty) assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, dti) def test_join_nonunique(self): idx1 = to_datetime(['2012-11-06 16:00:11.477563', diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 3f876565119cb..368dc68e516df 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1077,7 +1077,10 @@ def test_dti_union_aware(self): tz="US/Eastern") result = rng.union(rng2) - assert result.tz.zone == 'UTC' + expected = rng.astype('O').union(rng2.astype('O')) + tm.assert_index_equal(result, expected) + assert result[0].tz.zone == 'US/Central' + assert result[-1].tz.zone == 'US/Eastern' @pytest.mark.parametrize('tz', [None, 'UTC', "US/Central", dateutil.tz.tzoffset(None, -28800)]) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 61465d8454383..f4f63aaecd336 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -901,15 +901,18 @@ def 
test_symmetric_difference(self, closed, sort): @pytest.mark.parametrize('op_name', [ 'union', 'intersection', 'difference', 'symmetric_difference']) @pytest.mark.parametrize("sort", [None, False]) - def test_set_operation_errors(self, closed, op_name, sort): + def test_set_incompatible_types(self, closed, op_name, sort): index = self.create_index(closed=closed) set_op = getattr(index, op_name) + # TODO: standardize return type of non-union setops type(self vs other) # non-IntervalIndex - msg = ('the other index needs to be an IntervalIndex too, but ' - 'was type Int64Index') - with pytest.raises(TypeError, match=msg): - set_op(Index([1, 2, 3]), sort=sort) + if op_name == 'difference': + expected = index + else: + expected = getattr(index.astype('O'), op_name)(Index([1, 2, 3])) + result = set_op(Index([1, 2, 3]), sort=sort) + tm.assert_index_equal(result, expected) # mixed closed msg = ('can only do set operations between two IntervalIndex objects ' diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index 29d07a0985574..a9102aeec060c 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -127,10 +127,6 @@ def test_union_misc(self, sort): with pytest.raises(period.IncompatibleFrequency): index.union(index2, sort=sort) - msg = 'can only call with other PeriodIndex-ed objects' - with pytest.raises(ValueError, match=msg): - index.join(index.to_timestamp()) - index3 = period_range('1/1/2000', '1/20/2000', freq='2D') with pytest.raises(period.IncompatibleFrequency): index.join(index3) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 7b507a9de6b5d..7e70d77ea70fc 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -889,6 +889,8 @@ def test_union_identity(self, sort): # i.e. 
identity is not preserved when sort is True assert (union is first) is (not sort) + # This should no longer be the same object, since [] is not consistent, + # both objects will be recast to dtype('O') union = first.union([], sort=sort) assert (union is first) is (not sort) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py new file mode 100644 index 0000000000000..b626ced2ccb1b --- /dev/null +++ b/pandas/tests/indexes/test_setops.py @@ -0,0 +1,76 @@ +''' +The tests in this package are to ensure the proper resultant dtypes of +set operations. +''' +import itertools as it + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_dtype_equal + +import pandas as pd +from pandas import Int64Index, RangeIndex +from pandas.tests.indexes.conftest import indices_list +import pandas.util.testing as tm + +COMPATIBLE_INCONSISTENT_PAIRS = { + (Int64Index, RangeIndex): (tm.makeIntIndex, tm.makeRangeIndex) +} + + +@pytest.fixture(params=list(it.combinations(indices_list, 2)), + ids=lambda x: type(x[0]).__name__ + type(x[1]).__name__) +def index_pair(request): + """ + Create all combinations of 2 index types. 
+ """ + return request.param + + +def test_union_same_types(indices): + # Union with a non-unique, non-monotonic index raises error + # Only needed for bool index factory + idx1 = indices.sort_values() + idx2 = indices.sort_values() + assert idx1.union(idx2).dtype == idx1.dtype + + +def test_union_different_types(index_pair): + # GH 23525 + idx1, idx2 = index_pair + type_pair = tuple(sorted([type(idx1), type(idx2)], key=lambda x: str(x))) + if type_pair in COMPATIBLE_INCONSISTENT_PAIRS: + pytest.xfail('This test only considers non compatible indexes.') + + if any(isinstance(idx, pd.MultiIndex) for idx in index_pair): + pytest.xfail('This test doesn\'t consider multiindixes.') + + if is_dtype_equal(idx1.dtype, idx2.dtype): + pytest.xfail('This test only considers non matching dtypes.') + + # A union with a CategoricalIndex (even as dtype('O')) and a + # non-CategoricalIndex can only be made if both indices are monotonic. + # This is true before this PR as well. + + # Union with a non-unique, non-monotonic index raises error + # This applies to the boolean index + idx1 = idx1.sort_values() + idx2 = idx2.sort_values() + + assert idx1.union(idx2).dtype == np.dtype('O') + assert idx2.union(idx1).dtype == np.dtype('O') + + +@pytest.mark.parametrize('idx_fact1,idx_fact2', + COMPATIBLE_INCONSISTENT_PAIRS.values()) +def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2): + # GH 23525 + idx1 = idx_fact1(10) + idx2 = idx_fact2(20) + + res1 = idx1.union(idx2) + res2 = idx2.union(idx1) + + assert res1.dtype in (idx1.dtype, idx2.dtype) + assert res2.dtype in (idx1.dtype, idx2.dtype) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 3d9f3da75306a..ecd62380d8c65 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -960,22 +960,23 @@ def test_append_different_columns_types_raises( df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_can_append) ser = pd.Series([7, 8, 9], 
index=index_cannot_append_with_other, name=2) - msg = ("the other index needs to be an IntervalIndex too, but was" + msg = (r"unorderable types: (Interval|int)\(\) (<|>) " + r"(int|long|float|str|Timestamp)\(\)|" + r"Expected tuple, got (int|long|float|str)|" + r"Cannot compare type 'Timestamp' with type '(int|long)'|" + r"'(<|>)' not supported between instances of 'int' " + r"and '(str|Timestamp)'|" + r"the other index needs to be an IntervalIndex too, but was" r" type {}|" r"object of type '(int|float|Timestamp)' has no len\(\)|" "Expected tuple, got str") - with pytest.raises(TypeError, match=msg.format( - index_can_append.__class__.__name__)): + with pytest.raises(TypeError, match=msg): df.append(ser) df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_cannot_append_with_other) ser = pd.Series([7, 8, 9], index=index_can_append, name=2) - msg = (r"unorderable types: (Interval|int)\(\) > " - r"(int|float|str)\(\)|" - r"Expected tuple, got (int|float|str)|" - r"Cannot compare type 'Timestamp' with type 'int'|" - r"'>' not supported between instances of 'int' and 'str'") + with pytest.raises(TypeError, match=msg): df.append(ser) @@ -2029,7 +2030,8 @@ def test_concat_empty_series(self): s1 = pd.Series([1, 2, 3], name='x') s2 = pd.Series(name='y') res = pd.concat([s1, s2], axis=1) - exp = pd.DataFrame({'x': [1, 2, 3], 'y': [np.nan, np.nan, np.nan]}) + exp = pd.DataFrame({'x': [1, 2, 3], 'y': [np.nan, np.nan, np.nan]}, + index=pd.Index([0, 1, 2], dtype='O')) tm.assert_frame_equal(res, exp) s1 = pd.Series([1, 2, 3], name='x') @@ -2044,7 +2046,8 @@ def test_concat_empty_series(self): s2 = pd.Series(name=None) res = pd.concat([s1, s2], axis=1) exp = pd.DataFrame({'x': [1, 2, 3], 0: [np.nan, np.nan, np.nan]}, - columns=['x', 0]) + columns=['x', 0], + index=pd.Index([0, 1, 2], dtype='O')) tm.assert_frame_equal(res, exp) @pytest.mark.parametrize('tz', [None, 'UTC']) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py 
index 41c3e220ad06f..ed5cf2d6b2c51 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -100,6 +100,7 @@ def test_combine_first(self): # corner case s = Series([1., 2, 3], index=[0, 1, 2]) result = s.combine_first(Series([], index=[])) + s.index = s.index.astype('O') assert_series_equal(s, result) def test_update(self): diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 13e8d6c885029..11ad238eecd77 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -912,7 +912,7 @@ def test_interpolate_pchip(self): # interpolate at new_index new_index = ser.index.union(Index([49.25, 49.5, 49.75, 50.25, 50.5, - 50.75])) + 50.75])).astype(float) interp_s = ser.reindex(new_index).interpolate(method='pchip') # does not blow up, GH5977 interp_s[49:51] @@ -928,7 +928,9 @@ def test_interpolate_akima(self): index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0])) # interpolate at new_index - new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])) + new_index = ser.index.union( + Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]) + ).astype(float) interp_s = ser.reindex(new_index).interpolate(method='akima') assert_series_equal(interp_s[1:3], expected) @@ -941,7 +943,9 @@ def test_interpolate_piecewise_polynomial(self): index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0])) # interpolate at new_index - new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])) + new_index = ser.index.union( + Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]) + ).astype(float) interp_s = ser.reindex(new_index).interpolate( method='piecewise_polynomial') assert_series_equal(interp_s[1:3], expected) @@ -955,7 +959,9 @@ def test_interpolate_from_derivatives(self): index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0])) # interpolate at new_index - new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])) + new_index = 
ser.index.union( + Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]) + ).astype(float) interp_s = ser.reindex(new_index).interpolate( method='from_derivatives') assert_series_equal(interp_s[1:3], expected) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index fee1976698b04..215fa9f22277e 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -8,10 +8,12 @@ from pandas import ( Categorical, DataFrame, Index, Series, bdate_range, date_range, isna) from pandas.core import ops +from pandas.core.indexes.base import InvalidIndexError import pandas.core.nanops as nanops import pandas.util.testing as tm from pandas.util.testing import ( - assert_almost_equal, assert_frame_equal, assert_series_equal) + assert_almost_equal, assert_frame_equal, assert_index_equal, + assert_series_equal) from .common import TestData @@ -171,7 +173,6 @@ def test_scalar_na_logical_ops_corners(self): operator.and_, operator.or_, operator.xor, - ]) def test_logical_ops_with_index(self, op): # GH#22092, GH#19792 @@ -190,6 +191,37 @@ def test_logical_ops_with_index(self, op): result = op(ser, idx2) assert_series_equal(result, expected) + @pytest.mark.parametrize('op', [ + pytest.param(ops.rand_, + marks=pytest.mark.xfail(reason="GH#22092 Index " + "implementation returns " + "Index", + raises=AssertionError, + strict=True)), + pytest.param(ops.ror_, + marks=pytest.mark.xfail(reason="Index.get_indexer " + "with non unique index", + raises=InvalidIndexError, + strict=True)), + ops.rxor, + ]) + def test_reversed_logical_ops_with_index(self, op): + # GH#22092, GH#19792 + ser = Series([True, True, False, False]) + idx1 = Index([True, False, True, False]) + idx2 = Index([1, 0, 1, 0]) + + # symmetric_difference is only for rxor, but other 2 should fail + expected = idx1.symmetric_difference(ser) + + result = op(ser, idx1) + assert_index_equal(result, expected) + + expected = idx2.symmetric_difference(ser) + + result = op(ser, 
idx2) + assert_index_equal(result, expected) + @pytest.mark.parametrize("op, expected", [ (ops.rand_, pd.Index([False, True])), (ops.ror_, pd.Index([False, True])), From 620fa592e2403bf56de46945d1c7363006491173 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 21 May 2019 23:33:41 +0100 Subject: [PATCH 03/34] DOC/CLN: wil -> will (#26484) --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 76910f425836e..623e2b4863029 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -926,7 +926,7 @@ def squeeze(self, axis=None): a 1 Name: 0, dtype: int64 - Squeezing all axes wil project directly into a scalar: + Squeezing all axes will project directly into a scalar: >>> df_0a.squeeze() 1 From aad1bf9904ef83e9d35022c9df9ec7080ca45f2e Mon Sep 17 00:00:00 2001 From: Brett Randall Date: Wed, 22 May 2019 23:58:19 +1000 Subject: [PATCH 04/34] Fixed typo mutiplication -> multiplication. (#26489) --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index afe37bf198ab7..6bfa63012689d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -918,7 +918,7 @@ def __len__(self): def dot(self, other): """ - Compute the matrix mutiplication between the DataFrame and other. + Compute the matrix multiplication between the DataFrame and other. This method computes the matrix product between the DataFrame and the values of an other Series, DataFrame or a numpy array. 
From ef87d02a71fe075075a3a32df8af8804f390da2d Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 23 May 2019 14:04:56 +0100 Subject: [PATCH 05/34] DOC: fix SyntaxError in doc build on Windows (#26499) --- doc/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index e7d358c7961ab..971aa04ba866a 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -319,7 +319,7 @@ pd.options.display.max_rows = 15 import os - os.chdir('{}') + os.chdir(r'{}') """.format(os.path.dirname(os.path.dirname(__file__))) From 89cc7f2ce39f046dd9877d569fe226d6a1cbfe5a Mon Sep 17 00:00:00 2001 From: Mats Maiwald <32721837+matsmaiwald@users.noreply.github.com> Date: Thu, 23 May 2019 18:46:20 +0200 Subject: [PATCH 06/34] DOC: Highlighted role of index alignment in DataFrame.dot(other) (#26480) (#26496) --- pandas/core/frame.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6bfa63012689d..7d501e8095921 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -944,7 +944,9 @@ def dot(self, other): Notes ----- The dimensions of DataFrame and other must be compatible in order to - compute the matrix multiplication. + compute the matrix multiplication. In addition, the column names of + DataFrame and the index of other must contain the same values, as they + will be aligned prior to the multiplication. The dot method for Series computes the inner product, instead of the matrix product here. @@ -982,6 +984,14 @@ def dot(self, other): 0 1 0 1 4 1 2 2 + + Note how shuffling of the objects does not change the result. 
+ + >>> s2 = s.reindex([1, 0, 2, 3]) + >>> df.dot(s2) + 0 -4 + 1 5 + dtype: int64 """ if isinstance(other, (Series, DataFrame)): common = self.columns.union(other.index) From babd5720f67e66fc817f85925b1ef9cf0b746576 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 24 May 2019 16:11:25 +0100 Subject: [PATCH 07/34] DOC/CLN: Change API reference section title (#26486) --- doc/source/reference/indexing.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index 680cb7e3dac91..42ebf648f299f 100644 --- a/doc/source/reference/indexing.rst +++ b/doc/source/reference/indexing.rst @@ -2,9 +2,9 @@ .. _api.indexing: -======== -Indexing -======== +============= +Index Objects +============= Index ----- From 4c231a77858b06f5ac4c48faf3155394c166da38 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Fri, 24 May 2019 17:29:32 +0200 Subject: [PATCH 08/34] CLN: Remove StringMixin from PandasObject (#26505) --- doc/source/whatsnew/v0.25.0.rst | 14 ++++++++++++++ pandas/core/arrays/categorical.py | 6 +----- pandas/core/arrays/sparse.py | 2 +- pandas/core/base.py | 4 ++-- pandas/core/frame.py | 2 +- pandas/core/generic.py | 2 +- pandas/core/groupby/groupby.py | 4 ++-- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/frozen.py | 2 +- pandas/core/internals/blocks.py | 3 +-- pandas/core/internals/managers.py | 2 +- pandas/core/panel.py | 2 +- pandas/core/series.py | 2 +- pandas/core/sparse/series.py | 5 ++--- pandas/core/window.py | 2 +- 15 files changed, 31 insertions(+), 23 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 0c69d1a4ce013..d86379c4d0703 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -213,6 +213,20 @@ are returned. 
(:issue:`21521`) df.groupby("a").ffill() +``__str__`` methods now call ``__repr__`` rather than vica-versa +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas has until now mostly defined string representations in a Pandas objects's +``__str__``/``__unicode__``/``__bytes__`` methods, and called ``__str__`` from the ``__repr__`` +method, if a specific ``__repr__`` method is not found. This is not needed for Python3. +In Pandas 0.25, the string representations of Pandas objects are now generally +defined in ``__repr__``, and calls to ``__str__`` in general now pass the call on to +the ``__repr__``, if a specific ``__str__`` method doesn't exist, as is standard for Python. +This change is backward compatible for direct usage of Pandas, but if you subclass +Pandas objects *and* give your subclasses specific ``__str__``/``__repr__`` methods, +you may have to adjust your ``__str__``/``__repr__`` methods (:issue:`26495`). + + .. _whatsnew_0250.api_breaking.deps: Increased minimum versions for dependencies diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index eeacb88f33f55..1d6b906158125 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2030,7 +2030,7 @@ def _get_repr(self, length=True, na_rep='NaN', footer=True): result = formatter.to_string() return str(result) - def __str__(self): + def __repr__(self): """ String representation. 
""" @@ -2045,10 +2045,6 @@ def __str__(self): return result - def __repr__(self): - # We want to bypass the ExtensionArray.__repr__ - return str(self) - def _maybe_coerce_indexer(self, indexer): """ return an indexer coerced to the codes dtype diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 7a66e0ff33cc7..b0236cb393c1c 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -1831,7 +1831,7 @@ def _add_comparison_ops(cls): # ---------- # Formatting # ----------- - def __str__(self): + def __repr__(self): return '{self}\nFill: {fill}\n{index}'.format( self=printing.pprint_thing(self), fill=printing.pprint_thing(self.fill_value), diff --git a/pandas/core/base.py b/pandas/core/base.py index f7837c60c0b82..3f59871fb5b38 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -55,7 +55,7 @@ def __repr__(self): return str(self) -class PandasObject(StringMixin, DirNamesMixin): +class PandasObject(DirNamesMixin): """baseclass for various pandas objects""" @@ -64,7 +64,7 @@ def _constructor(self): """class constructor (for this class it's just `__class__`""" return self.__class__ - def __str__(self): + def __repr__(self): """ Return a string representation for a particular object. """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7d501e8095921..7cf200506e853 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -610,7 +610,7 @@ def _info_repr(self): return info_repr_option and not (self._repr_fits_horizontal_() and self._repr_fits_vertical_()) - def __str__(self): + def __repr__(self): """ Return a string representation for a particular DataFrame. 
""" diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 623e2b4863029..76c73fc40977c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2022,7 +2022,7 @@ def __setstate__(self, state): # ---------------------------------------------------------------------- # Rendering Methods - def __str__(self): + def __repr__(self): # string representation based upon iterating over self # (since, by definition, `PandasContainers` are iterable) prepr = '[%s]' % ','.join(map(pprint_thing, self)) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 4e9e3b4963b6d..aa04b7505afe4 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -373,8 +373,8 @@ def __init__(self, obj, keys=None, axis=0, level=None, def __len__(self): return len(self.groups) - def __str__(self): - # TODO: Better str/repr for GroupBy object + def __repr__(self): + # TODO: Better repr for GroupBy object return object.__repr__(self) def _assure_grouper(self): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index eff7ff2c9f347..a4544e79e2dfa 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -932,7 +932,7 @@ def __deepcopy__(self, memo=None): # -------------------------------------------------------------------- # Rendering Methods - def __str__(self): + def __repr__(self): """ Return a string representation for this object. """ diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index 60e4253e3101b..aeb0fa119ab33 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -149,7 +149,7 @@ def values(self): arr = self.view(np.ndarray).copy() return arr - def __str__(self): + def __repr__(self): """ Return a string representation for this object. 
""" diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0ac87c653cfff..f86ef40a97299 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -233,8 +233,7 @@ def make_block_same_class(self, values, placement=None, ndim=None, return make_block(values, placement=placement, ndim=ndim, klass=self.__class__, dtype=dtype) - def __str__(self): - + def __repr__(self): # don't want to print out all of the items here name = pprint_thing(self.__class__.__name__) if self._is_single_block: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 96a672b60da70..0b63588c9f5d9 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -291,7 +291,7 @@ def _post_setstate(self): def __len__(self): return len(self.items) - def __str__(self): + def __repr__(self): output = pprint_thing(self.__class__.__name__) for i, ax in enumerate(self.axes): if i == 0: diff --git a/pandas/core/panel.py b/pandas/core/panel.py index b6b957c543df6..c65a73bd0d3f0 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -340,7 +340,7 @@ def _compare_constructor(self, other, func): # ---------------------------------------------------------------------- # Magic methods - def __str__(self): + def __repr__(self): """ Return a string representation for a particular Panel. """ diff --git a/pandas/core/series.py b/pandas/core/series.py index 5b59fd6e7b38d..55b5bdcbf53f4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1384,7 +1384,7 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): # ---------------------------------------------------------------------- # Rendering Methods - def __str__(self): + def __repr__(self): """ Return a string representation for a particular Series. 
""" diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index ae1c94e136475..eac59e2c0f5eb 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -217,9 +217,8 @@ def as_sparse_array(self, kind=None, fill_value=None, copy=False): return SparseArray(self.values, sparse_index=self.sp_index, fill_value=fill_value, kind=kind, copy=copy) - def __str__(self): - # currently, unicode is same as repr...fixes infinite loop - series_rep = Series.__str__(self) + def __repr__(self): + series_rep = Series.__repr__(self) rep = '{series}\n{index!r}'.format(series=series_rep, index=self.sp_index) return rep diff --git a/pandas/core/window.py b/pandas/core/window.py index deb64f1fb089d..d51e12035c829 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -157,7 +157,7 @@ def _get_window(self, other=None): def _window_type(self): return self.__class__.__name__ - def __str__(self): + def __repr__(self): """ Provide a nice str repr of our rolling object. 
""" From cffbaac126fb1043e44cac9ca4ed872d5424fc52 Mon Sep 17 00:00:00 2001 From: Vaibhav Vishal Date: Fri, 24 May 2019 21:02:14 +0530 Subject: [PATCH 09/34] Fix type annotations in pandas.core.indexes.datetimes (#26404) --- mypy.ini | 6 ------ pandas/core/indexes/datetimelike.py | 14 +++++++------- pandas/core/indexes/datetimes.py | 12 +++++++----- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/mypy.ini b/mypy.ini index 584c747a26f2e..3df8fd13a2a75 100644 --- a/mypy.ini +++ b/mypy.ini @@ -8,11 +8,5 @@ ignore_errors=True [mypy-pandas.core.indexes.datetimelike] ignore_errors=True -[mypy-pandas.core.indexes.datetimes] -ignore_errors=True - [mypy-pandas.core.indexes.period] ignore_errors=True - -[mypy-pandas.core.indexes.timedeltas] -ignore_errors=True \ No newline at end of file diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 7454b015cb556..092cec00228cd 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -57,7 +57,7 @@ class DatetimeIndexOpsMixin(ExtensionOpsMixin): """ common ops mixin to support a unified interface datetimelike Index """ - _data = None # type: DatetimeLikeArrayMixin + _data = None # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are # properties there. 
They can be made into cache_readonly for Index @@ -220,9 +220,9 @@ def __contains__(self, key): # Try to run function on index first, and then on elements of index # Especially important for group-by functionality - def map(self, f): + def map(self, mapper, na_action=None): try: - result = f(self) + result = mapper(self) # Try to use this result if we can if isinstance(result, np.ndarray): @@ -232,7 +232,7 @@ def map(self, f): raise TypeError('The map function must return an Index object') return result except Exception: - return self.astype(object).map(f) + return self.astype(object).map(mapper) def sort_values(self, return_indexer=False, ascending=True): """ @@ -430,8 +430,8 @@ def argmax(self, axis=None, skipna=True, *args, **kwargs): # -------------------------------------------------------------------- # Rendering Methods - def _format_with_header(self, header, **kwargs): - return header + list(self._format_native_types(**kwargs)) + def _format_with_header(self, header, na_rep='NaT', **kwargs): + return header + list(self._format_native_types(na_rep, **kwargs)) @property def _formatter_func(self): @@ -509,7 +509,7 @@ def __rsub__(self, other): cls.__rsub__ = __rsub__ - def isin(self, values): + def isin(self, values, level=None): """ Compute boolean array of whether each index value is found in the passed set of values. 
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 7fd537fb9989a..e68431b79dcd3 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -4,8 +4,8 @@ import numpy as np -from pandas._libs import ( - Timestamp, index as libindex, join as libjoin, lib, tslib as libts) +from pandas._libs import Timestamp, index as libindex, lib, tslib as libts +import pandas._libs.join as libjoin from pandas._libs.tslibs import ccalendar, fields, parsing, timezones from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -1087,9 +1087,11 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): _is_monotonic_decreasing = Index.is_monotonic_decreasing _is_unique = Index.is_unique - _timezone = cache_readonly(DatetimeArray._timezone.fget) - is_normalized = cache_readonly(DatetimeArray.is_normalized.fget) - _resolution = cache_readonly(DatetimeArray._resolution.fget) + _timezone = cache_readonly(DatetimeArray._timezone.fget) # type: ignore + is_normalized = cache_readonly( + DatetimeArray.is_normalized.fget) # type: ignore + _resolution = cache_readonly( + DatetimeArray._resolution.fget) # type: ignore strftime = ea_passthrough(DatetimeArray.strftime) _has_same_tz = ea_passthrough(DatetimeArray._has_same_tz) From a8af7a1a73164e97627bb5b96dacfdb2ba10b24e Mon Sep 17 00:00:00 2001 From: Mats Maiwald <32721837+matsmaiwald@users.noreply.github.com> Date: Fri, 24 May 2019 17:47:01 +0200 Subject: [PATCH 10/34] =?UTF-8?q?Better=20error=20message=20for=20DataFram?= =?UTF-8?q?e.hist()=20without=20numerical=20columns=20(=E2=80=A6=20(#26483?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pandas/plotting/_core.py | 4 ++++ pandas/tests/plotting/test_hist_method.py | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 90297ecfa3415..fed4b0d90983c 100644 --- 
a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2426,6 +2426,10 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, data = data._get_numeric_data() naxes = len(data.columns) + if naxes == 0: + raise ValueError("hist method requires numerical columns, " + "nothing to plot.") + fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, sharex=sharex, sharey=sharey, figsize=figsize, layout=layout) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index c62ed21c2fb17..f3f6c9c7fc2d4 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -209,6 +209,16 @@ def test_hist_df_legacy(self): with pytest.raises(AttributeError): ser.hist(foo='bar') + @pytest.mark.slow + def test_hist_non_numerical_raises(self): + # gh-10444 + df = DataFrame(np.random.rand(10, 2)) + df_o = df.astype(np.object) + + msg = "hist method requires numerical columns, nothing to plot." + with pytest.raises(ValueError, match=msg): + df_o.hist() + @pytest.mark.slow def test_hist_layout(self): df = DataFrame(randn(100, 3)) From ac026742dfc74b6c26ef867fa846cc322a602847 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Fri, 24 May 2019 09:01:09 -0700 Subject: [PATCH 11/34] Excel Test Cleanup - ReadWriteClass (#26473) --- pandas/tests/io/test_excel.py | 427 +++++++++++++++++----------------- 1 file changed, 216 insertions(+), 211 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 112d14795d9bf..f9926cd26d8da 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -49,7 +49,6 @@ def ignore_xlrd_time_clock_warning(): yield -@td.skip_if_no('xlrd', '1.0.0') class SharedItems: @pytest.fixture(autouse=True) @@ -60,6 +59,20 @@ def setup_method(self, datapath): self.tsframe = _tsframe.copy() self.mixed_frame = _mixed_frame.copy() + +@td.skip_if_no('xlrd', '1.0.0') +class ReadingTestsBase(SharedItems): + # This is based 
on ExcelWriterBase + + @pytest.fixture(autouse=True, params=['xlrd', None]) + def set_engine(self, request): + func_name = "get_exceldf" + old_func = getattr(self, func_name) + new_func = partial(old_func, engine=request.param) + setattr(self, func_name, new_func) + yield + setattr(self, func_name, old_func) + def get_csv_refdf(self, basename): """ Obtain the reference data from read_csv with the Python engine. @@ -114,19 +127,6 @@ def get_exceldf(self, basename, ext, *args, **kwds): pth = os.path.join(self.dirpath, basename + ext) return read_excel(pth, *args, **kwds) - -class ReadingTestsBase(SharedItems): - # This is based on ExcelWriterBase - - @pytest.fixture(autouse=True, params=['xlrd', None]) - def set_engine(self, request): - func_name = "get_exceldf" - old_func = getattr(self, func_name) - new_func = partial(old_func, engine=request.param) - setattr(self, func_name, new_func) - yield - setattr(self, func_name, old_func) - @td.skip_if_no("xlrd", "1.0.1") # see gh-22682 def test_usecols_int(self, ext): @@ -565,74 +565,6 @@ def test_read_excel_blank_with_header(self, ext): actual = self.get_exceldf('blank_with_header', ext, 'Sheet1') tm.assert_frame_equal(actual, expected) - @td.skip_if_no("xlwt") - @td.skip_if_no("openpyxl") - @pytest.mark.parametrize("header,expected", [ - (None, DataFrame([np.nan] * 4)), - (0, DataFrame({"Unnamed: 0": [np.nan] * 3})) - ]) - def test_read_one_empty_col_no_header(self, ext, header, expected): - # xref gh-12292 - filename = "no_header" - df = pd.DataFrame( - [["", 1, 100], - ["", 2, 200], - ["", 3, 300], - ["", 4, 400]] - ) - - with ensure_clean(ext) as path: - df.to_excel(path, filename, index=False, header=False) - result = read_excel(path, filename, usecols=[0], header=header) - - tm.assert_frame_equal(result, expected) - - @td.skip_if_no("xlwt") - @td.skip_if_no("openpyxl") - @pytest.mark.parametrize("header,expected", [ - (None, DataFrame([0] + [np.nan] * 4)), - (0, DataFrame([np.nan] * 4)) - ]) - def 
test_read_one_empty_col_with_header(self, ext, header, expected): - filename = "with_header" - df = pd.DataFrame( - [["", 1, 100], - ["", 2, 200], - ["", 3, 300], - ["", 4, 400]] - ) - - with ensure_clean(ext) as path: - df.to_excel(path, 'with_header', index=False, header=True) - result = read_excel(path, filename, usecols=[0], header=header) - - tm.assert_frame_equal(result, expected) - - @td.skip_if_no('openpyxl') - @td.skip_if_no('xlwt') - def test_set_column_names_in_parameter(self, ext): - # GH 12870 : pass down column names associated with - # keyword argument names - refdf = pd.DataFrame([[1, 'foo'], [2, 'bar'], - [3, 'baz']], columns=['a', 'b']) - - with ensure_clean(ext) as pth: - with ExcelWriter(pth) as writer: - refdf.to_excel(writer, 'Data_no_head', - header=False, index=False) - refdf.to_excel(writer, 'Data_with_head', index=False) - - refdf.columns = ['A', 'B'] - - with ExcelFile(pth) as reader: - xlsdf_no_head = read_excel(reader, 'Data_no_head', - header=None, names=['A', 'B']) - xlsdf_with_head = read_excel(reader, 'Data_with_head', - index_col=None, names=['A', 'B']) - - tm.assert_frame_equal(xlsdf_no_head, refdf) - tm.assert_frame_equal(xlsdf_with_head, refdf) - def test_date_conversion_overflow(self, ext): # GH 10001 : pandas.ExcelFile ignore parse_dates=False expected = pd.DataFrame([[pd.Timestamp('2016-03-12'), 'Marc Johnson'], @@ -741,7 +673,6 @@ def test_read_from_file_url(self, ext): tm.assert_frame_equal(url_table, local_table) - @td.skip_if_no('pathlib') def test_read_from_pathlib_path(self, ext): # GH12655 @@ -780,32 +711,6 @@ def test_reader_closes_file(self, ext): assert f.closed - @td.skip_if_no("xlwt") - @td.skip_if_no("openpyxl") - def test_creating_and_reading_multiple_sheets(self, ext): - # see gh-9450 - # - # Test reading multiple sheets, from a runtime - # created Excel file with multiple sheets. 
- def tdf(col_sheet_name): - d, i = [11, 22, 33], [1, 2, 3] - return DataFrame(d, i, columns=[col_sheet_name]) - - sheets = ["AAA", "BBB", "CCC"] - - dfs = [tdf(s) for s in sheets] - dfs = dict(zip(sheets, dfs)) - - with ensure_clean(ext) as pth: - with ExcelWriter(pth) as ew: - for sheetname, df in dfs.items(): - df.to_excel(ew, sheetname) - - dfs_returned = read_excel(pth, sheet_name=sheets, index_col=0) - - for s in sheets: - tm.assert_frame_equal(dfs[s], dfs_returned[s]) - def test_reader_seconds(self, ext): # Test reading times with and without milliseconds. GH5945. @@ -902,84 +807,6 @@ def test_read_excel_multiindex_header_only(self, ext): expected = DataFrame([[1, 2, 3, 4]] * 2, columns=exp_columns) tm.assert_frame_equal(result, expected) - @td.skip_if_no("xlsxwriter") - def test_read_excel_multiindex_empty_level(self, ext): - # see gh-12453 - with ensure_clean(ext) as path: - df = DataFrame({ - ("One", "x"): {0: 1}, - ("Two", "X"): {0: 3}, - ("Two", "Y"): {0: 7}, - ("Zero", ""): {0: 0} - }) - - expected = DataFrame({ - ("One", "x"): {0: 1}, - ("Two", "X"): {0: 3}, - ("Two", "Y"): {0: 7}, - ("Zero", "Unnamed: 4_level_1"): {0: 0} - }) - - df.to_excel(path) - actual = pd.read_excel(path, header=[0, 1], index_col=0) - tm.assert_frame_equal(actual, expected) - - df = pd.DataFrame({ - ("Beg", ""): {0: 0}, - ("Middle", "x"): {0: 1}, - ("Tail", "X"): {0: 3}, - ("Tail", "Y"): {0: 7} - }) - - expected = pd.DataFrame({ - ("Beg", "Unnamed: 1_level_1"): {0: 0}, - ("Middle", "x"): {0: 1}, - ("Tail", "X"): {0: 3}, - ("Tail", "Y"): {0: 7} - }) - - df.to_excel(path) - actual = pd.read_excel(path, header=[0, 1], index_col=0) - tm.assert_frame_equal(actual, expected) - - @td.skip_if_no("xlsxwriter") - @pytest.mark.parametrize("c_idx_names", [True, False]) - @pytest.mark.parametrize("r_idx_names", [True, False]) - @pytest.mark.parametrize("c_idx_levels", [1, 3]) - @pytest.mark.parametrize("r_idx_levels", [1, 3]) - def test_excel_multindex_roundtrip(self, ext, c_idx_names, 
r_idx_names, - c_idx_levels, r_idx_levels): - # see gh-4679 - with ensure_clean(ext) as pth: - if c_idx_levels == 1 and c_idx_names: - pytest.skip("Column index name cannot be " - "serialized unless it's a MultiIndex") - - # Empty name case current read in as - # unnamed levels, not Nones. - check_names = r_idx_names or r_idx_levels <= 1 - - df = mkdf(5, 5, c_idx_names, r_idx_names, - c_idx_levels, r_idx_levels) - df.to_excel(pth) - - act = pd.read_excel(pth, index_col=list(range(r_idx_levels)), - header=list(range(c_idx_levels))) - tm.assert_frame_equal(df, act, check_names=check_names) - - df.iloc[0, :] = np.nan - df.to_excel(pth) - - act = pd.read_excel(pth, index_col=list(range(r_idx_levels)), - header=list(range(c_idx_levels))) - tm.assert_frame_equal(df, act, check_names=check_names) - - df.iloc[-1, :] = np.nan - df.to_excel(pth) - act = pd.read_excel(pth, index_col=list(range(r_idx_levels)), - header=list(range(c_idx_levels))) - tm.assert_frame_equal(df, act, check_names=check_names) - def test_excel_old_index_format(self, ext): # see gh-4679 filename = "test_index_name_pre17" + ext @@ -1054,30 +881,6 @@ def test_read_excel_chunksize(self, ext): pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), chunksize=100) - @td.skip_if_no("xlwt") - @td.skip_if_no("openpyxl") - def test_read_excel_parse_dates(self, ext): - # see gh-11544, gh-12051 - df = DataFrame( - {"col": [1, 2, 3], - "date_strings": pd.date_range("2012-01-01", periods=3)}) - df2 = df.copy() - df2["date_strings"] = df2["date_strings"].dt.strftime("%m/%d/%Y") - - with ensure_clean(ext) as pth: - df2.to_excel(pth) - - res = read_excel(pth, index_col=0) - tm.assert_frame_equal(df2, res) - - res = read_excel(pth, parse_dates=["date_strings"], index_col=0) - tm.assert_frame_equal(df, res) - - date_parser = lambda x: pd.datetime.strptime(x, "%m/%d/%Y") - res = read_excel(pth, parse_dates=["date_strings"], - date_parser=date_parser, index_col=0) - tm.assert_frame_equal(df, res) - def 
test_read_excel_skiprows_list(self, ext): # GH 4903 actual = pd.read_excel(os.path.join(self.dirpath, @@ -1141,6 +944,208 @@ def test_read_excel_squeeze(self, ext): tm.assert_series_equal(actual, expected) +@td.skip_if_no('xlrd', '1.0.0') +@pytest.mark.parametrize("ext", ['.xls', '.xlsx', '.xlsm']) +class TestRoundTrip: + + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") + @pytest.mark.parametrize("header,expected", [ + (None, DataFrame([np.nan] * 4)), + (0, DataFrame({"Unnamed: 0": [np.nan] * 3})) + ]) + def test_read_one_empty_col_no_header(self, ext, header, expected): + # xref gh-12292 + filename = "no_header" + df = pd.DataFrame( + [["", 1, 100], + ["", 2, 200], + ["", 3, 300], + ["", 4, 400]] + ) + + with ensure_clean(ext) as path: + df.to_excel(path, filename, index=False, header=False) + result = read_excel(path, filename, usecols=[0], header=header) + + tm.assert_frame_equal(result, expected) + + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") + @pytest.mark.parametrize("header,expected", [ + (None, DataFrame([0] + [np.nan] * 4)), + (0, DataFrame([np.nan] * 4)) + ]) + def test_read_one_empty_col_with_header(self, ext, header, expected): + filename = "with_header" + df = pd.DataFrame( + [["", 1, 100], + ["", 2, 200], + ["", 3, 300], + ["", 4, 400]] + ) + + with ensure_clean(ext) as path: + df.to_excel(path, 'with_header', index=False, header=True) + result = read_excel(path, filename, usecols=[0], header=header) + + tm.assert_frame_equal(result, expected) + + @td.skip_if_no('openpyxl') + @td.skip_if_no('xlwt') + def test_set_column_names_in_parameter(self, ext): + # GH 12870 : pass down column names associated with + # keyword argument names + refdf = pd.DataFrame([[1, 'foo'], [2, 'bar'], + [3, 'baz']], columns=['a', 'b']) + + with ensure_clean(ext) as pth: + with ExcelWriter(pth) as writer: + refdf.to_excel(writer, 'Data_no_head', + header=False, index=False) + refdf.to_excel(writer, 'Data_with_head', index=False) + + refdf.columns = ['A', 'B'] + + 
with ExcelFile(pth) as reader: + xlsdf_no_head = read_excel(reader, 'Data_no_head', + header=None, names=['A', 'B']) + xlsdf_with_head = read_excel(reader, 'Data_with_head', + index_col=None, names=['A', 'B']) + + tm.assert_frame_equal(xlsdf_no_head, refdf) + tm.assert_frame_equal(xlsdf_with_head, refdf) + + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") + def test_creating_and_reading_multiple_sheets(self, ext): + # see gh-9450 + # + # Test reading multiple sheets, from a runtime + # created Excel file with multiple sheets. + def tdf(col_sheet_name): + d, i = [11, 22, 33], [1, 2, 3] + return DataFrame(d, i, columns=[col_sheet_name]) + + sheets = ["AAA", "BBB", "CCC"] + + dfs = [tdf(s) for s in sheets] + dfs = dict(zip(sheets, dfs)) + + with ensure_clean(ext) as pth: + with ExcelWriter(pth) as ew: + for sheetname, df in dfs.items(): + df.to_excel(ew, sheetname) + + dfs_returned = read_excel(pth, sheet_name=sheets, index_col=0) + + for s in sheets: + tm.assert_frame_equal(dfs[s], dfs_returned[s]) + + @td.skip_if_no("xlsxwriter") + def test_read_excel_multiindex_empty_level(self, ext): + # see gh-12453 + with ensure_clean(ext) as path: + df = DataFrame({ + ("One", "x"): {0: 1}, + ("Two", "X"): {0: 3}, + ("Two", "Y"): {0: 7}, + ("Zero", ""): {0: 0} + }) + + expected = DataFrame({ + ("One", "x"): {0: 1}, + ("Two", "X"): {0: 3}, + ("Two", "Y"): {0: 7}, + ("Zero", "Unnamed: 4_level_1"): {0: 0} + }) + + df.to_excel(path) + actual = pd.read_excel(path, header=[0, 1], index_col=0) + tm.assert_frame_equal(actual, expected) + + df = pd.DataFrame({ + ("Beg", ""): {0: 0}, + ("Middle", "x"): {0: 1}, + ("Tail", "X"): {0: 3}, + ("Tail", "Y"): {0: 7} + }) + + expected = pd.DataFrame({ + ("Beg", "Unnamed: 1_level_1"): {0: 0}, + ("Middle", "x"): {0: 1}, + ("Tail", "X"): {0: 3}, + ("Tail", "Y"): {0: 7} + }) + + df.to_excel(path) + actual = pd.read_excel(path, header=[0, 1], index_col=0) + tm.assert_frame_equal(actual, expected) + + @td.skip_if_no("xlsxwriter") + 
@pytest.mark.parametrize("c_idx_names", [True, False]) + @pytest.mark.parametrize("r_idx_names", [True, False]) + @pytest.mark.parametrize("c_idx_levels", [1, 3]) + @pytest.mark.parametrize("r_idx_levels", [1, 3]) + def test_excel_multindex_roundtrip(self, ext, c_idx_names, r_idx_names, + c_idx_levels, r_idx_levels): + # see gh-4679 + with ensure_clean(ext) as pth: + if c_idx_levels == 1 and c_idx_names: + pytest.skip("Column index name cannot be " + "serialized unless it's a MultiIndex") + + # Empty name case current read in as + # unnamed levels, not Nones. + check_names = r_idx_names or r_idx_levels <= 1 + + df = mkdf(5, 5, c_idx_names, r_idx_names, + c_idx_levels, r_idx_levels) + df.to_excel(pth) + + act = pd.read_excel(pth, index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels))) + tm.assert_frame_equal(df, act, check_names=check_names) + + df.iloc[0, :] = np.nan + df.to_excel(pth) + + act = pd.read_excel(pth, index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels))) + tm.assert_frame_equal(df, act, check_names=check_names) + + df.iloc[-1, :] = np.nan + df.to_excel(pth) + act = pd.read_excel(pth, index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels))) + tm.assert_frame_equal(df, act, check_names=check_names) + + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") + def test_read_excel_parse_dates(self, ext): + # see gh-11544, gh-12051 + df = DataFrame( + {"col": [1, 2, 3], + "date_strings": pd.date_range("2012-01-01", periods=3)}) + df2 = df.copy() + df2["date_strings"] = df2["date_strings"].dt.strftime("%m/%d/%Y") + + with ensure_clean(ext) as pth: + df2.to_excel(pth) + + res = read_excel(pth, index_col=0) + tm.assert_frame_equal(df2, res) + + res = read_excel(pth, parse_dates=["date_strings"], index_col=0) + tm.assert_frame_equal(df, res) + + date_parser = lambda x: pd.datetime.strptime(x, "%m/%d/%Y") + res = read_excel(pth, parse_dates=["date_strings"], + date_parser=date_parser, index_col=0) + 
tm.assert_frame_equal(df, res) + + +@td.skip_if_no('xlrd', '1.0.0') @pytest.mark.parametrize("ext", ['.xls', '.xlsx', '.xlsm']) class TestXlrdReader(ReadingTestsBase): """ From 91512111bc0f42ac15695d2af94e3fff3d6ba536 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 24 May 2019 15:07:54 -0700 Subject: [PATCH 12/34] CLN: pd.TimeGrouper (#26477) --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/__init__.py | 2 +- pandas/core/api.py | 12 ---------- pandas/tests/api/test_api.py | 13 +---------- pandas/tests/groupby/test_timegrouper.py | 7 +++--- pandas/tests/resample/test_base.py | 4 ++-- pandas/tests/resample/test_datetime_index.py | 14 ++++++------ pandas/tests/resample/test_time_grouper.py | 24 ++++++++------------ 8 files changed, 25 insertions(+), 53 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index d86379c4d0703..d4104ab1d79a1 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -313,7 +313,7 @@ Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Removed ``Panel`` (:issue:`25047`, :issue:`25191`, :issue:`25231`) - -- +- Removed previously deprecated ``TimeGrouper`` (:issue:`16942`) - .. 
_whatsnew_0250.performance: diff --git a/pandas/__init__.py b/pandas/__init__.py index bd367bbe27d5e..6af6f3093c120 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -65,7 +65,7 @@ to_numeric, to_datetime, to_timedelta, # misc - np, TimeGrouper, Grouper, factorize, unique, value_counts, + np, Grouper, factorize, unique, value_counts, array, Categorical, set_eng_float_format, Series, DataFrame, Panel) diff --git a/pandas/core/api.py b/pandas/core/api.py index 96f623bda9a8a..b7398e433f28f 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -45,15 +45,3 @@ from pandas.tseries.offsets import DateOffset from pandas.core.tools.datetimes import to_datetime from pandas.core.tools.timedeltas import to_timedelta - - -# Deprecation: xref gh-16747 -class TimeGrouper: - - def __new__(cls, *args, **kwargs): - from pandas.core.resample import TimeGrouper - import warnings - warnings.warn("pd.TimeGrouper is deprecated and will be removed; " - "Please use pd.Grouper(freq=...)", - FutureWarning, stacklevel=2) - return TimeGrouper(*args, **kwargs) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 7ee0225723675..c92808200ebea 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -50,7 +50,7 @@ class TestPDApi(Base): ] # these are already deprecated; awaiting removal - deprecated_classes = ['TimeGrouper', 'Panel'] + deprecated_classes = ['Panel'] # these should be deprecated in the future deprecated_classes_in_future = [] @@ -132,17 +132,6 @@ def test_testing(self): self.check(testing, self.funcs) -class TestTopLevelDeprecations: - - # top-level API deprecations - # GH 13790 - - def test_TimeGrouper(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - pd.TimeGrouper(freq='D') - - class TestCDateRange: def test_deprecation_cdaterange(self): diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 21c71154c95ef..ef05e6ada4890 100644 --- 
a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -10,6 +10,7 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range +from pandas.core.groupby.grouper import Grouper from pandas.core.groupby.ops import BinGrouper from pandas.util import testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -365,10 +366,8 @@ def sumfunc_value(x): return x.value.sum() expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) - .apply(sumfunc_value)) + result = (df_dt.groupby(Grouper(freq='M', key='date')) + .apply(sumfunc_value)) assert_series_equal(result.reset_index(drop=True), expected.reset_index(drop=True)) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index c3c908f4b0d1b..63fa2007e401d 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -6,10 +6,10 @@ import pandas as pd from pandas import DataFrame, Series from pandas.core.groupby.groupby import DataError +from pandas.core.groupby.grouper import Grouper from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import PeriodIndex, period_range from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range -from pandas.core.resample import TimeGrouper import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_index_equal, @@ -214,7 +214,7 @@ def test_apply_to_empty_series(empty_series): def test_resampler_is_iterable(series): # GH 15314 freq = 'H' - tg = TimeGrouper(freq, convention='start') + tg = Grouper(freq=freq, convention='start') grouped = series.groupby(tg) resampled = series.resample(freq) for (rk, rv), (gk, gv) in zip(resampled, grouped): diff --git 
a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index c2868979e9d8d..5711174ef0c9f 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -10,10 +10,10 @@ import pandas as pd from pandas import DataFrame, Series, Timedelta, Timestamp, isna, notna +from pandas.core.groupby.grouper import Grouper from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import Period, period_range -from pandas.core.resample import ( - DatetimeIndex, TimeGrouper, _get_timestamp_range_edges) +from pandas.core.resample import DatetimeIndex, _get_timestamp_range_edges import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) @@ -42,7 +42,7 @@ def test_custom_grouper(index): dti = index s = Series(np.array([1] * len(dti)), index=dti, dtype='int64') - b = TimeGrouper(Minute(5)) + b = Grouper(freq=Minute(5)) g = s.groupby(b) # check all cython functions work @@ -50,7 +50,7 @@ def test_custom_grouper(index): for f in funcs: g._cython_agg_general(f) - b = TimeGrouper(Minute(5), closed='right', label='right') + b = Grouper(freq=Minute(5), closed='right', label='right') g = s.groupby(b) # check all cython functions work funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var'] @@ -116,7 +116,7 @@ def test_resample_integerarray(): def test_resample_basic_grouper(series): s = series result = s.resample('5Min').last() - grouper = TimeGrouper(Minute(5), closed='left', label='left') + grouper = Grouper(freq=Minute(5), closed='left', label='left') expected = s.groupby(grouper).agg(lambda x: x[-1]) assert_series_equal(result, expected) @@ -373,7 +373,7 @@ def test_resample_upsampling_picked_but_not_correct(): def test_resample_frame_basic(): df = tm.makeTimeDataFrame() - b = TimeGrouper('M') + b = Grouper(freq='M') g = df.groupby(b) # check all cython functions work @@ -521,7 +521,7 @@ def 
test_nearest_upsample_with_limit(): def test_resample_ohlc(series): s = series - grouper = TimeGrouper(Minute(5)) + grouper = Grouper(freq=Minute(5)) expect = s.groupby(grouper).agg(lambda x: x[-1]) result = s.resample('5Min').ohlc() diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 2f330d1f2484b..3f767f8e7100f 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -6,8 +6,8 @@ import pandas as pd from pandas import DataFrame, Series +from pandas.core.groupby.grouper import Grouper from pandas.core.indexes.datetimes import date_range -from pandas.core.resample import TimeGrouper import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -16,9 +16,7 @@ def test_apply(): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - grouper = pd.TimeGrouper(freq='A', label='right', closed='right') + grouper = Grouper(freq='A', label='right', closed='right') grouped = test_series.groupby(grouper) @@ -38,9 +36,7 @@ def test_count(): expected = test_series.groupby(lambda x: x.year).count() - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - grouper = pd.TimeGrouper(freq='A', label='right', closed='right') + grouper = Grouper(freq='A', label='right', closed='right') result = test_series.groupby(grouper).count() expected.index = result.index assert_series_equal(result, expected) @@ -64,7 +60,7 @@ def test_apply_iteration(): N = 1000 ind = pd.date_range(start="2000-01-01", freq="D", periods=N) df = DataFrame({'open': 1, 'close': 2}, index=ind) - tg = TimeGrouper('M') + tg = Grouper(freq='M') _, grouper, _ = tg._get_grouper(df) @@ -93,7 +89,7 @@ def test_fails_on_no_datetime_index(name, func): msg = ("Only valid with DatetimeIndex, TimedeltaIndex " "or PeriodIndex, but got an instance of '{}'".format(name)) with pytest.raises(TypeError, match=msg): - 
df.groupby(TimeGrouper('D')) + df.groupby(Grouper(freq='D')) def test_aaa_group_order(): @@ -105,7 +101,7 @@ def test_aaa_group_order(): df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3), datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4 - grouped = df.groupby(TimeGrouper(key='key', freq='D')) + grouped = df.groupby(Grouper(key='key', freq='D')) tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)), df[::5]) @@ -135,7 +131,7 @@ def test_aggregate_normal(resample_method): datetime(2013, 1, 5)] * 4 normal_grouped = normal_df.groupby('key') - dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) + dt_grouped = dt_df.groupby(Grouper(key='key', freq='D')) expected = getattr(normal_grouped, resample_method)() dt_result = getattr(dt_grouped, resample_method)() @@ -195,7 +191,7 @@ def test_aggregate_with_nat(func, fill_value): datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4 normal_grouped = normal_df.groupby('key') - dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) + dt_grouped = dt_df.groupby(Grouper(key='key', freq='D')) normal_result = getattr(normal_grouped, func)() dt_result = getattr(dt_grouped, func)() @@ -222,7 +218,7 @@ def test_aggregate_with_nat_size(): datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4 normal_grouped = normal_df.groupby('key') - dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) + dt_grouped = dt_df.groupby(Grouper(key='key', freq='D')) normal_result = normal_grouped.size() dt_result = dt_grouped.size() @@ -238,7 +234,7 @@ def test_aggregate_with_nat_size(): def test_repr(): # GH18203 - result = repr(TimeGrouper(key='A', freq='H')) + result = repr(Grouper(key='A', freq='H')) expected = ("TimeGrouper(key='A', freq=, axis=0, sort=True, " "closed='left', label='left', how='mean', " "convention='e', base=0)") From 8c8a1759a92c87ff3a56f8cef2d6ba2d9d500bc1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 24 May 2019 18:18:04 -0700 Subject: [PATCH 13/34] CLN: Remove 
ExcelWriter.sheetname (#26464) xref gh-6581 --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/io/excel/_base.py | 24 ++++------------------- pandas/tests/io/test_excel.py | 34 +++++++++++---------------------- 3 files changed, 16 insertions(+), 44 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index d4104ab1d79a1..29cc14b638996 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -312,7 +312,7 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Removed ``Panel`` (:issue:`25047`, :issue:`25191`, :issue:`25231`) -- +- Removed the previously deprecated ``sheetname`` keyword in :func:`read_excel` (:issue:`16442`, :issue:`20938`) - Removed previously deprecated ``TimeGrouper`` (:issue:`16942`) - diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index c0678575fd6f0..a0d51e85aa4f3 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -5,7 +5,6 @@ import os from textwrap import fill from urllib.request import urlopen -import warnings from pandas._config import config @@ -291,15 +290,10 @@ def read_excel(io, mangle_dupe_cols=True, **kwds): - # Can't use _deprecate_kwarg since sheetname=None has a special meaning - if is_integer(sheet_name) and sheet_name == 0 and 'sheetname' in kwds: - warnings.warn("The `sheetname` keyword is deprecated, use " - "`sheet_name` instead", FutureWarning, stacklevel=2) - sheet_name = kwds.pop("sheetname") - - if 'sheet' in kwds: - raise TypeError("read_excel() got an unexpected keyword argument " - "`sheet`") + for arg in ('sheet', 'sheetname'): + if arg in kwds: + raise TypeError("read_excel() got an unexpected keyword argument " + "`{}`".format(arg)) if not isinstance(io, ExcelFile): io = ExcelFile(io, engine=engine) @@ -833,16 +827,6 @@ def parse(self, DataFrame or dict of DataFrames DataFrame from the passed in Excel file. 
""" - - # Can't use _deprecate_kwarg since sheetname=None has a special meaning - if is_integer(sheet_name) and sheet_name == 0 and 'sheetname' in kwds: - warnings.warn("The `sheetname` keyword is deprecated, use " - "`sheet_name` instead", FutureWarning, stacklevel=2) - sheet_name = kwds.pop("sheetname") - elif 'sheetname' in kwds: - raise TypeError("Cannot specify both `sheet_name` " - "and `sheetname`. Use just `sheet_name`") - if 'chunksize' in kwds: raise NotImplementedError("chunksize keyword of read_excel " "is not implemented") diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index f9926cd26d8da..44ce3111c3a1e 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -342,15 +342,15 @@ def test_excel_passes_na(self, ext): tm.assert_frame_equal(parsed, expected) @td.skip_if_no('xlrd', '1.0.1') # GH-22682 - def test_deprecated_sheetname(self, ext): + @pytest.mark.parametrize('arg', ['sheet', 'sheetname']) + def test_unexpected_kwargs_raises(self, ext, arg): # gh-17964 excel = self.get_excelfile('test1', ext) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - read_excel(excel, sheetname='Sheet1') - - with pytest.raises(TypeError): - read_excel(excel, sheet='Sheet1') + kwarg = {arg: 'Sheet1'} + msg = "unexpected keyword argument `{}`".format(arg) + with pytest.raises(TypeError, match=msg): + read_excel(excel, **kwarg) @td.skip_if_no('xlrd', '1.0.1') # GH-22682 def test_excel_table_sheet_by_index(self, ext): @@ -588,32 +588,20 @@ def test_sheet_name_and_sheetname(self, ext): df_ref = self.get_csv_refdf(filename) df1 = self.get_exceldf(filename, ext, sheet_name=sheet_name, index_col=0) # doc - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - with ignore_xlrd_time_clock_warning(): - df2 = self.get_exceldf(filename, ext, index_col=0, - sheetname=sheet_name) # backward compat + with ignore_xlrd_time_clock_warning(): + df2 = self.get_exceldf(filename, ext, index_col=0, 
+ sheet_name=sheet_name) excel = self.get_excelfile(filename, ext) df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - df2_parse = excel.parse(index_col=0, - sheetname=sheet_name) # backward compat + df2_parse = excel.parse(index_col=0, + sheet_name=sheet_name) tm.assert_frame_equal(df1, df_ref, check_names=False) tm.assert_frame_equal(df2, df_ref, check_names=False) tm.assert_frame_equal(df1_parse, df_ref, check_names=False) tm.assert_frame_equal(df2_parse, df_ref, check_names=False) - def test_sheet_name_both_raises(self, ext): - with pytest.raises(TypeError, match="Cannot specify both"): - self.get_exceldf('test1', ext, sheetname='Sheet1', - sheet_name='Sheet1') - - excel = self.get_excelfile('test1', ext) - with pytest.raises(TypeError, match="Cannot specify both"): - excel.parse(sheetname='Sheet1', - sheet_name='Sheet1') - def test_excel_read_buffer(self, ext): pth = os.path.join(self.dirpath, 'test1' + ext) From 9d6d95994ad2d58bad0ae0910ea9b5ab2df6be50 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 25 May 2019 11:40:15 -0700 Subject: [PATCH 14/34] CLN: Remove deprecated parse_cols from read_excel (#26522) xref gh-6581 --- doc/source/whatsnew/v0.25.0.rst | 4 ++-- pandas/io/excel/_base.py | 10 +--------- pandas/tests/io/test_excel.py | 22 +--------------------- 3 files changed, 4 insertions(+), 32 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 29cc14b638996..af59a34245660 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -313,8 +313,8 @@ Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Removed ``Panel`` (:issue:`25047`, :issue:`25191`, :issue:`25231`) - Removed the previously deprecated ``sheetname`` keyword in :func:`read_excel` (:issue:`16442`, :issue:`20938`) -- Removed previously deprecated ``TimeGrouper`` (:issue:`16942`) -- 
+- Removed the previously deprecated ``TimeGrouper`` (:issue:`16942`) +- Removed the previously deprecated ``parse_cols`` keyword in :func:`read_excel` (:issue:`16488`) .. _whatsnew_0250.performance: diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index a0d51e85aa4f3..3af6be7a371e7 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -64,12 +64,6 @@ those columns will be combined into a ``MultiIndex``. If a subset of data is selected with ``usecols``, index_col is based on the subset. -parse_cols : int or list, default None - Alias of `usecols`. - - .. deprecated:: 0.21.0 - Use `usecols` instead. - usecols : int, str, list-like, or callable default None Return a subset of the columns. @@ -260,14 +254,12 @@ @Appender(_read_excel_doc) -@deprecate_kwarg("parse_cols", "usecols") @deprecate_kwarg("skip_footer", "skipfooter") def read_excel(io, sheet_name=0, header=0, names=None, index_col=None, - parse_cols=None, usecols=None, squeeze=False, dtype=None, @@ -290,7 +282,7 @@ def read_excel(io, mangle_dupe_cols=True, **kwds): - for arg in ('sheet', 'sheetname'): + for arg in ('sheet', 'sheetname', 'parse_cols'): if arg in kwds: raise TypeError("read_excel() got an unexpected keyword argument " "`{}`".format(arg)) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 44ce3111c3a1e..100de227aa97c 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -147,17 +147,9 @@ def test_usecols_int(self, ext): df2 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1], index_col=0, usecols=3) - # parse_cols instead of usecols, usecols as int - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - with ignore_xlrd_time_clock_warning(): - df3 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1], - index_col=0, parse_cols=3) - # TODO add index to xls file) tm.assert_frame_equal(df1, df_ref, check_names=False) tm.assert_frame_equal(df2, df_ref, check_names=False) - 
tm.assert_frame_equal(df3, df_ref, check_names=False) @td.skip_if_no('xlrd', '1.0.1') # GH-22682 def test_usecols_list(self, ext): @@ -169,15 +161,9 @@ def test_usecols_list(self, ext): df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, usecols=[0, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - with ignore_xlrd_time_clock_warning(): - df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, parse_cols=[0, 2, 3]) - # TODO add index to xls file) tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) - tm.assert_frame_equal(df3, dfref, check_names=False) @td.skip_if_no('xlrd', '1.0.1') # GH-22682 def test_usecols_str(self, ext): @@ -190,15 +176,9 @@ def test_usecols_str(self, ext): df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, usecols='A:D') - with tm.assert_produces_warning(FutureWarning): - with ignore_xlrd_time_clock_warning(): - df4 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, parse_cols='A:D') - # TODO add index to xls, read xls ignores index name ? 
tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) - tm.assert_frame_equal(df4, df1, check_names=False) df1 = dfref.reindex(columns=['B', 'C']) df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, @@ -342,7 +322,7 @@ def test_excel_passes_na(self, ext): tm.assert_frame_equal(parsed, expected) @td.skip_if_no('xlrd', '1.0.1') # GH-22682 - @pytest.mark.parametrize('arg', ['sheet', 'sheetname']) + @pytest.mark.parametrize('arg', ['sheet', 'sheetname', 'parse_cols']) def test_unexpected_kwargs_raises(self, ext, arg): # gh-17964 excel = self.get_excelfile('test1', ext) From 3bb47664e28ae5e3d33748cbf1825c4acbd4297e Mon Sep 17 00:00:00 2001 From: Mak Sze Chun Date: Sun, 26 May 2019 03:24:39 +0800 Subject: [PATCH 15/34] [TEST] Add two more parameters to the test_dti_add_sub_nonzero_mth_offset (#26392) * Add two more parameters to the test * Add array into the boy and add parameter freq --- pandas/tests/arithmetic/test_datetime64.py | 38 ++++++++++++++-------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 910fa4818c5de..13adae279c989 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1435,27 +1435,39 @@ def test_dt64arr_add_sub_offset_ndarray(self, tz_naive_fixture, expected = tm.box_expected(expected, box_with_array) tm.assert_equal(res, expected) - @pytest.mark.parametrize("op, offset, exp", [ + @pytest.mark.parametrize("op, offset, exp, exp_freq", [ ('__add__', pd.DateOffset(months=3, days=10), - DatetimeIndex([Timestamp('2014-04-11'), Timestamp('2015-04-11'), - Timestamp('2016-04-11'), Timestamp('2017-04-11')])), + [Timestamp('2014-04-11'), Timestamp('2015-04-11'), + Timestamp('2016-04-11'), Timestamp('2017-04-11')], + None), ('__add__', pd.DateOffset(months=3), - DatetimeIndex([Timestamp('2014-04-01'), Timestamp('2015-04-01'), - 
Timestamp('2016-04-01'), Timestamp('2017-04-01')])), + [Timestamp('2014-04-01'), Timestamp('2015-04-01'), + Timestamp('2016-04-01'), Timestamp('2017-04-01')], + "AS-APR"), ('__sub__', pd.DateOffset(months=3, days=10), - DatetimeIndex([Timestamp('2013-09-21'), Timestamp('2014-09-21'), - Timestamp('2015-09-21'), Timestamp('2016-09-21')])), + [Timestamp('2013-09-21'), Timestamp('2014-09-21'), + Timestamp('2015-09-21'), Timestamp('2016-09-21')], + None), ('__sub__', pd.DateOffset(months=3), - DatetimeIndex([Timestamp('2013-10-01'), Timestamp('2014-10-01'), - Timestamp('2015-10-01'), Timestamp('2016-10-01')])) - + [Timestamp('2013-10-01'), Timestamp('2014-10-01'), + Timestamp('2015-10-01'), Timestamp('2016-10-01')], + "AS-OCT") ]) - def test_dti_add_sub_nonzero_mth_offset(self, op, offset, exp): + def test_dti_add_sub_nonzero_mth_offset(self, op, offset, + exp, exp_freq, + tz_aware_fixture, + box_with_array): # GH 26258 - date = date_range(start='01 Jan 2014', end='01 Jan 2017', freq='AS') + tz = tz_aware_fixture + date = date_range(start='01 Jan 2014', end='01 Jan 2017', freq='AS', + tz=tz) + date = tm.box_expected(date, box_with_array, False) mth = getattr(date, op) result = mth(offset) - tm.assert_equal(result, exp) + + expected = pd.DatetimeIndex(exp, tz=tz, freq=exp_freq) + expected = tm.box_expected(expected, box_with_array, False) + tm.assert_equal(result, expected) class TestDatetime64OverflowHandling: From 014abdc3553bb49c681bff11e09fb7c55f4500db Mon Sep 17 00:00:00 2001 From: Nanda H Krishna Date: Sun, 26 May 2019 07:18:02 +0530 Subject: [PATCH 16/34] Remove py.path special handling from io.common (#26458) --- pandas/io/common.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index f9cd1806763e2..34635ebf64ad6 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -9,6 +9,7 @@ import lzma import mmap import os +import pathlib from urllib.error import URLError # noqa from 
urllib.parse import ( # noqa urlencode, urljoin, urlparse as parse_url, uses_netloc, uses_params, @@ -115,24 +116,10 @@ def _stringify_path(filepath_or_buffer): Any other object is passed through unchanged, which includes bytes, strings, buffers, or anything else that's not even path-like. """ - try: - import pathlib - _PATHLIB_INSTALLED = True - except ImportError: - _PATHLIB_INSTALLED = False - - try: - from py.path import local as LocalPath - _PY_PATH_INSTALLED = True - except ImportError: - _PY_PATH_INSTALLED = False - if hasattr(filepath_or_buffer, '__fspath__'): return filepath_or_buffer.__fspath__() - if _PATHLIB_INSTALLED and isinstance(filepath_or_buffer, pathlib.Path): + elif isinstance(filepath_or_buffer, pathlib.Path): return str(filepath_or_buffer) - if _PY_PATH_INSTALLED and isinstance(filepath_or_buffer, LocalPath): - return filepath_or_buffer.strpath return _expand_user(filepath_or_buffer) From 420eee5bf7b8458bddfc6dd3ff2c9020da38dbef Mon Sep 17 00:00:00 2001 From: topper-123 Date: Sun, 26 May 2019 16:31:43 +0200 Subject: [PATCH 17/34] CLN: remove StringMixin from code base, except core.computation (#26523) --- pandas/io/pytables.py | 19 +++++++++---------- pandas/io/stata.py | 3 +-- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 0f7f6fe399256..11f705e88179d 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -32,7 +32,6 @@ to_datetime) from pandas.core.arrays.categorical import Categorical from pandas.core.arrays.sparse import BlockIndex, IntIndex -from pandas.core.base import StringMixin import pandas.core.common as com from pandas.core.computation.pytables import Expr, maybe_expression from pandas.core.index import ensure_index @@ -398,7 +397,7 @@ def _is_metadata_of(group, parent_group): return False -class HDFStore(StringMixin): +class HDFStore: """ Dict-like IO interface for storing pandas objects in PyTables @@ -520,7 +519,7 @@ def __contains__(self, key): def 
__len__(self): return len(self.groups()) - def __str__(self): + def __repr__(self): return '{type}\nFile path: {path}\n'.format( type=type(self), path=pprint_thing(self._path)) @@ -1519,7 +1518,7 @@ def get_result(self, coordinates=False): return results -class IndexCol(StringMixin): +class IndexCol: """ an index column description class @@ -1587,7 +1586,7 @@ def set_table(self, table): self.table = table return self - def __str__(self): + def __repr__(self): temp = tuple( map(pprint_thing, (self.name, @@ -1881,7 +1880,7 @@ def __init__(self, values=None, kind=None, typ=None, self.set_data(data) self.set_metadata(metadata) - def __str__(self): + def __repr__(self): temp = tuple( map(pprint_thing, (self.name, @@ -2286,7 +2285,7 @@ def get_attr(self): pass -class Fixed(StringMixin): +class Fixed: """ represent an object in my store facilitate read/write of various types of objects @@ -2336,7 +2335,7 @@ def pandas_type(self): def format_type(self): return 'fixed' - def __str__(self): + def __repr__(self): """ return a pretty representation of myself """ self.infer_axes() s = self.shape @@ -3077,8 +3076,8 @@ def table_type_short(self): def format_type(self): return 'table' - def __str__(self): - """ return a pretty representatgion of myself """ + def __repr__(self): + """ return a pretty representation of myself """ self.infer_axes() dc = ",dc->[{columns}]".format(columns=(','.join( self.data_columns) if len(self.data_columns) else '')) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 27ddc4ef6f594..d8dfd15477974 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -31,7 +31,6 @@ from pandas import ( Categorical, DatetimeIndex, NaT, Timestamp, concat, isna, to_datetime, to_timedelta) -from pandas.core.base import StringMixin from pandas.core.frame import DataFrame from pandas.core.series import Series @@ -712,7 +711,7 @@ def generate_value_label(self, byteorder, encoding): return bio.read() -class StataMissingValue(StringMixin): +class 
StataMissingValue: """ An observation's missing value. From 48a4b8cf966529dcd441ece139afe82fc4873742 Mon Sep 17 00:00:00 2001 From: Chuanzhu Xu Date: Sun, 26 May 2019 13:57:47 -0400 Subject: [PATCH 18/34] MAINT: port numpy#13188 for np_datetime simplification (#26516) * MAINT: port numpy#13188 for np_datetime simplificaion Bring numpy changes about emulating the behavior of python's divmod to pandas. * cpplint fix * Add reference numpy change into comment * fix typo --- doc/source/whatsnew/v0.25.0.rst | 2 +- .../_libs/tslibs/src/datetime/np_datetime.c | 208 +++++++----------- pandas/core/arrays/categorical.py | 8 +- .../arrays/categorical/test_operators.py | 17 +- 4 files changed, 94 insertions(+), 141 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index af59a34245660..9ea690a11259d 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -346,7 +346,7 @@ Categorical ^^^^^^^^^^^ - Bug in :func:`DataFrame.at` and :func:`Series.at` that would raise exception if the index was a :class:`CategoricalIndex` (:issue:`20629`) -- Bug in :func:`_cat_compare_op` that would valuate comparison with None to True (:issue:`26504`) +- Fixed Bug in :func:`_cat_compare_op` that would evaluate comparison of ordered `Categorical` with missing values with scalar to True sometimes (:issue:`26504`) - Datetimelike diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c index 87866d804503e..a8a47e2e90f93 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -498,6 +498,27 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, return ret; } +/* + * Port numpy#13188 https://github.com/numpy/numpy/pull/13188/ + * + * Computes the python `ret, d = divmod(d, unit)`. 
+ * + * Note that GCC is smart enough at -O2 to eliminate the `if(*d < 0)` branch + * for subsequent calls to this command - it is able to deduce that `*d >= 0`. + */ +npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) { + assert(unit > 0); + npy_int64 div = *d / unit; + npy_int64 mod = *d % unit; + if (mod < 0) { + mod += unit; + div -= 1; + } + assert(mod >= 0); + *d = mod; + return div; +} + /* * Converts a datetime based on the given metadata into a datetimestruct */ @@ -522,13 +543,8 @@ void pandas_datetime_to_datetimestruct(npy_datetime dt, break; case NPY_FR_M: - if (dt >= 0) { - out->year = 1970 + dt / 12; - out->month = dt % 12 + 1; - } else { - out->year = 1969 + (dt + 1) / 12; - out->month = 12 + (dt + 1) % 12; - } + out->year = 1970 + extract_unit(&dt, 12); + out->month = dt + 1; break; case NPY_FR_W: @@ -543,167 +559,105 @@ void pandas_datetime_to_datetimestruct(npy_datetime dt, case NPY_FR_h: perday = 24LL; - if (dt >= 0) { - set_datetimestruct_days(dt / perday, out); - dt = dt % perday; - } else { - set_datetimestruct_days( - dt / perday - (dt % perday == 0 ? 0 : 1), out); - dt = (perday - 1) + (dt + 1) % perday; - } + set_datetimestruct_days(extract_unit(&dt, perday), out); out->hour = dt; break; case NPY_FR_m: perday = 24LL * 60; - if (dt >= 0) { - set_datetimestruct_days(dt / perday, out); - dt = dt % perday; - } else { - set_datetimestruct_days( - dt / perday - (dt % perday == 0 ? 0 : 1), out); - dt = (perday - 1) + (dt + 1) % perday; - } - out->hour = dt / 60; - out->min = dt % 60; + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60); + out->min = (int)dt; break; case NPY_FR_s: perday = 24LL * 60 * 60; - if (dt >= 0) { - set_datetimestruct_days(dt / perday, out); - dt = dt % perday; - } else { - set_datetimestruct_days( - dt / perday - (dt % perday == 0 ? 
0 : 1), out); - dt = (perday - 1) + (dt + 1) % perday; - } - out->hour = dt / (60 * 60); - out->min = (dt / 60) % 60; - out->sec = dt % 60; + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60 * 60); + out->min = (int)extract_unit(&dt, 60); + out->sec = (int)dt; break; case NPY_FR_ms: perday = 24LL * 60 * 60 * 1000; - if (dt >= 0) { - set_datetimestruct_days(dt / perday, out); - dt = dt % perday; - } else { - set_datetimestruct_days( - dt / perday - (dt % perday == 0 ? 0 : 1), out); - dt = (perday - 1) + (dt + 1) % perday; - } - out->hour = dt / (60 * 60 * 1000LL); - out->min = (dt / (60 * 1000LL)) % 60; - out->sec = (dt / 1000LL) % 60; - out->us = (dt % 1000LL) * 1000; + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 60); + out->sec = (int)extract_unit(&dt, 1000LL); + out->us = (int)(dt * 1000); break; case NPY_FR_us: perday = 24LL * 60LL * 60LL * 1000LL * 1000LL; - if (dt >= 0) { - set_datetimestruct_days(dt / perday, out); - dt = dt % perday; - } else { - set_datetimestruct_days( - dt / perday - (dt % perday == 0 ? 0 : 1), out); - dt = (perday - 1) + (dt + 1) % perday; - } - out->hour = dt / (60 * 60 * 1000000LL); - out->min = (dt / (60 * 1000000LL)) % 60; - out->sec = (dt / 1000000LL) % 60; - out->us = dt % 1000000LL; + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000); + out->us = (int)dt; break; case NPY_FR_ns: perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL; - if (dt >= 0) { - set_datetimestruct_days(dt / perday, out); - dt = dt % perday; - } else { - set_datetimestruct_days( - dt / perday - (dt % perday == 0 ? 
0 : 1), out); - dt = (perday - 1) + (dt + 1) % perday; - } - out->hour = dt / (60 * 60 * 1000000000LL); - out->min = (dt / (60 * 1000000000LL)) % 60; - out->sec = (dt / 1000000000LL) % 60; - out->us = (dt / 1000LL) % 1000000LL; - out->ps = (dt % 1000LL) * 1000; + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); break; case NPY_FR_ps: perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000; - if (dt >= 0) { - set_datetimestruct_days(dt / perday, out); - dt = dt % perday; - } else { - set_datetimestruct_days( - dt / perday - (dt % perday == 0 ? 0 : 1), out); - dt = (perday - 1) + (dt + 1) % perday; - } - out->hour = dt / (60 * 60 * 1000000000000LL); - out->min = (dt / (60 * 1000000000000LL)) % 60; - out->sec = (dt / 1000000000000LL) % 60; - out->us = (dt / 1000000LL) % 1000000LL; - out->ps = dt % 1000000LL; + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); break; case NPY_FR_fs: /* entire range is only +- 2.6 hours */ - if (dt >= 0) { - out->hour = dt / (60 * 60 * 1000000000000000LL); - out->min = (dt / (60 * 1000000000000000LL)) % 60; - out->sec = (dt / 1000000000000000LL) % 60; - out->us = (dt / 1000000000LL) % 1000000LL; - out->ps = (dt / 1000LL) % 1000000LL; - out->as = (dt % 1000LL) * 1000; - } else { - npy_datetime minutes; - - minutes = dt / (60 * 1000000000000000LL); - dt = dt % (60 * 1000000000000000LL); - if (dt < 0) { - dt += (60 * 1000000000000000LL); - --minutes; - } - /* Offset the negative minutes */ - 
add_minutes_to_datetimestruct(out, minutes); - out->sec = (dt / 1000000000000000LL) % 60; - out->us = (dt / 1000000000LL) % 1000000LL; - out->ps = (dt / 1000LL) % 1000000LL; - out->as = (dt % 1000LL) * 1000; + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 60 * 60); + if (out->hour < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour += 24; + assert(out->hour >= 0); } + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000); + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL); + out->as = (int)(dt * 1000); break; case NPY_FR_as: /* entire range is only +- 9.2 seconds */ - if (dt >= 0) { - out->sec = (dt / 1000000000000000000LL) % 60; - out->us = (dt / 1000000000000LL) % 1000000LL; - out->ps = (dt / 1000000LL) % 1000000LL; - out->as = dt % 1000000LL; - } else { - npy_datetime seconds; - - seconds = dt / 1000000000000000000LL; - dt = dt % 1000000000000000000LL; - if (dt < 0) { - dt += 1000000000000000000LL; - --seconds; - } - /* Offset the negative seconds */ - add_seconds_to_datetimestruct(out, seconds); - out->us = (dt / 1000000000000LL) % 1000000LL; - out->ps = (dt / 1000000LL) % 1000000LL; - out->as = dt % 1000000LL; + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 1000); + if (out->sec < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour = 23; + out->min = 59; + out->sec += 60; + assert(out->sec >= 0); } + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL * 1000); + out->as = (int)dt; break; default: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 1d6b906158125..df01c6bee8917 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -100,13 +100,12 @@ def f(self, other): if is_scalar(other): if 
other in self.categories: i = self.categories.get_loc(other) - f = getattr(self._codes, op) - ret = f(i) + ret = getattr(self._codes, op)(i) # check for NaN in self na_mask = (self._codes == -1) if na_mask.any(): - # In other series, the leads to False, so do that here too + # comparison to missing values NaN leads to False ret[na_mask] = False return ret else: @@ -1412,6 +1411,7 @@ def isna(self): ret = self._codes == -1 return ret + isnull = isna def notna(self): @@ -1433,6 +1433,7 @@ def notna(self): """ return ~self.isna() + notnull = notna def put(self, *args, **kwargs): @@ -2555,6 +2556,7 @@ def index(self): stacklevel=2) return self._index + # utility routines diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index b323cb2b6a7c3..f2f4871a70d92 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -17,7 +17,6 @@ def test_categories_none_comparisons(self): tm.assert_categorical_equal(factor, self.factor) def test_comparisons(self): - result = self.factor[self.factor == 'a'] expected = self.factor[np.asarray(self.factor) == 'a'] tm.assert_categorical_equal(result, expected) @@ -186,23 +185,21 @@ def test_comparison_with_unknown_scalars(self): tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True])) - def test_comparison_with_known_scalars(self): + def test_comparison_of_ordered_categorical_with_missing_values(self): # https://github.com/pandas-dev/pandas/issues/26504 - # and following comparisons with scalars in categories with None should - # be evaluated as False + # BUG: fix ordered categorical comparison with missing values (#26504 ) + # and following comparisons with scalars in categories with missing values + # should be evaluated as False - cat1 = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) - cat2 = Categorical([None, 1, 2, 3], categories=[1, 2, 3], ordered=True) + cat = 
Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) - tm.assert_numpy_array_equal(cat1 <= 2, + tm.assert_numpy_array_equal(cat <= 2, np.array([True, True, False, False])) - tm.assert_numpy_array_equal(cat2 <= 2, - np.array([False, True, True, False])) @pytest.mark.parametrize('data,reverse,base', [ (list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])] - ) + ) def test_comparisons(self, data, reverse, base): cat_rev = Series( Categorical(data, categories=reverse, ordered=True)) From 3e205694ef3fe3447b5da626a05e03a29d977ab0 Mon Sep 17 00:00:00 2001 From: yanglinlee Date: Tue, 28 May 2019 21:41:31 -0400 Subject: [PATCH 19/34] fix categorical comparison with missing values #26504 --- pandas/tests/arrays/categorical/test_operators.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index f2f4871a70d92..24a274ed8c115 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -188,8 +188,8 @@ def test_comparison_with_unknown_scalars(self): def test_comparison_of_ordered_categorical_with_missing_values(self): # https://github.com/pandas-dev/pandas/issues/26504 # BUG: fix ordered categorical comparison with missing values (#26504 ) - # and following comparisons with scalars in categories with missing values - # should be evaluated as False + # and following comparisons with scalars in categories with missing + # values should be evaluated as False cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) @@ -198,8 +198,7 @@ def test_comparison_of_ordered_categorical_with_missing_values(self): @pytest.mark.parametrize('data,reverse,base', [ (list("abc"), list("cba"), list("bbb")), - ([1, 2, 3], [3, 2, 1], [2, 2, 2])] - ) + ([1, 2, 3], [3, 2, 1], [2, 2, 2])]) def test_comparisons(self, data, reverse, base): cat_rev = Series( 
Categorical(data, categories=reverse, ordered=True)) From 7e6662d70bd6c0008a48ed7f5c806fb841f22a2b Mon Sep 17 00:00:00 2001 From: Big Head Date: Tue, 28 May 2019 22:46:38 -0400 Subject: [PATCH 20/34] Update test_operators.py --- pandas/tests/arrays/categorical/test_operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 24a274ed8c115..fe10823254a12 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -185,7 +185,7 @@ def test_comparison_with_unknown_scalars(self): tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True])) - def test_comparison_of_ordered_categorical_with_missing_values(self): + def test_comparison_of_ordered_categorical_with_missing_values_to_scalar(self): # https://github.com/pandas-dev/pandas/issues/26504 # BUG: fix ordered categorical comparison with missing values (#26504 ) # and following comparisons with scalars in categories with missing From 16dac3a3ca1a64f603c6e4466a884fafe0771c99 Mon Sep 17 00:00:00 2001 From: Big Head Date: Tue, 28 May 2019 22:51:02 -0400 Subject: [PATCH 21/34] Update categorical.py --- pandas/core/arrays/categorical.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index df01c6bee8917..93346488aaf1d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -89,12 +89,12 @@ def f(self, other): else: other_codes = other._codes - na_mask = (self._codes == -1) | (other_codes == -1) + mask = (self._codes == -1) | (other_codes == -1) f = getattr(self._codes, op) ret = f(other_codes) - if na_mask.any(): + if mask.any(): # In other series, the leads to False, so do that here too - ret[na_mask] = False + ret[mask] = False return ret if is_scalar(other): @@ -103,10 +103,10 @@ def 
f(self, other): ret = getattr(self._codes, op)(i) # check for NaN in self - na_mask = (self._codes == -1) - if na_mask.any(): + mask = (self._codes == -1) + if mask.any(): # comparison to missing values NaN leads to False - ret[na_mask] = False + ret[mask] = False return ret else: if op == '__eq__': From 9464f72944e48c1809b0e5430873eb63cb1655a0 Mon Sep 17 00:00:00 2001 From: Big Head Date: Tue, 28 May 2019 22:55:19 -0400 Subject: [PATCH 22/34] Update test_operators.py --- pandas/tests/arrays/categorical/test_operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index fe10823254a12..c4c977b3a84e2 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -185,7 +185,7 @@ def test_comparison_with_unknown_scalars(self): tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True])) - def test_comparison_of_ordered_categorical_with_missing_values_to_scalar(self): + def test_comparison_of_ordered_categorical_with_nan_to_scalar(self): # https://github.com/pandas-dev/pandas/issues/26504 # BUG: fix ordered categorical comparison with missing values (#26504 ) # and following comparisons with scalars in categories with missing From c2b73438c68eadb07a479658fe51fc8998978a50 Mon Sep 17 00:00:00 2001 From: Big Head Date: Wed, 29 May 2019 01:10:19 -0400 Subject: [PATCH 23/34] Update test_operators.py --- .../tests/arrays/categorical/test_operators.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index c4c977b3a84e2..13d037b76ec18 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -185,7 +185,8 @@ def test_comparison_with_unknown_scalars(self): 
tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True])) - def test_comparison_of_ordered_categorical_with_nan_to_scalar(self): + def test_comparison_of_ordered_categorical_with_nan_to_scalar( + self, compare_operators_no_eq_ne): # https://github.com/pandas-dev/pandas/issues/26504 # BUG: fix ordered categorical comparison with missing values (#26504 ) # and following comparisons with scalars in categories with missing @@ -193,9 +194,18 @@ def test_comparison_of_ordered_categorical_with_nan_to_scalar(self): cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) - tm.assert_numpy_array_equal(cat <= 2, - np.array([True, True, False, False])) + assert getattr(cat, compare_operators_no_eq_ne)(2)[-1] == False + def test_comparison_of_ordered_categorical_with_nan_to_listlike( + self, compare_operators_no_eq_ne): + # https://github.com/pandas-dev/pandas/issues/26504 + # and following comparisons of missing values in ordered Categorical + # with listlike should be evaluated as False + + cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) + other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True) + assert getattr(cat, compare_operators_no_eq_ne)(other)[-1] == False + @pytest.mark.parametrize('data,reverse,base', [ (list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])]) From 65014e76780a0533d22d748d980b2a592d3d15fd Mon Sep 17 00:00:00 2001 From: Big Head Date: Wed, 29 May 2019 12:22:13 -0400 Subject: [PATCH 24/34] Update doc/source/whatsnew/v0.25.0.rst Co-Authored-By: Joris Van den Bossche --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 9f10d758c72c4..c65ba8ae54f2e 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -348,7 +348,7 @@ Categorical ^^^^^^^^^^^ - Bug in :func:`DataFrame.at` and :func:`Series.at` that would 
raise exception if the index was a :class:`CategoricalIndex` (:issue:`20629`) -- Fixed Bug in :func:`_cat_compare_op` that would evaluate comparison of ordered `Categorical` with missing values with scalar to True sometimes (:issue:`26504`) +- Fixed bug in comparison of ordered `Categorical` that contained missing values with a scalar which sometimes incorrectly resulted in True (:issue:`26504`) - Datetimelike From 8964f0acd8b1b15d2e6e642056380718205e6288 Mon Sep 17 00:00:00 2001 From: Big Head Date: Wed, 29 May 2019 20:57:14 -0400 Subject: [PATCH 25/34] Update test_operators.py --- pandas/tests/arrays/categorical/test_operators.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 13d037b76ec18..b6b4713a7d87e 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -186,7 +186,7 @@ def test_comparison_with_unknown_scalars(self): np.array([True, True, True])) def test_comparison_of_ordered_categorical_with_nan_to_scalar( - self, compare_operators_no_eq_ne): + self, compare_operators_no_eq_ne): # https://github.com/pandas-dev/pandas/issues/26504 # BUG: fix ordered categorical comparison with missing values (#26504 ) # and following comparisons with scalars in categories with missing @@ -198,8 +198,8 @@ def test_comparison_of_ordered_categorical_with_nan_to_scalar( def test_comparison_of_ordered_categorical_with_nan_to_listlike( self, compare_operators_no_eq_ne): - # https://github.com/pandas-dev/pandas/issues/26504 - # and following comparisons of missing values in ordered Categorical + # https://github.com/pandas-dev/pandas/issues/26504 + # and following comparisons of missing values in ordered Categorical # with listlike should be evaluated as False cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) From 
7f404d2a5e931196eadeceb7308642d14fc8afd2 Mon Sep 17 00:00:00 2001 From: Big Head Date: Wed, 29 May 2019 21:02:59 -0400 Subject: [PATCH 26/34] Update test_operators.py --- pandas/tests/arrays/categorical/test_operators.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index b6b4713a7d87e..101731b4e96cb 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -208,7 +208,8 @@ def test_comparison_of_ordered_categorical_with_nan_to_listlike( @pytest.mark.parametrize('data,reverse,base', [ (list("abc"), list("cba"), list("bbb")), - ([1, 2, 3], [3, 2, 1], [2, 2, 2])]) + ([1, 2, 3], [3, 2, 1], [2, 2, 2])] + ) def test_comparisons(self, data, reverse, base): cat_rev = Series( Categorical(data, categories=reverse, ordered=True)) From 19e3711343c1ef5d926ea78705033c37e185f2a8 Mon Sep 17 00:00:00 2001 From: Big Head Date: Wed, 29 May 2019 21:23:35 -0400 Subject: [PATCH 27/34] Update v0.25.0.rst --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index c65ba8ae54f2e..ffa9fc5977709 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -348,7 +348,7 @@ Categorical ^^^^^^^^^^^ - Bug in :func:`DataFrame.at` and :func:`Series.at` that would raise exception if the index was a :class:`CategoricalIndex` (:issue:`20629`) -- Fixed bug in comparison of ordered `Categorical` that contained missing values with a scalar which sometimes incorrectly resulted in True (:issue:`26504`) +- Fixed bug in comparison of ordered :class:`Categorical` that contained missing values with a scalar which sometimes incorrectly resulted in True (:issue:`26504`) - Datetimelike From 2fc1d278d2a33faaf0ac482433f6b44323515f54 Mon Sep 17 00:00:00 2001 From: Big Head Date: Wed, 29 May 
2019 21:40:01 -0400 Subject: [PATCH 28/34] Update test_operators.py --- .../tests/arrays/categorical/test_operators.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 101731b4e96cb..80f218bd3a30c 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -7,6 +7,7 @@ from pandas import Categorical, DataFrame, Series, date_range from pandas.tests.arrays.categorical.common import TestCategorical import pandas.util.testing as tm +import warnings class TestCategoricalOpsWithFactor(TestCategorical): @@ -193,8 +194,14 @@ def test_comparison_of_ordered_categorical_with_nan_to_scalar( # values should be evaluated as False cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) - - assert getattr(cat, compare_operators_no_eq_ne)(2)[-1] == False + scalar = 2 + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + actual = getattr(cat, compare_operators_no_eq_ne)(scalar) + expected = getattr(np.array(cat), compare_operators_no_eq_ne)(scalar) + tm.assert_numpy_array_equal(actual, expected) + def test_comparison_of_ordered_categorical_with_nan_to_listlike( self, compare_operators_no_eq_ne): @@ -204,7 +211,12 @@ def test_comparison_of_ordered_categorical_with_nan_to_listlike( cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True) - assert getattr(cat, compare_operators_no_eq_ne)(other)[-1] == False + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + actual = getattr(cat, compare_operators_no_eq_ne)(other) + expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2) + tm.assert_numpy_array_equal(actual, expected) @pytest.mark.parametrize('data,reverse,base', [ (list("abc"), list("cba"), list("bbb")), 
From c80c2dcab5cafd8604d09eab9ceef4a6314b072e Mon Sep 17 00:00:00 2001 From: Big Head Date: Wed, 29 May 2019 22:49:09 -0400 Subject: [PATCH 29/34] Update test_operators.py --- pandas/tests/arrays/categorical/test_operators.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 80f218bd3a30c..05bbb30bd00a1 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -195,13 +195,12 @@ def test_comparison_of_ordered_categorical_with_nan_to_scalar( cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) scalar = 2 - with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) actual = getattr(cat, compare_operators_no_eq_ne)(scalar) - expected = getattr(np.array(cat), compare_operators_no_eq_ne)(scalar) + expected = getattr(np.array(cat), + compare_operators_no_eq_ne)(scalar) tm.assert_numpy_array_equal(actual, expected) - def test_comparison_of_ordered_categorical_with_nan_to_listlike( self, compare_operators_no_eq_ne): @@ -211,13 +210,12 @@ def test_comparison_of_ordered_categorical_with_nan_to_listlike( cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True) - with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) actual = getattr(cat, compare_operators_no_eq_ne)(other) expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2) tm.assert_numpy_array_equal(actual, expected) - + @pytest.mark.parametrize('data,reverse,base', [ (list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])] From 2e01686a7e6c6eb9a1e4ce20c0f0862210c4b046 Mon Sep 17 00:00:00 2001 From: Big Head Date: Wed, 29 May 2019 23:23:44 -0400 Subject: [PATCH 30/34] Update test_operators.py --- pandas/tests/arrays/categorical/test_operators.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 05bbb30bd00a1..1f85a22e03d96 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -198,7 +198,7 @@ def test_comparison_of_ordered_categorical_with_nan_to_scalar( with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) actual = getattr(cat, compare_operators_no_eq_ne)(scalar) - expected = getattr(np.array(cat), + expected = getattr(np.array(cat), compare_operators_no_eq_ne)(scalar) tm.assert_numpy_array_equal(actual, expected) From 924f6937db462de93c9b6ba70bd1e653b3465d42 Mon Sep 17 00:00:00 2001 From: Big Head Date: Wed, 29 May 2019 23:51:40 -0400 Subject: [PATCH 31/34] Update test_operators.py --- pandas/tests/arrays/categorical/test_operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 1f85a22e03d96..72275444ccaa1 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -1,4 +1,5 @@ import operator +import warnings import numpy as np import pytest @@ -7,7 +8,6 @@ from pandas import Categorical, DataFrame, Series, date_range from pandas.tests.arrays.categorical.common import TestCategorical import pandas.util.testing as tm -import warnings class TestCategoricalOpsWithFactor(TestCategorical): From 3b4a42a58126f47b8e3e16167660238810246743 Mon Sep 17 00:00:00 2001 From: Big Head Date: Thu, 30 May 2019 10:18:45 -0400 Subject: [PATCH 32/34] Update categorical.py --- pandas/core/arrays/categorical.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 93346488aaf1d..8bbd161ee107b 100644 --- a/pandas/core/arrays/categorical.py +++ 
b/pandas/core/arrays/categorical.py @@ -1411,7 +1411,6 @@ def isna(self): ret = self._codes == -1 return ret - isnull = isna def notna(self): @@ -1433,7 +1432,6 @@ def notna(self): """ return ~self.isna() - notnull = notna def put(self, *args, **kwargs): @@ -2556,7 +2554,6 @@ def index(self): stacklevel=2) return self._index - # utility routines From 57480bd3c3be622c2fafbdd5b73dcffac0ee22ae Mon Sep 17 00:00:00 2001 From: Big Head Date: Thu, 30 May 2019 10:24:59 -0400 Subject: [PATCH 33/34] Update test_operators.py --- pandas/tests/arrays/categorical/test_operators.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 72275444ccaa1..b67d430667682 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -195,12 +195,13 @@ def test_comparison_of_ordered_categorical_with_nan_to_scalar( cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) scalar = 2 + with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) - actual = getattr(cat, compare_operators_no_eq_ne)(scalar) expected = getattr(np.array(cat), compare_operators_no_eq_ne)(scalar) - tm.assert_numpy_array_equal(actual, expected) + actual = getattr(cat, compare_operators_no_eq_ne)(scalar) + tm.assert_numpy_array_equal(actual, expected) def test_comparison_of_ordered_categorical_with_nan_to_listlike( self, compare_operators_no_eq_ne): @@ -210,11 +211,12 @@ def test_comparison_of_ordered_categorical_with_nan_to_listlike( cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True) + with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) - actual = getattr(cat, compare_operators_no_eq_ne)(other) expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2) - 
tm.assert_numpy_array_equal(actual, expected) + actual = getattr(cat, compare_operators_no_eq_ne)(other) + tm.assert_numpy_array_equal(actual, expected) @pytest.mark.parametrize('data,reverse,base', [ (list("abc"), list("cba"), list("bbb")), From 8bb9bcf27a4dbc1220b6eeaffa605a3aef286da3 Mon Sep 17 00:00:00 2001 From: yanglinlee Date: Fri, 31 May 2019 12:22:55 -0400 Subject: [PATCH 34/34] fix categorical comparison with missing values --- pandas/core/arrays/categorical.py | 4 +--- pandas/tests/arrays/categorical/test_operators.py | 2 -- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6d34a8b66c5ea..44bb44457bc25 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -104,9 +104,7 @@ def f(self, other): # check for NaN in self mask = (self._codes == -1) - if mask.any(): - # comparison to missing values NaN leads to False - ret[mask] = False + ret[mask] = False return ret else: if op == '__eq__': diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index b67d430667682..a443408bf9479 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -195,7 +195,6 @@ def test_comparison_of_ordered_categorical_with_nan_to_scalar( cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) scalar = 2 - with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) expected = getattr(np.array(cat), @@ -211,7 +210,6 @@ def test_comparison_of_ordered_categorical_with_nan_to_listlike( cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True) - with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2)