From a13c16165d718c4fc0f010ef3d6325f125c01fe2 Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Wed, 25 Jul 2018 21:24:36 -0700
Subject: [PATCH 1/7] fix trailing whitespace

---
 pandas/_libs/tslibs/period.pyx     | 2 +-
 pandas/_libs/tslibs/timedeltas.pyx | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
index 4054154cd285b..517f126e0cd6d 100644
--- a/pandas/_libs/tslibs/period.pyx
+++ b/pandas/_libs/tslibs/period.pyx
@@ -1539,7 +1539,7 @@ cdef class _Period(object):
         See Also
         --------
         Period.year : Return the calendar year of the period.
-        
+
         Examples
         --------
         If the natural and fiscal year are the same, `qyear` and `year` will
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index f7a6cf0c6dafc..221c345531208 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -929,7 +929,7 @@ cdef class _Timedelta(timedelta):
     def nanoseconds(self):
         """
         Return the number of nanoseconds (n), where 0 <= n < 1 microsecond.
-       
+
         Returns
         -------
         int

From c8948bc4ae2456b14fc1b7b33c3b615f7c6d0bba Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Wed, 25 Jul 2018 21:25:04 -0700
Subject: [PATCH 2/7] implement dispatch_to_series, remove a layer of closure

---
 pandas/core/frame.py | 37 +++-------------------
 pandas/core/ops.py   | 74 +++++++++++++++++++++++++++-----------------
 2 files changed, 50 insertions(+), 61 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 078e176ff2b99..35bd6240c7958 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4901,20 +4901,7 @@ def _arith_op(left, right):
 
         if this._is_mixed_type or other._is_mixed_type:
             # iterate over columns
-            if this.columns.is_unique:
-                # unique columns
-                result = {col: _arith_op(this[col], other[col])
-                          for col in this}
-                result = self._constructor(result, index=new_index,
-                                           columns=new_columns, copy=False)
-            else:
-                # non-unique columns
-                result = {i: _arith_op(this.iloc[:, i], other.iloc[:, i])
-                          for i, col in enumerate(this.columns)}
-                result = self._constructor(result, index=new_index, copy=False)
-                result.columns = new_columns
-            return result
-
+            return ops.dispatch_to_series(this, other, _arith_op)
         else:
             result = _arith_op(this.values, other.values)
 
@@ -4948,27 +4935,11 @@ def _compare_frame(self, other, func, str_rep):
         # compare_frame assumes self._indexed_same(other)
 
         import pandas.core.computation.expressions as expressions
-        # unique
-        if self.columns.is_unique:
 
-            def _compare(a, b):
-                return {col: func(a[col], b[col]) for col in a.columns}
+        def _compare(a, b):
+            return expressions.evaluate(func, str_rep, a, b)
 
-            new_data = expressions.evaluate(_compare, str_rep, self, other)
-            return self._constructor(data=new_data, index=self.index,
-                                     columns=self.columns, copy=False)
-        # non-unique
-        else:
-
-            def _compare(a, b):
-                return {i: func(a.iloc[:, i], b.iloc[:, i])
-                        for i, col in enumerate(a.columns)}
-
-            new_data = expressions.evaluate(_compare, str_rep, self, other)
-            result = self._constructor(data=new_data, index=self.index,
-                                       copy=False)
-            result.columns = self.columns
-            return result
+        return ops.dispatch_to_series(self, other, _compare)
 
     def combine(self, other, func, fill_value=None, overwrite=True):
         """
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index c65d2dcdc478c..2384780243e14 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -1123,30 +1123,6 @@ def na_op(x, y):
         result = missing.fill_zeros(result, x, y, op_name, fill_zeros)
         return result
 
-    def safe_na_op(lvalues, rvalues):
-        """
-        return the result of evaluating na_op on the passed in values
-
-        try coercion to object type if the native types are not compatible
-
-        Parameters
-        ----------
-        lvalues : array-like
-        rvalues : array-like
-
-        Raises
-        ------
-        TypeError: invalid operation
-        """
-        try:
-            with np.errstate(all='ignore'):
-                return na_op(lvalues, rvalues)
-        except Exception:
-            if is_object_dtype(lvalues):
-                return libalgos.arrmap_object(lvalues,
-                                              lambda x: op(x, rvalues))
-            raise
-
     def wrapper(left, right):
         if isinstance(right, ABCDataFrame):
             return NotImplemented
@@ -1174,12 +1150,18 @@ def wrapper(left, right):
                                     index=left.index, name=res_name,
                                     dtype=result.dtype)
 
+        elif is_object_dtype(left):
+            rvalues = right.values if isinstance(right, ABCSeries) else right
+            result = libalgos.arrmap_object(left.values,
+                                            lambda x: op(x, rvalues))
+
         lvalues = left.values
         rvalues = right
         if isinstance(rvalues, ABCSeries):
             rvalues = rvalues.values
 
-        result = safe_na_op(lvalues, rvalues)
+        with np.errstate(all='ignore'):
+            result = na_op(lvalues, rvalues)
         return construct_result(left, result,
                                 index=left.index, name=res_name, dtype=None)
 
@@ -1313,7 +1295,7 @@ def wrapper(self, other, axis=None):
             return self._constructor(res_values, index=self.index,
                                      name=res_name)
 
-        if is_datetime64_dtype(self) or is_datetime64tz_dtype(self):
+        elif is_datetime64_dtype(self) or is_datetime64tz_dtype(self):
             # Dispatch to DatetimeIndex to ensure identical
             # Series/Index behavior
             if (isinstance(other, datetime.date) and
@@ -1355,8 +1337,9 @@ def wrapper(self, other, axis=None):
                                      name=res_name)
 
         elif (is_extension_array_dtype(self) or
-              (is_extension_array_dtype(other) and
-               not is_scalar(other))):
+              (is_extension_array_dtype(other) and not is_scalar(other))):
+            # Note: the `not is_scalar(other)` condition rules out
+            # e.g. other == "category"
             return dispatch_to_extension_op(op, self, other)
 
         elif isinstance(other, ABCSeries):
@@ -1515,6 +1498,41 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
 # -----------------------------------------------------------------------------
 # DataFrame
 
+def dispatch_to_series(left, right, func):
+    """
+    Evaluate the frame operation func(left, right) by evaluating
+    column-by-column, dispatching to the Series implementation.
+
+    Parameters
+    ----------
+    left : DataFrame
+    right : scalar or DataFrame
+    func : arithmetic or comparison operator
+
+    Returns
+    -------
+    DataFrame
+    """
+    # Note: we use iloc to access columns for compat with cases
+    #       with non-unique columns.
+    if lib.is_scalar(right):
+        new_data = {i: func(left.iloc[:, i], right)
+                    for i in range(len(left.columns))}
+    elif isinstance(right, ABCDataFrame):
+        assert right._indexed_same(left)
+        new_data = {i: func(left.iloc[:, i], right.iloc[:, i])
+                    for i in range(len(left.columns))}
+    else:
+        # Remaining cases have less-obvious dispatch rules
+        raise NotImplementedError
+
+    result = left._constructor(new_data, index=left.index, copy=False)
+    # Pin columns instead of passing to constructor for compat with
+    # non-unique columns case
+    result.columns = left.columns
+    return result
+
+
 def _combine_series_frame(self, other, func, fill_value=None, axis=None,
                           level=None, try_cast=True):
     """

From 2f25223ab52051846eaabd9e987340e6c2b06d6e Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Thu, 26 Jul 2018 09:32:39 -0700
Subject: [PATCH 3/7] revert incorrect change; remove no-longer-reachable cases

---
 pandas/core/ops.py | 49 +++++++++++++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index 2384780243e14..5f44741080009 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -1123,6 +1123,30 @@ def na_op(x, y):
         result = missing.fill_zeros(result, x, y, op_name, fill_zeros)
         return result
 
+    def safe_na_op(lvalues, rvalues):
+        """
+        return the result of evaluating na_op on the passed in values
+
+        try coercion to object type if the native types are not compatible
+
+        Parameters
+        ----------
+        lvalues : array-like
+        rvalues : array-like
+
+        Raises
+        ------
+        TypeError: invalid operation
+        """
+        try:
+            with np.errstate(all='ignore'):
+                return na_op(lvalues, rvalues)
+        except Exception:
+            if is_object_dtype(lvalues):
+                return libalgos.arrmap_object(lvalues,
+                                              lambda x: op(x, rvalues))
+            raise
+
     def wrapper(left, right):
         if isinstance(right, ABCDataFrame):
             return NotImplemented
@@ -1150,18 +1174,12 @@ def wrapper(left, right):
                                     index=left.index, name=res_name,
                                     dtype=result.dtype)
 
-        elif is_object_dtype(left):
-            rvalues = right.values if isinstance(right, ABCSeries) else right
-            result = libalgos.arrmap_object(left.values,
-                                            lambda x: op(x, rvalues))
-
         lvalues = left.values
         rvalues = right
         if isinstance(rvalues, ABCSeries):
             rvalues = rvalues.values
 
-        with np.errstate(all='ignore'):
-            result = na_op(lvalues, rvalues)
+        result = safe_na_op(lvalues, rvalues)
         return construct_result(left, result,
                                 index=left.index, name=res_name, dtype=None)
 
@@ -1231,13 +1249,11 @@ def na_op(x, y):
         # should have guarantess on what x, y can be type-wise
         # Extension Dtypes are not called here
 
-        # dispatch to the categorical if we have a categorical
-        # in either operand
-        if is_categorical_dtype(y) and not is_scalar(y):
-            # The `not is_scalar(y)` check excludes the string "category"
-            return op(y, x)
+        # Checking that cases that were once handled here are no longer
+        # reachable.
+        assert not (is_categorical_dtype(y) and not is_scalar(y))
 
-        elif is_object_dtype(x.dtype):
+        if is_object_dtype(x.dtype):
             result = _comp_method_OBJECT_ARRAY(op, x, y)
 
         elif is_datetimelike_v_numeric(x, y):
@@ -1362,13 +1378,6 @@ def wrapper(self, other, axis=None):
             # is not.
             return result.__finalize__(self).rename(res_name)
 
-        elif isinstance(other, pd.Categorical):
-            # ordering of checks matters; by this point we know
-            # that not is_categorical_dtype(self)
-            res_values = op(self.values, other)
-            return self._constructor(res_values, index=self.index,
-                                     name=res_name)
-
         elif is_scalar(other) and isna(other):
             # numpy does not like comparisons vs None
             if op is operator.ne:

From 8527bd2091abbe7364f5349330bcee8f2655b81d Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Thu, 26 Jul 2018 11:24:27 -0700
Subject: [PATCH 4/7] Comment and assertion

---
 pandas/core/ops.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index 5f44741080009..6397ce3a314da 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -1114,6 +1114,7 @@ def na_op(x, y):
                 result[mask] = op(x[mask], com.values_from_object(y[mask]))
             else:
                 assert isinstance(x, np.ndarray)
+                assert lib.is_scalar(y)
                 result = np.empty(len(x), dtype=x.dtype)
                 mask = notna(x)
                 result[mask] = op(x[mask], y)
@@ -1160,6 +1161,7 @@ def wrapper(left, right):
 
         elif (is_extension_array_dtype(left) or
                 is_extension_array_dtype(right)):
+            # TODO: should this include `not is_scalar(right)`?
             return dispatch_to_extension_op(op, left, right)
 
         elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left):

From 2ae4b2ff41ea2091f6da97294613725f554bbd42 Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Sat, 28 Jul 2018 09:10:56 -0700
Subject: [PATCH 5/7] use imported is_scalar

---
 pandas/core/ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index 6397ce3a314da..661f7c11617dd 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -1114,7 +1114,7 @@ def na_op(x, y):
                 result[mask] = op(x[mask], com.values_from_object(y[mask]))
             else:
                 assert isinstance(x, np.ndarray)
-                assert lib.is_scalar(y)
+                assert is_scalar(y)
                 result = np.empty(len(x), dtype=x.dtype)
                 mask = notna(x)
                 result[mask] = op(x[mask], y)

From f5bb3b64c42a8e81ecbb2f9805e13f3cbf25ff6f Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Wed, 1 Aug 2018 17:57:21 -0700
Subject: [PATCH 6/7] dummy commit to force CI

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 46bb8b7045400..b949d5c1bec13 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4898,7 +4898,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
         new_index, new_columns = this.index, this.columns
 
         def _arith_op(left, right):
-            # for the mixed_type case where we iterate over columns,
+            # For the mixed_type case where we iterate over columns,
             # _arith_op(left, right) is equivalent to
             # left._binop(right, func, fill_value=fill_value)
             left, right = ops.fill_binop(left, right, fill_value)

From c36e6729b1df61559e29e5e9b2dbe0678c0795a6 Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Wed, 1 Aug 2018 18:37:52 -0700
Subject: [PATCH 7/7] revert change that hurt perf

---
 pandas/core/frame.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index b949d5c1bec13..719488a50ca40 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4898,7 +4898,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
         new_index, new_columns = this.index, this.columns
 
         def _arith_op(left, right):
-            # For the mixed_type case where we iterate over columns,
+            # for the mixed_type case where we iterate over columns,
             # _arith_op(left, right) is equivalent to
             # left._binop(right, func, fill_value=fill_value)
             left, right = ops.fill_binop(left, right, fill_value)
@@ -4942,9 +4942,14 @@ def _compare_frame(self, other, func, str_rep):
         import pandas.core.computation.expressions as expressions
 
         def _compare(a, b):
-            return expressions.evaluate(func, str_rep, a, b)
+            return {i: func(a.iloc[:, i], b.iloc[:, i])
+                    for i in range(len(a.columns))}
 
-        return ops.dispatch_to_series(self, other, _compare)
+        new_data = expressions.evaluate(_compare, str_rep, self, other)
+        result = self._constructor(data=new_data, index=self.index,
+                                   copy=False)
+        result.columns = self.columns
+        return result
 
     def combine(self, other, func, fill_value=None, overwrite=True):
         """