pandas-dev · jreback · Aug 8, 2018 · Jul 26, 2018 · Jul 26, 2018 · Jul 26, 2018
diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
@@ -1539,7 +1539,7 @@ cdef class _Period(object):
         See Also
         --------
         Period.year : Return the calendar year of the period.
-        
+
         Examples
         --------
         If the natural and fiscal year are the same, `qyear` and `year` will

diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -929,7 +929,7 @@ cdef class _Timedelta(timedelta):
     def nanoseconds(self):
         """
         Return the number of nanoseconds (n), where 0 <= n < 1 microsecond.
-       
+
         Returns
         -------
         int

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4902,20 +4902,7 @@ def _arith_op(left, right):
 
         if this._is_mixed_type or other._is_mixed_type:
             # iterate over columns
-            if this.columns.is_unique:
-                # unique columns
-                result = {col: _arith_op(this[col], other[col])
-                          for col in this}
-                result = self._constructor(result, index=new_index,
-                                           columns=new_columns, copy=False)
-            else:
-                # non-unique columns
-                result = {i: _arith_op(this.iloc[:, i], other.iloc[:, i])
-                          for i, col in enumerate(this.columns)}
-                result = self._constructor(result, index=new_index, copy=False)
-                result.columns = new_columns
-            return result
-
+            return ops.dispatch_to_series(this, other, _arith_op)
         else:
             result = _arith_op(this.values, other.values)
 
@@ -4949,27 +4936,11 @@ def _compare_frame(self, other, func, str_rep):
         # compare_frame assumes self._indexed_same(other)
 
         import pandas.core.computation.expressions as expressions
-        # unique
-        if self.columns.is_unique:
 
-            def _compare(a, b):
-                return {col: func(a[col], b[col]) for col in a.columns}
+        def _compare(a, b):
+            return expressions.evaluate(func, str_rep, a, b)
 
-            new_data = expressions.evaluate(_compare, str_rep, self, other)
-            return self._constructor(data=new_data, index=self.index,
-                                     columns=self.columns, copy=False)
-        # non-unique
-        else:
-
-            def _compare(a, b):
-                return {i: func(a.iloc[:, i], b.iloc[:, i])
-                        for i, col in enumerate(a.columns)}
-
-            new_data = expressions.evaluate(_compare, str_rep, self, other)
-            result = self._constructor(data=new_data, index=self.index,
-                                       copy=False)
-            result.columns = self.columns
-            return result
+        return ops.dispatch_to_series(self, other, _compare)
 
     def combine(self, other, func, fill_value=None, overwrite=True):
         """

diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -1114,6 +1114,7 @@ def na_op(x, y):
                 result[mask] = op(x[mask], com.values_from_object(y[mask]))
             else:
                 assert isinstance(x, np.ndarray)
+                assert lib.is_scalar(y)
                 result = np.empty(len(x), dtype=x.dtype)
                 mask = notna(x)
                 result[mask] = op(x[mask], y)
@@ -1160,6 +1161,7 @@ def wrapper(left, right):
 
         elif (is_extension_array_dtype(left) or
                 is_extension_array_dtype(right)):
+            # TODO: should this include `not is_scalar(right)`?
             return dispatch_to_extension_op(op, left, right)
 
         elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left):
@@ -1249,13 +1251,11 @@ def na_op(x, y):
         # should have guarantees on what x, y can be type-wise
         # Extension Dtypes are not called here
 
-        # dispatch to the categorical if we have a categorical
-        # in either operand
-        if is_categorical_dtype(y) and not is_scalar(y):
-            # The `not is_scalar(y)` check excludes the string "category"
-            return op(y, x)
+        # Categorical operands were once dispatched from here; assert that
+        # such cases no longer reach this function.
+        assert not (is_categorical_dtype(y) and not is_scalar(y))
 
-        elif is_object_dtype(x.dtype):
+        if is_object_dtype(x.dtype):
             result = _comp_method_OBJECT_ARRAY(op, x, y)
 
         elif is_datetimelike_v_numeric(x, y):
@@ -1313,7 +1313,7 @@ def wrapper(self, other, axis=None):
             return self._constructor(res_values, index=self.index,
                                      name=res_name)
 
-        if is_datetime64_dtype(self) or is_datetime64tz_dtype(self):
+        elif is_datetime64_dtype(self) or is_datetime64tz_dtype(self):
             # Dispatch to DatetimeIndex to ensure identical
             # Series/Index behavior
             if (isinstance(other, datetime.date) and
@@ -1355,8 +1355,9 @@ def wrapper(self, other, axis=None):
                                      name=res_name)
 
         elif (is_extension_array_dtype(self) or
-              (is_extension_array_dtype(other) and
-               not is_scalar(other))):
+              (is_extension_array_dtype(other) and not is_scalar(other))):
+            # Note: the `not is_scalar(other)` condition rules out
+            # e.g. other == "category"
             return dispatch_to_extension_op(op, self, other)
 
         elif isinstance(other, ABCSeries):
@@ -1379,13 +1380,6 @@ def wrapper(self, other, axis=None):
             # is not.
             return result.__finalize__(self).rename(res_name)
 
-        elif isinstance(other, pd.Categorical):
-            # ordering of checks matters; by this point we know
-            # that not is_categorical_dtype(self)
-            res_values = op(self.values, other)
-            return self._constructor(res_values, index=self.index,
-                                     name=res_name)
-
         elif is_scalar(other) and isna(other):
             # numpy does not like comparisons vs None
             if op is operator.ne:
@@ -1515,6 +1509,41 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
 # -----------------------------------------------------------------------------
 # DataFrame
 
+def dispatch_to_series(left, right, func):
+    """
+    Evaluate the frame operation func(left, right) column-by-column.
+
+    Parameters
+    ----------
+    left : DataFrame
+    right : scalar, or DataFrame satisfying right._indexed_same(left)
+    func : callable invoked as func(left_column, right_or_its_column)
+
+    Returns
+    -------
+    DataFrame with the index and columns of `left`
+
+    Raises
+    ------
+    NotImplementedError if `right` is neither scalar nor DataFrame
+    """
+    # Select columns by position (iloc): labels may be non-unique.
+    if lib.is_scalar(right):
+        new_data = {i: func(left.iloc[:, i], right)
+                    for i in range(len(left.columns))}
+    elif isinstance(right, ABCDataFrame):
+        assert right._indexed_same(left)
+        new_data = {i: func(left.iloc[:, i], right.iloc[:, i])
+                    for i in range(len(left.columns))}
+    else:
+        raise NotImplementedError  # remaining cases: less-obvious dispatch
+
+    result = left._constructor(new_data, index=left.index, copy=False)
+    # Set columns after construction: labels may be non-unique.
+    result.columns = left.columns
+    return result
+
+
 def _combine_series_frame(self, other, func, fill_value=None, axis=None,
                           level=None, try_cast=True):
     """
-Original file line number
+Diff line change
@@ Expand Up / @@ -929,7 +929,7 @@ cdef class _Timedelta(timedelta): @@
         def nanoseconds(self):
             """
             Return the number of nanoseconds (n), where 0 <= n < 1 microsecond.
             Returns
             -------
             int
@@ Expand Down @@