Merge pull request pandas-dev#6682 from jreback/binary_op

jreback · jreback · commit 66cf19a66f72 · 2014-03-21T16:10:13.000-04:00
BUG: Bug in binary operations with a rhs of a Series not aligning (GH6681)
diff --git a/doc/source/basics.rst b/doc/source/basics.rst
@@ -140,10 +140,9 @@ either match on the *index* or *columns* via the **axis** keyword:
 
 .. ipython:: python
 
-   d = {'one' : Series(randn(3), index=['a', 'b', 'c']),
-        'two' : Series(randn(4), index=['a', 'b', 'c', 'd']),
-        'three' : Series(randn(3), index=['b', 'c', 'd'])}
-   df = df_orig = DataFrame(d)
+   df = DataFrame({'one' : Series(randn(3), index=['a', 'b', 'c']),
+                   'two' : Series(randn(4), index=['a', 'b', 'c', 'd']),
+                   'three' : Series(randn(3), index=['b', 'c', 'd'])})
    df
    row = df.ix[1]
    column = df['two']
@@ -154,6 +153,20 @@ either match on the *index* or *columns* via the **axis** keyword:
    df.sub(column, axis='index')
    df.sub(column, axis=0)
 
+.. ipython:: python
+   :suppress:
+
+   df_orig = df
+
+Furthermore, you can align a level of a multi-indexed DataFrame with a Series.
+
+.. ipython:: python
+
+   dfmi = df.copy()
+   dfmi.index = MultiIndex.from_tuples([(1,'a'),(1,'b'),(1,'c'),(2,'a')],
+                                       names=['first','second'])
+   dfmi.sub(column, axis=0, level='second')
+
 With Panel, describing the matching behavior is a bit more difficult, so
 the arithmetic methods instead (and perhaps confusingly?) give you the option
 to specify the *broadcast axis*. For example, suppose we wished to demean the
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -1843,11 +1843,11 @@ the sheet names using the ``sheet_names`` attribute.
 .. versionadded:: 0.13
 
 There are now two ways to read in sheets from an Excel file. You can provide
-either the index of a sheet or its name to by passing different values for 
-``sheet_name``. 
+either the index of a sheet or its name by passing different values for
+``sheet_name``.
 
 - Pass a string to refer to the name of a particular sheet in the workbook.
-- Pass an integer to refer to the index of a sheet. Indices follow Python 
+- Pass an integer to refer to the index of a sheet. Indices follow Python
   convention, beginning at 0.
 - The default value is ``sheet_name=0``. This reads the first sheet.
 
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -268,6 +268,7 @@ Bug Fixes
 - Bug in ``fillna`` with ``limit`` and ``value`` specified
 - Bug in ``DataFrame.to_stata`` when columns have non-string names (:issue:`4558`)
 - Bug in compat with ``np.compress``, surfaced in (:issue:`6658`)
+- Bug in binary operations with a rhs of a Series not aligning (:issue:`6681`)
 
 pandas 0.13.1
 -------------
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2801,12 +2801,12 @@ def _combine_series(self, other, func, fill_value=None, axis=None,
         if axis is not None:
             axis = self._get_axis_name(axis)
             if axis == 'index':
-                return self._combine_match_index(other, func, fill_value)
+                return self._combine_match_index(other, func, level=level, fill_value=fill_value)
             else:
-                return self._combine_match_columns(other, func, fill_value)
-        return self._combine_series_infer(other, func, fill_value)
+                return self._combine_match_columns(other, func, level=level, fill_value=fill_value)
+        return self._combine_series_infer(other, func, level=level, fill_value=fill_value)
 
-    def _combine_series_infer(self, other, func, fill_value=None):
+    def _combine_series_infer(self, other, func, level=None, fill_value=None):
         if len(other) == 0:
             return self * NA
 
@@ -2822,21 +2822,21 @@ def _combine_series_infer(self, other, func, fill_value=None):
                            "DataFrame.<op> to explicitly broadcast arithmetic "
                            "operations along the index"),
                           FutureWarning)
-            return self._combine_match_index(other, func, fill_value)
+            return self._combine_match_index(other, func, level=level, fill_value=fill_value)
         else:
-            return self._combine_match_columns(other, func, fill_value)
+            return self._combine_match_columns(other, func, level=level, fill_value=fill_value)
 
-    def _combine_match_index(self, other, func, fill_value=None):
-        left, right = self.align(other, join='outer', axis=0, copy=False)
+    def _combine_match_index(self, other, func, level=None, fill_value=None):
+        left, right = self.align(other, join='outer', axis=0, level=level, copy=False)
         if fill_value is not None:
             raise NotImplementedError("fill_value %r not supported." %
                                       fill_value)
         return self._constructor(func(left.values.T, right.values).T,
                                  index=left.index,
                                  columns=self.columns, copy=False)
 
-    def _combine_match_columns(self, other, func, fill_value=None):
-        left, right = self.align(other, join='outer', axis=1, copy=False)
+    def _combine_match_columns(self, other, func, level=None, fill_value=None):
+        left, right = self.align(other, join='outer', axis=1, level=level, copy=False)
         if fill_value is not None:
             raise NotImplementedError("fill_value %r not supported" %
                                       fill_value)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2996,23 +2996,30 @@ def _align_series(self, other, join='outer', axis=None, level=None,
 
         else:
 
+            # for join compat if we have an unnamed index, but
+            # are specifying a level join
+            other_index = other.index
+            if level is not None and other.index.name is None:
+                other_index = other_index.set_names([level])
+
             # one has > 1 ndim
             fdata = self._data
             if axis == 0:
                 join_index = self.index
                 lidx, ridx = None, None
-                if not self.index.equals(other.index):
+                if not self.index.equals(other_index):
                     join_index, lidx, ridx = self.index.join(
-                        other.index, how=join, return_indexers=True)
+                        other_index, how=join, return_indexers=True)
 
                 if lidx is not None:
                     fdata = fdata.reindex_indexer(join_index, lidx, axis=1)
+
             elif axis == 1:
                 join_index = self.columns
                 lidx, ridx = None, None
-                if not self.columns.equals(other.index):
+                if not self.columns.equals(other_index):
                     join_index, lidx, ridx = \
-                        self.columns.join(other.index, how=join,
+                        self.columns.join(other_index, how=join,
                                           return_indexers=True)
 
                 if lidx is not None:
@@ -3024,7 +3031,11 @@ def _align_series(self, other, join='outer', axis=None, level=None,
                 fdata = fdata.copy()
 
             left_result = DataFrame(fdata)
-            right_result = other if ridx is None else other.reindex(join_index)
+
+            if ridx is None:
+                right_result = other
+            else:
+                right_result = other.reindex(join_index, level=level)
 
         # fill
         fill_na = notnull(fill_value) or (method is not None)
diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py
@@ -456,11 +456,13 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
                                  default_fill_value=new_fill_value,
                                  fill_value=new_fill_value).__finalize__(self)
 
-    def _combine_match_index(self, other, func, fill_value=None):
+    def _combine_match_index(self, other, func, level=None, fill_value=None):
         new_data = {}
 
         if fill_value is not None:
             raise NotImplementedError
+        if level is not None:
+            raise NotImplementedError
 
         new_index = self.index.union(other.index)
         this = self
@@ -486,14 +488,16 @@ def _combine_match_index(self, other, func, fill_value=None):
                                  default_fill_value=fill_value,
                                  fill_value=self.default_fill_value).__finalize__(self)
 
-    def _combine_match_columns(self, other, func, fill_value):
+    def _combine_match_columns(self, other, func, level=None, fill_value=None):
         # patched version of DataFrame._combine_match_columns to account for
         # NumPy circumventing __rsub__ with float64 types, e.g.: 3.0 - series,
         # where 3.0 is numpy.float64 and series is a SparseSeries. Still
         # possible for this to happen, which is bothersome
 
         if fill_value is not None:
             raise NotImplementedError
+        if level is not None:
+            raise NotImplementedError
 
         new_data = {}
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -4941,6 +4941,38 @@ def test_arith_flex_frame(self):
         with assertRaisesRegexp(NotImplementedError, 'fill_value'):
             self.frame.add(self.frame.irow(0), axis='index', fill_value=3)
 
+    def test_binary_ops_align(self):
+
+        # test aligning binary ops
+
+        # GH 6681
+        index=MultiIndex.from_product([list('abc'),
+                                       ['one','two','three'],
+                                       [1,2,3]],
+                                      names=['first','second','third'])
+
+        df = DataFrame(np.arange(27*3).reshape(27,3),
+                       index=index,
+                       columns=['value1','value2','value3']).sortlevel()
+
+        idx = pd.IndexSlice
+        for op in ['add','sub','mul','div','truediv']:
+            opa = getattr(operator,op,None)
+            if opa is None:
+                continue
+
+            x = Series([ 1.0, 10.0, 100.0], [1,2,3])
+            result = getattr(df,op)(x,level='third',axis=0)
+
+            expected = pd.concat([ opa(df.loc[idx[:,:,i],:],v) for i, v in x.iteritems() ]).sortlevel()
+            assert_frame_equal(result, expected)
+
+            x = Series([ 1.0, 10.0], ['two','three'])
+            result = getattr(df,op)(x,level='second',axis=0)
+
+            expected = pd.concat([ opa(df.loc[idx[:,i],:],v) for i, v in x.iteritems() ]).reindex_like(df).sortlevel()
+            assert_frame_equal(result, expected)
+
     def test_arith_mixed(self):
 
         left = DataFrame({'A': ['a', 'b', 'c'],