diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 159cd05194300..fe3fc42992468 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -140,10 +140,9 @@ either match on the *index* or *columns* via the **axis** keyword: .. ipython:: python - d = {'one' : Series(randn(3), index=['a', 'b', 'c']), - 'two' : Series(randn(4), index=['a', 'b', 'c', 'd']), - 'three' : Series(randn(3), index=['b', 'c', 'd'])} - df = df_orig = DataFrame(d) + df = DataFrame({'one' : Series(randn(3), index=['a', 'b', 'c']), + 'two' : Series(randn(4), index=['a', 'b', 'c', 'd']), + 'three' : Series(randn(3), index=['b', 'c', 'd'])}) df row = df.ix[1] column = df['two'] @@ -154,6 +153,20 @@ either match on the *index* or *columns* via the **axis** keyword: df.sub(column, axis='index') df.sub(column, axis=0) +.. ipython:: python + :suppress: + + df_orig = df + +Furthermore you can align a level of a multi-indexed DataFrame with a Series. + +.. ipython:: python + + dfmi = df.copy() + dfmi.index = MultiIndex.from_tuples([(1,'a'),(1,'b'),(1,'c'),(2,'a')], + names=['first','second']) + dfmi.sub(column, axis=0, level='second') + With Panel, describing the matching behavior is a bit more difficult, so the arithmetic methods instead (and perhaps confusingly?) give you the option to specify the *broadcast axis*. For example, suppose we wished to demean the diff --git a/doc/source/io.rst b/doc/source/io.rst index f43582ded4473..4d97c43e85de8 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1843,11 +1843,11 @@ the sheet names using the ``sheet_names`` attribute. .. versionadded:: 0.13 There are now two ways to read in sheets from an Excel file. You can provide -either the index of a sheet or its name to by passing different values for -``sheet_name``. +either the index of a sheet or its name by passing different values for +``sheet_name``. - Pass a string to refer to the name of a particular sheet in the workbook. -- Pass an integer to refer to the index of a sheet. 
Indices follow Python +- Pass an integer to refer to the index of a sheet. Indices follow Python convention, beginning at 0. - The default value is ``sheet_name=0``. This reads the first sheet. diff --git a/doc/source/release.rst b/doc/source/release.rst index fe57133752ce3..3937b4b30fa0e 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -268,6 +268,7 @@ Bug Fixes - Bug in ``fillna`` with ``limit`` and ``value`` specified - Bug in ``DataFrame.to_stata`` when columns have non-string names (:issue:`4558`) - Bug in compat with ``np.compress``, surfaced in (:issue:`6658`) +- Bug in binary operations with a rhs of a Series not aligning (:issue:`6681`) pandas 0.13.1 ------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index df43dae257408..253b9ac2c7a16 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2801,12 +2801,12 @@ def _combine_series(self, other, func, fill_value=None, axis=None, if axis is not None: axis = self._get_axis_name(axis) if axis == 'index': - return self._combine_match_index(other, func, fill_value) + return self._combine_match_index(other, func, level=level, fill_value=fill_value) else: - return self._combine_match_columns(other, func, fill_value) - return self._combine_series_infer(other, func, fill_value) + return self._combine_match_columns(other, func, level=level, fill_value=fill_value) + return self._combine_series_infer(other, func, level=level, fill_value=fill_value) - def _combine_series_infer(self, other, func, fill_value=None): + def _combine_series_infer(self, other, func, level=None, fill_value=None): if len(other) == 0: return self * NA @@ -2822,12 +2822,12 @@ def _combine_series_infer(self, other, func, fill_value=None): "DataFrame. 
to explicitly broadcast arithmetic " "operations along the index"), FutureWarning) - return self._combine_match_index(other, func, fill_value) + return self._combine_match_index(other, func, level=level, fill_value=fill_value) else: - return self._combine_match_columns(other, func, fill_value) + return self._combine_match_columns(other, func, level=level, fill_value=fill_value) - def _combine_match_index(self, other, func, fill_value=None): - left, right = self.align(other, join='outer', axis=0, copy=False) + def _combine_match_index(self, other, func, level=None, fill_value=None): + left, right = self.align(other, join='outer', axis=0, level=level, copy=False) if fill_value is not None: raise NotImplementedError("fill_value %r not supported." % fill_value) @@ -2835,8 +2835,8 @@ def _combine_match_index(self, other, func, fill_value=None): index=left.index, columns=self.columns, copy=False) - def _combine_match_columns(self, other, func, fill_value=None): - left, right = self.align(other, join='outer', axis=1, copy=False) + def _combine_match_columns(self, other, func, level=None, fill_value=None): + left, right = self.align(other, join='outer', axis=1, level=level, copy=False) if fill_value is not None: raise NotImplementedError("fill_value %r not supported" % fill_value) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ba6e7a33a7515..6c57a9ce5beaa 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2996,23 +2996,30 @@ def _align_series(self, other, join='outer', axis=None, level=None, else: + # for join compat if we have an unnamed index, but + # are specifying a level join + other_index = other.index + if level is not None and other.index.name is None: + other_index = other_index.set_names([level]) + # one has > 1 ndim fdata = self._data if axis == 0: join_index = self.index lidx, ridx = None, None - if not self.index.equals(other.index): + if not self.index.equals(other_index): join_index, lidx, ridx = self.index.join( - 
other.index, how=join, return_indexers=True) + other_index, how=join, return_indexers=True) if lidx is not None: fdata = fdata.reindex_indexer(join_index, lidx, axis=1) + elif axis == 1: join_index = self.columns lidx, ridx = None, None - if not self.columns.equals(other.index): + if not self.columns.equals(other_index): join_index, lidx, ridx = \ - self.columns.join(other.index, how=join, + self.columns.join(other_index, how=join, return_indexers=True) if lidx is not None: @@ -3024,7 +3031,11 @@ def _align_series(self, other, join='outer', axis=None, level=None, fdata = fdata.copy() left_result = DataFrame(fdata) - right_result = other if ridx is None else other.reindex(join_index) + + if ridx is None: + right_result = other + else: + right_result = other.reindex(join_index, level=level) # fill fill_na = notnull(fill_value) or (method is not None) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index a69c07494af8a..bd34c7e5f02b2 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -456,11 +456,13 @@ def _combine_frame(self, other, func, fill_value=None, level=None): default_fill_value=new_fill_value, fill_value=new_fill_value).__finalize__(self) - def _combine_match_index(self, other, func, fill_value=None): + def _combine_match_index(self, other, func, level=None, fill_value=None): new_data = {} if fill_value is not None: raise NotImplementedError + if level is not None: + raise NotImplementedError new_index = self.index.union(other.index) this = self @@ -486,7 +488,7 @@ def _combine_match_index(self, other, func, fill_value=None): default_fill_value=fill_value, fill_value=self.default_fill_value).__finalize__(self) - def _combine_match_columns(self, other, func, fill_value): + def _combine_match_columns(self, other, func, level=None, fill_value=None): # patched version of DataFrame._combine_match_columns to account for # NumPy circumventing __rsub__ with float64 types, e.g.: 3.0 - series, # where 3.0 is numpy.float64 and series is a 
SparseSeries. Still @@ -494,6 +496,8 @@ def _combine_match_columns(self, other, func, fill_value): if fill_value is not None: raise NotImplementedError + if level is not None: + raise NotImplementedError new_data = {} diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index f61880f97c7a5..32805d47821f4 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4941,6 +4941,38 @@ def test_arith_flex_frame(self): with assertRaisesRegexp(NotImplementedError, 'fill_value'): self.frame.add(self.frame.irow(0), axis='index', fill_value=3) + def test_binary_ops_align(self): + + # test aligning binary ops + + # GH 6681 + index=MultiIndex.from_product([list('abc'), + ['one','two','three'], + [1,2,3]], + names=['first','second','third']) + + df = DataFrame(np.arange(27*3).reshape(27,3), + index=index, + columns=['value1','value2','value3']).sortlevel() + + idx = pd.IndexSlice + for op in ['add','sub','mul','div','truediv']: + opa = getattr(operator,op,None) + if opa is None: + continue + + x = Series([ 1.0, 10.0, 100.0], [1,2,3]) + result = getattr(df,op)(x,level='third',axis=0) + + expected = pd.concat([ opa(df.loc[idx[:,:,i],:],v) for i, v in x.iteritems() ]).sortlevel() + assert_frame_equal(result, expected) + + x = Series([ 1.0, 10.0], ['two','three']) + result = getattr(df,op)(x,level='second',axis=0) + + expected = pd.concat([ opa(df.loc[idx[:,i],:],v) for i, v in x.iteritems() ]).reindex_like(df).sortlevel() + assert_frame_equal(result, expected) + def test_arith_mixed(self): left = DataFrame({'A': ['a', 'b', 'c'],