Skip to content

BUG: Bug in binary operations with a rhs of a Series not aligning (GH6681) #6682

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 21, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions doc/source/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,9 @@ either match on the *index* or *columns* via the **axis** keyword:

.. ipython:: python

d = {'one' : Series(randn(3), index=['a', 'b', 'c']),
'two' : Series(randn(4), index=['a', 'b', 'c', 'd']),
'three' : Series(randn(3), index=['b', 'c', 'd'])}
df = df_orig = DataFrame(d)
df = DataFrame({'one' : Series(randn(3), index=['a', 'b', 'c']),
'two' : Series(randn(4), index=['a', 'b', 'c', 'd']),
'three' : Series(randn(3), index=['b', 'c', 'd'])})
df
row = df.ix[1]
column = df['two']
Expand All @@ -154,6 +153,20 @@ either match on the *index* or *columns* via the **axis** keyword:
df.sub(column, axis='index')
df.sub(column, axis=0)

.. ipython:: python
:suppress:

df_orig = df

Furthermore, you can align a level of a multi-indexed DataFrame with a Series.

.. ipython:: python

dfmi = df.copy()
dfmi.index = MultiIndex.from_tuples([(1,'a'),(1,'b'),(1,'c'),(2,'a')],
names=['first','second'])
dfmi.sub(column, axis=0, level='second')

With Panel, describing the matching behavior is a bit more difficult, so
the arithmetic methods instead (and perhaps confusingly?) give you the option
to specify the *broadcast axis*. For example, suppose we wished to demean the
Expand Down
6 changes: 3 additions & 3 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1843,11 +1843,11 @@ the sheet names using the ``sheet_names`` attribute.
.. versionadded:: 0.13

There are now two ways to read in sheets from an Excel file. You can provide
either the index of a sheet or its name by passing different values for
``sheet_name``.
either the index of a sheet or its name by passing different values for
``sheet_name``.

- Pass a string to refer to the name of a particular sheet in the workbook.
- Pass an integer to refer to the index of a sheet. Indices follow Python
- Pass an integer to refer to the index of a sheet. Indices follow Python
convention, beginning at 0.
- The default value is ``sheet_name=0``. This reads the first sheet.

Expand Down
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ Bug Fixes
- Bug in ``fillna`` with ``limit`` and ``value`` specified
- Bug in ``DataFrame.to_stata`` when columns have non-string names (:issue:`4558`)
- Bug in compat with ``np.compress``, surfaced in (:issue:`6658`)
- Bug in binary operations where a ``Series`` on the right-hand side was not aligned on the requested ``level`` (:issue:`6681`)

pandas 0.13.1
-------------
Expand Down
20 changes: 10 additions & 10 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2801,12 +2801,12 @@ def _combine_series(self, other, func, fill_value=None, axis=None,
if axis is not None:
axis = self._get_axis_name(axis)
if axis == 'index':
return self._combine_match_index(other, func, fill_value)
return self._combine_match_index(other, func, level=level, fill_value=fill_value)
else:
return self._combine_match_columns(other, func, fill_value)
return self._combine_series_infer(other, func, fill_value)
return self._combine_match_columns(other, func, level=level, fill_value=fill_value)
return self._combine_series_infer(other, func, level=level, fill_value=fill_value)

def _combine_series_infer(self, other, func, fill_value=None):
def _combine_series_infer(self, other, func, level=None, fill_value=None):
if len(other) == 0:
return self * NA

Expand All @@ -2822,21 +2822,21 @@ def _combine_series_infer(self, other, func, fill_value=None):
"DataFrame.<op> to explicitly broadcast arithmetic "
"operations along the index"),
FutureWarning)
return self._combine_match_index(other, func, fill_value)
return self._combine_match_index(other, func, level=level, fill_value=fill_value)
else:
return self._combine_match_columns(other, func, fill_value)
return self._combine_match_columns(other, func, level=level, fill_value=fill_value)

def _combine_match_index(self, other, func, fill_value=None):
left, right = self.align(other, join='outer', axis=0, copy=False)
def _combine_match_index(self, other, func, level=None, fill_value=None):
left, right = self.align(other, join='outer', axis=0, level=level, copy=False)
if fill_value is not None:
raise NotImplementedError("fill_value %r not supported." %
fill_value)
return self._constructor(func(left.values.T, right.values).T,
index=left.index,
columns=self.columns, copy=False)

def _combine_match_columns(self, other, func, fill_value=None):
left, right = self.align(other, join='outer', axis=1, copy=False)
def _combine_match_columns(self, other, func, level=None, fill_value=None):
left, right = self.align(other, join='outer', axis=1, level=level, copy=False)
if fill_value is not None:
raise NotImplementedError("fill_value %r not supported" %
fill_value)
Expand Down
21 changes: 16 additions & 5 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2996,23 +2996,30 @@ def _align_series(self, other, join='outer', axis=None, level=None,

else:

# for join compatibility: when a level join is requested but the index
# of ``other`` is unnamed, name it after ``level`` before joining
other_index = other.index
if level is not None and other.index.name is None:
other_index = other_index.set_names([level])

# one has > 1 ndim
fdata = self._data
if axis == 0:
join_index = self.index
lidx, ridx = None, None
if not self.index.equals(other.index):
if not self.index.equals(other_index):
join_index, lidx, ridx = self.index.join(
other.index, how=join, return_indexers=True)
other_index, how=join, return_indexers=True)

if lidx is not None:
fdata = fdata.reindex_indexer(join_index, lidx, axis=1)

elif axis == 1:
join_index = self.columns
lidx, ridx = None, None
if not self.columns.equals(other.index):
if not self.columns.equals(other_index):
join_index, lidx, ridx = \
self.columns.join(other.index, how=join,
self.columns.join(other_index, how=join,
return_indexers=True)

if lidx is not None:
Expand All @@ -3024,7 +3031,11 @@ def _align_series(self, other, join='outer', axis=None, level=None,
fdata = fdata.copy()

left_result = DataFrame(fdata)
right_result = other if ridx is None else other.reindex(join_index)

if ridx is None:
right_result = other
else:
right_result = other.reindex(join_index, level=level)

# fill
fill_na = notnull(fill_value) or (method is not None)
Expand Down
8 changes: 6 additions & 2 deletions pandas/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,11 +456,13 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
default_fill_value=new_fill_value,
fill_value=new_fill_value).__finalize__(self)

def _combine_match_index(self, other, func, fill_value=None):
def _combine_match_index(self, other, func, level=None, fill_value=None):
new_data = {}

if fill_value is not None:
raise NotImplementedError
if level is not None:
raise NotImplementedError

new_index = self.index.union(other.index)
this = self
Expand All @@ -486,14 +488,16 @@ def _combine_match_index(self, other, func, fill_value=None):
default_fill_value=fill_value,
fill_value=self.default_fill_value).__finalize__(self)

def _combine_match_columns(self, other, func, fill_value):
def _combine_match_columns(self, other, func, level=None, fill_value=None):
# patched version of DataFrame._combine_match_columns to account for
# NumPy circumventing __rsub__ with float64 types, e.g.: 3.0 - series,
# where 3.0 is numpy.float64 and series is a SparseSeries. Still
# possible for this to happen, which is bothersome

if fill_value is not None:
raise NotImplementedError
if level is not None:
raise NotImplementedError

new_data = {}

Expand Down
32 changes: 32 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4941,6 +4941,38 @@ def test_arith_flex_frame(self):
with assertRaisesRegexp(NotImplementedError, 'fill_value'):
self.frame.add(self.frame.irow(0), axis='index', fill_value=3)

def test_binary_ops_align(self):
    """Flex binary ops with a Series rhs align on a MultiIndex level.

    Regression test for GH 6681: for each arithmetic method, the result of
    ``df.<op>(series, level=..., axis=0)`` is compared against a frame
    assembled by applying the operator to each level slice separately.
    """
    level_values = [list('abc'), ['one', 'two', 'three'], [1, 2, 3]]
    index = MultiIndex.from_product(level_values,
                                    names=['first', 'second', 'third'])

    data = np.arange(27 * 3).reshape(27, 3)
    df = DataFrame(data, index=index,
                   columns=['value1', 'value2', 'value3']).sortlevel()

    idx = pd.IndexSlice
    for op in ['add', 'sub', 'mul', 'div', 'truediv']:
        opa = getattr(operator, op, None)
        if opa is None:
            # the operator module does not expose this name; nothing to
            # compare the flex method against
            continue
        flex_method = getattr(df, op)

        # align on the innermost level: every label has a matching value
        x = Series([1.0, 10.0, 100.0], [1, 2, 3])
        result = flex_method(x, level='third', axis=0)

        pieces = []
        for i, v in x.iteritems():
            pieces.append(opa(df.loc[idx[:, :, i], :], v))
        expected = pd.concat(pieces).sortlevel()
        assert_frame_equal(result, expected)

        # align on the middle level with only 'two' and 'three' supplied;
        # the expected frame is reindexed back onto df's index
        x = Series([1.0, 10.0], ['two', 'three'])
        result = flex_method(x, level='second', axis=0)

        pieces = []
        for i, v in x.iteritems():
            pieces.append(opa(df.loc[idx[:, i], :], v))
        expected = pd.concat(pieces).reindex_like(df).sortlevel()
        assert_frame_equal(result, expected)

def test_arith_mixed(self):

left = DataFrame({'A': ['a', 'b', 'c'],
Expand Down