Skip to content

Commit 66cf19a

Browse files
committed
Merge pull request pandas-dev#6682 from jreback/binary_op
BUG: Bug in binary operations with a rhs of a Series not aligning (GH6681)
2 parents c12166f + cd81d20 commit 66cf19a

File tree

7 files changed

+85
-24
lines changed

7 files changed

+85
-24
lines changed

doc/source/basics.rst

+17-4
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,9 @@ either match on the *index* or *columns* via the **axis** keyword:
140140

141141
.. ipython:: python
142142
143-
d = {'one' : Series(randn(3), index=['a', 'b', 'c']),
144-
'two' : Series(randn(4), index=['a', 'b', 'c', 'd']),
145-
'three' : Series(randn(3), index=['b', 'c', 'd'])}
146-
df = df_orig = DataFrame(d)
143+
df = DataFrame({'one' : Series(randn(3), index=['a', 'b', 'c']),
144+
'two' : Series(randn(4), index=['a', 'b', 'c', 'd']),
145+
'three' : Series(randn(3), index=['b', 'c', 'd'])})
147146
df
148147
row = df.ix[1]
149148
column = df['two']
@@ -154,6 +153,20 @@ either match on the *index* or *columns* via the **axis** keyword:
154153
df.sub(column, axis='index')
155154
df.sub(column, axis=0)
156155
156+
.. ipython:: python
157+
:suppress:
158+
159+
df_orig = df
160+
161+
Furthermore, you can align a level of a multi-indexed DataFrame with a Series by passing the ``level`` keyword.
162+
163+
.. ipython:: python
164+
165+
dfmi = df.copy()
166+
dfmi.index = MultiIndex.from_tuples([(1,'a'),(1,'b'),(1,'c'),(2,'a')],
167+
names=['first','second'])
168+
dfmi.sub(column, axis=0, level='second')
169+
157170
With Panel, describing the matching behavior is a bit more difficult, so
158171
the arithmetic methods instead (and perhaps confusingly?) give you the option
159172
to specify the *broadcast axis*. For example, suppose we wished to demean the

doc/source/io.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -1843,11 +1843,11 @@ the sheet names using the ``sheet_names`` attribute.
18431843
.. versionadded:: 0.13
18441844

18451845
There are now two ways to read in sheets from an Excel file. You can provide
1846-
either the index of a sheet or its name to by passing different values for
1847-
``sheet_name``.
1846+
either the index of a sheet or its name by passing different values for
1847+
``sheet_name``.
18481848

18491849
- Pass a string to refer to the name of a particular sheet in the workbook.
1850-
- Pass an integer to refer to the index of a sheet. Indices follow Python
1850+
- Pass an integer to refer to the index of a sheet. Indices follow Python
18511851
convention, beginning at 0.
18521852
- The default value is ``sheet_name=0``. This reads the first sheet.
18531853

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ Bug Fixes
268268
- Bug in ``fillna`` with ``limit`` and ``value`` specified
269269
- Bug in ``DataFrame.to_stata`` when columns have non-string names (:issue:`4558`)
270270
- Bug in compat with ``np.compress``, surfaced in (:issue:`6658`)
271+
- Bug in binary operations where a ``Series`` on the right-hand side was not aligned (:issue:`6681`)
271272

272273
pandas 0.13.1
273274
-------------

pandas/core/frame.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -2801,12 +2801,12 @@ def _combine_series(self, other, func, fill_value=None, axis=None,
28012801
if axis is not None:
28022802
axis = self._get_axis_name(axis)
28032803
if axis == 'index':
2804-
return self._combine_match_index(other, func, fill_value)
2804+
return self._combine_match_index(other, func, level=level, fill_value=fill_value)
28052805
else:
2806-
return self._combine_match_columns(other, func, fill_value)
2807-
return self._combine_series_infer(other, func, fill_value)
2806+
return self._combine_match_columns(other, func, level=level, fill_value=fill_value)
2807+
return self._combine_series_infer(other, func, level=level, fill_value=fill_value)
28082808

2809-
def _combine_series_infer(self, other, func, fill_value=None):
2809+
def _combine_series_infer(self, other, func, level=None, fill_value=None):
28102810
if len(other) == 0:
28112811
return self * NA
28122812

@@ -2822,21 +2822,21 @@ def _combine_series_infer(self, other, func, fill_value=None):
28222822
"DataFrame.<op> to explicitly broadcast arithmetic "
28232823
"operations along the index"),
28242824
FutureWarning)
2825-
return self._combine_match_index(other, func, fill_value)
2825+
return self._combine_match_index(other, func, level=level, fill_value=fill_value)
28262826
else:
2827-
return self._combine_match_columns(other, func, fill_value)
2827+
return self._combine_match_columns(other, func, level=level, fill_value=fill_value)
28282828

2829-
def _combine_match_index(self, other, func, fill_value=None):
2830-
left, right = self.align(other, join='outer', axis=0, copy=False)
2829+
def _combine_match_index(self, other, func, level=None, fill_value=None):
2830+
left, right = self.align(other, join='outer', axis=0, level=level, copy=False)
28312831
if fill_value is not None:
28322832
raise NotImplementedError("fill_value %r not supported." %
28332833
fill_value)
28342834
return self._constructor(func(left.values.T, right.values).T,
28352835
index=left.index,
28362836
columns=self.columns, copy=False)
28372837

2838-
def _combine_match_columns(self, other, func, fill_value=None):
2839-
left, right = self.align(other, join='outer', axis=1, copy=False)
2838+
def _combine_match_columns(self, other, func, level=None, fill_value=None):
2839+
left, right = self.align(other, join='outer', axis=1, level=level, copy=False)
28402840
if fill_value is not None:
28412841
raise NotImplementedError("fill_value %r not supported" %
28422842
fill_value)

pandas/core/generic.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -2996,23 +2996,30 @@ def _align_series(self, other, join='outer', axis=None, level=None,
29962996

29972997
else:
29982998

2999+
# for join compatibility: if ``other`` has an unnamed index but a
3000+
# level join was requested, name that index after the level
3001+
other_index = other.index
3002+
if level is not None and other.index.name is None:
3003+
other_index = other_index.set_names([level])
3004+
29993005
# one has > 1 ndim
30003006
fdata = self._data
30013007
if axis == 0:
30023008
join_index = self.index
30033009
lidx, ridx = None, None
3004-
if not self.index.equals(other.index):
3010+
if not self.index.equals(other_index):
30053011
join_index, lidx, ridx = self.index.join(
3006-
other.index, how=join, return_indexers=True)
3012+
other_index, how=join, return_indexers=True)
30073013

30083014
if lidx is not None:
30093015
fdata = fdata.reindex_indexer(join_index, lidx, axis=1)
3016+
30103017
elif axis == 1:
30113018
join_index = self.columns
30123019
lidx, ridx = None, None
3013-
if not self.columns.equals(other.index):
3020+
if not self.columns.equals(other_index):
30143021
join_index, lidx, ridx = \
3015-
self.columns.join(other.index, how=join,
3022+
self.columns.join(other_index, how=join,
30163023
return_indexers=True)
30173024

30183025
if lidx is not None:
@@ -3024,7 +3031,11 @@ def _align_series(self, other, join='outer', axis=None, level=None,
30243031
fdata = fdata.copy()
30253032

30263033
left_result = DataFrame(fdata)
3027-
right_result = other if ridx is None else other.reindex(join_index)
3034+
3035+
if ridx is None:
3036+
right_result = other
3037+
else:
3038+
right_result = other.reindex(join_index, level=level)
30283039

30293040
# fill
30303041
fill_na = notnull(fill_value) or (method is not None)

pandas/sparse/frame.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -456,11 +456,13 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
456456
default_fill_value=new_fill_value,
457457
fill_value=new_fill_value).__finalize__(self)
458458

459-
def _combine_match_index(self, other, func, fill_value=None):
459+
def _combine_match_index(self, other, func, level=None, fill_value=None):
460460
new_data = {}
461461

462462
if fill_value is not None:
463463
raise NotImplementedError
464+
if level is not None:
465+
raise NotImplementedError
464466

465467
new_index = self.index.union(other.index)
466468
this = self
@@ -486,14 +488,16 @@ def _combine_match_index(self, other, func, fill_value=None):
486488
default_fill_value=fill_value,
487489
fill_value=self.default_fill_value).__finalize__(self)
488490

489-
def _combine_match_columns(self, other, func, fill_value):
491+
def _combine_match_columns(self, other, func, level=None, fill_value=None):
490492
# patched version of DataFrame._combine_match_columns to account for
491493
# NumPy circumventing __rsub__ with float64 types, e.g.: 3.0 - series,
492494
# where 3.0 is numpy.float64 and series is a SparseSeries. Still
493495
# possible for this to happen, which is bothersome
494496

495497
if fill_value is not None:
496498
raise NotImplementedError
499+
if level is not None:
500+
raise NotImplementedError
497501

498502
new_data = {}
499503

pandas/tests/test_frame.py

+32
Original file line numberDiff line numberDiff line change
@@ -4941,6 +4941,38 @@ def test_arith_flex_frame(self):
49414941
with assertRaisesRegexp(NotImplementedError, 'fill_value'):
49424942
self.frame.add(self.frame.irow(0), axis='index', fill_value=3)
49434943

4944+
def test_binary_ops_align(self):
4945+
4946+
# test aligning binary ops
4947+
4948+
# GH 6681
4949+
index=MultiIndex.from_product([list('abc'),
4950+
['one','two','three'],
4951+
[1,2,3]],
4952+
names=['first','second','third'])
4953+
4954+
df = DataFrame(np.arange(27*3).reshape(27,3),
4955+
index=index,
4956+
columns=['value1','value2','value3']).sortlevel()
4957+
4958+
idx = pd.IndexSlice
4959+
for op in ['add','sub','mul','div','truediv']:
4960+
opa = getattr(operator,op,None)
4961+
if opa is None:
4962+
continue
4963+
4964+
x = Series([ 1.0, 10.0, 100.0], [1,2,3])
4965+
result = getattr(df,op)(x,level='third',axis=0)
4966+
4967+
expected = pd.concat([ opa(df.loc[idx[:,:,i],:],v) for i, v in x.iteritems() ]).sortlevel()
4968+
assert_frame_equal(result, expected)
4969+
4970+
x = Series([ 1.0, 10.0], ['two','three'])
4971+
result = getattr(df,op)(x,level='second',axis=0)
4972+
4973+
expected = pd.concat([ opa(df.loc[idx[:,i],:],v) for i, v in x.iteritems() ]).reindex_like(df).sortlevel()
4974+
assert_frame_equal(result, expected)
4975+
49444976
def test_arith_mixed(self):
49454977

49464978
left = DataFrame({'A': ['a', 'b', 'c'],

0 commit comments

Comments
 (0)