Skip to content

Commit 914b5dd

Browse files
committed
Merge pull request #10716 from sinhrks/salign
BUG: Series.align with MultiIndex may be inverted
2 parents bc45bca + 99b90de commit 914b5dd

File tree

5 files changed

+127
-0
lines changed

5 files changed

+127
-0
lines changed

doc/source/whatsnew/v0.17.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,8 @@ Bug Fixes
648648
- Bug in line and kde plot cannot accept multiple colors when ``subplots=True`` (:issue:`9894`)
649649
- Bug in ``DataFrame.plot`` raises ``ValueError`` when color name is specified by multiple characters (:issue:`10387`)
650650

651+
- Bug in left and right ``align`` of ``Series`` with ``MultiIndex`` may be inverted (:issue:`10665`)
652+
- Bug in left and right ``join`` with ``MultiIndex`` may be inverted (:issue:`10741`)
651653

652654
- Bug in ``read_stata`` when reading a file with a different order set in ``columns`` (:issue:`10757`)
653655
- Bug in ``Categorical`` may not be represented properly when category contains ``tz`` or ``Period`` (:issue:`10713`)

pandas/core/index.py

+2
Original file line numberDiff line numberDiff line change
@@ -2159,6 +2159,8 @@ def _join_multi(self, other, how, return_indexers=True):
21592159
if self_is_mi:
21602160
self, other = other, self
21612161
flip_order = True
2162+
# flip if join method is right or left
2163+
how = {'right': 'left', 'left': 'right'}.get(how, how)
21622164

21632165
level = other.names.index(jl)
21642166
result = self._join_level(other, level, how=how,

pandas/tests/test_frame.py

+60
Original file line numberDiff line numberDiff line change
@@ -4749,6 +4749,33 @@ def test_join_str_datetime(self):
47494749

47504750
self.assertEqual(len(tst.columns), 3)
47514751

4752+
def test_join_multiindex_leftright(self):
4753+
# GH 10741
4754+
df1 = pd.DataFrame([['a', 'x', 0.471780], ['a','y', 0.774908],
4755+
['a', 'z', 0.563634], ['b', 'x', -0.353756],
4756+
['b', 'y', 0.368062], ['b', 'z', -1.721840],
4757+
['c', 'x', 1], ['c', 'y', 2], ['c', 'z', 3]],
4758+
columns=['first', 'second', 'value1']).set_index(['first', 'second'])
4759+
df2 = pd.DataFrame([['a', 10], ['b', 20]], columns=['first', 'value2']).set_index(['first'])
4760+
4761+
exp = pd.DataFrame([[0.471780, 10], [0.774908, 10], [0.563634, 10],
4762+
[-0.353756, 20], [0.368062, 20], [-1.721840, 20],
4763+
[1.000000, np.nan], [2.000000, np.nan], [3.000000, np.nan]],
4764+
index=df1.index, columns=['value1', 'value2'])
4765+
4766+
# these must be the same results (but columns are flipped)
4767+
tm.assert_frame_equal(df1.join(df2, how='left'), exp)
4768+
tm.assert_frame_equal(df2.join(df1, how='right'), exp[['value2', 'value1']])
4769+
4770+
exp_idx = pd.MultiIndex.from_product([['a', 'b'], ['x', 'y', 'z']],
4771+
names=['first', 'second'])
4772+
exp = pd.DataFrame([[0.471780, 10], [0.774908, 10], [0.563634, 10],
4773+
[-0.353756, 20], [0.368062, 20], [-1.721840, 20]],
4774+
index=exp_idx, columns=['value1', 'value2'])
4775+
4776+
tm.assert_frame_equal(df1.join(df2, how='right'), exp)
4777+
tm.assert_frame_equal(df2.join(df1, how='left'), exp[['value2', 'value1']])
4778+
47524779
def test_from_records_sequencelike(self):
47534780
df = DataFrame({'A' : np.array(np.random.randn(6), dtype = np.float64),
47544781
'A1': np.array(np.random.randn(6), dtype = np.float64),
@@ -10100,6 +10127,39 @@ def test_align_int_fill_bug(self):
1010010127
expected = df2 - df2.mean()
1010110128
assert_frame_equal(result, expected)
1010210129

10130+
def test_align_multiindex(self):
10131+
# GH 10665
10132+
# same test cases as test_align_multiindex in test_series.py
10133+
10134+
midx = pd.MultiIndex.from_product([range(2), range(3), range(2)],
10135+
names=('a', 'b', 'c'))
10136+
idx = pd.Index(range(2), name='b')
10137+
df1 = pd.DataFrame(np.arange(12), index=midx)
10138+
df2 = pd.DataFrame(np.arange(2), index=idx)
10139+
10140+
# these must be the same results (but flipped)
10141+
res1l, res1r = df1.align(df2, join='left')
10142+
res2l, res2r = df2.align(df1, join='right')
10143+
10144+
expl = df1
10145+
tm.assert_frame_equal(expl, res1l)
10146+
tm.assert_frame_equal(expl, res2r)
10147+
expr = pd.DataFrame([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
10148+
tm.assert_frame_equal(expr, res1r)
10149+
tm.assert_frame_equal(expr, res2l)
10150+
10151+
res1l, res1r = df1.align(df2, join='right')
10152+
res2l, res2r = df2.align(df1, join='left')
10153+
10154+
exp_idx = pd.MultiIndex.from_product([range(2), range(2), range(2)],
10155+
names=('a', 'b', 'c'))
10156+
expl = pd.DataFrame([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
10157+
tm.assert_frame_equal(expl, res1l)
10158+
tm.assert_frame_equal(expl, res2r)
10159+
expr = pd.DataFrame([0, 0, 1, 1] * 2, index=exp_idx)
10160+
tm.assert_frame_equal(expr, res1r)
10161+
tm.assert_frame_equal(expr, res2l)
10162+
1010310163
def test_where(self):
1010410164
default_frame = DataFrame(np.random.randn(5, 3),columns=['A','B','C'])
1010510165

pandas/tests/test_index.py

+31
Original file line numberDiff line numberDiff line change
@@ -4616,6 +4616,37 @@ def test_join_self(self):
46164616
joined = res.join(res, how=kind)
46174617
self.assertIs(res, joined)
46184618

4619+
def test_join_multi(self):
4620+
# GH 10665
4621+
midx = pd.MultiIndex.from_product([np.arange(4), np.arange(4)], names=['a', 'b'])
4622+
idx = pd.Index([1, 2, 5], name='b')
4623+
4624+
# inner
4625+
jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True)
4626+
exp_idx = pd.MultiIndex.from_product([np.arange(4), [1, 2]], names=['a', 'b'])
4627+
exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14])
4628+
exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1])
4629+
self.assert_index_equal(jidx, exp_idx)
4630+
self.assert_numpy_array_equal(lidx, exp_lidx)
4631+
self.assert_numpy_array_equal(ridx, exp_ridx)
4632+
# flip
4633+
jidx, ridx, lidx = idx.join(midx, how='inner', return_indexers=True)
4634+
self.assert_index_equal(jidx, exp_idx)
4635+
self.assert_numpy_array_equal(lidx, exp_lidx)
4636+
self.assert_numpy_array_equal(ridx, exp_ridx)
4637+
4638+
# keep MultiIndex
4639+
jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True)
4640+
exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1])
4641+
self.assert_index_equal(jidx, midx)
4642+
self.assertIsNone(lidx)
4643+
self.assert_numpy_array_equal(ridx, exp_ridx)
4644+
# flip
4645+
jidx, ridx, lidx = idx.join(midx, how='right', return_indexers=True)
4646+
self.assert_index_equal(jidx, midx)
4647+
self.assertIsNone(lidx)
4648+
self.assert_numpy_array_equal(ridx, exp_ridx)
4649+
46194650
def test_reindex(self):
46204651
result, indexer = self.index.reindex(list(self.index[:4]))
46214652
tm.assertIsInstance(result, MultiIndex)

pandas/tests/test_series.py

+32
Original file line numberDiff line numberDiff line change
@@ -6351,6 +6351,38 @@ def test_align_sameindex(self):
63516351
# self.assertIsNot(a.index, self.ts.index)
63526352
# self.assertIsNot(b.index, self.ts.index)
63536353

6354+
def test_align_multiindex(self):
6355+
# GH 10665
6356+
6357+
midx = pd.MultiIndex.from_product([range(2), range(3), range(2)],
6358+
names=('a', 'b', 'c'))
6359+
idx = pd.Index(range(2), name='b')
6360+
s1 = pd.Series(np.arange(12), index=midx)
6361+
s2 = pd.Series(np.arange(2), index=idx)
6362+
6363+
# these must be the same results (but flipped)
6364+
res1l, res1r = s1.align(s2, join='left')
6365+
res2l, res2r = s2.align(s1, join='right')
6366+
6367+
expl = s1
6368+
tm.assert_series_equal(expl, res1l)
6369+
tm.assert_series_equal(expl, res2r)
6370+
expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
6371+
tm.assert_series_equal(expr, res1r)
6372+
tm.assert_series_equal(expr, res2l)
6373+
6374+
res1l, res1r = s1.align(s2, join='right')
6375+
res2l, res2r = s2.align(s1, join='left')
6376+
6377+
exp_idx = pd.MultiIndex.from_product([range(2), range(2), range(2)],
6378+
names=('a', 'b', 'c'))
6379+
expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
6380+
tm.assert_series_equal(expl, res1l)
6381+
tm.assert_series_equal(expl, res2r)
6382+
expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx)
6383+
tm.assert_series_equal(expr, res1r)
6384+
tm.assert_series_equal(expr, res2l)
6385+
63546386
def test_reindex(self):
63556387

63566388
identity = self.series.reindex(self.series.index)

0 commit comments

Comments
 (0)