Skip to content

Commit 99b90de

Browse files
committed
Series.align with MultiIndex may be inverted
1 parent 5a4d60f commit 99b90de

File tree

5 files changed

+127
-0
lines changed

5 files changed

+127
-0
lines changed

doc/source/whatsnew/v0.17.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,8 @@ Bug Fixes
603603
- Bug in line and kde plot cannot accept multiple colors when ``subplots=True`` (:issue:`9894`)
604604
- Bug in ``DataFrame.plot`` raises ``ValueError`` when color name is specified by multiple characters (:issue:`10387`)
605605

606+
- Bug in left and right ``align`` of ``Series`` with ``MultiIndex`` may be inverted (:issue:`10665`)
607+
- Bug in left and right ``join`` with ``MultiIndex`` may be inverted (:issue:`10741`)
606608

607609
- Bug in ``read_stata`` when reading a file with a different order set in ``columns`` (:issue:`10757`)
608610

pandas/core/index.py

+2
Original file line numberDiff line numberDiff line change
@@ -2156,6 +2156,8 @@ def _join_multi(self, other, how, return_indexers=True):
21562156
if self_is_mi:
21572157
self, other = other, self
21582158
flip_order = True
2159+
# self and other were swapped above, so swap 'left' <-> 'right' as well to keep the join side the caller asked for
2160+
how = {'right': 'left', 'left': 'right'}.get(how, how)
21592161

21602162
level = other.names.index(jl)
21612163
result = self._join_level(other, level, how=how,

pandas/tests/test_frame.py

+60
Original file line numberDiff line numberDiff line change
@@ -4749,6 +4749,33 @@ def test_join_str_datetime(self):
47494749

47504750
self.assertEqual(len(tst.columns), 3)
47514751

4752+
def test_join_multiindex_leftright(self):
4753+
# GH 10741
4754+
df1 = pd.DataFrame([['a', 'x', 0.471780], ['a','y', 0.774908],
4755+
['a', 'z', 0.563634], ['b', 'x', -0.353756],
4756+
['b', 'y', 0.368062], ['b', 'z', -1.721840],
4757+
['c', 'x', 1], ['c', 'y', 2], ['c', 'z', 3]],
4758+
columns=['first', 'second', 'value1']).set_index(['first', 'second'])
4759+
df2 = pd.DataFrame([['a', 10], ['b', 20]], columns=['first', 'value2']).set_index(['first'])
4760+
4761+
exp = pd.DataFrame([[0.471780, 10], [0.774908, 10], [0.563634, 10],
4762+
[-0.353756, 20], [0.368062, 20], [-1.721840, 20],
4763+
[1.000000, np.nan], [2.000000, np.nan], [3.000000, np.nan]],
4764+
index=df1.index, columns=['value1', 'value2'])
4765+
4766+
# these must be the same results (but columns are flipped)
4767+
tm.assert_frame_equal(df1.join(df2, how='left'), exp)
4768+
tm.assert_frame_equal(df2.join(df1, how='right'), exp[['value2', 'value1']])
4769+
4770+
exp_idx = pd.MultiIndex.from_product([['a', 'b'], ['x', 'y', 'z']],
4771+
names=['first', 'second'])
4772+
exp = pd.DataFrame([[0.471780, 10], [0.774908, 10], [0.563634, 10],
4773+
[-0.353756, 20], [0.368062, 20], [-1.721840, 20]],
4774+
index=exp_idx, columns=['value1', 'value2'])
4775+
4776+
tm.assert_frame_equal(df1.join(df2, how='right'), exp)
4777+
tm.assert_frame_equal(df2.join(df1, how='left'), exp[['value2', 'value1']])
4778+
47524779
def test_from_records_sequencelike(self):
47534780
df = DataFrame({'A' : np.array(np.random.randn(6), dtype = np.float64),
47544781
'A1': np.array(np.random.randn(6), dtype = np.float64),
@@ -9895,6 +9922,39 @@ def test_align_int_fill_bug(self):
98959922
expected = df2 - df2.mean()
98969923
assert_frame_equal(result, expected)
98979924

9925+
def test_align_multiindex(self):
9926+
# GH 10665
9927+
# same test cases as test_align_multiindex in test_series.py
9928+
9929+
midx = pd.MultiIndex.from_product([range(2), range(3), range(2)],
9930+
names=('a', 'b', 'c'))
9931+
idx = pd.Index(range(2), name='b')
9932+
df1 = pd.DataFrame(np.arange(12), index=midx)
9933+
df2 = pd.DataFrame(np.arange(2), index=idx)
9934+
9935+
# these must be the same results (but flipped)
9936+
res1l, res1r = df1.align(df2, join='left')
9937+
res2l, res2r = df2.align(df1, join='right')
9938+
9939+
expl = df1
9940+
tm.assert_frame_equal(expl, res1l)
9941+
tm.assert_frame_equal(expl, res2r)
9942+
expr = pd.DataFrame([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
9943+
tm.assert_frame_equal(expr, res1r)
9944+
tm.assert_frame_equal(expr, res2l)
9945+
9946+
res1l, res1r = df1.align(df2, join='right')
9947+
res2l, res2r = df2.align(df1, join='left')
9948+
9949+
exp_idx = pd.MultiIndex.from_product([range(2), range(2), range(2)],
9950+
names=('a', 'b', 'c'))
9951+
expl = pd.DataFrame([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
9952+
tm.assert_frame_equal(expl, res1l)
9953+
tm.assert_frame_equal(expl, res2r)
9954+
expr = pd.DataFrame([0, 0, 1, 1] * 2, index=exp_idx)
9955+
tm.assert_frame_equal(expr, res1r)
9956+
tm.assert_frame_equal(expr, res2l)
9957+
98989958
def test_where(self):
98999959
default_frame = DataFrame(np.random.randn(5, 3),columns=['A','B','C'])
99009960

pandas/tests/test_index.py

+31
Original file line numberDiff line numberDiff line change
@@ -4589,6 +4589,37 @@ def test_join_self(self):
45894589
joined = res.join(res, how=kind)
45904590
self.assertIs(res, joined)
45914591

4592+
def test_join_multi(self):
4593+
# GH 10665
4594+
midx = pd.MultiIndex.from_product([np.arange(4), np.arange(4)], names=['a', 'b'])
4595+
idx = pd.Index([1, 2, 5], name='b')
4596+
4597+
# inner
4598+
jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True)
4599+
exp_idx = pd.MultiIndex.from_product([np.arange(4), [1, 2]], names=['a', 'b'])
4600+
exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14])
4601+
exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1])
4602+
self.assert_index_equal(jidx, exp_idx)
4603+
self.assert_numpy_array_equal(lidx, exp_lidx)
4604+
self.assert_numpy_array_equal(ridx, exp_ridx)
4605+
# flip
4606+
jidx, ridx, lidx = idx.join(midx, how='inner', return_indexers=True)
4607+
self.assert_index_equal(jidx, exp_idx)
4608+
self.assert_numpy_array_equal(lidx, exp_lidx)
4609+
self.assert_numpy_array_equal(ridx, exp_ridx)
4610+
4611+
# keep MultiIndex
4612+
jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True)
4613+
exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1])
4614+
self.assert_index_equal(jidx, midx)
4615+
self.assertIsNone(lidx)
4616+
self.assert_numpy_array_equal(ridx, exp_ridx)
4617+
# flip
4618+
jidx, ridx, lidx = idx.join(midx, how='right', return_indexers=True)
4619+
self.assert_index_equal(jidx, midx)
4620+
self.assertIsNone(lidx)
4621+
self.assert_numpy_array_equal(ridx, exp_ridx)
4622+
45924623
def test_reindex(self):
45934624
result, indexer = self.index.reindex(list(self.index[:4]))
45944625
tm.assertIsInstance(result, MultiIndex)

pandas/tests/test_series.py

+32
Original file line numberDiff line numberDiff line change
@@ -6288,6 +6288,38 @@ def test_align_sameindex(self):
62886288
# self.assertIsNot(a.index, self.ts.index)
62896289
# self.assertIsNot(b.index, self.ts.index)
62906290

6291+
def test_align_multiindex(self):
6292+
# GH 10665
6293+
6294+
midx = pd.MultiIndex.from_product([range(2), range(3), range(2)],
6295+
names=('a', 'b', 'c'))
6296+
idx = pd.Index(range(2), name='b')
6297+
s1 = pd.Series(np.arange(12), index=midx)
6298+
s2 = pd.Series(np.arange(2), index=idx)
6299+
6300+
# these must be the same results (but flipped)
6301+
res1l, res1r = s1.align(s2, join='left')
6302+
res2l, res2r = s2.align(s1, join='right')
6303+
6304+
expl = s1
6305+
tm.assert_series_equal(expl, res1l)
6306+
tm.assert_series_equal(expl, res2r)
6307+
expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
6308+
tm.assert_series_equal(expr, res1r)
6309+
tm.assert_series_equal(expr, res2l)
6310+
6311+
res1l, res1r = s1.align(s2, join='right')
6312+
res2l, res2r = s2.align(s1, join='left')
6313+
6314+
exp_idx = pd.MultiIndex.from_product([range(2), range(2), range(2)],
6315+
names=('a', 'b', 'c'))
6316+
expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
6317+
tm.assert_series_equal(expl, res1l)
6318+
tm.assert_series_equal(expl, res2r)
6319+
expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx)
6320+
tm.assert_series_equal(expr, res1r)
6321+
tm.assert_series_equal(expr, res2l)
6322+
62916323
def test_reindex(self):
62926324

62936325
identity = self.series.reindex(self.series.index)

0 commit comments

Comments
 (0)