diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index dbe446f0a7b4f..6ba8edcca59e8 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -47,7 +47,7 @@ API changes - +- ``pandas.merge()`` and ``DataFrame.join()`` will show a ``UserWarning`` when merging/joining a single-level DataFrame with a multi-level DataFrame (:issue:`9455`, :issue:`12219`) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 64be1ef460f51..26a88d601ec4b 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -142,6 +142,31 @@ def test_drop_multiindex_not_lexsorted(self): tm.assert_frame_equal(result, expected) + def test_merge_join_different_levels(self): + # GH 9455 + + # first dataframe + df1 = DataFrame(columns=['a', 'b'], data=[[1, 11], [0, 22]]) + + # second dataframe + columns = MultiIndex.from_tuples([('a', ''), ('c', 'c1')]) + df2 = DataFrame(columns=columns, data=[[1, 33], [0, 44]]) + + # merge + columns = ['a', 'b', ('c', 'c1')] + expected = DataFrame(columns=columns, data=[[1, 11, 33], [0, 22, 44]]) + with tm.assert_produces_warning(UserWarning): + result = pd.merge(df1, df2, on='a') + tm.assert_frame_equal(result, expected) + + # join, see discussion in GH 12219 + columns = ['a', 'b', ('a', ''), ('c', 'c1')] + expected = DataFrame(columns=columns, + data=[[1, 11, 0, 44], [0, 22, 1, 33]]) + with tm.assert_produces_warning(UserWarning): + result = df1.join(df2, on='a') + tm.assert_frame_equal(result, expected) + def test_reindex(self): newFrame = self.frame.reindex(self.ts1.index) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 82fdf0a3d3b46..895f5b74a3e80 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -2,6 +2,8 @@ SQL-style merge routines """ +import warnings + import numpy as np from pandas.compat import range, lrange, lzip, zip, map, filter import pandas.compat as compat @@ 
-193,6 +195,13 @@ def __init__(self, left, right, how='inner', on=None, 'can not merge DataFrame with instance of ' 'type {0}'.format(type(right))) + # warn user when merging between different levels + if left.columns.nlevels != right.columns.nlevels: + msg = ('merging between different levels can give an unintended ' + 'result ({0} levels on the left, {1} on the right)') + msg = msg.format(left.columns.nlevels, right.columns.nlevels) + warnings.warn(msg, UserWarning) + # note this function has side effects (self.left_join_keys, self.right_join_keys, diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index d5ddfe624e240..f27192dd3f379 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -459,13 +459,15 @@ def test_join_inner_multiindex(self): # _assert_same_contents(expected, expected2.ix[:, expected.columns]) def test_join_hierarchical_mixed(self): + # GH 2024 df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=['a', 'b', 'c']) new_df = df.groupby(['a']).agg({'b': [np.mean, np.sum]}) other_df = DataFrame( [(1, 2, 3), (7, 10, 6)], columns=['a', 'b', 'd']) other_df.set_index('a', inplace=True) - - result = merge(new_df, other_df, left_index=True, right_index=True) + # GH 9455, 12219 + with tm.assert_produces_warning(UserWarning): + result = merge(new_df, other_df, left_index=True, right_index=True) self.assertTrue(('b', 'mean') in result) self.assertTrue('b' in result)