From 7f82be94233b2349ca53262831709d2f47f1f42f Mon Sep 17 00:00:00 2001 From: Tong Shen Date: Sun, 9 Apr 2017 22:10:32 +0800 Subject: [PATCH 1/3] BUG: Fix MultiIndex names handling in pd.concat This is an attempted fix for issue #15787. The bug is caused by a discrepancy in _get_consensus_names between the intended definition of "non-None names" and its implementation: an index was only considered when all of its level names were non-None, so an index with at least one None level name was ignored. --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/indexes/api.py | 2 +- pandas/tests/tools/test_concat.py | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7664688ffa4f4..94e32da7eb191 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1170,6 +1170,7 @@ Indexing - Bug in creating a ``MultiIndex`` with tuples and not passing a list of names; this will now raise ``ValueError`` (:issue:`15110`) - Bug in the HTML display with with a ``MultiIndex`` and truncation (:issue:`14882`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) +- Bug in ``pd.concat()`` where the names of the ``MultiIndex`` of the resulting ``DataFrame`` are not handled correctly when ``None`` is present in the names of the ``MultiIndex`` of an input ``DataFrame`` (:issue:`15787`) I/O ^^^ diff --git a/pandas/indexes/api.py b/pandas/indexes/api.py index a38453e0d2ccc..a3cb54ca97071 100644 --- a/pandas/indexes/api.py +++ b/pandas/indexes/api.py @@ -107,7 +107,7 @@ def _get_consensus_names(indexes): # find the non-none names, need to tupleify to make # the set hashable, then reverse on return consensus_names = set([tuple(i.names) for i in indexes - if all(n is not None for n in i.names)]) + if any(n is not None for n in i.names)]) if len(consensus_names) == 1: return list(list(consensus_names)[0]) return [None] * indexes[0].nlevels diff --git a/pandas/tests/tools/test_concat.py 
b/pandas/tests/tools/test_concat.py index 623c5fa02fcb2..45f7d03f6bfeb 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -1048,6 +1048,24 @@ def test_concat_multiindex_with_tz(self): result = concat([df, df]) tm.assert_frame_equal(result, expected) + def test_concat_multiindex_with_none_in_index_names(self): + # GH 15787 + from pandas.indexes.frozen import FrozenList + + index = pd.MultiIndex.from_product([[1], range(5)], + names=['level1', None]) + df = pd.DataFrame({'col': range(5)}, index=index) + + result = concat([df, df], keys=[1, 2], names=['level2']) + result = result.index.names + expected = FrozenList(['level2', 'level1', None]) + self.assertEqual(result, expected) + + result = concat([df, df[:2]], keys=[1, 2], names=['level2']) + result = result.index.names + expected = FrozenList(['level2', 'level1', None]) + self.assertEqual(result, expected) + def test_concat_keys_and_levels(self): df = DataFrame(np.random.randn(1, 3)) df2 = DataFrame(np.random.randn(1, 4)) From db7866ffede3f2d3836607c7501d4d2a9b2e8ebc Mon Sep 17 00:00:00 2001 From: Tong Shen Date: Sun, 9 Apr 2017 23:56:06 +0800 Subject: [PATCH 2/3] construct expected results as DataFrame instead of FrozenList --- pandas/tests/tools/test_concat.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index 45f7d03f6bfeb..6ff6e553f16a7 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -1050,21 +1050,25 @@ def test_concat_multiindex_with_tz(self): def test_concat_multiindex_with_none_in_index_names(self): # GH 15787 - from pandas.indexes.frozen import FrozenList - index = pd.MultiIndex.from_product([[1], range(5)], names=['level1', None]) df = pd.DataFrame({'col': range(5)}, index=index) result = concat([df, df], keys=[1, 2], names=['level2']) - result = result.index.names - expected = FrozenList(['level2', 'level1', 
None]) - self.assertEqual(result, expected) + index = pd.MultiIndex.from_product([[1, 2], [1], range(5)], + names=['level2', 'level1', None]) + expected = pd.DataFrame({'col': list(range(5)) * 2}, index=index) + assert_frame_equal(result, expected) result = concat([df, df[:2]], keys=[1, 2], names=['level2']) - result = result.index.names - expected = FrozenList(['level2', 'level1', None]) - self.assertEqual(result, expected) + level2 = [1] * 5 + [2] * 2 + level1 = [1] * 7 + no_name = list(range(5)) + list(range(2)) + tuples = list(zip(level2, level1, no_name)) + index = pd.MultiIndex.from_tuples(tuples, + names=['level2', 'level1', None]) + expected = pd.DataFrame({'col': no_name}, index=index) + assert_frame_equal(result, expected) def test_concat_keys_and_levels(self): df = DataFrame(np.random.randn(1, 3)) From 8c0e7211f8ddad6353e2b1bbf005d808f853d322 Mon Sep 17 00:00:00 2001 From: Tong Shen Date: Mon, 10 Apr 2017 09:31:46 +0800 Subject: [PATCH 3/3] explicitly specify dtype when constructing DataFrame to avoid test failure --- pandas/tests/tools/test_concat.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index 6ff6e553f16a7..c61f2a3dc8066 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -1052,12 +1052,13 @@ def test_concat_multiindex_with_none_in_index_names(self): # GH 15787 index = pd.MultiIndex.from_product([[1], range(5)], names=['level1', None]) - df = pd.DataFrame({'col': range(5)}, index=index) + df = pd.DataFrame({'col': range(5)}, index=index, dtype=np.int32) result = concat([df, df], keys=[1, 2], names=['level2']) index = pd.MultiIndex.from_product([[1, 2], [1], range(5)], names=['level2', 'level1', None]) - expected = pd.DataFrame({'col': list(range(5)) * 2}, index=index) + expected = pd.DataFrame({'col': list(range(5)) * 2}, + index=index, dtype=np.int32) assert_frame_equal(result, expected) result = concat([df, 
df[:2]], keys=[1, 2], names=['level2']) @@ -1067,7 +1068,8 @@ def test_concat_multiindex_with_none_in_index_names(self): tuples = list(zip(level2, level1, no_name)) index = pd.MultiIndex.from_tuples(tuples, names=['level2', 'level1', None]) - expected = pd.DataFrame({'col': no_name}, index=index) + expected = pd.DataFrame({'col': no_name}, index=index, + dtype=np.int32) assert_frame_equal(result, expected) def test_concat_keys_and_levels(self):