From da75bbec8a31c8bf0ab8c5451ffb7f0ea136b44a Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Mon, 3 Jan 2022 15:23:54 +0000 Subject: [PATCH 1/4] Implementation, test and whatsnew --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/indexes/multi.py | 7 ++++--- pandas/tests/indexes/multi/test_get_set.py | 7 +++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 4c3e53ddcfa26..41e87ac4eca4c 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -835,6 +835,7 @@ MultiIndex - Bug in :meth:`MultiIndex.get_loc` raising ``TypeError`` instead of ``KeyError`` on nested tuple (:issue:`42440`) - Bug in :meth:`MultiIndex.union` setting wrong ``sortorder`` causing errors in subsequent indexing operations with slices (:issue:`44752`) - Bug in :meth:`MultiIndex.putmask` where the other value was also a :class:`MultiIndex` (:issue:`43212`) +- Bug in :meth:`MultiIndex.dtypes` when duplicate level names returned only one dtype (:issue:`45174`) - I/O diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 830f3afc8a1e7..e565d02af66e0 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -735,13 +735,14 @@ def array(self): def dtypes(self) -> Series: """ Return the dtypes as a Series for the underlying MultiIndex. + + .. versionchanged:: 1.4.0 + Correct result when there are duplicated level names. 
""" from pandas import Series names = com.fill_missing_names([level.name for level in self.levels]) - return Series( - {names[idx]: level.dtype for idx, level in enumerate(self.levels)} - ) + return Series([level.dtype for level in self.levels], index=names) def __len__(self) -> int: return len(self.codes[0]) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index e806ee1751b00..890e6cf056928 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -67,6 +67,13 @@ def test_get_dtypes_no_level_name(): tm.assert_series_equal(expected, idx_multitype.dtypes) +def test_get_dtypes_duplicate_level_names(): + # Test MultiIndex.dtypes with non-unique level names (# GH45174 ) + result = pd.MultiIndex.from_arrays([[1], [2]], names=[1, 1]).dtypes + expected = pd.Series([np.dtype("int64"), np.dtype("int64")], index=[1, 1]) + tm.assert_series_equal(result, expected) + + def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data From a95f3b32bbf197bfd0a1e66d5eb394d85b264af4 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Mon, 3 Jan 2022 15:39:46 +0000 Subject: [PATCH 2/4] Update test_get_set.py --- pandas/tests/indexes/multi/test_get_set.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 890e6cf056928..14e07836ed075 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -69,7 +69,7 @@ def test_get_dtypes_no_level_name(): def test_get_dtypes_duplicate_level_names(): # Test MultiIndex.dtypes with non-unique level names (# GH45174 ) - result = pd.MultiIndex.from_arrays([[1], [2]], names=[1, 1]).dtypes + result = MultiIndex.from_arrays([[1], [2]], names=[1, 1]).dtypes expected = pd.Series([np.dtype("int64"), np.dtype("int64")], index=[1, 1]) 
tm.assert_series_equal(result, expected) From 4bd8156cdc15805dff0b65fa46d572cb42f87d36 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Tue, 4 Jan 2022 09:16:18 +0000 Subject: [PATCH 3/4] review fixes --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/core/indexes/multi.py | 3 --- pandas/tests/indexes/multi/test_get_set.py | 20 +++++++++++++++++--- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 41e87ac4eca4c..68e6893bb9db1 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -835,7 +835,7 @@ MultiIndex - Bug in :meth:`MultiIndex.get_loc` raising ``TypeError`` instead of ``KeyError`` on nested tuple (:issue:`42440`) - Bug in :meth:`MultiIndex.union` setting wrong ``sortorder`` causing errors in subsequent indexing operations with slices (:issue:`44752`) - Bug in :meth:`MultiIndex.putmask` where the other value was also a :class:`MultiIndex` (:issue:`43212`) -- Bug in :meth:`MultiIndex.dtypes` when duplicate level names returned only one dtype (:issue:`45174`) +- Bug in :meth:`MultiIndex.dtypes` where duplicate level names returned only one dtype per name (:issue:`45174`) - I/O diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e565d02af66e0..816fa3abb618c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -735,9 +735,6 @@ def array(self): def dtypes(self) -> Series: """ Return the dtypes as a Series for the underlying MultiIndex. - - .. versionchanged:: 1.4.0 - Correct result when there are duplicated level names. 
""" from pandas import Series diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 14e07836ed075..8789831a55e6c 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -68,9 +68,23 @@ def test_get_dtypes_no_level_name(): def test_get_dtypes_duplicate_level_names(): - # Test MultiIndex.dtypes with non-unique level names (# GH45174 ) - result = MultiIndex.from_arrays([[1], [2]], names=[1, 1]).dtypes - expected = pd.Series([np.dtype("int64"), np.dtype("int64")], index=[1, 1]) + # Test MultiIndex.dtypes with non-unique level names (# GH45174) + result = MultiIndex.from_product( + [ + [1, 2, 3], + ["a", "b", "c"], + pd.date_range("20200101", periods=2, tz="UTC"), + ], + names=["A", "A", "A"] + ).dtypes + expected = pd.Series( + [ + np.dtype("int64"), + np.dtype("O"), + DatetimeTZDtype(tz="utc") + ], + index=["A", "A", "A"] + ) tm.assert_series_equal(result, expected) From a8111decaa628e9a3cc1449d3b0968f3300e613d Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Tue, 4 Jan 2022 09:25:42 +0000 Subject: [PATCH 4/4] Update test_get_set.py --- pandas/tests/indexes/multi/test_get_set.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 8789831a55e6c..aa0e91cecd4fc 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -75,15 +75,11 @@ def test_get_dtypes_duplicate_level_names(): ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC"), ], - names=["A", "A", "A"] + names=["A", "A", "A"], ).dtypes expected = pd.Series( - [ - np.dtype("int64"), - np.dtype("O"), - DatetimeTZDtype(tz="utc") - ], - index=["A", "A", "A"] + [np.dtype("int64"), np.dtype("O"), DatetimeTZDtype(tz="utc")], + index=["A", "A", "A"], ) tm.assert_series_equal(result, expected)