diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 0df5a70d87655..2900db24a99ff 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -712,7 +712,7 @@ MultiIndex - :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`) - Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`) - Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`) -- +- Bug in :meth:`MultiIndex.from_tuples` causing wrong output when the input tuples contain NaN values or have differing lengths (:issue:`60695`, :issue:`60988`) I/O ^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index aafd802b827a5..0c0232bdc6d4c 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1647,6 +1647,8 @@ def map_array( If the function returns a tuple with more than one element a MultiIndex will be returned. 
""" + from pandas import Index + if na_action not in (None, "ignore"): msg = f"na_action must either be 'ignore' or None, {na_action} was passed" raise ValueError(msg) @@ -1676,6 +1678,10 @@ def map_array( if len(mapper) == 0: mapper = Series(mapper, dtype=np.float64) + elif isinstance(mapper, dict): + mapper = Series( + mapper.values(), index=Index(mapper.keys(), tupleize_cols=False) + ) else: mapper = Series(mapper) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 79eb1b693d866..29b34f560ab2e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -9,6 +9,7 @@ Sequence, ) from functools import wraps +from itertools import zip_longest from sys import getsizeof from typing import ( TYPE_CHECKING, @@ -588,7 +589,7 @@ def from_tuples( elif isinstance(tuples, list): arrays = list(lib.to_object_array_tuples(tuples).T) else: - arrs = zip(*tuples) + arrs = zip_longest(*tuples, fillvalue=np.nan) arrays = cast(list[Sequence[Hashable]], arrs) return cls.from_arrays(arrays, sortorder=sortorder, names=names) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index b2867d4ac8e68..92827cf154394 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -410,6 +410,19 @@ def test_from_tuples_with_tuple_label(): tm.assert_frame_equal(expected, result) +@pytest.mark.parametrize( + "keys, expected", + [ + ((("l1",), ("l1", "l2")), (("l1", np.nan), ("l1", "l2"))), + ((("l1", "l2"), ("l1",)), (("l1", "l2"), ("l1", np.nan))), + ], +) +def test_from_tuples_with_various_tuple_lengths(keys, expected): + # GH 60695 + idx = MultiIndex.from_tuples(keys) + assert tuple(idx) == expected + + # ---------------------------------------------------------------------------- # from_product # ---------------------------------------------------------------------------- diff --git a/pandas/tests/series/test_constructors.py 
b/pandas/tests/series/test_constructors.py index a2be698c0ec28..5f4a100e7ccc7 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1441,10 +1441,17 @@ def test_constructor_tuple_of_tuples(self): s = Series(data) assert tuple(s) == data - def test_constructor_dict_of_tuples(self): - data = {(1, 2): 3, (None, 5): 6} + @pytest.mark.parametrize( + "data, expected_values, expected_index", + [ + ({(1, 2): 3, (None, 5): 6}, [3, 6], [(1, 2), (None, 5)]), + ({(1,): 3, (4, 5): 6}, [3, 6], [(1, None), (4, 5)]), + ], + ) + def test_constructor_dict_of_tuples(self, data, expected_values, expected_index): + # GH 60695 result = Series(data).sort_values() - expected = Series([3, 6], index=MultiIndex.from_tuples([(1, 2), (None, 5)])) + expected = Series(expected_values, index=MultiIndex.from_tuples(expected_index)) tm.assert_series_equal(result, expected) # https://github.com/pandas-dev/pandas/issues/22698 @@ -1860,23 +1867,30 @@ class A(OrderedDict): series = Series(A(data)) tm.assert_series_equal(series, expected) - def test_constructor_dict_multiindex(self): - d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0} - _d = sorted(d.items()) - result = Series(d) - expected = Series( - [x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d]) - ) - tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "data, expected_index_multi", + [ + ({("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, True), + ({("a",): 0.0, ("a", "b"): 1.0}, True), + ({"z": 111.0, ("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, False), + ], + ) + def test_constructor_dict_multiindex(self, data, expected_index_multi): + # GH#60695 + result = Series(data) - d["z"] = 111.0 - _d.insert(0, ("z", d["z"])) - result = Series(d) - expected = Series( - [x[1] for x in _d], index=Index([x[0] for x in _d], tupleize_cols=False) - ) - result = result.reindex(index=expected.index) - tm.assert_series_equal(result, expected) + if 
expected_index_multi: + expected = Series( + list(data.values()), + index=MultiIndex.from_tuples(list(data.keys())), + ) + tm.assert_series_equal(result, expected) + else: + expected = Series( + list(data.values()), + index=Index(list(data.keys())), + ) + tm.assert_series_equal(result, expected) def test_constructor_dict_multiindex_reindex_flat(self): # construction involves reindexing with a MultiIndex corner case