diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index dc48cd1ed958e..50d7c1e5d752a 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -6,7 +6,10 @@ Generator, Hashable, Iterable, + Mapping, Sequence, + List, + Iterator, ) from functools import wraps from sys import getsizeof @@ -15,6 +18,8 @@ Any, Literal, cast, + ArrayLike, + overload, ) import warnings @@ -27,7 +32,7 @@ index as libindex, lib, ) -from pandas._libs.hashtable import duplicated +from pandas._libs.hashtable import duplicated, duplicated_int64 from pandas._typing import ( AnyAll, AnyArrayLike, @@ -517,7 +522,7 @@ def from_arrays( ) @classmethod - @names_compat + @doc(doc_create_index) def from_tuples( cls, tuples: Iterable[tuple[Hashable, ...]], @@ -526,73 +531,27 @@ def from_tuples( ) -> MultiIndex: """ Convert list of tuples to MultiIndex. - - Parameters - ---------- - tuples : list / sequence of tuple-likes - Each tuple is the index of one row/column. - sortorder : int or None - Level of sortedness (must be lexicographically sorted by that - level). - names : list / sequence of str, optional - Names for the levels in the index. - - Returns - ------- - MultiIndex - - See Also - -------- - MultiIndex.from_arrays : Convert list of arrays to MultiIndex. - MultiIndex.from_product : Make a MultiIndex from cartesian product - of iterables. - MultiIndex.from_frame : Make a MultiIndex from a DataFrame. - - Examples - -------- - >>> tuples = [(1, "red"), (1, "blue"), (2, "red"), (2, "blue")] - >>> pd.MultiIndex.from_tuples(tuples, names=("number", "color")) - MultiIndex([(1, 'red'), - (1, 'blue'), - (2, 'red'), - (2, 'blue')], - names=['number', 'color']) """ if not is_list_like(tuples): raise TypeError("Input must be a list / sequence of tuple-likes.") - if is_iterator(tuples): - tuples = list(tuples) - tuples = cast(Collection[tuple[Hashable, ...]], tuples) - - # handling the empty tuple cases - if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples): - codes = [np.zeros(len(tuples))] - levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))] - return cls( - levels=levels, - codes=codes, - sortorder=sortorder, - names=names, - verify_integrity=False, - ) - arrays: list[Sequence[Hashable]] - if len(tuples) == 0: + if isinstance(tuples, (list, tuple)) and len(tuples) == 0: if names is None: raise TypeError("Cannot infer number of levels from empty list") - # error: Argument 1 to "len" has incompatible type "Hashable"; - # expected "Sized" - arrays = [[]] * len(names) # type: ignore[arg-type] - elif isinstance(tuples, (np.ndarray, Index)): - if isinstance(tuples, Index): - tuples = np.asarray(tuples._values) - - arrays = list(lib.tuples_to_object_array(tuples).T) - elif isinstance(tuples, list): - arrays = list(lib.to_object_array_tuples(tuples).T) + names_seq = cast(Sequence[Hashable], names) + arrays: List[ArrayLike] = [[]] * len(names_seq) + return cls.from_arrays(arrays, sortorder=sortorder, names=names) + + # Convert to list and normalize + tuples_list = [t if isinstance(t, tuple) else (t,) for t in tuples] + if not tuples_list: + arrays = [] else: - arrs = zip(*tuples) - arrays = cast(list[Sequence[Hashable]], arrs) + max_length = max(len(t) for t in tuples_list) + result_tuples = [ + t + (np.nan,) * (max_length - len(t)) for t in tuples_list + ] + arrays = list(lib.to_object_array_tuples(result_tuples).T) return cls.from_arrays(arrays, sortorder=sortorder, names=names) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index b2867d4ac8e68..b8d38325e4bef 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -857,3 +857,50 @@ def test_dtype_representation(using_infer_string): dtype=object, ) tm.assert_series_equal(result, expected) + + +def test_from_tuples_different_lengths_gh60695(): + """ + Test that MultiIndex.from_tuples properly handles tuples of different lengths. + + GH#60695 + """ + # Test case 1: Basic string tuples + tuples = [("l1",), ("l1", "l2")] + result = MultiIndex.from_tuples(tuples) + expected = MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) + tm.assert_index_equal(result, expected, exact=True) + + # Test case 2: Series with tuple keys + s = pd.Series({("l1",): "v1", ("l1", "l2"): "v2"}) + expected = pd.Series( + ["v1", "v2"], + index=MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) + ) + tm.assert_series_equal(s, expected, check_index_type=True) + + # Test case 3: Empty input with names + empty_idx = MultiIndex.from_tuples([], names=["a", "b"]) + assert isinstance(empty_idx, MultiIndex) + assert empty_idx.names == ["a", "b"] + assert len(empty_idx) == 0 + + # Test case 4: Empty input without names + with pytest.raises(TypeError, match="Cannot infer number of levels"): + MultiIndex.from_tuples([]) + + # Test case 5: None values + tuples = [(1, None), (1, 2)] + result = MultiIndex.from_tuples(tuples) + expected = MultiIndex.from_tuples([(1, np.nan), (1, 2)]) + tm.assert_index_equal(result, expected, exact=True) + + # Test case 6: Mixed types + tuples = [(1, "a"), (1,), (2, "b", "c")] + result = MultiIndex.from_tuples(tuples) + expected = MultiIndex.from_tuples([ + (1, "a", np.nan), + (1, np.nan, np.nan), + (2, "b", "c") + ]) + tm.assert_index_equal(result, expected, exact=True)