Skip to content

Commit 0f77b11

Browse files
committed
Fix for pandas-dev#60695: stop the Series constructor from dropping key levels when keys have varying entry counts
1 parent 46dfc5b commit 0f77b11

File tree

2 files changed

+74
-41
lines changed

2 files changed

+74
-41
lines changed

pandas/core/indexes/multi.py

+35-18
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
Hashable,
88
Iterable,
99
Sequence,
10+
List,
1011
)
1112
from functools import wraps
1213
from sys import getsizeof
@@ -15,6 +16,7 @@
1516
Any,
1617
Literal,
1718
cast,
19+
ArrayLike,
1820
)
1921
import warnings
2022

@@ -526,37 +528,52 @@ def from_tuples(
526528
) -> MultiIndex:
527529
"""
528530
Convert list of tuples to MultiIndex.
531+
532+
Parameters
533+
----------
534+
tuples : list / sequence of tuple-likes
535+
Each tuple is the index of one row/column.
536+
sortorder : int or None
537+
Level of sortedness (must be lexicographically sorted by that level).
538+
names : list / sequence of str, optional
539+
Names for the levels in the index.
540+
541+
Returns
542+
-------
543+
MultiIndex
529544
"""
530545
if not is_list_like(tuples):
531546
raise TypeError("Input must be a list / sequence of tuple-likes.")
532-
547+
533548
if is_iterator(tuples):
534549
tuples = list(tuples)
535-
536-
# Cast to proper tuple type
550+
537551
tuples = cast(Collection[tuple[Hashable, ...]], tuples)
538-
552+
539553
if len(tuples) == 0:
540554
if names is None:
541555
raise TypeError("Cannot infer number of levels from empty list")
542-
arrays = [[]] * len(names)
543-
elif isinstance(tuples, (np.ndarray, Index)):
556+
names_seq = cast(Sequence[Hashable], names)
557+
arrays: List[ArrayLike] = [[]] * len(names_seq)
558+
return cls.from_arrays(arrays, sortorder=sortorder, names=names)
559+
560+
if isinstance(tuples, (np.ndarray, Index)):
544561
if isinstance(tuples, Index):
545562
tuples = np.asarray(tuples._values)
546563
arrays = list(lib.tuples_to_object_array(tuples).T)
547-
elif isinstance(tuples, list):
548-
# Find the maximum length of tuples
549-
max_length = max((len(t) for t in tuples), default=0)
550-
551-
# Pad shorter tuples with np.nan
552-
padded_tuples = [
553-
t + (np.nan,) * (max_length - len(t)) for t in tuples
554-
]
555-
arrays = list(lib.to_object_array_tuples(padded_tuples).T)
556-
else:
557-
from itertools import zip_longest
558-
arrays = list(zip_longest(*tuples, fillvalue=np.nan))
564+
return cls.from_arrays(arrays, sortorder=sortorder, names=names)
565+
566+
# Convert to list and process
567+
tuples_list = list(tuples)
568+
max_length = max(len(t) if isinstance(t, tuple) else 1 for t in tuples_list)
569+
570+
result_tuples = []
571+
for t in tuples_list:
572+
if not isinstance(t, tuple):
573+
t = (t,)
574+
result_tuples.append(t + (np.nan,) * (max_length - len(t)))
559575

576+
arrays = list(lib.to_object_array_tuples(result_tuples).T)
560577
return cls.from_arrays(arrays, sortorder=sortorder, names=names)
561578

562579
@classmethod

pandas/tests/indexes/multi/test_constructors.py

+39-23
Original file line numberDiff line numberDiff line change
@@ -865,43 +865,59 @@ def test_from_tuples_different_lengths_gh60695():
865865
866866
GH#60695
867867
"""
868-
# Test case 1: Original issue example with string values
868+
# Test case 1: Basic string tuples
869869
tuples = [("l1",), ("l1", "l2")]
870-
result = pd.MultiIndex.from_tuples(tuples)
871-
expected = pd.MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")])
872-
tm.assert_index_equal(result, expected, exact=True)
870+
result = MultiIndex.from_tuples(tuples)
871+
expected = MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")])
872+
tm.assert_index_equal(result, expected)
873873

874-
# Test case 2: Series construction with tuple keys
874+
# Test case 2: Series with tuple keys
875875
s = pd.Series({("l1",): "v1", ("l1", "l2"): "v2"})
876876
expected = pd.Series(
877877
["v1", "v2"],
878-
index=pd.MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")])
878+
index=MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")])
879879
)
880-
tm.assert_series_equal(s, expected, check_index_type=True)
880+
tm.assert_series_equal(s, expected)
881881

882-
# Test case 3: Handle numeric values
882+
# Test case 3: Numeric tuples
883883
tuples = [(1,), (1, 2)]
884-
result = pd.MultiIndex.from_tuples(tuples)
885-
expected = pd.MultiIndex.from_tuples([(1, np.nan), (1, 2)])
886-
tm.assert_index_equal(result, expected, exact=True)
884+
result = MultiIndex.from_tuples(tuples)
885+
expected = MultiIndex.from_tuples([(1, np.nan), (1, 2)])
886+
tm.assert_index_equal(result, expected)
887887

888-
# Test case 4: Mixed types (strings and integers)
888+
# Test case 4: Mixed types
889889
tuples = [(1, "a"), (1,), (2, "b", "c")]
890-
result = pd.MultiIndex.from_tuples(tuples)
891-
expected = pd.MultiIndex.from_tuples([
890+
result = MultiIndex.from_tuples(tuples)
891+
expected = MultiIndex.from_tuples([
892892
(1, "a", np.nan),
893893
(1, np.nan, np.nan),
894894
(2, "b", "c")
895895
])
896-
tm.assert_index_equal(result, expected, exact=True)
896+
tm.assert_index_equal(result, expected)
897897

898-
# Test case 5: Empty tuples
899-
tuples = []
898+
# Test case 5: Empty input with names
899+
empty_idx = MultiIndex.from_tuples([], names=["a", "b"])
900+
assert empty_idx.names == ["a", "b"]
901+
assert len(empty_idx) == 0
902+
903+
# Test case 6: Empty input without names
900904
with pytest.raises(TypeError, match="Cannot infer number of levels"):
901-
pd.MultiIndex.from_tuples(tuples)
905+
MultiIndex.from_tuples([])
902906

903-
# Test case 6: Single level consistency
904-
tuples = [("a",)]
905-
result = pd.MultiIndex.from_tuples(tuples)
906-
expected = pd.MultiIndex.from_tuples([("a",)])
907-
tm.assert_index_equal(result, expected, exact=True)
907+
# Test case 7: None values
908+
tuples = [(1, None), (1, 2)]
909+
result = MultiIndex.from_tuples(tuples)
910+
expected = MultiIndex.from_tuples([(1, np.nan), (1, 2)])
911+
tm.assert_index_equal(result, expected)
912+
913+
# Test case 8: DataFrame with tuple index
914+
df = pd.DataFrame(
915+
{"col": ["v1", "v2"]},
916+
index=MultiIndex.from_tuples([("l1",), ("l1", "l2")])
917+
)
918+
expected_index = MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")])
919+
expected_df = pd.DataFrame(
920+
{"col": ["v1", "v2"]},
921+
index=expected_index
922+
)
923+
tm.assert_frame_equal(df, expected_df)

0 commit comments

Comments
 (0)