Skip to content

Commit 46dfc5b

Browse files
committed
Fix for pandas-dev#60695: Series constructor drops key levels when tuple keys have varying lengths
1 parent dacff95 commit 46dfc5b

File tree

2 files changed

+38
-64
lines changed

2 files changed

+38
-64
lines changed

pandas/core/indexes/multi.py

+14-36
Original file line numberDiff line numberDiff line change
@@ -526,58 +526,36 @@ def from_tuples(
526526
) -> MultiIndex:
527527
"""
528528
Convert list of tuples to MultiIndex.
529-
530-
Parameters
531-
----------
532-
tuples : list / sequence of tuple-likes
533-
Each tuple is the index of one row/column.
534-
sortorder : int or None
535-
Level of sortedness (must be lexicographically sorted by that
536-
level).
537-
names : list / sequence of str, optional
538-
Names for the levels in the index.
539-
540-
Returns
541-
-------
542-
MultiIndex
543529
"""
544530
if not is_list_like(tuples):
545531
raise TypeError("Input must be a list / sequence of tuple-likes.")
532+
546533
if is_iterator(tuples):
547534
tuples = list(tuples)
535+
536+
# Cast to proper tuple type
548537
tuples = cast(Collection[tuple[Hashable, ...]], tuples)
549-
550-
# handling the empty tuple cases
551-
if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples):
552-
codes = [np.zeros(len(tuples))]
553-
levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))]
554-
return cls(
555-
levels=levels,
556-
codes=codes,
557-
sortorder=sortorder,
558-
names=names,
559-
verify_integrity=False,
560-
)
561-
562-
arrays: list[Sequence[Hashable]]
538+
563539
if len(tuples) == 0:
564540
if names is None:
565541
raise TypeError("Cannot infer number of levels from empty list")
566-
# error: Argument 1 to "len" has incompatible type "Hashable";
567-
# expected "Sized"
568-
arrays = [[]] * len(names) # type: ignore[arg-type]
542+
arrays = [[]] * len(names)
569543
elif isinstance(tuples, (np.ndarray, Index)):
570544
if isinstance(tuples, Index):
571545
tuples = np.asarray(tuples._values)
572-
573546
arrays = list(lib.tuples_to_object_array(tuples).T)
574547
elif isinstance(tuples, list):
575-
arrays = list(lib.to_object_array_tuples(tuples).T)
548+
# Find the maximum length of tuples
549+
max_length = max((len(t) for t in tuples), default=0)
550+
551+
# Pad shorter tuples with np.nan
552+
padded_tuples = [
553+
t + (np.nan,) * (max_length - len(t)) for t in tuples
554+
]
555+
arrays = list(lib.to_object_array_tuples(padded_tuples).T)
576556
else:
577-
# Use zip_longest instead of zip to handle tuples of different lengths
578557
from itertools import zip_longest
579-
arrs = zip_longest(*tuples, fillvalue=np.nan)
580-
arrays = cast(list[Sequence[Hashable]], arrs)
558+
arrays = list(zip_longest(*tuples, fillvalue=np.nan))
581559

582560
return cls.from_arrays(arrays, sortorder=sortorder, names=names)
583561

pandas/tests/indexes/multi/test_constructors.py

+24-28
Original file line numberDiff line numberDiff line change
@@ -865,47 +865,43 @@ def test_from_tuples_different_lengths_gh60695():
865865
866866
GH#60695
867867
"""
868-
# Test case 1: Original issue example
868+
# Test case 1: Original issue example with string values
869869
tuples = [("l1",), ("l1", "l2")]
870870
result = pd.MultiIndex.from_tuples(tuples)
871871
expected = pd.MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")])
872-
tm.assert_index_equal(result, expected)
872+
tm.assert_index_equal(result, expected, exact=True)
873873

874874
# Test case 2: Series construction with tuple keys
875875
s = pd.Series({("l1",): "v1", ("l1", "l2"): "v2"})
876876
expected = pd.Series(
877877
["v1", "v2"],
878878
index=pd.MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")])
879879
)
880-
tm.assert_series_equal(s, expected)
880+
tm.assert_series_equal(s, expected, check_index_type=True)
881881

882-
# Test case 3: Multiple levels with None
883-
data = {(1, 1, None): -1.0}
884-
result = pd.Series(data)
885-
expected = pd.Series(
886-
-1.0,
887-
index=pd.MultiIndex.from_tuples([(1, 1, np.nan)]),
888-
)
889-
tm.assert_series_equal(result, expected)
882+
# Test case 3: Handle numeric values
883+
tuples = [(1,), (1, 2)]
884+
result = pd.MultiIndex.from_tuples(tuples)
885+
expected = pd.MultiIndex.from_tuples([(1, np.nan), (1, 2)])
886+
tm.assert_index_equal(result, expected, exact=True)
890887

891-
# Test case 4: Mixed length tuples
892-
tuples = [("a",), ("b", "c"), ("d", "e", "f")]
888+
# Test case 4: Mixed types (strings and integers)
889+
tuples = [(1, "a"), (1,), (2, "b", "c")]
893890
result = pd.MultiIndex.from_tuples(tuples)
894891
expected = pd.MultiIndex.from_tuples([
895-
("a", np.nan, np.nan),
896-
("b", "c", np.nan),
897-
("d", "e", "f")
892+
(1, "a", np.nan),
893+
(1, np.nan, np.nan),
894+
(2, "b", "c")
898895
])
899-
tm.assert_index_equal(result, expected)
896+
tm.assert_index_equal(result, expected, exact=True)
900897

901-
# Test case 5: DataFrame with tuple index
902-
df = pd.DataFrame(
903-
{"col": ["v1", "v2"]},
904-
index=pd.MultiIndex.from_tuples([("l1",), ("l1", "l2")])
905-
)
906-
expected_index = pd.MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")])
907-
expected_df = pd.DataFrame(
908-
{"col": ["v1", "v2"]},
909-
index=expected_index
910-
)
911-
tm.assert_frame_equal(df, expected_df)
898+
# Test case 5: Empty list of tuples (number of levels cannot be inferred)
899+
tuples = []
900+
with pytest.raises(TypeError, match="Cannot infer number of levels"):
901+
pd.MultiIndex.from_tuples(tuples)
902+
903+
# Test case 6: Single level consistency
904+
tuples = [("a",)]
905+
result = pd.MultiIndex.from_tuples(tuples)
906+
expected = pd.MultiIndex.from_tuples([("a",)])
907+
tm.assert_index_equal(result, expected, exact=True)

0 commit comments

Comments
 (0)