Skip to content

Commit af99a25

Browse files
committed
Fix for pandas-dev#60695: fix Series constructor dropping key levels when keys have varying entry counts
1 parent f4ed47a commit af99a25

File tree

1 file changed

+103
-55
lines changed

1 file changed

+103
-55
lines changed

pandas/core/indexes/multi.py

+103-55
Original file line number | Diff line number | Diff line change
@@ -1,65 +1,123 @@
11
from __future__ import annotations
22

3-
from typing import (
4-
TYPE_CHECKING,
5-
Any,
3+
from collections.abc import (
64
Callable,
75
Collection,
6+
Generator,
87
Hashable,
98
Iterable,
10-
Iterator,
11-
List,
12-
Literal,
13-
Mapping,
149
Sequence,
10+
)
11+
from functools import wraps
12+
from sys import getsizeof
13+
from typing import (
14+
TYPE_CHECKING,
15+
Any,
16+
Literal,
1517
cast,
16-
overload,
1718
)
19+
import warnings
1820

1921
import numpy as np
2022

21-
from pandas._libs import lib
22-
from pandas._libs.hashtable import duplicated_int64
23+
from pandas._config import get_option
24+
25+
from pandas._libs import (
26+
algos as libalgos,
27+
index as libindex,
28+
lib,
29+
)
30+
from pandas._libs.hashtable import duplicated
2331
from pandas._typing import (
24-
ArrayLike,
32+
AnyAll,
33+
AnyArrayLike,
2534
Axis,
35+
DropKeep,
2636
DtypeObj,
2737
F,
38+
IgnoreRaise,
39+
IndexLabel,
40+
IndexT,
41+
Scalar,
42+
Self,
2843
Shape,
2944
npt,
3045
)
31-
from pandas.errors import InvalidIndexError
46+
from pandas.compat.numpy import function as nv
47+
from pandas.errors import (
48+
InvalidIndexError,
49+
PerformanceWarning,
50+
UnsortedIndexError,
51+
)
3252
from pandas.util._decorators import (
53+
Appender,
3354
cache_readonly,
3455
doc,
56+
set_module,
3557
)
3658
from pandas.util._exceptions import find_stack_level
3759

3860
from pandas.core.dtypes.cast import coerce_indexer_dtype
3961
from pandas.core.dtypes.common import (
4062
ensure_int64,
4163
ensure_platform_int,
42-
is_categorical_dtype,
43-
is_extension_array_dtype,
64+
is_hashable,
65+
is_integer,
66+
is_iterator,
4467
is_list_like,
4568
is_object_dtype,
69+
is_scalar,
70+
is_string_dtype,
4671
pandas_dtype,
4772
)
48-
from pandas.core.dtypes.dtypes import ExtensionDtype
49-
from pandas.core.dtypes.missing import array_equivalent, isna
73+
from pandas.core.dtypes.dtypes import (
74+
CategoricalDtype,
75+
ExtensionDtype,
76+
)
77+
from pandas.core.dtypes.generic import (
78+
ABCDataFrame,
79+
ABCSeries,
80+
)
81+
from pandas.core.dtypes.inference import is_array_like
82+
from pandas.core.dtypes.missing import (
83+
array_equivalent,
84+
isna,
85+
)
5086

5187
import pandas.core.algorithms as algos
52-
from pandas.core.arrays.categorical import Categorical
88+
from pandas.core.array_algos.putmask import validate_putmask
89+
from pandas.core.arrays import (
90+
Categorical,
91+
ExtensionArray,
92+
)
93+
from pandas.core.arrays.categorical import (
94+
factorize_from_iterables,
95+
recode_for_categories,
96+
)
97+
import pandas.core.common as com
98+
from pandas.core.construction import sanitize_array
99+
import pandas.core.indexes.base as ibase
53100
from pandas.core.indexes.base import (
54101
Index,
55102
_index_shared_docs,
56103
ensure_index,
57104
get_unanimous_names,
58105
)
59106
from pandas.core.indexes.frozen import FrozenList
107+
from pandas.core.ops.invalid import make_invalid_op
108+
from pandas.core.sorting import (
109+
get_group_index,
110+
lexsort_indexer,
111+
)
112+
113+
from pandas.io.formats.printing import pprint_thing
60114

61115
if TYPE_CHECKING:
62-
from pandas import DataFrame
116+
from pandas import (
117+
CategoricalIndex,
118+
DataFrame,
119+
Series,
120+
)
63121

64122
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
65123
_index_doc_kwargs.update(
@@ -468,54 +526,44 @@ def from_tuples(
468526
) -> MultiIndex:
469527
"""
470528
Convert list of tuples to MultiIndex.
471-
472-
Parameters
473-
----------
474-
tuples : list / sequence of tuple-likes
475-
Each tuple is the index of one row/column.
476-
sortorder : int or None
477-
Level of sortedness (must be lexicographically sorted by that level).
478-
names : list / sequence of str, optional
479-
Names for the levels in the index.
480-
481-
Returns
482-
-------
483-
MultiIndex
484529
"""
485530
if not is_list_like(tuples):
486531
raise TypeError("Input must be a list / sequence of tuple-likes.")
487-
488-
# Handle empty tuples case first
489-
if isinstance(tuples, (list, tuple)) and len(tuples) == 0:
490-
if names is None:
491-
raise TypeError("Cannot infer number of levels from empty list")
492-
names_seq = cast(Sequence[Hashable], names)
493-
arrays: List[ArrayLike] = [[]] * len(names_seq)
494-
return cls.from_arrays(arrays, sortorder=sortorder, names=names)
495-
496-
# Convert iterator to list
532+
497533
if is_iterator(tuples):
498534
tuples = list(tuples)
499-
535+
500536
tuples = cast(Collection[tuple[Hashable, ...]], tuples)
501537

502-
# Handle numpy array or Index
503-
if isinstance(tuples, (np.ndarray, Index)):
538+
# handling the empty tuple cases
539+
if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples):
540+
codes = [np.zeros(len(tuples))]
541+
levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))]
542+
return cls(
543+
levels=levels,
544+
codes=codes,
545+
sortorder=sortorder,
546+
names=names,
547+
verify_integrity=False,
548+
)
549+
550+
arrays: list[Sequence[Hashable]]
551+
if len(tuples) == 0:
552+
if names is None:
553+
raise TypeError("Cannot infer number of levels from empty list")
554+
# error: Argument 1 to "len" has incompatible type "Hashable";
555+
# expected "Sized"
556+
arrays = [[]] * len(names) # type: ignore[arg-type]
557+
elif isinstance(tuples, (np.ndarray, Index)):
504558
if isinstance(tuples, Index):
505559
tuples = np.asarray(tuples._values)
506-
arrays = list(lib.tuples_to_object_array(tuples).T)
507-
return cls.from_arrays(arrays, sortorder=sortorder, names=names)
508560

509-
# Convert to list and normalize
510-
tuples_list = [t if isinstance(t, tuple) else (t,) for t in tuples]
511-
if not tuples_list:
512-
arrays = []
561+
arrays = list(lib.tuples_to_object_array(tuples).T)
562+
elif isinstance(tuples, list):
563+
arrays = list(lib.to_object_array_tuples(tuples).T)
513564
else:
514-
max_length = max(len(t) for t in tuples_list)
515-
result_tuples = [
516-
t + (np.nan,) * (max_length - len(t)) for t in tuples_list
517-
]
518-
arrays = list(lib.to_object_array_tuples(result_tuples).T)
565+
arrs = zip(*tuples)
566+
arrays = cast(list[Sequence[Hashable]], arrs)
519567

520568
return cls.from_arrays(arrays, sortorder=sortorder, names=names)
521569

0 commit comments

Comments
 (0)