Skip to content

CLN: clean core.construction._extract_index #49930

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 28, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 47 additions & 51 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,63 +580,59 @@ def _extract_index(data) -> Index:
"""
Try to infer an Index from the passed data, raise ValueError on failure.
"""
index = None
index: Index
if len(data) == 0:
index = Index([])
else:
raw_lengths = []
indexes: list[list[Hashable] | Index] = []

have_raw_arrays = False
have_series = False
have_dicts = False

for val in data:
if isinstance(val, ABCSeries):
have_series = True
indexes.append(val.index)
elif isinstance(val, dict):
have_dicts = True
indexes.append(list(val.keys()))
elif is_list_like(val) and getattr(val, "ndim", 1) == 1:
have_raw_arrays = True
raw_lengths.append(len(val))
elif isinstance(val, np.ndarray) and val.ndim > 1:
raise ValueError("Per-column arrays must each be 1-dimensional")

if not indexes and not raw_lengths:
raise ValueError("If using all scalar values, you must pass an index")
return Index([])

if have_series:
index = union_indexes(indexes)
elif have_dicts:
index = union_indexes(indexes, sort=False)
raw_lengths = []
indexes: list[list[Hashable] | Index] = []

if have_raw_arrays:
lengths = list(set(raw_lengths))
if len(lengths) > 1:
raise ValueError("All arrays must be of the same length")
have_raw_arrays = False
have_series = False
have_dicts = False

if have_dicts:
raise ValueError(
"Mixing dicts with non-Series may lead to ambiguous ordering."
)
for val in data:
if isinstance(val, ABCSeries):
have_series = True
indexes.append(val.index)
elif isinstance(val, dict):
have_dicts = True
indexes.append(list(val.keys()))
elif is_list_like(val) and getattr(val, "ndim", 1) == 1:
have_raw_arrays = True
raw_lengths.append(len(val))
elif isinstance(val, np.ndarray) and val.ndim > 1:
raise ValueError("Per-column arrays must each be 1-dimensional")

if not indexes and not raw_lengths:
raise ValueError("If using all scalar values, you must pass an index")

if have_series:
index = union_indexes(indexes)
elif have_dicts:
index = union_indexes(indexes, sort=False)

if have_raw_arrays:
lengths = list(set(raw_lengths))
if len(lengths) > 1:
raise ValueError("All arrays must be of the same length")

if have_dicts:
raise ValueError(
"Mixing dicts with non-Series may lead to ambiguous ordering."
)

if have_series:
assert index is not None # for mypy
if lengths[0] != len(index):
msg = (
f"array length {lengths[0]} does not match index "
f"length {len(index)}"
)
raise ValueError(msg)
else:
index = default_index(lengths[0])
if have_series:
if lengths[0] != len(index):
msg = (
f"array length {lengths[0]} does not match index "
f"length {len(index)}"
)
raise ValueError(msg)
else:
index = default_index(lengths[0])

# error: Argument 1 to "ensure_index" has incompatible type "Optional[Index]";
# expected "Union[Union[Union[ExtensionArray, ndarray], Index, Series],
# Sequence[Any]]"
return ensure_index(index) # type: ignore[arg-type]
return ensure_index(index)


def reorder_arrays(
Expand Down