
TYP: tighten types in core.construction #41729

Merged (2 commits) on May 31, 2021

10 changes: 10 additions & 0 deletions pandas/core/frame.py
@@ -728,6 +728,15 @@ def __init__(
            if index is None or columns is None:
                raise ValueError("DataFrame constructor not properly called!")

+            # Argument 1 to "ensure_index" has incompatible type "Collection[Any]";
+            # expected "Union[Union[Union[ExtensionArray, ndarray],
+            # Index, Series], Sequence[Any]]"
+            index = ensure_index(index)  # type: ignore[arg-type]
+            # Argument 1 to "ensure_index" has incompatible type "Collection[Any]";
+            # expected "Union[Union[Union[ExtensionArray, ndarray],
+            # Index, Series], Sequence[Any]]"
+            columns = ensure_index(columns)  # type: ignore[arg-type]
+
            if not dtype:
                dtype, _ = infer_dtype_from_scalar(data, pandas_dtype=True)

@@ -2325,6 +2334,7 @@ def _from_arrays(
            dtype = pandas_dtype(dtype)

        manager = get_option("mode.data_manager")
+        columns = ensure_index(columns)
        mgr = arrays_to_mgr(
            arrays,
            columns,
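For context: ensure_index (exposed via pandas.core.indexes.api) is what turns the raw index/columns arguments into proper Index objects, returning existing Index inputs unchanged. A minimal sketch of that behaviour; the sample values are illustrative only:

    from pandas import Index
    from pandas.core.indexes.api import ensure_index

    cols = ensure_index(["a", "b", "c"])   # plain list-like -> Index
    print(type(cols))                      # <class 'pandas.core.indexes.base.Index'>

    idx = Index(range(3))
    print(ensure_index(idx) is idx)        # True: an existing Index passes through as-is
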
35 changes: 17 additions & 18 deletions pandas/core/internals/construction.py
@@ -47,10 +47,7 @@
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import (
    ABCDataFrame,
-    ABCDatetimeIndex,
-    ABCIndex,
    ABCSeries,
-    ABCTimedeltaIndex,
)

from pandas.core import (
@@ -71,7 +68,9 @@
)
from pandas.core.indexes import base as ibase
from pandas.core.indexes.api import (
+    DatetimeIndex,
    Index,
+    TimedeltaIndex,
    ensure_index,
    get_objs_combined_axis,
    union_indexes,
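The import swap trades the ABC* shims from pandas.core.dtypes.generic (which exist mainly to avoid circular imports) for the concrete classes, which this module can import directly from pandas.core.indexes.api. At runtime the isinstance checks behave the same; the concrete classes just give mypy something to narrow on. A quick check of that equivalence, with the internal import paths shown purely for illustration:

    import pandas as pd
    from pandas.core.dtypes.generic import ABCDatetimeIndex
    from pandas.core.indexes.api import DatetimeIndex

    idx = pd.date_range("2021-01-01", periods=3)
    print(isinstance(idx, ABCDatetimeIndex))  # True
    print(isinstance(idx, DatetimeIndex))     # True
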
@@ -101,7 +100,7 @@

def arrays_to_mgr(
    arrays,
-    arr_names,
+    arr_names: Index,
    index,
    columns,
    *,
@@ -115,8 +114,6 @@ def arrays_to_mgr(

    Needs to handle a lot of exceptional cases.
    """
-    arr_names = ensure_index(arr_names)
-
    if verify_integrity:
        # figure out the index, if necessary
        if index is None:
@@ -286,10 +283,12 @@ def ndarray_to_mgr(

        if columns is None:
            columns = Index(range(len(values)))
+        else:
+            columns = ensure_index(columns)

        return arrays_to_mgr(values, columns, index, columns, dtype=dtype, typ=typ)

-    if is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
+    elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
        # i.e. Datetime64TZ
        values = extract_array(values, extract_numpy=True)
        if copy:
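With the added else branch, a plain list of labels passed alongside extension-array data is normalized to an Index before reaching arrays_to_mgr, which now expects an Index; turning the following if into elif also makes the two dtype branches explicitly mutually exclusive. A construction that most likely goes through this path, assuming a pandas version with the nullable "Int64" dtype:

    import pandas as pd

    # 1D-only extension array plus a plain-list `columns`;
    # the labels are normalized to an Index internally
    arr = pd.array([1, 2, None], dtype="Int64")
    df = pd.DataFrame(arr, columns=["a"])
    print(df.dtypes)  # a    Int64
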
@@ -454,7 +453,7 @@ def dict_to_mgr(
        arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
        # GH#24096 need copy to be deep for datetime64tz case
        # TODO: See if we can avoid these copies
-        arrays = [arr if not isinstance(arr, ABCIndex) else arr._data for arr in arrays]
+        arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays]
        arrays = [
            arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
        ]
@@ -480,7 +479,7 @@ def nested_data_to_arrays(
    columns: Index | None,
    index: Index | None,
    dtype: DtypeObj | None,
-):
+) -> tuple[list[ArrayLike], Index, Index]:
    """
    Convert a single sequence of arrays to multiple arrays.
    """
@@ -548,7 +547,7 @@ def convert(v):
        if is_list_like(values[0]):
            values = np.array([convert(v) for v in values])
        elif isinstance(values[0], np.ndarray) and values[0].ndim == 0:
-            # GH#21861
+            # GH#21861 see test_constructor_list_of_lists
            values = np.array([convert(v) for v in values])
        else:
            values = convert(values)
@@ -566,31 +565,30 @@ def convert(v):
    return values


-def _homogenize(data, index: Index, dtype: DtypeObj | None):
+def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]:
    oindex = None
    homogenized = []

    for val in data:
        if isinstance(val, ABCSeries):
            if dtype is not None:
-                val = val.astype(dtype)
+                val = val.astype(dtype, copy=False)
            if val.index is not index:
                # Forces alignment. No need to copy data since we
                # are putting it into an ndarray later
                val = val.reindex(index, copy=False)
-            # TODO extract_array should be preferred, but that gives failures for
-            # `extension/test_numpy.py` (extract_array will convert numpy arrays
-            # to PandasArray), see https://github.com/pandas-dev/pandas/issues/40021
-            # val = extract_array(val, extract_numpy=True)

            val = val._values
        else:
            if isinstance(val, dict):
                if oindex is None:
                    oindex = index.astype("O")

-                if isinstance(index, (ABCDatetimeIndex, ABCTimedeltaIndex)):
+                if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
+                    # see test_constructor_dict_datetime64_index
                    val = dict_compat(val)
                else:
+                    # see test_constructor_subclass_dict
                    val = dict(val)
                val = lib.fast_multiget(val, oindex._values, default=np.nan)
            val = sanitize_array(
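On the astype(dtype, copy=False) change: _homogenize only needs the values coerced to the requested dtype, and copy=False lets pandas return the existing data when a Series already has that dtype instead of allocating a new array. A rough illustration of the difference; whether the buffer is actually shared is a pandas implementation detail, so treat the first result as typical rather than guaranteed:

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, 2.0, 3.0])               # already float64

    no_copy = s.astype("float64", copy=False)
    copied = s.astype("float64")                 # copy=True is the default

    print(np.shares_memory(no_copy.values, s.values))  # usually True
    print(np.shares_memory(copied.values, s.values))   # False
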
@@ -749,6 +747,7 @@ def to_arrays(
    Return list of arrays, columns.
    """
    if isinstance(data, ABCDataFrame):
+        # see test_from_records_with_index_data, test_from_records_bad_index_column
        if columns is not None:
            arrays = [
                data._ixs(i, axis=1).values
@@ -884,7 +883,7 @@ def _list_of_dict_to_arrays(

    # assure that they are of the base dict class and not of derived
    # classes
-    data = [(type(d) is dict) and d or dict(d) for d in data]
+    data = [d if type(d) is dict else dict(d) for d in data]

    content = lib.dicts_to_array(data, list(columns))
    return content, columns
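The rewritten comprehension is the conditional-expression spelling of the old and/or trick: plain dicts pass through untouched, while dict subclasses (OrderedDict, defaultdict, and so on) are converted to the base class so lib.dicts_to_array sees a uniform type. A small standalone check of the equivalence:

    from collections import OrderedDict, defaultdict

    data = [{"a": 1}, OrderedDict(b=2), defaultdict(int, c=3), {}]

    old = [(type(d) is dict) and d or dict(d) for d in data]
    new = [d if type(d) is dict else dict(d) for d in data]

    print(old == new)                         # True: same values either way
    print(all(type(d) is dict for d in new))  # True: everything is a plain dict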