Skip to content

Commit f9fd0b6

Browse files
jbrockmendel authored and TLouf committed
TYP: tighten types in core.construction (pandas-dev#41729)
1 parent 720266c commit f9fd0b6

File tree

2 files changed

+27
-18
lines changed

2 files changed

+27
-18
lines changed

pandas/core/frame.py

+10
Original file line number | Diff line number | Diff line change
@@ -728,6 +728,15 @@ def __init__(
728728
if index is None or columns is None:
729729
raise ValueError("DataFrame constructor not properly called!")
730730

731+
# Argument 1 to "ensure_index" has incompatible type "Collection[Any]";
732+
# expected "Union[Union[Union[ExtensionArray, ndarray],
733+
# Index, Series], Sequence[Any]]"
734+
index = ensure_index(index) # type: ignore[arg-type]
735+
# Argument 1 to "ensure_index" has incompatible type "Collection[Any]";
736+
# expected "Union[Union[Union[ExtensionArray, ndarray],
737+
# Index, Series], Sequence[Any]]"
738+
columns = ensure_index(columns) # type: ignore[arg-type]
739+
731740
if not dtype:
732741
dtype, _ = infer_dtype_from_scalar(data, pandas_dtype=True)
733742

@@ -2325,6 +2334,7 @@ def _from_arrays(
23252334
dtype = pandas_dtype(dtype)
23262335

23272336
manager = get_option("mode.data_manager")
2337+
columns = ensure_index(columns)
23282338
mgr = arrays_to_mgr(
23292339
arrays,
23302340
columns,

pandas/core/internals/construction.py

+17 -18
Original file line number | Diff line number | Diff line change
@@ -47,10 +47,7 @@
4747
from pandas.core.dtypes.dtypes import ExtensionDtype
4848
from pandas.core.dtypes.generic import (
4949
ABCDataFrame,
50-
ABCDatetimeIndex,
51-
ABCIndex,
5250
ABCSeries,
53-
ABCTimedeltaIndex,
5451
)
5552

5653
from pandas.core import (
@@ -71,7 +68,9 @@
7168
)
7269
from pandas.core.indexes import base as ibase
7370
from pandas.core.indexes.api import (
71+
DatetimeIndex,
7472
Index,
73+
TimedeltaIndex,
7574
ensure_index,
7675
get_objs_combined_axis,
7776
union_indexes,
@@ -101,7 +100,7 @@
101100

102101
def arrays_to_mgr(
103102
arrays,
104-
arr_names,
103+
arr_names: Index,
105104
index,
106105
columns,
107106
*,
@@ -115,8 +114,6 @@ def arrays_to_mgr(
115114
116115
Needs to handle a lot of exceptional cases.
117116
"""
118-
arr_names = ensure_index(arr_names)
119-
120117
if verify_integrity:
121118
# figure out the index, if necessary
122119
if index is None:
@@ -286,10 +283,12 @@ def ndarray_to_mgr(
286283

287284
if columns is None:
288285
columns = Index(range(len(values)))
286+
else:
287+
columns = ensure_index(columns)
289288

290289
return arrays_to_mgr(values, columns, index, columns, dtype=dtype, typ=typ)
291290

292-
if is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
291+
elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
293292
# i.e. Datetime64TZ
294293
values = extract_array(values, extract_numpy=True)
295294
if copy:
@@ -454,7 +453,7 @@ def dict_to_mgr(
454453
arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
455454
# GH#24096 need copy to be deep for datetime64tz case
456455
# TODO: See if we can avoid these copies
457-
arrays = [arr if not isinstance(arr, ABCIndex) else arr._data for arr in arrays]
456+
arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays]
458457
arrays = [
459458
arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
460459
]
@@ -480,7 +479,7 @@ def nested_data_to_arrays(
480479
columns: Index | None,
481480
index: Index | None,
482481
dtype: DtypeObj | None,
483-
):
482+
) -> tuple[list[ArrayLike], Index, Index]:
484483
"""
485484
Convert a single sequence of arrays to multiple arrays.
486485
"""
@@ -548,7 +547,7 @@ def convert(v):
548547
if is_list_like(values[0]):
549548
values = np.array([convert(v) for v in values])
550549
elif isinstance(values[0], np.ndarray) and values[0].ndim == 0:
551-
# GH#21861
550+
# GH#21861 see test_constructor_list_of_lists
552551
values = np.array([convert(v) for v in values])
553552
else:
554553
values = convert(values)
@@ -566,31 +565,30 @@ def convert(v):
566565
return values
567566

568567

569-
def _homogenize(data, index: Index, dtype: DtypeObj | None):
568+
def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]:
570569
oindex = None
571570
homogenized = []
572571

573572
for val in data:
574573
if isinstance(val, ABCSeries):
575574
if dtype is not None:
576-
val = val.astype(dtype)
575+
val = val.astype(dtype, copy=False)
577576
if val.index is not index:
578577
# Forces alignment. No need to copy data since we
579578
# are putting it into an ndarray later
580579
val = val.reindex(index, copy=False)
581-
# TODO extract_array should be preferred, but that gives failures for
582-
# `extension/test_numpy.py` (extract_array will convert numpy arrays
583-
# to PandasArray), see https://github.com/pandas-dev/pandas/issues/40021
584-
# val = extract_array(val, extract_numpy=True)
580+
585581
val = val._values
586582
else:
587583
if isinstance(val, dict):
588584
if oindex is None:
589585
oindex = index.astype("O")
590586

591-
if isinstance(index, (ABCDatetimeIndex, ABCTimedeltaIndex)):
587+
if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
588+
# see test_constructor_dict_datetime64_index
592589
val = dict_compat(val)
593590
else:
591+
# see test_constructor_subclass_dict
594592
val = dict(val)
595593
val = lib.fast_multiget(val, oindex._values, default=np.nan)
596594
val = sanitize_array(
@@ -749,6 +747,7 @@ def to_arrays(
749747
Return list of arrays, columns.
750748
"""
751749
if isinstance(data, ABCDataFrame):
750+
# see test_from_records_with_index_data, test_from_records_bad_index_column
752751
if columns is not None:
753752
arrays = [
754753
data._ixs(i, axis=1).values
@@ -884,7 +883,7 @@ def _list_of_dict_to_arrays(
884883

885884
# assure that they are of the base dict class and not of derived
886885
# classes
887-
data = [(type(d) is dict) and d or dict(d) for d in data]
886+
data = [d if type(d) is dict else dict(d) for d in data]
888887

889888
content = lib.dicts_to_array(data, list(columns))
890889
return content, columns

0 commit comments

Comments
 (0)