Skip to content

REF: tighter typing in constructor functions #49591

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 12 additions & 26 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,15 @@ def sanitize_array(
# e.g. test_constructor_floating_data_int_dtype
# TODO: where is the discussion that documents the reason for this?
subarr = np.array(data, copy=copy)

elif dtype is None:
subarr = data
if data.dtype == object:
subarr = maybe_infer_to_datetimelike(data)

if subarr is data and copy:
subarr = subarr.copy()

else:
# we will try to copy by-definition here
subarr = _try_cast(data, dtype, copy)
Expand Down Expand Up @@ -754,7 +763,7 @@ def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:

def _try_cast(
arr: list | np.ndarray,
dtype: np.dtype | None,
dtype: np.dtype,
copy: bool,
) -> ArrayLike:
"""
Expand All @@ -764,7 +773,7 @@ def _try_cast(
----------
arr : ndarray or list
Excludes: ExtensionArray, Series, Index.
dtype : np.dtype or None
dtype : np.dtype
copy : bool
If False, don't copy the data if not needed.

Expand All @@ -774,30 +783,7 @@ def _try_cast(
"""
is_ndarray = isinstance(arr, np.ndarray)

if dtype is None:
# perf shortcut as this is the most common case
if is_ndarray:
arr = cast(np.ndarray, arr)
if arr.dtype != object:
if copy:
return arr.copy()
return arr

out = maybe_infer_to_datetimelike(arr)
if out is arr and copy:
out = out.copy()
return out

else:
# i.e. list
varr = np.array(arr, copy=False)
# filter out cases that we _dont_ want to go through
# maybe_infer_to_datetimelike
if varr.dtype != object or varr.size == 0:
return varr
return maybe_infer_to_datetimelike(varr)

elif is_object_dtype(dtype):
if is_object_dtype(dtype):
if not is_ndarray:
subarr = construct_1d_object_array_from_listlike(arr)
return subarr
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,7 @@ def __init__(

# For data is list-like, or Iterable (will consume into list)
elif is_list_like(data):
if not isinstance(data, (abc.Sequence, ExtensionArray)):
if not isinstance(data, abc.Sequence):
if hasattr(data, "__array__"):
# GH#44616 big perf improvement for e.g. pytorch tensor
data = np.asarray(data)
Expand Down
87 changes: 45 additions & 42 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,7 @@
)
from pandas.core.arrays import (
Categorical,
DatetimeArray,
ExtensionArray,
TimedeltaArray,
)
from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
Expand Down Expand Up @@ -277,14 +275,20 @@ def ndarray_to_mgr(

return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ)

elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
# i.e. Datetime64TZ, PeriodDtype
elif is_extension_array_dtype(vdtype):
# i.e. Datetime64TZ, PeriodDtype; cases with is_1d_only_ea_dtype(vdtype)
# are already caught above
values = extract_array(values, extract_numpy=True)
if copy:
values = values.copy()
if values.ndim == 1:
values = values.reshape(-1, 1)

elif isinstance(values, (np.ndarray, ExtensionArray, ABCSeries, Index)):
# drop subclass info
values = np.array(values, copy=copy_on_sanitize)
values = _ensure_2d(values)

else:
# by definition an array here
# the dtypes will be coerced to a single dtype
Expand Down Expand Up @@ -496,51 +500,50 @@ def treat_as_nested(data) -> bool:
# ---------------------------------------------------------------------


def _prep_ndarraylike(
values, copy: bool = True
) -> np.ndarray | DatetimeArray | TimedeltaArray:
if isinstance(values, TimedeltaArray) or (
isinstance(values, DatetimeArray) and values.tz is None
):
# By retaining DTA/TDA instead of unpacking, we end up retaining non-nano
pass

elif not isinstance(values, (np.ndarray, ABCSeries, Index)):
if len(values) == 0:
return np.empty((0, 0), dtype=object)
elif isinstance(values, range):
arr = range_to_ndarray(values)
return arr[..., np.newaxis]

def convert(v):
if not is_list_like(v) or isinstance(v, ABCDataFrame):
return v

v = extract_array(v, extract_numpy=True)
res = maybe_convert_platform(v)
return res

# we could have a 1-dim or 2-dim list here
# this is equiv of np.asarray, but does object conversion
# and platform dtype preservation
if is_list_like(values[0]):
values = np.array([convert(v) for v in values])
elif isinstance(values[0], np.ndarray) and values[0].ndim == 0:
# GH#21861 see test_constructor_list_of_lists
values = np.array([convert(v) for v in values])
else:
values = convert(values)

def _prep_ndarraylike(values, copy: bool = True) -> np.ndarray:
# Coerce a non-array list-like into a 2D ndarray (final step is _ensure_2d).
# NOTE(review): leading indentation was lost when this text was extracted;
# the nesting described in the comments below is inferred from syntax only.
# values is specifically _not_ ndarray, EA, Index, or Series
# We only get here with `not treat_as_nested(values)`

# Empty input short-circuits to a (0, 0) object-dtype array.
if len(values) == 0:
return np.empty((0, 0), dtype=object)
# A range is materialized via range_to_ndarray and given a new trailing axis.
elif isinstance(values, range):
arr = range_to_ndarray(values)
return arr[..., np.newaxis]

# Per-element helper: anything that is not list-like, and any DataFrame, is
# returned untouched; everything else goes through extract_array and then
# maybe_convert_platform.
def convert(v):
if not is_list_like(v) or isinstance(v, ABCDataFrame):
return v

v = extract_array(v, extract_numpy=True)
res = maybe_convert_platform(v)
# We don't do maybe_infer_to_datetimelike here bc we will end up doing
# it column-by-column in ndarray_to_mgr
return res

# we could have a 1-dim or 2-dim list here
# this is equiv of np.asarray, but does object conversion
# and platform dtype preservation
# does not convert e.g. [1, "a", True] to ["1", "a", "True"] like
# np.asarray would
# The branch is chosen by inspecting only the first element, values[0].
if is_list_like(values[0]):
values = np.array([convert(v) for v in values])
elif isinstance(values[0], np.ndarray) and values[0].ndim == 0:
# GH#21861 see test_constructor_list_of_lists
values = np.array([convert(v) for v in values])
else:
values = convert(values)

# NOTE(review): in this diff rendering it is not possible to tell whether the
# next two lines are kept context or part of the removed version - confirm
# against the repository before relying on `copy` being honored here.
# drop subclass info
values = np.array(values, copy=copy)
return _ensure_2d(values)


def _ensure_2d(values: np.ndarray) -> np.ndarray:
    """
    Reshape 1D values, raise on anything else other than 2D.

    Parameters
    ----------
    values : np.ndarray
        Array to normalize.

    Returns
    -------
    np.ndarray
        ``values`` itself when it is already 2D; otherwise the 1D input
        reshaped to ``(len(values), 1)``.

    Raises
    ------
    ValueError
        If ``values.ndim`` is neither 1 nor 2.
    """
    if values.ndim == 1:
        # A 1D array of length n becomes a single column of shape (n, 1).
        values = values.reshape((values.shape[0], 1))
    elif values.ndim != 2:
        raise ValueError(f"Must pass 2-d input. shape={values.shape}")

    return values


Expand Down