Skip to content

Commit bbeef69

Browse files
jbrockmendel authored and
codamuse committed
REF: tighter typing in constructor functions (pandas-dev#49591)
1 parent 6c0d3fb commit bbeef69

File tree

3 files changed

+58
-69
lines changed

3 files changed

+58
-69
lines changed

pandas/core/construction.py

+12 -26
Original file line number | Diff line number | Diff line change
@@ -597,6 +597,15 @@ def sanitize_array(
597597
# e.g. test_constructor_floating_data_int_dtype
598598
# TODO: where is the discussion that documents the reason for this?
599599
subarr = np.array(data, copy=copy)
600+
601+
elif dtype is None:
602+
subarr = data
603+
if data.dtype == object:
604+
subarr = maybe_infer_to_datetimelike(data)
605+
606+
if subarr is data and copy:
607+
subarr = subarr.copy()
608+
600609
else:
601610
# we will try to copy by-definition here
602611
subarr = _try_cast(data, dtype, copy)
@@ -754,7 +763,7 @@ def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
754763

755764
def _try_cast(
756765
arr: list | np.ndarray,
757-
dtype: np.dtype | None,
766+
dtype: np.dtype,
758767
copy: bool,
759768
) -> ArrayLike:
760769
"""
@@ -764,7 +773,7 @@ def _try_cast(
764773
----------
765774
arr : ndarray or list
766775
Excludes: ExtensionArray, Series, Index.
767-
dtype : np.dtype or None
776+
dtype : np.dtype
768777
copy : bool
769778
If False, don't copy the data if not needed.
770779
@@ -774,30 +783,7 @@ def _try_cast(
774783
"""
775784
is_ndarray = isinstance(arr, np.ndarray)
776785

777-
if dtype is None:
778-
# perf shortcut as this is the most common case
779-
if is_ndarray:
780-
arr = cast(np.ndarray, arr)
781-
if arr.dtype != object:
782-
if copy:
783-
return arr.copy()
784-
return arr
785-
786-
out = maybe_infer_to_datetimelike(arr)
787-
if out is arr and copy:
788-
out = out.copy()
789-
return out
790-
791-
else:
792-
# i.e. list
793-
varr = np.array(arr, copy=False)
794-
# filter out cases that we _dont_ want to go through
795-
# maybe_infer_to_datetimelike
796-
if varr.dtype != object or varr.size == 0:
797-
return varr
798-
return maybe_infer_to_datetimelike(varr)
799-
800-
elif is_object_dtype(dtype):
786+
if is_object_dtype(dtype):
801787
if not is_ndarray:
802788
subarr = construct_1d_object_array_from_listlike(arr)
803789
return subarr

pandas/core/frame.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -738,7 +738,7 @@ def __init__(
738738

739739
# For data that is list-like, or an Iterable (which will be consumed into a list)
740740
elif is_list_like(data):
741-
if not isinstance(data, (abc.Sequence, ExtensionArray)):
741+
if not isinstance(data, abc.Sequence):
742742
if hasattr(data, "__array__"):
743743
# GH#44616 big perf improvement for e.g. pytorch tensor
744744
data = np.asarray(data)

pandas/core/internals/construction.py

+45 -42
Original file line number | Diff line number | Diff line change
@@ -53,9 +53,7 @@
5353
)
5454
from pandas.core.arrays import (
5555
Categorical,
56-
DatetimeArray,
5756
ExtensionArray,
58-
TimedeltaArray,
5957
)
6058
from pandas.core.construction import (
6159
ensure_wrapped_if_datetimelike,
@@ -277,14 +275,20 @@ def ndarray_to_mgr(
277275

278276
return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ)
279277

280-
elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
281-
# i.e. Datetime64TZ, PeriodDtype
278+
elif is_extension_array_dtype(vdtype):
279+
# i.e. Datetime64TZ, PeriodDtype; cases with is_1d_only_ea_dtype(vdtype)
280+
# are already caught above
282281
values = extract_array(values, extract_numpy=True)
283282
if copy:
284283
values = values.copy()
285284
if values.ndim == 1:
286285
values = values.reshape(-1, 1)
287286

287+
elif isinstance(values, (np.ndarray, ExtensionArray, ABCSeries, Index)):
288+
# drop subclass info
289+
values = np.array(values, copy=copy_on_sanitize)
290+
values = _ensure_2d(values)
291+
288292
else:
289293
# by definition an array here
290294
# the dtypes will be coerced to a single dtype
@@ -496,51 +500,50 @@ def treat_as_nested(data) -> bool:
496500
# ---------------------------------------------------------------------
497501

498502

499-
def _prep_ndarraylike(
500-
values, copy: bool = True
501-
) -> np.ndarray | DatetimeArray | TimedeltaArray:
502-
if isinstance(values, TimedeltaArray) or (
503-
isinstance(values, DatetimeArray) and values.tz is None
504-
):
505-
# By retaining DTA/TDA instead of unpacking, we end up retaining non-nano
506-
pass
507-
508-
elif not isinstance(values, (np.ndarray, ABCSeries, Index)):
509-
if len(values) == 0:
510-
return np.empty((0, 0), dtype=object)
511-
elif isinstance(values, range):
512-
arr = range_to_ndarray(values)
513-
return arr[..., np.newaxis]
514-
515-
def convert(v):
516-
if not is_list_like(v) or isinstance(v, ABCDataFrame):
517-
return v
518-
519-
v = extract_array(v, extract_numpy=True)
520-
res = maybe_convert_platform(v)
521-
return res
522-
523-
# we could have a 1-dim or 2-dim list here
524-
# this is equiv of np.asarray, but does object conversion
525-
# and platform dtype preservation
526-
if is_list_like(values[0]):
527-
values = np.array([convert(v) for v in values])
528-
elif isinstance(values[0], np.ndarray) and values[0].ndim == 0:
529-
# GH#21861 see test_constructor_list_of_lists
530-
values = np.array([convert(v) for v in values])
531-
else:
532-
values = convert(values)
533-
503+
def _prep_ndarraylike(values, copy: bool = True) -> np.ndarray:
504+
# values is specifically _not_ ndarray, EA, Index, or Series
505+
# We only get here with `not treat_as_nested(values)`
506+
507+
if len(values) == 0:
508+
return np.empty((0, 0), dtype=object)
509+
elif isinstance(values, range):
510+
arr = range_to_ndarray(values)
511+
return arr[..., np.newaxis]
512+
513+
def convert(v):
514+
if not is_list_like(v) or isinstance(v, ABCDataFrame):
515+
return v
516+
517+
v = extract_array(v, extract_numpy=True)
518+
res = maybe_convert_platform(v)
519+
# We don't do maybe_infer_to_datetimelike here because we will end up doing
520+
# it column-by-column in ndarray_to_mgr
521+
return res
522+
523+
# we could have a 1-dim or 2-dim list here
524+
# this is equiv of np.asarray, but does object conversion
525+
# and platform dtype preservation
526+
# does not convert e.g. [1, "a", True] to ["1", "a", "True"] like
527+
# np.asarray would
528+
if is_list_like(values[0]):
529+
values = np.array([convert(v) for v in values])
530+
elif isinstance(values[0], np.ndarray) and values[0].ndim == 0:
531+
# GH#21861 see test_constructor_list_of_lists
532+
values = np.array([convert(v) for v in values])
534533
else:
534+
values = convert(values)
535535

536-
# drop subclass info
537-
values = np.array(values, copy=copy)
536+
return _ensure_2d(values)
538537

538+
539+
def _ensure_2d(values: np.ndarray) -> np.ndarray:
540+
"""
541+
Reshape 1D values, raise on anything else other than 2D.
542+
"""
539543
if values.ndim == 1:
540544
values = values.reshape((values.shape[0], 1))
541545
elif values.ndim != 2:
542546
raise ValueError(f"Must pass 2-d input. shape={values.shape}")
543-
544547
return values
545548

546549

0 commit comments

Comments
 (0)