Skip to content

REF: _try_cast; go through fastpath more often #41597

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 30 additions & 10 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@
construct_1d_object_array_from_listlike,
maybe_cast_to_datetime,
maybe_cast_to_integer_array,
maybe_castable,
maybe_convert_platform,
maybe_upcast,
sanitize_to_nanoseconds,
)
from pandas.core.dtypes.common import (
is_datetime64_ns_dtype,
Expand Down Expand Up @@ -656,33 +656,53 @@ def _try_cast(
-------
np.ndarray or ExtensionArray
"""
is_ndarray = isinstance(arr, np.ndarray)

# perf shortcut as this is the most common case
# Item "List[Any]" of "Union[List[Any], ndarray]" has no attribute "dtype"
if (
isinstance(arr, np.ndarray)
and maybe_castable(arr.dtype)
is_ndarray
and arr.dtype != object # type: ignore[union-attr]
and not copy
and dtype is None
):
return arr
# Argument 1 to "sanitize_to_nanoseconds" has incompatible type
# "Union[List[Any], ndarray]"; expected "ndarray"
return sanitize_to_nanoseconds(arr) # type: ignore[arg-type]

if isinstance(dtype, ExtensionDtype) and not isinstance(dtype, DatetimeTZDtype):
if isinstance(dtype, ExtensionDtype):
# create an extension array from its dtype
# DatetimeTZ case needs to go through maybe_cast_to_datetime but
# SparseDtype does not
if isinstance(dtype, DatetimeTZDtype):
# We can't go through _from_sequence because it handles dt64naive
# data differently; _from_sequence treats naive as wall times,
# while maybe_cast_to_datetime treats it as UTC
# see test_maybe_promote_any_numpy_dtype_with_datetimetz

# error: Incompatible return value type (got "Union[ExtensionArray,
# ndarray, List[Any]]", expected "Union[ExtensionArray, ndarray]")
return maybe_cast_to_datetime(arr, dtype) # type: ignore[return-value]
# TODO: copy?

array_type = dtype.construct_array_type()._from_sequence
subarr = array_type(arr, dtype=dtype, copy=copy)
return subarr

if is_object_dtype(dtype) and not isinstance(arr, np.ndarray):
subarr = construct_1d_object_array_from_listlike(arr)
return subarr
elif is_object_dtype(dtype):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could do this check up front and assign the result to a variable (as is already done at the top).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure. might as well optimize to the gills

if not is_ndarray:
subarr = construct_1d_object_array_from_listlike(arr)
return subarr
return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy)

if dtype is None and isinstance(arr, list):
elif dtype is None and not is_ndarray:
# filter out cases that we _dont_ want to go through maybe_cast_to_datetime
varr = np.array(arr, copy=False)
if varr.dtype != object or varr.size == 0:
return varr
arr = varr
# error: Incompatible return value type (got "Union[ExtensionArray,
# ndarray, List[Any]]", expected "Union[ExtensionArray, ndarray]")
return maybe_cast_to_datetime(varr, None) # type: ignore[return-value]

try:
# GH#15832: Check if we are requesting a numeric dtype and
Expand Down
17 changes: 0 additions & 17 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@

from pandas.core.dtypes.common import (
DT64NS_DTYPE,
POSSIBLY_CAST_DTYPES,
TD64NS_DTYPE,
ensure_int8,
ensure_int16,
Expand All @@ -59,7 +58,6 @@
is_complex,
is_complex_dtype,
is_datetime64_dtype,
is_datetime64_ns_dtype,
is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype,
is_dtype_equal,
Expand All @@ -74,7 +72,6 @@
is_sparse,
is_string_dtype,
is_timedelta64_dtype,
is_timedelta64_ns_dtype,
is_unsigned_integer_dtype,
pandas_dtype,
)
Expand Down Expand Up @@ -1480,20 +1477,6 @@ def convert_dtypes(
return inferred_dtype


def maybe_castable(dtype: np.dtype) -> bool:
# return False to force a non-fastpath

# check datetime64[ns]/timedelta64[ns] are valid
# otherwise try to coerce
kind = dtype.kind
if kind == "M":
return is_datetime64_ns_dtype(dtype)
elif kind == "m":
return is_timedelta64_ns_dtype(dtype)

return dtype.name not in POSSIBLY_CAST_DTYPES


def maybe_infer_to_datetimelike(
value: np.ndarray,
) -> np.ndarray | DatetimeArray | TimedeltaArray:
Expand Down
15 changes: 0 additions & 15 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,21 +58,6 @@
is_sequence,
)

POSSIBLY_CAST_DTYPES = {
np.dtype(t).name
for t in [
"O",
"int8",
"uint8",
"int16",
"uint16",
"int32",
"uint32",
"int64",
"uint64",
]
}

DT64NS_DTYPE = conversion.DT64NS_DTYPE
TD64NS_DTYPE = conversion.TD64NS_DTYPE
INT64_DTYPE = np.dtype(np.int64)
Expand Down