From 7075100dc01554e26880ffd7189b069a2428593d Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 20 May 2021 18:57:01 -0700 Subject: [PATCH 1/3] REF: collect dtype=None cases within _try_cast --- pandas/core/construction.py | 46 ++++++++++++++++++++----------------- pandas/core/dtypes/cast.py | 5 +++- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 51b9ed5fd22c7..e590458e792d0 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -658,19 +658,28 @@ def _try_cast( """ is_ndarray = isinstance(arr, np.ndarray) - # perf shortcut as this is the most common case - # Item "List[Any]" of "Union[List[Any], ndarray]" has no attribute "dtype" - if ( - is_ndarray - and arr.dtype != object # type: ignore[union-attr] - and not copy - and dtype is None - ): - # Argument 1 to "sanitize_to_nanoseconds" has incompatible type - # "Union[List[Any], ndarray]"; expected "ndarray" - return sanitize_to_nanoseconds(arr) # type: ignore[arg-type] + if dtype is None: + # perf shortcut as this is the most common case + if is_ndarray: + if arr.dtype != object: + return sanitize_to_nanoseconds(arr, copy=copy) + + out = maybe_cast_to_datetime(arr, None) + if out is arr and copy: + out = out.copy() + return out - if isinstance(dtype, ExtensionDtype): + else: + # i.e. 
list + varr = np.array(arr, copy=False) + # filter out cases that we _dont_ want to go through maybe_cast_to_datetime + if varr.dtype != object or varr.size == 0: + return varr + # error: Incompatible return value type (got "Union[ExtensionArray, + # ndarray, List[Any]]", expected "Union[ExtensionArray, ndarray]") + return maybe_cast_to_datetime(varr, None) # type: ignore[return-value] + + elif isinstance(dtype, ExtensionDtype): # create an extension array from its dtype # DatetimeTZ case needs to go through maybe_cast_to_datetime but # SparseDtype does not @@ -695,15 +704,6 @@ def _try_cast( return subarr return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy) - elif dtype is None and not is_ndarray: - # filter out cases that we _dont_ want to go through maybe_cast_to_datetime - varr = np.array(arr, copy=False) - if varr.dtype != object or varr.size == 0: - return varr - # error: Incompatible return value type (got "Union[ExtensionArray, - # ndarray, List[Any]]", expected "Union[ExtensionArray, ndarray]") - return maybe_cast_to_datetime(varr, None) # type: ignore[return-value] - try: # GH#15832: Check if we are requesting a numeric dtype and # that we can convert the data to the requested dtype. 
@@ -719,7 +719,11 @@ def _try_cast( return subarr if not isinstance(subarr, ABCExtensionArray): + # 4 tests fail if we move this to a try/except/else; see + # test_constructor_compound_dtypes, test_constructor_cast_failure, + # test_constructor_dict_cast2, test_loc_setitem_dtype subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy) + except OutOfBoundsDatetime: # in case of out of bound datetime64 -> always raise raise diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e3616bc857140..c23f8f423c3d8 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1687,7 +1687,7 @@ def maybe_cast_to_datetime( return value -def sanitize_to_nanoseconds(values: np.ndarray) -> np.ndarray: +def sanitize_to_nanoseconds(values: np.ndarray, copy: bool = False) -> np.ndarray: """ Safely convert non-nanosecond datetime64 or timedelta64 values to nanosecond. """ @@ -1698,6 +1698,9 @@ def sanitize_to_nanoseconds(values: np.ndarray) -> np.ndarray: elif dtype.kind == "m" and dtype != TD64NS_DTYPE: values = conversion.ensure_timedelta64ns(values) + elif copy: + values = values.copy() + return values From 4f75a2f3a154aeface2851197674fd9b89a6b6ef Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 20 May 2021 20:30:11 -0700 Subject: [PATCH 2/3] REF: dtype=None cases go through lower-level casting function --- pandas/core/construction.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e590458e792d0..468075f14a4e6 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -39,6 +39,7 @@ maybe_cast_to_datetime, maybe_cast_to_integer_array, maybe_convert_platform, + maybe_infer_to_datetimelike, maybe_upcast, sanitize_to_nanoseconds, ) @@ -546,11 +547,12 @@ def sanitize_array( if dtype is not None or len(data) == 0: subarr = _try_cast(data, dtype, copy, raise_cast_failure) else: + # TODO: copy? 
subarr = maybe_convert_platform(data) - # error: Incompatible types in assignment (expression has type - # "Union[ExtensionArray, ndarray, List[Any]]", variable has type - # "ExtensionArray") - subarr = maybe_cast_to_datetime(subarr, dtype) # type: ignore[assignment] + if subarr.dtype == object: + # Argument 1 to "maybe_infer_to_datetimelike" has incompatible + # type "Union[ExtensionArray, ndarray]"; expected "ndarray" + subarr = maybe_infer_to_datetimelike(subarr) # type: ignore[arg-type] subarr = _sanitize_ndim(subarr, data, dtype, index) @@ -664,7 +666,7 @@ def _try_cast( if arr.dtype != object: return sanitize_to_nanoseconds(arr, copy=copy) - out = maybe_cast_to_datetime(arr, None) + out = maybe_infer_to_datetimelike(arr) if out is arr and copy: out = out.copy() return out @@ -672,17 +674,14 @@ def _try_cast( else: # i.e. list varr = np.array(arr, copy=False) - # filter out cases that we _dont_ want to go through maybe_cast_to_datetime + # filter out cases that we _dont_ want to go through + # maybe_infer_to_datetimelike if varr.dtype != object or varr.size == 0: return varr - # error: Incompatible return value type (got "Union[ExtensionArray, - # ndarray, List[Any]]", expected "Union[ExtensionArray, ndarray]") - return maybe_cast_to_datetime(varr, None) # type: ignore[return-value] + return maybe_infer_to_datetimelike(varr) elif isinstance(dtype, ExtensionDtype): # create an extension array from its dtype - # DatetimeTZ case needs to go through maybe_cast_to_datetime but - # SparseDtype does not if isinstance(dtype, DatetimeTZDtype): # We can't go through _from_sequence because it handles dt64naive # data differently; _from_sequence treats naive as wall times, @@ -710,7 +709,6 @@ def _try_cast( if is_integer_dtype(dtype): # this will raise if we have e.g. 
floats - dtype = cast(np.dtype, dtype) maybe_cast_to_integer_array(arr, dtype) subarr = arr else: From 35cf2bb61d766ec7faccade4b824c00608a610a7 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 May 2021 18:44:54 -0700 Subject: [PATCH 3/3] mypy fixup --- pandas/core/construction.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 468075f14a4e6..c0759e90da980 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -663,6 +663,7 @@ def _try_cast( if dtype is None: # perf shortcut as this is the most common case if is_ndarray: + arr = cast(np.ndarray, arr) if arr.dtype != object: return sanitize_to_nanoseconds(arr, copy=copy)