From 4e92195291e4bdbd6924d77fbfc3edafb9002d9a Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 26 Oct 2022 14:59:12 -0700 Subject: [PATCH 1/2] REF: simplify maybe_infer_to_datetimelike --- pandas/core/dtypes/cast.py | 48 +++++++++++++++----------------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index cabd85aed1bbe..c06f0bba385fe 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1232,22 +1232,20 @@ def maybe_infer_to_datetimelike( if not len(v): return value - def try_datetime(v: np.ndarray) -> ArrayLike: + def try_datetime(v: np.ndarray) -> np.ndarray | DatetimeArray: # Coerce to datetime64, datetime64tz, or in corner cases # object[datetimes] from pandas.core.arrays.datetimes import sequence_to_datetimes try: - # GH#19671 we pass require_iso8601 to be relatively strict - # when parsing strings. - dta = sequence_to_datetimes(v, require_iso8601=True) - except (ValueError, TypeError): - # e.g. is not convertible to datetime - return v.reshape(shape) - else: + dta = sequence_to_datetimes(v) + except (ValueError, OutOfBoundsDatetime): + # ValueError for e.g. mixed tzs # GH#19761 we may have mixed timezones, in which cast 'dta' is # an ndarray[object]. Only 1 test # relies on this behavior, see GH#40111 + return v.reshape(shape) + else: return dta.reshape(shape) def try_timedelta(v: np.ndarray) -> np.ndarray: @@ -1259,12 +1257,14 @@ def try_timedelta(v: np.ndarray) -> np.ndarray: # `np.asarray(to_timedelta(v))`, but using a lower-level API that # does not require a circular import. td_values = array_to_timedelta64(v).view("m8[ns]") - except (ValueError, OverflowError): + except OutOfBoundsTimedelta: return v.reshape(shape) else: return td_values.reshape(shape) - # TODO: can we just do lib.maybe_convert_objects for this entire function? + # TODO: this is _almost_equivalent to lib.maybe_convert_objects, + # the main differences are described in GH#49340 and GH#49341 + # and maybe_convert_objects doesn't catch OutOfBoundsDatetime inferred_type = lib.infer_datetimelike_array(ensure_object(v)) if inferred_type in ["period", "interval"]: @@ -1276,31 +1276,21 @@ def try_timedelta(v: np.ndarray) -> np.ndarray: ) if inferred_type == "datetime": - # error: Incompatible types in assignment (expression has type "ExtensionArray", - # variable has type "Union[ndarray, List[Any]]") + # Incompatible types in assignment (expression has type + # "Union[ndarray[Any, Any], DatetimeArray]", variable has type + # "ndarray[Any, Any]") value = try_datetime(v) # type: ignore[assignment] elif inferred_type == "timedelta": value = try_timedelta(v) elif inferred_type == "nat": + # if all NaT, return as datetime # only reached if we have at least 1 NaT and the rest (NaT or None or np.nan) - # if all NaT, return as datetime - if isna(v).all(): - # error: Incompatible types in assignment (expression has type - # "ExtensionArray", variable has type "Union[ndarray, List[Any]]") - value = try_datetime(v) # type: ignore[assignment] - else: - # We have at least a NaT and a string - # try timedelta first to avoid spurious datetime conversions - # e.g. '00:00:01' is a timedelta but technically is also a datetime - value = try_timedelta(v) - if lib.infer_dtype(value, skipna=False) in ["mixed"]: - # cannot skip missing values, as NaT implies that the string - # is actually a datetime - - # error: Incompatible types in assignment (expression has type - # "ExtensionArray", variable has type "Union[ndarray, List[Any]]") - value = try_datetime(v) # type: ignore[assignment] + # Incompatible types in assignment (expression has type + # "Union[ndarray[Any, Any], DatetimeArray]", variable has type + # "ndarray[Any, Any]") + value = try_datetime(v) # type: ignore[assignment] + assert value.dtype == "M8[ns]" return value From 24e880ea784c680d6de657b77ac1806acd9b3186 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 26 Oct 2022 15:01:28 -0700 Subject: [PATCH 2/2] simplify sequence_to_datetimes --- pandas/core/arrays/datetimes.py | 3 +-- pandas/core/dtypes/cast.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 8395d54224f1d..1768bb7507dd9 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1985,14 +1985,13 @@ def std( # Constructor Helpers -def sequence_to_datetimes(data, require_iso8601: bool = False) -> DatetimeArray: +def sequence_to_datetimes(data) -> DatetimeArray: """ Parse/convert the passed data to either DatetimeArray or np.ndarray[object]. """ result, tz, freq = _sequence_to_dt64ns( data, allow_mixed=True, - require_iso8601=require_iso8601, ) unit = np.datetime_data(result.dtype)[0] diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c06f0bba385fe..65112fc19ae56 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1262,7 +1262,7 @@ def try_timedelta(v: np.ndarray) -> np.ndarray: else: return td_values.reshape(shape) - # TODO: this is _almost_equivalent to lib.maybe_convert_objects, + # TODO: this is _almost_ equivalent to lib.maybe_convert_objects, # the main differences are described in GH#49340 and GH#49341 # and maybe_convert_objects doesn't catch OutOfBoundsDatetime inferred_type = lib.infer_datetimelike_array(ensure_object(v))