From 0bdfc8c05954469a7f55c79964eb085fddfb13a8 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Dec 2020 21:19:22 -0800 Subject: [PATCH 1/8] REF: do less inside try/except --- pandas/core/construction.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 5f67db0244a4a..ee060cb344655 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -32,7 +32,6 @@ is_extension_array_dtype, is_float_dtype, is_integer_dtype, - is_iterator, is_list_like, is_object_dtype, is_sparse, @@ -600,6 +599,10 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo subarr = array_type(arr, dtype=dtype, copy=copy) return subarr + if is_object_dtype(dtype) and not isinstance(arr, np.ndarray): + subarr = construct_1d_object_array_from_listlike(arr) + return subarr + try: # GH#15832: Check if we are requesting a numeric dtype and # that we can convert the data to the requested dtype. 
@@ -609,15 +612,12 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo subarr = arr else: subarr = maybe_cast_to_datetime(arr, dtype) + if isinstance(subarr, (ABCExtensionArray, ABCIndex)) or ( + subarr is not arr and subarr.dtype == dtype + ): + return subarr - # Take care in creating object arrays (but iterators are not - # supported): - if is_object_dtype(dtype) and ( - is_list_like(subarr) - and not (is_iterator(subarr) or isinstance(subarr, np.ndarray)) - ): - subarr = construct_1d_object_array_from_listlike(subarr) - elif not is_extension_array_dtype(subarr): + if not isinstance(subarr, ABCExtensionArray): subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy) except OutOfBoundsDatetime: # in case of out of bound datetime64 -> always raise From 959dc1343d651bd807f8efebc8caa331319c307c Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 19 Dec 2020 09:44:50 -0800 Subject: [PATCH 2/8] Simplify check --- pandas/core/construction.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index ee060cb344655..2fce1a06063c5 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -612,9 +612,7 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo subarr = arr else: subarr = maybe_cast_to_datetime(arr, dtype) - if isinstance(subarr, (ABCExtensionArray, ABCIndex)) or ( - subarr is not arr and subarr.dtype == dtype - ): + if isinstance(subarr, (ABCExtensionArray, ABCIndex)): return subarr if not isinstance(subarr, ABCExtensionArray): From 809d71a9492f4f0b0fb65dfee8003d14dbc917eb Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 19 Dec 2020 10:05:57 -0800 Subject: [PATCH 3/8] simplify --- pandas/core/construction.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 2fce1a06063c5..6fd81d3580f0c 100644 --- 
a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -612,10 +612,8 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo subarr = arr else: subarr = maybe_cast_to_datetime(arr, dtype) - if isinstance(subarr, (ABCExtensionArray, ABCIndex)): - return subarr - if not isinstance(subarr, ABCExtensionArray): + if not isinstance(subarr, (ABCExtensionArray, ABCIndex)): subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy) except OutOfBoundsDatetime: # in case of out of bound datetime64 -> always raise From 611bc3264fe62a212ad4aca5e5b3ba77dd11a515 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 19 Dec 2020 10:30:24 -0800 Subject: [PATCH 4/8] simplify --- pandas/core/construction.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 6fd81d3580f0c..261b13e52777b 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -461,10 +461,7 @@ def sanitize_array( try: subarr = _try_cast(data, dtype, copy, True) except ValueError: - if copy: - subarr = data.copy() - else: - subarr = np.array(data, copy=False) + subarr = np.array(data, copy=copy) else: # we will try to copy by-definition here subarr = _try_cast(data, dtype, copy, raise_cast_failure) From 09f202d61470359fd98d6a93967b6b4ec39cd92b Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Dec 2020 10:57:47 -0800 Subject: [PATCH 5/8] CLN: simplify soft_convert_objects --- pandas/core/dtypes/cast.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 1dbafb90ea00f..412f2a26c1acc 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1092,27 +1092,22 @@ def soft_convert_objects( raise ValueError("At least one of datetime, numeric or timedelta must be True.") # Soft conversions - if datetime: + if datetime or timedelta: # GH 20380, when datetime is 
beyond year 2262, hence outside # bound of nanosecond-resolution 64-bit integers. try: - values = lib.maybe_convert_objects(values, convert_datetime=True) + values = lib.maybe_convert_objects( + values, convert_datetime=datetime, convert_timedelta=timedelta + ) except OutOfBoundsDatetime: - pass - - if timedelta and is_object_dtype(values.dtype): - # Object check to ensure only run if previous did not convert - values = lib.maybe_convert_objects(values, convert_timedelta=True) + return values if numeric and is_object_dtype(values.dtype): - try: - converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) - except (ValueError, TypeError): - pass - else: - # If all NaNs, then do not-alter - values = converted if not isna(converted).all() else values - values = values.copy() if copy else values + converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) + + # If all NaNs, then do not-alter + values = converted if not isna(converted).all() else values + values = values.copy() if copy else values return values From b0fa13e6444f4f5eb089d4c284dda3d93c5a47c5 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 19 Dec 2020 20:45:15 -0800 Subject: [PATCH 6/8] CLN: dtypes.cast --- pandas/core/dtypes/cast.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 412f2a26c1acc..d018f80e78d05 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -53,7 +53,6 @@ is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype, - is_datetime_or_timedelta_dtype, is_dtype_equal, is_extension_array_dtype, is_float, @@ -609,13 +608,12 @@ def maybe_promote(dtype, fill_value=np.nan): dtype = mst elif fill_value is None or fill_value is libmissing.NA: + # Note: we already excluded dt64/td64 dtypes above if is_float_dtype(dtype) or is_complex_dtype(dtype): fill_value = np.nan elif is_integer_dtype(dtype): dtype = np.float64 fill_value = np.nan - elif 
is_datetime_or_timedelta_dtype(dtype): - fill_value = dtype.type("NaT", "ns") else: dtype = np.dtype(np.object_) if fill_value is not libmissing.NA: @@ -951,7 +949,7 @@ def astype_td64_unit_conversion( def astype_nansafe( - arr, dtype: DtypeObj, copy: bool = True, skipna: bool = False + arr: np.ndarray, dtype: DtypeObj, copy: bool = True, skipna: bool = False ) -> ArrayLike: """ Cast the elements of an array to a given dtype a nan-safe manner. @@ -979,6 +977,9 @@ def astype_nansafe( order = "F" if flags.f_contiguous else "C" return result.reshape(arr.shape, order=order) + # We get here with 0-dim from sparse + arr = np.atleast_1d(arr) + # dispatch on extension dtype if needed if isinstance(dtype, ExtensionDtype): return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy) @@ -995,9 +996,7 @@ def astype_nansafe( return arr.astype(dtype, copy=copy) if issubclass(dtype.type, str): - return lib.ensure_string_array( - arr.ravel(), skipna=skipna, convert_na_value=False - ).reshape(arr.shape) + return lib.ensure_string_array(arr, skipna=skipna, convert_na_value=False) elif is_datetime64_dtype(arr): if dtype == np.int64: @@ -1031,7 +1030,7 @@ def astype_nansafe( # work around NumPy brokenness, #1987 if np.issubdtype(dtype.type, np.integer): - return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) + return lib.astype_intsafe(arr, dtype) # if we have a datetime/timedelta array of objects # then coerce to a proper dtype and recall astype_nansafe From d0db4998a1e81cd83476783d93b66933b3e6f551 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 21 Dec 2020 18:02:27 -0800 Subject: [PATCH 7/8] REF: collected dtypes.cast simplification --- pandas/core/dtypes/cast.py | 20 ++++++++------------ pandas/tests/dtypes/cast/test_downcast.py | 4 ++-- pandas/tests/io/parser/test_c_parser_only.py | 14 +++++++++----- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 
d018f80e78d05..5a91a94f555d1 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -218,14 +218,11 @@ def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]): # a datetimelike # GH12821, iNaT is cast to float if dtype.kind in ["M", "m"] and result.dtype.kind in ["i", "f"]: - if hasattr(dtype, "tz"): - # not a numpy dtype - if dtype.tz: - # convert to datetime and change timezone - from pandas import to_datetime - - result = to_datetime(result).tz_localize("utc") - result = result.tz_convert(dtype.tz) + if isinstance(dtype, DatetimeTZDtype): + # convert to datetime and change timezone + i8values = result.astype("i8", copy=False) + cls = dtype.construct_array_type() + result = cls._simple_new(i8values, dtype=dtype) else: result = result.astype(dtype) @@ -1268,6 +1265,7 @@ def try_datetime(v): # safe coerce to datetime64 try: # GH19671 + # tznaive only v = tslib.array_to_datetime(v, require_iso8601=True, errors="raise")[0] except ValueError: @@ -1279,12 +1277,10 @@ def try_datetime(v): try: values, tz = conversion.datetime_to_datetime64(v) - return DatetimeIndex(values).tz_localize("UTC").tz_convert(tz=tz) except (ValueError, TypeError): pass - - except Exception: - pass + else: + return DatetimeIndex(values).tz_localize("UTC").tz_convert(tz=tz) return v.reshape(shape) diff --git a/pandas/tests/dtypes/cast/test_downcast.py b/pandas/tests/dtypes/cast/test_downcast.py index d6e6ed3022b75..e9057e9635f37 100644 --- a/pandas/tests/dtypes/cast/test_downcast.py +++ b/pandas/tests/dtypes/cast/test_downcast.py @@ -91,9 +91,9 @@ def test_datetime_likes_nan(klass): def test_datetime_with_timezone(as_asi): # see gh-15426 ts = Timestamp("2016-01-01 12:00:00", tz="US/Pacific") - exp = DatetimeIndex([ts, ts]) + exp = DatetimeIndex([ts, ts])._data obj = exp.asi8 if as_asi else exp res = maybe_downcast_to_dtype(obj, exp.dtype) - tm.assert_index_equal(res, exp) + tm.assert_datetime_array_equal(res, exp) diff --git 
a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index 81c75c29f88cf..15e7569ea9014 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -49,11 +49,15 @@ def test_buffer_rd_bytes(c_parser_only): ) parser = c_parser_only - for _ in range(100): - try: - parser.read_csv(StringIO(data), compression="gzip", delim_whitespace=True) - except Exception: - pass + with tm.assert_produces_warning(RuntimeWarning): + # compression has no effect when passing a non-binary object as input + for _ in range(100): + try: + parser.read_csv( + StringIO(data), compression="gzip", delim_whitespace=True + ) + except Exception: + pass def test_delim_whitespace_custom_terminator(c_parser_only): From 55e83d7c79a5f259e0fc34cdcf5cca19e6c3b253 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 21 Dec 2020 20:17:18 -0800 Subject: [PATCH 8/8] pass on TypeError --- pandas/core/dtypes/cast.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5a91a94f555d1..d1c16de05ce55 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1281,6 +1281,9 @@ def try_datetime(v): pass else: return DatetimeIndex(values).tz_localize("UTC").tz_convert(tz=tz) + except TypeError: + # e.g. the input is not convertible to datetime + pass return v.reshape(shape)