From d234627f215571b85aa1efb4e4dfc3551d41641e Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 29 Aug 2020 16:18:13 -0500 Subject: [PATCH 01/13] BUG: Respect errors="ignore" during extension astype --- doc/source/whatsnew/v1.1.2.rst | 1 + pandas/core/internals/blocks.py | 9 +++++++-- pandas/tests/frame/methods/test_astype.py | 16 ++++++++++++++++ pandas/tests/series/methods/test_astype.py | 19 ++++++++++++++++++- 4 files changed, 42 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index 9747a8ef3e71f..1d41d5cf5661f 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -29,6 +29,7 @@ Bug fixes - Bug in :class:`Series` constructor raising a ``TypeError`` when constructing sparse datetime64 dtypes (:issue:`35762`) - Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`) - Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should bw ``""`` (:issue:`35712`) +- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` not respecting the ``errors`` argument when set to ``"ignore"`` for extension dtypes (:issue:`35471`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a38b47a4c2a25..2edde54ad3380 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -580,8 +580,13 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): # force the copy here if self.is_extension: - # TODO: Should we try/except this astype? - values = self.values.astype(dtype) + try: + values = self.values.astype(dtype) + except (ValueError, TypeError): + if errors == "ignore": + values = self.values + else: + raise else: if issubclass(dtype.type, str): diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index b0fd0496ea81e..8ae6544970b09 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -8,6 +8,7 @@ CategoricalDtype, DataFrame, DatetimeTZDtype, + Interval, IntervalDtype, NaT, Series, @@ -565,3 +566,18 @@ def test_astype_empty_dtype_dict(self): result = df.astype(dict()) tm.assert_frame_equal(result, df) assert result is not df + + @pytest.mark.parametrize( + "values", + [ + Series(["x", "y", "z"], dtype="string"), + Series(["x", "y", "z"], dtype="category"), + Series(3 * [Timestamp("2020-01-01")]), + Series(3 * [Interval(0, 1)]), + ], + ) + def test_astype_ignores_errors_for_extension_dtypes(self, values): + # https://github.com/pandas-dev/pandas/issues/35471 + expected = DataFrame(values) + result = expected.astype(float, errors="ignore") + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 9fdc4179de2e1..97d2a27ef4e8e 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -1,4 +1,6 @@ -from pandas import Series, date_range +import pytest + +from pandas import Interval, Series, Timestamp, date_range import pandas._testing as tm @@ -23,3 +25,18 @@ def test_astype_dt64tz_to_str(self): dtype=object, ) tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "values", + [ + Series(["x", "y", "z"], dtype="string"), + Series(["x", "y", "z"], dtype="category"), + Series(3 * [Timestamp("2020-01-01")]), + Series(3 * [Interval(0, 1)]), + ], + ) + def test_astype_ignores_errors_for_extension_dtypes(self, values): + # https://github.com/pandas-dev/pandas/issues/35471 + expected = values + result = expected.astype(float, errors="ignore") + tm.assert_series_equal(result, expected) From 5bc2ba58447027d2a24bdfdb859d90295601da40 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 29 Aug 2020 20:23:09 -0500 Subject: [PATCH 02/13] A whole bunch of stuff --- pandas/core/arrays/base.py | 10 ++++++--- pandas/core/arrays/boolean.py | 25 ++++++++++++++++++---- pandas/core/arrays/categorical.py | 12 +++++++++-- pandas/core/arrays/datetimelike.py | 13 +++++++---- pandas/core/arrays/datetimes.py | 4 ++-- pandas/core/arrays/integer.py | 15 +++++++++++-- pandas/core/arrays/interval.py | 22 +++++++++++++------ pandas/core/arrays/period.py | 4 ++-- pandas/core/arrays/sparse/array.py | 14 ++++++++++-- pandas/core/arrays/string_.py | 4 ++-- pandas/core/dtypes/cast.py | 12 +++++++++++ pandas/core/internals/blocks.py | 8 +------ pandas/tests/extension/arrow/arrays.py | 2 +- pandas/tests/extension/decimal/array.py | 4 ++-- pandas/tests/extension/json/array.py | 2 +- pandas/tests/frame/methods/test_astype.py | 3 +++ pandas/tests/series/methods/test_astype.py | 4 +++- 17 files changed, 116 insertions(+), 42 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index d85647edc3b81..20e016d8e15bd 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -19,7 +19,7 @@ from pandas.util._decorators import Appender, Substitution from pandas.util._validators import validate_fillna_kwargs -from pandas.core.dtypes.cast import maybe_cast_to_extension_array +from pandas.core.dtypes.cast import maybe_astype, maybe_cast_to_extension_array from pandas.core.dtypes.common import ( is_array_like, is_dtype_equal, @@ -438,7 +438,7 @@ def nbytes(self) -> int: # Additional Methods # ------------------------------------------------------------------------ - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True, errors: str = "raise"): """ Cast to a NumPy array with 'dtype'. @@ -450,6 +450,9 @@ def astype(self, dtype, copy=True): Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. + errors : str, {'raise', 'ignore'}, default 'ignore' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object Returns ------- @@ -462,7 +465,8 @@ def astype(self, dtype, copy=True): if isinstance(dtype, StringDtype): # allow conversion to StringArrays return dtype.construct_array_type()._from_sequence(self, copy=False) - return np.array(self, dtype=dtype, copy=copy) + values = maybe_astype(values=self, dtype=dtype, copy=copy, errors=errors) + return values def isna(self) -> ArrayLike: """ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index bd4bdc5ecb46f..b272076c8cae5 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -9,6 +9,7 @@ from pandas.compat import set_function_name from pandas.compat.numpy import function as nv +from pandas.core.dtypes.cast import maybe_astype from pandas.core.dtypes.common import ( is_bool_dtype, is_extension_array_dtype, @@ -345,7 +346,7 @@ def reconstruct(x): def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]: return coerce_to_array(value) - def astype(self, dtype, copy: bool = True) -> ArrayLike: + def astype(self, dtype, copy: bool = True, errors: str = "raise") -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. @@ -357,6 +358,9 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. + errors : str, {'raise', 'ignore'}, default 'ignore' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object Returns ------- @@ -388,9 +392,14 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: if is_extension_array_dtype(dtype) and is_integer_dtype(dtype): from pandas.core.arrays import IntegerArray - return IntegerArray( - self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False + result = maybe_astype( + values=self._data, dtype=dtype.numpy_dtype, copy=copy, errors=errors ) + + if result is self._data: + return self + else: + return IntegerArray(result, self._mask.copy(), copy=False) # for integer, error if there are missing values if is_integer_dtype(dtype): if self._hasna: @@ -401,7 +410,15 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: if is_float_dtype(dtype): na_value = np.nan # coerce - return self.to_numpy(dtype=dtype, na_value=na_value, copy=False) + try: + result = self.to_numpy(dtype=dtype, na_value=na_value, copy=False) + except (ValueError, TypeError): + if errors == "ignore": + result = self + else: + raise + + return result def _values_for_argsort(self) -> np.ndarray: """ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 27b1afdb438cb..0df0836a91dc4 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -17,6 +17,7 @@ from pandas.core.dtypes.cast import ( coerce_indexer_dtype, + maybe_astype, maybe_cast_to_extension_array, maybe_infer_to_datetimelike, ) @@ -450,7 +451,9 @@ def _formatter(self, boxed=False): # Defer to CategoricalFormatter's formatter. return None - def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: + def astype( + self, dtype: Dtype, copy: bool = True, errors: str = "raise" + ) -> ArrayLike: """ Coerce this type to another dtype @@ -461,6 +464,9 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: By default, astype always returns a newly allocated object. If copy is set to False and dtype is categorical, the original object is returned. + errors : str, {'raise', 'ignore'}, default 'ignore' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object """ if is_categorical_dtype(dtype): dtype = cast(Union[str, CategoricalDtype], dtype) @@ -475,7 +481,9 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: return array(self, dtype=dtype, copy=copy) if is_integer_dtype(dtype) and self.isna().any(): raise ValueError("Cannot convert float NaN to integer") - return np.array(self, dtype=dtype, copy=copy) + + values = maybe_astype(values=self, dtype=dtype, copy=copy, errors=errors) + return values @cache_readonly def itemsize(self) -> int: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1b5e1d81f00d6..49fa06f3cd73c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -30,6 +30,7 @@ from pandas.util._decorators import Appender, Substitution from pandas.util._validators import validate_fillna_kwargs +from pandas.core.dtypes.cast import maybe_astype from pandas.core.dtypes.common import ( is_categorical_dtype, is_datetime64_any_dtype, @@ -622,7 +623,7 @@ def _maybe_clear_freq(self): # DatetimeArray and TimedeltaArray pass - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True, errors: str = "raise"): # Some notes on cases we don't have to handle here in the base class: # 1. PeriodArray.astype handles period -> period # 2. DatetimeArray.astype handles conversion between tz. @@ -655,13 +656,17 @@ def astype(self, dtype, copy=True): ) or is_float_dtype(dtype): # disallow conversion between datetime/timedelta, # and conversions for any datetimelike to float - msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" - raise TypeError(msg) + if errors == "ignore": + return self + else: + msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" + raise TypeError(msg) elif is_categorical_dtype(dtype): arr_cls = dtype.construct_array_type() return arr_cls(self, dtype=dtype) else: - return np.asarray(self, dtype=dtype) + result = maybe_astype(values=self, dtype=dtype, copy=copy, errors=errors) + return result def view(self, dtype=None): if dtype is None or dtype is self.dtype: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 8b2bb7832b5d0..e2eef0d96d7ee 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -569,7 +569,7 @@ def __iter__(self): for v in converted: yield v - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True, errors: str = "raise"): # We handle # --> datetime # --> period @@ -596,7 +596,7 @@ def astype(self, dtype, copy=True): return self elif is_period_dtype(dtype): return self.to_period(freq=dtype.freq) - return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy) + return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy, errors=errors) # ----------------------------------------------------------------- # Rendering Methods diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index d83ff91a1315f..40562258893fa 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -425,7 +425,7 @@ def reconstruct(x): def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]: return coerce_to_array(value, dtype=self.dtype) - def astype(self, dtype, copy: bool = True) -> ArrayLike: + def astype(self, dtype, copy: bool = True, errors: str = "raise") -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. @@ -437,6 +437,9 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. + errors : str, {'raise', 'ignore'}, default 'ignore' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object Returns ------- @@ -477,7 +480,15 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: else: na_value = lib.no_default - return self.to_numpy(dtype=dtype, na_value=na_value, copy=False) + try: + result = self.to_numpy(dtype=dtype, na_value=na_value, copy=False) + except (ValueError, TypeError): + if errors == "ignore": + result = self + else: + raise + + return result def _values_for_argsort(self) -> np.ndarray: """ diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index d76e0fd628a48..8610c4b62a951 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -661,7 +661,7 @@ def fillna(self, value=None, method=None, limit=None): def dtype(self): return IntervalDtype(self.left.dtype) - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True, errors: str = "raise"): """ Cast to an ExtensionArray or NumPy array with dtype 'dtype'. @@ -674,6 +674,9 @@ def astype(self, dtype, copy=True): Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. + errors : str, {'raise', 'ignore'}, default 'ignore' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object Returns ------- @@ -694,10 +697,12 @@ def astype(self, dtype, copy=True): new_left = self.left.astype(dtype.subtype) new_right = self.right.astype(dtype.subtype) except TypeError as err: - msg = ( - f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible" - ) - raise TypeError(msg) from err + if errors == "ignore": + new_left = self.left + new_right = self.right + else: + msg = f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible" + raise TypeError(msg) from err return self._shallow_copy(new_left, new_right) elif is_categorical_dtype(dtype): return Categorical(np.asarray(self)) @@ -708,8 +713,11 @@ def astype(self, dtype, copy=True): try: return np.asarray(self).astype(dtype, copy=copy) except (TypeError, ValueError) as err: - msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" - raise TypeError(msg) from err + if errors == "ignore": + return self + else: + msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" + raise TypeError(msg) from err @classmethod def _concat_same_type(cls, to_concat): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index cc39ffb5d1203..86138476eba94 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -573,14 +573,14 @@ def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): # ------------------------------------------------------------------ - def astype(self, dtype, copy: bool = True): + def astype(self, dtype, copy: bool = True, errors: str = "raise"): # We handle Period[T] -> Period[U] # Our parent handles everything else. dtype = pandas_dtype(dtype) if is_period_dtype(dtype): return self.asfreq(dtype.freq) - return super().astype(dtype, copy=copy) + return super().astype(dtype, copy=copy, errors=errors) # ------------------------------------------------------------------ # Arithmetic Methods diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 1531f7b292365..df871e06cb1e4 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1006,7 +1006,7 @@ def _concat_same_type(cls, to_concat): return cls(data, sparse_index=sp_index, fill_value=fill_value) - def astype(self, dtype=None, copy=True): + def astype(self, dtype=None, copy: bool = True, errors: str = "raise"): """ Change the dtype of a SparseArray. @@ -1025,6 +1025,10 @@ def astype(self, dtype=None, copy=True): copy : bool, default True Whether to ensure a copy is made, even if not necessary. + errors : str, {'raise', 'ignore'}, default 'ignore' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object + Returns ------- SparseArray @@ -1063,7 +1067,13 @@ def astype(self, dtype=None, copy=True): IntIndex Indices: array([2, 3], dtype=int32) """ - dtype = self.dtype.update_dtype(dtype) + try: + dtype = self.dtype.update_dtype(dtype) + except ValueError: + if errors == "ignore": + return self + else: + raise subtype = dtype._subtype_with_str # TODO copy=False is broken for astype_nansafe with int -> float, so cannot # passthrough copy keyword: https://github.com/pandas-dev/pandas/issues/34456 diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 381968f9724b6..3b90664f63021 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -262,7 +262,7 @@ def fillna(self, value=None, method=None, limit=None): # TODO: validate dtype return super().fillna(value, method, limit) - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True, errors: str = "raise"): dtype = pandas_dtype(dtype) if isinstance(dtype, StringDtype): if copy: @@ -275,7 +275,7 @@ def astype(self, dtype, copy=True): values = arr.astype(dtype.numpy_dtype) return IntegerArray(values, mask, copy=False) - return super().astype(dtype, copy) + return super().astype(dtype, copy, errors=errors) def _reduce(self, name: str, skipna: bool = True, **kwargs): if name in ["min", "max"]: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e6b4cb598989b..8fa9be146703e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -595,6 +595,18 @@ def maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value +def maybe_astype(values, dtype, copy, errors="raise"): + try: + result = np.array(values, dtype=dtype, copy=copy) + except (ValueError, TypeError): + if errors == "ignore": + result = values + else: + raise + + return result + + def _ensure_dtype_type(value, dtype): """ Ensure that the given value is an instance of the given dtype. diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2edde54ad3380..f393c5b85b97e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -580,13 +580,7 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): # force the copy here if self.is_extension: - try: - values = self.values.astype(dtype) - except (ValueError, TypeError): - if errors == "ignore": - values = self.values - else: - raise + values = self.values.astype(dtype, errors=errors) else: if issubclass(dtype.type, str): diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index 8a18f505058bc..078c7fb4fb2bd 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -95,7 +95,7 @@ def __getitem__(self, item): def __len__(self): return len(self._data) - def astype(self, dtype, copy=True): + def astype(self, dtype, copy=True, errors="raise"): # needed to fix this astype for the Series constructor. if isinstance(dtype, type(self.dtype)) and dtype == self.dtype: if copy: diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 2fbeec8dd8378..f9a1bf1a240a9 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -130,12 +130,12 @@ def take(self, indexer, allow_fill=False, fill_value=None): def copy(self): return type(self)(self._data.copy()) - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True, errors: str = "raise"): dtype = pandas_dtype(dtype) if isinstance(dtype, type(self.dtype)): return type(self)(self._data, context=dtype.context) - return super().astype(dtype, copy=copy) + return super().astype(dtype, copy=copy, errors=errors) def __setitem__(self, key, value): if pd.api.types.is_list_like(value): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 447a6108fc3c7..d747c4f07cdcf 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -158,7 +158,7 @@ def take(self, indexer, allow_fill=False, fill_value=None): def copy(self): return type(self)(self.data[:]) - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True, errors: str = "raise"): # NumPy has issues when all the dicts are the same length. # np.array([UserDict(...), UserDict(...)]) fails, # but np.array([{...}, {...}]) works, so cast. diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 8ae6544970b09..458ab8380c246 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -11,6 +11,7 @@ Interval, IntervalDtype, NaT, + Period, Series, Timedelta, Timestamp, @@ -573,6 +574,8 @@ def test_astype_empty_dtype_dict(self): Series(["x", "y", "z"], dtype="string"), Series(["x", "y", "z"], dtype="category"), Series(3 * [Timestamp("2020-01-01")]), + Series(3 * [Timedelta(0)]), + Series(3 * [Period("2020")]), Series(3 * [Interval(0, 1)]), ], ) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 97d2a27ef4e8e..2fd1aac2b07e0 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -1,6 +1,6 @@ import pytest -from pandas import Interval, Series, Timestamp, date_range +from pandas import Interval, Period, Series, Timedelta, Timestamp, date_range import pandas._testing as tm @@ -32,6 +32,8 @@ def test_astype_dt64tz_to_str(self): Series(["x", "y", "z"], dtype="string"), Series(["x", "y", "z"], dtype="category"), Series(3 * [Timestamp("2020-01-01")]), + Series(3 * [Timedelta(0)]), + Series(3 * [Period("2020")]), Series(3 * [Interval(0, 1)]), ], ) From c84a9c6e2df0c8e9af9859f9898d2a0c50eb7044 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 29 Aug 2020 20:32:46 -0500 Subject: [PATCH 03/13] Lint --- pandas/core/arrays/interval.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 8610c4b62a951..d718d923e18f4 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -701,7 +701,10 @@ def astype(self, dtype, copy: bool = True, errors: str = "raise"): new_left = self.left new_right = self.right else: - msg = f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible" + msg = ( + f"Cannot convert {self.dtype} to {dtype}; " + "subtypes are incompatible" + ) raise TypeError(msg) from err return self._shallow_copy(new_left, new_right) elif is_categorical_dtype(dtype): From 85bd7f243298bbd3b0e2ecba4a6251904b815357 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 29 Aug 2020 21:02:11 -0500 Subject: [PATCH 04/13] Don't type? --- pandas/core/arrays/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 20e016d8e15bd..c87f207843737 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -438,7 +438,7 @@ def nbytes(self) -> int: # Additional Methods # ------------------------------------------------------------------------ - def astype(self, dtype, copy: bool = True, errors: str = "raise"): + def astype(self, dtype, copy, errors): """ Cast to a NumPy array with 'dtype'. From 73d0ee051d08d4f9ef0cc94cadf32ddff6119c40 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 29 Aug 2020 21:06:51 -0500 Subject: [PATCH 05/13] Nit --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 8fa9be146703e..dc848610d2920 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -595,7 +595,7 @@ def maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value -def maybe_astype(values, dtype, copy, errors="raise"): +def maybe_astype(values, dtype, copy, errors): try: result = np.array(values, dtype=dtype, copy=copy) except (ValueError, TypeError): From dbc3a760231afbe9258f1c24deec398f6bbf360f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 29 Aug 2020 21:27:35 -0500 Subject: [PATCH 06/13] Revert "Nit" This reverts commit 73d0ee051d08d4f9ef0cc94cadf32ddff6119c40. --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index dc848610d2920..8fa9be146703e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -595,7 +595,7 @@ def maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value -def maybe_astype(values, dtype, copy, errors): +def maybe_astype(values, dtype, copy, errors="raise"): try: result = np.array(values, dtype=dtype, copy=copy) except (ValueError, TypeError): From fc6538fb67d8dac77151958d05dca548a66668ea Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 29 Aug 2020 21:30:11 -0500 Subject: [PATCH 07/13] fixup --- pandas/core/arrays/base.py | 2 +- pandas/core/dtypes/cast.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index c87f207843737..5e53a1872be4b 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -438,7 +438,7 @@ def nbytes(self) -> int: # Additional Methods # ------------------------------------------------------------------------ - def astype(self, dtype, copy, errors): + def astype(self, dtype, copy, errors="raise"): """ Cast to a NumPy array with 'dtype'. diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 8fa9be146703e..dc848610d2920 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -595,7 +595,7 @@ def maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value -def maybe_astype(values, dtype, copy, errors="raise"): +def maybe_astype(values, dtype, copy, errors): try: result = np.array(values, dtype=dtype, copy=copy) except (ValueError, TypeError): From 7efb4ba358ec9cad1bb237b508ddf21dd4adf609 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 3 Sep 2020 00:53:14 -0400 Subject: [PATCH 08/13] Revert --- pandas/core/arrays/base.py | 10 +++------ pandas/core/arrays/boolean.py | 25 ++++------------------ pandas/core/arrays/categorical.py | 12 ++--------- pandas/core/arrays/datetimelike.py | 13 ++++------- pandas/core/arrays/datetimes.py | 4 ++-- pandas/core/arrays/integer.py | 15 ++----------- pandas/core/arrays/interval.py | 25 ++++++---------------- pandas/core/arrays/period.py | 4 ++-- pandas/core/arrays/sparse/array.py | 14 ++---------- pandas/core/arrays/string_.py | 4 ++-- pandas/core/dtypes/cast.py | 12 ----------- pandas/core/internals/blocks.py | 8 ++++++- pandas/tests/extension/arrow/arrays.py | 2 +- pandas/tests/extension/decimal/array.py | 4 ++-- pandas/tests/extension/json/array.py | 2 +- pandas/tests/frame/methods/test_astype.py | 3 --- pandas/tests/series/methods/test_astype.py | 4 +--- 17 files changed, 42 insertions(+), 119 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 5e53a1872be4b..d85647edc3b81 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -19,7 +19,7 @@ from pandas.util._decorators import Appender, Substitution from pandas.util._validators import validate_fillna_kwargs -from pandas.core.dtypes.cast import maybe_astype, maybe_cast_to_extension_array +from pandas.core.dtypes.cast import maybe_cast_to_extension_array from pandas.core.dtypes.common import ( is_array_like, is_dtype_equal, @@ -438,7 +438,7 @@ def nbytes(self) -> int: # Additional Methods # ------------------------------------------------------------------------ - def astype(self, dtype, copy, errors="raise"): + def astype(self, dtype, copy=True): """ Cast to a NumPy array with 'dtype'. @@ -450,9 +450,6 @@ def astype(self, dtype, copy, errors="raise"): Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. - errors : str, {'raise', 'ignore'}, default 'ignore' - - ``raise`` : allow exceptions to be raised - - ``ignore`` : suppress exceptions. On error return original object Returns ------- @@ -465,8 +462,7 @@ def astype(self, dtype, copy, errors="raise"): if isinstance(dtype, StringDtype): # allow conversion to StringArrays return dtype.construct_array_type()._from_sequence(self, copy=False) - values = maybe_astype(values=self, dtype=dtype, copy=copy, errors=errors) - return values + return np.array(self, dtype=dtype, copy=copy) def isna(self) -> ArrayLike: """ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index b272076c8cae5..bd4bdc5ecb46f 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -9,7 +9,6 @@ from pandas.compat import set_function_name from pandas.compat.numpy import function as nv -from pandas.core.dtypes.cast import maybe_astype from pandas.core.dtypes.common import ( is_bool_dtype, is_extension_array_dtype, @@ -346,7 +345,7 @@ def reconstruct(x): def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]: return coerce_to_array(value) - def astype(self, dtype, copy: bool = True, errors: str = "raise") -> ArrayLike: + def astype(self, dtype, copy: bool = True) -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. @@ -358,9 +357,6 @@ def astype(self, dtype, copy: bool = True, errors: str = "raise") -> ArrayLike: Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. - errors : str, {'raise', 'ignore'}, default 'ignore' - - ``raise`` : allow exceptions to be raised - - ``ignore`` : suppress exceptions. On error return original object Returns ------- @@ -392,14 +388,9 @@ def astype(self, dtype, copy: bool = True, errors: str = "raise") -> ArrayLike: if is_extension_array_dtype(dtype) and is_integer_dtype(dtype): from pandas.core.arrays import IntegerArray - result = maybe_astype( - values=self._data, dtype=dtype.numpy_dtype, copy=copy, errors=errors + return IntegerArray( + self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False ) - - if result is self._data: - return self - else: - return IntegerArray(result, self._mask.copy(), copy=False) # for integer, error if there are missing values if is_integer_dtype(dtype): if self._hasna: @@ -410,15 +401,7 @@ def astype(self, dtype, copy: bool = True, errors: str = "raise") -> ArrayLike: if is_float_dtype(dtype): na_value = np.nan # coerce - try: - result = self.to_numpy(dtype=dtype, na_value=na_value, copy=False) - except (ValueError, TypeError): - if errors == "ignore": - result = self - else: - raise - - return result + return self.to_numpy(dtype=dtype, na_value=na_value, copy=False) def _values_for_argsort(self) -> np.ndarray: """ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 0df0836a91dc4..27b1afdb438cb 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -17,7 +17,6 @@ from pandas.core.dtypes.cast import ( coerce_indexer_dtype, - maybe_astype, maybe_cast_to_extension_array, maybe_infer_to_datetimelike, ) @@ -451,9 +450,7 @@ def _formatter(self, boxed=False): # Defer to CategoricalFormatter's formatter. return None - def astype( - self, dtype: Dtype, copy: bool = True, errors: str = "raise" - ) -> ArrayLike: + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: """ Coerce this type to another dtype @@ -464,9 +461,6 @@ def astype( By default, astype always returns a newly allocated object. If copy is set to False and dtype is categorical, the original object is returned. - errors : str, {'raise', 'ignore'}, default 'ignore' - - ``raise`` : allow exceptions to be raised - - ``ignore`` : suppress exceptions. On error return original object """ if is_categorical_dtype(dtype): dtype = cast(Union[str, CategoricalDtype], dtype) @@ -481,9 +475,7 @@ def astype( return array(self, dtype=dtype, copy=copy) if is_integer_dtype(dtype) and self.isna().any(): raise ValueError("Cannot convert float NaN to integer") - - values = maybe_astype(values=self, dtype=dtype, copy=copy, errors=errors) - return values + return np.array(self, dtype=dtype, copy=copy) @cache_readonly def itemsize(self) -> int: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 49fa06f3cd73c..1b5e1d81f00d6 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -30,7 +30,6 @@ from pandas.util._decorators import Appender, Substitution from pandas.util._validators import validate_fillna_kwargs -from pandas.core.dtypes.cast import maybe_astype from pandas.core.dtypes.common import ( is_categorical_dtype, is_datetime64_any_dtype, @@ -623,7 +622,7 @@ def _maybe_clear_freq(self): # DatetimeArray and TimedeltaArray pass - def astype(self, dtype, copy: bool = True, errors: str = "raise"): + def astype(self, dtype, copy=True): # Some notes on cases we don't have to handle here in the base class: # 1. PeriodArray.astype handles period -> period # 2. DatetimeArray.astype handles conversion between tz. @@ -656,17 +655,13 @@ def astype(self, dtype, copy: bool = True, errors: str = "raise"): ) or is_float_dtype(dtype): # disallow conversion between datetime/timedelta, # and conversions for any datetimelike to float - if errors == "ignore": - return self - else: - msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" - raise TypeError(msg) + msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" + raise TypeError(msg) elif is_categorical_dtype(dtype): arr_cls = dtype.construct_array_type() return arr_cls(self, dtype=dtype) else: - result = maybe_astype(values=self, dtype=dtype, copy=copy, errors=errors) - return result + return np.asarray(self, dtype=dtype) def view(self, dtype=None): if dtype is None or dtype is self.dtype: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e2eef0d96d7ee..8b2bb7832b5d0 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -569,7 +569,7 @@ def __iter__(self): for v in converted: yield v - def astype(self, dtype, copy: bool = True, errors: str = "raise"): + def astype(self, dtype, copy=True): # We handle # --> datetime # --> period @@ -596,7 +596,7 @@ def astype(self, dtype, copy: bool = True, errors: str = "raise"): return self elif is_period_dtype(dtype): return self.to_period(freq=dtype.freq) - return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy, errors=errors) + return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy) # ----------------------------------------------------------------- # Rendering Methods diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 40562258893fa..d83ff91a1315f 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -425,7 +425,7 @@ def reconstruct(x): def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]: return coerce_to_array(value, dtype=self.dtype) - def astype(self, dtype, copy: bool = True, errors: str = "raise") -> ArrayLike: + def astype(self, dtype, copy: bool = True) -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. @@ -437,9 +437,6 @@ def astype(self, dtype, copy: bool = True, errors: str = "raise") -> ArrayLike: Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. - errors : str, {'raise', 'ignore'}, default 'ignore' - - ``raise`` : allow exceptions to be raised - - ``ignore`` : suppress exceptions. On error return original object Returns ------- @@ -480,15 +477,7 @@ def astype(self, dtype, copy: bool = True, errors: str = "raise") -> ArrayLike: else: na_value = lib.no_default - try: - result = self.to_numpy(dtype=dtype, na_value=na_value, copy=False) - except (ValueError, TypeError): - if errors == "ignore": - result = self - else: - raise - - return result + return self.to_numpy(dtype=dtype, na_value=na_value, copy=False) def _values_for_argsort(self) -> np.ndarray: """ diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index d718d923e18f4..d76e0fd628a48 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -661,7 +661,7 @@ def fillna(self, value=None, method=None, limit=None): def dtype(self): return IntervalDtype(self.left.dtype) - def astype(self, dtype, copy: bool = True, errors: str = "raise"): + def astype(self, dtype, copy=True): """ Cast to an ExtensionArray or NumPy array with dtype 'dtype'. @@ -674,9 +674,6 @@ def astype(self, dtype, copy: bool = True, errors: str = "raise"): Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. - errors : str, {'raise', 'ignore'}, default 'ignore' - - ``raise`` : allow exceptions to be raised - - ``ignore`` : suppress exceptions. On error return original object Returns ------- @@ -697,15 +694,10 @@ def astype(self, dtype, copy: bool = True, errors: str = "raise"): new_left = self.left.astype(dtype.subtype) new_right = self.right.astype(dtype.subtype) except TypeError as err: - if errors == "ignore": - new_left = self.left - new_right = self.right - else: - msg = ( - f"Cannot convert {self.dtype} to {dtype}; " - "subtypes are incompatible" - ) - raise TypeError(msg) from err + msg = ( + f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible" + ) + raise TypeError(msg) from err return self._shallow_copy(new_left, new_right) elif is_categorical_dtype(dtype): return Categorical(np.asarray(self)) @@ -716,11 +708,8 @@ def astype(self, dtype, copy: bool = True, errors: str = "raise"): try: return np.asarray(self).astype(dtype, copy=copy) except (TypeError, ValueError) as err: - if errors == "ignore": - return self - else: - msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" - raise TypeError(msg) from err + msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" + raise TypeError(msg) from err @classmethod def _concat_same_type(cls, to_concat): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 86138476eba94..cc39ffb5d1203 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -573,14 +573,14 @@ def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): # ------------------------------------------------------------------ - def astype(self, dtype, copy: bool = True, errors: str = "raise"): + def astype(self, dtype, copy: bool = True): # We handle Period[T] -> Period[U] # Our parent handles everything else. dtype = pandas_dtype(dtype) if is_period_dtype(dtype): return self.asfreq(dtype.freq) - return super().astype(dtype, copy=copy, errors=errors) + return super().astype(dtype, copy=copy) # ------------------------------------------------------------------ # Arithmetic Methods diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index df871e06cb1e4..1531f7b292365 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1006,7 +1006,7 @@ def _concat_same_type(cls, to_concat): return cls(data, sparse_index=sp_index, fill_value=fill_value) - def astype(self, dtype=None, copy: bool = True, errors: str = "raise"): + def astype(self, dtype=None, copy=True): """ Change the dtype of a SparseArray. @@ -1025,10 +1025,6 @@ def astype(self, dtype=None, copy: bool = True, errors: str = "raise"): copy : bool, default True Whether to ensure a copy is made, even if not necessary. - errors : str, {'raise', 'ignore'}, default 'ignore' - - ``raise`` : allow exceptions to be raised - - ``ignore`` : suppress exceptions. On error return original object - Returns ------- SparseArray @@ -1067,13 +1063,7 @@ def astype(self, dtype=None, copy: bool = True, errors: str = "raise"): IntIndex Indices: array([2, 3], dtype=int32) """ - try: - dtype = self.dtype.update_dtype(dtype) - except ValueError: - if errors == "ignore": - return self - else: - raise + dtype = self.dtype.update_dtype(dtype) subtype = dtype._subtype_with_str # TODO copy=False is broken for astype_nansafe with int -> float, so cannot # passthrough copy keyword: https://github.com/pandas-dev/pandas/issues/34456 diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 3b90664f63021..381968f9724b6 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -262,7 +262,7 @@ def fillna(self, value=None, method=None, limit=None): # TODO: validate dtype return super().fillna(value, method, limit) - def astype(self, dtype, copy: bool = True, errors: str = "raise"): + def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) if isinstance(dtype, StringDtype): if copy: @@ -275,7 +275,7 @@ def astype(self, dtype, copy: bool = True, errors: str = "raise"): values = arr.astype(dtype.numpy_dtype) return IntegerArray(values, mask, copy=False) - return super().astype(dtype, copy, errors=errors) + return super().astype(dtype, copy) def _reduce(self, name: str, skipna: bool = True, **kwargs): if name in ["min", "max"]: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index dc848610d2920..e6b4cb598989b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -595,18 +595,6 @@ def maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value -def maybe_astype(values, dtype, copy, errors): - try: - result = np.array(values, dtype=dtype, copy=copy) - except (ValueError, TypeError): - if errors == "ignore": - result = values - else: - raise - - return result - - def _ensure_dtype_type(value, dtype): """ Ensure that the given value is an instance of the given dtype. diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f393c5b85b97e..2edde54ad3380 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -580,7 +580,13 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): # force the copy here if self.is_extension: - values = self.values.astype(dtype, errors=errors) + try: + values = self.values.astype(dtype) + except (ValueError, TypeError): + if errors == "ignore": + values = self.values + else: + raise else: if issubclass(dtype.type, str): diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index 078c7fb4fb2bd..8a18f505058bc 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -95,7 +95,7 @@ def __getitem__(self, item): def __len__(self): return len(self._data) - def astype(self, dtype, copy=True, errors="raise"): + def astype(self, dtype, copy=True): # needed to fix this astype for the Series constructor. if isinstance(dtype, type(self.dtype)) and dtype == self.dtype: if copy: diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index f9a1bf1a240a9..2fbeec8dd8378 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -130,12 +130,12 @@ def take(self, indexer, allow_fill=False, fill_value=None): def copy(self): return type(self)(self._data.copy()) - def astype(self, dtype, copy: bool = True, errors: str = "raise"): + def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) if isinstance(dtype, type(self.dtype)): return type(self)(self._data, context=dtype.context) - return super().astype(dtype, copy=copy, errors=errors) + return super().astype(dtype, copy=copy) def __setitem__(self, key, value): if pd.api.types.is_list_like(value): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index d747c4f07cdcf..447a6108fc3c7 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -158,7 +158,7 @@ def take(self, indexer, allow_fill=False, fill_value=None): def copy(self): return type(self)(self.data[:]) - def astype(self, dtype, copy: bool = True, errors: str = "raise"): + def astype(self, dtype, copy=True): # NumPy has issues when all the dicts are the same length. # np.array([UserDict(...), UserDict(...)]) fails, # but np.array([{...}, {...}]) works, so cast. diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 458ab8380c246..8ae6544970b09 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -11,7 +11,6 @@ Interval, IntervalDtype, NaT, - Period, Series, Timedelta, Timestamp, @@ -574,8 +573,6 @@ def test_astype_empty_dtype_dict(self): Series(["x", "y", "z"], dtype="string"), Series(["x", "y", "z"], dtype="category"), Series(3 * [Timestamp("2020-01-01")]), - Series(3 * [Timedelta(0)]), - Series(3 * [Period("2020")]), Series(3 * [Interval(0, 1)]), ], ) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 2fd1aac2b07e0..97d2a27ef4e8e 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -1,6 +1,6 @@ import pytest -from pandas import Interval, Period, Series, Timedelta, Timestamp, date_range +from pandas import Interval, Series, Timestamp, date_range import pandas._testing as tm @@ -32,8 +32,6 @@ def test_astype_dt64tz_to_str(self): Series(["x", "y", "z"], dtype="string"), Series(["x", "y", "z"], dtype="category"), Series(3 * [Timestamp("2020-01-01")]), - Series(3 * [Timedelta(0)]), - Series(3 * [Period("2020")]), Series(3 * [Interval(0, 1)]), ], ) From c16204b5320586675bd155a16db6a3927508133f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 5 Sep 2020 18:39:57 -0400 Subject: [PATCH 09/13] Update test --- pandas/tests/frame/methods/test_astype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 8ae6544970b09..7f208266edf62 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -572,7 +572,7 @@ def test_astype_empty_dtype_dict(self): [ Series(["x", "y", "z"], dtype="string"), Series(["x", "y", "z"], dtype="category"), - Series(3 * [Timestamp("2020-01-01")]), + Series(3 * [Timestamp("2020-01-01", tz="UTC")]), Series(3 * [Interval(0, 1)]), ], ) From de8091bd7815321e92cbc16f82098fef698a96d3 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 5 Sep 2020 18:57:57 -0400 Subject: [PATCH 10/13] Update test --- pandas/tests/frame/methods/test_astype.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 7f208266edf62..5153786adb12f 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -576,8 +576,14 @@ def test_astype_empty_dtype_dict(self): Series(3 * [Interval(0, 1)]), ], ) - def test_astype_ignores_errors_for_extension_dtypes(self, values): + @pytest.mark.parametrize("errors", ["raise", "ignore"]) + def test_astype_ignores_errors_for_extension_dtypes(self, values, errors): # https://github.com/pandas-dev/pandas/issues/35471 expected = DataFrame(values) - result = expected.astype(float, errors="ignore") - tm.assert_frame_equal(result, expected) + if errors == "ignore": + result = expected.astype(float, errors=errors) + tm.assert_frame_equal(result, expected) + else: + msg = "(Cannot cast)|(could not convert)" + with pytest.raises((ValueError, TypeError), match=msg): + expected.astype(float, errors=errors) From 234cfe1bcb43ac7f5568a81e9fbb968fe43c3c69 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 5 Sep 2020 20:52:14 -0400 Subject: [PATCH 11/13] Fix --- doc/source/whatsnew/v1.1.2.rst | 1 - pandas/tests/series/methods/test_astype.py | 14 ++++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index 5ac420b647c55..54225495bd627 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -33,7 +33,6 @@ Bug fixes - Bug in :meth:`DataFrame.eval` with ``object`` dtype column binary operations (:issue:`35794`) - Bug in :class:`Series` constructor raising a ``TypeError`` when constructing sparse datetime64 dtypes (:issue:`35762`) - Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`) -- Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should bw ``""`` (:issue:`35712`) - Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` not respecting the ``errors`` argument when set to ``"ignore"`` for extension dtypes (:issue:`35471`) - Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`) - Bug in :meth:`Float64Index.__contains__` incorrectly raising ``TypeError`` instead of returning ``False`` (:issue:`35788`) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 97d2a27ef4e8e..c6577da5d95f3 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -31,12 +31,18 @@ def test_astype_dt64tz_to_str(self): [ Series(["x", "y", "z"], dtype="string"), Series(["x", "y", "z"], dtype="category"), - Series(3 * [Timestamp("2020-01-01")]), + Series(3 * [Timestamp("2020-01-01", tz="UTC")]), Series(3 * [Interval(0, 1)]), ], ) - def test_astype_ignores_errors_for_extension_dtypes(self, values): + @pytest.mark.parametrize("errors", ["raise", "ignore"]) + def test_astype_ignores_errors_for_extension_dtypes(self, values, errors): # https://github.com/pandas-dev/pandas/issues/35471 expected = values - result = expected.astype(float, errors="ignore") - tm.assert_series_equal(result, expected) + if errors == "ignore": + result = expected.astype(float, errors="ignore") + tm.assert_series_equal(result, expected) + else: + msg = "(Cannot cast)|(could not convert)" + with pytest.raises((ValueError, TypeError), match=msg): + expected.astype(float, errors=errors) From 88635de69f030efb47a5060ed29eec08381e3fbb Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 5 Sep 2020 20:56:50 -0400 Subject: [PATCH 12/13] Nit --- pandas/tests/frame/methods/test_astype.py | 18 +++++++++--------- pandas/tests/series/methods/test_astype.py | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 5153786adb12f..d3f256259b15f 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -568,22 +568,22 @@ def test_astype_empty_dtype_dict(self): assert result is not df @pytest.mark.parametrize( - "values", + "df", [ - Series(["x", "y", "z"], dtype="string"), - Series(["x", "y", "z"], dtype="category"), - Series(3 * [Timestamp("2020-01-01", tz="UTC")]), - Series(3 * [Interval(0, 1)]), + DataFrame(Series(["x", "y", "z"], dtype="string")), + DataFrame(Series(["x", "y", "z"], dtype="category")), + DataFrame(Series(3 * [Timestamp("2020-01-01", tz="UTC")])), + DataFrame(Series(3 * [Interval(0, 1)])), ], ) @pytest.mark.parametrize("errors", ["raise", "ignore"]) - def test_astype_ignores_errors_for_extension_dtypes(self, values, errors): + def test_astype_ignores_errors_for_extension_dtypes(self, df, errors): # https://github.com/pandas-dev/pandas/issues/35471 - expected = DataFrame(values) if errors == "ignore": - result = expected.astype(float, errors=errors) + expected = df + result = df.astype(float, errors=errors) tm.assert_frame_equal(result, expected) else: msg = "(Cannot cast)|(could not convert)" with pytest.raises((ValueError, TypeError), match=msg): - expected.astype(float, errors=errors) + df.astype(float, errors=errors) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index c6577da5d95f3..5c753c35129d0 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -38,11 +38,11 @@ def test_astype_dt64tz_to_str(self): @pytest.mark.parametrize("errors", ["raise", "ignore"]) def test_astype_ignores_errors_for_extension_dtypes(self, values, errors): # https://github.com/pandas-dev/pandas/issues/35471 - expected = values if errors == "ignore": + expected = values result = expected.astype(float, errors="ignore") tm.assert_series_equal(result, expected) else: msg = "(Cannot cast)|(could not convert)" with pytest.raises((ValueError, TypeError), match=msg): - expected.astype(float, errors=errors) + values.astype(float, errors=errors) From 61b6eb8251f46fd504b3c266b7d06ab8c9e91112 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 5 Sep 2020 20:57:59 -0400 Subject: [PATCH 13/13] Fix --- pandas/tests/series/methods/test_astype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 5c753c35129d0..b9d90a9fc63dd 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -40,7 +40,7 @@ def test_astype_ignores_errors_for_extension_dtypes(self, values, errors): # https://github.com/pandas-dev/pandas/issues/35471 if errors == "ignore": expected = values - result = expected.astype(float, errors="ignore") + result = values.astype(float, errors="ignore") tm.assert_series_equal(result, expected) else: msg = "(Cannot cast)|(could not convert)"