pandas-dev · jreback · Sep 6, 2020 · Aug 29, 2020 · Aug 30, 2020 · Aug 30, 2020
diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst
@@ -29,6 +29,7 @@ Bug fixes
 - Bug in :class:`Series` constructor raising a ``TypeError`` when constructing sparse datetime64 dtypes (:issue:`35762`)
 - Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
 - Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should bw ``""`` (:issue:`35712`)
+- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` not respecting the ``errors`` argument when set to ``"ignore"`` for extension dtypes (:issue:`35471`)
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -19,7 +19,7 @@
 from pandas.util._decorators import Appender, Substitution
 from pandas.util._validators import validate_fillna_kwargs
 
-from pandas.core.dtypes.cast import maybe_cast_to_extension_array
+from pandas.core.dtypes.cast import maybe_astype, maybe_cast_to_extension_array
 from pandas.core.dtypes.common import (
     is_array_like,
     is_dtype_equal,
@@ -438,7 +438,7 @@ def nbytes(self) -> int:
     # Additional Methods
     # ------------------------------------------------------------------------
 
-    def astype(self, dtype, copy=True):
+    def astype(self, dtype, copy=True, errors="raise"):
         """
         Cast to a NumPy array with 'dtype'.
 
@@ -450,6 +450,9 @@ def astype(self, dtype, copy=True):
             Whether to copy the data, even if not necessary. If False,
             a copy is made only if the old dtype does not match the
             new dtype.
+        errors : str, {'raise', 'ignore'}, default 'raise'
+            - ``raise`` : allow exceptions to be raised
+            - ``ignore`` : suppress exceptions. On error return original object
 
         Returns
         -------
@@ -462,7 +465,8 @@ def astype(self, dtype, copy=True):
         if isinstance(dtype, StringDtype):  # allow conversion to StringArrays
             return dtype.construct_array_type()._from_sequence(self, copy=False)
 
-        return np.array(self, dtype=dtype, copy=copy)
+        values = maybe_astype(values=self, dtype=dtype, copy=copy, errors=errors)
+        return values
 
     def isna(self) -> ArrayLike:
         """

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -9,6 +9,7 @@
 from pandas.compat import set_function_name
 from pandas.compat.numpy import function as nv
 
+from pandas.core.dtypes.cast import maybe_astype
 from pandas.core.dtypes.common import (
     is_bool_dtype,
     is_extension_array_dtype,
@@ -345,7 +346,7 @@ def reconstruct(x):
     def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]:
         return coerce_to_array(value)
 
-    def astype(self, dtype, copy: bool = True) -> ArrayLike:
+    def astype(self, dtype, copy: bool = True, errors: str = "raise") -> ArrayLike:
         """
         Cast to a NumPy array or ExtensionArray with 'dtype'.
 
@@ -357,6 +358,9 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
             Whether to copy the data, even if not necessary. If False,
             a copy is made only if the old dtype does not match the
             new dtype.
+        errors : str, {'raise', 'ignore'}, default 'raise'
+            - ``raise`` : allow exceptions to be raised
+            - ``ignore`` : suppress exceptions. On error return original object
 
         Returns
         -------
@@ -388,9 +392,14 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
         if is_extension_array_dtype(dtype) and is_integer_dtype(dtype):
             from pandas.core.arrays import IntegerArray
 
-            return IntegerArray(
-                self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False
+            result = maybe_astype(
+                values=self._data, dtype=dtype.numpy_dtype, copy=copy, errors=errors
             )
+
+            if result is self._data:
+                return self
+            else:
+                return IntegerArray(result, self._mask.copy(), copy=False)
         # for integer, error if there are missing values
         if is_integer_dtype(dtype):
             if self._hasna:
@@ -401,7 +410,15 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
         if is_float_dtype(dtype):
             na_value = np.nan
         # coerce
-        return self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
+        try:
+            result = self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
+        except (ValueError, TypeError):
+            if errors == "ignore":
+                result = self
+            else:
+                raise
+
+        return result
 
     def _values_for_argsort(self) -> np.ndarray:
         """

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -17,6 +17,7 @@
 
 from pandas.core.dtypes.cast import (
     coerce_indexer_dtype,
+    maybe_astype,
     maybe_cast_to_extension_array,
     maybe_infer_to_datetimelike,
 )
@@ -450,7 +451,9 @@ def _formatter(self, boxed=False):
         # Defer to CategoricalFormatter's formatter.
         return None
 
-    def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
+    def astype(
+        self, dtype: Dtype, copy: bool = True, errors: str = "raise"
+    ) -> ArrayLike:
         """
         Coerce this type to another dtype
 
@@ -461,6 +464,9 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
             By default, astype always returns a newly allocated object.
             If copy is set to False and dtype is categorical, the original
             object is returned.
+        errors : str, {'raise', 'ignore'}, default 'raise'
+            - ``raise`` : allow exceptions to be raised
+            - ``ignore`` : suppress exceptions. On error return original object
         """
         if is_categorical_dtype(dtype):
             dtype = cast(Union[str, CategoricalDtype], dtype)
@@ -475,7 +481,9 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
             return array(self, dtype=dtype, copy=copy)
         if is_integer_dtype(dtype) and self.isna().any():
             raise ValueError("Cannot convert float NaN to integer")
-        return np.array(self, dtype=dtype, copy=copy)
+
+        values = maybe_astype(values=self, dtype=dtype, copy=copy, errors=errors)
+        return values
 
     @cache_readonly
     def itemsize(self) -> int:

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -30,6 +30,7 @@
 from pandas.util._decorators import Appender, Substitution
 from pandas.util._validators import validate_fillna_kwargs
 
+from pandas.core.dtypes.cast import maybe_astype
 from pandas.core.dtypes.common import (
     is_categorical_dtype,
     is_datetime64_any_dtype,
@@ -622,7 +623,7 @@ def _maybe_clear_freq(self):
         # DatetimeArray and TimedeltaArray
         pass
 
-    def astype(self, dtype, copy=True):
+    def astype(self, dtype, copy: bool = True, errors: str = "raise"):
         # Some notes on cases we don't have to handle here in the base class:
         #   1. PeriodArray.astype handles period -> period
         #   2. DatetimeArray.astype handles conversion between tz.
@@ -655,13 +656,17 @@ def astype(self, dtype, copy=True):
         ) or is_float_dtype(dtype):
             # disallow conversion between datetime/timedelta,
             # and conversions for any datetimelike to float
-            msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
-            raise TypeError(msg)
+            if errors == "ignore":
+                return self
+            else:
+                msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
+                raise TypeError(msg)
         elif is_categorical_dtype(dtype):
             arr_cls = dtype.construct_array_type()
             return arr_cls(self, dtype=dtype)
         else:
-            return np.asarray(self, dtype=dtype)
+            result = maybe_astype(values=self, dtype=dtype, copy=copy, errors=errors)
+            return result
 
     def view(self, dtype=None):
         if dtype is None or dtype is self.dtype:

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -569,7 +569,7 @@ def __iter__(self):
             for v in converted:
                 yield v
 
-    def astype(self, dtype, copy=True):
+    def astype(self, dtype, copy: bool = True, errors: str = "raise"):
         # We handle
         #   --> datetime
         #   --> period
@@ -596,7 +596,7 @@ def astype(self, dtype, copy=True):
             return self
         elif is_period_dtype(dtype):
             return self.to_period(freq=dtype.freq)
-        return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
+        return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy, errors=errors)
 
     # -----------------------------------------------------------------
     # Rendering Methods

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -425,7 +425,7 @@ def reconstruct(x):
     def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]:
         return coerce_to_array(value, dtype=self.dtype)
 
-    def astype(self, dtype, copy: bool = True) -> ArrayLike:
+    def astype(self, dtype, copy: bool = True, errors: str = "raise") -> ArrayLike:
         """
         Cast to a NumPy array or ExtensionArray with 'dtype'.
 
@@ -437,6 +437,9 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
             Whether to copy the data, even if not necessary. If False,
             a copy is made only if the old dtype does not match the
             new dtype.
+        errors : str, {'raise', 'ignore'}, default 'raise'
+            - ``raise`` : allow exceptions to be raised
+            - ``ignore`` : suppress exceptions. On error return original object
 
         Returns
         -------
@@ -477,7 +480,15 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
         else:
             na_value = lib.no_default
 
-        return self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
+        try:
+            result = self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
+        except (ValueError, TypeError):
+            if errors == "ignore":
+                result = self
+            else:
+                raise
+
+        return result
 
     def _values_for_argsort(self) -> np.ndarray:
         """

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -661,7 +661,7 @@ def fillna(self, value=None, method=None, limit=None):
     def dtype(self):
         return IntervalDtype(self.left.dtype)
 
-    def astype(self, dtype, copy=True):
+    def astype(self, dtype, copy: bool = True, errors: str = "raise"):
         """
         Cast to an ExtensionArray or NumPy array with dtype 'dtype'.
 
@@ -674,6 +674,9 @@ def astype(self, dtype, copy=True):
             Whether to copy the data, even if not necessary. If False,
             a copy is made only if the old dtype does not match the
             new dtype.
+        errors : str, {'raise', 'ignore'}, default 'raise'
+            - ``raise`` : allow exceptions to be raised
+            - ``ignore`` : suppress exceptions. On error return original object
 
         Returns
         -------
@@ -694,10 +697,15 @@ def astype(self, dtype, copy=True):
                 new_left = self.left.astype(dtype.subtype)
                 new_right = self.right.astype(dtype.subtype)
             except TypeError as err:
-                msg = (
-                    f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
-                )
-                raise TypeError(msg) from err
+                if errors == "ignore":
+                    new_left = self.left
+                    new_right = self.right
+                else:
+                    msg = (
+                        f"Cannot convert {self.dtype} to {dtype}; "
+                        "subtypes are incompatible"
+                    )
+                    raise TypeError(msg) from err
             return self._shallow_copy(new_left, new_right)
         elif is_categorical_dtype(dtype):
             return Categorical(np.asarray(self))
@@ -708,8 +716,11 @@ def astype(self, dtype, copy=True):
         try:
             return np.asarray(self).astype(dtype, copy=copy)
         except (TypeError, ValueError) as err:
-            msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
-            raise TypeError(msg) from err
+            if errors == "ignore":
+                return self
+            else:
+                msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
+                raise TypeError(msg) from err
 
     @classmethod
     def _concat_same_type(cls, to_concat):

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -573,14 +573,14 @@ def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs):
 
     # ------------------------------------------------------------------
 
-    def astype(self, dtype, copy: bool = True):
+    def astype(self, dtype, copy: bool = True, errors: str = "raise"):
         # We handle Period[T] -> Period[U]
         # Our parent handles everything else.
         dtype = pandas_dtype(dtype)
 
         if is_period_dtype(dtype):
             return self.asfreq(dtype.freq)
-        return super().astype(dtype, copy=copy)
+        return super().astype(dtype, copy=copy, errors=errors)
 
     # ------------------------------------------------------------------
     # Arithmetic Methods

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
@@ -1006,7 +1006,7 @@ def _concat_same_type(cls, to_concat):
 
         return cls(data, sparse_index=sp_index, fill_value=fill_value)
 
-    def astype(self, dtype=None, copy=True):
+    def astype(self, dtype=None, copy: bool = True, errors: str = "raise"):
         """
         Change the dtype of a SparseArray.
 
@@ -1025,6 +1025,10 @@ def astype(self, dtype=None, copy=True):
         copy : bool, default True
             Whether to ensure a copy is made, even if not necessary.
 
+        errors : str, {'raise', 'ignore'}, default 'raise'
+            - ``raise`` : allow exceptions to be raised
+            - ``ignore`` : suppress exceptions. On error return original object
+
         Returns
         -------
         SparseArray
@@ -1063,7 +1067,13 @@ def astype(self, dtype=None, copy=True):
         IntIndex
         Indices: array([2, 3], dtype=int32)
         """
-        dtype = self.dtype.update_dtype(dtype)
+        try:
+            dtype = self.dtype.update_dtype(dtype)
+        except ValueError:
+            if errors == "ignore":
+                return self
+            else:
+                raise
         subtype = dtype._subtype_with_str
         # TODO copy=False is broken for astype_nansafe with int -> float, so cannot
         # passthrough copy keyword: https://github.com/pandas-dev/pandas/issues/34456

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -262,7 +262,7 @@ def fillna(self, value=None, method=None, limit=None):
         # TODO: validate dtype
         return super().fillna(value, method, limit)
 
-    def astype(self, dtype, copy=True):
+    def astype(self, dtype, copy: bool = True, errors: str = "raise"):
         dtype = pandas_dtype(dtype)
         if isinstance(dtype, StringDtype):
             if copy:
@@ -275,7 +275,7 @@ def astype(self, dtype, copy=True):
             values = arr.astype(dtype.numpy_dtype)
             return IntegerArray(values, mask, copy=False)
 
-        return super().astype(dtype, copy)
+        return super().astype(dtype, copy, errors=errors)
 
     def _reduce(self, name: str, skipna: bool = True, **kwargs):
         if name in ["min", "max"]:

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -595,6 +595,37 @@ def maybe_promote(dtype, fill_value=np.nan):
     return dtype, fill_value
 
 
+def maybe_astype(values, dtype, copy, errors):
+    """
+    Cast ``values`` to a NumPy array of ``dtype``, optionally ignoring failures.
+
+    Parameters
+    ----------
+    values : array-like
+        Object passed to ``np.array``.
+    dtype : dtype
+        Target dtype passed to ``np.array``.
+    copy : bool
+        Forwarded to ``np.array`` as its ``copy`` argument.
+    errors : str
+        If ``"ignore"``, a ``ValueError`` or ``TypeError`` raised by the
+        conversion is suppressed and ``values`` is returned unchanged.
+        Any other value lets the exception propagate.
+
+    Returns
+    -------
+    np.ndarray or type of ``values``
+        The converted array, or the original ``values`` object when the
+        conversion failed and ``errors == "ignore"``.
+    """
+    try:
+        return np.array(values, dtype=dtype, copy=copy)
+    except (ValueError, TypeError):
+        if errors != "ignore":
+            raise
+    return values
+
+
 def _ensure_dtype_type(value, dtype):
     """
     Ensure that the given value is an instance of the given dtype.

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -580,8 +580,7 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):
 
         # force the copy here
         if self.is_extension:
-            # TODO: Should we try/except this astype?
-            values = self.values.astype(dtype)
+            values = self.values.astype(dtype, errors=errors)
         else:
             if issubclass(dtype.type, str):
 

diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py
@@ -95,7 +95,7 @@ def __getitem__(self, item):
     def __len__(self):
         return len(self._data)
 
-    def astype(self, dtype, copy=True):
+    def astype(self, dtype, copy=True, errors="raise"):
         # needed to fix this astype for the Series constructor.
         if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
             if copy: