Skip to content

BUG: Respect errors="ignore" during extension astype #35979

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Sep 6, 2020
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Bug fixes
- Bug in :class:`Series` constructor raising a ``TypeError`` when constructing sparse datetime64 dtypes (:issue:`35762`)
- Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
- Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`)
- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` not respecting the ``errors`` argument when set to ``"ignore"`` for extension dtypes (:issue:`35471`)
-

.. ---------------------------------------------------------------------------
Expand Down
10 changes: 7 additions & 3 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from pandas.util._decorators import Appender, Substitution
from pandas.util._validators import validate_fillna_kwargs

from pandas.core.dtypes.cast import maybe_cast_to_extension_array
from pandas.core.dtypes.cast import maybe_astype, maybe_cast_to_extension_array
from pandas.core.dtypes.common import (
is_array_like,
is_dtype_equal,
Expand Down Expand Up @@ -438,7 +438,7 @@ def nbytes(self) -> int:
# Additional Methods
# ------------------------------------------------------------------------

def astype(self, dtype, copy=True):
def astype(self, dtype, copy, errors="raise"):
"""
Cast to a NumPy array with 'dtype'.

Expand All @@ -450,6 +450,9 @@ def astype(self, dtype, copy=True):
Whether to copy the data, even if not necessary. If False,
a copy is made only if the old dtype does not match the
new dtype.
errors : str, {'raise', 'ignore'}, default 'raise'
- ``raise`` : allow exceptions to be raised
- ``ignore`` : suppress exceptions. On error return original object

Returns
-------
Expand All @@ -462,7 +465,8 @@ def astype(self, dtype, copy=True):
if isinstance(dtype, StringDtype): # allow conversion to StringArrays
return dtype.construct_array_type()._from_sequence(self, copy=False)

return np.array(self, dtype=dtype, copy=copy)
values = maybe_astype(values=self, dtype=dtype, copy=copy, errors=errors)
return values

def isna(self) -> ArrayLike:
"""
Expand Down
25 changes: 21 additions & 4 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pandas.compat import set_function_name
from pandas.compat.numpy import function as nv

from pandas.core.dtypes.cast import maybe_astype
from pandas.core.dtypes.common import (
is_bool_dtype,
is_extension_array_dtype,
Expand Down Expand Up @@ -345,7 +346,7 @@ def reconstruct(x):
def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]:
return coerce_to_array(value)

def astype(self, dtype, copy: bool = True) -> ArrayLike:
def astype(self, dtype, copy: bool = True, errors: str = "raise") -> ArrayLike:
"""
Cast to a NumPy array or ExtensionArray with 'dtype'.

Expand All @@ -357,6 +358,9 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
Whether to copy the data, even if not necessary. If False,
a copy is made only if the old dtype does not match the
new dtype.
errors : str, {'raise', 'ignore'}, default 'raise'
- ``raise`` : allow exceptions to be raised
- ``ignore`` : suppress exceptions. On error return original object

Returns
-------
Expand Down Expand Up @@ -388,9 +392,14 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
if is_extension_array_dtype(dtype) and is_integer_dtype(dtype):
from pandas.core.arrays import IntegerArray

return IntegerArray(
self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False
result = maybe_astype(
values=self._data, dtype=dtype.numpy_dtype, copy=copy, errors=errors
)

if result is self._data:
return self
else:
return IntegerArray(result, self._mask.copy(), copy=False)
# for integer, error if there are missing values
if is_integer_dtype(dtype):
if self._hasna:
Expand All @@ -401,7 +410,15 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
if is_float_dtype(dtype):
na_value = np.nan
# coerce
return self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
try:
result = self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
except (ValueError, TypeError):
if errors == "ignore":
result = self
else:
raise

return result

def _values_for_argsort(self) -> np.ndarray:
"""
Expand Down
12 changes: 10 additions & 2 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from pandas.core.dtypes.cast import (
coerce_indexer_dtype,
maybe_astype,
maybe_cast_to_extension_array,
maybe_infer_to_datetimelike,
)
Expand Down Expand Up @@ -450,7 +451,9 @@ def _formatter(self, boxed=False):
# Defer to CategoricalFormatter's formatter.
return None

def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
def astype(
self, dtype: Dtype, copy: bool = True, errors: str = "raise"
) -> ArrayLike:
"""
Coerce this type to another dtype

Expand All @@ -461,6 +464,9 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
By default, astype always returns a newly allocated object.
If copy is set to False and dtype is categorical, the original
object is returned.
errors : str, {'raise', 'ignore'}, default 'raise'
- ``raise`` : allow exceptions to be raised
- ``ignore`` : suppress exceptions. On error return original object
"""
if is_categorical_dtype(dtype):
dtype = cast(Union[str, CategoricalDtype], dtype)
Expand All @@ -475,7 +481,9 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
return array(self, dtype=dtype, copy=copy)
if is_integer_dtype(dtype) and self.isna().any():
raise ValueError("Cannot convert float NaN to integer")
return np.array(self, dtype=dtype, copy=copy)

values = maybe_astype(values=self, dtype=dtype, copy=copy, errors=errors)
return values

@cache_readonly
def itemsize(self) -> int:
Expand Down
13 changes: 9 additions & 4 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from pandas.util._decorators import Appender, Substitution
from pandas.util._validators import validate_fillna_kwargs

from pandas.core.dtypes.cast import maybe_astype
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_datetime64_any_dtype,
Expand Down Expand Up @@ -622,7 +623,7 @@ def _maybe_clear_freq(self):
# DatetimeArray and TimedeltaArray
pass

def astype(self, dtype, copy=True):
def astype(self, dtype, copy: bool = True, errors: str = "raise"):
# Some notes on cases we don't have to handle here in the base class:
# 1. PeriodArray.astype handles period -> period
# 2. DatetimeArray.astype handles conversion between tz.
Expand Down Expand Up @@ -655,13 +656,17 @@ def astype(self, dtype, copy=True):
) or is_float_dtype(dtype):
# disallow conversion between datetime/timedelta,
# and conversions for any datetimelike to float
msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
raise TypeError(msg)
if errors == "ignore":
return self
else:
msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
raise TypeError(msg)
elif is_categorical_dtype(dtype):
arr_cls = dtype.construct_array_type()
return arr_cls(self, dtype=dtype)
else:
return np.asarray(self, dtype=dtype)
result = maybe_astype(values=self, dtype=dtype, copy=copy, errors=errors)
return result

def view(self, dtype=None):
if dtype is None or dtype is self.dtype:
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ def __iter__(self):
for v in converted:
yield v

def astype(self, dtype, copy=True):
def astype(self, dtype, copy: bool = True, errors: str = "raise"):
# We handle
# --> datetime
# --> period
Expand All @@ -596,7 +596,7 @@ def astype(self, dtype, copy=True):
return self
elif is_period_dtype(dtype):
return self.to_period(freq=dtype.freq)
return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy, errors=errors)

# -----------------------------------------------------------------
# Rendering Methods
Expand Down
15 changes: 13 additions & 2 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ def reconstruct(x):
def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]:
return coerce_to_array(value, dtype=self.dtype)

def astype(self, dtype, copy: bool = True) -> ArrayLike:
def astype(self, dtype, copy: bool = True, errors: str = "raise") -> ArrayLike:
"""
Cast to a NumPy array or ExtensionArray with 'dtype'.

Expand All @@ -437,6 +437,9 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
Whether to copy the data, even if not necessary. If False,
a copy is made only if the old dtype does not match the
new dtype.
errors : str, {'raise', 'ignore'}, default 'raise'
- ``raise`` : allow exceptions to be raised
- ``ignore`` : suppress exceptions. On error return original object

Returns
-------
Expand Down Expand Up @@ -477,7 +480,15 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
else:
na_value = lib.no_default

return self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
try:
result = self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
except (ValueError, TypeError):
if errors == "ignore":
result = self
else:
raise

return result

def _values_for_argsort(self) -> np.ndarray:
"""
Expand Down
25 changes: 18 additions & 7 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,7 @@ def fillna(self, value=None, method=None, limit=None):
def dtype(self):
return IntervalDtype(self.left.dtype)

def astype(self, dtype, copy=True):
def astype(self, dtype, copy: bool = True, errors: str = "raise"):
"""
Cast to an ExtensionArray or NumPy array with dtype 'dtype'.

Expand All @@ -674,6 +674,9 @@ def astype(self, dtype, copy=True):
Whether to copy the data, even if not necessary. If False,
a copy is made only if the old dtype does not match the
new dtype.
errors : str, {'raise', 'ignore'}, default 'raise'
- ``raise`` : allow exceptions to be raised
- ``ignore`` : suppress exceptions. On error return original object

Returns
-------
Expand All @@ -694,10 +697,15 @@ def astype(self, dtype, copy=True):
new_left = self.left.astype(dtype.subtype)
new_right = self.right.astype(dtype.subtype)
except TypeError as err:
msg = (
f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
)
raise TypeError(msg) from err
if errors == "ignore":
new_left = self.left
new_right = self.right
else:
msg = (
f"Cannot convert {self.dtype} to {dtype}; "
"subtypes are incompatible"
)
raise TypeError(msg) from err
return self._shallow_copy(new_left, new_right)
elif is_categorical_dtype(dtype):
return Categorical(np.asarray(self))
Expand All @@ -708,8 +716,11 @@ def astype(self, dtype, copy=True):
try:
return np.asarray(self).astype(dtype, copy=copy)
except (TypeError, ValueError) as err:
msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
raise TypeError(msg) from err
if errors == "ignore":
return self
else:
msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
raise TypeError(msg) from err

@classmethod
def _concat_same_type(cls, to_concat):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,14 +573,14 @@ def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs):

# ------------------------------------------------------------------

def astype(self, dtype, copy: bool = True):
def astype(self, dtype, copy: bool = True, errors: str = "raise"):
# We handle Period[T] -> Period[U]
# Our parent handles everything else.
dtype = pandas_dtype(dtype)

if is_period_dtype(dtype):
return self.asfreq(dtype.freq)
return super().astype(dtype, copy=copy)
return super().astype(dtype, copy=copy, errors=errors)

# ------------------------------------------------------------------
# Arithmetic Methods
Expand Down
14 changes: 12 additions & 2 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,7 @@ def _concat_same_type(cls, to_concat):

return cls(data, sparse_index=sp_index, fill_value=fill_value)

def astype(self, dtype=None, copy=True):
def astype(self, dtype=None, copy: bool = True, errors: str = "raise"):
"""
Change the dtype of a SparseArray.

Expand All @@ -1025,6 +1025,10 @@ def astype(self, dtype=None, copy=True):
copy : bool, default True
Whether to ensure a copy is made, even if not necessary.

errors : str, {'raise', 'ignore'}, default 'raise'
- ``raise`` : allow exceptions to be raised
- ``ignore`` : suppress exceptions. On error return original object

Returns
-------
SparseArray
Expand Down Expand Up @@ -1063,7 +1067,13 @@ def astype(self, dtype=None, copy=True):
IntIndex
Indices: array([2, 3], dtype=int32)
"""
dtype = self.dtype.update_dtype(dtype)
try:
dtype = self.dtype.update_dtype(dtype)
except ValueError:
if errors == "ignore":
return self
else:
raise
subtype = dtype._subtype_with_str
# TODO copy=False is broken for astype_nansafe with int -> float, so cannot
# passthrough copy keyword: https://github.com/pandas-dev/pandas/issues/34456
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def fillna(self, value=None, method=None, limit=None):
# TODO: validate dtype
return super().fillna(value, method, limit)

def astype(self, dtype, copy=True):
def astype(self, dtype, copy: bool = True, errors: str = "raise"):
dtype = pandas_dtype(dtype)
if isinstance(dtype, StringDtype):
if copy:
Expand All @@ -275,7 +275,7 @@ def astype(self, dtype, copy=True):
values = arr.astype(dtype.numpy_dtype)
return IntegerArray(values, mask, copy=False)

return super().astype(dtype, copy)
return super().astype(dtype, copy, errors=errors)

def _reduce(self, name: str, skipna: bool = True, **kwargs):
if name in ["min", "max"]:
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,18 @@ def maybe_promote(dtype, fill_value=np.nan):
return dtype, fill_value


def maybe_astype(values, dtype, copy, errors):
    """
    Cast ``values`` to a NumPy array of ``dtype``, optionally ignoring failures.

    Parameters
    ----------
    values : array-like
        The data to convert.
    dtype : str or numpy.dtype
        Target dtype, passed through to NumPy.
    copy : bool
        If True, always return a newly allocated array. If False, a copy
        is made only when the conversion requires one.
    errors : str, {'raise', 'ignore'}
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object

    Returns
    -------
    numpy.ndarray or the original ``values``
        The converted array, or ``values`` itself (same object) when the
        conversion raised ``ValueError``/``TypeError`` and ``errors`` is
        ``"ignore"``.

    Raises
    ------
    ValueError, TypeError
        Propagated from NumPy when the conversion fails and ``errors`` is
        anything other than ``"ignore"``.
    """
    try:
        if copy:
            result = np.array(values, dtype=dtype)
        else:
            # ``np.array(..., copy=False)`` raises ValueError under
            # NumPy >= 2 whenever a copy cannot be avoided; with
            # errors="ignore" that would silently skip a perfectly valid
            # cast.  ``np.asarray`` copies only if needed on every version.
            result = np.asarray(values, dtype=dtype)
    except (ValueError, TypeError):
        if errors != "ignore":
            raise
        # Hand back the caller's object untouched so it can detect the
        # failed cast via identity (``result is values``).
        result = values

    return result


def _ensure_dtype_type(value, dtype):
"""
Ensure that the given value is an instance of the given dtype.
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,8 +580,7 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):

# force the copy here
if self.is_extension:
# TODO: Should we try/except this astype?
values = self.values.astype(dtype)
values = self.values.astype(dtype, errors=errors)
else:
if issubclass(dtype.type, str):

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/arrow/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def __getitem__(self, item):
def __len__(self):
return len(self._data)

def astype(self, dtype, copy=True):
def astype(self, dtype, copy=True, errors="raise"):
# needed to fix this astype for the Series constructor.
if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
if copy:
Expand Down
Loading