Skip to content

Implement safe_ea_cast to avoid catching Exception #29293

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 2, 2019
1 change: 1 addition & 0 deletions pandas/core/arrays/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
ExtensionArray,
ExtensionOpsMixin,
ExtensionScalarOpsMixin,
try_cast_to_ea,
)
from .categorical import Categorical # noqa: F401
from .datetimes import DatetimeArray # noqa: F401
Expand Down
29 changes: 26 additions & 3 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,29 @@
_extension_array_shared_docs = dict() # type: Dict[str, str]


def try_cast_to_ea(cls_or_instance, obj, dtype=None):
    """
    Call to `_from_sequence` that returns the object unchanged on Exception.

    Parameters
    ----------
    cls_or_instance : ExtensionArray subclass or instance
        Object whose ``_from_sequence`` is called.
    obj : arraylike
        Values to pass to cls._from_sequence
    dtype : ExtensionDtype, optional
        Forwarded to ``_from_sequence`` as the ``dtype`` keyword.

    Returns
    -------
    ExtensionArray or obj
        Whatever ``_from_sequence`` returns, or `obj` itself (the same
        object, not a copy) if the call raised an ``Exception``.
    """
    # Review note from the PR discussion: add type annotations when feasible.
    try:
        result = cls_or_instance._from_sequence(obj, dtype=dtype)
    except Exception:
        # We can't predict what downstream EA constructors may raise
        result = obj
    return result


class ExtensionArray:
"""
Abstract base class for custom 1-D array types.
Expand Down Expand Up @@ -1156,9 +1179,9 @@ def _maybe_convert(arr):
# https://github.com/pandas-dev/pandas/issues/22850
# We catch all regular exceptions here, and fall back
# to an ndarray.
try:
res = self._from_sequence(arr)
except Exception:
res = try_cast_to_ea(self, arr)
if not isinstance(res, type(self)):
# exception raised in _from_sequence; ensure we have ndarray
res = np.asarray(arr)
else:
res = np.asarray(arr)
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@

from pandas.io.formats import console

from .base import ExtensionArray, _extension_array_shared_docs
from .base import ExtensionArray, _extension_array_shared_docs, try_cast_to_ea

_take_msg = textwrap.dedent(
"""\
Expand Down Expand Up @@ -2613,10 +2613,10 @@ def _get_codes_for_values(values, categories):
# Support inferring the correct extension dtype from an array of
# scalar objects. e.g.
# Categorical(array[Period, Period], categories=PeriodIndex(...))
try:
values = categories.dtype.construct_array_type()._from_sequence(values)
except Exception:
# but that may fail for any reason, so fall back to object
cls = categories.dtype.construct_array_type()
values = try_cast_to_ea(cls, values)
if not isinstance(values, cls):
# exception raised in _from_sequence
values = ensure_object(values)
categories = ensure_object(categories)
else:
Expand Down
12 changes: 3 additions & 9 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class providing the base-class of operations.

from pandas.core import nanops
import pandas.core.algorithms as algorithms
from pandas.core.arrays import Categorical
from pandas.core.arrays import Categorical, try_cast_to_ea
from pandas.core.base import DataError, PandasObject, SelectionMixin
import pandas.core.common as com
from pandas.core.construction import extract_array
Expand Down Expand Up @@ -819,14 +819,8 @@ def _try_cast(self, result, obj, numeric_only=False):
# if the type is compatible with the calling EA.

# return the same type (Series) as our caller
try:
result = obj._values._from_sequence(result, dtype=dtype)
except Exception:
# https://github.com/pandas-dev/pandas/issues/22850
# pandas has no control over what 3rd-party ExtensionArrays
# do in _values_from_sequence. We still want ops to work
# though, so we catch any regular Exception.
pass
cls = dtype.construct_array_type()
result = try_cast_to_ea(cls, result, dtype=dtype)
elif numeric_only and is_numeric_dtype(dtype) or not numeric_only:
result = maybe_downcast_to_dtype(result, dtype)

Expand Down
11 changes: 2 additions & 9 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
import pandas as pd
from pandas.core import algorithms, base, generic, nanops, ops
from pandas.core.accessor import CachedAccessor
from pandas.core.arrays import ExtensionArray
from pandas.core.arrays import ExtensionArray, try_cast_to_ea
from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
from pandas.core.arrays.sparse import SparseAccessor
import pandas.core.common as com
Expand Down Expand Up @@ -2849,14 +2849,7 @@ def combine(self, other, func, fill_value=None):
elif is_extension_array_dtype(self.values):
# The function can return something of any type, so check
# if the type is compatible with the calling EA.
try:
new_values = self._values._from_sequence(new_values)
except Exception:
# https://github.com/pandas-dev/pandas/issues/22850
# pandas has no control over what 3rd-party ExtensionArrays
# do in _values_from_sequence. We still want ops to work
# though, so we catch any regular Exception.
pass
new_values = try_cast_to_ea(self._values, new_values)
return self._constructor(new_values, index=new_index, name=new_name)

def combine_first(self, other):
Expand Down