From eac4a30e4bf8a4b9eb9518a6d73a610a039eb3d2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 30 Oct 2019 08:32:58 -0700 Subject: [PATCH 1/4] Implement safe_ea_cast to avoid catching Exception --- pandas/core/arrays/__init__.py | 1 + pandas/core/arrays/base.py | 18 +++++++++++++++--- pandas/core/arrays/categorical.py | 10 +++++----- pandas/core/groupby/groupby.py | 12 +++--------- pandas/core/series.py | 11 ++--------- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index 868118bac6a7b..b0bbe9e4952d9 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -2,6 +2,7 @@ ExtensionArray, ExtensionOpsMixin, ExtensionScalarOpsMixin, + safe_ea_cast, ) from .categorical import Categorical # noqa: F401 from .datetimes import DatetimeArray # noqa: F401 diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 08901df963f20..fa353a928f93c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -32,6 +32,18 @@ _extension_array_shared_docs = dict() # type: Dict[str, str] +def safe_ea_cast(cls, obj, dtype=None): + """ + Call to cls._from_sequence that returns the object unchanged on Exception. + """ + try: + result = cls._from_sequence(obj, dtype=dtype) + except Exception: + # We can't predict what downstream EA constructors may raise + result = obj + return result + + class ExtensionArray: """ Abstract base class for custom 1-D array types. @@ -1156,9 +1168,9 @@ def _maybe_convert(arr): # https://github.com/pandas-dev/pandas/issues/22850 # We catch all regular exceptions here, and fall back # to an ndarray. - try: - res = self._from_sequence(arr) - except Exception: + res = safe_ea_cast(self, arr) + if not isinstance(res, type(self)): + # exception raised in _from_sequence; ensure we have ndarray res = np.asarray(arr) else: res = np.asarray(arr) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 4d065bd234e0b..fb31f1e9a4b7c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -56,7 +56,7 @@ from pandas.io.formats import console -from .base import ExtensionArray, _extension_array_shared_docs +from .base import ExtensionArray, _extension_array_shared_docs, safe_ea_cast _take_msg = textwrap.dedent( """\ @@ -2613,10 +2613,10 @@ def _get_codes_for_values(values, categories): # Support inferring the correct extension dtype from an array of # scalar objects. e.g. # Categorical(array[Period, Period], categories=PeriodIndex(...)) - try: - values = categories.dtype.construct_array_type()._from_sequence(values) - except Exception: - # but that may fail for any reason, so fall back to object + cls = categories.dtype.construct_array_type() + values = safe_ea_cast(cls, values) + if not isinstance(values, Categorical): + # exception raised in _from_sequence values = ensure_object(values) categories = ensure_object(categories) else: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7d1c74e415658..beea1afef2657 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -43,7 +43,7 @@ class providing the base-class of operations. from pandas.core import nanops import pandas.core.algorithms as algorithms -from pandas.core.arrays import Categorical +from pandas.core.arrays import Categorical, safe_ea_cast from pandas.core.base import DataError, PandasObject, SelectionMixin import pandas.core.common as com from pandas.core.construction import extract_array @@ -816,14 +816,8 @@ def _try_cast(self, result, obj, numeric_only=False): # if the type is compatible with the calling EA. # return the same type (Series) as our caller - try: - result = obj._values._from_sequence(result, dtype=dtype) - except Exception: - # https://github.com/pandas-dev/pandas/issues/22850 - # pandas has no control over what 3rd-party ExtensionArrays - # do in _values_from_sequence. We still want ops to work - # though, so we catch any regular Exception. - pass + cls = dtype.construct_array_type() + result = safe_ea_cast(cls, result, dtype=dtype) elif numeric_only and is_numeric_dtype(dtype) or not numeric_only: result = maybe_downcast_to_dtype(result, dtype) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3e9d3d5c04559..0c030572faf52 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -55,7 +55,7 @@ import pandas as pd from pandas.core import algorithms, base, generic, nanops, ops from pandas.core.accessor import CachedAccessor -from pandas.core.arrays import ExtensionArray +from pandas.core.arrays import ExtensionArray, safe_ea_cast from pandas.core.arrays.categorical import Categorical, CategoricalAccessor from pandas.core.arrays.sparse import SparseAccessor import pandas.core.common as com @@ -2849,14 +2849,7 @@ def combine(self, other, func, fill_value=None): elif is_extension_array_dtype(self.values): # The function can return something of any type, so check # if the type is compatible with the calling EA. - try: - new_values = self._values._from_sequence(new_values) - except Exception: - # https://github.com/pandas-dev/pandas/issues/22850 - # pandas has no control over what 3rd-party ExtensionArrays - # do in _values_from_sequence. We still want ops to work - # though, so we catch any regular Exception. - pass + new_values = safe_ea_cast(self._values, new_values) return self._constructor(new_values, index=new_index, name=new_name) def combine_first(self, other): From dba4b7589ab90215e9a7f74ce7f55e0153c87a5d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 31 Oct 2019 08:13:49 -0700 Subject: [PATCH 2/4] update for comments --- pandas/core/arrays/__init__.py | 2 +- pandas/core/arrays/base.py | 15 +++++++++++++-- pandas/core/arrays/categorical.py | 6 +++--- pandas/core/groupby/groupby.py | 4 ++-- pandas/core/series.py | 4 ++-- 5 files changed, 21 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index b0bbe9e4952d9..03d998707c26b 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -2,7 +2,7 @@ ExtensionArray, ExtensionOpsMixin, ExtensionScalarOpsMixin, - safe_ea_cast, + try_cast_to_ea, ) from .categorical import Categorical # noqa: F401 from .datetimes import DatetimeArray # noqa: F401 diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index fa353a928f93c..ab6f71a738c8b 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -32,9 +32,20 @@ _extension_array_shared_docs = dict() # type: Dict[str, str] -def safe_ea_cast(cls, obj, dtype=None): +def try_cast_to_ea(cls, obj, dtype=None): """ Call to cls._from_sequence that returns the object unchanged on Exception. + + Parameters + ---------- + cls : ExtensionArray subclass + obj : arraylike + Values to pass to cls._from_sequence + dtype : ExtensionDtype, optional + + Returns + ------- + ExtensionArray or obj """ try: result = cls._from_sequence(obj, dtype=dtype) @@ -1168,7 +1179,7 @@ def _maybe_convert(arr): # https://github.com/pandas-dev/pandas/issues/22850 # We catch all regular exceptions here, and fall back # to an ndarray. - res = safe_ea_cast(self, arr) + res = try_cast_to_ea(self, arr) if not isinstance(res, type(self)): # exception raised in _from_sequence; ensure we have ndarray res = np.asarray(arr) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index e23d9b5d1b4d8..4f37bfc52c162 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -56,7 +56,7 @@ from pandas.io.formats import console -from .base import ExtensionArray, _extension_array_shared_docs, safe_ea_cast +from .base import ExtensionArray, _extension_array_shared_docs, try_cast_to_ea _take_msg = textwrap.dedent( """\ @@ -2614,8 +2614,8 @@ def _get_codes_for_values(values, categories): # scalar objects. e.g. # Categorical(array[Period, Period], categories=PeriodIndex(...)) cls = categories.dtype.construct_array_type() - values = safe_ea_cast(cls, values) - if not isinstance(values, Categorical): + values = try_cast_to_ea(cls, values) + if not isinstance(values, cls): # exception raised in _from_sequence values = ensure_object(values) categories = ensure_object(categories) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1fca9f1031991..404da096d8535 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -43,7 +43,7 @@ class providing the base-class of operations. from pandas.core import nanops import pandas.core.algorithms as algorithms -from pandas.core.arrays import Categorical, safe_ea_cast +from pandas.core.arrays import Categorical, try_cast_to_ea from pandas.core.base import DataError, PandasObject, SelectionMixin import pandas.core.common as com from pandas.core.construction import extract_array @@ -820,7 +820,7 @@ def _try_cast(self, result, obj, numeric_only=False): # return the same type (Series) as our caller cls = dtype.construct_array_type() - result = safe_ea_cast(cls, result, dtype=dtype) + result = try_cast_to_ea(cls, result, dtype=dtype) elif numeric_only and is_numeric_dtype(dtype) or not numeric_only: result = maybe_downcast_to_dtype(result, dtype) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0c030572faf52..1a0db520f6a6a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -55,7 +55,7 @@ import pandas as pd from pandas.core import algorithms, base, generic, nanops, ops from pandas.core.accessor import CachedAccessor -from pandas.core.arrays import ExtensionArray, safe_ea_cast +from pandas.core.arrays import ExtensionArray, try_cast_to_ea from pandas.core.arrays.categorical import Categorical, CategoricalAccessor from pandas.core.arrays.sparse import SparseAccessor import pandas.core.common as com @@ -2849,7 +2849,7 @@ def combine(self, other, func, fill_value=None): elif is_extension_array_dtype(self.values): # The function can return something of any type, so check # if the type is compatible with the calling EA. - new_values = safe_ea_cast(self._values, new_values) + new_values = try_cast_to_ea(self._values, new_values) return self._constructor(new_values, index=new_index, name=new_name) def combine_first(self, other): From d324eac0b2689a37daa77e50a38e90c6ff9df35f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 1 Nov 2019 12:49:36 -0700 Subject: [PATCH 3/4] rename arg per comment --- pandas/core/arrays/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index ab6f71a738c8b..def947361ff7c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -32,13 +32,13 @@ _extension_array_shared_docs = dict() # type: Dict[str, str] -def try_cast_to_ea(cls, obj, dtype=None): +def try_cast_to_ea(cls_or_instance, obj, dtype=None): """ Call to cls._from_sequence that returns the object unchanged on Exception. Parameters ---------- - cls : ExtensionArray subclass + cls_or_instance : ExtensionArray subclass or instance obj : arraylike Values to pass to cls._from_sequence dtype : ExtensionDtype, optional @@ -48,7 +48,7 @@ def try_cast_to_ea(cls, obj, dtype=None): ExtensionArray or obj """ try: - result = cls._from_sequence(obj, dtype=dtype) + result = cls_or_instance._from_sequence(obj, dtype=dtype) except Exception: # We can't predict what downstream EA constructors may raise result = obj From ab4966daaa520d7b9f94ec5b21b50b5dc5fa64da Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 1 Nov 2019 14:04:51 -0700 Subject: [PATCH 4/4] Update pandas/core/arrays/base.py Co-Authored-By: gfyoung --- pandas/core/arrays/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index def947361ff7c..7333254831838 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -34,7 +34,7 @@ def try_cast_to_ea(cls_or_instance, obj, dtype=None): """ - Call to cls._from_sequence that returns the object unchanged on Exception. + Call to `_from_sequence` that returns the object unchanged on Exception. Parameters ----------