Skip to content

REF: add custom Exception for safe_sort #25569

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
3 changes: 2 additions & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pandas.compat as compat
from pandas.compat import lzip, u
from pandas.compat.numpy import function as nv
from pandas.errors import SortError
from pandas.util._decorators import (
Appender, Substitution, cache_readonly, deprecate_kwarg)
from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs
Expand Down Expand Up @@ -356,7 +357,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
if dtype.categories is None:
try:
codes, categories = factorize(values, sort=True)
except TypeError:
except SortError:
codes, categories = factorize(values, sort=False)
if dtype.ordered:
# raise, as we don't have a sortable data structure and so
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import pandas.compat as compat
from pandas.compat import range, set_function_name, u
from pandas.compat.numpy import function as nv
from pandas.errors import SortError
from pandas.util._decorators import Appender, Substitution, cache_readonly

from pandas.core.dtypes.cast import maybe_cast_to_integer_array
Expand Down Expand Up @@ -2344,7 +2345,7 @@ def union(self, other, sort=None):
if sort is None:
try:
result = sorting.safe_sort(result)
except TypeError as e:
except SortError as e:
warnings.warn("{}, sort order is undefined for "
"incomparable objects".format(e),
RuntimeWarning, stacklevel=3)
Expand Down Expand Up @@ -2503,7 +2504,7 @@ def difference(self, other, sort=None):
if sort is None:
try:
the_diff = sorting.safe_sort(the_diff)
except TypeError:
except SortError:
pass

return this._shallow_copy(the_diff, name=result_name, freq=None)
Expand Down Expand Up @@ -2579,7 +2580,7 @@ def symmetric_difference(self, other, result_name=None, sort=None):
if sort is None:
try:
the_diff = sorting.safe_sort(the_diff)
except TypeError:
except SortError:
pass

attribs = self._get_attributes_dict()
Expand Down
13 changes: 9 additions & 4 deletions pandas/core/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pandas._libs import algos, hashtable, lib
from pandas._libs.hashtable import unique_label_indices
from pandas.compat import PY3, long, string_types
from pandas.errors import SortError

from pandas.core.dtypes.cast import infer_dtype_from_array
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -430,8 +431,9 @@ def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False):
------
TypeError
* If ``values`` is not list-like or if ``labels`` is neither None
nor list-like
* If ``values`` cannot be sorted
nor list-like.
pandas.errors.SortError
* If ``values`` cannot be sorted.
ValueError
* If ``labels`` is not None and ``values`` contain duplicates.
"""
Expand All @@ -449,8 +451,11 @@ def sort_mixed(values):
# order ints before strings, safe in py3
str_pos = np.array([isinstance(x, string_types) for x in values],
dtype=bool)
nums = np.sort(values[~str_pos])
strs = np.sort(values[str_pos])
try:
nums = np.sort(values[~str_pos])
strs = np.sort(values[str_pos])
except TypeError as e:
raise SortError(e)
return np.concatenate([nums, np.asarray(strs, dtype=object)])

sorter = None
Expand Down
7 changes: 7 additions & 0 deletions pandas/errors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,13 @@ class MergeError(ValueError):
"""


class SortError(TypeError):
"""
Error raised when problems arise during sorting due to problems
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to be more explicit about the types of "problems" that could arise? Is this just when sorting mixed types?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@WillAyd : the rationale here was to create an exception analogous to the custom MergeError. Exceptions should be considered part of the API, so this SortError is a subclass of TypeError to be backwards compatible with the exceptions currently raised by np.sort from safe_sort, and therefore results in a non-breaking API change.

Rather than being explicit about the exceptions covered, I consider this to be a base class for any sort-related errors that may need to be handled (although I would probably not have chosen TypeError as the base class). I should probably subclass this again for the safe_sort errors, but that is not strictly necessary at the moment.

with input data. Subclass of `TypeError`.
"""


class NullFrequencyError(ValueError):
"""
Error raised when a null `freq` attribute is used in an operation
Expand Down
9 changes: 3 additions & 6 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@

from pandas._libs import (
algos as libalgos, groupby as libgroupby, hashtable as ht)
from pandas.compat import PY2, lrange, range
from pandas.compat import lrange, range
from pandas.compat.numpy import np_array_datetime64_compat
from pandas.errors import SortError
import pandas.util._test_decorators as td

from pandas.core.dtypes.dtypes import CategoricalDtype as CDT
Expand Down Expand Up @@ -224,15 +225,11 @@ def test_factorize_tuple_list(self, data, expected_label, expected_level):
dtype=object)
tm.assert_numpy_array_equal(result[1], expected_level_array)

@pytest.mark.skipif(PY2, reason="pytest.raises match regex fails")
def test_complex_sorting(self):
# gh 12666 - check no segfault
x17 = np.array([complex(i) for i in range(17)], dtype=object)

msg = (r"'(<|>)' not supported between instances of 'complex' and"
r" 'complex'|"
r"unorderable types: complex\(\) > complex\(\)")
with pytest.raises(TypeError, match=msg):
with pytest.raises(SortError, match="complex"):
algos.factorize(x17[::-1], sort=True)

def test_float64_factorize(self, writable):
Expand Down
12 changes: 4 additions & 8 deletions pandas/tests/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from numpy import nan
import pytest

from pandas.compat import PY2
from pandas.errors import SortError

from pandas import DataFrame, MultiIndex, Series, compat, concat, merge
from pandas.core import common as com
Expand Down Expand Up @@ -405,21 +405,17 @@ def test_mixed_integer_from_list(self):
expected = np.array([0, 0, 1, 'a', 'b', 'b'], dtype=object)
tm.assert_numpy_array_equal(result, expected)

@pytest.mark.skipif(PY2, reason="pytest.raises match regex fails")
def test_unsortable(self):
# GH 13714
arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object)
msg = (r"'(<|>)' not supported between instances of ('"
r"datetime\.datetime' and 'int'|'int' and 'datetime\.datetime"
r"')|"
r"unorderable types: int\(\) > datetime\.datetime\(\)")
msg = "int.*datetime|datetime.*int"
if compat.PY2:
# RuntimeWarning: tp_compare didn't return -1 or -2 for exception
with warnings.catch_warnings():
with pytest.raises(TypeError, match=msg):
with pytest.raises(SortError, match=msg):
safe_sort(arr)
else:
with pytest.raises(TypeError, match=msg):
with pytest.raises(SortError, match=msg):
safe_sort(arr)

def test_exceptions(self):
Expand Down