From 026e58872b30d34076db9c098b0530f9575226ec Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 27 Apr 2023 17:56:05 -0700 Subject: [PATCH 1/2] CLN/TYP: stronger typing in safe_sort --- pandas/core/algorithms.py | 25 +++++++------------------ pandas/core/apply.py | 8 ++++---- pandas/core/indexes/base.py | 2 +- pandas/tests/test_sorting.py | 22 ++++++++++++---------- 4 files changed, 24 insertions(+), 33 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4f771b3c80791..490b60ae24962 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -32,10 +32,7 @@ from pandas.util._decorators import doc from pandas.util._exceptions import find_stack_level -from pandas.core.dtypes.cast import ( - construct_1d_object_array_from_listlike, - infer_dtype_from_array, -) +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import ( ensure_float64, ensure_object, @@ -1468,8 +1465,8 @@ def diff(arr, n: int, axis: AxisInt = 0): # low-dependency, is used in this module, and used private methods from # this module. def safe_sort( - values, - codes=None, + values: Index | ArrayLike, + codes: npt.NDArray[np.intp] | None = None, use_na_sentinel: bool = True, assume_unique: bool = False, verify: bool = True, @@ -1484,7 +1481,7 @@ def safe_sort( ---------- values : list-like Sequence; must be unique if ``codes`` is not None. - codes : list_like, optional + codes : np.ndarray[intp] or None, default None Indices to ``values``. All out of bound indices are treated as "not found" and will be masked with ``-1``. use_na_sentinel : bool, default True @@ -1515,20 +1512,12 @@ def safe_sort( ValueError * If ``codes`` is not None and ``values`` contain duplicates. """ - if not is_list_like(values): + if not isinstance(values, (np.ndarray, ABCExtensionArray, ABCIndex)): raise TypeError( - "Only list-like objects are allowed to be passed to safe_sort as values" + "Only np.ndarray, ExtensionArray, and Index objects are allowed to " + "be passed to safe_sort as values" ) - if not is_array_like(values): - # don't convert to string types - dtype, _ = infer_dtype_from_array(values) - # error: Argument "dtype" to "asarray" has incompatible type "Union[dtype[Any], - # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, - # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], - # _DTypeDict, Tuple[Any, Any]]]" - values = np.asarray(values, dtype=dtype) # type: ignore[arg-type] - sorter = None ordered: AnyArrayLike diff --git a/pandas/core/apply.py b/pandas/core/apply.py index a4f2ba9133928..1a1ccd6bba131 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -56,7 +56,6 @@ ABCSeries, ) -from pandas.core.algorithms import safe_sort from pandas.core.base import SelectionMixin import pandas.core.common as com from pandas.core.construction import ensure_wrapped_if_datetimelike @@ -580,10 +579,11 @@ def normalize_dictlike_arg( if obj.ndim != 1: # Check for missing columns on a frame - cols = set(func.keys()) - set(obj.columns) + from pandas import Index + + cols = Index(list(func.keys())).difference(obj.columns, sort=True) if len(cols) > 0: - cols_sorted = list(safe_sort(list(cols))) - raise KeyError(f"Column(s) {cols_sorted} do not exist") + raise KeyError(f"Column(s) {list(cols)} do not exist") aggregator_types = (list, tuple, dict) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 75320a28eb16b..3251d1f6b222e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7381,7 +7381,7 @@ def _unpack_nested_dtype(other: Index) -> Index: return other -def _maybe_try_sort(result, sort): +def _maybe_try_sort(result: Index | ArrayLike, sort: bool | None): if sort is not False: try: result = algos.safe_sort(result) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index b827ab64e3521..bdad8174c160d 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -373,12 +373,15 @@ class TestSafeSort: "arg, exp", [ [[3, 1, 2, 0, 4], [0, 1, 2, 3, 4]], - [list("baaacb"), np.array(list("aaabbc"), dtype=object)], + [ + np.array(list("baaacb"), dtype=object), + np.array(list("aaabbc"), dtype=object), + ], [[], []], ], ) def test_basic_sort(self, arg, exp): - result = safe_sort(arg) + result = safe_sort(np.array(arg)) expected = np.array(exp) tm.assert_numpy_array_equal(result, expected) @@ -391,7 +394,7 @@ def test_basic_sort(self, arg, exp): ], ) def test_codes(self, verify, codes, exp_codes): - values = [3, 1, 2, 0, 4] + values = np.array([3, 1, 2, 0, 4]) expected = np.array([0, 1, 2, 3, 4]) result, result_codes = safe_sort( @@ -407,7 +410,7 @@ def test_codes(self, verify, codes, exp_codes): "Windows fatal exception: access violation", ) def test_codes_out_of_bound(self): - values = [3, 1, 2, 0, 4] + values = np.array([3, 1, 2, 0, 4]) expected = np.array([0, 1, 2, 3, 4]) # out of bound indices @@ -417,9 +420,8 @@ def test_codes_out_of_bound(self): tm.assert_numpy_array_equal(result, expected) tm.assert_numpy_array_equal(result_codes, expected_codes) - @pytest.mark.parametrize("box", [lambda x: np.array(x, dtype=object), list]) - def test_mixed_integer(self, box): - values = box(["b", 1, 0, "a", 0, "b"]) + def test_mixed_integer(self): + values = np.array(["b", 1, 0, "a", 0, "b"], dtype=object) result = safe_sort(values) expected = np.array([0, 0, 1, "a", "b", "b"], dtype=object) tm.assert_numpy_array_equal(result, expected) @@ -443,9 +445,9 @@ def test_unsortable(self): @pytest.mark.parametrize( "arg, codes, err, msg", [ - [1, None, TypeError, "Only list-like objects are allowed"], - [[0, 1, 2], 1, TypeError, "Only list-like objects or None"], - [[0, 1, 2, 1], [0, 1], ValueError, "values should be unique"], + [1, None, TypeError, "Only np.ndarray, ExtensionArray, and Index"], + [np.array([0, 1, 2]), 1, TypeError, "Only list-like objects or None"], + [np.array([0, 1, 2, 1]), [0, 1], ValueError, "values should be unique"], ], ) def test_exceptions(self, arg, codes, err, msg): From f94b0f3d0a1a4899d8feaba6e452557e812ecb9c Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 27 Apr 2023 18:53:41 -0700 Subject: [PATCH 2/2] mypy fixup --- pandas/core/algorithms.py | 10 ++++++++-- pandas/core/indexes/base.py | 7 ++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 490b60ae24962..abfac85e31658 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1535,7 +1535,10 @@ def safe_sort( # which would work, but which fails for special case of 1d arrays # with tuples. if values.size and isinstance(values[0], tuple): - ordered = _sort_tuples(values) + # error: Argument 1 to "_sort_tuples" has incompatible type + # "Union[Index, ExtensionArray, ndarray[Any, Any]]"; expected + # "ndarray[Any, Any]" + ordered = _sort_tuples(values) # type: ignore[arg-type] else: ordered = _sort_mixed(values) @@ -1556,7 +1559,10 @@ def safe_sort( if sorter is None: # mixed types - hash_klass, values = _get_hashtable_algo(values) + # error: Argument 1 to "_get_hashtable_algo" has incompatible type + # "Union[Index, ExtensionArray, ndarray[Any, Any]]"; expected + # "ndarray[Any, Any]" + hash_klass, values = _get_hashtable_algo(values) # type: ignore[arg-type] t = hash_klass(len(values)) t.map_locations(values) sorter = ensure_platform_int(t.lookup(ordered)) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3251d1f6b222e..ae8ad30ce61b9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7384,7 +7384,12 @@ def _unpack_nested_dtype(other: Index) -> Index: def _maybe_try_sort(result: Index | ArrayLike, sort: bool | None): if sort is not False: try: - result = algos.safe_sort(result) + # error: Incompatible types in assignment (expression has type + # "Union[ExtensionArray, ndarray[Any, Any], Index, Series, + # Tuple[Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], + # ndarray[Any, Any]]]", variable has type "Union[Index, + # Union[ExtensionArray, ndarray[Any, Any]]]") + result = algos.safe_sort(result) # type: ignore[assignment] except TypeError as err: if sort is True: raise