From 4f34ed1e262411edc7711601eec10f8f5b540ac0 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 28 Jan 2021 23:42:58 +0700 Subject: [PATCH 01/12] TST: add failing tests --- pandas/tests/frame/methods/test_sort_index.py | 16 ++++++++++++++++ pandas/tests/series/methods/test_sort_index.py | 14 ++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 221296bfd6d76..0ee50979ab0da 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -761,6 +761,22 @@ def test_sort_index_with_categories(self, categories): ) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "ascending", + [ + None, + (True, None), + (False, 'True'), + ], + ) + def test_sort_index_ascending_bad_value_raises(self, ascending): + df = DataFrame(np.arange(64)) + length = len(df.index) + df.index = [(i - length / 2) % length for i in range(length)] + match = "ascending must be either a bool or a sequence of bools" + with pytest.raises(ValueError, match=match): + df.sort_index(axis=0, ascending=ascending, na_position="first") + class TestDataFrameSortIndexKey: def test_sort_multi_index_key(self): diff --git a/pandas/tests/series/methods/test_sort_index.py b/pandas/tests/series/methods/test_sort_index.py index ccaa8a797e312..565186c8fe5d7 100644 --- a/pandas/tests/series/methods/test_sort_index.py +++ b/pandas/tests/series/methods/test_sort_index.py @@ -198,6 +198,20 @@ def test_sort_index_ascending_list(self): expected = ser.iloc[[0, 4, 1, 5, 2, 6, 3, 7]] tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "ascending", + [ + None, + (True, None), + (False, 'True'), + ], + ) + def test_sort_index_ascending_bad_value_raises(self, ascending): + ser = Series(range(10), index=[0, 3, 2, 1, 4, 5, 7, 6, 8, 9]) + match = "ascending must be either a bool or a sequence of bools" + with pytest.raises(ValueError, match=match): + ser.sort_index(ascending=ascending) + class TestSeriesSortIndexKey: def test_sort_index_multiindex_key(self): From 8e40cacb1a5e52911310a0cfac520556b20ca5bf Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 28 Jan 2021 23:43:17 +0700 Subject: [PATCH 02/12] BUG: add ascending kwarg validation --- pandas/core/generic.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 96b35f1aaab9c..97766c2a276e5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4533,6 +4533,8 @@ def sort_index( inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) + ascending = validate_ascending(ascending) + target = self._get_axis(axis) indexer = get_indexer_indexer( @@ -11772,3 +11774,25 @@ def _doc_params(cls): The required number of valid values to perform the operation. If fewer than ``min_count`` non-NA values are present the result will be NA. """ + + +def validate_ascending( + ascending: Union[bool, Sequence[bool]], +) -> Union[bool, Sequence[bool]]: + """Validate ``ascending`` kwargs for ``sort_index`` method.""" + if not isinstance(ascending, (list, tuple)): + _check_ascending_element(ascending) + return ascending + + for item in ascending: + _check_ascending_element(item) + return ascending + + +def _check_ascending_element(value): + """Ensure that each item in ``ascending`` kwarg is either bool or int.""" + if ( + value is None + or not isinstance(value, (bool, int)) + ): + raise ValueError("ascending must be either a bool or a sequence of bools") From 513e0c5cccf5b88afa2f636f273941d2b197d7be Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Fri, 29 Jan 2021 00:19:13 +0700 Subject: [PATCH 03/12] LINT: black --- pandas/core/generic.py | 5 +---- pandas/tests/frame/methods/test_sort_index.py | 2 +- pandas/tests/series/methods/test_sort_index.py | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 97766c2a276e5..16e515f4f1b5a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11791,8 +11791,5 @@ def validate_ascending( def _check_ascending_element(value): """Ensure that each item in ``ascending`` kwarg is either bool or int.""" - if ( - value is None - or not isinstance(value, (bool, int)) - ): + if value is None or not isinstance(value, (bool, int)): raise ValueError("ascending must be either a bool or a sequence of bools") diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 0ee50979ab0da..fb37727a3e023 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -766,7 +766,7 @@ def test_sort_index_with_categories(self, categories): [ None, (True, None), - (False, 'True'), + (False, "True"), ], ) def test_sort_index_ascending_bad_value_raises(self, ascending): diff --git a/pandas/tests/series/methods/test_sort_index.py b/pandas/tests/series/methods/test_sort_index.py index 565186c8fe5d7..7d7b32f96839b 100644 --- a/pandas/tests/series/methods/test_sort_index.py +++ b/pandas/tests/series/methods/test_sort_index.py @@ -203,7 +203,7 @@ def test_sort_index_ascending_list(self): [ None, (True, None), - (False, 'True'), + (False, "True"), ], ) def test_sort_index_ascending_bad_value_raises(self, ascending): From dd2d191c29085c592957f8ede1a58ef10a26723d Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Fri, 29 Jan 2021 15:43:45 +0700 Subject: [PATCH 04/12] REF: add kwargs none_allowed and int_allowed --- pandas/core/generic.py | 18 +++------- pandas/tests/frame/methods/test_sort_index.py | 2 +- .../tests/series/methods/test_sort_index.py | 2 +- pandas/util/_validators.py | 36 +++++++++++++++++-- 4 files changed, 39 insertions(+), 19 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 16e515f4f1b5a..778db90fe6232 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11776,20 +11776,10 @@ def _doc_params(cls): """ -def validate_ascending( - ascending: Union[bool, Sequence[bool]], -) -> Union[bool, Sequence[bool]]: +def validate_ascending(ascending): """Validate ``ascending`` kwargs for ``sort_index`` method.""" + kwargs = {"none_allowed": False, "int_allowed": True} if not isinstance(ascending, (list, tuple)): - _check_ascending_element(ascending) - return ascending + return validate_bool_kwarg(ascending, "ascending", **kwargs) - for item in ascending: - _check_ascending_element(item) - return ascending - - -def _check_ascending_element(value): - """Ensure that each item in ``ascending`` kwarg is either bool or int.""" - if value is None or not isinstance(value, (bool, int)): - raise ValueError("ascending must be either a bool or a sequence of bools") + return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending] diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index fb37727a3e023..849aae1d1e940 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -773,7 +773,7 @@ def test_sort_index_ascending_bad_value_raises(self, ascending): df = DataFrame(np.arange(64)) length = len(df.index) df.index = [(i - length / 2) % length for i in range(length)] - match = "ascending must be either a bool or a sequence of bools" + match = 'For argument "ascending" expected type bool' with pytest.raises(ValueError, match=match): df.sort_index(axis=0, ascending=ascending, na_position="first") diff --git a/pandas/tests/series/methods/test_sort_index.py b/pandas/tests/series/methods/test_sort_index.py index 7d7b32f96839b..8c6f2e2665ec3 100644 --- a/pandas/tests/series/methods/test_sort_index.py +++ b/pandas/tests/series/methods/test_sort_index.py @@ -208,7 +208,7 @@ def test_sort_index_ascending_list(self): ) def test_sort_index_ascending_bad_value_raises(self, ascending): ser = Series(range(10), index=[0, 3, 2, 1, 4, 5, 7, 6, 8, 9]) - match = "ascending must be either a bool or a sequence of bools" + match = 'For argument "ascending" expected type bool' with pytest.raises(ValueError, match=match): ser.sort_index(ascending=ascending) diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index fa7201a5188a5..b65a331bbf03a 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -205,9 +205,39 @@ def validate_args_and_kwargs(fname, args, kwargs, max_fname_arg_count, compat_ar validate_kwargs(fname, kwargs, compat_args) -def validate_bool_kwarg(value, arg_name): - """ Ensures that argument passed in arg_name is of type bool. """ - if not (is_bool(value) or value is None): +def validate_bool_kwarg(value, arg_name, none_allowed=True, int_allowed=False): + """ + Ensure that argument passed in arg_name can be interpreted as boolean. + + Parameters + ---------- + value : bool + Value to be validated. + arg_name : str + Name of the argument. To be reflected in the error message. + none_allowed : bool, optional + Whether to consider None to be a valid boolean. + int_allowed : bool, optional + Whether to consider integer value to be a valid boolean. + + Returns + ------- + value + The same value as input. + + Raises + ------ + ValueError + If the value is not a valid boolean. + """ + good_value = is_bool(value) + if none_allowed: + good_value = good_value or value is None + + if int_allowed: + good_value = good_value or isinstance(value, int) + + if not good_value: raise ValueError( f'For argument "{arg_name}" expected type bool, received ' f"type {type(value).__name__}." From f06d5f499647040a1d618cf5df8c7a0da64ce2a4 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Tue, 2 Feb 2021 12:19:21 +0700 Subject: [PATCH 05/12] DOC: add default values to docstring --- pandas/util/_validators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index b65a331bbf03a..ee918cef124c6 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -215,9 +215,9 @@ def validate_bool_kwarg(value, arg_name, none_allowed=True, int_allowed=False): Value to be validated. arg_name : str Name of the argument. To be reflected in the error message. - none_allowed : bool, optional + none_allowed : bool, default True Whether to consider None to be a valid boolean. - int_allowed : bool, optional + int_allowed : bool, default False Whether to consider integer value to be a valid boolean. Returns From 4b8da49cc94422bf05c2c34aa348a8f46c4d4e8d Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 11 Feb 2021 18:02:45 +0700 Subject: [PATCH 06/12] REF/TYP: move validate_ascending to _validators --- pandas/core/frame.py | 2 +- pandas/core/generic.py | 17 ++++++----------- pandas/core/series.py | 10 +++++++--- pandas/core/sorting.py | 2 +- pandas/util/_validators.py | 13 ++++++++++++- 5 files changed, 27 insertions(+), 17 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c7585b21abe99..637cbb55bad0c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5560,7 +5560,7 @@ def sort_index( self, axis: Axis = 0, level: Optional[Level] = None, - ascending: bool = True, + ascending: Union[Union[bool, int], Sequence[Union[bool, int]]] = True, inplace: bool = False, kind: str = "quicksort", na_position: str = "last", diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 778db90fe6232..526b4fc71bd4a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -59,7 +59,11 @@ from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError, InvalidIndexError from pandas.util._decorators import doc, rewrite_axis_style_signature -from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs +from pandas.util._validators import ( + validate_ascending, + validate_bool_kwarg, + validate_fillna_kwargs, +) from pandas.core.dtypes.common import ( ensure_int64, @@ -4522,7 +4526,7 @@ def sort_index( self, axis=0, level=None, - ascending: bool_t = True, + ascending: Union[Union[bool_t, int], Sequence[Union[bool_t, int]]] = True, inplace: bool_t = False, kind: str = "quicksort", na_position: str = "last", @@ -11774,12 +11778,3 @@ def _doc_params(cls): The required number of valid values to perform the operation. If fewer than ``min_count`` non-NA values are present the result will be NA. """ - - -def validate_ascending(ascending): - """Validate ``ascending`` kwargs for ``sort_index`` method.""" - kwargs = {"none_allowed": False, "int_allowed": True} - if not isinstance(ascending, (list, tuple)): - return validate_bool_kwarg(ascending, "ascending", **kwargs) - - return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending] diff --git a/pandas/core/series.py b/pandas/core/series.py index f75292f32dbca..b54006b80bf0c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -15,9 +15,11 @@ Iterable, List, Optional, + Sequence, Tuple, Type, Union, + cast, ) import warnings @@ -3039,7 +3041,7 @@ def update(self, other) -> None: def sort_values( self, axis=0, - ascending=True, + ascending: Union[Union[bool, int], Sequence[Union[bool, int]]] = True, inplace: bool = False, kind: str = "quicksort", na_position: str = "last", @@ -3217,6 +3219,7 @@ def sort_values( ) if is_list_like(ascending): + ascending = cast(Sequence[Union[bool, int]], ascending) if len(ascending) != 1: raise ValueError( f"Length of ascending ({len(ascending)}) must be 1 for Series" @@ -3231,7 +3234,8 @@ def sort_values( # GH 35922. Make sorting stable by leveraging nargsort values_to_sort = ensure_key_mapped(self, key)._values if key else self._values - sorted_index = nargsort(values_to_sort, kind, ascending, na_position) + + sorted_index = nargsort(values_to_sort, kind, bool(ascending), na_position) result = self._constructor( self._values[sorted_index], index=self.index[sorted_index] @@ -3249,7 +3253,7 @@ def sort_index( self, axis=0, level=None, - ascending: bool = True, + ascending: Union[Union[bool, int], Sequence[Union[bool, int]]] = True, inplace: bool = False, kind: str = "quicksort", na_position: str = "last", diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 2c2e0c16a4482..8354061e6daea 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -41,7 +41,7 @@ def get_indexer_indexer( target: Index, level: Union[str, int, List[str], List[int]], - ascending: bool, + ascending: Union[Sequence[Union[bool, int]], Union[bool, int]], kind: str, na_position: str, sort_remaining: bool, diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index ee918cef124c6..289ea63be62e2 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -2,7 +2,7 @@ Module that contains many useful utilities for validating data or function arguments """ -from typing import Iterable, Union +from typing import Iterable, Sequence, Union import warnings import numpy as np @@ -411,3 +411,14 @@ def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray: if not all(0 <= qs <= 1 for qs in q_arr): raise ValueError(msg.format(q_arr / 100.0)) return q_arr + + +def validate_ascending( + ascending: Union[Union[bool, int], Sequence[Union[bool, int]]] = True, +): + """Validate ``ascending`` kwargs for ``sort_index`` method.""" + kwargs = {"none_allowed": False, "int_allowed": True} + if not isinstance(ascending, (list, tuple)): + return validate_bool_kwarg(ascending, "ascending", **kwargs) + + return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending] From 836329399c9c80174d2a409eaceef26ad9410cca Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 11 Feb 2021 18:40:47 +0700 Subject: [PATCH 07/12] FIX: add missing import --- pandas/core/series.py | 1 - pandas/core/sorting.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index b54006b80bf0c..a3881becbd121 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3234,7 +3234,6 @@ def sort_values( # GH 35922. Make sorting stable by leveraging nargsort values_to_sort = ensure_key_mapped(self, key)._values if key else self._values - sorted_index = nargsort(values_to_sort, kind, bool(ascending), na_position) result = self._constructor( diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 8354061e6daea..d78ccb40b01eb 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -10,6 +10,7 @@ Iterable, List, Optional, + Sequence, Tuple, Union, ) From 25b4119e21190f213166e86951fb57ef99009c08 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 25 Feb 2021 22:02:52 +0700 Subject: [PATCH 08/12] DOC: update docstring in Series.sort_values --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d623ef0e7a32b..b211328570f5f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3119,7 +3119,7 @@ def sort_values( axis : {0 or 'index'}, default 0 Axis to direct sorting. The value 'index' is accepted for compatibility with DataFrame.sort_values. - ascending : bool, default True + ascending : bool or list of bools, default True If True, sort values in ascending order, otherwise descending. inplace : bool, default False If True, perform operation in-place. From 6a422b2bb74c3079e8c5768279637836472ce9b9 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 25 Feb 2021 22:04:27 +0700 Subject: [PATCH 09/12] DOC: update docstring in DataFrame.sort_index --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 42bc57124d6a7..a39aef9b91662 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5646,7 +5646,7 @@ def sort_index( and 1 identifies the columns. level : int or level name or list of ints or list of level names If not None, sort on values in specified index level(s). - ascending : bool or list of bools, default True + ascending : bool or list-like of bools, default True Sort ascending vs. descending. When the index is a MultiIndex the sort direction can be controlled for each level individually. inplace : bool, default False From 860c8a5bdb09ddc5f45a9e13b1e5d4bf152c0634 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 25 Feb 2021 22:40:09 +0700 Subject: [PATCH 10/12] TST: use lists, reference GH issue --- pandas/tests/frame/methods/test_sort_index.py | 5 +++-- pandas/util/_validators.py | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 849aae1d1e940..5fa60b55f4e21 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -765,11 +765,12 @@ def test_sort_index_with_categories(self, categories): "ascending", [ None, - (True, None), - (False, "True"), + [True, None], + [False, "True"], ], ) def test_sort_index_ascending_bad_value_raises(self, ascending): + # GH 39434 df = DataFrame(np.arange(64)) length = len(df.index) df.index = [(i - length / 2) % length for i in range(length)] diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 289ea63be62e2..087dccfadcce1 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -2,7 +2,11 @@ Module that contains many useful utilities for validating data or function arguments """ -from typing import Iterable, Sequence, Union +from typing import ( + Iterable, + Sequence, + Union, +) import warnings import numpy as np From a51a343841ff07f8a5aef0d43e950a73283ba033 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Thu, 25 Feb 2021 23:23:27 +0700 Subject: [PATCH 11/12] DOC: update docstring in Series.sort_index --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index b211328570f5f..5fece72ccddca 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3332,7 +3332,7 @@ def sort_index( Axis to direct sorting. This can only be 0 for Series. level : int, optional If not None, sort on values in specified index level(s). - ascending : bool or list of bools, default True + ascending : bool or list-like of bools, default True Sort ascending vs. descending. When the index is a MultiIndex the sort direction can be controlled for each level individually. inplace : bool, default False From 6b430f818d79fbaa7c00b37b290ce8eb1a498eba Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Fri, 26 Feb 2021 19:00:50 +0700 Subject: [PATCH 12/12] DOC: add whatsnew entry --- doc/source/whatsnew/v1.2.3.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.3.rst b/doc/source/whatsnew/v1.2.3.rst index 28fc83459b69d..f72ee78bf243a 100644 --- a/doc/source/whatsnew/v1.2.3.rst +++ b/doc/source/whatsnew/v1.2.3.rst @@ -19,7 +19,11 @@ Fixed regressions - Fixed regression in nullable integer unary ops propagating mask on assignment (:issue:`39943`) - Fixed regression in :meth:`DataFrame.__setitem__` not aligning :class:`DataFrame` on right-hand side for boolean indexer (:issue:`39931`) - Fixed regression in :meth:`~DataFrame.to_json` failing to use ``compression`` with URL-like paths that are internally opened in binary mode or with user-provided file objects that are opened in binary mode (:issue:`39985`) -- +- Fixed regression in :meth:`~Series.sort_index` and :meth:`~DataFrame.sort_index`, + which exited with an ungraceful error when having kwarg ``ascending=None`` passed (:issue:`39434`). + Passing ``ascending=None`` is still considered invalid, + and the new error message suggests a proper usage + (``ascending`` must be a boolean or a list-like boolean). .. ---------------------------------------------------------------------------