diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 1253788d7ff27..96f35e25331c0 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -207,6 +207,7 @@ Other enhancements - The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue: `30270`) - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) +- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained the ``ignore_index`` keyword, which resets the index after sorting (:issue:`30114`) Build Changes diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 51330bfc55dc3..27c60aa03e590 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4704,6 +4704,7 @@ def sort_values( inplace=False, kind="quicksort", na_position="last", + ignore_index=False, ): inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) @@ -4737,6 +4738,9 @@ def sort_values( indexer, axis=self._get_block_manager_axis(axis), verify=False ) + if ignore_index: + new_data.axes[1] = ibase.default_index(len(indexer)) + if inplace: return self._update_inplace(new_data) else: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bea246c3f1b98..b0cf652d91df4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4087,6 +4087,7 @@ def sort_values( inplace: bool_t = False, kind: str = "quicksort", na_position: str = "last", + ignore_index: bool_t = False, ): """ Sort by the values along either axis. @@ -4109,6 +4110,10 @@ def sort_values( na_position : {'first', 'last'}, default 'last' Puts NaNs at the beginning if `first`; `last` puts NaNs at the end. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. 
versionadded:: 1.0.0 Returns ------- diff --git a/pandas/core/series.py b/pandas/core/series.py index 54c163330e6ee..1204676ed0c8a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2693,6 +2693,7 @@ def sort_values( inplace=False, kind="quicksort", na_position="last", + ignore_index=False, ): """ Sort by the values. @@ -2715,6 +2716,10 @@ def sort_values( na_position : {'first' or 'last'}, default 'last' Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at the end. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 Returns ------- @@ -2820,7 +2825,7 @@ def _try_kind_sort(arr): return arr.argsort(kind="quicksort") arr = self._values - sortedIdx = np.empty(len(self), dtype=np.int32) + sorted_index = np.empty(len(self), dtype=np.int32) bad = isna(arr) @@ -2844,16 +2849,19 @@ def _try_kind_sort(arr): if na_position == "last": n = good.sum() - sortedIdx[:n] = idx[good][argsorted] - sortedIdx[n:] = idx[bad] + sorted_index[:n] = idx[good][argsorted] + sorted_index[n:] = idx[bad] elif na_position == "first": n = bad.sum() - sortedIdx[n:] = idx[good][argsorted] - sortedIdx[:n] = idx[bad] + sorted_index[n:] = idx[good][argsorted] + sorted_index[:n] = idx[bad] else: raise ValueError(f"invalid na_position: {na_position}") - result = self._constructor(arr[sortedIdx], index=self.index[sortedIdx]) + result = self._constructor(arr[sorted_index], index=self.index[sorted_index]) + + if ignore_index: + result.index = ibase.default_index(len(sorted_index)) if inplace: self._update_inplace(result) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index 540bed452d9e9..e733c01e01740 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -460,3 +460,45 @@ def test_sort_values_na_position_with_categories_raises(self): with pytest.raises(ValueError): 
df.sort_values(by="c", ascending=False, na_position="bad_position") + + @pytest.mark.parametrize( + "original_dict, sorted_dict, ignore_index, output_index", + [ + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]), + ( + {"A": [1, 2, 3], "B": [2, 3, 4]}, + {"A": [3, 2, 1], "B": [4, 3, 2]}, + True, + [0, 1, 2], + ), + ( + {"A": [1, 2, 3], "B": [2, 3, 4]}, + {"A": [3, 2, 1], "B": [4, 3, 2]}, + False, + [2, 1, 0], + ), + ], + ) + def test_sort_values_ignore_index( + self, original_dict, sorted_dict, ignore_index, output_index + ): + # GH 30114 + df = DataFrame(original_dict) + expected = DataFrame(sorted_dict, index=output_index) + + # Test when inplace is False + sorted_df = df.sort_values("A", ascending=False, ignore_index=ignore_index) + tm.assert_frame_equal(sorted_df, expected) + + tm.assert_frame_equal(df, DataFrame(original_dict)) + + # Test when inplace is True + copied_df = df.copy() + + copied_df.sort_values( + "A", ascending=False, ignore_index=ignore_index, inplace=True + ) + tm.assert_frame_equal(copied_df, expected) + + tm.assert_frame_equal(df, DataFrame(original_dict)) diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index ec3b8385e79e7..2cea6f061de76 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -156,3 +156,30 @@ def test_sort_values_categorical(self): result = df.sort_values(by=["grade", "id"]) expected = df.iloc[[2, 1, 5, 4, 3, 0]] tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "original_list, sorted_list, ignore_index, output_index", + [ + ([2, 3, 6, 1], [6, 3, 2, 1], True, [0, 1, 2, 3]), + ([2, 3, 6, 1], [6, 3, 2, 1], False, [2, 1, 0, 3]), + ], + ) + def test_sort_values_ignore_index( + self, original_list, sorted_list, ignore_index, output_index + ): + # GH 30114 + sr = Series(original_list) + expected = Series(sorted_list, 
index=output_index) + + # Test when inplace is False + sorted_sr = sr.sort_values(ascending=False, ignore_index=ignore_index) + tm.assert_series_equal(sorted_sr, expected) + + tm.assert_series_equal(sr, Series(original_list)) + + # Test when inplace is True + copied_sr = sr.copy() + copied_sr.sort_values(ascending=False, ignore_index=ignore_index, inplace=True) + tm.assert_series_equal(copied_sr, expected) + + tm.assert_series_equal(sr, Series(original_list))