From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 01/19] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") 
week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From 
d0a134f7d3c28be9f7ab30b78768b9648cfbdce5 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 22 Dec 2019 13:12:53 +0100 Subject: [PATCH 02/19] Add ignore index for sort values --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/frame.py | 4 ++++ pandas/tests/frame/test_sorting.py | 13 +++++++++++++ 3 files changed, 18 insertions(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a15d5b319fc82..513ad6173b244 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -474,6 +474,7 @@ Other API changes Supplying anything else than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`) - When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`) - :meth:`Series.str.__iter__` was deprecated and will be removed in future releases (:issue:`28277`). +- ``ignore_index`` is added in :meth:`DataFrame.sort_values` to be able to get reset index after sorting (:issue:`30114`) .. _whatsnew_1000.api.documentation: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 766437dbad8f8..cfb3d212d7373 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4704,6 +4704,7 @@ def sort_values( inplace=False, kind="quicksort", na_position="last", + ignore_index: bool = False, ): inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) @@ -4737,6 +4738,9 @@ def sort_values( indexer, axis=self._get_block_manager_axis(axis), verify=False ) + if ignore_index: + new_data.axes[1] = ibase.default_index(len(indexer)) + if inplace: return self._update_inplace(new_data) else: diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 64294d5cdcb81..fc6d3c371e581 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -376,6 +376,19 @@ def test_sort_nat(self): sorted_df = df.sort_values(by=["a", "b"]) tm.assert_frame_equal(sorted_df, expected) + 
@pytest.mark.parametrize("original_dict, sorted_dict", [ + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}), + ({"A": [1, 2, 3], "B": [2, 3, 4]}, {"A": [3, 2, 1], "B": [4, 3, 2]}) + ]) + def test_sort_values_ignore_index(self, original_dict, sorted_dict): + + # GH 30114 + df = pd.DataFrame(original_dict) + sorted_df = df.sort_values("A", ascending=False, ignore_index=True) + + expected = pd.DataFrame(sorted_dict, index=[0, 1, 2]) + tm.assert_frame_equal(sorted_df, expected) + class TestDataFrameSortIndexKinds: def test_sort_index_multicolumn(self): From 6d52765c85444bdc1765b9507362328ca5eefc45 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 22 Dec 2019 16:09:11 +0100 Subject: [PATCH 03/19] black reformat --- pandas/tests/frame/test_sorting.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index fc6d3c371e581..80a55787b73b0 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -376,10 +376,13 @@ def test_sort_nat(self): sorted_df = df.sort_values(by=["a", "b"]) tm.assert_frame_equal(sorted_df, expected) - @pytest.mark.parametrize("original_dict, sorted_dict", [ - ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}), - ({"A": [1, 2, 3], "B": [2, 3, 4]}, {"A": [3, 2, 1], "B": [4, 3, 2]}) - ]) + @pytest.mark.parametrize( + "original_dict, sorted_dict", + [ + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}), + ({"A": [1, 2, 3], "B": [2, 3, 4]}, {"A": [3, 2, 1], "B": [4, 3, 2]}), + ], + ) def test_sort_values_ignore_index(self, original_dict, sorted_dict): # GH 30114 From a31797d0f60c06800f7a647f835ee8331003505c Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 22 Dec 2019 16:31:02 +0100 Subject: [PATCH 04/19] add tests --- pandas/tests/frame/test_sorting.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 80a55787b73b0..0a4a4ca332b76 100644 --- 
a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -377,19 +377,33 @@ def test_sort_nat(self): tm.assert_frame_equal(sorted_df, expected) @pytest.mark.parametrize( - "original_dict, sorted_dict", + "original_dict, sorted_dict, ignore_index, output_index", [ - ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}), - ({"A": [1, 2, 3], "B": [2, 3, 4]}, {"A": [3, 2, 1], "B": [4, 3, 2]}), + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]), + ( + {"A": [1, 2, 3], "B": [2, 3, 4]}, + {"A": [3, 2, 1], "B": [4, 3, 2]}, + True, + [0, 1, 2], + ), + ( + {"A": [1, 2, 3], "B": [2, 3, 4]}, + {"A": [3, 2, 1], "B": [4, 3, 2]}, + False, + [2, 1, 0], + ), ], ) - def test_sort_values_ignore_index(self, original_dict, sorted_dict): + def test_sort_values_ignore_index( + self, original_dict, sorted_dict, ignore_index, output_index + ): # GH 30114 df = pd.DataFrame(original_dict) - sorted_df = df.sort_values("A", ascending=False, ignore_index=True) + sorted_df = df.sort_values("A", ascending=False, ignore_index=ignore_index) - expected = pd.DataFrame(sorted_dict, index=[0, 1, 2]) + expected = pd.DataFrame(sorted_dict, index=output_index) tm.assert_frame_equal(sorted_df, expected) From b80f380711871dcffecea579459d05715beeb199 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 22 Dec 2019 22:46:15 +0100 Subject: [PATCH 05/19] remove type hint to see if test passes --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cfb3d212d7373..804a7e27ef298 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4704,7 +4704,7 @@ def sort_values( inplace=False, kind="quicksort", na_position="last", - ignore_index: bool = False, + ignore_index=False, ): inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) From b997d3f622801708dd85a651442b96cc2ad30e01 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 23 Dec 
2019 11:00:35 +0100 Subject: [PATCH 06/19] code change based on WA review --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/frame.py | 2 +- pandas/core/generic.py | 3 +++ pandas/core/series.py | 7 ++++--- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 513ad6173b244..e86a015edab4f 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -474,7 +474,7 @@ Other API changes Supplying anything else than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`) - When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`) - :meth:`Series.str.__iter__` was deprecated and will be removed in future releases (:issue:`28277`). -- ``ignore_index`` is added in :meth:`DataFrame.sort_values` to be able to get reset index after sorting (:issue:`30114`) +- ``ignore_index`` is added in :meth:`DataFrame.sort_values` to be able to reset index after sorting (:issue:`30114`) .. _whatsnew_1000.api.documentation: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 804a7e27ef298..cfb3d212d7373 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4704,7 +4704,7 @@ def sort_values( inplace=False, kind="quicksort", na_position="last", - ignore_index=False, + ignore_index: bool = False, ): inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 03bd1b331ec30..b1e3a0b373b7e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4087,6 +4087,7 @@ def sort_values( inplace: bool_t = False, kind: str = "quicksort", na_position: str = "last", + ignore_index: bool = False, ): """ Sort by the values along either axis. @@ -4109,6 +4110,8 @@ def sort_values( na_position : {'first', 'last'}, default 'last' Puts NaNs at the beginning if `first`; `last` puts NaNs at the end. 
+ ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. Returns ------- diff --git a/pandas/core/series.py b/pandas/core/series.py index 54c163330e6ee..1f80d09a4bba2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2693,6 +2693,7 @@ def sort_values( inplace=False, kind="quicksort", na_position="last", + ignore_index: bool = False, ): """ Sort by the values. @@ -2715,6 +2716,8 @@ def sort_values( na_position : {'first' or 'last'}, default 'last' Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at the end. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. Returns ------- @@ -4386,9 +4389,7 @@ def to_period(self, freq=None, copy=True): hist = pandas.plotting.hist_series -Series._setup_axes( - ["index"], docs={"index": "The index (axis labels) of the Series."}, -) +Series._setup_axes(["index"], docs={"index": "The index (axis labels) of the Series."}) Series._add_numeric_operations() Series._add_series_only_operations() Series._add_series_or_dataframe_operations() From 12d1260b3eef364003e373bd4c720386d8831ed5 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 23 Dec 2019 11:03:11 +0100 Subject: [PATCH 07/19] restore reformat change on other parts --- pandas/core/series.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 1f80d09a4bba2..aa162ae45fdc3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4389,7 +4389,9 @@ def to_period(self, freq=None, copy=True): hist = pandas.plotting.hist_series -Series._setup_axes(["index"], docs={"index": "The index (axis labels) of the Series."}) +Series._setup_axes( + ["index"], docs={"index": "The index (axis labels) of the Series."} +) Series._add_numeric_operations() Series._add_series_only_operations() Series._add_series_or_dataframe_operations() From 4ff2493db46f0bb6c1e43892e3076b467e5914d1 Mon Sep 17 00:00:00 2001 From: 
Kaiqi Date: Mon, 23 Dec 2019 11:03:56 +0100 Subject: [PATCH 08/19] revert change --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index aa162ae45fdc3..64b7a4b2109d1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4390,7 +4390,7 @@ def to_period(self, freq=None, copy=True): Series._setup_axes( - ["index"], docs={"index": "The index (axis labels) of the Series."} + ["index"], docs={"index": "The index (axis labels) of the Series."}, ) Series._add_numeric_operations() Series._add_series_only_operations() From e9d63f4c56d73c7c79f3ee0ef4bc725c3ac9caef Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 23 Dec 2019 11:27:51 +0100 Subject: [PATCH 09/19] change bool --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b1e3a0b373b7e..e5095868627ad 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4087,7 +4087,7 @@ def sort_values( inplace: bool_t = False, kind: str = "quicksort", na_position: str = "last", - ignore_index: bool = False, + ignore_index: bool_t = False, ): """ Sort by the values along either axis. 
From 70ffec7367287c36a3612ee75ad018e44a1994bf Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 23 Dec 2019 12:00:54 +0100 Subject: [PATCH 10/19] remove annotation --- pandas/core/frame.py | 2 +- pandas/core/series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cfb3d212d7373..804a7e27ef298 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4704,7 +4704,7 @@ def sort_values( inplace=False, kind="quicksort", na_position="last", - ignore_index: bool = False, + ignore_index=False, ): inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) diff --git a/pandas/core/series.py b/pandas/core/series.py index 64b7a4b2109d1..d39a871146363 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2693,7 +2693,7 @@ def sort_values( inplace=False, kind="quicksort", na_position="last", - ignore_index: bool = False, + ignore_index=False, ): """ Sort by the values. From b4245d777e29bedf5a59bb55e2f3a4dd616b8fbc Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 23 Dec 2019 13:00:57 +0100 Subject: [PATCH 11/19] remove for series --- pandas/core/series.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d39a871146363..54c163330e6ee 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2693,7 +2693,6 @@ def sort_values( inplace=False, kind="quicksort", na_position="last", - ignore_index=False, ): """ Sort by the values. @@ -2716,8 +2715,6 @@ def sort_values( na_position : {'first' or 'last'}, default 'last' Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at the end. - ignore_index : bool, default False - If True, the resulting axis will be labeled 0, 1, …, n - 1. 
Returns ------- From f9e7ec245a7037a884ce54b8d2e03b043b402f10 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 23 Dec 2019 13:21:18 +0100 Subject: [PATCH 12/19] add ignore_index for series --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/series.py | 6 ++++++ pandas/tests/series/test_sorting.py | 18 ++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index e86a015edab4f..6cc78483fb9be 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -474,7 +474,7 @@ Other API changes Supplying anything else than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`) - When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`) - :meth:`Series.str.__iter__` was deprecated and will be removed in future releases (:issue:`28277`). -- ``ignore_index`` is added in :meth:`DataFrame.sort_values` to be able to reset index after sorting (:issue:`30114`) +- ``ignore_index`` is added in :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` to be able to reset index after sorting (:issue:`30114`) .. _whatsnew_1000.api.documentation: diff --git a/pandas/core/series.py b/pandas/core/series.py index 54c163330e6ee..d630062540d8b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2693,6 +2693,7 @@ def sort_values( inplace=False, kind="quicksort", na_position="last", + ignore_index=False, ): """ Sort by the values. @@ -2715,6 +2716,8 @@ def sort_values( na_position : {'first' or 'last'}, default 'last' Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at the end. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. 
Returns ------- @@ -2855,6 +2858,9 @@ def _try_kind_sort(arr): result = self._constructor(arr[sortedIdx], index=self.index[sortedIdx]) + if ignore_index: + result = result.reset_index(drop=True) + if inplace: self._update_inplace(result) else: diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index fd3445e271699..411c1ff60dd49 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -269,3 +269,21 @@ def test_sort_values_categorical(self): result = df.sort_values(by=["grade", "id"]) expected = df.iloc[[2, 1, 5, 4, 3, 0]] tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "original_list, sorted_list, ignore_index, output_index", + [ + ([2, 3, 6, 1], [6, 3, 2, 1], True, [0, 1, 2, 3]), + ([2, 3, 6, 1], [6, 3, 2, 1], False, [2, 1, 0, 3]), + ], + ) + def test_sort_values_ignore_index( + self, original_list, sorted_list, ignore_index, output_index + ): + + # GH 30114 + sr = Series(original_list) + sorted_sr = sr.sort_values(ascending=False, ignore_index=ignore_index) + + expected = Series(sorted_list, index=output_index) + tm.assert_series_equal(sorted_sr, expected) From d95a89f5c9b52509df76cae27bc3ac107c02e1b0 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 23 Dec 2019 19:42:01 +0100 Subject: [PATCH 13/19] keep consistency --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 804a7e27ef298..a2f1984a9c5b7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4739,7 +4739,7 @@ def sort_values( ) if ignore_index: - new_data.axes[1] = ibase.default_index(len(indexer)) + new_data = new_data.reset_index(drop=True) if inplace: return self._update_inplace(new_data) From f241e67784eacfe8ba0f07f4c848d22598319a9d Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 23 Dec 2019 21:22:03 +0100 Subject: [PATCH 14/19] revert change --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a2f1984a9c5b7..804a7e27ef298 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4739,7 +4739,7 @@ def sort_values( ) if ignore_index: - new_data = new_data.reset_index(drop=True) + new_data.axes[1] = ibase.default_index(len(indexer)) if inplace: return self._update_inplace(new_data) From bbb4754b5fb7d2998ec4f30353a8bfc99be03e1a Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 23 Dec 2019 22:01:35 +0100 Subject: [PATCH 15/19] restore change --- .../tests/frame/methods/test_sort_values.py | 276 ++---------------- 1 file changed, 32 insertions(+), 244 deletions(-) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index fece17717c185..b62540ae64b9e 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -341,250 +341,8 @@ def test_sort_values_nat_values_in_int_column(self): df_sorted = df.sort_values(["datetime", "float"], ascending=False) tm.assert_frame_equal(df_sorted, df) - def test_sort_nat(self): - - # GH 16836 - - d1 = [Timestamp(x) for x in ["2016-01-01", "2015-01-01", np.nan, "2016-01-01"]] - d2 = [ - Timestamp(x) - for x in ["2017-01-01", "2014-01-01", "2016-01-01", "2015-01-01"] - ] - df = pd.DataFrame({"a": d1, "b": d2}, index=[0, 1, 2, 3]) - - d3 = [Timestamp(x) for x in ["2015-01-01", "2016-01-01", "2016-01-01", np.nan]] - d4 = [ - Timestamp(x) - for x in ["2014-01-01", "2015-01-01", "2017-01-01", "2016-01-01"] - ] - expected = pd.DataFrame({"a": d3, "b": d4}, index=[1, 3, 0, 2]) - sorted_df = df.sort_values(by=["a", "b"]) - tm.assert_frame_equal(sorted_df, expected) - - @pytest.mark.parametrize( - "original_dict, sorted_dict, ignore_index, output_index", - [ - ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]), - ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]), - ( - {"A": [1, 2, 3], "B": [2, 3, 4]}, - {"A": [3, 2, 1], "B": [4, 3, 2]}, - 
True, - [0, 1, 2], - ), - ( - {"A": [1, 2, 3], "B": [2, 3, 4]}, - {"A": [3, 2, 1], "B": [4, 3, 2]}, - False, - [2, 1, 0], - ), - ], - ) - def test_sort_values_ignore_index( - self, original_dict, sorted_dict, ignore_index, output_index - ): - - # GH 30114 - df = pd.DataFrame(original_dict) - sorted_df = df.sort_values("A", ascending=False, ignore_index=ignore_index) - - expected = pd.DataFrame(sorted_dict, index=output_index) - tm.assert_frame_equal(sorted_df, expected) - - -class TestDataFrameSortIndexKinds: - def test_sort_index_multicolumn(self): - A = np.arange(5).repeat(20) - B = np.tile(np.arange(5), 20) - random.shuffle(A) - random.shuffle(B) - frame = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) - - result = frame.sort_values(by=["A", "B"]) - indexer = np.lexsort((frame["B"], frame["A"])) - expected = frame.take(indexer) - tm.assert_frame_equal(result, expected) - - result = frame.sort_values(by=["A", "B"], ascending=False) - indexer = np.lexsort( - (frame["B"].rank(ascending=False), frame["A"].rank(ascending=False)) - ) - expected = frame.take(indexer) - tm.assert_frame_equal(result, expected) - - result = frame.sort_values(by=["B", "A"]) - indexer = np.lexsort((frame["A"], frame["B"])) - expected = frame.take(indexer) - tm.assert_frame_equal(result, expected) - - def test_sort_index_inplace(self): - frame = DataFrame( - np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"] - ) - - # axis=0 - unordered = frame.loc[[3, 2, 4, 1]] - a_id = id(unordered["A"]) - df = unordered.copy() - df.sort_index(inplace=True) - expected = frame - tm.assert_frame_equal(df, expected) - assert a_id != id(df["A"]) - - df = unordered.copy() - df.sort_index(ascending=False, inplace=True) - expected = frame[::-1] - tm.assert_frame_equal(df, expected) - - # axis=1 - unordered = frame.loc[:, ["D", "B", "C", "A"]] - df = unordered.copy() - df.sort_index(axis=1, inplace=True) - expected = frame - tm.assert_frame_equal(df, expected) - - df = unordered.copy() 
- df.sort_index(axis=1, ascending=False, inplace=True) - expected = frame.iloc[:, ::-1] - tm.assert_frame_equal(df, expected) - - def test_sort_index_different_sortorder(self): - A = np.arange(20).repeat(5) - B = np.tile(np.arange(5), 20) - - indexer = np.random.permutation(100) - A = A.take(indexer) - B = B.take(indexer) - - df = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) - - ex_indexer = np.lexsort((df.B.max() - df.B, df.A)) - expected = df.take(ex_indexer) - - # test with multiindex, too - idf = df.set_index(["A", "B"]) - - result = idf.sort_index(ascending=[1, 0]) - expected = idf.take(ex_indexer) - tm.assert_frame_equal(result, expected) - - # also, Series! - result = idf["C"].sort_index(ascending=[1, 0]) - tm.assert_series_equal(result, expected["C"]) - - def test_sort_index_level(self): - mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) - df = DataFrame([[1, 2], [3, 4]], mi) - - result = df.sort_index(level="A", sort_remaining=False) - expected = df - tm.assert_frame_equal(result, expected) - - result = df.sort_index(level=["A", "B"], sort_remaining=False) - expected = df - tm.assert_frame_equal(result, expected) - - # Error thrown by sort_index when - # first index is sorted last (#26053) - result = df.sort_index(level=["C", "B", "A"]) - expected = df.iloc[[1, 0]] - tm.assert_frame_equal(result, expected) - - result = df.sort_index(level=["B", "C", "A"]) - expected = df.iloc[[1, 0]] - tm.assert_frame_equal(result, expected) - - result = df.sort_index(level=["C", "A"]) - expected = df.iloc[[1, 0]] - tm.assert_frame_equal(result, expected) - - def test_sort_index_categorical_index(self): - - df = DataFrame( - { - "A": np.arange(6, dtype="int64"), - "B": Series(list("aabbca")).astype(CategoricalDtype(list("cab"))), - } - ).set_index("B") - - result = df.sort_index() - expected = df.iloc[[4, 0, 1, 5, 2, 3]] - tm.assert_frame_equal(result, expected) - - result = df.sort_index(ascending=False) - expected = df.iloc[[2, 3, 0, 1, 5, 
4]] - tm.assert_frame_equal(result, expected) - - def test_sort_index(self): - # GH13496 - - frame = DataFrame( - np.arange(16).reshape(4, 4), - index=[1, 2, 3, 4], - columns=["A", "B", "C", "D"], - ) - - # axis=0 : sort rows by index labels - unordered = frame.loc[[3, 2, 4, 1]] - result = unordered.sort_index(axis=0) - expected = frame - tm.assert_frame_equal(result, expected) - - result = unordered.sort_index(ascending=False) - expected = frame[::-1] - tm.assert_frame_equal(result, expected) - - # axis=1 : sort columns by column names - unordered = frame.iloc[:, [2, 1, 3, 0]] - result = unordered.sort_index(axis=1) - tm.assert_frame_equal(result, frame) - - result = unordered.sort_index(axis=1, ascending=False) - expected = frame.iloc[:, ::-1] - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("level", ["A", 0]) # GH 21052 - def test_sort_index_multiindex(self, level): - # GH13496 - - # sort rows by specified level of multi-index - mi = MultiIndex.from_tuples( - [[2, 1, 3], [2, 1, 2], [1, 1, 1]], names=list("ABC") - ) - df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mi) - - expected_mi = MultiIndex.from_tuples( - [[1, 1, 1], [2, 1, 2], [2, 1, 3]], names=list("ABC") - ) - expected = pd.DataFrame([[5, 6], [3, 4], [1, 2]], index=expected_mi) - result = df.sort_index(level=level) - tm.assert_frame_equal(result, expected) - - # sort_remaining=False - expected_mi = MultiIndex.from_tuples( - [[1, 1, 1], [2, 1, 3], [2, 1, 2]], names=list("ABC") - ) - expected = pd.DataFrame([[5, 6], [1, 2], [3, 4]], index=expected_mi) - result = df.sort_index(level=level, sort_remaining=False) - tm.assert_frame_equal(result, expected) - - def test_sort_index_intervalindex(self): - # this is a de-facto sort via unstack - # confirming that we sort in the order of the bins - y = Series(np.random.randn(100)) - x1 = Series(np.sign(np.random.randn(100))) - x2 = pd.cut(Series(np.random.randn(100)), bins=[-3, -0.5, 0, 0.5, 3]) - model = pd.concat([y, x1, x2], axis=1, 
keys=["Y", "X1", "X2"]) - - result = model.groupby(["X1", "X2"], observed=True).mean().unstack() - expected = IntervalIndex.from_tuples( - [(-3.0, -0.5), (-0.5, 0.0), (0.0, 0.5), (0.5, 3.0)], closed="right" - ) - result = result.columns.levels[1].categories - tm.assert_index_equal(result, expected) - - def test_sort_index_na_position_with_categories(self): - # GH 22556 + def test_sort_values_na_position_with_categories(self): + # GH#22556 # Positioning missing value properly when column is Categorical. categories = ["A", "B", "C"] category_indices = [0, 2, 4] @@ -702,3 +460,33 @@ def test_sort_values_na_position_with_categories_raises(self): with pytest.raises(ValueError): df.sort_values(by="c", ascending=False, na_position="bad_position") + + @pytest.mark.parametrize( + "original_dict, sorted_dict, ignore_index, output_index", + [ + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]), + ( + {"A": [1, 2, 3], "B": [2, 3, 4]}, + {"A": [3, 2, 1], "B": [4, 3, 2]}, + True, + [0, 1, 2], + ), + ( + {"A": [1, 2, 3], "B": [2, 3, 4]}, + {"A": [3, 2, 1], "B": [4, 3, 2]}, + False, + [2, 1, 0], + ), + ], + ) + def test_sort_values_ignore_index( + self, original_dict, sorted_dict, ignore_index, output_index + ): + + # GH 30114 + df = DataFrame(original_dict) + sorted_df = df.sort_values("A", ascending=False, ignore_index=ignore_index) + + expected = DataFrame(sorted_dict, index=output_index) + tm.assert_frame_equal(sorted_df, expected) From 3c37eb99ee1d47b9cac5a6213e66b66bf7cae174 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 24 Dec 2019 19:35:36 +0100 Subject: [PATCH 16/19] code change based on WA and JR reviews --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/frame.py | 4 +++- pandas/core/generic.py | 2 ++ pandas/core/series.py | 8 ++++++-- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a5a9340fa5322..c333bd9258911 100644 --- 
a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -207,6 +207,7 @@ Other enhancements - The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue: `30270`) - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) +- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` to be able to reset index after sorting (:issue:`30114`) Build Changes @@ -474,7 +475,6 @@ Other API changes Supplying anything else than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`) - When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`) - :meth:`Series.str.__iter__` was deprecated and will be removed in future releases (:issue:`28277`). -- ``ignore_index`` is added in :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` to be able to reset index after sorting (:issue:`30114`) .. 
_whatsnew_1000.api.documentation: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 27c60aa03e590..353ce3e8b8df7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -305,6 +305,8 @@ # ----------------------------------------------------------------------- # DataFrame class +bool_t = bool # Need alias because NDFrame has def bool: + class DataFrame(NDFrame): """ @@ -4704,7 +4706,7 @@ def sort_values( inplace=False, kind="quicksort", na_position="last", - ignore_index=False, + ignore_index: bool_t = False, ): inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 86282c3c4077c..b0cf652d91df4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4113,6 +4113,8 @@ def sort_values( ignore_index : bool, default False If True, the resulting axis will be labeled 0, 1, …, n - 1. + .. versionadded:: 1.0.0 + Returns ------- sorted_obj : DataFrame or None diff --git a/pandas/core/series.py b/pandas/core/series.py index d630062540d8b..3eeff6c8fe6c3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -117,6 +117,8 @@ def wrapper(self): # ---------------------------------------------------------------------- # Series class +bool_t = bool # Need alias because generic.NDFrame has def bool: + class Series(base.IndexOpsMixin, generic.NDFrame): """ @@ -2693,7 +2695,7 @@ def sort_values( inplace=False, kind="quicksort", na_position="last", - ignore_index=False, + ignore_index: bool_t = False, ): """ Sort by the values. @@ -2719,6 +2721,8 @@ def sort_values( ignore_index : bool, default False If True, the resulting axis will be labeled 0, 1, …, n - 1. + .. 
versionadded:: 1.0.0 + Returns ------- Series @@ -2859,7 +2863,7 @@ def _try_kind_sort(arr): result = self._constructor(arr[sortedIdx], index=self.index[sortedIdx]) if ignore_index: - result = result.reset_index(drop=True) + result.index = ibase.default_index(len(sortedIdx)) if inplace: self._update_inplace(result) From 4ce9f439698e96ec6c99a9bfaa56100c77da3453 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 24 Dec 2019 19:37:03 +0100 Subject: [PATCH 17/19] better english --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index c333bd9258911..96f35e25331c0 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -207,7 +207,7 @@ Other enhancements - The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. 
(:issue: `30270`) - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) -- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` to be able to reset index after sorting (:issue:`30114`) +- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained an ``ignore_index`` keyword to reset the index after sorting (:issue:`30114`) Build Changes From 0f89aa29767dffae6e1692d0a256ffbe22593a01 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 24 Dec 2019 20:42:47 +0100 Subject: [PATCH 18/19] skip annotation --- pandas/core/frame.py | 4 +--- pandas/core/series.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 353ce3e8b8df7..27c60aa03e590 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -305,8 +305,6 @@ # ----------------------------------------------------------------------- # DataFrame class -bool_t = bool # Need alias because NDFrame has def bool: - class DataFrame(NDFrame): """ @@ -4706,7 +4704,7 @@ def sort_values( inplace=False, kind="quicksort", na_position="last", - ignore_index: bool_t = False, + ignore_index=False, ): inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3eeff6c8fe6c3..6bc2712061bc6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -117,8 +117,6 @@ def wrapper(self): # ---------------------------------------------------------------------- # Series class -bool_t = bool # Need alias because generic.NDFrame has def bool: - class Series(base.IndexOpsMixin, generic.NDFrame): """ @@ -2695,7 +2693,7 @@ def sort_values( inplace=False, kind="quicksort", na_position="last", - ignore_index: bool_t = False, + ignore_index=False, ): """ Sort by the values. 
From d02b651792de516ab73c21be7430681c8e17dde5 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 26 Dec 2019 09:31:27 +0100 Subject: [PATCH 19/19] code change on JR review --- pandas/core/series.py | 14 +++++++------- pandas/tests/frame/methods/test_sort_values.py | 18 +++++++++++++++--- .../tests/series/methods/test_sort_values.py | 15 ++++++++++++--- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 6bc2712061bc6..1204676ed0c8a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2825,7 +2825,7 @@ def _try_kind_sort(arr): return arr.argsort(kind="quicksort") arr = self._values - sortedIdx = np.empty(len(self), dtype=np.int32) + sorted_index = np.empty(len(self), dtype=np.int32) bad = isna(arr) @@ -2849,19 +2849,19 @@ def _try_kind_sort(arr): if na_position == "last": n = good.sum() - sortedIdx[:n] = idx[good][argsorted] - sortedIdx[n:] = idx[bad] + sorted_index[:n] = idx[good][argsorted] + sorted_index[n:] = idx[bad] elif na_position == "first": n = bad.sum() - sortedIdx[n:] = idx[good][argsorted] - sortedIdx[:n] = idx[bad] + sorted_index[n:] = idx[good][argsorted] + sorted_index[:n] = idx[bad] else: raise ValueError(f"invalid na_position: {na_position}") - result = self._constructor(arr[sortedIdx], index=self.index[sortedIdx]) + result = self._constructor(arr[sorted_index], index=self.index[sorted_index]) if ignore_index: - result.index = ibase.default_index(len(sortedIdx)) + result.index = ibase.default_index(len(sorted_index)) if inplace: self._update_inplace(result) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index b62540ae64b9e..e733c01e01740 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -483,10 +483,22 @@ def test_sort_values_na_position_with_categories_raises(self): def test_sort_values_ignore_index( self, original_dict, sorted_dict, ignore_index, 
output_index ): - # GH 30114 df = DataFrame(original_dict) - sorted_df = df.sort_values("A", ascending=False, ignore_index=ignore_index) - expected = DataFrame(sorted_dict, index=output_index) + + # Test when inplace is False + sorted_df = df.sort_values("A", ascending=False, ignore_index=ignore_index) tm.assert_frame_equal(sorted_df, expected) + + tm.assert_frame_equal(df, DataFrame(original_dict)) + + # Test when inplace is True + copied_df = df.copy() + + copied_df.sort_values( + "A", ascending=False, ignore_index=ignore_index, inplace=True + ) + tm.assert_frame_equal(copied_df, expected) + + tm.assert_frame_equal(df, DataFrame(original_dict)) diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index 656949f8bb375..2cea6f061de76 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -167,10 +167,19 @@ def test_sort_values_categorical(self): def test_sort_values_ignore_index( self, original_list, sorted_list, ignore_index, output_index ): - # GH 30114 sr = Series(original_list) - sorted_sr = sr.sort_values(ascending=False, ignore_index=ignore_index) - expected = Series(sorted_list, index=output_index) + + # Test when inplace is False + sorted_sr = sr.sort_values(ascending=False, ignore_index=ignore_index) tm.assert_series_equal(sorted_sr, expected) + + tm.assert_series_equal(sr, Series(original_list)) + + # Test when inplace is True + copied_sr = sr.copy() + copied_sr.sort_values(ascending=False, ignore_index=ignore_index, inplace=True) + tm.assert_series_equal(copied_sr, expected) + + tm.assert_series_equal(sr, Series(original_list))