From d4ca93d937ba80160b51d84036b2624cd0c21f74 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Sun, 18 Apr 2021 14:38:38 +0200 Subject: [PATCH 01/10] Add tests and fix --- pandas/core/apply.py | 8 ++++-- pandas/tests/apply/test_frame_apply.py | 35 +++++++++++++++++++++++--- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 86cde647cc798..9c41f1d0c596c 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -382,12 +382,16 @@ def agg_list_like(self) -> FrameOrSeriesUnion: raise ValueError("no results") try: - return concat(results, keys=keys, axis=1, sort=False) + concatenated = concat(results, keys=keys, axis=1, sort=False) + indices = [result.index for result in results] + largest_index = max(indices, key=len) + if len(largest_index) != len(indices[0]): + concatenated = concatenated.reindex(largest_index, copy=True) + return concatenated except TypeError as err: # we are concatting non-NDFrame objects, # e.g. a list of scalars - from pandas import Series result = Series(results, index=keys, name=obj.name) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index cee8a0218e9e8..005121746aafa 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1109,10 +1109,7 @@ def test_agg_multiple_mixed_no_warning(): with tm.assert_produces_warning(None): result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"]) - # For backwards compatibility, the result's index is - # still sorted by function name, so it's ['min', 'sum'] - # not ['sum', 'min']. - expected = expected[["D", "C", "B", "A"]] + expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) tm.assert_frame_equal(result, expected) @@ -1518,3 +1515,33 @@ def test_apply_np_reducer(float_frame, op, how): getattr(np, op)(float_frame, axis=0, **kwargs), index=float_frame.columns ) tm.assert_series_equal(result, expected) + + +def test_aggregation_func_column_order(): + df = DataFrame( + [ + ("1", 1, 0, 0), + ("2", 2, 0, 0), + ("3", 3, 0, 0), + ("4", 4, 5, 4), + ("5", 5, 6, 6), + ("6", 6, 7, 7), + ], + columns=("item", "att1", "att2", "att3"), + ) + + def foo(s): + return s.sum() / 2 + + aggs = ["sum", foo, "count", "min"] + result = df.agg(aggs) + expected = DataFrame( + { + "item": ["123456", np.nan, 6, "1"], + "att1": [21.0, 10.5, 6.0, 1.0], + "att2": [18.0, 9.0, 6.0, 0.0], + "att3": [17.0, 8.5, 6.0, 0.0], + }, + index=["sum", "foo", "count", "min"], + ) + tm.assert_frame_equal(result, expected) From 133d189d3a6fe7b528ea6115a84a25306689cef4 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Sun, 18 Apr 2021 14:41:43 +0200 Subject: [PATCH 02/10] Add comment and issue number. --- pandas/tests/apply/test_frame_apply.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 005121746aafa..8ff1860e9d7bd 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1109,6 +1109,8 @@ def test_agg_multiple_mixed_no_warning(): with tm.assert_produces_warning(None): result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"]) + # GH40420: the result of .agg should have an index that is sorted + # according to the arguments provided to agg. expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) tm.assert_frame_equal(result, expected) @@ -1518,6 +1520,8 @@ def test_apply_np_reducer(float_frame, op, how): def test_aggregation_func_column_order(): + # GH40420: the result of .agg should have an index that is sorted + # according to the arguments provided to agg. df = DataFrame( [ ("1", 1, 0, 0), From 15dfe4d9f2781aed1277021e6a68c00e0abbd862 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Sun, 18 Apr 2021 15:42:32 +0200 Subject: [PATCH 03/10] Improve solution. --- pandas/core/apply.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 9c41f1d0c596c..c55fa3ca844ad 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -383,10 +383,18 @@ def agg_list_like(self) -> FrameOrSeriesUnion: try: concatenated = concat(results, keys=keys, axis=1, sort=False) - indices = [result.index for result in results] - largest_index = max(indices, key=len) - if len(largest_index) != len(indices[0]): - concatenated = concatenated.reindex(largest_index, copy=True) + # Concat uses the first index to determine the final indexing order. + # The union with the other indices with a shorter first index causes + # the index sorting to be different from the order of the aggregating + # functions. Reindex if this is the case. + index_size = concatenated.index.size + if results[0].index.size != index_size: + good_index = next( + result.index + for result in results + if result.index.size == index_size + ) + concatenated = concatenated.reindex(good_index) return concatenated except TypeError as err: From 33edd72222431d7bf746a749dc4c17023104f7a9 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Sun, 18 Apr 2021 19:43:28 +0200 Subject: [PATCH 04/10] Add whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 85d9acff353be..091af586afa23 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -893,6 +893,7 @@ Other - Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`) - Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`) - Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised ValueError when called on an empty DataFrame (:issue:`40393`) +- Bug in :meth:`DataFrame.agg()` sometimes not sorting the non-concatenated axis in the order of the provided aggragation functions. .. --------------------------------------------------------------------------- From d07365c0edecdf527b01c0176298b07611c3f3a0 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Mon, 19 Apr 2021 09:31:56 +0200 Subject: [PATCH 05/10] Only catch TypeError from concat --- pandas/core/apply.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index c55fa3ca844ad..23e47b4ad7fba 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -383,8 +383,20 @@ def agg_list_like(self) -> FrameOrSeriesUnion: try: concatenated = concat(results, keys=keys, axis=1, sort=False) + except TypeError as err: + # we are concatting non-NDFrame objects, + # e.g. a list of scalars + from pandas import Series + + result = Series(results, index=keys, name=obj.name) + if is_nested_object(result): + raise ValueError( + "cannot combine transform and aggregation operations" + ) from err + return result + else: # Concat uses the first index to determine the final indexing order. - # The union with the other indices with a shorter first index causes + # The union of a shorter first index with the other indices causes # the index sorting to be different from the order of the aggregating # functions. Reindex if this is the case. index_size = concatenated.index.size @@ -396,18 +408,6 @@ def agg_list_like(self) -> FrameOrSeriesUnion: ) concatenated = concatenated.reindex(good_index) return concatenated - except TypeError as err: - - # we are concatting non-NDFrame objects, - # e.g. a list of scalars - from pandas import Series - - result = Series(results, index=keys, name=obj.name) - if is_nested_object(result): - raise ValueError( - "cannot combine transform and aggregation operations" - ) from err - return result def agg_dict_like(self) -> FrameOrSeriesUnion: """ From 769923c485271fa4c2da2e75a5da12bed1356534 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Fri, 23 Apr 2021 10:00:06 +0200 Subject: [PATCH 06/10] Cleanup reindex --- pandas/core/apply.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 23e47b4ad7fba..cc020423f40b0 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -400,14 +400,10 @@ def agg_list_like(self) -> FrameOrSeriesUnion: # the index sorting to be different from the order of the aggregating # functions. Reindex if this is the case. index_size = concatenated.index.size - if results[0].index.size != index_size: - good_index = next( - result.index - for result in results - if result.index.size == index_size - ) - concatenated = concatenated.reindex(good_index) - return concatenated + good_index = next( + result.index for result in results if result.index.size == index_size + ) + return concatenated.reindex(good_index, copy=False) def agg_dict_like(self) -> FrameOrSeriesUnion: """ From 05b29888d877bb95cc747064d0df0ab5e26bde0b Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Fri, 23 Apr 2021 10:08:29 +0200 Subject: [PATCH 07/10] Update whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1fcb592200179..6dff9bfcf4974 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -910,7 +910,7 @@ Other - Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`) - Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`) - Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised ValueError when called on an empty DataFrame (:issue:`40393`) -- Bug in :meth:`DataFrame.agg()` sometimes not sorting the non-concatenated axis in the order of the provided aggragation functions. +- Bug in :meth:`DataFrame.agg()` not sorting the non-concatenated axis in the order of the provided aggragation functions when one or more aggregation function fails to produce results (:issue:`33634`). .. --------------------------------------------------------------------------- From b7fb554601284be2ac68ba5609d35b169035b4b4 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Wed, 12 May 2021 20:37:39 +0200 Subject: [PATCH 08/10] Remove merge remainder. --- doc/source/whatsnew/v1.3.0.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index bbf5ae97f3da6..0b0346863f793 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -910,11 +910,8 @@ Other - Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`) - Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`) - Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised ValueError when called on an empty DataFrame (:issue:`40393`) -<<<<<<< HEAD - Bug in :meth:`DataFrame.agg()` not sorting the non-concatenated axis in the order of the provided aggragation functions when one or more aggregation function fails to produce results (:issue:`33634`). -======= - Bug in :meth:`DataFrame.clip` not interpreting missing values as no threshold (:issue:`40420`) ->>>>>>> upstream/master .. --------------------------------------------------------------------------- From 7f07b2245b9f897b21a30440341d2f0a48033f37 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Wed, 12 May 2021 20:38:05 +0200 Subject: [PATCH 09/10] Rename index variable. --- pandas/core/apply.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index cc020423f40b0..b9e912f4d59c3 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -400,10 +400,10 @@ def agg_list_like(self) -> FrameOrSeriesUnion: # the index sorting to be different from the order of the aggregating # functions. Reindex if this is the case. index_size = concatenated.index.size - good_index = next( + full_ordered_index = next( result.index for result in results if result.index.size == index_size ) - return concatenated.reindex(good_index, copy=False) + return concatenated.reindex(full_ordered_index, copy=False) def agg_dict_like(self) -> FrameOrSeriesUnion: """ From 6e0b7f31d3f8ef9a69f411f13c6a220acd89ea1e Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Fri, 14 May 2021 09:58:37 +0200 Subject: [PATCH 10/10] Update whatsnew. --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 20e7871f4fd0d..893739980055d 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -965,7 +965,7 @@ Other - Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`) - Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`) - Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised ValueError when called on an empty DataFrame (:issue:`40393`) -- Bug in :meth:`DataFrame.agg()` not sorting the non-concatenated axis in the order of the provided aggragation functions when one or more aggregation function fails to produce results (:issue:`33634`). +- Bug in :meth:`DataFrame.agg()` not sorting the aggregated axis in the order of the provided aggragation functions when one or more aggregation function fails to produce results (:issue:`33634`) - Bug in :meth:`DataFrame.clip` not interpreting missing values as no threshold (:issue:`40420`) .. ---------------------------------------------------------------------------