From eae304ef360d65c62be4ef21a44f5b052a60d843 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 17 Oct 2020 14:51:27 +0200 Subject: [PATCH 1/3] BUG: Regression in Resample.apply raised error when apply affected only a Series --- doc/source/whatsnew/v1.1.4.rst | 1 + pandas/core/resample.py | 3 ++- pandas/tests/resample/test_resampler_grouper.py | 13 +++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst index 6892fb62028c9..6e365e2fff15c 100644 --- a/doc/source/whatsnew/v1.1.4.rst +++ b/doc/source/whatsnew/v1.1.4.rst @@ -20,6 +20,7 @@ Fixed regressions - Fixed regression in :class:`RollingGroupby` with ``sort=False`` not being respected (:issue:`36889`) - Fixed regression in :meth:`Series.astype` converting ``None`` to ``"nan"`` when casting to string (:issue:`36904`) - Fixed regression in :class:`RollingGroupby` causing a segmentation fault with Index of dtype object (:issue:`36727`) +- Fixed regression in :meth:`Resample.apply` raised ``AttributeError`` when input was a :class:`DataFrame` and only a :class:`Series` was evaluated (:issue:`36951`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 3f1b1dac080a7..76accdb240653 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -369,8 +369,9 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs): result = grouped._aggregate_item_by_item(how, *args, **kwargs) else: result = grouped.aggregate(how, *args, **kwargs) - except DataError: + except (DataError, AttributeError): # we have a non-reducing function; try to evaluate + # alternatively we want to evaluate only a column of the input result = grouped.apply(how, *args, **kwargs) except ValueError as err: if "Must produce aggregated value" in str(err): diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 73bf7dafac254..5c7f2ce68212e 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -347,3 +347,16 @@ def test_median_duplicate_columns(): result = df.resample("5s").median() expected.columns = result.columns tm.assert_frame_equal(result, expected) + + +def test_apply_to_one_column_of_df(): + # GH: 36951 + df = pd.DataFrame( + {"col": range(10), "col1": range(10, 20)}, + index=pd.date_range("2012-01-01", periods=10, freq="20min"), + ) + result = df.resample("H").apply(lambda group: group.col.sum()) + expected = pd.Series( + [3, 12, 21, 9], index=pd.date_range("2012-01-01", periods=4, freq="H") + ) + tm.assert_series_equal(result, expected) From f91e7958924e6b9e75948f7e713d22ccc266fb32 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 17 Oct 2020 18:09:19 +0200 Subject: [PATCH 2/3] BUG: Non deterministic level order of MultiIndex in join --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexes/base.py | 12 ++++++++---- pandas/tests/reshape/merge/test_join.py | 17 +++++++++++++++++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git 
a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 9fc094330fb36..7cba04a169a42 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -471,6 +471,7 @@ Reshaping - Bug in func :meth:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`) - Bug in :meth:`DataFrame.agg` with ``func={'name':}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`) - Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`) +- Bug in :meth:`DataFrame.join` returned a non-deterministic level-order for the resulting :class:`MultiIndex` (:issue:`36910`) - Sparse diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6b71e455782e3..938eac9301feb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3577,8 +3577,12 @@ def _join_multi(self, other, how, return_indexers=True): from pandas.core.reshape.merge import restore_dropped_levels_multijoin # figure out join names - self_names = set(com.not_none(*self.names)) - other_names = set(com.not_none(*other.names)) + self_names = list(com.not_none(*self.names)) + other_names = list(com.not_none(*other.names)) + self_names_order = self_names.index + other_names_order = other_names.index + self_names = set(self_names) + other_names = set(other_names) overlap = self_names & other_names # need at least 1 in common @@ -3588,8 +3592,8 @@ def _join_multi(self, other, how, return_indexers=True): if isinstance(self, MultiIndex) and isinstance(other, MultiIndex): # Drop the non-matching levels from left and right respectively - ldrop_names = list(self_names - overlap) - rdrop_names = list(other_names - overlap) + ldrop_names = sorted(list(self_names - overlap), key=self_names_order) + rdrop_names = sorted(list(other_names - overlap), key=other_names_order) # if only the order differs if not len(ldrop_names + rdrop_names): 
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index d4d4c4190417e..3c0cb0426ae16 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -879,3 +879,20 @@ def _join_by_hand(a, b, how="left"): for col, s in b_re.items(): a_re[col] = s return a_re.reindex(columns=result_columns) + + +def test_join_inner_multiindex_deterministic_order(): + # GH: 36910 + left = pd.DataFrame( + data={"e": 5}, + index=pd.MultiIndex.from_tuples([(1, 2, 4)], names=("a", "b", "d")), + ) + right = pd.DataFrame( + data={"f": 6}, index=pd.MultiIndex.from_tuples([(2, 3)], names=("b", "c")) + ) + result = left.join(right, how="inner") + expected = pd.DataFrame( + {"e": [5], "f": [6]}, + index=pd.MultiIndex.from_tuples([(2, 1, 4, 3)], names=("b", "a", "d", "c")), + ) + tm.assert_frame_equal(result, expected) From 74d97ceddb40802f9f0ee14ad4396d73f79323c4 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 17 Oct 2020 18:10:43 +0200 Subject: [PATCH 3/3] Revert "BUG: Regression in Resample.apply raised error when apply affected only a Series" This reverts commit eae304ef --- doc/source/whatsnew/v1.1.4.rst | 1 - pandas/core/resample.py | 3 +-- pandas/tests/resample/test_resampler_grouper.py | 13 ------------- 3 files changed, 1 insertion(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst index 6e365e2fff15c..6892fb62028c9 100644 --- a/doc/source/whatsnew/v1.1.4.rst +++ b/doc/source/whatsnew/v1.1.4.rst @@ -20,7 +20,6 @@ Fixed regressions - Fixed regression in :class:`RollingGroupby` with ``sort=False`` not being respected (:issue:`36889`) - Fixed regression in :meth:`Series.astype` converting ``None`` to ``"nan"`` when casting to string (:issue:`36904`) - Fixed regression in :class:`RollingGroupby` causing a segmentation fault with Index of dtype object (:issue:`36727`) -- Fixed regression in :meth:`Resample.apply` raised ``AttributeError`` when input was a 
:class:`DataFrame` and only a :class:`Series` was evaluated (:issue:`36951`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 76accdb240653..3f1b1dac080a7 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -369,9 +369,8 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs): result = grouped._aggregate_item_by_item(how, *args, **kwargs) else: result = grouped.aggregate(how, *args, **kwargs) - except (DataError, AttributeError): + except DataError: # we have a non-reducing function; try to evaluate - # alternatively we want to evaluate only a column of the input result = grouped.apply(how, *args, **kwargs) except ValueError as err: if "Must produce aggregated value" in str(err): diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 5c7f2ce68212e..73bf7dafac254 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -347,16 +347,3 @@ def test_median_duplicate_columns(): result = df.resample("5s").median() expected.columns = result.columns tm.assert_frame_equal(result, expected) - - -def test_apply_to_one_column_of_df(): - # GH: 36951 - df = pd.DataFrame( - {"col": range(10), "col1": range(10, 20)}, - index=pd.date_range("2012-01-01", periods=10, freq="20min"), - ) - result = df.resample("H").apply(lambda group: group.col.sum()) - expected = pd.Series( - [3, 12, 21, 9], index=pd.date_range("2012-01-01", periods=4, freq="H") - ) - tm.assert_series_equal(result, expected)