From 4c9e32d2cf36ded8e0ad803b1915d8874405b642 Mon Sep 17 00:00:00 2001 From: Nicolai Reeve Date: Mon, 17 Jul 2017 18:37:38 -0700 Subject: [PATCH 1/6] BUG: fixed issue with mixed type groupby aggregate Fixes issue #16916, where using aggregate on a mixed type grouping vector fails. Added test in test_aggregate.py to ensure that the bug is fixed. --- pandas/core/groupby.py | 7 ++++++- pandas/tests/groupby/test_aggregate.py | 12 ++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index daf3381ae4e89..f44fab01054fc 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2883,7 +2883,12 @@ def aggregate(self, func_or_funcs, *args, **kwargs): except Exception: result = self._aggregate_named(func_or_funcs, *args, **kwargs) - index = Index(sorted(result), name=self.grouper.names[0]) + # mixed types fail to sort + try: + values = sorted(result) + except TypeError: + values = result + index = Index(values, name=self.grouper.names[0]) ret = Series(result, index=index) if not self.as_index: # pragma: no cover diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index efc833575843c..39d5326951094 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -892,3 +892,15 @@ def test_sum_uint64_overflow(self): expected.index.name = 0 result = df.groupby(0).sum() tm.assert_frame_equal(result, expected) + + def test_mixed_type_grouping(self): + X = pd.DataFrame(data=[[[1, 1], [2, 2], [3, 3]], + [[1, 1], [2, 2], [3, 3]]], + columns=['X', 'Y', 'Z'], + index=pd.Index(data=[2, 'g1'], name='grouping')) + + S = pd.DataFrame(data=[[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]], + columns=list('XYZ'), index=list('qwer')) + S['grouping'] = ['g1', 'g1', 2, 2] + T = S.groupby('grouping').aggregate(lambda x: x.tolist()) + tm.assert_frame_equal(T, X) From a58de542390937ddb1ea19b98a91c56c79bb2497 Mon Sep 17 00:00:00 2001 From: Nicolai 
Reeve Date: Wed, 19 Jul 2017 11:32:59 -0700 Subject: [PATCH 2/6] Addressing review comments from PR issue16916 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/tests/groupby/test_aggregate.py | 22 ++++++++++++---------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2716d9b09eaa9..27c549f442a5e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -174,6 +174,7 @@ Groupby/Resample/Rolling - Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) - Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) - Bug in ``.rolling.quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) +- Bug in ``Grouper.aggregate()`` on using a mixed type grouping vector (:issue:`16916`) Sparse diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 39d5326951094..daa78f2ec79d8 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -894,13 +894,15 @@ def test_sum_uint64_overflow(self): tm.assert_frame_equal(result, expected) def test_mixed_type_grouping(self): - X = pd.DataFrame(data=[[[1, 1], [2, 2], [3, 3]], - [[1, 1], [2, 2], [3, 3]]], - columns=['X', 'Y', 'Z'], - index=pd.Index(data=[2, 'g1'], name='grouping')) - - S = pd.DataFrame(data=[[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]], - columns=list('XYZ'), index=list('qwer')) - S['grouping'] = ['g1', 'g1', 2, 2] - T = S.groupby('grouping').aggregate(lambda x: x.tolist()) - tm.assert_frame_equal(T, X) + # see gh-16916 + expected = pd.DataFrame(data=[[[1, 1], [2, 2], [3, 3]], + [[1, 1], [2, 2], [3, 3]]], + columns=['X', 'Y', 'Z'], + index=pd.Index(data=[2, 'g1'], + name='grouping')) + + df =
pd.DataFrame(data=[[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]], + columns=list('XYZ'), index=list('qwer')) + df['grouping'] = ['g1', 'g1', 2, 2] + result = df.groupby('grouping').aggregate(lambda x: x.tolist()) + tm.assert_frame_equal(result, expected) From a7a20d53842544660d864f5a9c9ba734112d753f Mon Sep 17 00:00:00 2001 From: Nicolai Reeve Date: Thu, 27 Jul 2017 17:37:00 -0700 Subject: [PATCH 3/6] DOC: Fix incorrect merge conflict --- doc/source/whatsnew/v0.21.0.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 7d9855782fca9..f620d1e032afe 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -276,10 +276,10 @@ Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) -- Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) -- Bug in ``.rolling.quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) -- Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) -- Bug in ``Grouper.aggregate()`` on using a mixed type grouping vector (:issue:`16916`) +- Bug in ``DataFrame.resample(...).size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) +- Bug in :func:`infer_freq` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) +- Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) +- Bug in ``Grouper.aggregate()`` where grouping by a vector of mixed types would raise a ``TypeError`` only if the aggregating function 
returned a list-like object (:issue:`16916`) Sparse ^^^^^^ From 34ca8d891b0b4cb23623aaff8a9b20ebe1925e9d Mon Sep 17 00:00:00 2001 From: Nicolai Reeve Date: Fri, 4 Aug 2017 13:48:39 -0700 Subject: [PATCH 4/6] used safe_sort in groupby.py --- pandas/core/groupby.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index f44fab01054fc..66ec67334d34c 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -59,6 +59,7 @@ from pandas.io.formats.printing import pprint_thing from pandas.util._validators import validate_kwargs +from pandas.core.sorting import safe_sort import pandas.core.algorithms as algorithms import pandas.core.common as com from pandas.core.config import option_context @@ -2883,12 +2884,7 @@ def aggregate(self, func_or_funcs, *args, **kwargs): except Exception: result = self._aggregate_named(func_or_funcs, *args, **kwargs) - # mixed types fail to sort - try: - values = sorted(result) - except TypeError: - values = result - index = Index(values, name=self.grouper.names[0]) + index = Index(safe_sort(result), name=self.grouper.names[0]) ret = Series(result, index=index) if not self.as_index: # pragma: no cover From b634bc960eeaf9073ab960e80062097964e3bab6 Mon Sep 17 00:00:00 2001 From: Nicolai Reeve Date: Fri, 4 Aug 2017 13:53:56 -0700 Subject: [PATCH 5/6] re-added bug fix at line 281 --- doc/source/whatsnew/v0.21.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 390a7828bf8fd..c5b3d70ce0449 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -278,6 +278,7 @@ Indexing - Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`) - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) - Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`) +- Bug in 
``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) I/O ^^^ From 89c585be812caa08466e3c0c2a5b8bfc3aa205d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yoshiki=20V=C3=A1zquez=20Baeza?= Date: Sun, 6 Aug 2017 09:32:33 -0400 Subject: [PATCH 6/6] DOC: Correct release notes --- doc/source/whatsnew/v0.21.0.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c5b3d70ce0449..05e1c883c0d59 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -278,7 +278,6 @@ Indexing - Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`) - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) - Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`) -- Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) I/O ^^^ @@ -300,10 +299,10 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) - Bug in ``DataFrame.resample(...).size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) - Bug in :func:`infer_freq` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) - Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) +- Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) - Bug in ``Grouper.aggregate()`` where grouping by a vector of mixed types would raise a ``TypeError`` only if the aggregating function returned a list-like object (:issue:`16916`) Sparse