From 9376d3ec3f64941ed98929d89713d6222061e334 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Fri, 27 Sep 2019 18:08:01 -0500 Subject: [PATCH 01/15] Add groupby.apply test --- pandas/tests/frame/test_apply.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 0328232213588..8343bd824cddd 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -1357,3 +1357,10 @@ def test_frequency_is_original(self, num_cols): df = DataFrame(1, index=index, columns=range(num_cols)) df.apply(lambda x: x) assert index.freq == original.freq + + def test_func_returns_object(self): + df = pd.DataFrame({"a": [1, 2]}, index=pd.Int64Index([1, 2])) + result = df.groupby("a").apply(lambda g: g.index) + + assert result[1] == pd.Int64Index([1]) + assert result[2] == pd.Int64Index([2]) \ No newline at end of file From b093f98912e3193d2dc632f97d8f37525caa1e35 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Fri, 27 Sep 2019 18:08:31 -0500 Subject: [PATCH 02/15] Copy before append --- pandas/_libs/reduction.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 361c21c18c4da..5535af752a4f9 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,3 +1,4 @@ +from copy import copy from distutils.version import LooseVersion from cython import Py_ssize_t @@ -543,7 +544,7 @@ def apply_frame_axis0(object frame, object f, object names, # `piece` might not have an index, could be e.g. an int pass - results.append(piece) + results.append(copy(piece)) # If the data was modified inplace we need to # take the slow path to not risk segfaults From ef968a53c2f6e4eb31666ed8f4c10794516e914b Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Fri, 27 Sep 2019 18:15:40 -0500 Subject: [PATCH 03/15] Add whatsnew entry --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index eb4b72d01d59a..2bdf1e3cde1db 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -270,7 +270,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- Bug in :meth:`DataFrame.groupby.apply` only returning output from a single group in some cases (:issue:`28652`) - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue: `28192`) - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) - Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) From 714f296214757ebb76e9fcff589347ffa19e8717 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Fri, 27 Sep 2019 18:16:55 -0500 Subject: [PATCH 04/15] Blacken --- pandas/tests/frame/test_apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 8343bd824cddd..709d97b2211c5 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -1363,4 +1363,4 @@ def test_func_returns_object(self): result = df.groupby("a").apply(lambda g: g.index) assert result[1] == pd.Int64Index([1]) - assert result[2] == pd.Int64Index([2]) \ No newline at end of file + assert result[2] == pd.Int64Index([2]) From abd1e3684c575016a98edf742d87bbeafa9fd131 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sat, 28 Sep 2019 09:22:19 -0500 Subject: [PATCH 05/15] Update test --- pandas/tests/frame/test_apply.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 709d97b2211c5..f0e565b8888b2 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -1359,8 +1359,9 @@ def test_frequency_is_original(self, num_cols): assert index.freq == original.freq def test_func_returns_object(self): - df = pd.DataFrame({"a": [1, 2]}, index=pd.Int64Index([1, 2])) + # GH 28652 + df = DataFrame({"a": [1, 2]}, index=pd.Int64Index([1, 2])) result = df.groupby("a").apply(lambda g: g.index) - assert result[1] == pd.Int64Index([1]) - assert result[2] == pd.Int64Index([2]) + tm.assert_index_equal(result[1], pd.Int64Index([1])) + tm.assert_index_equal(result[2], pd.Int64Index([2])) From 0d46af02840d2699e1276f7ae81492655f1e0821 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sat, 28 Sep 2019 09:30:34 -0500 Subject: [PATCH 06/15] Use is_scalar check --- pandas/_libs/reduction.pyx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 5535af752a4f9..da58f9b0f747a 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -17,6 +17,7 @@ cnp.import_array() cimport pandas._libs.util as util from pandas._libs.lib import maybe_convert_objects, values_from_object +from pandas.api.types import is_scalar cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt): @@ -544,7 +545,10 @@ def apply_frame_axis0(object frame, object f, object names, # `piece` might not have an index, could be e.g. an int pass - results.append(copy(piece)) + if is_scalar(piece): + results.append(piece) + else: + results.append(copy(piece)) # If the data was modified inplace we need to # take the slow path to not risk segfaults From 980b23944035b33c929a47832ea3b6b4dc00d303 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sat, 28 Sep 2019 21:50:13 -0500 Subject: [PATCH 07/15] Edit test --- pandas/tests/frame/test_apply.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index f0e565b8888b2..f5871d99fc696 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -1362,6 +1362,9 @@ def test_func_returns_object(self): # GH 28652 df = DataFrame({"a": [1, 2]}, index=pd.Int64Index([1, 2])) result = df.groupby("a").apply(lambda g: g.index) + expected = Series( + [pd.Int64Index([1]), pd.Int64Index([2])], + index=pd.Int64Index([1, 2], name="a"), + ) - tm.assert_index_equal(result[1], pd.Int64Index([1])) - tm.assert_index_equal(result[2], pd.Int64Index([2])) + tm.assert_series_equal(result, expected) From 80b2860eb67e5193cc3e40f29438eec1a5a9db34 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Tue, 1 Oct 2019 17:50:05 -0500 Subject: [PATCH 08/15] Move test into groupby --- pandas/tests/groupby/test_apply.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 76588549532b1..688b2ab307cdb 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -657,3 +657,14 @@ def test_apply_with_mixed_types(): result = g.apply(lambda x: x / x.sum()) tm.assert_frame_equal(result, expected) + + +def test_func_returns_object(self): + # GH 28652 + df = DataFrame({"a": [1, 2]}, index=pd.Int64Index([1, 2])) + result = df.groupby("a").apply(lambda g: g.index) + expected = Series( + [pd.Int64Index([1]), pd.Int64Index([2])], index=pd.Int64Index([1, 2], name="a") + ) + + tm.assert_series_equal(result, expected) From d07f57613ed368d20ec6eaf6c003e2070ca93219 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Tue, 1 Oct 2019 17:53:52 -0500 Subject: [PATCH 09/15] Fix --- pandas/tests/frame/test_apply.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index f5871d99fc696..0328232213588 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -1357,14 +1357,3 @@ def test_frequency_is_original(self, num_cols): df = DataFrame(1, index=index, columns=range(num_cols)) df.apply(lambda x: x) assert index.freq == original.freq - - def test_func_returns_object(self): - # GH 28652 - df = DataFrame({"a": [1, 2]}, index=pd.Int64Index([1, 2])) - result = df.groupby("a").apply(lambda g: g.index) - expected = Series( - [pd.Int64Index([1]), pd.Int64Index([2])], - index=pd.Int64Index([1, 2], name="a"), - ) - - tm.assert_series_equal(result, expected) From d74f53c7ba3745a89f14a144f6e70baf34a27ffe Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Wed, 2 Oct 2019 17:27:18 -0500 Subject: [PATCH 10/15] Remove self --- pandas/tests/groupby/test_apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 688b2ab307cdb..2003dc0f274d9 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -659,7 +659,7 @@ def test_apply_with_mixed_types(): tm.assert_frame_equal(result, expected) -def test_func_returns_object(self): +def test_func_returns_object(): # GH 28652 df = DataFrame({"a": [1, 2]}, index=pd.Int64Index([1, 2])) result = df.groupby("a").apply(lambda g: g.index) From 9673af57c349607c83f0f22ffe78de2442f0cab9 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sat, 19 Oct 2019 10:49:15 -0500 Subject: [PATCH 11/15] Copy once --- pandas/_libs/reduction.pyx | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 348fab7274c47..7dc6652aa4643 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,4 +1,3 @@ -from copy import copy from distutils.version import LooseVersion from cython import Py_ssize_t @@ -545,18 +544,16 @@ def apply_frame_axis0(object frame, object f, object names, # Need to infer if low level index slider will cause segfaults require_slow_apply = i == 0 and piece is chunk try: - if piece.index is chunk.index: - piece = piece.copy(deep='all') - else: + if piece.index is not chunk.index: mutated = True except AttributeError: # `piece` might not have an index, could be e.g. an int pass - if is_scalar(piece): - results.append(piece) - else: - results.append(copy(piece)) + if not is_scalar(piece): + piece = piece.copy(deep="all") + + results.append(piece) # If the data was modified inplace we need to # take the slow path to not risk segfaults From a1bbd9f3833a565e1a1a52737c9855e700dfe82f Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sat, 19 Oct 2019 11:22:05 -0500 Subject: [PATCH 12/15] Check for copy attribute --- pandas/_libs/reduction.pyx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 7dc6652aa4643..e8af1c8584993 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,3 +1,4 @@ +from copy import copy from distutils.version import LooseVersion from cython import Py_ssize_t @@ -551,7 +552,10 @@ def apply_frame_axis0(object frame, object f, object names, pass if not is_scalar(piece): - piece = piece.copy(deep="all") + if hasattr(piece, "copy"): + piece = piece.copy(deep="all") + else: + piece = copy(piece) results.append(piece) From b3efdf8fab1278a700b134ad839f54f2706aba47 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sat, 19 Oct 2019 11:32:15 -0500 Subject: [PATCH 13/15] Edit release note --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index ce5c5875572b8..01a1f298e6890 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -280,7 +280,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in :meth:`DataFrame.groupby.apply` only returning output from a single group in some cases (:issue:`28652`) +- Bug in :meth:`DataFrame.groupby.apply` only showing output from a single group when function returns an :class:`Index` (:issue:`28652`) - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue: `28192`) - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) - Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) From f3870836fddb30095801e3e6b06736e2a84a251e Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Tue, 22 Oct 2019 18:12:26 -0400 Subject: [PATCH 14/15] Add comment --- pandas/_libs/reduction.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index e8af1c8584993..7d4b8e8ccceb8 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -552,6 +552,7 @@ def apply_frame_axis0(object frame, object f, object names, pass if not is_scalar(piece): + # Need to copy data to avoid appending references if hasattr(piece, "copy"): piece = piece.copy(deep="all") else: From 8fcad2bdaed5ec90c9bb4021ffb61728207a1486 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Fri, 15 Nov 2019 19:33:51 -0600 Subject: [PATCH 15/15] Move import --- pandas/_libs/reduction.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 449b660e184ee..f2c2ecee8774d 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -16,8 +16,7 @@ from numpy cimport (ndarray, cnp.import_array() cimport pandas._libs.util as util -from pandas._libs.lib import maybe_convert_objects -from pandas.api.types import is_scalar +from pandas._libs.lib import maybe_convert_objects, is_scalar cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):