From d08283faa6ad158b711592270987c1a86ee96c4d Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Fri, 9 Sep 2022 14:25:38 +0100 Subject: [PATCH 1/4] fix valueError for empty inputs for groupby sample --- pandas/core/groupby/groupby.py | 3 +++ pandas/tests/groupby/test_sample.py | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7cd6a93fea92d..a42193da1bb32 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4256,6 +4256,9 @@ def sample( 2 blue 2 0 red 0 """ # noqa:E501 + if self._selected_obj.empty: + # GH48459 prevent ValueError + return self._selected_obj size = sample.process_sampling_size(n, frac, replace) if weights is not None: weights_arr = sample.preprocess_weights( diff --git a/pandas/tests/groupby/test_sample.py b/pandas/tests/groupby/test_sample.py index 9153fac0927c5..332339989d77c 100644 --- a/pandas/tests/groupby/test_sample.py +++ b/pandas/tests/groupby/test_sample.py @@ -142,3 +142,12 @@ def test_groupby_sample_with_selections(): result = df.groupby("a")[["b", "c"]].sample(n=None, frac=None) expected = DataFrame({"b": [1, 2], "c": [1, 2]}, index=result.index) tm.assert_frame_equal(result, expected) + + +def test_groupby_sample_with_empty_inputs(): + df = DataFrame({"a": [], "b": []}) + gb_df = df.groupby("a").sample() + + result = gb_df + expected = df + tm.assert_frame_equal(result, expected) From add0d41307c7a1f651cf34e31a9d6d42e0d86f4e Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Sat, 10 Sep 2022 13:44:02 +0100 Subject: [PATCH 2/4] add to whatsnew/v1.6.0.rst doc --- doc/source/whatsnew/v1.6.0.rst | 2 +- pandas/core/groupby/groupby.py | 2 +- pandas/tests/groupby/test_sample.py | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 42d3ce8069322..8967fa2b8ee82 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -185,7 +185,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- Bug in :meth:`DataFrameGroupBy.sample` raises ``ValueError`` when the object is empty (:issue:`48459`) - Reshaping diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a42193da1bb32..649ea1cc7a755 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4257,7 +4257,7 @@ def sample( 0 red 0 """ # noqa:E501 if self._selected_obj.empty: - # GH48459 prevent ValueError + # GH48459 prevent ValueError when object is empty return self._selected_obj size = sample.process_sampling_size(n, frac, replace) if weights is not None: diff --git a/pandas/tests/groupby/test_sample.py b/pandas/tests/groupby/test_sample.py index 332339989d77c..0a5fb06bc60d9 100644 --- a/pandas/tests/groupby/test_sample.py +++ b/pandas/tests/groupby/test_sample.py @@ -145,9 +145,7 @@ def test_groupby_sample_with_selections(): def test_groupby_sample_with_empty_inputs(): - df = DataFrame({"a": [], "b": []}) - gb_df = df.groupby("a").sample() + result = DataFrame({"a": [], "b": []}) + expected = result.groupby("a").sample() - result = gb_df - expected = df tm.assert_frame_equal(result, expected) From 81240793138b6b1c0b8f6501940a78964ae9738e Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Sat, 10 Sep 2022 19:41:36 +0100 Subject: [PATCH 3/4] refactor test for groupby sample --- pandas/tests/groupby/test_sample.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_sample.py b/pandas/tests/groupby/test_sample.py index 0a5fb06bc60d9..db1a11941cdf7 100644 --- a/pandas/tests/groupby/test_sample.py +++ b/pandas/tests/groupby/test_sample.py @@ -145,7 +145,12 @@ def test_groupby_sample_with_selections(): def test_groupby_sample_with_empty_inputs(): - result = DataFrame({"a": [], "b": []}) - expected = result.groupby("a").sample() + # GH48459 + df = DataFrame({"a": [], "b": []}) + groupby_df = df.groupby("a") + + result = groupby_df.sample() + expected = df + tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result, expected) From d945112592c0d9fca9b161f5c7a92685b35d1e0a Mon Sep 17 00:00:00 2001 From: Dennis Chukwunta Date: Sat, 10 Sep 2022 20:17:17 +0100 Subject: [PATCH 4/4] fix duplicate assert in test --- pandas/tests/groupby/test_sample.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/groupby/test_sample.py b/pandas/tests/groupby/test_sample.py index db1a11941cdf7..4dd474741740d 100644 --- a/pandas/tests/groupby/test_sample.py +++ b/pandas/tests/groupby/test_sample.py @@ -152,5 +152,3 @@ def test_groupby_sample_with_empty_inputs(): result = groupby_df.sample() expected = df tm.assert_frame_equal(result, expected) - - tm.assert_frame_equal(result, expected)