From 9ef05b14dc97af26a603e16c18750dc2346218ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Lucas=20Silva=20Mayer?= Date: Sat, 2 Sep 2023 14:17:14 -0300 Subject: [PATCH 1/5] add test case of ngroup with nan value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: José Lucas Silva Mayer Co-authored-by: Willian Wang --- pandas/tests/groupby/test_groupby.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 772ce90b1e611..69dba7c5b44cf 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3187,3 +3187,11 @@ def test_depr_get_group_len_1_list_likes(test_series, kwarg, value, name, warn): else: expected = DataFrame({"b": [3, 4]}, index=Index([1, 1], name="a")) tm.assert_equal(result, expected) + +def test_groupby_ngroup_with_nan(): + # GH#50100 + df = DataFrame({'a': pd.Categorical([np.nan]), 'b': [1]}) + g = df.groupby(['a', 'b'], dropna=False, observed=False).ngroup() + result = g.iloc[0] + expected = 0 + tm.assert_equal(result, expected) From ae975f2388ff0f99e4305196b50d03a6f883a745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Lucas=20Silva=20Mayer?= Date: Sat, 2 Sep 2023 15:17:48 -0300 Subject: [PATCH 2/5] fix linter issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: José Lucas Silva Mayer Co-authored-by: Willian Wang --- pandas/tests/groupby/test_groupby.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 32e1244e031a2..4c28407651b72 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3188,14 +3188,16 @@ def test_depr_get_group_len_1_list_likes(test_series, kwarg, value, name, warn): expected = DataFrame({"b": [3, 4]}, index=Index([1, 1], name="a")) tm.assert_equal(result, expected) + def test_groupby_ngroup_with_nan(): # GH#50100 - df = DataFrame({'a': pd.Categorical([np.nan]), 'b': [1]}) - g = df.groupby(['a', 'b'], dropna=False, observed=False).ngroup() + df = DataFrame({"a": pd.Categorical([np.nan]), "b": [1]}) + g = df.groupby(["a", "b"], dropna=False, observed=False).ngroup() result = g.iloc[0] expected = 0 tm.assert_equal(result, expected) + def test_get_group_axis_1(): # GH#54858 df = DataFrame( From 38a645486d85ecf4c5a4f12444dcb56aa2d615a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Lucas=20Silva=20Mayer?= Date: Sat, 2 Sep 2023 15:37:05 -0300 Subject: [PATCH 3/5] use Categorical object instead of pd.Categorical MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: José Lucas Silva Mayer Co-authored-by: Willian Wang --- pandas/tests/groupby/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4c28407651b72..0c998b6711b1a 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3191,7 +3191,7 @@ def test_depr_get_group_len_1_list_likes(test_series, kwarg, value, name, warn): def test_groupby_ngroup_with_nan(): # GH#50100 - df = DataFrame({"a": pd.Categorical([np.nan]), "b": [1]}) + df = DataFrame({"a": Categorical([np.nan]), "b": [1]}) g = df.groupby(["a", "b"], dropna=False, observed=False).ngroup() result = g.iloc[0] expected = 0 From 3a7dc58278f7e236a4fdde1cdedc492dd0adf716 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Lucas=20Mayer?= Date: Tue, 5 Sep 2023 16:24:15 -0300 Subject: [PATCH 4/5] use native assert function Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/groupby/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 0c998b6711b1a..08bfb7c73fa38 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3195,7 +3195,7 @@ def test_groupby_ngroup_with_nan(): g = df.groupby(["a", "b"], dropna=False, observed=False).ngroup() result = g.iloc[0] expected = 0 - tm.assert_equal(result, expected) + assert result == expected def test_get_group_axis_1(): From 669b3e54683585f270b6814755e3fc4938682c43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Lucas=20Silva=20Mayer?= Date: Wed, 6 Sep 2023 00:47:14 -0300 Subject: [PATCH 5/5] test full result of ngroup method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: José Lucas Silva Mayer --- pandas/tests/groupby/test_groupby.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 08bfb7c73fa38..3e9d6eb19bacb 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3192,10 +3192,9 @@ def test_depr_get_group_len_1_list_likes(test_series, kwarg, value, name, warn): def test_groupby_ngroup_with_nan(): # GH#50100 df = DataFrame({"a": Categorical([np.nan]), "b": [1]}) - g = df.groupby(["a", "b"], dropna=False, observed=False).ngroup() - result = g.iloc[0] - expected = 0 - assert result == expected + result = df.groupby(["a", "b"], dropna=False, observed=False).ngroup() + expected = Series([0]) + tm.assert_series_equal(result, expected) def test_get_group_axis_1():