From 4fec5df8622c61a4a21cdcdc0b709aafa531b8fc Mon Sep 17 00:00:00 2001 From: louis <@> Date: Sat, 2 Nov 2019 14:56:14 +0000 Subject: [PATCH 1/4] TST: Test for fix of type issue in empty groupby from DataFrame with categorical. --- pandas/tests/groupby/test_groupby.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e17181f55fdba..fae272bbb0bdb 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1825,6 +1825,18 @@ def test_groupby_empty_list_raises(): df.groupby([[]]) +def test_groupby_with_empty_category(): + # GH-9614 + df = pd.DataFrame({"id": [None] * 3, "spam": [None] * 3}) + df["spam"] = df["spam"].astype("category") + df["spam"] == "spam" # works as expected + result = df.groupby("id").first()["spam"] == "spam" + expected = Series( + [], name="spam", dtype=bool, index=pd.Series([], dtype="object", name="id") + ) + tm.assert_series_equal(result, expected) + + def test_groupby_multiindex_series_keys_len_equal_group_axis(): # GH 25704 index_array = [["x", "x"], ["a", "b"], ["k", "k"]] From 3396aa7fbc0e62d96c5e253695ef815b406d21fe Mon Sep 17 00:00:00 2001 From: louis <@> Date: Sat, 2 Nov 2019 23:48:25 +0000 Subject: [PATCH 2/4] TST: Refactor test for empty groupby with categorical data. --- pandas/tests/groupby/test_categorical.py | 12 ++++++++++++ pandas/tests/groupby/test_groupby.py | 12 ------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 22a23407b2521..1b92615bf6ac0 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -781,6 +781,18 @@ def test_categorical_no_compress(): tm.assert_numpy_array_equal(result, exp) +def test_groupby_empty_with_category(): + # GH-9614 + df = pd.DataFrame({"A": [None] * 3, "B": pd.Categorical(["train", "train", "test"])}) + result = df.groupby("A").first()["B"] + expected = pd.Series( + pd.Categorical([], categories=["test", "train"]), + index=pd.Series([], dtype="object", name="A"), + name="B", + ) + tm.assert_series_equal(result, expected) + + def test_sort(): # http://stackoverflow.com/questions/23814368/sorting-pandas- diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index fae272bbb0bdb..e17181f55fdba 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1825,18 +1825,6 @@ def test_groupby_empty_list_raises(): df.groupby([[]]) -def test_groupby_with_empty_category(): - # GH-9614 - df = pd.DataFrame({"id": [None] * 3, "spam": [None] * 3}) - df["spam"] = df["spam"].astype("category") - df["spam"] == "spam" # works as expected - result = df.groupby("id").first()["spam"] == "spam" - expected = Series( - [], name="spam", dtype=bool, index=pd.Series([], dtype="object", name="id") - ) - tm.assert_series_equal(result, expected) - - def test_groupby_multiindex_series_keys_len_equal_group_axis(): # GH 25704 index_array = [["x", "x"], ["a", "b"], ["k", "k"]] From 18293ab35c18e81bc529a2cbd23acd31efb0fbf5 Mon Sep 17 00:00:00 2001 From: louis <@> Date: Sun, 3 Nov 2019 00:04:34 +0000 Subject: [PATCH 3/4] TST: Refactor test for empty groupby with categorical data. --- pandas/tests/groupby/test_categorical.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 1b92615bf6ac0..7577167ccfe3e 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -783,7 +783,9 @@ def test_categorical_no_compress(): def test_groupby_empty_with_category(): # GH-9614 - df = pd.DataFrame({"A": [None] * 3, "B": pd.Categorical(["train", "train", "test"])}) + df = pd.DataFrame( + {"A": [None] * 3, "B": pd.Categorical(["train", "train", "test"])} + ) result = df.groupby("A").first()["B"] expected = pd.Series( pd.Categorical([], categories=["test", "train"]), From 4b77aaa9ea242872fc1828097d80ca651bbe3397 Mon Sep 17 00:00:00 2001 From: louis <@> Date: Sun, 3 Nov 2019 08:38:29 +0000 Subject: [PATCH 4/4] Add test comment to re-trigger build. --- pandas/tests/groupby/test_categorical.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 7577167ccfe3e..a187781ea214c 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -783,6 +783,8 @@ def test_categorical_no_compress(): def test_groupby_empty_with_category(): # GH-9614 + # test fix for when group by on None resulted in + # coercion of dtype categorical -> float df = pd.DataFrame( {"A": [None] * 3, "B": pd.Categorical(["train", "train", "test"])} )