From bce5e0ac67636b35dd7d51f68290db21aa3121fb Mon Sep 17 00:00:00 2001 From: ftrihardjo Date: Tue, 23 Feb 2021 13:44:25 +0700 Subject: [PATCH 01/12] pandas-dev issue #24271 --- pandas/tests/groupby/test_groupby.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4dce7e8553be4..b139c3129e03d 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -146,6 +146,29 @@ def func(dataf): assert isinstance(result, DataFrame) +def test_multi_index_sort(): + # GH 24271 + df = DataFrame({'group':['A']*6 + ['B']*6, + 'dose':['high', 'med', 'low']*4, + 'outcomes':np.arange(12.0)}) + + df.dose = pd.Categorical(df.dose, + categories=['low', 'med', 'high'], + ordered=True) + + result = df.groupby('group')['dose'].value_counts().sort_index(level=0, + sort_remaining=True) + index = [('A','low'), + ('A','med'), + ('A','high'), + ('B','low'), + ('B','med'), + ('B','high')] + index = MultiIndex.from_tuples(index,names=['group','dose']) + expected = Series([2]*6,index=index) + tm.assert_series_equal(result,expected) + + def test_inconsistent_return_type(): # GH5592 # inconsistent return type From bd67f972a50967b001b08ae9cf25a90151341570 Mon Sep 17 00:00:00 2001 From: ftrihardjo Date: Tue, 23 Feb 2021 18:50:42 +0700 Subject: [PATCH 02/12] pandas-dev issue #24271 --- pandas/tests/groupby/test_groupby.py | 42 +++++++++++++++------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index b139c3129e03d..9c69a29bb2462 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -148,25 +148,29 @@ def func(dataf): def test_multi_index_sort(): # GH 24271 - df = DataFrame({'group':['A']*6 + ['B']*6, - 'dose':['high', 'med', 'low']*4, - 'outcomes':np.arange(12.0)}) - - df.dose = pd.Categorical(df.dose, - categories=['low', 'med', 'high'], - ordered=True) - - result = df.groupby('group')['dose'].value_counts().sort_index(level=0, - sort_remaining=True) - index = [('A','low'), - ('A','med'), - ('A','high'), - ('B','low'), - ('B','med'), - ('B','high')] - index = MultiIndex.from_tuples(index,names=['group','dose']) - expected = Series([2]*6,index=index) - tm.assert_series_equal(result,expected) + df = DataFrame( + { + "group": ["A"] * 6 + ["B"] * 6, + "dose": ["high", "med", "low"] * 4, + "outcomes": np.arange(12.0), + } + ) + + df.dose = pd.Categorical(df.dose, categories=["low", "med", "high"], ordered=True) + + result = df.groupby("group")["dose"].value_counts() + result = result.sort_index(level=0, sort_remaining=True) + index = [ + ("A", "low"), + ("A", "med"), + ("A", "high"), + ("B", "low"), + ("B", "med"), + ("B", "high"), + ] + index = MultiIndex.from_tuples(index, names=["group", "dose"]) + expected = Series([2] * 6, index=index, name="dose") + tm.assert_series_equal(result, expected) def test_inconsistent_return_type(): From 4b585181dfa306b686dbdd61b2c2d612e39e5293 Mon Sep 17 00:00:00 2001 From: ftrihardjo Date: Mon, 1 Mar 2021 13:21:59 +0700 Subject: [PATCH 03/12] pandas-dev issue #24271 --- pandas/tests/groupby/test_categorical.py | 27 ++++++++++++++++++++++++ pandas/tests/groupby/test_groupby.py | 27 ------------------------ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 4049ef46f3006..5fce9ff79b5da 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -239,6 +239,33 @@ def test_level_get_group(observed): tm.assert_frame_equal(result, expected) +def test_sorting_with_different_categoricals(): + # GH 24271 + df = DataFrame( + { + "group": ["A"] * 6 + ["B"] * 6, + "dose": ["high", "med", "low"] * 4, + "outcomes": np.arange(12.0), + } + ) + + df.dose = pd.Categorical(df.dose, categories=["low", "med", "high"], ordered=True) + + result = df.groupby("group")["dose"].value_counts() + result = result.sort_index(level=0, sort_remaining=True) + index = [ + ("A", "low"), + ("A", "med"), + ("A", "high"), + ("B", "low"), + ("B", "med"), + ("B", "high"), + ] + index = MultiIndex.from_tuples(index, names=["group", "dose"]) + expected = Series([2] * 6, index=index, name="dose") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("ordered", [True, False]) def test_apply(ordered): # GH 10138 diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 9c69a29bb2462..4dce7e8553be4 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -146,33 +146,6 @@ def func(dataf): assert isinstance(result, DataFrame) -def test_multi_index_sort(): - # GH 24271 - df = DataFrame( - { - "group": ["A"] * 6 + ["B"] * 6, - "dose": ["high", "med", "low"] * 4, - "outcomes": np.arange(12.0), - } - ) - - df.dose = pd.Categorical(df.dose, categories=["low", "med", "high"], ordered=True) - - result = df.groupby("group")["dose"].value_counts() - result = result.sort_index(level=0, sort_remaining=True) - index = [ - ("A", "low"), - ("A", "med"), - ("A", "high"), - ("B", "low"), - ("B", "med"), - ("B", "high"), - ] - index = MultiIndex.from_tuples(index, names=["group", "dose"]) - expected = Series([2] * 6, index=index, name="dose") - tm.assert_series_equal(result, expected) - - def test_inconsistent_return_type(): # GH5592 # inconsistent return type From d08280e6d96bdc890825ac45640b2bb7554b5c74 Mon Sep 17 00:00:00 2001 From: ftrihardjo Date: Sat, 13 Mar 2021 07:58:03 +0700 Subject: [PATCH 04/12] pandas-dev issue #24271 --- pandas/tests/groupby/test_categorical.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 5fce9ff79b5da..50a21e7b8c0a1 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -253,15 +253,10 @@ def test_sorting_with_different_categoricals(): result = df.groupby("group")["dose"].value_counts() result = result.sort_index(level=0, sort_remaining=True) - index = [ - ("A", "low"), - ("A", "med"), - ("A", "high"), - ("B", "low"), - ("B", "med"), - ("B", "high"), - ] - index = MultiIndex.from_tuples(index, names=["group", "dose"]) + index = ["low", "med", "high", "low", "med", "high"] + index = pd.Categorical(index, categories=["low", "med", "high"], ordered=True) + index = [["A", "A", "A", "B", "B", "B"], pd.CategoricalIndex(index)] + index = MultiIndex.from_arrays(index, names=["group", "dose"]) expected = Series([2] * 6, index=index, name="dose") tm.assert_series_equal(result, expected) From 4d28cd99ae89a34d1880ce5b53781cc63c277397 Mon Sep 17 00:00:00 2001 From: ftrihardjo Date: Sat, 13 Mar 2021 08:09:31 +0700 Subject: [PATCH 05/12] pandas-dev issue #24271 --- pandas/tests/groupby/test_categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 50a21e7b8c0a1..eeab47ac6cfa5 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -255,7 +255,7 @@ def test_sorting_with_different_categoricals(): result = result.sort_index(level=0, sort_remaining=True) index = ["low", "med", "high", "low", "med", "high"] index = pd.Categorical(index, categories=["low", "med", "high"], ordered=True) - index = [["A", "A", "A", "B", "B", "B"], pd.CategoricalIndex(index)] + index = [["A", "A", "A", "B", "B", "B"], CategoricalIndex(index)] index = MultiIndex.from_arrays(index, names=["group", "dose"]) expected = Series([2] * 6, index=index, name="dose") tm.assert_series_equal(result, expected) From 0f1f9da889709885ed687d56152ad36e11620b7a Mon Sep 17 00:00:00 2001 From: ftrihardjo Date: Sat, 13 Mar 2021 08:22:28 +0700 Subject: [PATCH 06/12] pandas-dev issue #24271 --- pandas/tests/groupby/test_categorical.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index eeab47ac6cfa5..23dff7ffc0a68 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -249,12 +249,12 @@ def test_sorting_with_different_categoricals(): } ) - df.dose = pd.Categorical(df.dose, categories=["low", "med", "high"], ordered=True) + df.dose = Categorical(df.dose, categories=["low", "med", "high"], ordered=True) result = df.groupby("group")["dose"].value_counts() result = result.sort_index(level=0, sort_remaining=True) index = ["low", "med", "high", "low", "med", "high"] - index = pd.Categorical(index, categories=["low", "med", "high"], ordered=True) + index = Categorical(index, categories=["low", "med", "high"], ordered=True) index = [["A", "A", "A", "B", "B", "B"], CategoricalIndex(index)] index = MultiIndex.from_arrays(index, names=["group", "dose"]) expected = Series([2] * 6, index=index, name="dose") From cec4a9255614e6900faade1042652a1047db0f76 Mon Sep 17 00:00:00 2001 From: ftrihardjo Date: Sat, 13 Mar 2021 09:06:57 +0700 Subject: [PATCH 07/12] pandas-dev issue #24271 --- pandas/tests/groupby/test_categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 23dff7ffc0a68..743ddd8604107 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -257,7 +257,7 @@ def test_sorting_with_different_categoricals(): index = Categorical(index, categories=["low", "med", "high"], ordered=True) index = [["A", "A", "A", "B", "B", "B"], CategoricalIndex(index)] index = MultiIndex.from_arrays(index, names=["group", "dose"]) - expected = Series([2] * 6, index=index, name="dose") + expected = Series([2] * 6, index=index) tm.assert_series_equal(result, expected) From 1895877fe7bd466884536a771e8946a456836487 Mon Sep 17 00:00:00 2001 From: ftrihardjo Date: Sat, 13 Mar 2021 10:06:01 +0700 Subject: [PATCH 08/12] pandas-dev issue #24271 --- pandas/tests/groupby/test_categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 743ddd8604107..a791e7404970e 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -256,7 +256,7 @@ def test_sorting_with_different_categoricals(): index = ["low", "med", "high", "low", "med", "high"] index = Categorical(index, categories=["low", "med", "high"], ordered=True) index = [["A", "A", "A", "B", "B", "B"], CategoricalIndex(index)] - index = MultiIndex.from_arrays(index, names=["group", "dose"]) + index = MultiIndex.from_arrays(index) expected = Series([2] * 6, index=index) tm.assert_series_equal(result, expected) From f049151c6ddf0cddbf5214cdeb37fa8f0a134112 Mon Sep 17 00:00:00 2001 From: ftrihardjo Date: Sat, 13 Mar 2021 10:43:53 +0700 Subject: [PATCH 09/12] pandas-dev issue #24271 --- pandas/tests/groupby/test_categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index a791e7404970e..f592ff4667ef3 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -256,7 +256,7 @@ def test_sorting_with_different_categoricals(): index = ["low", "med", "high", "low", "med", "high"] index = Categorical(index, categories=["low", "med", "high"], ordered=True) index = [["A", "A", "A", "B", "B", "B"], CategoricalIndex(index)] - index = MultiIndex.from_arrays(index) + index = MultiIndex.from_arrays(index, names=["group"]) expected = Series([2] * 6, index=index) tm.assert_series_equal(result, expected) From 9a0acb7b092406260fcc4547ad0643c89fe6286e Mon Sep 17 00:00:00 2001 From: ftrihardjo Date: Sat, 13 Mar 2021 11:27:57 +0700 Subject: [PATCH 10/12] pandas-dev issue #24271 --- pandas/tests/groupby/test_categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index f592ff4667ef3..7ebbc4aea794c 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -256,7 +256,7 @@ def test_sorting_with_different_categoricals(): index = ["low", "med", "high", "low", "med", "high"] index = Categorical(index, categories=["low", "med", "high"], ordered=True) index = [["A", "A", "A", "B", "B", "B"], CategoricalIndex(index)] - index = MultiIndex.from_arrays(index, names=["group"]) + index = MultiIndex.from_arrays(index, names="group") expected = Series([2] * 6, index=index) tm.assert_series_equal(result, expected) From f0627f7286c75a20e6d962ca10390b64c84cccda Mon Sep 17 00:00:00 2001 From: ftrihardjo Date: Sat, 13 Mar 2021 11:29:56 +0700 Subject: [PATCH 11/12] pandas-dev issue #24271 --- pandas/tests/groupby/test_categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 7ebbc4aea794c..cc0ff75bbe3eb 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -256,7 +256,7 @@ def test_sorting_with_different_categoricals(): index = ["low", "med", "high", "low", "med", "high"] index = Categorical(index, categories=["low", "med", "high"], ordered=True) index = [["A", "A", "A", "B", "B", "B"], CategoricalIndex(index)] - index = MultiIndex.from_arrays(index, names="group") + index = MultiIndex.from_arrays(index, names=["group", None]) expected = Series([2] * 6, index=index) tm.assert_series_equal(result, expected) From 52beb584e83a3d0bb1061e2294b0a6cc9a3e81f5 Mon Sep 17 00:00:00 2001 From: ftrihardjo Date: Sat, 13 Mar 2021 12:01:30 +0700 Subject: [PATCH 12/12] pandas-dev issue #24271 --- pandas/tests/groupby/test_categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index cc0ff75bbe3eb..43e4070b9b163 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -257,7 +257,7 @@ def test_sorting_with_different_categoricals(): index = Categorical(index, categories=["low", "med", "high"], ordered=True) index = [["A", "A", "A", "B", "B", "B"], CategoricalIndex(index)] index = MultiIndex.from_arrays(index, names=["group", None]) - expected = Series([2] * 6, index=index) + expected = Series([2] * 6, index=index, name="dose") tm.assert_series_equal(result, expected)