From 64b7cbaed1d1d08ac7c5c6964aba85f4c5c20411 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 27 Jul 2020 10:40:43 +0100
Subject: [PATCH 1/5] adding test for .describe() with duplicate columns

---
 pandas/tests/groupby/test_function.py | 57 +++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index e693962e57ac3..97e99337f5ffd 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -992,6 +992,63 @@ def test_frame_describe_unstacked_format():
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("as_index", [True, False])
+def test_describe_with_duplicate_output_column_names(as_index):
+    # GH #35314
+    df = pd.DataFrame(
+        {
+            "a": [99, 99, 99, 88, 88, 88],
+            "b": [1, 2, 3, 4, 5, 6],
+            "c": [10, 20, 30, 40, 50, 60],
+        },
+        columns=["a", "b", "b"],
+    )
+
+    expected = (
+        pd.DataFrame.from_records(
+            [
+                ("a", "count", 3.0, 3.0),
+                ("a", "mean", 88.0, 99.0),
+                ("a", "std", 0.0, 0.0),
+                ("a", "min", 88.0, 99.0),
+                ("a", "25%", 88.0, 99.0),
+                ("a", "50%", 88.0, 99.0),
+                ("a", "75%", 88.0, 99.0),
+                ("a", "max", 88.0, 99.0),
+                ("b", "count", 3.0, 3.0),
+                ("b", "mean", 5.0, 2.0),
+                ("b", "std", 1.0, 1.0),
+                ("b", "min", 4.0, 1.0),
+                ("b", "25%", 4.5, 1.5),
+                ("b", "50%", 5.0, 2.0),
+                ("b", "75%", 5.5, 2.5),
+                ("b", "max", 6.0, 3.0),
+                ("b", "count", 3.0, 3.0),
+                ("b", "mean", 5.0, 2.0),
+                ("b", "std", 1.0, 1.0),
+                ("b", "min", 4.0, 1.0),
+                ("b", "25%", 4.5, 1.5),
+                ("b", "50%", 5.0, 2.0),
+                ("b", "75%", 5.5, 2.5),
+                ("b", "max", 6.0, 3.0),
+            ],
+        )
+        .set_index([0, 1])
+        .T
+    )
+    expected.columns.names = [None, None]
+    expected.index = pd.Index([88, 99], name="a")
+
+    if as_index:
+        expected = expected.drop(columns=["a"], level=0)
+    else:
+        expected = expected.reset_index(drop=True)
+
+    result = df.groupby("a", as_index=as_index).describe()
+
+    tm.assert_frame_equal(result, expected)
+
+
 def test_groupby_mean_no_overflow():
     # Regression test for (#22487)
     df = pd.DataFrame(

From 9bef2dde81890dca489d9d007f9074edd36f79b1 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Sat, 1 Aug 2020 23:46:43 +0100
Subject: [PATCH 2/5] addressing PerformanceWarning in test

---
 pandas/tests/groupby/test_function.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 97e99337f5ffd..01a7ea867105e 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1046,6 +1046,11 @@ def test_describe_with_duplicate_output_column_names(as_index):
 
     result = df.groupby("a", as_index=as_index).describe()
 
+    tm.assert_index_equal(result.columns, expected.columns)
+
+    result.columns = pd.RangeIndex(result.shape[1])
+    expected.columns = pd.RangeIndex(expected.shape[1])
+
     tm.assert_frame_equal(result, expected)
 
 

From 3ec647edc327d3f6407d4264a916602c81ca74aa Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Sun, 2 Aug 2020 19:38:33 +0100
Subject: [PATCH 3/5] amend GH issue comment (drop '#') to start tests

---
 pandas/tests/groupby/test_function.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 01a7ea867105e..f2113f8967e72 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -994,7 +994,7 @@ def test_frame_describe_unstacked_format():
 
 @pytest.mark.parametrize("as_index", [True, False])
 def test_describe_with_duplicate_output_column_names(as_index):
-    # GH #35314
+    # GH 35314
     df = pd.DataFrame(
         {
             "a": [99, 99, 99, 88, 88, 88],

From 351726a6c143a88f79260647da8b0e45f4a061d9 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 3 Aug 2020 21:45:00 +0100
Subject: [PATCH 4/5] use pytest.mark.filterwarnings to ignore PerformanceWarning in test

---
 pandas/tests/groupby/test_function.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index f2113f8967e72..d58aad79e1982 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -992,6 +992,9 @@ def test_frame_describe_unstacked_format():
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.filterwarnings(
+    "ignore:indexing past lexsort depth may impact performance:pandas.errors.PerformanceWarning"
+)
 @pytest.mark.parametrize("as_index", [True, False])
 def test_describe_with_duplicate_output_column_names(as_index):
     # GH 35314
@@ -1046,11 +1049,6 @@ def test_describe_with_duplicate_output_column_names(as_index):
 
     result = df.groupby("a", as_index=as_index).describe()
 
-    tm.assert_index_equal(result.columns, expected.columns)
-
-    result.columns = pd.RangeIndex(result.shape[1])
-    expected.columns = pd.RangeIndex(expected.shape[1])
-
     tm.assert_frame_equal(result, expected)
 
 

From 75208184b46470bda4e7e30e34c11ce078c53071 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 3 Aug 2020 21:57:58 +0100
Subject: [PATCH 5/5] fix PEP8 violation

---
 pandas/tests/groupby/test_function.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index d58aad79e1982..cbfba16223f74 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -993,7 +993,9 @@ def test_frame_describe_unstacked_format():
 
 
 @pytest.mark.filterwarnings(
-    "ignore:indexing past lexsort depth may impact performance:pandas.errors.PerformanceWarning"
+    "ignore:"
+    "indexing past lexsort depth may impact performance:"
+    "pandas.errors.PerformanceWarning"
 )
 @pytest.mark.parametrize("as_index", [True, False])
 def test_describe_with_duplicate_output_column_names(as_index):