From 9d5de5bdfff5edb5f3122278ecca18160ad7de7a Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Fri, 8 May 2020 11:41:05 -0500
Subject: [PATCH 1/3] TST/CLN: Move groupby tests

---
 pandas/tests/groupby/test_counting.py | 135 +++++-
 pandas/tests/groupby/test_function.py | 659 --------------------------
 pandas/tests/groupby/test_nunique.py  | 164 +++++++
 pandas/tests/groupby/test_quantile.py | 336 +++++++++++++
 pandas/tests/groupby/test_size.py     |  38 ++
 5 files changed, 672 insertions(+), 660 deletions(-)
 create mode 100644 pandas/tests/groupby/test_nunique.py
 create mode 100644 pandas/tests/groupby/test_quantile.py
 create mode 100644 pandas/tests/groupby/test_size.py

diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py
index 56a18757da6e7..997d9b006c802 100644
--- a/pandas/tests/groupby/test_counting.py
+++ b/pandas/tests/groupby/test_counting.py
@@ -1,9 +1,20 @@
 from itertools import product
+from string import ascii_lowercase
 
 import numpy as np
 import pytest
 
-from pandas import DataFrame, Index, MultiIndex, Period, Series, Timedelta, Timestamp
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Period,
+    Series,
+    Timedelta,
+    Timestamp,
+    date_range,
+)
 import pandas._testing as tm
 
 
@@ -229,3 +240,125 @@ def test_count_groupby_column_with_nan_in_groupby_column(self):
             index=Index([0.0, 3.0, 4.0, 5.0], name="B"), data={"A": [1, 1, 1, 1]}
         )
         tm.assert_frame_equal(expected, res)
+
+
+def test_groupby_timedelta_cython_count():
+    # each of the groups "a" and "b" holds two non-null timedelta values
+    deltas = np.arange(4).astype("timedelta64[ns]")
+    frame = DataFrame({"g": list("ab" * 2), "delt": deltas})
+    counted = frame.groupby("g")["delt"].count()
+    wanted = Series([2, 2], index=pd.Index(["a", "b"], name="g"), name="delt")
+    tm.assert_series_equal(wanted, counted)
+
+
+def test_count():
+    # groupby(...).count() must agree with applying DataFrame.count per group
+    n = 1 << 15
+    stamps = date_range("2015-08-30", periods=n // 10, freq="T")
+    letters = list(ascii_lowercase)
+    df = DataFrame(
+        {
+            "1st": np.random.choice(letters, n),
+            "2nd": np.random.randint(0, 5, n),
+            "3rd": np.random.randn(n).round(3),
+            "4th": np.random.randint(-10, 10, n),
+            "5th": np.random.choice(stamps, n),
+            "6th": np.random.randn(n).round(3),
+            "7th": np.random.randn(n).round(3),
+            "8th": np.random.choice(stamps, n) - np.random.choice(stamps, 1),
+            "9th": np.random.choice(letters, n),
+        }
+    )
+
+    # null out up to n // 10 rows in each column other than 1st / 2nd / 4th
+    for col in df.columns.drop(["1st", "2nd", "4th"]):
+        df.loc[np.random.choice(n, n // 10), col] = np.nan
+    df["9th"] = df["9th"].astype("category")
+
+    for key in ["1st", "2nd", ["1st", "2nd"]]:
+        via_apply = df.groupby(key).apply(DataFrame.count).drop(key, axis=1)
+        tm.assert_frame_equal(df.groupby(key).count(), via_apply)
+
+
+def test_count_non_nulls():
+    # GH#5610: groupby count only tallies the non-null entries of each column
+    rows = [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, np.nan]]
+    df = pd.DataFrame(rows, columns=["A", "B", "C"])
+
+    expected = DataFrame([[1, 2], [0, 0]], columns=["B", "C"], index=[1, 3])
+    expected.index.name = "A"
+
+    # frame-level count with the group key as the index
+    keyed = df.groupby("A").count()
+    # with as_index=False the group key comes back as an ordinary column
+    flat = df.groupby("A", as_index=False).count()
+    tm.assert_frame_equal(flat, expected.reset_index())
+    tm.assert_frame_equal(keyed, expected)
+
+    # selecting one column yields the matching Series
+    picked = df.groupby("A")["B"].count()
+    tm.assert_series_equal(picked, expected["B"])
+
+
+def test_count_object():
+    # object column "a": NaN entries are left out of the per-group count
+    groups = pd.Index([2, 3], name="c")
+    cases = [(["a"] * 3, [3, 3]), (["a", np.nan, np.nan], [1, 3])]
+
+    for head, counts in cases:
+        df = pd.DataFrame({"a": head + ["b"] * 3, "c": [2] * 3 + [3] * 3})
+        counted = df.groupby("c")["a"].count()
+        wanted = pd.Series(counts, index=groups, name="a")
+        tm.assert_series_equal(counted, wanted)
+
+
+def test_count_cross_type():
+    # GH8169: counts must not depend on the dtype of the counted columns
+    left = np.random.randint(0, 5, (100, 2))
+    right = np.random.randint(0, 2, (100, 2))
+    df = pd.DataFrame(np.hstack((left, right)), columns=["a", "b", "c", "d"])
+
+    # turn every 2 into a missing value before taking the reference counts
+    df[df == 2] = np.nan
+    keys = ["c", "d"]
+    expected = df.groupby(keys).count()
+
+    for dtype in ["float32", "object"]:
+        for col in ["a", "b"]:
+            df[col] = df[col].astype(dtype)
+        tm.assert_frame_equal(df.groupby(keys).count(), expected)
+
+
+def test_lower_int_prec_count():
+    # count over columns stored as int8 / uint32 / int16
+    narrow = {
+        "a": np.array([0, 1, 2, 100], np.int8),
+        "b": np.array([1, 2, 3, 6], np.uint32),
+        "c": np.array([4, 5, 6, 8], np.int16),
+    }
+    df = DataFrame({**narrow, "grp": list("ab" * 2)})
+    counted = df.groupby("grp").count()
+
+    # two rows land in each of the groups "a" and "b"
+    labels = pd.Index(list("ab"), name="grp")
+    wanted = DataFrame({col: [2, 2] for col in narrow}, index=labels)
+    tm.assert_frame_equal(counted, wanted)
+
+
+def test_count_uses_size_on_exception():
+    class RaisingObjectException(Exception):
+        """Raised by every equality comparison on a RaisingObject."""
+
+    class RaisingObject:
+        def __init__(self, msg="I will raise inside Cython"):
+            super().__init__()
+            self.msg = msg
+
+        def __eq__(self, other):
+            # gets called in Cython to check that raising calls the method
+            raise RaisingObjectException(self.msg)
+
+    cells = [RaisingObject() for _ in range(4)]
+    df = DataFrame({"a": cells, "grp": list("ab" * 2)})
+    wanted = DataFrame({"a": [2, 2]}, index=pd.Index(list("ab"), name="grp"))
+    tm.assert_frame_equal(df.groupby("grp").count(), wanted)
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 93dd1bf23c308..11673532681a4 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1,7 +1,5 @@
 import builtins
-import datetime as dt
 from io import StringIO
-from string import ascii_lowercase
 
 import numpy as np
 import pytest
@@ -13,7 +11,6 @@
     DataFrame,
     Index,
     MultiIndex,
-    NaT,
     Series,
     Timestamp,
     date_range,
@@ -983,659 +980,3 @@ def test_frame_describe_unstacked_format():
         columns=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
     )
     tm.assert_frame_equal(result, expected)
-
-
-# nunique
-# --------------------------------
-
-
-@pytest.mark.parametrize("n", 10 ** np.arange(2, 6))
-@pytest.mark.parametrize("m", [10, 100, 1000])
-@pytest.mark.parametrize("sort", [False, True])
-@pytest.mark.parametrize("dropna", [False, True])
-def test_series_groupby_nunique(n, m, sort, dropna):
-    def check_nunique(df, keys, as_index=True):
-        original_df = df.copy()
-        gr = df.groupby(keys, as_index=as_index, sort=sort)
-        left = gr["julie"].nunique(dropna=dropna)
-
-        gr = df.groupby(keys, as_index=as_index, sort=sort)
-        right = gr["julie"].apply(Series.nunique, dropna=dropna)
-        if not as_index:
-            right = right.reset_index(drop=True)
-
-        tm.assert_series_equal(left, right, check_names=False)
-        tm.assert_frame_equal(df, original_df)
-
-    days = date_range("2015-08-23", periods=10)
-
-    frame = DataFrame(
-        {
-            "jim": np.random.choice(list(ascii_lowercase), n),
-            "joe": np.random.choice(days, n),
-            "julie": np.random.randint(0, m, n),
-        }
-    )
-
-    check_nunique(frame, ["jim"])
-    check_nunique(frame, ["jim", "joe"])
-
-    frame.loc[1::17, "jim"] = None
-    frame.loc[3::37, "joe"] = None
-    frame.loc[7::19, "julie"] = None
-    frame.loc[8::19, "julie"] = None
-    frame.loc[9::19, "julie"] = None
-
-    check_nunique(frame, ["jim"])
-    check_nunique(frame, ["jim", "joe"])
-    check_nunique(frame, ["jim"], as_index=False)
-    check_nunique(frame, ["jim", "joe"], as_index=False)
-
-
-def test_nunique():
-    df = DataFrame({"A": list("abbacc"), "B": list("abxacc"), "C": list("abbacx")})
-
-    expected = DataFrame({"A": [1] * 3, "B": [1, 2, 1], "C": [1, 1, 2]})
-    result = df.groupby("A", as_index=False).nunique()
-    tm.assert_frame_equal(result, expected)
-
-    # as_index
-    expected.index = list("abc")
-    expected.index.name = "A"
-    result = df.groupby("A").nunique()
-    tm.assert_frame_equal(result, expected)
-
-    # with na
-    result = df.replace({"x": None}).groupby("A").nunique(dropna=False)
-    tm.assert_frame_equal(result, expected)
-
-    # dropna
-    expected = DataFrame({"A": [1] * 3, "B": [1] * 3, "C": [1] * 3}, index=list("abc"))
-    expected.index.name = "A"
-    result = df.replace({"x": None}).groupby("A").nunique()
-    tm.assert_frame_equal(result, expected)
-
-
-def test_nunique_with_object():
-    # GH 11077
-    data = pd.DataFrame(
-        [
-            [100, 1, "Alice"],
-            [200, 2, "Bob"],
-            [300, 3, "Charlie"],
-            [-400, 4, "Dan"],
-            [500, 5, "Edith"],
-        ],
-        columns=["amount", "id", "name"],
-    )
-
-    result = data.groupby(["id", "amount"])["name"].nunique()
-    index = MultiIndex.from_arrays([data.id, data.amount])
-    expected = pd.Series([1] * 5, name="name", index=index)
-    tm.assert_series_equal(result, expected)
-
-
-def test_nunique_with_empty_series():
-    # GH 12553
-    data = pd.Series(name="name", dtype=object)
-    result = data.groupby(level=0).nunique()
-    expected = pd.Series(name="name", dtype="int64")
-    tm.assert_series_equal(result, expected)
-
-
-def test_nunique_with_timegrouper():
-    # GH 13453
-    test = pd.DataFrame(
-        {
-            "time": [
-                Timestamp("2016-06-28 09:35:35"),
-                Timestamp("2016-06-28 16:09:30"),
-                Timestamp("2016-06-28 16:46:28"),
-            ],
-            "data": ["1", "2", "3"],
-        }
-    ).set_index("time")
-    result = test.groupby(pd.Grouper(freq="h"))["data"].nunique()
-    expected = test.groupby(pd.Grouper(freq="h"))["data"].apply(pd.Series.nunique)
-    tm.assert_series_equal(result, expected)
-
-
-@pytest.mark.parametrize(
-    "key, data, dropna, expected",
-    [
-        (
-            ["x", "x", "x"],
-            [Timestamp("2019-01-01"), NaT, Timestamp("2019-01-01")],
-            True,
-            Series([1], index=pd.Index(["x"], name="key"), name="data"),
-        ),
-        (
-            ["x", "x", "x"],
-            [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)],
-            True,
-            Series([1], index=pd.Index(["x"], name="key"), name="data"),
-        ),
-        (
-            ["x", "x", "x", "y", "y"],
-            [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)],
-            False,
-            Series([2, 2], index=pd.Index(["x", "y"], name="key"), name="data"),
-        ),
-        (
-            ["x", "x", "x", "x", "y"],
-            [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)],
-            False,
-            Series([2, 1], index=pd.Index(["x", "y"], name="key"), name="data"),
-        ),
-    ],
-)
-def test_nunique_with_NaT(key, data, dropna, expected):
-    # GH 27951
-    df = pd.DataFrame({"key": key, "data": data})
-    result = df.groupby(["key"])["data"].nunique(dropna=dropna)
-    tm.assert_series_equal(result, expected)
-
-
-def test_nunique_preserves_column_level_names():
-    # GH 23222
-    test = pd.DataFrame([1, 2, 2], columns=pd.Index(["A"], name="level_0"))
-    result = test.groupby([0, 0, 0]).nunique()
-    expected = pd.DataFrame([2], columns=test.columns)
-    tm.assert_frame_equal(result, expected)
-
-
-# count
-# --------------------------------
-
-
-def test_groupby_timedelta_cython_count():
-    df = DataFrame(
-        {"g": list("ab" * 2), "delt": np.arange(4).astype("timedelta64[ns]")}
-    )
-    expected = Series([2, 2], index=pd.Index(["a", "b"], name="g"), name="delt")
-    result = df.groupby("g").delt.count()
-    tm.assert_series_equal(expected, result)
-
-
-def test_count():
-    n = 1 << 15
-    dr = date_range("2015-08-30", periods=n // 10, freq="T")
-
-    df = DataFrame(
-        {
-            "1st": np.random.choice(list(ascii_lowercase), n),
-            "2nd": np.random.randint(0, 5, n),
-            "3rd": np.random.randn(n).round(3),
-            "4th": np.random.randint(-10, 10, n),
-            "5th": np.random.choice(dr, n),
-            "6th": np.random.randn(n).round(3),
-            "7th": np.random.randn(n).round(3),
-            "8th": np.random.choice(dr, n) - np.random.choice(dr, 1),
-            "9th": np.random.choice(list(ascii_lowercase), n),
-        }
-    )
-
-    for col in df.columns.drop(["1st", "2nd", "4th"]):
-        df.loc[np.random.choice(n, n // 10), col] = np.nan
-
-    df["9th"] = df["9th"].astype("category")
-
-    for key in ["1st", "2nd", ["1st", "2nd"]]:
-        left = df.groupby(key).count()
-        right = df.groupby(key).apply(DataFrame.count).drop(key, axis=1)
-        tm.assert_frame_equal(left, right)
-
-
-def test_count_non_nulls():
-    # GH#5610
-    # count counts non-nulls
-    df = pd.DataFrame(
-        [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, np.nan]],
-        columns=["A", "B", "C"],
-    )
-
-    count_as = df.groupby("A").count()
-    count_not_as = df.groupby("A", as_index=False).count()
-
-    expected = DataFrame([[1, 2], [0, 0]], columns=["B", "C"], index=[1, 3])
-    expected.index.name = "A"
-    tm.assert_frame_equal(count_not_as, expected.reset_index())
-    tm.assert_frame_equal(count_as, expected)
-
-    count_B = df.groupby("A")["B"].count()
-    tm.assert_series_equal(count_B, expected["B"])
-
-
-def test_count_object():
-    df = pd.DataFrame({"a": ["a"] * 3 + ["b"] * 3, "c": [2] * 3 + [3] * 3})
-    result = df.groupby("c").a.count()
-    expected = pd.Series([3, 3], index=pd.Index([2, 3], name="c"), name="a")
-    tm.assert_series_equal(result, expected)
-
-    df = pd.DataFrame({"a": ["a", np.nan, np.nan] + ["b"] * 3, "c": [2] * 3 + [3] * 3})
-    result = df.groupby("c").a.count()
-    expected = pd.Series([1, 3], index=pd.Index([2, 3], name="c"), name="a")
-    tm.assert_series_equal(result, expected)
-
-
-def test_count_cross_type():
-    # GH8169
-    vals = np.hstack(
-        (np.random.randint(0, 5, (100, 2)), np.random.randint(0, 2, (100, 2)))
-    )
-
-    df = pd.DataFrame(vals, columns=["a", "b", "c", "d"])
-    df[df == 2] = np.nan
-    expected = df.groupby(["c", "d"]).count()
-
-    for t in ["float32", "object"]:
-        df["a"] = df["a"].astype(t)
-        df["b"] = df["b"].astype(t)
-        result = df.groupby(["c", "d"]).count()
-        tm.assert_frame_equal(result, expected)
-
-
-def test_lower_int_prec_count():
-    df = DataFrame(
-        {
-            "a": np.array([0, 1, 2, 100], np.int8),
-            "b": np.array([1, 2, 3, 6], np.uint32),
-            "c": np.array([4, 5, 6, 8], np.int16),
-            "grp": list("ab" * 2),
-        }
-    )
-    result = df.groupby("grp").count()
-    expected = DataFrame(
-        {"a": [2, 2], "b": [2, 2], "c": [2, 2]}, index=pd.Index(list("ab"), name="grp")
-    )
-    tm.assert_frame_equal(result, expected)
-
-
-def test_count_uses_size_on_exception():
-    class RaisingObjectException(Exception):
-        pass
-
-    class RaisingObject:
-        def __init__(self, msg="I will raise inside Cython"):
-            super().__init__()
-            self.msg = msg
-
-        def __eq__(self, other):
-            # gets called in Cython to check that raising calls the method
-            raise RaisingObjectException(self.msg)
-
-    df = DataFrame({"a": [RaisingObject() for _ in range(4)], "grp": list("ab" * 2)})
-    result = df.groupby("grp").count()
-    expected = DataFrame({"a": [2, 2]}, index=pd.Index(list("ab"), name="grp"))
-    tm.assert_frame_equal(result, expected)
-
-
-# size
-# --------------------------------
-
-
-@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]])
-def test_size(df, by):
-    grouped = df.groupby(by=by)
-    result = grouped.size()
-    for key, group in grouped:
-        assert result[key] == len(group)
-
-
-@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]])
-@pytest.mark.parametrize("sort", [True, False])
-def test_size_sort(df, sort, by):
-    df = DataFrame(np.random.choice(20, (1000, 3)), columns=list("ABC"))
-    left = df.groupby(by=by, sort=sort).size()
-    right = df.groupby(by=by, sort=sort)["C"].apply(lambda a: a.shape[0])
-    tm.assert_series_equal(left, right, check_names=False)
-
-
-def test_size_series_dataframe():
-    # https://github.com/pandas-dev/pandas/issues/11699
-    df = DataFrame(columns=["A", "B"])
-    out = Series(dtype="int64", index=Index([], name="A"))
-    tm.assert_series_equal(df.groupby("A").size(), out)
-
-
-def test_size_groupby_all_null():
-    # https://github.com/pandas-dev/pandas/issues/23050
-    # Assert no 'Value Error : Length of passed values is 2, index implies 0'
-    df = DataFrame({"A": [None, None]})  # all-null groups
-    result = df.groupby("A").size()
-    expected = Series(dtype="int64", index=Index([], name="A"))
-    tm.assert_series_equal(result, expected)
-
-
-# quantile
-# --------------------------------
-
-
-@pytest.mark.parametrize(
-    "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"]
-)
-@pytest.mark.parametrize(
-    "a_vals,b_vals",
-    [
-        # Ints
-        ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]),
-        ([1, 2, 3, 4], [4, 3, 2, 1]),
-        ([1, 2, 3, 4, 5], [4, 3, 2, 1]),
-        # Floats
-        ([1.0, 2.0, 3.0, 4.0, 5.0], [5.0, 4.0, 3.0, 2.0, 1.0]),
-        # Missing data
-        ([1.0, np.nan, 3.0, np.nan, 5.0], [5.0, np.nan, 3.0, np.nan, 1.0]),
-        ([np.nan, 4.0, np.nan, 2.0, np.nan], [np.nan, 4.0, np.nan, 2.0, np.nan]),
-        # Timestamps
-        (
-            list(pd.date_range("1/1/18", freq="D", periods=5)),
-            list(pd.date_range("1/1/18", freq="D", periods=5))[::-1],
-        ),
-        # All NA
-        ([np.nan] * 5, [np.nan] * 5),
-    ],
-)
-@pytest.mark.parametrize("q", [0, 0.25, 0.5, 0.75, 1])
-def test_quantile(interpolation, a_vals, b_vals, q):
-    if interpolation == "nearest" and q == 0.5 and b_vals == [4, 3, 2, 1]:
-        pytest.skip(
-            "Unclear numpy expectation for nearest result with equidistant data"
-        )
-
-    a_expected = pd.Series(a_vals).quantile(q, interpolation=interpolation)
-    b_expected = pd.Series(b_vals).quantile(q, interpolation=interpolation)
-
-    df = DataFrame(
-        {"key": ["a"] * len(a_vals) + ["b"] * len(b_vals), "val": a_vals + b_vals}
-    )
-
-    expected = DataFrame(
-        [a_expected, b_expected], columns=["val"], index=Index(["a", "b"], name="key")
-    )
-    result = df.groupby("key").quantile(q, interpolation=interpolation)
-
-    tm.assert_frame_equal(result, expected)
-
-
-def test_quantile_array():
-    # https://github.com/pandas-dev/pandas/issues/27526
-    df = pd.DataFrame({"A": [0, 1, 2, 3, 4]})
-    result = df.groupby([0, 0, 1, 1, 1]).quantile([0.25])
-
-    index = pd.MultiIndex.from_product([[0, 1], [0.25]])
-    expected = pd.DataFrame({"A": [0.25, 2.50]}, index=index)
-    tm.assert_frame_equal(result, expected)
-
-    df = pd.DataFrame({"A": [0, 1, 2, 3], "B": [4, 5, 6, 7]})
-    index = pd.MultiIndex.from_product([[0, 1], [0.25, 0.75]])
-
-    result = df.groupby([0, 0, 1, 1]).quantile([0.25, 0.75])
-    expected = pd.DataFrame(
-        {"A": [0.25, 0.75, 2.25, 2.75], "B": [4.25, 4.75, 6.25, 6.75]}, index=index
-    )
-    tm.assert_frame_equal(result, expected)
-
-
-def test_quantile_array2():
-    # https://github.com/pandas-dev/pandas/pull/28085#issuecomment-524066959
-    df = pd.DataFrame(
-        np.random.RandomState(0).randint(0, 5, size=(10, 3)), columns=list("ABC")
-    )
-    result = df.groupby("A").quantile([0.3, 0.7])
-    expected = pd.DataFrame(
-        {
-            "B": [0.9, 2.1, 2.2, 3.4, 1.6, 2.4, 2.3, 2.7, 0.0, 0.0],
-            "C": [1.2, 2.8, 1.8, 3.0, 0.0, 0.0, 1.9, 3.1, 3.0, 3.0],
-        },
-        index=pd.MultiIndex.from_product(
-            [[0, 1, 2, 3, 4], [0.3, 0.7]], names=["A", None]
-        ),
-    )
-    tm.assert_frame_equal(result, expected)
-
-
-def test_quantile_array_no_sort():
-    df = pd.DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]})
-    result = df.groupby([1, 0, 1], sort=False).quantile([0.25, 0.5, 0.75])
-    expected = pd.DataFrame(
-        {"A": [0.5, 1.0, 1.5, 1.0, 1.0, 1.0], "B": [3.5, 4.0, 4.5, 4.0, 4.0, 4.0]},
-        index=pd.MultiIndex.from_product([[1, 0], [0.25, 0.5, 0.75]]),
-    )
-    tm.assert_frame_equal(result, expected)
-
-    result = df.groupby([1, 0, 1], sort=False).quantile([0.75, 0.25])
-    expected = pd.DataFrame(
-        {"A": [1.5, 0.5, 1.0, 1.0], "B": [4.5, 3.5, 4.0, 4.0]},
-        index=pd.MultiIndex.from_product([[1, 0], [0.75, 0.25]]),
-    )
-    tm.assert_frame_equal(result, expected)
-
-
-def test_quantile_array_multiple_levels():
-    df = pd.DataFrame(
-        {"A": [0, 1, 2], "B": [3, 4, 5], "c": ["a", "a", "a"], "d": ["a", "a", "b"]}
-    )
-    result = df.groupby(["c", "d"]).quantile([0.25, 0.75])
-    index = pd.MultiIndex.from_tuples(
-        [("a", "a", 0.25), ("a", "a", 0.75), ("a", "b", 0.25), ("a", "b", 0.75)],
-        names=["c", "d", None],
-    )
-    expected = pd.DataFrame(
-        {"A": [0.25, 0.75, 2.0, 2.0], "B": [3.25, 3.75, 5.0, 5.0]}, index=index
-    )
-    tm.assert_frame_equal(result, expected)
-
-
-@pytest.mark.parametrize("frame_size", [(2, 3), (100, 10)])
-@pytest.mark.parametrize("groupby", [[0], [0, 1]])
-@pytest.mark.parametrize("q", [[0.5, 0.6]])
-def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby, q):
-    # GH30289
-    nrow, ncol = frame_size
-    df = pd.DataFrame(
-        np.array([ncol * [_ % 4] for _ in range(nrow)]), columns=range(ncol)
-    )
-
-    idx_levels = [list(range(min(nrow, 4)))] * len(groupby) + [q]
-    idx_codes = [[x for x in range(min(nrow, 4)) for _ in q]] * len(groupby) + [
-        list(range(len(q))) * min(nrow, 4)
-    ]
-    expected_index = pd.MultiIndex(
-        levels=idx_levels, codes=idx_codes, names=groupby + [None]
-    )
-    expected_values = [
-        [float(x)] * (ncol - len(groupby)) for x in range(min(nrow, 4)) for _ in q
-    ]
-    expected_columns = [x for x in range(ncol) if x not in groupby]
-    expected = pd.DataFrame(
-        expected_values, index=expected_index, columns=expected_columns
-    )
-    result = df.groupby(groupby).quantile(q)
-
-    tm.assert_frame_equal(result, expected)
-
-
-def test_quantile_raises():
-    df = pd.DataFrame(
-        [["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]
-    )
-
-    with pytest.raises(TypeError, match="cannot be performed against 'object' dtypes"):
-        df.groupby("key").quantile()
-
-
-def test_quantile_out_of_bounds_q_raises():
-    # https://github.com/pandas-dev/pandas/issues/27470
-    df = pd.DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6)))
-    g = df.groupby([0, 0, 0, 1, 1, 1])
-    with pytest.raises(ValueError, match="Got '50.0' instead"):
-        g.quantile(50)
-
-    with pytest.raises(ValueError, match="Got '-1.0' instead"):
-        g.quantile(-1)
-
-
-def test_quantile_missing_group_values_no_segfaults():
-    # GH 28662
-    data = np.array([1.0, np.nan, 1.0])
-    df = pd.DataFrame(dict(key=data, val=range(3)))
-
-    # Random segfaults; would have been guaranteed in loop
-    grp = df.groupby("key")
-    for _ in range(100):
-        grp.quantile()
-
-
-def test_quantile_missing_group_values_correct_results():
-    # GH 28662
-    data = np.array([1.0, np.nan, 3.0, np.nan])
-    df = pd.DataFrame(dict(key=data, val=range(4)))
-
-    result = df.groupby("key").quantile()
-    expected = pd.DataFrame(
-        [1.0, 3.0], index=pd.Index([1.0, 3.0], name="key"), columns=["val"]
-    )
-    tm.assert_frame_equal(result, expected)
-
-
-@pytest.mark.parametrize(
-    "values",
-    [
-        pd.array([1, 0, None] * 2, dtype="Int64"),
-        pd.array([True, False, None] * 2, dtype="boolean"),
-    ],
-)
-@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]])
-def test_groupby_quantile_nullable_array(values, q):
-    # https://github.com/pandas-dev/pandas/issues/33136
-    df = pd.DataFrame({"a": ["x"] * 3 + ["y"] * 3, "b": values})
-    result = df.groupby("a")["b"].quantile(q)
-
-    if isinstance(q, list):
-        idx = pd.MultiIndex.from_product((["x", "y"], q), names=["a", None])
-        true_quantiles = [0.0, 0.5, 1.0]
-    else:
-        idx = pd.Index(["x", "y"], name="a")
-        true_quantiles = [0.5]
-
-    expected = pd.Series(true_quantiles * 2, index=idx, name="b")
-    tm.assert_series_equal(result, expected)
-
-
-# pipe
-# --------------------------------
-
-
-def test_pipe():
-    # Test the pipe method of DataFrameGroupBy.
-    # Issue #17871
-
-    random_state = np.random.RandomState(1234567890)
-
-    df = DataFrame(
-        {
-            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
-            "B": random_state.randn(8),
-            "C": random_state.randn(8),
-        }
-    )
-
-    def f(dfgb):
-        return dfgb.B.max() - dfgb.C.min().min()
-
-    def square(srs):
-        return srs ** 2
-
-    # Note that the transformations are
-    # GroupBy -> Series
-    # Series -> Series
-    # This then chains the GroupBy.pipe and the
-    # NDFrame.pipe methods
-    result = df.groupby("A").pipe(f).pipe(square)
-
-    index = Index(["bar", "foo"], dtype="object", name="A")
-    expected = pd.Series([8.99110003361, 8.17516964785], name="B", index=index)
-
-    tm.assert_series_equal(expected, result)
-
-
-def test_pipe_args():
-    # Test passing args to the pipe method of DataFrameGroupBy.
-    # Issue #17871
-
-    df = pd.DataFrame(
-        {
-            "group": ["A", "A", "B", "B", "C"],
-            "x": [1.0, 2.0, 3.0, 2.0, 5.0],
-            "y": [10.0, 100.0, 1000.0, -100.0, -1000.0],
-        }
-    )
-
-    def f(dfgb, arg1):
-        return dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False).groupby(
-            dfgb.grouper
-        )
-
-    def g(dfgb, arg2):
-        return dfgb.sum() / dfgb.sum().sum() + arg2
-
-    def h(df, arg3):
-        return df.x + df.y - arg3
-
-    result = df.groupby("group").pipe(f, 0).pipe(g, 10).pipe(h, 100)
-
-    # Assert the results here
-    index = pd.Index(["A", "B", "C"], name="group")
-    expected = pd.Series([-79.5160891089, -78.4839108911, -80], index=index)
-
-    tm.assert_series_equal(expected, result)
-
-    # test SeriesGroupby.pipe
-    ser = pd.Series([1, 1, 2, 2, 3, 3])
-    result = ser.groupby(ser).pipe(lambda grp: grp.sum() * grp.count())
-
-    expected = pd.Series([4, 8, 12], index=pd.Int64Index([1, 2, 3]))
-
-    tm.assert_series_equal(result, expected)
-
-
-def test_groupby_mean_no_overflow():
-    # Regression test for (#22487)
-    df = pd.DataFrame(
-        {
-            "user": ["A", "A", "A", "A", "A"],
-            "connections": [4970, 4749, 4719, 4704, 18446744073699999744],
-        }
-    )
-    assert df.groupby("user")["connections"].mean()["A"] == 3689348814740003840
-
-
-@pytest.mark.parametrize(
-    "values",
-    [
-        {
-            "a": [1, 1, 1, 2, 2, 2, 3, 3, 3],
-            "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2],
-        },
-        {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]},
-    ],
-)
-@pytest.mark.parametrize("function", ["mean", "median", "var"])
-def test_apply_to_nullable_integer_returns_float(values, function):
-    # https://github.com/pandas-dev/pandas/issues/32219
-    output = 0.5 if function == "var" else 1.5
-    arr = np.array([output] * 3, dtype=float)
-    idx = pd.Index([1, 2, 3], dtype=object, name="a")
-    expected = pd.DataFrame({"b": arr}, index=idx)
-
-    groups = pd.DataFrame(values, dtype="Int64").groupby("a")
-
-    result = getattr(groups, function)()
-    tm.assert_frame_equal(result, expected)
-
-    result = groups.agg(function)
-    tm.assert_frame_equal(result, expected)
-
-    result = groups.agg([function])
-    expected.columns = MultiIndex.from_tuples([("b", function)])
-    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py
new file mode 100644
index 0000000000000..427a8accf7e7a
--- /dev/null
+++ b/pandas/tests/groupby/test_nunique.py
@@ -0,0 +1,164 @@
+import datetime as dt
+from string import ascii_lowercase
+
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import DataFrame, MultiIndex, NaT, Series, Timestamp, date_range
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize("n", 10 ** np.arange(2, 6))
+@pytest.mark.parametrize("m", [10, 100, 1000])
+@pytest.mark.parametrize("sort", [False, True])
+@pytest.mark.parametrize("dropna", [False, True])
+def test_series_groupby_nunique(n, m, sort, dropna):
+    # SeriesGroupBy.nunique must match applying Series.nunique to each group
+    def check_nunique(df, keys, as_index=True):
+        snapshot = df.copy()
+        opts = {"as_index": as_index, "sort": sort}
+
+        left = df.groupby(keys, **opts)["julie"].nunique(dropna=dropna)
+        right = df.groupby(keys, **opts)["julie"].apply(Series.nunique, dropna=dropna)
+        if not as_index:
+            right = right.reset_index(drop=True)
+
+        tm.assert_series_equal(left, right, check_names=False)
+        # neither code path may modify the caller's frame
+        tm.assert_frame_equal(df, snapshot)
+
+    days = date_range("2015-08-23", periods=10)
+    frame = DataFrame(
+        {
+            "jim": np.random.choice(list(ascii_lowercase), n),
+            "joe": np.random.choice(days, n),
+            "julie": np.random.randint(0, m, n),
+        }
+    )
+    key_sets = (["jim"], ["jim", "joe"])
+
+    for keys in key_sets:
+        check_nunique(frame, keys)
+
+    # sprinkle missing values into both keys and into the counted column
+    frame.loc[1::17, "jim"] = None
+    frame.loc[3::37, "joe"] = None
+    for start in (7, 8, 9):
+        frame.loc[start::19, "julie"] = None
+
+    for as_index in (True, False):
+        for keys in key_sets:
+            check_nunique(frame, keys, as_index=as_index)
+
+
+def test_nunique():
+    # DataFrameGroupBy.nunique: flat output, keyed output, then missing values
+    df = DataFrame({"A": list("abbacc"), "B": list("abxacc"), "C": list("abbacx")})
+    with_none = df.replace({"x": None})
+
+    expected = DataFrame({"A": [1] * 3, "B": [1, 2, 1], "C": [1, 1, 2]})
+    result = df.groupby("A", as_index=False).nunique()
+    tm.assert_frame_equal(result, expected)
+
+    # as_index: identical counts, the group labels become the index
+    expected.index = pd.Index(list("abc"), name="A")
+    tm.assert_frame_equal(df.groupby("A").nunique(), expected)
+
+    # with na: dropna=False counts None as a value of its own
+    tm.assert_frame_equal(with_none.groupby("A").nunique(dropna=False), expected)
+
+    # dropna: by default None is ignored, leaving one value per group
+    ones = [1] * 3
+    expected = DataFrame({"A": ones, "B": ones, "C": ones}, index=list("abc"))
+    expected.index.name = "A"
+    result = with_none.groupby("A").nunique()
+    tm.assert_frame_equal(result, expected)
+
+
+def test_nunique_with_object():
+    # GH 11077: every (id, amount) pair is unique, so each group has one name
+    records = [
+        [100, 1, "Alice"],
+        [200, 2, "Bob"],
+        [300, 3, "Charlie"],
+        [-400, 4, "Dan"],
+        [500, 5, "Edith"],
+    ]
+    data = pd.DataFrame(records, columns=["amount", "id", "name"])
+
+    counted = data.groupby(["id", "amount"])["name"].nunique()
+
+    # the expected index is built from the key columns themselves
+    keys = MultiIndex.from_arrays([data["id"], data["amount"]])
+    wanted = pd.Series([1] * len(data), index=keys, name="name")
+    tm.assert_series_equal(counted, wanted)
+
+
+def test_nunique_with_empty_series():
+    # GH 12553: an empty object Series gives an empty int64 result
+    empty = pd.Series(name="name", dtype=object)
+    wanted = pd.Series(name="name", dtype="int64")
+    counted = empty.groupby(level=0).nunique()
+    tm.assert_series_equal(counted, wanted)
+
+
+def test_nunique_with_timegrouper():
+    # GH 13453: hourly Grouper, nunique compared with apply(Series.nunique)
+    stamps = [
+        Timestamp("2016-06-28 09:35:35"),
+        Timestamp("2016-06-28 16:09:30"),
+        Timestamp("2016-06-28 16:46:28"),
+    ]
+    test = pd.DataFrame({"time": stamps, "data": ["1", "2", "3"]})
+    test = test.set_index("time")
+
+    def hourly():
+        return test.groupby(pd.Grouper(freq="h"))["data"]
+
+    expected = hourly().apply(pd.Series.nunique)
+    tm.assert_series_equal(hourly().nunique(), expected)
+
+
+@pytest.mark.parametrize(
+    "key, data, dropna, expected",
+    [
+        (
+            list("xxx"),
+            [Timestamp("2019-01-01"), NaT, Timestamp("2019-01-01")],
+            True,  # NaT is excluded from the count
+            Series([1], index=pd.Index(["x"], name="key"), name="data"),
+        ),
+        (
+            list("xxx"),
+            [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)],
+            True,  # NaT is excluded from the count
+            Series([1], index=pd.Index(["x"], name="key"), name="data"),
+        ),
+        (
+            list("xxxyy"),
+            [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)],
+            False,  # NaT counts as one more distinct value
+            Series([2, 2], index=pd.Index(list("xy"), name="key"), name="data"),
+        ),
+        (
+            list("xxxxy"),
+            [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)],
+            False,  # NaT counts as one more distinct value
+            Series([2, 1], index=pd.Index(list("xy"), name="key"), name="data"),
+        ),
+    ],
+)
+def test_nunique_with_NaT(key, data, dropna, expected):
+    # GH 27951: NaT mixed into Timestamp and datetime.date columns
+    grouped = pd.DataFrame({"key": key, "data": data}).groupby(["key"])
+    counted = grouped["data"].nunique(dropna=dropna)
+    tm.assert_series_equal(counted, expected)
+
+
+def test_nunique_preserves_column_level_names():
+    # GH 23222: the columns' name ("level_0") must survive nunique
+    named_cols = pd.Index(["A"], name="level_0")
+    test = pd.DataFrame([1, 2, 2], columns=named_cols)
+    expected = pd.DataFrame([2], columns=test.columns)
+    tm.assert_frame_equal(test.groupby([0, 0, 0]).nunique(), expected)
diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py
new file mode 100644
index 0000000000000..69ffdba06cbca
--- /dev/null
+++ b/pandas/tests/groupby/test_quantile.py
@@ -0,0 +1,336 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import DataFrame, Index, MultiIndex
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize(
+    "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"]
+)
+@pytest.mark.parametrize(
+    "a_vals,b_vals",
+    [
+        # Ints
+        ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]),
+        ([1, 2, 3, 4], [4, 3, 2, 1]),
+        ([1, 2, 3, 4, 5], [4, 3, 2, 1]),
+        # Floats
+        ([1.0, 2.0, 3.0, 4.0, 5.0], [5.0, 4.0, 3.0, 2.0, 1.0]),
+        # Missing data
+        ([1.0, np.nan, 3.0, np.nan, 5.0], [5.0, np.nan, 3.0, np.nan, 1.0]),
+        ([np.nan, 4.0, np.nan, 2.0, np.nan], [np.nan, 4.0, np.nan, 2.0, np.nan]),
+        # Timestamps
+        (
+            list(pd.date_range("1/1/18", freq="D", periods=5)),
+            list(pd.date_range("1/1/18", freq="D", periods=5))[::-1],
+        ),
+        # All NA
+        ([np.nan] * 5, [np.nan] * 5),
+    ],
+)
+@pytest.mark.parametrize("q", [0, 0.25, 0.5, 0.75, 1])
+def test_quantile(interpolation, a_vals, b_vals, q):
+    if interpolation == "nearest" and q == 0.5 and b_vals == [4, 3, 2, 1]:
+        pytest.skip(
+            "Unclear numpy expectation for nearest result with equidistant data"
+        )
+
+    a_expected = pd.Series(a_vals).quantile(q, interpolation=interpolation)
+    b_expected = pd.Series(b_vals).quantile(q, interpolation=interpolation)
+
+    df = DataFrame(
+        {"key": ["a"] * len(a_vals) + ["b"] * len(b_vals), "val": a_vals + b_vals}
+    )
+
+    expected = DataFrame(
+        [a_expected, b_expected], columns=["val"], index=Index(["a", "b"], name="key")
+    )
+    result = df.groupby("key").quantile(q, interpolation=interpolation)
+
+    tm.assert_frame_equal(result, expected)
+
+
+def test_quantile_array():
+    # https://github.com/pandas-dev/pandas/issues/27526
+    df = pd.DataFrame({"A": [0, 1, 2, 3, 4]})
+    result = df.groupby([0, 0, 1, 1, 1]).quantile([0.25])
+
+    index = pd.MultiIndex.from_product([[0, 1], [0.25]])
+    expected = pd.DataFrame({"A": [0.25, 2.50]}, index=index)
+    tm.assert_frame_equal(result, expected)
+
+    df = pd.DataFrame({"A": [0, 1, 2, 3], "B": [4, 5, 6, 7]})
+    index = pd.MultiIndex.from_product([[0, 1], [0.25, 0.75]])
+
+    result = df.groupby([0, 0, 1, 1]).quantile([0.25, 0.75])
+    expected = pd.DataFrame(
+        {"A": [0.25, 0.75, 2.25, 2.75], "B": [4.25, 4.75, 6.25, 6.75]}, index=index
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_quantile_array2():
+    # https://github.com/pandas-dev/pandas/pull/28085#issuecomment-524066959
+    df = pd.DataFrame(
+        np.random.RandomState(0).randint(0, 5, size=(10, 3)), columns=list("ABC")
+    )
+    result = df.groupby("A").quantile([0.3, 0.7])
+    expected = pd.DataFrame(
+        {
+            "B": [0.9, 2.1, 2.2, 3.4, 1.6, 2.4, 2.3, 2.7, 0.0, 0.0],
+            "C": [1.2, 2.8, 1.8, 3.0, 0.0, 0.0, 1.9, 3.1, 3.0, 3.0],
+        },
+        index=pd.MultiIndex.from_product(
+            [[0, 1, 2, 3, 4], [0.3, 0.7]], names=["A", None]
+        ),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_quantile_array_no_sort():
+    df = pd.DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]})
+    result = df.groupby([1, 0, 1], sort=False).quantile([0.25, 0.5, 0.75])
+    expected = pd.DataFrame(
+        {"A": [0.5, 1.0, 1.5, 1.0, 1.0, 1.0], "B": [3.5, 4.0, 4.5, 4.0, 4.0, 4.0]},
+        index=pd.MultiIndex.from_product([[1, 0], [0.25, 0.5, 0.75]]),
+    )
+    tm.assert_frame_equal(result, expected)
+
+    result = df.groupby([1, 0, 1], sort=False).quantile([0.75, 0.25])
+    expected = pd.DataFrame(
+        {"A": [1.5, 0.5, 1.0, 1.0], "B": [4.5, 3.5, 4.0, 4.0]},
+        index=pd.MultiIndex.from_product([[1, 0], [0.75, 0.25]]),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_quantile_array_multiple_levels():
+    df = pd.DataFrame(
+        {"A": [0, 1, 2], "B": [3, 4, 5], "c": ["a", "a", "a"], "d": ["a", "a", "b"]}
+    )
+    result = df.groupby(["c", "d"]).quantile([0.25, 0.75])
+    index = pd.MultiIndex.from_tuples(
+        [("a", "a", 0.25), ("a", "a", 0.75), ("a", "b", 0.25), ("a", "b", 0.75)],
+        names=["c", "d", None],
+    )
+    expected = pd.DataFrame(
+        {"A": [0.25, 0.75, 2.0, 2.0], "B": [3.25, 3.75, 5.0, 5.0]}, index=index
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("frame_size", [(2, 3), (100, 10)])
+@pytest.mark.parametrize("groupby", [[0], [0, 1]])
+@pytest.mark.parametrize("q", [[0.5, 0.6]])
+def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby, q):
+    # GH30289
+    nrow, ncol = frame_size
+    df = pd.DataFrame(
+        np.array([ncol * [_ % 4] for _ in range(nrow)]), columns=range(ncol)
+    )
+
+    idx_levels = [list(range(min(nrow, 4)))] * len(groupby) + [q]
+    idx_codes = [[x for x in range(min(nrow, 4)) for _ in q]] * len(groupby) + [
+        list(range(len(q))) * min(nrow, 4)
+    ]
+    expected_index = pd.MultiIndex(
+        levels=idx_levels, codes=idx_codes, names=groupby + [None]
+    )
+    expected_values = [
+        [float(x)] * (ncol - len(groupby)) for x in range(min(nrow, 4)) for _ in q
+    ]
+    expected_columns = [x for x in range(ncol) if x not in groupby]
+    expected = pd.DataFrame(
+        expected_values, index=expected_index, columns=expected_columns
+    )
+    result = df.groupby(groupby).quantile(q)
+
+    tm.assert_frame_equal(result, expected)
+
+
+def test_quantile_raises():
+    df = pd.DataFrame(
+        [["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]
+    )
+
+    with pytest.raises(TypeError, match="cannot be performed against 'object' dtypes"):
+        df.groupby("key").quantile()
+
+
+def test_quantile_out_of_bounds_q_raises():
+    # https://github.com/pandas-dev/pandas/issues/27470
+    df = pd.DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6)))
+    g = df.groupby([0, 0, 0, 1, 1, 1])
+    with pytest.raises(ValueError, match="Got '50.0' instead"):
+        g.quantile(50)
+
+    with pytest.raises(ValueError, match="Got '-1.0' instead"):
+        g.quantile(-1)
+
+
+def test_quantile_missing_group_values_no_segfaults():
+    # GH 28662
+    data = np.array([1.0, np.nan, 1.0])
+    df = pd.DataFrame(dict(key=data, val=range(3)))
+
+    # The segfault was intermittent; repeating the call makes a regression reliable
+    grp = df.groupby("key")
+    for _ in range(100):
+        grp.quantile()
+
+
+def test_quantile_missing_group_values_correct_results():
+    # GH 28662
+    data = np.array([1.0, np.nan, 3.0, np.nan])
+    df = pd.DataFrame(dict(key=data, val=range(4)))
+
+    result = df.groupby("key").quantile()
+    expected = pd.DataFrame(
+        [1.0, 3.0], index=pd.Index([1.0, 3.0], name="key"), columns=["val"]
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "values",
+    [
+        pd.array([1, 0, None] * 2, dtype="Int64"),
+        pd.array([True, False, None] * 2, dtype="boolean"),
+    ],
+)
+@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]])
+def test_groupby_quantile_nullable_array(values, q):
+    # https://github.com/pandas-dev/pandas/issues/33136
+    df = pd.DataFrame({"a": ["x"] * 3 + ["y"] * 3, "b": values})
+    result = df.groupby("a")["b"].quantile(q)
+
+    if isinstance(q, list):
+        idx = pd.MultiIndex.from_product((["x", "y"], q), names=["a", None])
+        true_quantiles = [0.0, 0.5, 1.0]
+    else:
+        idx = pd.Index(["x", "y"], name="a")
+        true_quantiles = [0.5]
+
+    expected = pd.Series(true_quantiles * 2, index=idx, name="b")
+    tm.assert_series_equal(result, expected)
+
+
+# pipe
+# --------------------------------
+
+
+def test_pipe():
+    # Test the pipe method of DataFrameGroupBy.
+    # Issue #17871
+
+    random_state = np.random.RandomState(1234567890)
+
+    df = DataFrame(
+        {
+            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
+            "B": random_state.randn(8),
+            "C": random_state.randn(8),
+        }
+    )
+
+    def f(dfgb):
+        return dfgb.B.max() - dfgb.C.min().min()
+
+    def square(srs):
+        return srs ** 2
+
+    # Note that the transformations are
+    # GroupBy -> Series
+    # Series -> Series
+    # This then chains the GroupBy.pipe and the
+    # NDFrame.pipe methods
+    result = df.groupby("A").pipe(f).pipe(square)
+
+    index = Index(["bar", "foo"], dtype="object", name="A")
+    expected = pd.Series([8.99110003361, 8.17516964785], name="B", index=index)
+
+    tm.assert_series_equal(expected, result)
+
+
+def test_pipe_args():
+    # Test passing args to the pipe method of DataFrameGroupBy.
+    # Issue #17871
+
+    df = pd.DataFrame(
+        {
+            "group": ["A", "A", "B", "B", "C"],
+            "x": [1.0, 2.0, 3.0, 2.0, 5.0],
+            "y": [10.0, 100.0, 1000.0, -100.0, -1000.0],
+        }
+    )
+
+    def f(dfgb, arg1):
+        return dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False).groupby(
+            dfgb.grouper
+        )
+
+    def g(dfgb, arg2):
+        return dfgb.sum() / dfgb.sum().sum() + arg2
+
+    def h(df, arg3):
+        return df.x + df.y - arg3
+
+    result = df.groupby("group").pipe(f, 0).pipe(g, 10).pipe(h, 100)
+
+    # Assert the results here
+    index = pd.Index(["A", "B", "C"], name="group")
+    expected = pd.Series([-79.5160891089, -78.4839108911, -80], index=index)
+
+    tm.assert_series_equal(expected, result)
+
+    # test SeriesGroupby.pipe
+    ser = pd.Series([1, 1, 2, 2, 3, 3])
+    result = ser.groupby(ser).pipe(lambda grp: grp.sum() * grp.count())
+
+    expected = pd.Series([4, 8, 12], index=pd.Int64Index([1, 2, 3]))
+
+    tm.assert_series_equal(result, expected)
+
+
+def test_groupby_mean_no_overflow():
+    # Regression test for (#22487)
+    df = pd.DataFrame(
+        {
+            "user": ["A", "A", "A", "A", "A"],
+            "connections": [4970, 4749, 4719, 4704, 18446744073699999744],
+        }
+    )
+    assert df.groupby("user")["connections"].mean()["A"] == 3689348814740003840
+
+
+@pytest.mark.parametrize(
+    "values",
+    [
+        {
+            "a": [1, 1, 1, 2, 2, 2, 3, 3, 3],
+            "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2],
+        },
+        {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]},
+    ],
+)
+@pytest.mark.parametrize("function", ["mean", "median", "var"])
+def test_apply_to_nullable_integer_returns_float(values, function):
+    # https://github.com/pandas-dev/pandas/issues/32219
+    output = 0.5 if function == "var" else 1.5
+    arr = np.array([output] * 3, dtype=float)
+    idx = pd.Index([1, 2, 3], dtype=object, name="a")
+    expected = pd.DataFrame({"b": arr}, index=idx)
+
+    groups = pd.DataFrame(values, dtype="Int64").groupby("a")
+
+    result = getattr(groups, function)()
+    tm.assert_frame_equal(result, expected)
+
+    result = groups.agg(function)
+    tm.assert_frame_equal(result, expected)
+
+    result = groups.agg([function])
+    expected.columns = MultiIndex.from_tuples([("b", function)])
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/test_size.py
new file mode 100644
index 0000000000000..346e6ae6cb9cb
--- /dev/null
+++ b/pandas/tests/groupby/test_size.py
@@ -0,0 +1,38 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame, Index, Series
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]])
+def test_size(df, by):
+    grouped = df.groupby(by=by)
+    result = grouped.size()
+    for key, group in grouped:
+        assert result[key] == len(group)
+
+
+@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]])
+@pytest.mark.parametrize("sort", [True, False])
+def test_size_sort(sort, by):
+    df = DataFrame(np.random.choice(20, (1000, 3)), columns=list("ABC"))
+    left = df.groupby(by=by, sort=sort).size()
+    right = df.groupby(by=by, sort=sort)["C"].apply(lambda a: a.shape[0])
+    tm.assert_series_equal(left, right, check_names=False)
+
+
+def test_size_series_dataframe():
+    # https://github.com/pandas-dev/pandas/issues/11699
+    df = DataFrame(columns=["A", "B"])
+    out = Series(dtype="int64", index=Index([], name="A"))
+    tm.assert_series_equal(df.groupby("A").size(), out)
+
+
+def test_size_groupby_all_null():
+    # https://github.com/pandas-dev/pandas/issues/23050
+    # Assert no 'ValueError: Length of passed values is 2, index implies 0'
+    df = DataFrame({"A": [None, None]})  # all-null groups
+    result = df.groupby("A").size()
+    expected = Series(dtype="int64", index=Index([], name="A"))
+    tm.assert_series_equal(result, expected)

From 8deed5f5ff5e1d9b567f01078340eda0d2261361 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Fri, 8 May 2020 11:46:30 -0500
Subject: [PATCH 2/3] Fixup

---
 pandas/tests/groupby/test_function.py |  42 +++++++++
 pandas/tests/groupby/test_pipe.py     |  78 +++++++++++++++++
 pandas/tests/groupby/test_quantile.py | 119 --------------------------
 3 files changed, 120 insertions(+), 119 deletions(-)
 create mode 100644 pandas/tests/groupby/test_pipe.py

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 11673532681a4..840976a455f79 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -980,3 +980,45 @@ def test_frame_describe_unstacked_format():
         columns=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_mean_no_overflow():
+    # Regression test for (#22487)
+    df = pd.DataFrame(
+        {
+            "user": ["A", "A", "A", "A", "A"],
+            "connections": [4970, 4749, 4719, 4704, 18446744073699999744],
+        }
+    )
+    assert df.groupby("user")["connections"].mean()["A"] == 3689348814740003840
+
+
+@pytest.mark.parametrize(
+    "values",
+    [
+        {
+            "a": [1, 1, 1, 2, 2, 2, 3, 3, 3],
+            "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2],
+        },
+        {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]},
+    ],
+)
+@pytest.mark.parametrize("function", ["mean", "median", "var"])
+def test_apply_to_nullable_integer_returns_float(values, function):
+    # https://github.com/pandas-dev/pandas/issues/32219
+    output = 0.5 if function == "var" else 1.5
+    arr = np.array([output] * 3, dtype=float)
+    idx = pd.Index([1, 2, 3], dtype=object, name="a")
+    expected = pd.DataFrame({"b": arr}, index=idx)
+
+    groups = pd.DataFrame(values, dtype="Int64").groupby("a")
+
+    result = getattr(groups, function)()
+    tm.assert_frame_equal(result, expected)
+
+    result = groups.agg(function)
+    tm.assert_frame_equal(result, expected)
+
+    result = groups.agg([function])
+    expected.columns = MultiIndex.from_tuples([("b", function)])
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py
new file mode 100644
index 0000000000000..d2ab016f608fa
--- /dev/null
+++ b/pandas/tests/groupby/test_pipe.py
@@ -0,0 +1,78 @@
+import numpy as np
+
+import pandas as pd
+from pandas import DataFrame, Index
+import pandas._testing as tm
+
+
+def test_pipe():
+    # Test the pipe method of DataFrameGroupBy.
+    # Issue #17871
+
+    random_state = np.random.RandomState(1234567890)
+
+    df = DataFrame(
+        {
+            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
+            "B": random_state.randn(8),
+            "C": random_state.randn(8),
+        }
+    )
+
+    def f(dfgb):
+        return dfgb.B.max() - dfgb.C.min().min()
+
+    def square(srs):
+        return srs ** 2
+
+    # Note that the transformations are
+    # GroupBy -> Series
+    # Series -> Series
+    # This then chains the GroupBy.pipe and the
+    # NDFrame.pipe methods
+    result = df.groupby("A").pipe(f).pipe(square)
+
+    index = Index(["bar", "foo"], dtype="object", name="A")
+    expected = pd.Series([8.99110003361, 8.17516964785], name="B", index=index)
+
+    tm.assert_series_equal(expected, result)
+
+
+def test_pipe_args():
+    # Test passing args to the pipe method of DataFrameGroupBy.
+    # Issue #17871
+
+    df = pd.DataFrame(
+        {
+            "group": ["A", "A", "B", "B", "C"],
+            "x": [1.0, 2.0, 3.0, 2.0, 5.0],
+            "y": [10.0, 100.0, 1000.0, -100.0, -1000.0],
+        }
+    )
+
+    def f(dfgb, arg1):
+        return dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False).groupby(
+            dfgb.grouper
+        )
+
+    def g(dfgb, arg2):
+        return dfgb.sum() / dfgb.sum().sum() + arg2
+
+    def h(df, arg3):
+        return df.x + df.y - arg3
+
+    result = df.groupby("group").pipe(f, 0).pipe(g, 10).pipe(h, 100)
+
+    # Assert the results here
+    index = pd.Index(["A", "B", "C"], name="group")
+    expected = pd.Series([-79.5160891089, -78.4839108911, -80], index=index)
+
+    tm.assert_series_equal(expected, result)
+
+    # test SeriesGroupby.pipe
+    ser = pd.Series([1, 1, 2, 2, 3, 3])
+    result = ser.groupby(ser).pipe(lambda grp: grp.sum() * grp.count())
+
+    expected = pd.Series([4, 8, 12], index=pd.Int64Index([1, 2, 3]))
+
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py
index 69ffdba06cbca..6cab1f5d540e9 100644
--- a/pandas/tests/groupby/test_quantile.py
+++ b/pandas/tests/groupby/test_quantile.py
@@ -215,122 +215,3 @@ def test_groupby_quantile_nullable_array(values, q):
 
     expected = pd.Series(true_quantiles * 2, index=idx, name="b")
     tm.assert_series_equal(result, expected)
-
-
-# pipe
-# --------------------------------
-
-
-def test_pipe():
-    # Test the pipe method of DataFrameGroupBy.
-    # Issue #17871
-
-    random_state = np.random.RandomState(1234567890)
-
-    df = DataFrame(
-        {
-            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
-            "B": random_state.randn(8),
-            "C": random_state.randn(8),
-        }
-    )
-
-    def f(dfgb):
-        return dfgb.B.max() - dfgb.C.min().min()
-
-    def square(srs):
-        return srs ** 2
-
-    # Note that the transformations are
-    # GroupBy -> Series
-    # Series -> Series
-    # This then chains the GroupBy.pipe and the
-    # NDFrame.pipe methods
-    result = df.groupby("A").pipe(f).pipe(square)
-
-    index = Index(["bar", "foo"], dtype="object", name="A")
-    expected = pd.Series([8.99110003361, 8.17516964785], name="B", index=index)
-
-    tm.assert_series_equal(expected, result)
-
-
-def test_pipe_args():
-    # Test passing args to the pipe method of DataFrameGroupBy.
-    # Issue #17871
-
-    df = pd.DataFrame(
-        {
-            "group": ["A", "A", "B", "B", "C"],
-            "x": [1.0, 2.0, 3.0, 2.0, 5.0],
-            "y": [10.0, 100.0, 1000.0, -100.0, -1000.0],
-        }
-    )
-
-    def f(dfgb, arg1):
-        return dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False).groupby(
-            dfgb.grouper
-        )
-
-    def g(dfgb, arg2):
-        return dfgb.sum() / dfgb.sum().sum() + arg2
-
-    def h(df, arg3):
-        return df.x + df.y - arg3
-
-    result = df.groupby("group").pipe(f, 0).pipe(g, 10).pipe(h, 100)
-
-    # Assert the results here
-    index = pd.Index(["A", "B", "C"], name="group")
-    expected = pd.Series([-79.5160891089, -78.4839108911, -80], index=index)
-
-    tm.assert_series_equal(expected, result)
-
-    # test SeriesGroupby.pipe
-    ser = pd.Series([1, 1, 2, 2, 3, 3])
-    result = ser.groupby(ser).pipe(lambda grp: grp.sum() * grp.count())
-
-    expected = pd.Series([4, 8, 12], index=pd.Int64Index([1, 2, 3]))
-
-    tm.assert_series_equal(result, expected)
-
-
-def test_groupby_mean_no_overflow():
-    # Regression test for (#22487)
-    df = pd.DataFrame(
-        {
-            "user": ["A", "A", "A", "A", "A"],
-            "connections": [4970, 4749, 4719, 4704, 18446744073699999744],
-        }
-    )
-    assert df.groupby("user")["connections"].mean()["A"] == 3689348814740003840
-
-
-@pytest.mark.parametrize(
-    "values",
-    [
-        {
-            "a": [1, 1, 1, 2, 2, 2, 3, 3, 3],
-            "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2],
-        },
-        {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]},
-    ],
-)
-@pytest.mark.parametrize("function", ["mean", "median", "var"])
-def test_apply_to_nullable_integer_returns_float(values, function):
-    # https://github.com/pandas-dev/pandas/issues/32219
-    output = 0.5 if function == "var" else 1.5
-    arr = np.array([output] * 3, dtype=float)
-    idx = pd.Index([1, 2, 3], dtype=object, name="a")
-    expected = pd.DataFrame({"b": arr}, index=idx)
-
-    groups = pd.DataFrame(values, dtype="Int64").groupby("a")
-
-    result = getattr(groups, function)()
-    tm.assert_frame_equal(result, expected)
-
-    result = groups.agg(function)
-    tm.assert_frame_equal(result, expected)
-
-    result = groups.agg([function])
-    expected.columns = MultiIndex.from_tuples([("b", function)])
-    tm.assert_frame_equal(result, expected)

From a920597752cd7f7be5f24f3b8bdb9ec96fa6ecf3 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Fri, 8 May 2020 12:15:31 -0500
Subject: [PATCH 3/3] Lint

---
 pandas/tests/groupby/test_function.py | 10 +---------
 pandas/tests/groupby/test_quantile.py |  2 +-
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 840976a455f79..e3862b92faf9f 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -7,15 +7,7 @@
 from pandas.errors import UnsupportedFunctionCall
 
 import pandas as pd
-from pandas import (
-    DataFrame,
-    Index,
-    MultiIndex,
-    Series,
-    Timestamp,
-    date_range,
-    isna,
-)
+from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, isna
 import pandas._testing as tm
 import pandas.core.nanops as nanops
 from pandas.util import _test_decorators as td
diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py
index 6cab1f5d540e9..87347fe1293ef 100644
--- a/pandas/tests/groupby/test_quantile.py
+++ b/pandas/tests/groupby/test_quantile.py
@@ -2,7 +2,7 @@
 import pytest
 
 import pandas as pd
-from pandas import DataFrame, Index, MultiIndex
+from pandas import DataFrame, Index
 import pandas._testing as tm