diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f716a3a44cd54..39121e92dcd83 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1738,6 +1738,7 @@ def test_pivot_table_values_key_error(): ) def test_empty_groupby(columns, keys, values, method, op, request): # GH8093 & GH26411 + override_dtype = None if isinstance(values, Categorical) and len(keys) == 1 and method == "apply": mark = pytest.mark.xfail(raises=TypeError, match="'str' object is not callable") @@ -1784,12 +1785,9 @@ def test_empty_groupby(columns, keys, values, method, op, request): and op in ["sum", "prod"] and method != "apply" ): - mark = pytest.mark.xfail( - raises=AssertionError, match="(DataFrame|Series) are different" - ) - request.node.add_marker(mark) + # We expect to get Int64 back for these + override_dtype = "Int64" - override_dtype = None if isinstance(values[0], bool) and op in ("prod", "sum") and method != "apply": # sum/product of bools is an integer override_dtype = "int64" diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index 20edf03c5b96c..6a73d540c7088 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -11,7 +11,6 @@ concat, ) import pandas._testing as tm -from pandas.core.base import DataError def test_rank_apply(): @@ -462,7 +461,6 @@ def test_rank_avg_even_vals(dtype, upper): tm.assert_frame_equal(result, exp_df) -@pytest.mark.xfail(reason="Works now, needs tests") @pytest.mark.parametrize("ties_method", ["average", "min", "max", "first", "dense"]) @pytest.mark.parametrize("ascending", [True, False]) @pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) @@ -470,13 +468,25 @@ def test_rank_avg_even_vals(dtype, upper): @pytest.mark.parametrize( "vals", [["bar", "bar", "foo", "bar", "baz"], ["bar", np.nan, "foo", np.nan, "baz"]] ) -def test_rank_object_raises(ties_method, ascending, na_option, pct, vals): +def test_rank_object_dtype(ties_method, ascending, na_option, pct, vals): df = DataFrame({"key": ["foo"] * 5, "val": vals}) + mask = df["val"].isna() - with pytest.raises(DataError, match="No numeric types to aggregate"): - df.groupby("key").rank( - method=ties_method, ascending=ascending, na_option=na_option, pct=pct - ) + gb = df.groupby("key") + res = gb.rank(method=ties_method, ascending=ascending, na_option=na_option, pct=pct) + + # construct our expected by using numeric values with the same ordering + if mask.any(): + df2 = DataFrame({"key": ["foo"] * 5, "val": [0, np.nan, 2, np.nan, 1]}) + else: + df2 = DataFrame({"key": ["foo"] * 5, "val": [0, 0, 2, 0, 1]}) + + gb2 = df2.groupby("key") + alt = gb2.rank( + method=ties_method, ascending=ascending, na_option=na_option, pct=pct + ) + + tm.assert_frame_equal(res, alt) @pytest.mark.parametrize("na_option", [True, "bad", 1])