|
16 | 16 | DataFrame,
|
17 | 17 | Grouper,
|
18 | 18 | Index,
|
| 19 | + Interval, |
19 | 20 | MultiIndex,
|
20 | 21 | RangeIndex,
|
21 | 22 | Series,
|
@@ -2972,6 +2973,47 @@ def test_groupby_numeric_only_std_no_result(numeric_only):
|
2972 | 2973 | dfgb.std(numeric_only=numeric_only)
|
2973 | 2974 |
|
2974 | 2975 |
|
def test_grouping_with_categorical_interval_columns():
    """Group by a qcut-binned column plus a label column, keeping every pair.

    With ``observed=False`` the result index is the full product of the
    interval categories and the labels in ``w``; combinations that never
    occur in the data are expected to hold NaN.
    """
    # GH#34164
    frame = DataFrame(
        {"x": [0.1, 0.2, 0.3, -0.4, 0.5], "w": ["a", "b", "a", "c", "a"]}
    )
    # Five evenly spaced quantile points -> four bins over "x".
    binned_x = pd.qcut(frame["x"], q=np.linspace(0, 1, 5))
    grouped = frame.groupby([binned_x, "w"], observed=False)
    result = grouped["x"].agg("mean")

    # Consecutive edges form the right-closed intervals expected from qcut.
    edges = [-0.401, 0.1, 0.2, 0.3, 0.5]
    interval_level = Categorical(
        [
            Interval(left, right, closed="right")
            for left, right in zip(edges[:-1], edges[1:])
        ],
        ordered=True,
    )
    label_level = ["a", "b", "c"]
    expected_index = MultiIndex.from_product(
        [interval_level, label_level], names=["x", "w"]
    )

    nan = np.nan
    # One row per interval, one column per label ("a", "b", "c").
    expected_values = np.array(
        [
            0.1, nan, -0.4,
            nan, 0.2, nan,
            0.3, nan, nan,
            0.5, nan, nan,
        ]
    )
    expected = Series(expected_values, index=expected_index, name="x")
    tm.assert_series_equal(result, expected)
| 3015 | + |
| 3016 | + |
2975 | 3017 | @pytest.mark.parametrize("bug_var", [1, "a"])
|
2976 | 3018 | def test_groupby_sum_on_nan_should_return_nan(bug_var):
|
2977 | 3019 | # GH 24196
|
|
0 commit comments