-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: groupby agg fails silently with mixed dtypes #43213
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 39 commits
91aa285
9b9b7af
d968e57
46a29e0
195db31
16e28db
854ecda
7dd27f3
309ee59
8d1bfb1
41471aa
3647f53
d6992e5
ab1fd87
23d2989
9039124
d75c57b
89a6bc7
64ca85a
8a0f5fe
4ae0d6b
ac51170
572f23c
183f245
62b5aac
625a751
9b0acd7
4b4618a
265b3bb
a55211a
753c7df
de86f72
2a29451
a37f1f9
00838be
a04d021
c9d6658
5db7155
5ccf385
9afd465
600b71e
fe13278
1aaf326
4ba8511
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2509,3 +2509,64 @@ def test_rolling_wrong_param_min_period(): | |
result_error_msg = r"__init__\(\) got an unexpected keyword argument 'min_period'" | ||
with pytest.raises(TypeError, match=result_error_msg): | ||
test_df.groupby("name")["val"].rolling(window=2, min_period=1).sum() | ||
|
||
|
||
@pytest.mark.parametrize( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Move these to pandas/tests/groupby/aggregate/test_aggregate.py (or maybe test_cython); see where similar tests are. |
||
"func, expected, dtype, result_dtype_dict", | ||
[ | ||
("sum", [5, 7, 9], "int64", {}), | ||
("std", [4.5 ** 0.5] * 3, int, {"i": float, "j": float, "k": float}), | ||
# 1.2.5: ValueError: Length mismatch: Expected axis | ||
# has 0 elements, new values have 3 elements | ||
("var", [4.5] * 3, int, {"i": float, "j": float, "k": float}), | ||
# 1.2.5: DataError: No numeric types to aggregate | ||
("sum", [5, 7, 9], "Int64", {"j": "int64"}), | ||
# 1.2.5: j:float64 | ||
("std", [4.5 ** 0.5] * 3, "Int64", {"i": float, "j": float, "k": float}), | ||
# 1.2.5: ValueError: Length mismatch: Expected axis | ||
# has 0 elements, new values have 3 elements | ||
("var", [4.5] * 3, "Int64", {"i": "float64", "j": "float64", "k": "float64"}), | ||
# 1.2.5: DataError: No numeric types to aggregate | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you remove the comments here? I appreciate the extra detail, but they shouldn't be part of a test; surfacing this in a comment on GitHub directly would be great (and you already have another comment that does something similar - thanks for that as well). |
||
], | ||
) | ||
def test_multiindex_groupby_mixed_cols_axis1(func, expected, dtype, result_dtype_dict):
    """Aggregate column-wise over the inner level of MultiIndex columns.

    The input frame has three identical rows ``[1, 2, 3, 4, 5, 6]`` under the
    column product ``["a", "b"] x ["i", "j", "k"]``.  Both ``"j"`` columns are
    cast to ``dtype`` before the frame is grouped by column level 1 and
    aggregated with ``func``.

    Parameters
    ----------
    func : str
        Aggregation name handed to ``.agg``.
    expected : list
        One expected output row; it is repeated for all three rows.
    dtype : str or type
        Dtype applied to the ``("a", "j")`` and ``("b", "j")`` columns.
    result_dtype_dict : dict
        Per-column dtypes the expected frame is cast to before comparison.
    """
    # GH#43209
    columns = MultiIndex.from_product([["a", "b"], ["i", "j", "k"]])
    frame = DataFrame([[1, 2, 3, 4, 5, 6]] * 3, columns=columns)
    frame = frame.astype({("a", "j"): dtype, ("b", "j"): dtype})

    result = frame.groupby(level=1, axis=1).agg(func)

    expected_frame = DataFrame([expected] * 3, columns=["i", "j", "k"])
    expected_frame = expected_frame.astype(result_dtype_dict)
    tm.assert_frame_equal(result, expected_frame)
|
||
|
||
@pytest.mark.parametrize(
    "func, expected_data, result_dtype_dict",
    [
        ("sum", [[2, 4], [10, 12], [18, 20]], {10: "int64", 20: "int64"}),
        # 1.2.5: 10: float64
        # std should ideally return Int64 #43330
        ("std", [[2 ** 0.5] * 2] * 3, "float64"),
        # 1.2.5: ValueError: Length mismatch: Expected axis
        #  has 0 elements, new values have 3 elements
        ("var", [[2] * 2] * 3, {10: "float64", 20: "float64"}),
        # 1.2.5: DataError: No numeric types to aggregate
    ],
)
def test_groupby_mixed_cols_axis1(func, expected_data, result_dtype_dict):
    """Aggregate column-wise over duplicated column labels of mixed dtype.

    A 3x4 ``int64`` frame built from ``np.arange(12)`` has column labels
    ``[10, 20, 10, 20]`` (named ``"x"``) and row labels ``[0, 1, 0]`` (named
    ``"y"``).  The columns labelled ``10`` are cast to ``"Int64"``, then the
    frame is grouped by ``"x"`` along ``axis=1`` and aggregated with ``func``.

    Parameters
    ----------
    func : str
        Aggregation name handed to ``.agg``.
    expected_data : list of list
        Expected values, one inner list per row.
    result_dtype_dict : dict or str
        Dtype specification the expected frame is cast to before comparison.
    """
    # GH#43209
    row_labels = Index([0, 1, 0], name="y")
    values = np.arange(12).reshape(3, 4)
    frame = DataFrame(
        values,
        index=row_labels,
        columns=Index([10, 20, 10, 20], name="x"),
        dtype="int64",
    )
    frame = frame.astype({10: "Int64"})

    result = frame.groupby("x", axis=1).agg(func)

    expected = DataFrame(
        data=expected_data,
        index=row_labels,
        columns=Index([10, 20], name="x"),
    )
    expected = expected.astype(result_dtype_dict)
    tm.assert_frame_equal(result, expected)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm a little late here, but this could potentially use `_get_data_to_aggregate`.