-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
GroupBy enhancement unifies the return of iterating over GroupBy #42795 #47719
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 69 commits
54c5068
2951fb1
6335204
8afd6a1
6a7ede4
eb0ed28
83ca209
a0b3a59
153bbe5
43d1f92
242468c
9717f5d
5d7331e
a6829a7
b0abd59
c709311
4e14c87
09dec70
d1e9525
6b5d26b
b7c797a
600cdd9
1cb253d
c0ef8b6
ef05b5b
bd157c0
0e10d13
a4d58c8
7efc7ef
fa15243
dd070b2
e995f37
7951358
47b7564
fa26f37
12cb573
614a91e
6c848f2
79944f4
070c726
cc267bb
7b37e3f
4f59ca8
698e494
330c3c3
eb8db3f
9c37aa3
207f44b
958edf5
73efbab
6f52b26
190eaab
986a1e9
cd74c7e
6fc7371
dafef53
a3a0dd2
9fb479f
6464033
d745717
b788c0e
143466e
56f5aa3
93889dd
a12a6fb
fdea4a8
871fec8
ebf7b92
6ea317e
20c65a7
78e1e04
43a89e9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -806,7 +806,7 @@ def test_groupby_as_index_cython(df): | |
msg = "The default value of numeric_only" | ||
with tm.assert_produces_warning(FutureWarning, match=msg): | ||
result = grouped.mean() | ||
expected = data.groupby(["A"]).mean() | ||
expected = data.groupby("A").mean() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this changing?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As I mentioned in #47761
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do not understand what this means. Can you expand on it? Also, it's not clear to me - is the result of |
||
expected.insert(0, "A", expected.index) | ||
expected.index = np.arange(len(expected)) | ||
tm.assert_frame_equal(result, expected) | ||
|
@@ -1259,7 +1259,7 @@ def test_consistency_name(): | |
} | ||
) | ||
|
||
expected = df.groupby(["A"]).B.count() | ||
expected = df.groupby("A").B.count() | ||
result = df.B.groupby(df.A).count() | ||
tm.assert_series_equal(result, expected) | ||
|
||
|
@@ -1495,7 +1495,7 @@ def test_groupby_2d_malformed(): | |
d["label"] = ["l1", "l2"] | ||
msg = "The default value of numeric_only" | ||
with tm.assert_produces_warning(FutureWarning, match=msg): | ||
tmp = d.groupby(["group"]).mean() | ||
tmp = d.groupby("group").mean() | ||
res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) | ||
tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) | ||
tm.assert_numpy_array_equal(tmp.values, res_values) | ||
|
@@ -1888,7 +1888,7 @@ def test_pivot_table_values_key_error(): | |
|
||
|
||
@pytest.mark.parametrize("columns", ["C", ["C"]]) | ||
@pytest.mark.parametrize("keys", [["A"], ["A", "B"]]) | ||
@pytest.mark.parametrize("keys", ["A", ["A", "B"]]) | ||
@pytest.mark.parametrize( | ||
"values", | ||
[ | ||
|
@@ -2240,7 +2240,7 @@ def test_groupby_groups_in_BaseGrouper(): | |
assert result.groups == expected.groups | ||
|
||
|
||
@pytest.mark.parametrize("group_name", ["x", ["x"]]) | ||
@pytest.mark.parametrize("group_name", ["x"]) | ||
def test_groupby_axis_1(group_name): | ||
# GH 27614 | ||
df = DataFrame( | ||
|
@@ -2643,7 +2643,7 @@ def test_groupby_aggregation_non_numeric_dtype(): | |
index=Index(["M", "W"], dtype="object", name="MW"), | ||
) | ||
|
||
gb = df.groupby(by=["MW"]) | ||
gb = df.groupby(by="MW") | ||
result = gb.sum() | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
@@ -2666,7 +2666,7 @@ def test_groupby_aggregation_multi_non_numeric_dtype(): | |
index=Index([0, 1], dtype="int64", name="x"), | ||
) | ||
|
||
gb = df.groupby(by=["x"]) | ||
gb = df.groupby(by="x") | ||
result = gb.sum() | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
@@ -2686,7 +2686,7 @@ def test_groupby_aggregation_numeric_with_non_numeric_dtype(): | |
index=Index([0, 1], dtype="int64", name="x"), | ||
) | ||
|
||
gb = df.groupby(by=["x"]) | ||
gb = df.groupby(by="x") | ||
msg = "The default value of numeric_only" | ||
with tm.assert_produces_warning(FutureWarning, match=msg): | ||
result = gb.sum() | ||
|
@@ -2766,7 +2766,7 @@ def test_by_column_values_with_same_starting_value(): | |
) | ||
aggregate_details = {"Mood": Series.mode, "Credit": "sum"} | ||
|
||
result = df.groupby(["Name"]).agg(aggregate_details) | ||
result = df.groupby("Name").agg(aggregate_details) | ||
expected_result = DataFrame( | ||
{ | ||
"Mood": [["happy", "sad"], "happy"], | ||
|
@@ -2795,3 +2795,21 @@ def test_groupby_none_column_name(): | |
result = df.groupby(by=[None]).sum() | ||
expected = DataFrame({"b": [2, 5], "c": [9, 13]}, index=Index([1, 2], name=None)) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
def test_groupby_iterator_one_grouper():
    """Check the shape of the group key yielded when iterating over a GroupBy.

    A list grouper is expected to yield tuple keys, even when the list holds
    a single column, while a bare column label is expected to yield a scalar
    key.
    """
    # Only row "y" is assigned values; row "x" is left as constructed.
    df = DataFrame(columns=["a", "b", "c"], index=["x", "y"])
    df.loc["y"] = Series({"a": 1, "b": 5, "c": 2})

    # Two columns in a list: the key is a tuple with one entry per column.
    key, _ = next(iter(df.groupby(["a", "b"])))
    assert isinstance(key, tuple)
    assert key == (1, 5)

    # One column in a list: the key is still a tuple, of length one.
    key, _ = next(iter(df.groupby(["a"])))
    assert isinstance(key, tuple)
    assert key == (1,)

    # A bare label: the key is the scalar itself, not wrapped in a tuple.
    key, _ = next(iter(df.groupby("a")))
    assert not isinstance(key, tuple)
    assert key == 1
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this necessary?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because `.hist` and `.box` use groupby internally in a single way. For example, if I did hist by ['a','b','c','d'], the results would be like (a,), (b,), (c,), (d,). Some plotting functions and the pivot table are actually iterating over groupby.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But here, `grouped` is only being used in L66 immediately below, right? In this usage, only the group is being used and the key is ignored. So why is this needed if it's only the key changing?