forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgroupby.py
106 lines (84 loc) · 4 KB
/
groupby.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.tests.extension.base.base import BaseExtensionTests
class BaseGroupbyTests(BaseExtensionTests):
"""Groupby-specific tests."""
def test_grouping_grouper(self, data_for_grouping):
df = pd.DataFrame(
{"A": ["B", "B", None, None, "A", "A", "B", "C"], "B": data_for_grouping}
)
gr1 = df.groupby("A").grouper.groupings[0]
gr2 = df.groupby("B").grouper.groupings[0]
tm.assert_numpy_array_equal(gr1.grouper, df.A.values)
tm.assert_extension_array_equal(gr2.grouper, data_for_grouping)
@pytest.mark.parametrize("as_index", [True, False])
def test_groupby_extension_agg(self, as_index, data_for_grouping):
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
result = df.groupby("B", as_index=as_index).A.mean()
_, uniques = pd.factorize(data_for_grouping, sort=True)
if as_index:
index = pd.Index(uniques, name="B")
expected = pd.Series([3, 1, 4], index=index, name="A")
self.assert_series_equal(result, expected)
else:
expected = pd.DataFrame({"B": uniques, "A": [3, 1, 4]})
self.assert_frame_equal(result, expected)
def test_groupby_agg_extension(self, data_for_grouping):
# GH#38980 groupby agg on extension type fails for non-numeric types
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
expected = df.iloc[[0, 2, 4, 7]]
expected = expected.set_index("A")
result = df.groupby("A").agg({"B": "first"})
self.assert_frame_equal(result, expected)
result = df.groupby("A").agg("first")
self.assert_frame_equal(result, expected)
result = df.groupby("A").first()
self.assert_frame_equal(result, expected)
def test_groupby_extension_no_sort(self, data_for_grouping):
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
result = df.groupby("B", sort=False).A.mean()
_, index = pd.factorize(data_for_grouping, sort=False)
index = pd.Index(index, name="B")
expected = pd.Series([1, 3, 4], index=index, name="A")
self.assert_series_equal(result, expected)
def test_groupby_extension_transform(self, data_for_grouping):
valid = data_for_grouping[~data_for_grouping.isna()]
df = pd.DataFrame({"A": [1, 1, 3, 3, 1, 4], "B": valid})
result = df.groupby("B").A.transform(len)
expected = pd.Series([3, 3, 2, 2, 3, 1], name="A")
self.assert_series_equal(result, expected)
def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
df.groupby("B").apply(groupby_apply_op)
df.groupby("B").A.apply(groupby_apply_op)
df.groupby("A").apply(groupby_apply_op)
df.groupby("A").B.apply(groupby_apply_op)
def test_groupby_apply_identity(self, data_for_grouping):
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
result = df.groupby("A").B.apply(lambda x: x.array)
expected = pd.Series(
[
df.B.iloc[[0, 1, 6]].array,
df.B.iloc[[2, 3]].array,
df.B.iloc[[4, 5]].array,
df.B.iloc[[7]].array,
],
index=pd.Index([1, 2, 3, 4], name="A"),
name="B",
)
self.assert_series_equal(result, expected)
def test_in_numeric_groupby(self, data_for_grouping):
df = pd.DataFrame(
{
"A": [1, 1, 2, 2, 3, 3, 1, 4],
"B": data_for_grouping,
"C": [1, 1, 1, 1, 1, 1, 1, 1],
}
)
result = df.groupby("A").sum().columns
if data_for_grouping.dtype._is_numeric:
expected = pd.Index(["B", "C"])
else:
expected = pd.Index(["C"])
tm.assert_index_equal(result, expected)