forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtest_size.py
105 lines (84 loc) · 3.47 KB
/
test_size.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import numpy as np
import pytest
from pandas.core.dtypes.common import is_integer_dtype
from pandas import (
DataFrame,
Index,
PeriodIndex,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]])
def test_size(df, by):
grouped = df.groupby(by=by)
result = grouped.size()
for key, group in grouped:
assert result[key] == len(group)
@pytest.mark.parametrize(
"by",
[
[0, 0, 0, 0],
[0, 1, 1, 1],
[1, 0, 1, 1],
[0, None, None, None],
pytest.param([None, None, None, None], marks=pytest.mark.xfail),
],
)
def test_size_axis_1(df, axis_1, by, sort, dropna):
# GH#45715
counts = {key: sum(value == key for value in by) for key in dict.fromkeys(by)}
if dropna:
counts = {key: value for key, value in counts.items() if key is not None}
expected = Series(counts, dtype="int64")
if sort:
expected = expected.sort_index()
if is_integer_dtype(expected.index.dtype) and not any(x is None for x in by):
expected.index = expected.index.astype(np.int_)
msg = "DataFrame.groupby with axis=1 is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
grouped = df.groupby(by=by, axis=axis_1, sort=sort, dropna=dropna)
result = grouped.size()
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]])
@pytest.mark.parametrize("sort", [True, False])
def test_size_sort(sort, by):
df = DataFrame(np.random.choice(20, (1000, 3)), columns=list("ABC"))
left = df.groupby(by=by, sort=sort).size()
right = df.groupby(by=by, sort=sort)["C"].apply(lambda a: a.shape[0])
tm.assert_series_equal(left, right, check_names=False)
def test_size_series_dataframe():
# https://github.com/pandas-dev/pandas/issues/11699
df = DataFrame(columns=["A", "B"])
out = Series(dtype="int64", index=Index([], name="A"))
tm.assert_series_equal(df.groupby("A").size(), out)
def test_size_groupby_all_null():
# https://github.com/pandas-dev/pandas/issues/23050
# Assert no 'Value Error : Length of passed values is 2, index implies 0'
df = DataFrame({"A": [None, None]}) # all-null groups
result = df.groupby("A").size()
expected = Series(dtype="int64", index=Index([], name="A"))
tm.assert_series_equal(result, expected)
def test_size_period_index():
# https://github.com/pandas-dev/pandas/issues/34010
ser = Series([1], index=PeriodIndex(["2000"], name="A", freq="D"))
grp = ser.groupby(level="A")
result = grp.size()
tm.assert_series_equal(result, ser)
@pytest.mark.parametrize("as_index", [True, False])
def test_size_on_categorical(as_index):
df = DataFrame([[1, 1], [2, 2]], columns=["A", "B"])
df["A"] = df["A"].astype("category")
result = df.groupby(["A", "B"], as_index=as_index, observed=False).size()
expected = DataFrame(
[[1, 1, 1], [1, 2, 0], [2, 1, 0], [2, 2, 1]], columns=["A", "B", "size"]
)
expected["A"] = expected["A"].astype("category")
if as_index:
expected = expected.set_index(["A", "B"])["size"].rename(None)
tm.assert_equal(result, expected)
@pytest.mark.parametrize("dtype", ["Int64", "Float64", "boolean"])
def test_size_series_masked_type_returns_Int64(dtype):
ser = Series([1, 1, 1], index=["a", "a", "b"], dtype=dtype)
result = ser.groupby(level=0).size()
expected = Series([2, 1], dtype="Int64", index=["a", "b"])
tm.assert_series_equal(result, expected)