forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtest_missing.py
137 lines (113 loc) · 4.18 KB
/
test_missing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Index, date_range
import pandas._testing as tm
@pytest.mark.parametrize("func", ["ffill", "bfill"])
def test_groupby_column_index_name_lost_fill_funcs(func):
# GH: 29764 groupby loses index sometimes
df = DataFrame(
[[1, 1.0, -1.0], [1, np.nan, np.nan], [1, 2.0, -2.0]],
columns=Index(["type", "a", "b"], name="idx"),
)
df_grouped = df.groupby(["type"])[["a", "b"]]
result = getattr(df_grouped, func)().columns
expected = Index(["a", "b"], name="idx")
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("func", ["ffill", "bfill"])
def test_groupby_fill_duplicate_column_names(func):
# GH: 25610 ValueError with duplicate column names
df1 = DataFrame({"field1": [1, 3, 4], "field2": [1, 3, 4]})
df2 = DataFrame({"field1": [1, np.nan, 4]})
df_grouped = pd.concat([df1, df2], axis=1).groupby(by=["field2"])
expected = DataFrame(
[[1, 1.0], [3, np.nan], [4, 4.0]], columns=["field1", "field1"]
)
result = getattr(df_grouped, func)()
tm.assert_frame_equal(result, expected)
def test_ffill_missing_arguments():
# GH 14955
df = DataFrame({"a": [1, 2], "b": [1, 1]})
with pytest.raises(ValueError, match="Must specify a fill"):
df.groupby("b").fillna()
def test_fill_consistency():
# GH9221
# pass thru keyword arguments to the generated wrapper
# are set if the passed kw is None (only)
df = DataFrame(
index=pd.MultiIndex.from_product(
[["value1", "value2"], date_range("2014-01-01", "2014-01-06")]
),
columns=Index(["1", "2"], name="id"),
)
df["1"] = [
np.nan,
1,
np.nan,
np.nan,
11,
np.nan,
np.nan,
2,
np.nan,
np.nan,
22,
np.nan,
]
df["2"] = [
np.nan,
3,
np.nan,
np.nan,
33,
np.nan,
np.nan,
4,
np.nan,
np.nan,
44,
np.nan,
]
expected = df.groupby(level=0, axis=0).fillna(method="ffill")
result = df.T.groupby(level=0, axis=1).fillna(method="ffill").T
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("method", ["ffill", "bfill"])
@pytest.mark.parametrize("dropna", [True, False])
@pytest.mark.parametrize("has_nan_group", [True, False])
def test_ffill_handles_nan_groups(dropna, method, has_nan_group):
# GH 34725
df_without_nan_rows = DataFrame([(1, 0.1), (2, 0.2)])
ridx = [-1, 0, -1, -1, 1, -1]
df = df_without_nan_rows.reindex(ridx).reset_index(drop=True)
group_b = np.nan if has_nan_group else "b"
df["group_col"] = pd.Series(["a"] * 3 + [group_b] * 3)
grouped = df.groupby(by="group_col", dropna=dropna)
result = getattr(grouped, method)(limit=None)
expected_rows = {
("ffill", True, True): [-1, 0, 0, -1, -1, -1],
("ffill", True, False): [-1, 0, 0, -1, 1, 1],
("ffill", False, True): [-1, 0, 0, -1, 1, 1],
("ffill", False, False): [-1, 0, 0, -1, 1, 1],
("bfill", True, True): [0, 0, -1, -1, -1, -1],
("bfill", True, False): [0, 0, -1, 1, 1, -1],
("bfill", False, True): [0, 0, -1, 1, 1, -1],
("bfill", False, False): [0, 0, -1, 1, 1, -1],
}
ridx = expected_rows.get((method, dropna, has_nan_group))
expected = df_without_nan_rows.reindex(ridx).reset_index(drop=True)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("min_count, value", [(2, np.nan), (-1, 1.0)])
@pytest.mark.parametrize("func", ["first", "last", "max", "min"])
def test_min_count(func, min_count, value):
# GH#37821
df = DataFrame({"a": [1] * 3, "b": [1, np.nan, np.nan], "c": [np.nan] * 3})
result = getattr(df.groupby("a"), func)(min_count=min_count)
expected = DataFrame({"b": [value], "c": [np.nan]}, index=Index([1], name="a"))
tm.assert_frame_equal(result, expected)
def test_indicies_with_missing():
# GH 9304
df = DataFrame({"a": [1, 1, np.nan], "b": [2, 3, 4], "c": [5, 6, 7]})
g = df.groupby(["a", "b"])
result = g.indices
expected = {(1.0, 2): np.array([0]), (1.0, 3): np.array([1])}
assert result == expected