Skip to content

Commit d8d12d6

Browse files
jbrockmendel authored and jreback committed
TST: more method-specific test files (#30453)
1 parent 0d76ecc commit d8d12d6

14 files changed

+498
-458
lines changed

pandas/tests/frame/test_duplicates.py renamed to pandas/tests/frame/methods/test_drop_duplicates.py

+2 -88
Original file line number | Diff line number | Diff line change
@@ -3,95 +3,20 @@
33
import numpy as np
44
import pytest
55

6-
from pandas import DataFrame, Series
6+
from pandas import DataFrame
77
import pandas.util.testing as tm
88

99

1010
@pytest.mark.parametrize("subset", ["a", ["a"], ["a", "B"]])
11-
def test_duplicated_with_misspelled_column_name(subset):
11+
def test_drop_duplicates_with_misspelled_column_name(subset):
1212
# GH 19730
1313
df = DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]})
1414
msg = re.escape("Index(['a'], dtype='object')")
1515

16-
with pytest.raises(KeyError, match=msg):
17-
df.duplicated(subset)
18-
1916
with pytest.raises(KeyError, match=msg):
2017
df.drop_duplicates(subset)
2118

2219

23-
@pytest.mark.slow
24-
def test_duplicated_do_not_fail_on_wide_dataframes():
25-
# gh-21524
26-
# Given the wide dataframe with a lot of columns
27-
# with different (important!) values
28-
data = {
29-
"col_{0:02d}".format(i): np.random.randint(0, 1000, 30000) for i in range(100)
30-
}
31-
df = DataFrame(data).T
32-
result = df.duplicated()
33-
34-
# Then duplicates produce the bool Series as a result and don't fail during
35-
# calculation. Actual values don't matter here, though usually it's all
36-
# False in this case
37-
assert isinstance(result, Series)
38-
assert result.dtype == np.bool
39-
40-
41-
@pytest.mark.parametrize(
42-
"keep, expected",
43-
[
44-
("first", Series([False, False, True, False, True])),
45-
("last", Series([True, True, False, False, False])),
46-
(False, Series([True, True, True, False, True])),
47-
],
48-
)
49-
def test_duplicated_keep(keep, expected):
50-
df = DataFrame({"A": [0, 1, 1, 2, 0], "B": ["a", "b", "b", "c", "a"]})
51-
52-
result = df.duplicated(keep=keep)
53-
tm.assert_series_equal(result, expected)
54-
55-
56-
@pytest.mark.xfail(reason="GH#21720; nan/None falsely considered equal")
57-
@pytest.mark.parametrize(
58-
"keep, expected",
59-
[
60-
("first", Series([False, False, True, False, True])),
61-
("last", Series([True, True, False, False, False])),
62-
(False, Series([True, True, True, False, True])),
63-
],
64-
)
65-
def test_duplicated_nan_none(keep, expected):
66-
df = DataFrame({"C": [np.nan, 3, 3, None, np.nan]}, dtype=object)
67-
68-
result = df.duplicated(keep=keep)
69-
tm.assert_series_equal(result, expected)
70-
71-
72-
@pytest.mark.parametrize("keep", ["first", "last", False])
73-
@pytest.mark.parametrize("subset", [None, ["A", "B"], "A"])
74-
def test_duplicated_subset(subset, keep):
75-
df = DataFrame(
76-
{
77-
"A": [0, 1, 1, 2, 0],
78-
"B": ["a", "b", "b", "c", "a"],
79-
"C": [np.nan, 3, 3, None, np.nan],
80-
}
81-
)
82-
83-
if subset is None:
84-
subset = list(df.columns)
85-
elif isinstance(subset, str):
86-
# need to have a DataFrame, not a Series
87-
# -> select columns with singleton list, not string
88-
subset = [subset]
89-
90-
expected = df[subset].duplicated(keep=keep)
91-
result = df.duplicated(keep=keep, subset=subset)
92-
tm.assert_series_equal(result, expected)
93-
94-
9520
def test_drop_duplicates():
9621
df = DataFrame(
9722
{
@@ -188,17 +113,6 @@ def test_drop_duplicates():
188113
assert df.duplicated(keep=keep).sum() == 0
189114

190115

191-
def test_duplicated_on_empty_frame():
192-
# GH 25184
193-
194-
df = DataFrame(columns=["a", "b"])
195-
dupes = df.duplicated("a")
196-
197-
result = df[dupes]
198-
expected = df.copy()
199-
tm.assert_frame_equal(result, expected)
200-
201-
202116
def test_drop_duplicates_with_duplicate_column_names():
203117
# GH17836
204118
df = DataFrame([[1, 2, 5], [3, 4, 6], [3, 4, 7]], columns=["a", "a", "b"])
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,100 @@
1+
import re
2+
3+
import numpy as np
4+
import pytest
5+
6+
from pandas import DataFrame, Series
7+
import pandas.util.testing as tm
8+
9+
10+
@pytest.mark.parametrize("subset", ["a", ["a"], ["a", "B"]])
11+
def test_duplicated_with_misspelled_column_name(subset):
12+
# GH 19730
13+
df = DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]})
14+
msg = re.escape("Index(['a'], dtype='object')")
15+
16+
with pytest.raises(KeyError, match=msg):
17+
df.duplicated(subset)
18+
19+
20+
@pytest.mark.slow
21+
def test_duplicated_do_not_fail_on_wide_dataframes():
22+
# gh-21524
23+
# Given the wide dataframe with a lot of columns
24+
# with different (important!) values
25+
data = {
26+
"col_{0:02d}".format(i): np.random.randint(0, 1000, 30000) for i in range(100)
27+
}
28+
df = DataFrame(data).T
29+
result = df.duplicated()
30+
31+
# Then duplicates produce the bool Series as a result and don't fail during
32+
# calculation. Actual values don't matter here, though usually it's all
33+
# False in this case
34+
assert isinstance(result, Series)
35+
assert result.dtype == np.bool
36+
37+
38+
@pytest.mark.parametrize(
39+
"keep, expected",
40+
[
41+
("first", Series([False, False, True, False, True])),
42+
("last", Series([True, True, False, False, False])),
43+
(False, Series([True, True, True, False, True])),
44+
],
45+
)
46+
def test_duplicated_keep(keep, expected):
47+
df = DataFrame({"A": [0, 1, 1, 2, 0], "B": ["a", "b", "b", "c", "a"]})
48+
49+
result = df.duplicated(keep=keep)
50+
tm.assert_series_equal(result, expected)
51+
52+
53+
@pytest.mark.xfail(reason="GH#21720; nan/None falsely considered equal")
54+
@pytest.mark.parametrize(
55+
"keep, expected",
56+
[
57+
("first", Series([False, False, True, False, True])),
58+
("last", Series([True, True, False, False, False])),
59+
(False, Series([True, True, True, False, True])),
60+
],
61+
)
62+
def test_duplicated_nan_none(keep, expected):
63+
df = DataFrame({"C": [np.nan, 3, 3, None, np.nan]}, dtype=object)
64+
65+
result = df.duplicated(keep=keep)
66+
tm.assert_series_equal(result, expected)
67+
68+
69+
@pytest.mark.parametrize("keep", ["first", "last", False])
70+
@pytest.mark.parametrize("subset", [None, ["A", "B"], "A"])
71+
def test_duplicated_subset(subset, keep):
72+
df = DataFrame(
73+
{
74+
"A": [0, 1, 1, 2, 0],
75+
"B": ["a", "b", "b", "c", "a"],
76+
"C": [np.nan, 3, 3, None, np.nan],
77+
}
78+
)
79+
80+
if subset is None:
81+
subset = list(df.columns)
82+
elif isinstance(subset, str):
83+
# need to have a DataFrame, not a Series
84+
# -> select columns with singleton list, not string
85+
subset = [subset]
86+
87+
expected = df[subset].duplicated(keep=keep)
88+
result = df.duplicated(keep=keep, subset=subset)
89+
tm.assert_series_equal(result, expected)
90+
91+
92+
def test_duplicated_on_empty_frame():
93+
# GH 25184
94+
95+
df = DataFrame(columns=["a", "b"])
96+
dupes = df.duplicated("a")
97+
98+
result = df[dupes]
99+
expected = df.copy()
100+
tm.assert_frame_equal(result, expected)
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,78 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas import DataFrame, Series
5+
import pandas.util.testing as tm
6+
7+
8+
class TestDataFramePctChange:
9+
def test_pct_change_numeric(self):
10+
# GH#11150
11+
pnl = DataFrame(
12+
[np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange(0, 40, 10)]
13+
).astype(np.float64)
14+
pnl.iat[1, 0] = np.nan
15+
pnl.iat[1, 1] = np.nan
16+
pnl.iat[2, 3] = 60
17+
18+
for axis in range(2):
19+
expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift(axis=axis) - 1
20+
result = pnl.pct_change(axis=axis, fill_method="pad")
21+
22+
tm.assert_frame_equal(result, expected)
23+
24+
def test_pct_change(self, datetime_frame):
25+
rs = datetime_frame.pct_change(fill_method=None)
26+
tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1)
27+
28+
rs = datetime_frame.pct_change(2)
29+
filled = datetime_frame.fillna(method="pad")
30+
tm.assert_frame_equal(rs, filled / filled.shift(2) - 1)
31+
32+
rs = datetime_frame.pct_change(fill_method="bfill", limit=1)
33+
filled = datetime_frame.fillna(method="bfill", limit=1)
34+
tm.assert_frame_equal(rs, filled / filled.shift(1) - 1)
35+
36+
rs = datetime_frame.pct_change(freq="5D")
37+
filled = datetime_frame.fillna(method="pad")
38+
tm.assert_frame_equal(
39+
rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled)
40+
)
41+
42+
def test_pct_change_shift_over_nas(self):
43+
s = Series([1.0, 1.5, np.nan, 2.5, 3.0])
44+
45+
df = DataFrame({"a": s, "b": s})
46+
47+
chg = df.pct_change()
48+
expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2])
49+
edf = DataFrame({"a": expected, "b": expected})
50+
tm.assert_frame_equal(chg, edf)
51+
52+
@pytest.mark.parametrize(
53+
"freq, periods, fill_method, limit",
54+
[
55+
("5B", 5, None, None),
56+
("3B", 3, None, None),
57+
("3B", 3, "bfill", None),
58+
("7B", 7, "pad", 1),
59+
("7B", 7, "bfill", 3),
60+
("14B", 14, None, None),
61+
],
62+
)
63+
def test_pct_change_periods_freq(
64+
self, datetime_frame, freq, periods, fill_method, limit
65+
):
66+
# GH#7292
67+
rs_freq = datetime_frame.pct_change(
68+
freq=freq, fill_method=fill_method, limit=limit
69+
)
70+
rs_periods = datetime_frame.pct_change(
71+
periods, fill_method=fill_method, limit=limit
72+
)
73+
tm.assert_frame_equal(rs_freq, rs_periods)
74+
75+
empty_ts = DataFrame(index=datetime_frame.index, columns=datetime_frame.columns)
76+
rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit)
77+
rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit)
78+
tm.assert_frame_equal(rs_freq, rs_periods)

pandas/tests/frame/test_analytics.py

-18
Original file line number | Diff line number | Diff line change
@@ -893,24 +893,6 @@ def test_sum_bools(self):
893893
bools = isna(df)
894894
assert bools.sum(axis=1)[0] == 10
895895

896-
# ---------------------------------------------------------------------
897-
# Miscellanea
898-
899-
def test_pct_change(self):
900-
# GH#11150
901-
pnl = DataFrame(
902-
[np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange(0, 40, 10)]
903-
).astype(np.float64)
904-
pnl.iat[1, 0] = np.nan
905-
pnl.iat[1, 1] = np.nan
906-
pnl.iat[2, 3] = 60
907-
908-
for axis in range(2):
909-
expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift(axis=axis) - 1
910-
result = pnl.pct_change(axis=axis, fill_method="pad")
911-
912-
tm.assert_frame_equal(result, expected)
913-
914896
# ----------------------------------------------------------------------
915897
# Index of max / min
916898

pandas/tests/frame/test_repr_info.py

+7 -7
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import re
44
import sys
55
import textwrap
6+
import warnings
67

78
import numpy as np
89
import pytest
@@ -29,17 +30,17 @@
2930
class TestDataFrameReprInfoEtc:
3031
def test_repr_empty(self):
3132
# empty
32-
foo = repr(DataFrame()) # noqa
33+
repr(DataFrame())
3334

3435
# empty with index
3536
frame = DataFrame(index=np.arange(1000))
36-
foo = repr(frame) # noqa
37+
repr(frame)
3738

3839
def test_repr_mixed(self, float_string_frame):
3940
buf = StringIO()
4041

4142
# mixed
42-
foo = repr(float_string_frame) # noqa
43+
repr(float_string_frame)
4344
float_string_frame.info(verbose=False, buf=buf)
4445

4546
@pytest.mark.slow
@@ -51,13 +52,13 @@ def test_repr_mixed_big(self):
5152
biggie.loc[:20, "A"] = np.nan
5253
biggie.loc[:20, "B"] = np.nan
5354

54-
foo = repr(biggie) # noqa
55+
repr(biggie)
5556

5657
def test_repr(self, float_frame):
5758
buf = StringIO()
5859

5960
# small one
60-
foo = repr(float_frame)
61+
repr(float_frame)
6162
float_frame.info(verbose=False, buf=buf)
6263

6364
# even smaller
@@ -68,7 +69,7 @@ def test_repr(self, float_frame):
6869

6970
# columns but no index
7071
no_index = DataFrame(columns=[0, 1, 3])
71-
foo = repr(no_index) # noqa
72+
repr(no_index)
7273

7374
# no columns or index
7475
DataFrame().info(buf=buf)
@@ -97,7 +98,6 @@ def test_repr_big(self):
9798

9899
def test_repr_unsortable(self, float_frame):
99100
# columns are not sortable
100-
import warnings
101101

102102
warn_filters = warnings.filters
103103
warnings.filterwarnings("ignore", category=FutureWarning, module=".*format")

0 commit comments

Comments
 (0)