Skip to content

Commit 9af3bd0

Browse files
authored
TST: Fixturize / parameterize test_sas7bdat (#45826)
1 parent 9a29ba1 commit 9af3bd0

File tree

4 files changed

+80 -88 lines changed

pandas/tests/frame/test_nonunique_indexes.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,7 @@ def test_column_dups_indexes(self):
173173
this_df = df.copy()
174174
expected_ser = Series(index.values, index=this_df.index)
175175
expected_df = DataFrame(
176-
{"A": expected_ser, "B": this_df["B"], "A": expected_ser},
176+
{"A": expected_ser, "B": this_df["B"]},
177177
columns=["A", "B", "A"],
178178
)
179179
this_df["A"] = index

pandas/tests/frame/test_reductions.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -238,7 +238,7 @@ def assert_bool_op_api(
238238
----------
239239
opname : str
240240
Name of the operator to test on frame
241-
float_frame : DataFrame
241+
bool_frame_with_na : DataFrame
242242
DataFrame with columns of type float
243243
float_string_frame : DataFrame
244244
DataFrame with both float and string columns

pandas/tests/io/sas/test_sas7bdat.py

+77 -85
Original file line number | Diff line number | Diff line change
@@ -15,104 +15,98 @@
1515
import pandas._testing as tm
1616

1717

18+
@pytest.fixture
19+
def dirpath(datapath):
20+
return datapath("io", "sas", "data")
21+
22+
23+
@pytest.fixture(params=[(1, range(1, 16)), (2, [16])])
24+
def data_test_ix(request, dirpath):
25+
i, test_ix = request.param
26+
fname = os.path.join(dirpath, f"test_sas7bdat_{i}.csv")
27+
df = pd.read_csv(fname)
28+
epoch = datetime(1960, 1, 1)
29+
t1 = pd.to_timedelta(df["Column4"], unit="d")
30+
df["Column4"] = epoch + t1
31+
t2 = pd.to_timedelta(df["Column12"], unit="d")
32+
df["Column12"] = epoch + t2
33+
for k in range(df.shape[1]):
34+
col = df.iloc[:, k]
35+
if col.dtype == np.int64:
36+
df.iloc[:, k] = df.iloc[:, k].astype(np.float64)
37+
return df, test_ix
38+
39+
1840
# https://github.com/cython/cython/issues/1720
1941
@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
2042
class TestSAS7BDAT:
21-
@pytest.fixture(autouse=True)
22-
def setup_method(self, datapath):
23-
self.dirpath = datapath("io", "sas", "data")
24-
self.data = []
25-
self.test_ix = [list(range(1, 16)), [16]]
26-
for j in 1, 2:
27-
fname = os.path.join(self.dirpath, f"test_sas7bdat_{j}.csv")
28-
df = pd.read_csv(fname)
29-
epoch = datetime(1960, 1, 1)
30-
t1 = pd.to_timedelta(df["Column4"], unit="d")
31-
df["Column4"] = epoch + t1
32-
t2 = pd.to_timedelta(df["Column12"], unit="d")
33-
df["Column12"] = epoch + t2
34-
for k in range(df.shape[1]):
35-
col = df.iloc[:, k]
36-
if col.dtype == np.int64:
37-
df.iloc[:, k] = df.iloc[:, k].astype(np.float64)
38-
self.data.append(df)
39-
4043
@pytest.mark.slow
41-
def test_from_file(self):
42-
for j in 0, 1:
43-
df0 = self.data[j]
44-
for k in self.test_ix[j]:
45-
fname = os.path.join(self.dirpath, f"test{k}.sas7bdat")
46-
df = pd.read_sas(fname, encoding="utf-8")
47-
tm.assert_frame_equal(df, df0)
44+
def test_from_file(self, dirpath, data_test_ix):
45+
df0, test_ix = data_test_ix
46+
for k in test_ix:
47+
fname = os.path.join(dirpath, f"test{k}.sas7bdat")
48+
df = pd.read_sas(fname, encoding="utf-8")
49+
tm.assert_frame_equal(df, df0)
4850

4951
@pytest.mark.slow
50-
def test_from_buffer(self):
51-
for j in 0, 1:
52-
df0 = self.data[j]
53-
for k in self.test_ix[j]:
54-
fname = os.path.join(self.dirpath, f"test{k}.sas7bdat")
55-
with open(fname, "rb") as f:
56-
byts = f.read()
57-
buf = io.BytesIO(byts)
58-
with pd.read_sas(
59-
buf, format="sas7bdat", iterator=True, encoding="utf-8"
60-
) as rdr:
61-
df = rdr.read()
62-
tm.assert_frame_equal(df, df0, check_exact=False)
52+
def test_from_buffer(self, dirpath, data_test_ix):
53+
df0, test_ix = data_test_ix
54+
for k in test_ix:
55+
fname = os.path.join(dirpath, f"test{k}.sas7bdat")
56+
with open(fname, "rb") as f:
57+
byts = f.read()
58+
buf = io.BytesIO(byts)
59+
with pd.read_sas(
60+
buf, format="sas7bdat", iterator=True, encoding="utf-8"
61+
) as rdr:
62+
df = rdr.read()
63+
tm.assert_frame_equal(df, df0, check_exact=False)
6364

6465
@pytest.mark.slow
65-
def test_from_iterator(self):
66-
for j in 0, 1:
67-
df0 = self.data[j]
68-
for k in self.test_ix[j]:
69-
fname = os.path.join(self.dirpath, f"test{k}.sas7bdat")
70-
with pd.read_sas(fname, iterator=True, encoding="utf-8") as rdr:
71-
df = rdr.read(2)
72-
tm.assert_frame_equal(df, df0.iloc[0:2, :])
73-
df = rdr.read(3)
74-
tm.assert_frame_equal(df, df0.iloc[2:5, :])
66+
def test_from_iterator(self, dirpath, data_test_ix):
67+
df0, test_ix = data_test_ix
68+
for k in test_ix:
69+
fname = os.path.join(dirpath, f"test{k}.sas7bdat")
70+
with pd.read_sas(fname, iterator=True, encoding="utf-8") as rdr:
71+
df = rdr.read(2)
72+
tm.assert_frame_equal(df, df0.iloc[0:2, :])
73+
df = rdr.read(3)
74+
tm.assert_frame_equal(df, df0.iloc[2:5, :])
7575

7676
@pytest.mark.slow
77-
def test_path_pathlib(self):
78-
for j in 0, 1:
79-
df0 = self.data[j]
80-
for k in self.test_ix[j]:
81-
fname = Path(os.path.join(self.dirpath, f"test{k}.sas7bdat"))
82-
df = pd.read_sas(fname, encoding="utf-8")
83-
tm.assert_frame_equal(df, df0)
77+
def test_path_pathlib(self, dirpath, data_test_ix):
78+
df0, test_ix = data_test_ix
79+
for k in test_ix:
80+
fname = Path(os.path.join(dirpath, f"test{k}.sas7bdat"))
81+
df = pd.read_sas(fname, encoding="utf-8")
82+
tm.assert_frame_equal(df, df0)
8483

8584
@td.skip_if_no("py.path")
8685
@pytest.mark.slow
87-
def test_path_localpath(self):
86+
def test_path_localpath(self, dirpath, data_test_ix):
8887
from py.path import local as LocalPath
8988

90-
for j in 0, 1:
91-
df0 = self.data[j]
92-
for k in self.test_ix[j]:
93-
fname = LocalPath(os.path.join(self.dirpath, f"test{k}.sas7bdat"))
94-
df = pd.read_sas(fname, encoding="utf-8")
95-
tm.assert_frame_equal(df, df0)
89+
df0, test_ix = data_test_ix
90+
for k in test_ix:
91+
fname = LocalPath(os.path.join(dirpath, f"test{k}.sas7bdat"))
92+
df = pd.read_sas(fname, encoding="utf-8")
93+
tm.assert_frame_equal(df, df0)
9694

9795
@pytest.mark.slow
98-
def test_iterator_loop(self):
96+
@pytest.mark.parametrize("chunksize", (3, 5, 10, 11))
97+
@pytest.mark.parametrize("k", range(1, 17))
98+
def test_iterator_loop(self, dirpath, k, chunksize):
9999
# github #13654
100-
for j in 0, 1:
101-
for k in self.test_ix[j]:
102-
for chunksize in (3, 5, 10, 11):
103-
fname = os.path.join(self.dirpath, f"test{k}.sas7bdat")
104-
with pd.read_sas(
105-
fname, chunksize=chunksize, encoding="utf-8"
106-
) as rdr:
107-
y = 0
108-
for x in rdr:
109-
y += x.shape[0]
110-
assert y == rdr.row_count
111-
112-
def test_iterator_read_too_much(self):
100+
fname = os.path.join(dirpath, f"test{k}.sas7bdat")
101+
with pd.read_sas(fname, chunksize=chunksize, encoding="utf-8") as rdr:
102+
y = 0
103+
for x in rdr:
104+
y += x.shape[0]
105+
assert y == rdr.row_count
106+
107+
def test_iterator_read_too_much(self, dirpath):
113108
# github #14734
114-
k = self.test_ix[0][0]
115-
fname = os.path.join(self.dirpath, f"test{k}.sas7bdat")
109+
fname = os.path.join(dirpath, "test1.sas7bdat")
116110
with pd.read_sas(
117111
fname, format="sas7bdat", iterator=True, encoding="utf-8"
118112
) as rdr:
@@ -183,19 +177,17 @@ def test_date_time(datapath):
183177
tm.assert_frame_equal(df, df0)
184178

185179

186-
def test_compact_numerical_values(datapath):
180+
@pytest.mark.parametrize("column", ["WGT", "CYL"])
181+
def test_compact_numerical_values(datapath, column):
187182
# Regression test for #21616
188183
fname = datapath("io", "sas", "data", "cars.sas7bdat")
189184
df = pd.read_sas(fname, encoding="latin-1")
190185
# The two columns CYL and WGT in cars.sas7bdat have column
191186
# width < 8 and only contain integral values.
192187
# Test that pandas doesn't corrupt the numbers by adding
193188
# decimals.
194-
result = df["WGT"]
195-
expected = df["WGT"].round()
196-
tm.assert_series_equal(result, expected, check_exact=True)
197-
result = df["CYL"]
198-
expected = df["CYL"].round()
189+
result = df[column]
190+
expected = df[column].round()
199191
tm.assert_series_equal(result, expected, check_exact=True)
200192

201193

pandas/tests/series/indexing/test_xs.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ def test_series_getitem_multiindex_xs_by_label(self):
4545
result = ser.xs("one", level="L2")
4646
tm.assert_series_equal(result, expected)
4747

48-
def test_series_getitem_multiindex_xs(xs):
48+
def test_series_getitem_multiindex_xs(self):
4949
# GH#6258
5050
dt = list(date_range("20130903", periods=3))
5151
idx = MultiIndex.from_product([list("AB"), dt])

0 commit comments

Comments
 (0)