|
15 | 15 | import pandas._testing as tm
|
16 | 16 |
|
17 | 17 |
|
| 18 | +@pytest.fixture |
| 19 | +def dirpath(datapath): |
| 20 | + return datapath("io", "sas", "data") |
| 21 | + |
| 22 | + |
| 23 | +@pytest.fixture(params=[(1, range(1, 16)), (2, [16])]) |
| 24 | +def data_test_ix(request, dirpath): |
| 25 | + i, test_ix = request.param |
| 26 | + fname = os.path.join(dirpath, f"test_sas7bdat_{i}.csv") |
| 27 | + df = pd.read_csv(fname) |
| 28 | + epoch = datetime(1960, 1, 1) |
| 29 | + t1 = pd.to_timedelta(df["Column4"], unit="d") |
| 30 | + df["Column4"] = epoch + t1 |
| 31 | + t2 = pd.to_timedelta(df["Column12"], unit="d") |
| 32 | + df["Column12"] = epoch + t2 |
| 33 | + for k in range(df.shape[1]): |
| 34 | + col = df.iloc[:, k] |
| 35 | + if col.dtype == np.int64: |
| 36 | + df.iloc[:, k] = df.iloc[:, k].astype(np.float64) |
| 37 | + return df, test_ix |
| 38 | + |
| 39 | + |
18 | 40 | # https://github.com/cython/cython/issues/1720
|
19 | 41 | @pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
|
20 | 42 | class TestSAS7BDAT:
|
21 |
| - @pytest.fixture(autouse=True) |
22 |
| - def setup_method(self, datapath): |
23 |
| - self.dirpath = datapath("io", "sas", "data") |
24 |
| - self.data = [] |
25 |
| - self.test_ix = [list(range(1, 16)), [16]] |
26 |
| - for j in 1, 2: |
27 |
| - fname = os.path.join(self.dirpath, f"test_sas7bdat_{j}.csv") |
28 |
| - df = pd.read_csv(fname) |
29 |
| - epoch = datetime(1960, 1, 1) |
30 |
| - t1 = pd.to_timedelta(df["Column4"], unit="d") |
31 |
| - df["Column4"] = epoch + t1 |
32 |
| - t2 = pd.to_timedelta(df["Column12"], unit="d") |
33 |
| - df["Column12"] = epoch + t2 |
34 |
| - for k in range(df.shape[1]): |
35 |
| - col = df.iloc[:, k] |
36 |
| - if col.dtype == np.int64: |
37 |
| - df.iloc[:, k] = df.iloc[:, k].astype(np.float64) |
38 |
| - self.data.append(df) |
39 |
| - |
40 | 43 | @pytest.mark.slow
|
41 |
| - def test_from_file(self): |
42 |
| - for j in 0, 1: |
43 |
| - df0 = self.data[j] |
44 |
| - for k in self.test_ix[j]: |
45 |
| - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
46 |
| - df = pd.read_sas(fname, encoding="utf-8") |
47 |
| - tm.assert_frame_equal(df, df0) |
| 44 | + def test_from_file(self, dirpath, data_test_ix): |
| 45 | + df0, test_ix = data_test_ix |
| 46 | + for k in test_ix: |
| 47 | + fname = os.path.join(dirpath, f"test{k}.sas7bdat") |
| 48 | + df = pd.read_sas(fname, encoding="utf-8") |
| 49 | + tm.assert_frame_equal(df, df0) |
48 | 50 |
|
49 | 51 | @pytest.mark.slow
|
50 |
| - def test_from_buffer(self): |
51 |
| - for j in 0, 1: |
52 |
| - df0 = self.data[j] |
53 |
| - for k in self.test_ix[j]: |
54 |
| - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
55 |
| - with open(fname, "rb") as f: |
56 |
| - byts = f.read() |
57 |
| - buf = io.BytesIO(byts) |
58 |
| - with pd.read_sas( |
59 |
| - buf, format="sas7bdat", iterator=True, encoding="utf-8" |
60 |
| - ) as rdr: |
61 |
| - df = rdr.read() |
62 |
| - tm.assert_frame_equal(df, df0, check_exact=False) |
| 52 | + def test_from_buffer(self, dirpath, data_test_ix): |
| 53 | + df0, test_ix = data_test_ix |
| 54 | + for k in test_ix: |
| 55 | + fname = os.path.join(dirpath, f"test{k}.sas7bdat") |
| 56 | + with open(fname, "rb") as f: |
| 57 | + byts = f.read() |
| 58 | + buf = io.BytesIO(byts) |
| 59 | + with pd.read_sas( |
| 60 | + buf, format="sas7bdat", iterator=True, encoding="utf-8" |
| 61 | + ) as rdr: |
| 62 | + df = rdr.read() |
| 63 | + tm.assert_frame_equal(df, df0, check_exact=False) |
63 | 64 |
|
64 | 65 | @pytest.mark.slow
|
65 |
| - def test_from_iterator(self): |
66 |
| - for j in 0, 1: |
67 |
| - df0 = self.data[j] |
68 |
| - for k in self.test_ix[j]: |
69 |
| - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
70 |
| - with pd.read_sas(fname, iterator=True, encoding="utf-8") as rdr: |
71 |
| - df = rdr.read(2) |
72 |
| - tm.assert_frame_equal(df, df0.iloc[0:2, :]) |
73 |
| - df = rdr.read(3) |
74 |
| - tm.assert_frame_equal(df, df0.iloc[2:5, :]) |
| 66 | + def test_from_iterator(self, dirpath, data_test_ix): |
| 67 | + df0, test_ix = data_test_ix |
| 68 | + for k in test_ix: |
| 69 | + fname = os.path.join(dirpath, f"test{k}.sas7bdat") |
| 70 | + with pd.read_sas(fname, iterator=True, encoding="utf-8") as rdr: |
| 71 | + df = rdr.read(2) |
| 72 | + tm.assert_frame_equal(df, df0.iloc[0:2, :]) |
| 73 | + df = rdr.read(3) |
| 74 | + tm.assert_frame_equal(df, df0.iloc[2:5, :]) |
75 | 75 |
|
76 | 76 | @pytest.mark.slow
|
77 |
| - def test_path_pathlib(self): |
78 |
| - for j in 0, 1: |
79 |
| - df0 = self.data[j] |
80 |
| - for k in self.test_ix[j]: |
81 |
| - fname = Path(os.path.join(self.dirpath, f"test{k}.sas7bdat")) |
82 |
| - df = pd.read_sas(fname, encoding="utf-8") |
83 |
| - tm.assert_frame_equal(df, df0) |
| 77 | + def test_path_pathlib(self, dirpath, data_test_ix): |
| 78 | + df0, test_ix = data_test_ix |
| 79 | + for k in test_ix: |
| 80 | + fname = Path(os.path.join(dirpath, f"test{k}.sas7bdat")) |
| 81 | + df = pd.read_sas(fname, encoding="utf-8") |
| 82 | + tm.assert_frame_equal(df, df0) |
84 | 83 |
|
85 | 84 | @td.skip_if_no("py.path")
|
86 | 85 | @pytest.mark.slow
|
87 |
| - def test_path_localpath(self): |
| 86 | + def test_path_localpath(self, dirpath, data_test_ix): |
88 | 87 | from py.path import local as LocalPath
|
89 | 88 |
|
90 |
| - for j in 0, 1: |
91 |
| - df0 = self.data[j] |
92 |
| - for k in self.test_ix[j]: |
93 |
| - fname = LocalPath(os.path.join(self.dirpath, f"test{k}.sas7bdat")) |
94 |
| - df = pd.read_sas(fname, encoding="utf-8") |
95 |
| - tm.assert_frame_equal(df, df0) |
| 89 | + df0, test_ix = data_test_ix |
| 90 | + for k in test_ix: |
| 91 | + fname = LocalPath(os.path.join(dirpath, f"test{k}.sas7bdat")) |
| 92 | + df = pd.read_sas(fname, encoding="utf-8") |
| 93 | + tm.assert_frame_equal(df, df0) |
96 | 94 |
|
97 | 95 | @pytest.mark.slow
|
98 |
| - def test_iterator_loop(self): |
| 96 | + @pytest.mark.parametrize("chunksize", (3, 5, 10, 11)) |
| 97 | + @pytest.mark.parametrize("k", range(1, 17)) |
| 98 | + def test_iterator_loop(self, dirpath, k, chunksize): |
99 | 99 | # github #13654
|
100 |
| - for j in 0, 1: |
101 |
| - for k in self.test_ix[j]: |
102 |
| - for chunksize in (3, 5, 10, 11): |
103 |
| - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
104 |
| - with pd.read_sas( |
105 |
| - fname, chunksize=chunksize, encoding="utf-8" |
106 |
| - ) as rdr: |
107 |
| - y = 0 |
108 |
| - for x in rdr: |
109 |
| - y += x.shape[0] |
110 |
| - assert y == rdr.row_count |
111 |
| - |
112 |
| - def test_iterator_read_too_much(self): |
| 100 | + fname = os.path.join(dirpath, f"test{k}.sas7bdat") |
| 101 | + with pd.read_sas(fname, chunksize=chunksize, encoding="utf-8") as rdr: |
| 102 | + y = 0 |
| 103 | + for x in rdr: |
| 104 | + y += x.shape[0] |
| 105 | + assert y == rdr.row_count |
| 106 | + |
| 107 | + def test_iterator_read_too_much(self, dirpath): |
113 | 108 | # github #14734
|
114 |
| - k = self.test_ix[0][0] |
115 |
| - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
| 109 | + fname = os.path.join(dirpath, "test1.sas7bdat") |
116 | 110 | with pd.read_sas(
|
117 | 111 | fname, format="sas7bdat", iterator=True, encoding="utf-8"
|
118 | 112 | ) as rdr:
|
@@ -183,19 +177,17 @@ def test_date_time(datapath):
|
183 | 177 | tm.assert_frame_equal(df, df0)
|
184 | 178 |
|
185 | 179 |
|
186 |
| -def test_compact_numerical_values(datapath): |
| 180 | +@pytest.mark.parametrize("column", ["WGT", "CYL"]) |
| 181 | +def test_compact_numerical_values(datapath, column): |
187 | 182 | # Regression test for #21616
|
188 | 183 | fname = datapath("io", "sas", "data", "cars.sas7bdat")
|
189 | 184 | df = pd.read_sas(fname, encoding="latin-1")
|
190 | 185 | # The two columns CYL and WGT in cars.sas7bdat have column
|
191 | 186 | # width < 8 and only contain integral values.
|
192 | 187 | # Test that pandas doesn't corrupt the numbers by adding
|
193 | 188 | # decimals.
|
194 |
| - result = df["WGT"] |
195 |
| - expected = df["WGT"].round() |
196 |
| - tm.assert_series_equal(result, expected, check_exact=True) |
197 |
| - result = df["CYL"] |
198 |
| - expected = df["CYL"].round() |
| 189 | + result = df[column] |
| 190 | + expected = df[column].round() |
199 | 191 | tm.assert_series_equal(result, expected, check_exact=True)
|
200 | 192 |
|
201 | 193 |
|
|
0 commit comments