1
- import os
2
-
3
1
import numpy as np
4
2
import pytest
5
3
@@ -24,145 +22,154 @@ def numeric_as_float(data):
24
22
25
23
class TestXport:
    """Tests for reading SAS XPORT files through ``read_sas``.

    Each sample file is exposed through its own fixture (``file01`` ..
    ``file05``) so that a test requests only the path it actually uses.
    """

    @pytest.fixture(autouse=True)
    def setup_method(self):
        """Run every test in this class inside ``td.file_leak_context()``."""
        # NOTE(review): presumably this flags file handles left open by the
        # test -- confirm against the helper's definition.
        with td.file_leak_context():
            yield

    @pytest.fixture
    def file01(self, datapath):
        """Return the path to DEMO_G.xpt."""
        return datapath("io", "sas", "data", "DEMO_G.xpt")

    @pytest.fixture
    def file02(self, datapath):
        """Return the path to SSHSV1_A.xpt."""
        return datapath("io", "sas", "data", "SSHSV1_A.xpt")

    @pytest.fixture
    def file03(self, datapath):
        """Return the path to DRXFCD_G.xpt."""
        return datapath("io", "sas", "data", "DRXFCD_G.xpt")

    @pytest.fixture
    def file04(self, datapath):
        """Return the path to paxraw_d_short.xpt."""
        return datapath("io", "sas", "data", "paxraw_d_short.xpt")

    @pytest.fixture
    def file05(self, datapath):
        """Return the path to DEMO_PUF.cpt."""
        return datapath("io", "sas", "data", "DEMO_PUF.cpt")

    @pytest.mark.slow
    def test1_basic(self, file01):
        """Read DEMO_G.xpt in full, past EOF, and in chunks; compare to the CSV."""
        # Tests with DEMO_G.xpt (all numeric file)

        # Compare to this
        data_csv = pd.read_csv(file01.replace(".xpt", ".csv"))
        numeric_as_float(data_csv)

        # Read full file
        data = read_sas(file01, format="xport")
        tm.assert_frame_equal(data, data_csv)
        num_rows = data.shape[0]

        # Test reading beyond end of file
        with read_sas(file01, format="xport", iterator=True) as reader:
            data = reader.read(num_rows + 100)
        assert data.shape[0] == num_rows

        # Test incremental read with `read` method.
        with read_sas(file01, format="xport", iterator=True) as reader:
            data = reader.read(10)
        tm.assert_frame_equal(data, data_csv.iloc[0:10, :])

        # Test incremental read with `get_chunk` method.
        with read_sas(file01, format="xport", chunksize=10) as reader:
            data = reader.get_chunk()
        tm.assert_frame_equal(data, data_csv.iloc[0:10, :])

        # Test read in loop
        m = 0
        with read_sas(file01, format="xport", chunksize=100) as reader:
            for x in reader:
                m += x.shape[0]
        assert m == num_rows

        # Read full file with `read_sas` method
        data = read_sas(file01)
        tm.assert_frame_equal(data, data_csv)

    def test1_index(self, file01):
        """Read DEMO_G.xpt with ``index="SEQN"``, in full and incrementally."""
        # Tests with DEMO_G.xpt using index (all numeric file)

        # Compare to this
        data_csv = pd.read_csv(file01.replace(".xpt", ".csv"))
        data_csv = data_csv.set_index("SEQN")
        numeric_as_float(data_csv)

        # Read full file
        data = read_sas(file01, index="SEQN", format="xport")
        tm.assert_frame_equal(data, data_csv, check_index_type=False)

        # Test incremental read with `read` method.
        with read_sas(file01, index="SEQN", format="xport", iterator=True) as reader:
            data = reader.read(10)
        tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_index_type=False)

        # Test incremental read with `get_chunk` method.
        with read_sas(file01, index="SEQN", format="xport", chunksize=10) as reader:
            data = reader.get_chunk()
        tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_index_type=False)

    def test1_incremental(self, file01):
        """Concatenate all 1000-row chunks of DEMO_G.xpt and compare to the CSV."""
        # Test with DEMO_G.xpt, reading full file incrementally

        data_csv = pd.read_csv(file01.replace(".xpt", ".csv"))
        data_csv = data_csv.set_index("SEQN")
        numeric_as_float(data_csv)

        with read_sas(file01, index="SEQN", chunksize=1000) as reader:
            all_data = list(reader)
        data = pd.concat(all_data, axis=0)

        tm.assert_frame_equal(data, data_csv, check_index_type=False)

    def test2(self, file02):
        """Read SSHSV1_A.xpt by path and compare to the CSV."""
        # Test with SSHSV1_A.xpt

        # Compare to this
        data_csv = pd.read_csv(file02.replace(".xpt", ".csv"))
        numeric_as_float(data_csv)

        data = read_sas(file02)
        tm.assert_frame_equal(data, data_csv)

    def test2_binary(self, file02):
        """Read SSHSV1_A.xpt from an already-open binary file handle."""
        # Test with SSHSV1_A.xpt, read as a binary file

        # Compare to this
        data_csv = pd.read_csv(file02.replace(".xpt", ".csv"))
        numeric_as_float(data_csv)

        with open(file02, "rb") as fd:
            with td.file_leak_context():
                # GH#35693 ensure that if we pass an open file, we
                # don't incorrectly close it in read_sas
                data = read_sas(fd, format="xport")

        tm.assert_frame_equal(data, data_csv)

    def test_multiple_types(self, file03):
        """Read DRXFCD_G.xpt with ``encoding="utf-8"`` and compare to the CSV."""
        # Test with DRXFCD_G.xpt (contains text and numeric variables)

        # Compare to this
        data_csv = pd.read_csv(file03.replace(".xpt", ".csv"))

        data = read_sas(file03, encoding="utf-8")
        tm.assert_frame_equal(data, data_csv)

    def test_truncated_float_support(self, file04):
        """Read paxraw_d_short.xpt, whose floats are stored truncated."""
        # Test with paxraw_d_short.xpt, a shortened version of:
        # http://wwwn.cdc.gov/Nchs/Nhanes/2005-2006/PAXRAW_D.ZIP
        # This file has truncated floats (5 bytes in this case).

        # GH 11713

        data_csv = pd.read_csv(file04.replace(".xpt", ".csv"))

        data = read_sas(file04, format="xport")
        tm.assert_frame_equal(data.astype("int64"), data_csv)

    def test_cport_header_found_raises(self, file05):
        """Reading a CPORT file as xport must raise ``ValueError``."""
        # Test with DEMO_PUF.cpt, the beginning of puf2019_1_fall.xpt
        # from https://www.cms.gov/files/zip/puf2019.zip
        # (despite the extension, it's a cpt file)
        msg = "Header record indicates a CPORT file, which is not readable."
        with pytest.raises(ValueError, match=msg):
            read_sas(file05, format="xport")
0 commit comments