Skip to content

Commit fe62783

Browse files
Detect CPORT header in SAS files (#44300)
1 parent fd99c30 commit fe62783

File tree

3 files changed

+15
-0
lines changed

3 files changed

+15
-0
lines changed

pandas/io/sas/sas_xport.py

+6
Original file line number | Diff line number | Diff line change
@@ -279,6 +279,12 @@ def _read_header(self):
279279
# read file header
280280
line1 = self._get_row()
281281
if line1 != _correct_line1:
282+
if "**COMPRESSED**" in line1:
283+
# this was created with the PROC CPORT method and can't be read
284+
# https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/movefile/p1bm6aqp3fw4uin1hucwh718f6kp.htm
285+
raise ValueError(
286+
"Header record indicates a CPORT file, which is not readable."
287+
)
282288
raise ValueError("Header record is not an XPORT file.")
283289

284290
line2 = self._get_row()

pandas/tests/io/sas/data/DEMO_PUF.cpt

694 Bytes
Binary file not shown.

pandas/tests/io/sas/test_xport.py

+9
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ def setup_method(self, datapath):
3030
self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt")
3131
self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt")
3232
self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt")
33+
self.file05 = os.path.join(self.dirpath, "DEMO_PUF.cpt")
3334

3435
with td.file_leak_context():
3536
yield
@@ -157,3 +158,11 @@ def test_truncated_float_support(self):
157158

158159
data = read_sas(self.file04, format="xport")
159160
tm.assert_frame_equal(data.astype("int64"), data_csv)
161+
162+
def test_cport_header_found_raises(self):
163+
# Test with DEMO_PUF.cpt, the beginning of puf2019_1_fall.xpt
164+
# from https://www.cms.gov/files/zip/puf2019.zip
165+
# (despite the extension, it's a cpt file)
166+
msg = "Header record indicates a CPORT file, which is not readable."
167+
with pytest.raises(ValueError, match=msg):
168+
read_sas(self.file05, format="xport")

0 commit comments

Comments
 (0)