diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py
index bdb7d86a9b37e..3f9bf6662e99f 100644
--- a/pandas/io/sas/sas_xport.py
+++ b/pandas/io/sas/sas_xport.py
@@ -279,6 +279,12 @@ def _read_header(self):
         # read file header
         line1 = self._get_row()
         if line1 != _correct_line1:
+            if "**COMPRESSED**" in line1:
+                # this was created with the PROC CPORT method and can't be read
+                # https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/movefile/p1bm6aqp3fw4uin1hucwh718f6kp.htm
+                raise ValueError(
+                    "Header record indicates a CPORT file, which is not readable."
+                )
             raise ValueError("Header record is not an XPORT file.")
 
         line2 = self._get_row()
diff --git a/pandas/tests/io/sas/data/DEMO_PUF.cpt b/pandas/tests/io/sas/data/DEMO_PUF.cpt
new file mode 100644
index 0000000000000..d74b6a70d2812
Binary files /dev/null and b/pandas/tests/io/sas/data/DEMO_PUF.cpt differ
diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py
index 5d3e3b8e23cdb..9232ea8a25e4d 100644
--- a/pandas/tests/io/sas/test_xport.py
+++ b/pandas/tests/io/sas/test_xport.py
@@ -30,6 +30,7 @@ def setup_method(self, datapath):
         self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt")
         self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt")
         self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt")
+        self.file05 = os.path.join(self.dirpath, "DEMO_PUF.cpt")
 
         with td.file_leak_context():
             yield
@@ -157,3 +158,11 @@ def test_truncated_float_support(self):
 
         data = read_sas(self.file04, format="xport")
         tm.assert_frame_equal(data.astype("int64"), data_csv)
+
+    def test_cport_header_found_raises(self):
+        # Test with DEMO_PUF.cpt, the beginning of puf2019_1_fall.xpt
+        # from https://www.cms.gov/files/zip/puf2019.zip
+        # (despite the extension, it's a cpt file)
+        msg = "Header record indicates a CPORT file, which is not readable."
+        with pytest.raises(ValueError, match=msg):
+            read_sas(self.file05, format="xport")