diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 8b28a4439e1da..bd4ee9b46db67 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -289,6 +289,7 @@ MultiIndex
 I/O
 ^^^
 
+- :func:`read_sas` no longer leaks resources on failure (:issue:`35566`)
 - Bug in :meth:`to_csv` caused a ``ValueError`` when it was called with a filename in combination with ``mode`` containing a ``b`` (:issue:`35058`)
 - In :meth:`read_csv` `float_precision='round_trip'` now handles `decimal` and `thousands` parameters (:issue:`35365`)
 - :meth:`to_pickle` and :meth:`read_pickle` were closing user-provided file objects (:issue:`35679`)
diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index 76dac39d1889f..f2ee642d8fd42 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -142,8 +142,12 @@ def __init__(
             self._path_or_buf = open(self._path_or_buf, "rb")
             self.handle = self._path_or_buf
 
-        self._get_properties()
-        self._parse_metadata()
+        try:
+            self._get_properties()
+            self._parse_metadata()
+        except Exception:
+            self.close()
+            raise
 
     def column_data_lengths(self):
         """Return a numpy int64 array of the column data lengths"""
diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py
index 1a4ba544f5d59..9727ec930119b 100644
--- a/pandas/io/sas/sas_xport.py
+++ b/pandas/io/sas/sas_xport.py
@@ -264,7 +264,11 @@ def __init__(
             # should already be opened in binary mode in Python 3.
             self.filepath_or_buffer = filepath_or_buffer
 
-        self._read_header()
+        try:
+            self._read_header()
+        except Exception:
+            self.close()
+            raise
 
     def close(self):
         self.filepath_or_buffer.close()
diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py
index ae9457a8e3147..31d1a6ad471ea 100644
--- a/pandas/io/sas/sasreader.py
+++ b/pandas/io/sas/sasreader.py
@@ -136,8 +136,8 @@ def read_sas(
     if iterator or chunksize:
         return reader
 
-    data = reader.read()
-
-    if ioargs.should_close:
-        reader.close()
-    return data
+    try:
+        return reader.read()
+    finally:
+        if ioargs.should_close:
+            reader.close()
diff --git a/pandas/tests/io/sas/data/corrupt.sas7bdat b/pandas/tests/io/sas/data/corrupt.sas7bdat
new file mode 100644
index 0000000000000..2941ffe3ecdf5
Binary files /dev/null and b/pandas/tests/io/sas/data/corrupt.sas7bdat differ
diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py
index 8c14f9de9f61c..9de6ca75fd4d9 100644
--- a/pandas/tests/io/sas/test_sas7bdat.py
+++ b/pandas/tests/io/sas/test_sas7bdat.py
@@ -217,6 +217,14 @@ def test_zero_variables(datapath):
         pd.read_sas(fname)
 
 
+def test_corrupt_read(datapath):
+    # We don't really care about the exact failure, the important thing is
+    # that the resource should be cleaned up afterwards (BUG #35566)
+    fname = datapath("io", "sas", "data", "corrupt.sas7bdat")
+    with pytest.raises(AttributeError):
+        pd.read_sas(fname)
+
+
 def round_datetime_to_ms(ts):
     if isinstance(ts, datetime):
         return ts.replace(microsecond=int(round(ts.microsecond, -3) / 1000) * 1000)