Skip to content

Commit 7aa5184 (1 parent: 240383c)

Properly close opened files in XportReader and SAS7BDATReader

File tree

6 files changed: +36 −3 lines changed

doc/source/whatsnew/v0.19.0.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -902,8 +902,8 @@ Bug Fixes
902902
- Bug in ``pd.read_csv()`` that prevents ``usecols`` from being an empty set (:issue:`13402`)
903903
- Bug in ``pd.read_csv()`` with ``engine='c'`` in which null ``quotechar`` was not accepted even though ``quoting`` was specified as ``None`` (:issue:`13411`)
904904
- Bug in ``pd.read_csv()`` with ``engine='c'`` in which fields were not properly cast to float when quoting was specified as non-numeric (:issue:`13411`)
905-
- Bug in ``pd.read_csv``, ``pd.read_table`` and ``pd.read_stata`` where files were opened by parsers but not closed if both ``chunksize`` and ``iterator`` were ``None``. (:issue:`13940`)
906-
- Bug in ``StataReader`` and ``StataWriter`` where a file was not properly closed when an error was raised. (:issue:`13940`)
905+
- Bug in ``pd.read_csv``, ``pd.read_table``, ``pd.read_fwf``, ``pd.read_stata`` and ``pd.read_sas`` where files were opened by parsers but not closed if both ``chunksize`` and ``iterator`` were ``None``. (:issue:`13940`)
906+
- Bug in ``StataReader``, ``StataWriter``, ``XportReader`` and ``SAS7BDATReader`` where a file was not properly closed when an error was raised. (:issue:`13940`)
907907

908908
- Bug in ``pd.pivot_table()`` where ``margins_name`` is ignored when ``aggfunc`` is a list (:issue:`13354`)
909909
- Bug in ``pd.Series.str.zfill``, ``center``, ``ljust``, ``rjust``, and ``pad`` when passing non-integers, did not raise ``TypeError`` (:issue:`13598`)

pandas/io/sas/sas7bdat.py

+17
Original file line numberDiff line numberDiff line change
@@ -92,16 +92,24 @@ def __init__(self, path_or_buf, index=None, convert_dates=True,
9292
self._path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf)
9393
if isinstance(self._path_or_buf, compat.string_types):
9494
self._path_or_buf = open(self._path_or_buf, 'rb')
95+
self.handle = self._path_or_buf
9596

9697
self._get_properties()
9798
self._parse_metadata()
9899

100+
def close(self):
101+
try:
102+
self.handle.close()
103+
except AttributeError:
104+
pass
105+
99106
def _get_properties(self):
100107

101108
# Check magic number
102109
self._path_or_buf.seek(0)
103110
self._cached_page = self._path_or_buf.read(288)
104111
if self._cached_page[0:len(const.magic)] != const.magic:
112+
self.close()
105113
raise ValueError("magic number mismatch (not a SAS file?)")
106114

107115
# Get alignment information
@@ -175,6 +183,7 @@ def _get_properties(self):
175183
buf = self._path_or_buf.read(self.header_length - 288)
176184
self._cached_page += buf
177185
if len(self._cached_page) != self.header_length:
186+
self.close()
178187
raise ValueError("The SAS7BDAT file appears to be truncated.")
179188

180189
self._page_length = self._read_int(const.page_size_offset + align1,
@@ -219,6 +228,7 @@ def _get_properties(self):
219228
# Read a single float of the given width (4 or 8).
220229
def _read_float(self, offset, width):
221230
if width not in (4, 8):
231+
self.close()
222232
raise ValueError("invalid float width")
223233
buf = self._read_bytes(offset, width)
224234
fd = "f" if width == 4 else "d"
@@ -227,6 +237,7 @@ def _read_float(self, offset, width):
227237
# Read a single signed integer of the given width (1, 2, 4 or 8).
228238
def _read_int(self, offset, width):
229239
if width not in (1, 2, 4, 8):
240+
self.close()
230241
raise ValueError("invalid int width")
231242
buf = self._read_bytes(offset, width)
232243
it = {1: "b", 2: "h", 4: "l", 8: "q"}[width]
@@ -238,11 +249,13 @@ def _read_bytes(self, offset, length):
238249
self._path_or_buf.seek(offset)
239250
buf = self._path_or_buf.read(length)
240251
if len(buf) < length:
252+
self.close()
241253
msg = "Unable to read {:d} bytes from file position {:d}."
242254
raise ValueError(msg.format(length, offset))
243255
return buf
244256
else:
245257
if offset + length > len(self._cached_page):
258+
self.close()
246259
raise ValueError("The cached page is too small.")
247260
return self._cached_page[offset:offset + length]
248261

@@ -253,6 +266,7 @@ def _parse_metadata(self):
253266
if len(self._cached_page) <= 0:
254267
break
255268
if len(self._cached_page) != self._page_length:
269+
self.close()
256270
raise ValueError(
257271
"Failed to read a meta data page from the SAS file.")
258272
done = self._process_page_meta()
@@ -302,6 +316,7 @@ def _get_subheader_index(self, signature, compression, ptype):
302316
if (self.compression != "") and f1 and f2:
303317
index = const.index.dataSubheaderIndex
304318
else:
319+
self.close()
305320
raise ValueError("Unknown subheader signature")
306321
return index
307322

@@ -598,6 +613,7 @@ def _read_next_page(self):
598613
if len(self._cached_page) <= 0:
599614
return True
600615
elif len(self._cached_page) != self._page_length:
616+
self.close()
601617
msg = ("failed to read complete page from file "
602618
"(read {:d} of {:d} bytes)")
603619
raise ValueError(msg.format(len(self._cached_page),
@@ -643,6 +659,7 @@ def _chunk_to_dataframe(self):
643659
rslt.loc[ii, name] = np.nan
644660
js += 1
645661
else:
662+
self.close()
646663
raise ValueError("unknown column type %s" %
647664
self.column_types[j])
648665

pandas/io/sas/sas_xport.py

+9
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,9 @@ def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
253253

254254
self._read_header()
255255

256+
def close(self):
257+
self.filepath_or_buffer.close()
258+
256259
def _get_row(self):
257260
return self.filepath_or_buffer.read(80).decode()
258261

@@ -262,13 +265,15 @@ def _read_header(self):
262265
# read file header
263266
line1 = self._get_row()
264267
if line1 != _correct_line1:
268+
self.close()
265269
raise ValueError("Header record is not an XPORT file.")
266270

267271
line2 = self._get_row()
268272
fif = [['prefix', 24], ['version', 8], ['OS', 8],
269273
['_', 24], ['created', 16]]
270274
file_info = _split_line(line2, fif)
271275
if file_info['prefix'] != "SAS SAS SASLIB":
276+
self.close()
272277
raise ValueError("Header record has invalid prefix.")
273278
file_info['created'] = _parse_date(file_info['created'])
274279
self.file_info = file_info
@@ -282,6 +287,7 @@ def _read_header(self):
282287
headflag1 = header1.startswith(_correct_header1)
283288
headflag2 = (header2 == _correct_header2)
284289
if not (headflag1 and headflag2):
290+
self.close()
285291
raise ValueError("Member header not found")
286292
# usually 140, could be 135
287293
fieldnamelength = int(header1[-5:-2])
@@ -321,6 +327,7 @@ def _read_header(self):
321327
field['ntype'] = types[field['ntype']]
322328
fl = field['field_length']
323329
if field['ntype'] == 'numeric' and ((fl < 2) or (fl > 8)):
330+
self.close()
324331
msg = "Floating field width {0} is not between 2 and 8."
325332
raise TypeError(msg.format(fl))
326333

@@ -335,6 +342,7 @@ def _read_header(self):
335342

336343
header = self._get_row()
337344
if not header == _correct_obs_header:
345+
self.close()
338346
raise ValueError("Observation header not found.")
339347

340348
self.fields = fields
@@ -425,6 +433,7 @@ def read(self, nrows=None):
425433
read_lines = min(nrows, self.nobs - self._lines_read)
426434
read_len = read_lines * self.record_length
427435
if read_len <= 0:
436+
self.close()
428437
raise StopIteration
429438
raw = self.filepath_or_buffer.read(read_len)
430439
data = np.frombuffer(raw, dtype=self._dtype, count=read_lines)

pandas/io/sas/sasreader.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -58,4 +58,6 @@ def read_sas(filepath_or_buffer, format=None, index=None, encoding=None,
5858
if iterator or chunksize:
5959
return reader
6060

61-
return reader.read()
61+
data = reader.read()
62+
reader.close()
63+
return data

pandas/io/tests/sas/test_sas7bdat.py

+1
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ def test_encoding_options():
8181
from pandas.io.sas.sas7bdat import SAS7BDATReader
8282
rdr = SAS7BDATReader(fname, convert_header_text=False)
8383
df3 = rdr.read()
84+
rdr.close()
8485
for x, y in zip(df1.columns, df3.columns):
8586
assert(x == y.decode())
8687

pandas/io/tests/sas/test_xport.py

+4
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,13 @@ def test1_basic(self):
3939
# Test incremental read with `read` method.
4040
reader = read_sas(self.file01, format="xport", iterator=True)
4141
data = reader.read(10)
42+
reader.close()
4243
tm.assert_frame_equal(data, data_csv.iloc[0:10, :])
4344

4445
# Test incremental read with `get_chunk` method.
4546
reader = read_sas(self.file01, format="xport", chunksize=10)
4647
data = reader.get_chunk()
48+
reader.close()
4749
tm.assert_frame_equal(data, data_csv.iloc[0:10, :])
4850

4951
# Read full file with `read_sas` method
@@ -66,13 +68,15 @@ def test1_index(self):
6668
reader = read_sas(self.file01, index="SEQN", format="xport",
6769
iterator=True)
6870
data = reader.read(10)
71+
reader.close()
6972
tm.assert_frame_equal(data, data_csv.iloc[0:10, :],
7073
check_index_type=False)
7174

7275
# Test incremental read with `get_chunk` method.
7376
reader = read_sas(self.file01, index="SEQN", format="xport",
7477
chunksize=10)
7578
data = reader.get_chunk()
79+
reader.close()
7680
tm.assert_frame_equal(data, data_csv.iloc[0:10, :],
7781
check_index_type=False)
7882

Comments (0)