-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: properly close files opened by parsers #13940
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d759156
1e39a5e
3b0f25f
30b61e6
99e16dd
c7e9c9c
812e6ec
75fc34d
39dcd99
52d1073
240383c
7aa5184
6592c73
3fa7d25
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -393,11 +393,15 @@ def _read(filepath_or_buffer, kwds): | |
raise NotImplementedError("'nrows' and 'chunksize' cannot be used" | ||
" together yet.") | ||
elif nrows is not None: | ||
return parser.read(nrows) | ||
data = parser.read(nrows) | ||
parser.close() | ||
return data | ||
elif chunksize or iterator: | ||
return parser | ||
|
||
return parser.read() | ||
data = parser.read() | ||
parser.close() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. e.g here |
||
return data | ||
|
||
_parser_defaults = { | ||
'delimiter': None, | ||
|
@@ -727,10 +731,7 @@ def __init__(self, f, engine=None, **kwds): | |
self._make_engine(self.engine) | ||
|
||
def close(self): | ||
try: | ||
self._engine._reader.close() | ||
except: | ||
pass | ||
self._engine.close() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why can we get rid of this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I shouldn't have removed it. But this was only used by |
||
|
||
def _get_options_with_defaults(self, engine): | ||
kwds = self.orig_options | ||
|
@@ -898,7 +899,11 @@ def _clean_options(self, options, engine): | |
return result, engine | ||
|
||
def __next__(self): | ||
return self.get_chunk() | ||
try: | ||
return self.get_chunk() | ||
except StopIteration: | ||
self.close() | ||
raise | ||
|
||
def _make_engine(self, engine='c'): | ||
if engine == 'c': | ||
|
@@ -1057,8 +1062,13 @@ def __init__(self, kwds): | |
|
||
self._first_chunk = True | ||
|
||
# GH 13932 | ||
# keep references to file handles opened by the parser itself | ||
self.handles = [] | ||
|
||
def close(self): | ||
self._reader.close() | ||
for f in self.handles: | ||
f.close() | ||
|
||
@property | ||
def _has_complex_date_col(self): | ||
|
@@ -1356,6 +1366,7 @@ def __init__(self, src, **kwds): | |
if 'utf-16' in (kwds.get('encoding') or ''): | ||
if isinstance(src, compat.string_types): | ||
src = open(src, 'rb') | ||
self.handles.append(src) | ||
src = UTF8Recoder(src, kwds['encoding']) | ||
kwds['encoding'] = 'utf-8' | ||
|
||
|
@@ -1429,6 +1440,14 @@ def __init__(self, src, **kwds): | |
|
||
self._implicit_index = self._reader.leading_cols > 0 | ||
|
||
def close(self): | ||
for f in self.handles: | ||
f.close() | ||
try: | ||
self._reader.close() | ||
except: | ||
pass | ||
|
||
def _set_noconvert_columns(self): | ||
names = self.orig_names | ||
usecols = self.usecols | ||
|
@@ -1751,13 +1770,16 @@ def __init__(self, f, **kwds): | |
f = _get_handle(f, 'r', encoding=self.encoding, | ||
compression=self.compression, | ||
memory_map=self.memory_map) | ||
self.handles.append(f) | ||
elif self.compression: | ||
f = _wrap_compressed(f, self.compression, self.encoding) | ||
self.handles.append(f) | ||
# in Python 3, convert BytesIO or fileobjects passed with an encoding | ||
elif compat.PY3 and isinstance(f, compat.BytesIO): | ||
from io import TextIOWrapper | ||
|
||
f = TextIOWrapper(f, encoding=self.encoding) | ||
self.handles.append(f) | ||
|
||
# Set self.data to something that can read lines. | ||
if hasattr(f, 'readline'): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -253,6 +253,9 @@ def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1', | |
|
||
self._read_header() | ||
|
||
def close(self): | ||
self.filepath_or_buffer.close() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this always a filehandle-like? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
def _get_row(self): | ||
return self.filepath_or_buffer.read(80).decode() | ||
|
||
|
@@ -262,13 +265,15 @@ def _read_header(self): | |
# read file header | ||
line1 = self._get_row() | ||
if line1 != _correct_line1: | ||
self.close() | ||
raise ValueError("Header record is not an XPORT file.") | ||
|
||
line2 = self._get_row() | ||
fif = [['prefix', 24], ['version', 8], ['OS', 8], | ||
['_', 24], ['created', 16]] | ||
file_info = _split_line(line2, fif) | ||
if file_info['prefix'] != "SAS SAS SASLIB": | ||
self.close() | ||
raise ValueError("Header record has invalid prefix.") | ||
file_info['created'] = _parse_date(file_info['created']) | ||
self.file_info = file_info | ||
|
@@ -282,6 +287,7 @@ def _read_header(self): | |
headflag1 = header1.startswith(_correct_header1) | ||
headflag2 = (header2 == _correct_header2) | ||
if not (headflag1 and headflag2): | ||
self.close() | ||
raise ValueError("Member header not found") | ||
# usually 140, could be 135 | ||
fieldnamelength = int(header1[-5:-2]) | ||
|
@@ -321,6 +327,7 @@ def _read_header(self): | |
field['ntype'] = types[field['ntype']] | ||
fl = field['field_length'] | ||
if field['ntype'] == 'numeric' and ((fl < 2) or (fl > 8)): | ||
self.close() | ||
msg = "Floating field width {0} is not between 2 and 8." | ||
raise TypeError(msg.format(fl)) | ||
|
||
|
@@ -335,6 +342,7 @@ def _read_header(self): | |
|
||
header = self._get_row() | ||
if not header == _correct_obs_header: | ||
self.close() | ||
raise ValueError("Observation header not found.") | ||
|
||
self.fields = fields | ||
|
@@ -425,6 +433,7 @@ def read(self, nrows=None): | |
read_lines = min(nrows, self.nobs - self._lines_read) | ||
read_len = read_lines * self.record_length | ||
if read_len <= 0: | ||
self.close() | ||
raise StopIteration | ||
raw = self.filepath_or_buffer.read(read_len) | ||
data = np.frombuffer(raw, dtype=self._dtype, count=read_lines) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if you do
I think you can eliminate the
.close()
calls where you raise on an exception (also needs changing at L402).and slightly more idiomatic.
That way exceptions will bubble up, but we will still close.