diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 849b599141c2b..1c8db4dd32393 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -40,7 +40,7 @@ Bug fixes ~~~~~~~~~ - Bug in :meth:`read_csv` with ``float_precision="high"`` caused segfault or wrong parsing of long exponent strings. This resulted in a regression in some cases as the default for ``float_precision`` was changed in pandas 1.2.0 (:issue:`38753`) -- +- Bug in :func:`read_csv` not closing an opened file handle when a ``csv.Error`` or ``UnicodeDecodeError`` occurred while initializing (:issue:`39024`) - .. --------------------------------------------------------------------------- diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index ca817be5d2ff6..e58e59a722b7a 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2297,7 +2297,11 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds): self._open_handles(f, kwds) assert self.handles is not None assert hasattr(self.handles.handle, "readline") - self._make_reader(self.handles.handle) + try: + self._make_reader(self.handles.handle) + except (csv.Error, UnicodeDecodeError): + self.close() + raise # Get columns in two steps: infer from data, then # infer column indices from self.usecols if it is specified. diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index a2787ddad3683..57defb400b842 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -3,13 +3,17 @@ specific classification into the other test modules. 
import codecs
import csv
from io import StringIO
import os
from pathlib import Path
import warnings

import numpy as np
import pytest

from pandas.errors import EmptyDataError, ParserError
import pandas.util._test_decorators as td

from pandas import DataFrame
import pandas._testing as tm


@td.check_file_leaks
def test_open_file(all_parsers):
    """
    Check that a failing ``read_csv`` call does not emit a ResourceWarning.

    A temporary file containing the bytes ``b"\\xe4\\na\\n1"`` is read with
    ``sep=None``; the call is expected to raise ``csv.Error`` ("Could not
    determine delimiter"). No warning may be recorded while that happens.

    Parameters
    ----------
    all_parsers : fixture
        Parser under test; the test is skipped when its ``engine`` is "c".
    """
    # GH 39024
    parser = all_parsers
    if parser.engine == "c":
        pytest.skip("not exercised with the C engine")

    with tm.ensure_clean() as path:
        file = Path(path)
        file.write_bytes(b"\xe4\na\n1")

        with warnings.catch_warnings(record=True) as record:
            # Install the filter *inside* catch_warnings: the context manager
            # restores the global filter list on exit, so the "always" filter
            # cannot leak into tests that run afterwards.
            warnings.simplefilter("always", category=ResourceWarning)

            # should not trigger a ResourceWarning
            with pytest.raises(csv.Error, match="Could not determine delimiter"):
                parser.read_csv(file, sep=None)
            assert len(record) == 0, record[0].message