diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 849b599141c2b..1c8db4dd32393 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -40,7 +40,7 @@ Bug fixes ~~~~~~~~~ - Bug in :meth:`read_csv` with ``float_precision="high"`` caused segfault or wrong parsing of long exponent strings. This resulted in a regression in some cases as the default for ``float_precision`` was changed in pandas 1.2.0 (:issue:`38753`) -- +- Bug in :func:`read_csv` not closing an opened file handle when a ``csv.Error`` or ``UnicodeDecodeError`` occurred while initializing (:issue:`39024`) - .. --------------------------------------------------------------------------- diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index fcbf7ec3897fc..d99abbea90a51 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2288,7 +2288,11 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds): self._open_handles(f, kwds) assert self.handles is not None assert hasattr(self.handles.handle, "readline") - self._make_reader(self.handles.handle) + try: + self._make_reader(self.handles.handle) + except (csv.Error, UnicodeDecodeError): + self.close() + raise # Get columns in two steps: infer from data, then # infer column indices from self.usecols if it is specified. 
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index d42bd7a004584..8871ea7205a46 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -8,8 +8,10 @@
 from inspect import signature
 from io import BytesIO, StringIO
 import os
+from pathlib import Path
 import platform
 from urllib.error import URLError
+import warnings
 
 import numpy as np
 import pytest
@@ -2369,3 +2371,25 @@ def test_context_manageri_user_provided(all_parsers, datapath):
                 assert False
         except AssertionError:
             assert not reader._engine.handles.handle.closed
+
+
+@td.check_file_leaks
+def test_open_file(all_parsers):
+    # GH 39024: read_csv must not leave the file it opened unclosed when
+    # a csv.Error is raised while the parser is being initialized
+    parser = all_parsers
+    if parser.engine == "c":
+        pytest.skip()
+
+    with tm.ensure_clean() as path:
+        file = Path(path)
+        file.write_bytes(b"\xe4\na\n1")
+
+        # should not trigger a ResourceWarning
+        with warnings.catch_warnings(record=True) as record:
+            # set the filter inside the context manager so the global
+            # warning filters are restored when the block exits
+            warnings.simplefilter("always", category=ResourceWarning)
+            with pytest.raises(csv.Error, match="Could not determine delimiter"):
+                parser.read_csv(file, sep=None)
+            assert len(record) == 0, record[0].message