From 0f6edcb1f68a4ab32901a9339dd691360a7e20a8 Mon Sep 17 00:00:00 2001 From: Philipp A Date: Tue, 29 Mar 2022 12:46:57 +0200 Subject: [PATCH 1/7] Close FastParquet file even on error --- pandas/io/parquet.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index c7e8d67189e5d..30b37b5c89390 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -351,11 +351,11 @@ def read( parquet_file = self.api.ParquetFile(path, **parquet_kwargs) - result = parquet_file.to_pandas(columns=columns, **kwargs) - - if handles is not None: - handles.close() - return result + try: + return parquet_file.to_pandas(columns=columns, **kwargs) + finally: + if handles is not None: + handles.close() @doc(storage_options=_shared_docs["storage_options"]) From f5ced400fdccad6290f9666a29eab7d65d15f36e Mon Sep 17 00:00:00 2001 From: Philipp A Date: Tue, 29 Mar 2022 13:45:08 +0200 Subject: [PATCH 2/7] fix pre-commit --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0a2f3f8f2506d..7456bdf346272 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ repos: pass_filenames: true require_serial: false - repo: https://github.com/python/black - rev: 22.1.0 + rev: 22.3.0 hooks: - id: black - repo: https://github.com/codespell-project/codespell From e0e6008aae5404951ac439798d2e12767be86e38 Mon Sep 17 00:00:00 2001 From: Philipp A Date: Tue, 29 Mar 2022 13:54:36 +0200 Subject: [PATCH 3/7] move self.api.ParquetFile() into try block --- pandas/io/parquet.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 30b37b5c89390..27b0b3d08ad53 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -349,9 +349,8 @@ def read( ) path = handles.handle - parquet_file = self.api.ParquetFile(path, **parquet_kwargs) - try: + parquet_file = self.api.ParquetFile(path, **parquet_kwargs) return parquet_file.to_pandas(columns=columns, **kwargs) finally: if handles is not None: From 4113da24a01a42e2ca06edab18ebd4f487c611c8 Mon Sep 17 00:00:00 2001 From: Philipp A Date: Wed, 30 Mar 2022 16:37:45 +0200 Subject: [PATCH 4/7] Add test --- pandas/tests/io/test_parquet.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 3001922e95a54..ab922331bc3a9 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1155,3 +1155,13 @@ def test_use_nullable_dtypes_not_supported(self, fp): df.to_parquet(path) with pytest.raises(ValueError, match="not supported for the fastparquet"): read_parquet(path, engine="fastparquet", use_nullable_dtypes=True) + + def test_close_file_handle_on_read_error(self): + df = pd.DataFrame({"a": [1, 2]}) + with tm.ensure_clean() as path: + df.to_parquet(path) + with open(path, 'r+b') as f: + f.seek(16) + f.write(b'breakit') + with pytest.raises(Exception): # Not important which exception + read_parquet(path, engine="fastparquet") From 6bc00da41dff7d3c533b3a82ebb7e630ea77cbdb Mon Sep 17 00:00:00 2001 From: Philipp A Date: Wed, 30 Mar 2022 16:51:13 +0200 Subject: [PATCH 5/7] Add whatsnew entry --- doc/source/whatsnew/v1.5.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 6cbee83247692..48a52f21e3640 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -498,7 +498,8 @@ I/O - Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`) - Bug in :func:`DataFrame.to_excel` and :class:`ExcelWriter` would raise when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`) - Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`) -- Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements(:issue:`45598`) +- Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`) +- Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`) Period ^^^^^^ From 27f7bda9cf1ed74a36f4f7d1138188aeb0e1b86d Mon Sep 17 00:00:00 2001 From: Philipp A Date: Wed, 30 Mar 2022 17:22:14 +0200 Subject: [PATCH 6/7] Improve test --- pandas/tests/io/test_parquet.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index ab922331bc3a9..1b545057aaea4 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1157,11 +1157,8 @@ def test_use_nullable_dtypes_not_supported(self, fp): read_parquet(path, engine="fastparquet", use_nullable_dtypes=True) def test_close_file_handle_on_read_error(self): - df = pd.DataFrame({"a": [1, 2]}) - with tm.ensure_clean() as path: - df.to_parquet(path) - with open(path, 'r+b') as f: - f.seek(16) - f.write(b'breakit') + with tm.ensure_clean("test.parquet") as path: + pathlib.Path(path).write_bytes(b"breakit") with pytest.raises(Exception): # Not important which exception read_parquet(path, engine="fastparquet") + pathlib.Path(path).unlink(missing_ok=False) # This raises an error on Windows if the file is still open From 41cbd5f6fa3aeb655b7ca92e4ab45ae767d48960 Mon Sep 17 00:00:00 2001 From: Philipp A Date: Wed, 30 Mar 2022 17:25:43 +0200 Subject: [PATCH 7/7] Fix style --- pandas/tests/io/test_parquet.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 1b545057aaea4..7c04a51e803f6 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1159,6 +1159,7 @@ def test_use_nullable_dtypes_not_supported(self, fp): def test_close_file_handle_on_read_error(self): with tm.ensure_clean("test.parquet") as path: pathlib.Path(path).write_bytes(b"breakit") - with pytest.raises(Exception): # Not important which exception + with pytest.raises(Exception, match=""): # Not important which exception read_parquet(path, engine="fastparquet") - pathlib.Path(path).unlink(missing_ok=False) # This raises an error on Windows if the file is still open + # The next line raises an error on Windows if the file is still open + pathlib.Path(path).unlink(missing_ok=False)