From c31bbdee71a6e531d7c8475dcd681944da059305 Mon Sep 17 00:00:00 2001 From: anirudhsekar96 Date: Sun, 23 Jan 2022 07:41:39 -0800 Subject: [PATCH 01/10] Fixes mypy attribute issue in io/parquet by adding a hasattr check --- pandas/io/parquet.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index c5bfbd2b6b35d..984f7944376bd 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -181,7 +181,8 @@ def write( is_dir=partition_cols is not None, ) if isinstance(path_or_handle, io.BufferedWriter): - path_or_handle = path_or_handle.raw.name + if hasattr(path_or_handle, "name"): + path_or_handle = path_or_handle.raw.name try: if partition_cols is not None: From f2fdb3f1b506ac949f7a88ade893ec202ecd7be3 Mon Sep 17 00:00:00 2001 From: anirudhsekar96 Date: Sun, 23 Jan 2022 07:46:40 -0800 Subject: [PATCH 02/10] changed attr to name attribute inside raw --- pandas/io/parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 984f7944376bd..3f8b0e3435d5c 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -181,7 +181,7 @@ def write( is_dir=partition_cols is not None, ) if isinstance(path_or_handle, io.BufferedWriter): - if hasattr(path_or_handle, "name"): + if hasattr(path_or_handle.raw, "name"): path_or_handle = path_or_handle.raw.name try: From 9834e85a3463ad2d35db7335add6d5ce5bd83d78 Mon Sep 17 00:00:00 2001 From: anirudhsekar96 Date: Sun, 23 Jan 2022 08:01:34 -0800 Subject: [PATCH 03/10] combined the conditional checks on io.BufferedWriter in io/parquet --- pandas/io/parquet.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 3f8b0e3435d5c..d605e0ab30ac2 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -180,9 +180,10 @@ def write( mode="wb", is_dir=partition_cols is not None, ) - if isinstance(path_or_handle, io.BufferedWriter): - if hasattr(path_or_handle.raw, "name"): - path_or_handle = path_or_handle.raw.name + if isinstance(path_or_handle, io.BufferedWriter) and hasattr( + path_or_handle.raw, "name" + ): + path_or_handle = path_or_handle.raw.name try: if partition_cols is not None: From 58d2be5e45248690d08a7500d3ceee38f99bf2b0 Mon Sep 17 00:00:00 2001 From: anirudhsekar96 Date: Sun, 23 Jan 2022 08:29:52 -0800 Subject: [PATCH 04/10] changed attribute from raw.name to name and added hasattr check --- pandas/io/parquet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index d605e0ab30ac2..e8bd0d814547b 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -181,9 +181,9 @@ def write( is_dir=partition_cols is not None, ) if isinstance(path_or_handle, io.BufferedWriter) and hasattr( - path_or_handle.raw, "name" + path_or_handle, "name" ): - path_or_handle = path_or_handle.raw.name + path_or_handle = path_or_handle.name try: if partition_cols is not None: From 3925d51a34442678b196a4b5821fc49b91b29572 Mon Sep 17 00:00:00 2001 From: anirudhsekar96 Date: Sun, 23 Jan 2022 09:48:45 -0800 Subject: [PATCH 05/10] Skipped checks for graceful cleanup in windows --- pandas/tests/io/test_parquet.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 93cc2fd5100c8..7a28a7c2a7ce6 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -2,6 +2,7 @@ import datetime from io import BytesIO import os +import sys import pathlib from warnings import ( catch_warnings, @@ -728,6 +729,7 @@ def test_unsupported_float16(self, pa): df = pd.DataFrame(data=data, columns=["fp16"]) self.check_external_error_on_write(df, pa, pyarrow.ArrowException) + @pytest.mark.skipif(sys.platform == "win32", reason="Cleanup is not working with Windows") @pytest.mark.parametrize("path_type", [str, pathlib.Path]) def test_unsupported_float16_cleanup(self, pa, path_type): # #44847, #44914 From 54e2d9cf4b48b1c179b1d9da380a24c52d50be1d Mon Sep 17 00:00:00 2001 From: anirudhsekar96 Date: Sun, 23 Jan 2022 10:03:55 -0800 Subject: [PATCH 06/10] Changed checks for path object in parquet, linted and sorted imports --- pandas/io/parquet.py | 6 ++++-- pandas/tests/io/test_parquet.py | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index e8bd0d814547b..6fcbd69d880af 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -180,8 +180,10 @@ def write( mode="wb", is_dir=partition_cols is not None, ) - if isinstance(path_or_handle, io.BufferedWriter) and hasattr( - path_or_handle, "name" + if ( + isinstance(path_or_handle, io.BufferedWriter) + and isinstance(path_or_handle.raw, io.FileIO) + and isinstance(path_or_handle.name, (str, bytes)) ): path_or_handle = path_or_handle.name diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 7a28a7c2a7ce6..5b65e708f0322 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -729,7 +729,9 @@ def test_unsupported_float16(self, pa): df = pd.DataFrame(data=data, columns=["fp16"]) self.check_external_error_on_write(df, pa, pyarrow.ArrowException) - @pytest.mark.skipif(sys.platform == "win32", reason="Cleanup is not working with Windows") + @pytest.mark.skipif( + sys.platform == "win32", reason="Cleanup is not working with Windows" + ) @pytest.mark.parametrize("path_type", [str, pathlib.Path]) def test_unsupported_float16_cleanup(self, pa, path_type): # #44847, #44914 From d8495d6040b51458dd548fc35ea23628fd0baec7 Mon Sep 17 00:00:00 2001 From: anirudhsekar96 Date: Sun, 23 Jan 2022 10:30:55 -0800 Subject: [PATCH 07/10] Removed sys dependency and changed pytest check for windows --- pandas/tests/io/test_parquet.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 5b65e708f0322..9f1fbd0076454 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -2,7 +2,6 @@ import datetime from io import BytesIO import os -import sys import pathlib from warnings import ( catch_warnings, @@ -14,6 +13,7 @@ from pandas._config import get_option +from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( pa_version_under2p0, pa_version_under5p0, @@ -729,9 +729,6 @@ def test_unsupported_float16(self, pa): df = pd.DataFrame(data=data, columns=["fp16"]) self.check_external_error_on_write(df, pa, pyarrow.ArrowException) - @pytest.mark.skipif( - sys.platform == "win32", reason="Cleanup is not working with Windows" - ) @pytest.mark.parametrize("path_type", [str, pathlib.Path]) def test_unsupported_float16_cleanup(self, pa, path_type): # #44847, #44914 @@ -740,6 +737,10 @@ def test_unsupported_float16_cleanup(self, pa, path_type): data = np.arange(2, 10, dtype=np.float16) df = pd.DataFrame(data=data, columns=["fp16"]) + # Cleanup fails in windows + if is_platform_windows(): + pytest.skip() + with tm.ensure_clean() as path_str: path = path_type(path_str) with tm.external_error_raised(pyarrow.ArrowException): From 661a222000795edb39e4ba3d072af29a20318fe8 Mon Sep 17 00:00:00 2001 From: anirudhsekar96 Date: Sun, 23 Jan 2022 11:39:02 -0800 Subject: [PATCH 08/10] Changed the condition in io/parquet to hasattr from isinstance of io.FileIO --- pandas/io/parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 6fcbd69d880af..1b8526275c155 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -182,7 +182,7 @@ def write( ) if ( isinstance(path_or_handle, io.BufferedWriter) - and isinstance(path_or_handle.raw, io.FileIO) + and hasattr(path_or_handle, "name") and isinstance(path_or_handle.name, (str, bytes)) ): path_or_handle = path_or_handle.name From 77712a9edb2b568591c82d66dd01d2c65189ad61 Mon Sep 17 00:00:00 2001 From: anirudhsekar96 Date: Sun, 23 Jan 2022 12:00:15 -0800 Subject: [PATCH 09/10] Changed the pytest skip condition to decorator and added a message --- pandas/tests/io/test_parquet.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 9f1fbd0076454..40caff8beee35 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -729,6 +729,13 @@ def test_unsupported_float16(self, pa): df = pd.DataFrame(data=data, columns=["fp16"]) self.check_external_error_on_write(df, pa, pyarrow.ArrowException) + @pytest.mark.skipif( + is_platform_windows(), + reason=( + "PyArrow does not cleanup of partial files dumps when unsupported " + "dtypes are passed to_parquet function in windows" + ), + ) @pytest.mark.parametrize("path_type", [str, pathlib.Path]) def test_unsupported_float16_cleanup(self, pa, path_type): # #44847, #44914 @@ -737,10 +744,6 @@ def test_unsupported_float16_cleanup(self, pa, path_type): data = np.arange(2, 10, dtype=np.float16) df = pd.DataFrame(data=data, columns=["fp16"]) - # Cleanup fails in windows - if is_platform_windows(): - pytest.skip() - with tm.ensure_clean() as path_str: path = path_type(path_str) with tm.external_error_raised(pyarrow.ArrowException): From 02df442bb28c60c9052e85054e3bb7fbf4323e79 Mon Sep 17 00:00:00 2001 From: anirudhsekar96 Date: Sun, 23 Jan 2022 13:34:54 -0800 Subject: [PATCH 10/10] Changed pytest skipif to mark.xfail --- pandas/tests/io/test_parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 40caff8beee35..2eb8738d88b41 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -729,7 +729,7 @@ def test_unsupported_float16(self, pa): df = pd.DataFrame(data=data, columns=["fp16"]) self.check_external_error_on_write(df, pa, pyarrow.ArrowException) - @pytest.mark.skipif( + @pytest.mark.xfail( is_platform_windows(), reason=( "PyArrow does not cleanup of partial files dumps when unsupported "