From 3d5d488fdd2d10d07920b671b445ff31a7710a04 Mon Sep 17 00:00:00 2001 From: JEACO Date: Mon, 6 Apr 2020 13:53:12 +0200 Subject: [PATCH 1/9] Collect import error messages and display them --- pandas/io/parquet.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 46320355512d1..7912636113e59 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -18,20 +18,22 @@ def get_engine(engine: str) -> "BaseImpl": if engine == "auto": # try engines in this order - try: - return PyArrowImpl() - except ImportError: - pass + engine_impls = [PyArrowImpl, FastParquetImpl] - try: - return FastParquetImpl() - except ImportError: - pass + error_msgs = [] + for eimpl in engine_impls: + try: + return eimpl() + except ImportError as ie: + error_msgs.append(ie.msg) raise ImportError( "Unable to find a usable engine; " "tried using: 'pyarrow', 'fastparquet'.\n" - "pyarrow or fastparquet is required for parquet support" + "A suitable version of " + "pyarrow or fastparquet is required for parquet " + "support. \n" + "Trying to import the above resulted in these errors: \n" + "\n".join(error_msgs) ) if engine == "pyarrow": From 29bfc49b6db73c2454f775266d0f06f240a0bf67 Mon Sep 17 00:00:00 2001 From: JEACO Date: Mon, 6 Apr 2020 13:59:53 +0200 Subject: [PATCH 2/9] black --- pandas/io/parquet.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 7912636113e59..12b3ff56683f0 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -25,7 +25,7 @@ def get_engine(engine: str) -> "BaseImpl": try: return eimpl() except ImportError as ie: - error_msgs.append(ie.msg) + error_msgs.append(ie.msg) raise ImportError( "Unable to find a usable engine; " @@ -33,7 +33,8 @@ def get_engine(engine: str) -> "BaseImpl": "A suitable version of " "pyarrow or fastparquet is required for parquet " "support. \n" - "Trying to import the above resulted in these errors: \n" + "\n".join(error_msgs) + "Trying to import the above resulted in these errors: \n" + + "\n".join(error_msgs) ) if engine == "pyarrow": @@ -107,9 +108,7 @@ def write( **kwargs, ) else: - self.api.parquet.write_table( - table, path, compression=compression, **kwargs, - ) + self.api.parquet.write_table(table, path, compression=compression, **kwargs) def read(self, path, columns=None, **kwargs): path, _, _, should_close = get_filepath_or_buffer(path) From bca4bd11fcd316d6a65a9dbf52726371f6586212 Mon Sep 17 00:00:00 2001 From: JEACO Date: Tue, 7 Apr 2020 15:46:51 +0200 Subject: [PATCH 3/9] Placate mypy who insists that "ImportError" has no attribute "msg" --- pandas/io/parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 12b3ff56683f0..100f6a2fc1e0f 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -25,7 +25,7 @@ def get_engine(engine: str) -> "BaseImpl": try: return eimpl() except ImportError as ie: - error_msgs.append(ie.msg) + error_msgs.append(str(ie)) raise ImportError( "Unable to find a usable engine; " From 2bc7dd00a93967c8984cf912d649bfa1aa442fcf Mon Sep 17 00:00:00 2001 From: JEACO Date: Tue, 7 Apr 2020 15:51:56 +0200 Subject: [PATCH 4/9] Rename variables --- pandas/io/parquet.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 100f6a2fc1e0f..c63d017a598e6 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -18,14 +18,14 @@ def get_engine(engine: str) -> "BaseImpl": if engine == "auto": # try engines in this order - engine_impls = [PyArrowImpl, FastParquetImpl] + engine_classes = [PyArrowImpl, FastParquetImpl] error_msgs = [] - for eimpl in engine_impls: + for ec in engine_classes: try: - return eimpl() - except ImportError as ie: - error_msgs.append(str(ie)) + return ec() + except ImportError as err: + error_msgs.append(str(err)) raise ImportError( "Unable to find a usable engine; " From ed519501f734daaf9fb3ac90cc49709b98fd3ad2 Mon Sep 17 00:00:00 2001 From: jfcorbett Date: Tue, 7 Apr 2020 15:54:29 +0200 Subject: [PATCH 5/9] Apply suggestions from code review Strip error string for great minification benefit Co-Authored-By: Marc Garcia --- pandas/io/parquet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index c63d017a598e6..294a69b67247e 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -32,8 +32,8 @@ def get_engine(engine: str) -> "BaseImpl": "tried using: 'pyarrow', 'fastparquet'.\n" "A suitable version of " "pyarrow or fastparquet is required for parquet " - "support. \n" - "Trying to import the above resulted in these errors: \n" + "support.\n" + "Trying to import the above resulted in these errors:\n" + "\n".join(error_msgs) ) From 7eb45b893b279446c7b21a89e3ad2ca2da0d2e99 Mon Sep 17 00:00:00 2001 From: JEACO Date: Tue, 7 Apr 2020 16:01:22 +0200 Subject: [PATCH 6/9] Refactor extract variable joined_error_messages --- pandas/io/parquet.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 294a69b67247e..b4cd98cb2d735 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -27,6 +27,7 @@ def get_engine(engine: str) -> "BaseImpl": except ImportError as err: error_msgs.append(str(err)) + joined_error_msgs = "\n".join(error_msgs) raise ImportError( "Unable to find a usable engine; " "tried using: 'pyarrow', 'fastparquet'.\n" @@ -34,7 +35,7 @@ def get_engine(engine: str) -> "BaseImpl": "pyarrow or fastparquet is required for parquet " "support.\n" "Trying to import the above resulted in these errors:\n" - + "\n".join(error_msgs) + f"{joined_error_msgs}" ) if engine == "pyarrow": From 59a3877f4373672cb916ed08ce6ab7af92b0db6c Mon Sep 17 00:00:00 2001 From: JEACO Date: Wed, 8 Apr 2020 13:26:02 +0200 Subject: [PATCH 7/9] Add test for get_engine(engine="auto") error messages --- pandas/tests/io/test_parquet.py | 44 +++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index d0eaafb787222..94cf16c20e6c4 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -35,6 +35,7 @@ except ImportError: _HAVE_FASTPARQUET = False + pytestmark = pytest.mark.filterwarnings( "ignore:RangeIndex.* is deprecated:DeprecationWarning" ) @@ -223,6 +224,49 @@ def test_options_get_engine(fp, pa): assert isinstance(get_engine("fastparquet"), FastParquetImpl) +def test_get_engine_auto_error_message(): + # Expect different error messages from get_engine(engine="auto") + # if engines aren't installed vs. are installed but bad version + from pandas.compat._optional import VERSIONS + + # Do we have engines installed, but a bad version of them? + pa_min_ver = VERSIONS.get("pyarrow") + fp_min_ver = VERSIONS.get("fastparquet") + have_pa_bad_version = ( + False + if not _HAVE_PYARROW + else LooseVersion(pyarrow.__version__) < LooseVersion(pa_min_ver) + ) + have_fp_bad_version = ( + False + if not _HAVE_FASTPARQUET + else LooseVersion(fastparquet.__version__) < LooseVersion(fp_min_ver) + ) + # Do we have usable engines installed? + have_usable_pa = _HAVE_PYARROW and not have_pa_bad_version + have_usable_fp = _HAVE_FASTPARQUET and not have_fp_bad_version + + if not have_usable_pa and not have_usable_fp: + # No usable engines found. + if have_pa_bad_version: + match = f"Pandas requires version .{pa_min_ver}. or newer of .pyarrow." + with pytest.raises(ImportError, match=match): + get_engine("auto") + else: + match = "Missing optional dependency .pyarrow." + with pytest.raises(ImportError, match=match): + get_engine("auto") + + if have_fp_bad_version: + match = f"Pandas requires version .{fp_min_ver}. or newer of .fastparquet." + with pytest.raises(ImportError, match=match): + get_engine("auto") + else: + match = "Missing optional dependency .fastparquet." + with pytest.raises(ImportError, match=match): + get_engine("auto") + + def test_cross_engine_pa_fp(df_cross_compat, pa, fp): # cross-compat with differing reading/writing engines From dd529c45f5c8f1d0ec6a65824544792a6535aede Mon Sep 17 00:00:00 2001 From: JEACO Date: Wed, 8 Apr 2020 16:28:06 +0200 Subject: [PATCH 8/9] Rename --- pandas/io/parquet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index b4cd98cb2d735..9da490dabd954 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -21,9 +21,9 @@ def get_engine(engine: str) -> "BaseImpl": engine_classes = [PyArrowImpl, FastParquetImpl] error_msgs = [] - for ec in engine_classes: + for engine_class in engine_classes: try: - return ec() + return engine_class() except ImportError as err: error_msgs.append(str(err)) From f348001cc1d0d03b7718663b388d95f2e156435f Mon Sep 17 00:00:00 2001 From: JEACO Date: Wed, 8 Apr 2020 16:36:43 +0200 Subject: [PATCH 9/9] Refactor collection of error messages: use string instead of list --- pandas/io/parquet.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 9da490dabd954..33747d2a6dd83 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -20,22 +20,21 @@ def get_engine(engine: str) -> "BaseImpl": # try engines in this order engine_classes = [PyArrowImpl, FastParquetImpl] - error_msgs = [] + error_msgs = "" for engine_class in engine_classes: try: return engine_class() except ImportError as err: - error_msgs.append(str(err)) + error_msgs += "\n - " + str(err) - joined_error_msgs = "\n".join(error_msgs) raise ImportError( "Unable to find a usable engine; " "tried using: 'pyarrow', 'fastparquet'.\n" "A suitable version of " "pyarrow or fastparquet is required for parquet " "support.\n" - "Trying to import the above resulted in these errors:\n" - f"{joined_error_msgs}" + "Trying to import the above resulted in these errors:" + f"{error_msgs}" ) if engine == "pyarrow":