diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 46320355512d1..33747d2a6dd83 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -18,20 +18,23 @@ def get_engine(engine: str) -> "BaseImpl":
 
     if engine == "auto":
         # try engines in this order
-        try:
-            return PyArrowImpl()
-        except ImportError:
-            pass
+        engine_classes = [PyArrowImpl, FastParquetImpl]
 
-        try:
-            return FastParquetImpl()
-        except ImportError:
-            pass
+        error_msgs = ""
+        for engine_class in engine_classes:
+            try:
+                return engine_class()
+            except ImportError as err:
+                error_msgs += "\n - " + str(err)
 
         raise ImportError(
             "Unable to find a usable engine; "
             "tried using: 'pyarrow', 'fastparquet'.\n"
-            "pyarrow or fastparquet is required for parquet support"
+            "A suitable version of "
+            "pyarrow or fastparquet is required for parquet "
+            "support.\n"
+            "Trying to import the above resulted in these errors:"
+            f"{error_msgs}"
         )
 
     if engine == "pyarrow":
@@ -105,9 +108,7 @@ def write(
                 **kwargs,
             )
         else:
-            self.api.parquet.write_table(
-                table, path, compression=compression, **kwargs,
-            )
+            self.api.parquet.write_table(table, path, compression=compression, **kwargs)
 
     def read(self, path, columns=None, **kwargs):
         path, _, _, should_close = get_filepath_or_buffer(path)
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index d0eaafb787222..94cf16c20e6c4 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -35,6 +35,7 @@
 except ImportError:
     _HAVE_FASTPARQUET = False
 
+
 pytestmark = pytest.mark.filterwarnings(
     "ignore:RangeIndex.* is deprecated:DeprecationWarning"
 )
@@ -223,6 +224,49 @@ def test_options_get_engine(fp, pa):
         assert isinstance(get_engine("fastparquet"), FastParquetImpl)
 
 
+def test_get_engine_auto_error_message():
+    # Expect different error messages from get_engine(engine="auto")
+    # if engines aren't installed vs. are installed but older than required
+    from pandas.compat._optional import VERSIONS
+
+    # Do we have engines installed, but older than the minimum in VERSIONS?
+    pa_min_ver = VERSIONS.get("pyarrow")
+    fp_min_ver = VERSIONS.get("fastparquet")
+    have_pa_bad_version = (
+        False
+        if not _HAVE_PYARROW
+        else LooseVersion(pyarrow.__version__) < LooseVersion(pa_min_ver)
+    )
+    have_fp_bad_version = (
+        False
+        if not _HAVE_FASTPARQUET
+        else LooseVersion(fastparquet.__version__) < LooseVersion(fp_min_ver)
+    )
+    # Do we have usable engines installed?
+    have_usable_pa = _HAVE_PYARROW and not have_pa_bad_version
+    have_usable_fp = _HAVE_FASTPARQUET and not have_fp_bad_version
+
+    if not have_usable_pa and not have_usable_fp:
+        # No usable engines found; "auto" must raise, reporting each engine's error.
+        if have_pa_bad_version:
+            match = f"Pandas requires version .{pa_min_ver}. or newer of .pyarrow."
+            with pytest.raises(ImportError, match=match):
+                get_engine("auto")
+        else:
+            match = "Missing optional dependency .pyarrow."
+            with pytest.raises(ImportError, match=match):
+                get_engine("auto")
+
+        if have_fp_bad_version:
+            match = f"Pandas requires version .{fp_min_ver}. or newer of .fastparquet."
+            with pytest.raises(ImportError, match=match):
+                get_engine("auto")
+        else:
+            match = "Missing optional dependency .fastparquet."
+            with pytest.raises(ImportError, match=match):
+                get_engine("auto")
+
+
 def test_cross_engine_pa_fp(df_cross_compat, pa, fp):
     # cross-compat with differing reading/writing engines
 