diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index f54cea744f4d2..df0af0b5ed156 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -44,7 +44,6 @@ Bug fixes Other ~~~~~ -- :meth:`pandas.read_parquet` now supports reading nullable dtypes with ``fastparquet`` versions above 0.7.1. - .. --------------------------------------------------------------------------- diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index f0aeeb3e6c893..49384cfb2e554 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -309,20 +309,16 @@ def write( def read( self, path, columns=None, storage_options: StorageOptions = None, **kwargs ): - parquet_kwargs = {} + parquet_kwargs: dict[str, Any] = {} use_nullable_dtypes = kwargs.pop("use_nullable_dtypes", False) - # Technically works with 0.7.0, but was incorrect - # so lets just require 0.7.1 if Version(self.api.__version__) >= Version("0.7.1"): - # Need to set even for use_nullable_dtypes = False, - # since our defaults differ - parquet_kwargs["pandas_nulls"] = use_nullable_dtypes - else: - if use_nullable_dtypes: - raise ValueError( - "The 'use_nullable_dtypes' argument is not supported for the " - "fastparquet engine for fastparquet versions less than 0.7.1" - ) + # We are disabling nullable dtypes for fastparquet pending discussion + parquet_kwargs["pandas_nulls"] = False + if use_nullable_dtypes: + raise ValueError( + "The 'use_nullable_dtypes' argument is not supported for the " + "fastparquet engine" + ) path = stringify_path(path) handles = None if is_fsspec_url(path): @@ -478,7 +474,8 @@ def read_parquet( use_nullable_dtypes : bool, default False If True, use dtypes that use ``pd.NA`` as missing value indicator - for the resulting DataFrame. + for the resulting DataFrame. (only applicable for the ``pyarrow`` + engine) As new dtypes are added that support ``pd.NA`` in the future, the output with this option will change to use those dtypes. 
Note: this is an experimental option, and behaviour (e.g. additional @@ -486,10 +483,6 @@ def read_parquet( .. versionadded:: 1.2.0 - .. versionchanged:: 1.3.2 - ``use_nullable_dtypes`` now works with the the ``fastparquet`` engine - if ``fastparquet`` is version 0.7.1 or higher. - **kwargs Any additional kwargs are passed to the engine. diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index b1f7f15dfa99a..3dbfcba35344c 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -600,11 +600,9 @@ def test_use_nullable_dtypes(self, engine): import pyarrow.parquet as pq if engine == "fastparquet": - pytest.importorskip( - "fastparquet", - "0.7.1", - reason="fastparquet must be 0.7.1 or higher for nullable dtype support", - ) + # We are manually disabling fastparquet's + # nullable dtype support pending discussion + pytest.skip("Fastparquet nullable dtype support is disabled") table = pyarrow.table( { @@ -612,6 +610,8 @@ def test_use_nullable_dtypes(self, engine): "b": pyarrow.array([1, 2, 3, None], "uint8"), "c": pyarrow.array(["a", "b", "c", None]), "d": pyarrow.array([True, False, True, None]), + # Test that nullable dtypes are used even in the absence of nulls + "e": pyarrow.array([1, 2, 3, 4], "int64"), } ) with tm.ensure_clean() as path: @@ -627,6 +627,7 @@ def test_use_nullable_dtypes(self, engine): "b": pd.array([1, 2, 3, None], dtype="UInt8"), "c": pd.array(["a", "b", "c", None], dtype="string"), "d": pd.array([True, False, True, None], dtype="boolean"), + "e": pd.array([1, 2, 3, 4], dtype="Int64"), } ) if engine == "fastparquet":