Skip to content

Commit 45fd72a

Browse files
authored
Revert fastparquet nullable dtype support (#42954)
1 parent 14cf6e2 commit 45fd72a

File tree

3 files changed

+16
-23
lines changed

3 files changed

+16
-23
lines changed

doc/source/whatsnew/v1.3.2.rst

-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ Bug fixes
4545

4646
Other
4747
~~~~~
48-
- :meth:`pandas.read_parquet` now supports reading nullable dtypes with ``fastparquet`` versions above 0.7.1.
4948
-
5049

5150
.. ---------------------------------------------------------------------------

pandas/io/parquet.py

+10-17
Original file line numberDiff line numberDiff line change
@@ -309,20 +309,16 @@ def write(
309309
def read(
310310
self, path, columns=None, storage_options: StorageOptions = None, **kwargs
311311
):
312-
parquet_kwargs = {}
312+
parquet_kwargs: dict[str, Any] = {}
313313
use_nullable_dtypes = kwargs.pop("use_nullable_dtypes", False)
314-
# Technically works with 0.7.0, but was incorrect
315-
# so lets just require 0.7.1
316314
if Version(self.api.__version__) >= Version("0.7.1"):
317-
# Need to set even for use_nullable_dtypes = False,
318-
# since our defaults differ
319-
parquet_kwargs["pandas_nulls"] = use_nullable_dtypes
320-
else:
321-
if use_nullable_dtypes:
322-
raise ValueError(
323-
"The 'use_nullable_dtypes' argument is not supported for the "
324-
"fastparquet engine for fastparquet versions less than 0.7.1"
325-
)
315+
# We are disabling nullable dtypes for fastparquet pending discussion
316+
parquet_kwargs["pandas_nulls"] = False
317+
if use_nullable_dtypes:
318+
raise ValueError(
319+
"The 'use_nullable_dtypes' argument is not supported for the "
320+
"fastparquet engine"
321+
)
326322
path = stringify_path(path)
327323
handles = None
328324
if is_fsspec_url(path):
@@ -478,18 +474,15 @@ def read_parquet(
478474
479475
use_nullable_dtypes : bool, default False
480476
If True, use dtypes that use ``pd.NA`` as missing value indicator
481-
for the resulting DataFrame.
477+
for the resulting DataFrame. (only applicable for the ``pyarrow``
478+
engine)
482479
As new dtypes are added that support ``pd.NA`` in the future, the
483480
output with this option will change to use those dtypes.
484481
Note: this is an experimental option, and behaviour (e.g. additional
485482
support dtypes) may change without notice.
486483
487484
.. versionadded:: 1.2.0
488485
489-
.. versionchanged:: 1.3.2
490-
``use_nullable_dtypes`` now works with the ``fastparquet`` engine
491-
if ``fastparquet`` is version 0.7.1 or higher.
492-
493486
**kwargs
494487
Any additional kwargs are passed to the engine.
495488

pandas/tests/io/test_parquet.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -600,18 +600,18 @@ def test_use_nullable_dtypes(self, engine):
600600
import pyarrow.parquet as pq
601601

602602
if engine == "fastparquet":
603-
pytest.importorskip(
604-
"fastparquet",
605-
"0.7.1",
606-
reason="fastparquet must be 0.7.1 or higher for nullable dtype support",
607-
)
603+
# We are manually disabling fastparquet's
604+
# nullable dtype support pending discussion
605+
pytest.skip("Fastparquet nullable dtype support is disabled")
608606

609607
table = pyarrow.table(
610608
{
611609
"a": pyarrow.array([1, 2, 3, None], "int64"),
612610
"b": pyarrow.array([1, 2, 3, None], "uint8"),
613611
"c": pyarrow.array(["a", "b", "c", None]),
614612
"d": pyarrow.array([True, False, True, None]),
613+
# Test that nullable dtypes are used even in the absence of nulls
614+
"e": pyarrow.array([1, 2, 3, 4], "int64"),
615615
}
616616
)
617617
with tm.ensure_clean() as path:
@@ -627,6 +627,7 @@ def test_use_nullable_dtypes(self, engine):
627627
"b": pd.array([1, 2, 3, None], dtype="UInt8"),
628628
"c": pd.array(["a", "b", "c", None], dtype="string"),
629629
"d": pd.array([True, False, True, None], dtype="boolean"),
630+
"e": pd.array([1, 2, 3, 4], dtype="Int64"),
630631
}
631632
)
632633
if engine == "fastparquet":

0 commit comments

Comments
 (0)