Skip to content

Commit 16a2238

Browse files
committed
Revert fastparquet nullable dtype support (pandas-dev#42954)
1 parent 9c9bf7b commit 16a2238

File tree

3 files changed

+16
-23
lines changed

3 files changed

+16
-23
lines changed

doc/source/whatsnew/v1.3.2.rst

-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ Bug fixes
4747

4848
Other
4949
~~~~~
50-
- :meth:`pandas.read_parquet` now supports reading nullable dtypes with ``fastparquet`` versions above 0.7.1.
5150
-
5251

5352
.. ---------------------------------------------------------------------------

pandas/io/parquet.py

+10-17
Original file line numberDiff line numberDiff line change
@@ -309,20 +309,16 @@ def write(
309309
def read(
310310
self, path, columns=None, storage_options: StorageOptions = None, **kwargs
311311
):
312-
parquet_kwargs = {}
312+
parquet_kwargs: dict[str, Any] = {}
313313
use_nullable_dtypes = kwargs.pop("use_nullable_dtypes", False)
314-
# Technically works with 0.7.0, but was incorrect
315-
# so let's just require 0.7.1
316314
if Version(self.api.__version__) >= Version("0.7.1"):
317-
# Need to set even for use_nullable_dtypes = False,
318-
# since our defaults differ
319-
parquet_kwargs["pandas_nulls"] = use_nullable_dtypes
320-
else:
321-
if use_nullable_dtypes:
322-
raise ValueError(
323-
"The 'use_nullable_dtypes' argument is not supported for the "
324-
"fastparquet engine for fastparquet versions less than 0.7.1"
325-
)
315+
# We are disabling nullable dtypes for fastparquet pending discussion
316+
parquet_kwargs["pandas_nulls"] = False
317+
if use_nullable_dtypes:
318+
raise ValueError(
319+
"The 'use_nullable_dtypes' argument is not supported for the "
320+
"fastparquet engine"
321+
)
326322
path = stringify_path(path)
327323
handles = None
328324
if is_fsspec_url(path):
@@ -478,18 +474,15 @@ def read_parquet(
478474
479475
use_nullable_dtypes : bool, default False
480476
If True, use dtypes that use ``pd.NA`` as missing value indicator
481-
for the resulting DataFrame.
477+
for the resulting DataFrame. (only applicable for the ``pyarrow``
478+
engine)
482479
As new dtypes are added that support ``pd.NA`` in the future, the
483480
output with this option will change to use those dtypes.
484481
Note: this is an experimental option, and behaviour (e.g. additional
485482
supported dtypes) may change without notice.
486483
487484
.. versionadded:: 1.2.0
488485
489-
.. versionchanged:: 1.3.2
490-
``use_nullable_dtypes`` now works with the ``fastparquet`` engine
491-
if ``fastparquet`` is version 0.7.1 or higher.
492-
493486
**kwargs
494487
Any additional kwargs are passed to the engine.
495488

pandas/tests/io/test_parquet.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -579,18 +579,18 @@ def test_use_nullable_dtypes(self, engine):
579579
import pyarrow.parquet as pq
580580

581581
if engine == "fastparquet":
582-
pytest.importorskip(
583-
"fastparquet",
584-
"0.7.1",
585-
reason="fastparquet must be 0.7.1 or higher for nullable dtype support",
586-
)
582+
# We are manually disabling fastparquet's
583+
# nullable dtype support pending discussion
584+
pytest.skip("Fastparquet nullable dtype support is disabled")
587585

588586
table = pyarrow.table(
589587
{
590588
"a": pyarrow.array([1, 2, 3, None], "int64"),
591589
"b": pyarrow.array([1, 2, 3, None], "uint8"),
592590
"c": pyarrow.array(["a", "b", "c", None]),
593591
"d": pyarrow.array([True, False, True, None]),
592+
# Test that nullable dtypes are used even in the absence of nulls
593+
"e": pyarrow.array([1, 2, 3, 4], "int64"),
594594
}
595595
)
596596
with tm.ensure_clean() as path:
@@ -606,6 +606,7 @@ def test_use_nullable_dtypes(self, engine):
606606
"b": pd.array([1, 2, 3, None], dtype="UInt8"),
607607
"c": pd.array(["a", "b", "c", None], dtype="string"),
608608
"d": pd.array([True, False, True, None], dtype="boolean"),
609+
"e": pd.array([1, 2, 3, 4], dtype="Int64"),
609610
}
610611
)
611612
if engine == "fastparquet":

0 commit comments

Comments
 (0)