Skip to content

Commit b1c5b5d

Browse files
authored
Add pyarrow support to python engine in read_csv (#50318)
1 parent b02e41a commit b1c5b5d

File tree

3 files changed

+8
-3
lines changed

3 files changed

+8
-3
lines changed

doc/source/whatsnew/v2.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
4242
Additionally, a new global configuration, ``mode.nullable_backend``, can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
4343
to select the nullable dtypes implementation.
4444

45-
* :func:`read_csv` (with ``engine="pyarrow"``)
45+
* :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``)
4646
* :func:`read_excel`
4747
* :func:`read_parquet`
4848
* :func:`read_orc`

pandas/io/parsers/readers.py

+1
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,7 @@ def _read(
562562
elif (
563563
kwds.get("use_nullable_dtypes", False)
564564
and get_option("mode.nullable_backend") == "pyarrow"
565+
and kwds.get("engine") == "c"
565566
):
566567
raise NotImplementedError(
567568
f"use_nullable_dtypes=True and engine={kwds['engine']} with "

pandas/tests/io/parser/dtypes/test_dtypes_basic.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -492,13 +492,14 @@ def test_use_nullable_dtypes_pyarrow_backend(all_parsers, request):
492492
# GH#36712
493493
pa = pytest.importorskip("pyarrow")
494494
parser = all_parsers
495+
engine = parser.engine
495496

496497
data = """a,b,c,d,e,f,g,h,i,j
497498
1,2.5,True,a,,,,,12-31-2019,
498499
3,4.5,False,b,6,7.5,True,a,12-31-2019,
499500
"""
500501
with pd.option_context("mode.nullable_backend", "pyarrow"):
501-
if parser.engine != "pyarrow":
502+
if engine == "c":
502503
request.node.add_marker(
503504
pytest.mark.xfail(
504505
raises=NotImplementedError,
@@ -517,7 +518,10 @@ def test_use_nullable_dtypes_pyarrow_backend(all_parsers, request):
517518
"e": pd.Series([pd.NA, 6], dtype="int64[pyarrow]"),
518519
"f": pd.Series([pd.NA, 7.5], dtype="float64[pyarrow]"),
519520
"g": pd.Series([pd.NA, True], dtype="bool[pyarrow]"),
520-
"h": pd.Series(["", "a"], dtype=pd.ArrowDtype(pa.string())),
521+
"h": pd.Series(
522+
[pd.NA if engine == "python" else "", "a"],
523+
dtype=pd.ArrowDtype(pa.string()),
524+
),
521525
"i": pd.Series([Timestamp("2019-12-31")] * 2),
522526
"j": pd.Series([pd.NA, pd.NA], dtype="null[pyarrow]"),
523527
}

0 commit comments

Comments
 (0)