Skip to content

Commit a5a3300

Browse files
authored
BUG: Add numpy_nullable support to arrow csv parser (#51985)
1 parent 0c920b8 commit a5a3300

File tree

2 files changed

+9
-3
lines changed

2 files changed

+9
-3
lines changed

pandas/io/parsers/arrow_parser_wrapper.py

+3
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import pandas as pd
1010
from pandas import DataFrame
1111

12+
from pandas.io._util import _arrow_dtype_mapping
1213
from pandas.io.parsers.base_parser import ParserBase
1314

1415
if TYPE_CHECKING:
@@ -151,6 +152,8 @@ def read(self) -> DataFrame:
151152
)
152153
if self.kwds["dtype_backend"] == "pyarrow":
153154
frame = table.to_pandas(types_mapper=pd.ArrowDtype)
155+
elif self.kwds["dtype_backend"] == "numpy_nullable":
156+
frame = table.to_pandas(types_mapper=_arrow_dtype_mapping().get)
154157
else:
155158
frame = table.to_pandas()
156159
return self._finalize_pandas_output(frame)

pandas/tests/io/parser/dtypes/test_dtypes_basic.py

+6 -3
Original file line number | Diff line number | Diff line change
@@ -402,7 +402,6 @@ def test_dtypes_defaultdict_invalid(all_parsers):
402402
parser.read_csv(StringIO(data), dtype=dtype)
403403

404404

405-
@pytest.mark.usefixtures("pyarrow_xfail")
406405
def test_dtype_backend(all_parsers):
407406
# GH#36712
408407

@@ -424,9 +423,13 @@ def test_dtype_backend(all_parsers):
424423
"e": pd.Series([pd.NA, 6], dtype="Int64"),
425424
"f": pd.Series([pd.NA, 7.5], dtype="Float64"),
426425
"g": pd.Series([pd.NA, True], dtype="boolean"),
427-
"h": pd.Series([pd.NA, "a"], dtype="string"),
426+
"h": pd.Series(
427+
[pd.NA if parser.engine != "pyarrow" else "", "a"], dtype="string"
428+
),
428429
"i": pd.Series([Timestamp("2019-12-31")] * 2),
429-
"j": pd.Series([pd.NA, pd.NA], dtype="Int64"),
430+
"j": pd.Series(
431+
[pd.NA, pd.NA], dtype="Int64" if parser.engine != "pyarrow" else object
432+
),
430433
}
431434
)
432435
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)