Skip to content

Commit 463c5a9

Browse files
authored
Backport PR #51766 on branch 2.0.x (CLN: Use types_mapper instead of manual conversion) (#51840)
1 parent 13cd542 commit 463c5a9

File tree

6 files changed

+31
-55
lines changed

6 files changed

+31
-55
lines changed

pandas/io/feather_format.py

+3-12
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,8 @@
1818
from pandas.compat._optional import import_optional_dependency
1919
from pandas.util._decorators import doc
2020

21-
from pandas import (
22-
arrays,
23-
get_option,
24-
)
21+
import pandas as pd
22+
from pandas import get_option
2523
from pandas.core.api import (
2624
DataFrame,
2725
RangeIndex,
@@ -173,11 +171,4 @@ def read_feather(
173171
return pa_table.to_pandas(types_mapper=_arrow_dtype_mapping().get)
174172

175173
elif dtype_backend == "pyarrow":
176-
return DataFrame(
177-
{
178-
col_name: arrays.ArrowExtensionArray(pa_col)
179-
for col_name, pa_col in zip(
180-
pa_table.column_names, pa_table.itercolumns()
181-
)
182-
}
183-
)
174+
return pa_table.to_pandas(types_mapper=pd.ArrowDtype)

pandas/io/json/_json.py

+3-10
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
from pandas.core.dtypes.generic import ABCIndex
5555

5656
from pandas import (
57+
ArrowDtype,
5758
DataFrame,
5859
MultiIndex,
5960
Series,
@@ -960,16 +961,8 @@ def read(self) -> DataFrame | Series:
960961
pa_table = pyarrow_json.read_json(self.data)
961962
if self.use_nullable_dtypes:
962963
if get_option("mode.dtype_backend") == "pyarrow":
963-
from pandas.arrays import ArrowExtensionArray
964-
965-
return DataFrame(
966-
{
967-
col_name: ArrowExtensionArray(pa_col)
968-
for col_name, pa_col in zip(
969-
pa_table.column_names, pa_table.itercolumns()
970-
)
971-
}
972-
)
964+
return pa_table.to_pandas(types_mapper=ArrowDtype)
965+
973966
elif get_option("mode.dtype_backend") == "pandas":
974967
from pandas.io._util import _arrow_dtype_mapping
975968

pandas/io/orc.py

+2-9
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
is_unsigned_integer_dtype,
2929
)
3030

31-
from pandas.core.arrays import ArrowExtensionArray
31+
import pandas as pd
3232
from pandas.core.frame import DataFrame
3333

3434
from pandas.io.common import get_handle
@@ -99,14 +99,7 @@ def read_orc(
9999
if use_nullable_dtypes:
100100
dtype_backend = get_option("mode.dtype_backend")
101101
if dtype_backend == "pyarrow":
102-
df = DataFrame(
103-
{
104-
col_name: ArrowExtensionArray(pa_col)
105-
for col_name, pa_col in zip(
106-
pa_table.column_names, pa_table.itercolumns()
107-
)
108-
}
109-
)
102+
df = pa_table.to_pandas(types_mapper=pd.ArrowDtype)
110103
else:
111104
from pandas.io._util import _arrow_dtype_mapping
112105

pandas/io/parquet.py

+6-9
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@
2222
from pandas.errors import AbstractMethodError
2323
from pandas.util._decorators import doc
2424

25+
import pandas as pd
2526
from pandas import (
2627
DataFrame,
2728
MultiIndex,
28-
arrays,
2929
get_option,
3030
)
3131
from pandas.core.shared_docs import _shared_docs
@@ -250,14 +250,11 @@ def read(
250250
if dtype_backend == "pandas":
251251
result = pa_table.to_pandas(**to_pandas_kwargs)
252252
elif dtype_backend == "pyarrow":
253-
result = DataFrame(
254-
{
255-
col_name: arrays.ArrowExtensionArray(pa_col)
256-
for col_name, pa_col in zip(
257-
pa_table.column_names, pa_table.itercolumns()
258-
)
259-
}
260-
)
253+
# Incompatible types in assignment (expression has type
254+
# "Type[ArrowDtype]", target has type overloaded function)
255+
to_pandas_kwargs["types_mapper"] = pd.ArrowDtype # type: ignore[assignment] # noqa
256+
result = pa_table.to_pandas(**to_pandas_kwargs)
257+
261258
if manager == "array":
262259
result = result._as_manager("array", copy=False)
263260
return result

pandas/io/parsers/arrow_parser_wrapper.py

+2-7
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55

66
from pandas.core.dtypes.inference import is_integer
77

8+
import pandas as pd
89
from pandas import (
910
DataFrame,
10-
arrays,
1111
get_option,
1212
)
1313

@@ -153,12 +153,7 @@ def read(self) -> DataFrame:
153153
self.kwds["use_nullable_dtypes"]
154154
and get_option("mode.dtype_backend") == "pyarrow"
155155
):
156-
frame = DataFrame(
157-
{
158-
col_name: arrays.ArrowExtensionArray(pa_col)
159-
for col_name, pa_col in zip(table.column_names, table.itercolumns())
160-
}
161-
)
156+
frame = table.to_pandas(types_mapper=pd.ArrowDtype)
162157
else:
163158
frame = table.to_pandas()
164159
return self._finalize_pandas_output(frame)

pandas/tests/io/test_parquet.py

+15-8
Original file line numberDiff line numberDiff line change
@@ -1034,14 +1034,7 @@ def test_read_use_nullable_types_pyarrow_config(self, pa, df_full):
10341034
df["bool_with_none"] = [True, None, True]
10351035

10361036
pa_table = pyarrow.Table.from_pandas(df)
1037-
expected = pd.DataFrame(
1038-
{
1039-
col_name: pd.arrays.ArrowExtensionArray(pa_column)
1040-
for col_name, pa_column in zip(
1041-
pa_table.column_names, pa_table.itercolumns()
1042-
)
1043-
}
1044-
)
1037+
expected = pa_table.to_pandas(types_mapper=pd.ArrowDtype)
10451038
# pyarrow infers datetimes as us instead of ns
10461039
expected["datetime"] = expected["datetime"].astype("timestamp[us][pyarrow]")
10471040
expected["datetime_with_nat"] = expected["datetime_with_nat"].astype(
@@ -1059,6 +1052,20 @@ def test_read_use_nullable_types_pyarrow_config(self, pa, df_full):
10591052
expected=expected,
10601053
)
10611054

1055+
def test_read_use_nullable_types_pyarrow_config_index(self, pa):
1056+
df = pd.DataFrame(
1057+
{"a": [1, 2]}, index=pd.Index([3, 4], name="test"), dtype="int64[pyarrow]"
1058+
)
1059+
expected = df.copy()
1060+
1061+
with pd.option_context("mode.dtype_backend", "pyarrow"):
1062+
check_round_trip(
1063+
df,
1064+
engine=pa,
1065+
read_kwargs={"use_nullable_dtypes": True},
1066+
expected=expected,
1067+
)
1068+
10621069

10631070
class TestParquetFastParquet(Base):
10641071
def test_basic(self, fp, df_full):

0 commit comments

Comments
 (0)