Skip to content

CLN: Use type_mapper instead of manual conversion #51766

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 8, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 3 additions & 12 deletions pandas/io/feather_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,8 @@
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc

from pandas import (
arrays,
get_option,
)
import pandas as pd
from pandas import get_option
from pandas.core.api import (
DataFrame,
RangeIndex,
Expand Down Expand Up @@ -173,11 +171,4 @@ def read_feather(
return pa_table.to_pandas(types_mapper=_arrow_dtype_mapping().get)

elif dtype_backend == "pyarrow":
return DataFrame(
{
col_name: arrays.ArrowExtensionArray(pa_col)
for col_name, pa_col in zip(
pa_table.column_names, pa_table.itercolumns()
)
}
)
return pa_table.to_pandas(types_mapper=pd.ArrowDtype)
11 changes: 2 additions & 9 deletions pandas/io/orc.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
is_unsigned_integer_dtype,
)

from pandas.core.arrays import ArrowExtensionArray
import pandas as pd
from pandas.core.frame import DataFrame

from pandas.io.common import (
Expand Down Expand Up @@ -124,14 +124,7 @@ def read_orc(
if use_nullable_dtypes:
dtype_backend = get_option("mode.dtype_backend")
if dtype_backend == "pyarrow":
df = DataFrame(
{
col_name: ArrowExtensionArray(pa_col)
for col_name, pa_col in zip(
pa_table.column_names, pa_table.itercolumns()
)
}
)
df = pa_table.to_pandas(types_mapper=pd.ArrowDtype)
else:
from pandas.io._util import _arrow_dtype_mapping

Expand Down
15 changes: 6 additions & 9 deletions pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc

import pandas as pd
from pandas import (
DataFrame,
MultiIndex,
arrays,
get_option,
)
from pandas.core.shared_docs import _shared_docs
Expand Down Expand Up @@ -250,14 +250,11 @@ def read(
if dtype_backend == "pandas":
result = pa_table.to_pandas(**to_pandas_kwargs)
elif dtype_backend == "pyarrow":
result = DataFrame(
{
col_name: arrays.ArrowExtensionArray(pa_col)
for col_name, pa_col in zip(
pa_table.column_names, pa_table.itercolumns()
)
}
)
# Incompatible types in assignment (expression has type
# "Type[ArrowDtype]", target has type overloaded function
to_pandas_kwargs["types_mapper"] = pd.ArrowDtype # type: ignore[assignment] # noqa
result = pa_table.to_pandas(**to_pandas_kwargs)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you should be able to move the if pandas: .. elif pyarrow: .. higher up where to_pandas_kwargs["types_mapper"] = ... is defined. And then just have a single result = pa_table.to_pandas(**to_pandas_kwargs) here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Avoided this on purpose, because this would change semantics. The other if is only reached if nullable_dtypes is set as well, and I wanted to wait for the other discussion before touching anything.


if manager == "array":
result = result._as_manager("array", copy=False)
return result
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1059,6 +1059,20 @@ def test_read_use_nullable_types_pyarrow_config(self, pa, df_full):
expected=expected,
)

def test_read_use_nullable_types_pyarrow_config_index(self, pa):
    # Round-trip a frame whose (named) index uses an ArrowDtype while the
    # "mode.dtype_backend" option is set to "pyarrow"; the index must
    # survive the write/read cycle unchanged.
    idx = pd.Index([3, 4], name="test")
    df = pd.DataFrame({"a": [1, 2]}, index=idx, dtype="int64[pyarrow]")
    expected = df.copy()

    with pd.option_context("mode.dtype_backend", "pyarrow"):
        read_kwargs = {"use_nullable_dtypes": True}
        check_round_trip(
            df, engine=pa, read_kwargs=read_kwargs, expected=expected
        )


class TestParquetFastParquet(Base):
def test_basic(self, fp, df_full):
Expand Down