diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 120ee978292d6..ba967a62cc2d0 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -647,6 +647,7 @@ Numeric
 
 Conversion
 ^^^^^^^^^^
+- Assigning a PyArrow array in a ``pd.DataFrame`` column now produces a ``pd.Series`` with a ``pd.ArrowDtype`` (:issue:`56994`)
 - Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`)
 - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
 - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`)
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index 8df4f7e3e08f9..870e18ce10bb1 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -23,6 +23,7 @@
     get_supported_dtype,
     is_supported_dtype,
 )
+from pandas.compat import pa_version_under10p1
 
 from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.cast import (
@@ -40,7 +41,10 @@
     is_object_dtype,
     pandas_dtype,
 )
-from pandas.core.dtypes.dtypes import NumpyEADtype
+from pandas.core.dtypes.dtypes import (
+    ArrowDtype,
+    NumpyEADtype,
+)
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
     ABCExtensionArray,
@@ -549,6 +553,9 @@ def sanitize_array(
     np.ndarray or ExtensionArray
     """
     original_dtype = dtype
+    if not pa_version_under10p1 and lib.is_pyarrow_array(data) and dtype is None:
+        dtype = ArrowDtype(data.type)
+
     if isinstance(data, ma.MaskedArray):
         data = sanitize_masked_array(data)
 
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 9defb97394635..40bbdf70780d7 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -1068,6 +1068,18 @@ def test_comp_masked_numpy(self, masked_dtype, comparison_op):
         expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_()))
         tm.assert_series_equal(result, expected)
 
+    def test_assign_column_in_dataframe(self, data):
+        df = pd.DataFrame(data=data, columns=["A"], dtype=data.dtype)
+        df["B"] = pa.array(data, type=data.dtype.pyarrow_dtype)
+        df["C"] = pd.Series(data)
+        result = df.dtypes
+        expected = pd.Series({"A": data.dtype, "B": data.dtype, "C": data.dtype})
+        tm.assert_series_equal(result, expected)
+
+    def test_create_series_dtype(self, data):
+        ser = pd.Series(data._pa_array)
+        assert ser.dtype == data.dtype
+
 
 class TestLogicalOps:
     """Various Series and DataFrame logical ops methods."""