From 3430e35d429a050bc00018595bc4e09880ed2cc7 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 1 Oct 2023 20:25:16 +0100 Subject: [PATCH 1/5] BUG: Fix convert_dtypes for all na column and arrow backend BUG: Fix convert_dtypes for all na column and arrow backend --- pandas/core/dtypes/cast.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 74e785be06356..8b4c7af0b3a52 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1133,7 +1133,12 @@ def convert_dtypes( base_dtype = np.dtype(str) else: base_dtype = inferred_dtype - pa_type = to_pyarrow_type(base_dtype) + if base_dtype.kind == "O" and isna(input_array).all(): + import pyarrow as pa + + pa_type = pa.null() + else: + pa_type = to_pyarrow_type(base_dtype) if pa_type is not None: inferred_dtype = ArrowDtype(pa_type) elif dtype_backend == "numpy_nullable" and isinstance(inferred_dtype, ArrowDtype): From f16bacf2a9326795ce226ce826f026c9af474a29 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 1 Oct 2023 20:29:26 +0100 Subject: [PATCH 2/5] Add test --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/tests/series/methods/test_convert_dtypes.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 9dc095e6de6ff..b3671d3618791 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -281,7 +281,7 @@ Numeric Conversion ^^^^^^^^^^ -- +- Bug in :meth:`Series.convert_dtypes` not converting an all-NA column to ``null[pyarrow]`` (:issue:`55346`) - Strings diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index d1c79d0f00365..f621604faae4b 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -265,3 +265,11 @@ def 
test_convert_dtypes_pyarrow_to_np_nullable(self): result = ser.convert_dtypes(dtype_backend="numpy_nullable") expected = pd.Series(range(2), dtype="Int32") tm.assert_series_equal(result, expected) + + def test_convert_dtypes_pyarrow_null(self): + # GH#55346 + pa = pytest.importorskip("pyarrow") + ser = pd.Series([None, None]) + result = ser.convert_dtypes(dtype_backend="pyarrow") + expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null())) + tm.assert_series_equal(result, expected) From 1c37e08d1c13161fb523eb8006b60123e58c99ee Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 1 Oct 2023 21:05:27 +0100 Subject: [PATCH 3/5] Update cast.py --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 8b4c7af0b3a52..fa1d5097971a3 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1133,7 +1133,7 @@ def convert_dtypes( base_dtype = np.dtype(str) else: base_dtype = inferred_dtype - if base_dtype.kind == "O" and isna(input_array).all(): + if base_dtype.kind == "O" and len(input_array) > 0 and isna(input_array).all(): import pyarrow as pa pa_type = pa.null() From 66cb2ea3e59e99406398bdb9dcd6d0607bc982b3 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 1 Oct 2023 22:24:48 +0100 Subject: [PATCH 4/5] Fix --- pandas/core/dtypes/cast.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index fa1d5097971a3..d53478bc309b7 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1133,7 +1133,11 @@ def convert_dtypes( base_dtype = np.dtype(str) else: base_dtype = inferred_dtype - if base_dtype.kind == "O" and len(input_array) > 0 and isna(input_array).all(): + if ( + base_dtype.kind == "O" + and len(input_array) > 0 + and isna(input_array).all() + ): import pyarrow as pa pa_type = pa.null() From 
3bbf6a6954a769ac87ead1d32fe206a7642665e6 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 1 Oct 2023 23:00:55 +0100 Subject: [PATCH 5/5] Fix typing --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d53478bc309b7..3208a742738a3 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1134,7 +1134,7 @@ def convert_dtypes( else: base_dtype = inferred_dtype if ( - base_dtype.kind == "O" + base_dtype.kind == "O" # type: ignore[union-attr] and len(input_array) > 0 and isna(input_array).all() ):