From ba3e02d7993368a32aa53eeed4294024dbeddd61 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 22 May 2023 14:41:50 -0700 Subject: [PATCH] BUG: Correct .type for pyarrow.map_ and pyarrow.struct types --- doc/source/whatsnew/v2.0.2.rst | 1 + pandas/core/dtypes/dtypes.py | 2 ++ pandas/tests/extension/test_arrow.py | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst index 52d2730195a56..cec201db7e216 100644 --- a/doc/source/whatsnew/v2.0.2.rst +++ b/doc/source/whatsnew/v2.0.2.rst @@ -25,6 +25,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- Bug in :class:`.arrays.ArrowExtensionArray` incorrectly assigning ``dict`` instead of ``list`` for ``.type`` with ``pyarrow.map_`` and raising a ``NotImplementedError`` with ``pyarrow.struct`` (:issue:`53328`) - Bug in :func:`api.interchange.from_dataframe` was raising ``IndexError`` on empty categorical data (:issue:`53077`) - Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame`'s of incorrect sizes when called on slices (:issue:`52824`) - Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index a3481cbe9eae1..96776c0e4b890 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -2051,6 +2051,8 @@ def type(self): elif pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type): return list elif pa.types.is_map(pa_type): + return list + elif pa.types.is_struct(pa_type): return dict elif pa.types.is_null(pa_type): # TODO: None? pd.NA? pa.null? diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 1b0786bcd5d2e..9129e84700a55 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1570,7 +1570,8 @@ def test_mode_dropna_false_mode_na(data): [pa.large_string(), str], [pa.list_(pa.int64()), list], [pa.large_list(pa.int64()), list], - [pa.map_(pa.string(), pa.int64()), dict], + [pa.map_(pa.string(), pa.int64()), list], + [pa.struct([("f1", pa.int8()), ("f2", pa.string())]), dict], [pa.dictionary(pa.int64(), pa.int64()), CategoricalDtypeType], ], )