diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst index b6c6df8424ef1..e10fc77699dd6 100644 --- a/doc/source/whatsnew/v2.0.2.rst +++ b/doc/source/whatsnew/v2.0.2.rst @@ -20,6 +20,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- Bug in :func:`api.interchange.from_dataframe` was raising ``IndexError`` on empty categorical data (:issue:`53077`) - Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame`'s of incorrect sizes when called on slices (:issue:`52824`) - Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`) - Bug in :func:`to_timedelta` was raising ``ValueError`` with ``pandas.NA`` (:issue:`52909`) diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index 45d6bdd7917c1..78e530f915117 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -202,7 +202,10 @@ def categorical_column_to_series(col: Column) -> tuple[pd.Series, Any]: # Doing module in order to not get ``IndexError`` for # out-of-bounds sentinel values in `codes` - values = categories[codes % len(categories)] + if len(categories) > 0: + values = categories[codes % len(categories)] + else: + values = codes cat = pd.Categorical( values, categories=categories, ordered=categorical["is_ordered"] diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index d393ba6fd3957..301cccec9e0ed 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -89,6 +89,18 @@ def test_categorical_pyarrow(): tm.assert_frame_equal(result, expected) +def test_empty_categorical_pyarrow(): + # https://github.com/pandas-dev/pandas/issues/53077 + pa = pytest.importorskip("pyarrow", "11.0.0") + + arr = [None] + table = pa.table({"arr": pa.array(arr, "float64").dictionary_encode()}) + exchange_df = table.__dataframe__() + result = pd.api.interchange.from_dataframe(exchange_df) + expected = pd.DataFrame({"arr": pd.Categorical([np.nan])}) + tm.assert_frame_equal(result, expected) + + def test_large_string_pyarrow(): # GH 52795 pa = pytest.importorskip("pyarrow", "11.0.0")