Skip to content

Commit b49d6de

Browse files
Backport PR #52763 on branch 2.0.x (BUG: interchange categorical_column_to_series() should not accept only PandasColumn) (#52793)
Backport PR #52763: BUG: interchange categorical_column_to_series() should not accept only PandasColumn Co-authored-by: Marco Edward Gorelli <[email protected]>
1 parent dd8533e commit b49d6de

File tree

3 files changed

+25
-4
lines changed

3 files changed

+25
-4
lines changed

doc/source/whatsnew/v2.0.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ Bug fixes
2727
- Bug in :attr:`Series.dt.days` that would overflow ``int32`` number of days (:issue:`52391`)
2828
- Bug in :class:`arrays.DatetimeArray` constructor returning an incorrect unit when passed a non-nanosecond numpy datetime array (:issue:`52555`)
2929
- Bug in :func:`Series.median` with :class:`ArrowDtype` returning an approximate median (:issue:`52679`)
30+
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on categorical dtypes (:issue:`49889`)
3031
- Bug in :func:`pandas.testing.assert_series_equal` where ``check_dtype=False`` would still raise for datetime or timedelta types with different resolutions (:issue:`52449`)
3132
- Bug in :func:`read_csv` casting PyArrow datetimes to NumPy when ``dtype_backend="pyarrow"`` and ``parse_dates`` is set causing a performance bottleneck in the process (:issue:`52546`)
3233
- Bug in :func:`to_datetime` and :func:`to_timedelta` when trying to convert numeric data with a :class:`ArrowDtype` (:issue:`52425`)

pandas/core/interchange/from_dataframe.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import numpy as np
88

99
import pandas as pd
10-
from pandas.core.interchange.column import PandasColumn
1110
from pandas.core.interchange.dataframe_protocol import (
1211
Buffer,
1312
Column,
@@ -181,9 +180,15 @@ def categorical_column_to_series(col: Column) -> tuple[pd.Series, Any]:
181180
raise NotImplementedError("Non-dictionary categoricals not supported yet")
182181

183182
cat_column = categorical["categories"]
184-
# for mypy/pyright
185-
assert isinstance(cat_column, PandasColumn), "categories must be a PandasColumn"
186-
categories = np.array(cat_column._col)
183+
if hasattr(cat_column, "_col"):
184+
# Item "Column" of "Optional[Column]" has no attribute "_col"
185+
# Item "None" of "Optional[Column]" has no attribute "_col"
186+
categories = np.array(cat_column._col) # type: ignore[union-attr]
187+
else:
188+
raise NotImplementedError(
189+
"Interchanging categorical columns isn't supported yet, and our "
190+
"fallback of using the `col._col` attribute (a ndarray) failed."
191+
)
187192
buffers = col.get_buffers()
188193

189194
codes_buff, codes_dtype = buffers["data"]

pandas/tests/interchange/test_impl.py

+15
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,21 @@ def test_categorical_dtype(data):
7474
tm.assert_frame_equal(df, from_dataframe(df.__dataframe__()))
7575

7676

77+
def test_categorical_pyarrow():
78+
# GH 49889
79+
pa = pytest.importorskip("pyarrow", "11.0.0")
80+
81+
arr = ["Mon", "Tue", "Mon", "Wed", "Mon", "Thu", "Fri", "Sat", "Sun"]
82+
table = pa.table({"weekday": pa.array(arr).dictionary_encode()})
83+
exchange_df = table.__dataframe__()
84+
result = from_dataframe(exchange_df)
85+
weekday = pd.Categorical(
86+
arr, categories=["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
87+
)
88+
expected = pd.DataFrame({"weekday": weekday})
89+
tm.assert_frame_equal(result, expected)
90+
91+
7792
@pytest.mark.parametrize(
7893
"data", [int_data, uint_data, float_data, bool_data, datetime_data]
7994
)

0 commit comments

Comments
 (0)