Skip to content

Commit 5034b78

Browse files
Backport PR pandas-dev#57174 on branch 2.2.x (BUG: Interchange protocol implementation allows non-string column names) (pandas-dev#57203)
Backport PR pandas-dev#57174: BUG: Interchange protocol implementation allows non-string column names Co-authored-by: Marco Edward Gorelli <[email protected]>
1 parent be8f9f2 commit 5034b78

File tree

4 files changed

+34
-3
lines changed

4 files changed

+34
-3
lines changed

doc/source/whatsnew/v2.2.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Bug fixes
3232
~~~~~~~~~
3333
- Fixed bug in :func:`pandas.api.interchange.from_dataframe` which was raising for Nullable integers (:issue:`55069`)
3434
- Fixed bug in :func:`pandas.api.interchange.from_dataframe` which was raising for empty inputs (:issue:`56700`)
35+
- Fixed bug in :func:`pandas.api.interchange.from_dataframe` which wasn't converting column names to strings (:issue:`56701`)
3536
- Fixed bug in :meth:`DataFrame.__getitem__` for empty :class:`DataFrame` with Copy-on-Write enabled (:issue:`57130`)
3637

3738
.. ---------------------------------------------------------------------------

pandas/core/interchange/column.py

+8
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@ def __init__(self, column: pd.Series, allow_copy: bool = True) -> None:
7777
Note: doesn't deal with extension arrays yet, just assume a regular
7878
Series/ndarray for now.
7979
"""
80+
if isinstance(column, pd.DataFrame):
81+
raise TypeError(
82+
"Expected a Series, got a DataFrame. This likely happened "
83+
"because you called __dataframe__ on a DataFrame which, "
84+
"after converting column names to string, resulted in duplicated "
85+
f"names: {column.columns}. Please rename these columns before "
86+
"using the interchange protocol."
87+
)
8088
if not isinstance(column, pd.Series):
8189
raise NotImplementedError(f"Columns of type {type(column)} not handled yet")
8290

pandas/core/interchange/dataframe.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def __init__(self, df: DataFrame, allow_copy: bool = True) -> None:
3232
Constructor - an instance of this (private) class is returned from
3333
`pd.DataFrame.__dataframe__`.
3434
"""
35-
self._df = df
35+
self._df = df.rename(columns=str, copy=False)
3636
self._allow_copy = allow_copy
3737

3838
def __dataframe__(

pandas/tests/interchange/test_impl.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,6 @@ def test_missing_from_masked():
180180
}
181181
)
182182

183-
df2 = df.__dataframe__()
184-
185183
rng = np.random.default_rng(2)
186184
dict_null = {col: rng.integers(low=0, high=len(df)) for col in df.columns}
187185
for col, num_nulls in dict_null.items():
@@ -382,6 +380,30 @@ def test_large_string():
382380
tm.assert_frame_equal(result, expected)
383381

384382

383+
def test_non_str_names():
384+
# https://github.com/pandas-dev/pandas/issues/56701
385+
df = pd.Series([1, 2, 3], name=0).to_frame()
386+
names = df.__dataframe__().column_names()
387+
assert names == ["0"]
388+
389+
390+
def test_non_str_names_w_duplicates():
391+
# https://github.com/pandas-dev/pandas/issues/56701
392+
df = pd.DataFrame({"0": [1, 2, 3], 0: [4, 5, 6]})
393+
dfi = df.__dataframe__()
394+
with pytest.raises(
395+
TypeError,
396+
match=(
397+
"Expected a Series, got a DataFrame. This likely happened because you "
398+
"called __dataframe__ on a DataFrame which, after converting column "
399+
r"names to string, resulted in duplicated names: Index\(\['0', '0'\], "
400+
r"dtype='object'\). Please rename these columns before using the "
401+
"interchange protocol."
402+
),
403+
):
404+
pd.api.interchange.from_dataframe(dfi, allow_copy=False)
405+
406+
385407
@pytest.mark.parametrize(
386408
"dtype", ["Int8", pytest.param("Int8[pyarrow]", marks=td.skip_if_no("pyarrow"))]
387409
)

0 commit comments

Comments
 (0)