Skip to content

Commit 8ed7dae

Browse files
authored
BUG: Interchange protocol implementation allows non-string column names (#57174)
* convert non-string colnames to strings in interchange protocol
* remove irrelevant statement
* informative error message if two columns end up becoming duplicates
1 parent 1d1672d commit 8ed7dae

File tree

4 files changed

+34
-3
lines changed

4 files changed

+34
-3
lines changed

doc/source/whatsnew/v2.2.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Bug fixes
3232
~~~~~~~~~
3333
- Fixed bug in :func:`pandas.api.interchange.from_dataframe` which was raising for Nullable integers (:issue:`55069`)
3434
- Fixed bug in :func:`pandas.api.interchange.from_dataframe` which was raising for empty inputs (:issue:`56700`)
35+
- Fixed bug in :func:`pandas.api.interchange.from_dataframe` which wasn't converting column names to strings (:issue:`56701`)
3536
- Fixed bug in :meth:`DataFrame.__getitem__` for empty :class:`DataFrame` with Copy-on-Write enabled (:issue:`57130`)
3637

3738
.. ---------------------------------------------------------------------------

pandas/core/interchange/column.py

+8
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@ def __init__(self, column: pd.Series, allow_copy: bool = True) -> None:
7777
Note: doesn't deal with extension arrays yet, just assume a regular
7878
Series/ndarray for now.
7979
"""
80+
if isinstance(column, pd.DataFrame):
81+
raise TypeError(
82+
"Expected a Series, got a DataFrame. This likely happened "
83+
"because you called __dataframe__ on a DataFrame which, "
84+
"after converting column names to string, resulted in duplicated "
85+
f"names: {column.columns}. Please rename these columns before "
86+
"using the interchange protocol."
87+
)
8088
if not isinstance(column, pd.Series):
8189
raise NotImplementedError(f"Columns of type {type(column)} not handled yet")
8290

pandas/core/interchange/dataframe.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def __init__(self, df: DataFrame, allow_copy: bool = True) -> None:
3232
Constructor - an instance of this (private) class is returned from
3333
`pd.DataFrame.__dataframe__`.
3434
"""
35-
self._df = df
35+
self._df = df.rename(columns=str, copy=False)
3636
self._allow_copy = allow_copy
3737

3838
def __dataframe__(

pandas/tests/interchange/test_impl.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,6 @@ def test_missing_from_masked():
162162
}
163163
)
164164

165-
df2 = df.__dataframe__()
166-
167165
rng = np.random.default_rng(2)
168166
dict_null = {col: rng.integers(low=0, high=len(df)) for col in df.columns}
169167
for col, num_nulls in dict_null.items():
@@ -395,6 +393,30 @@ def test_large_string():
395393
tm.assert_frame_equal(result, expected)
396394

397395

396+
def test_non_str_names():
397+
# https://github.com/pandas-dev/pandas/issues/56701
398+
df = pd.Series([1, 2, 3], name=0).to_frame()
399+
names = df.__dataframe__().column_names()
400+
assert names == ["0"]
401+
402+
403+
def test_non_str_names_w_duplicates():
404+
# https://github.com/pandas-dev/pandas/issues/56701
405+
df = pd.DataFrame({"0": [1, 2, 3], 0: [4, 5, 6]})
406+
dfi = df.__dataframe__()
407+
with pytest.raises(
408+
TypeError,
409+
match=(
410+
"Expected a Series, got a DataFrame. This likely happened because you "
411+
"called __dataframe__ on a DataFrame which, after converting column "
412+
r"names to string, resulted in duplicated names: Index\(\['0', '0'\], "
413+
r"dtype='object'\). Please rename these columns before using the "
414+
"interchange protocol."
415+
),
416+
):
417+
pd.api.interchange.from_dataframe(dfi, allow_copy=False)
418+
419+
398420
@pytest.mark.parametrize(
399421
"dtype", ["Int8", pytest.param("Int8[pyarrow]", marks=td.skip_if_no("pyarrow"))]
400422
)

0 commit comments

Comments
 (0)