Skip to content

Commit 569f904

Browse files
Backport PR pandas-dev#55227 on branch 2.1.x (BUG: Interchange object data buffer has the wrong dtype / from_dataframe incorrect) (pandas-dev#55863)
Backport PR pandas-dev#55227: BUG: Interchange object data buffer has the wrong dtype / from_dataframe incorrect

Co-authored-by: Marco Edward Gorelli <[email protected]>
Co-authored-by: MarcoGorelli <[email protected]>
1 parent c9854d9 commit 569f904

File tree

2 files changed

+30
-8
lines changed

2 files changed

+30
-8
lines changed

pandas/core/interchange/from_dataframe.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -266,10 +266,9 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
266266

267267
assert buffers["offsets"], "String buffers must contain offsets"
268268
# Retrieve the data buffer containing the UTF-8 code units
269-
data_buff, protocol_data_dtype = buffers["data"]
269+
data_buff, _ = buffers["data"]
270270
# We're going to reinterpret the buffer as uint8, so make sure we can do it safely
271-
assert protocol_data_dtype[1] == 8
272-
assert protocol_data_dtype[2] in (
271+
assert col.dtype[2] in (
273272
ArrowCTypes.STRING,
274273
ArrowCTypes.LARGE_STRING,
275274
) # format_str == utf-8
@@ -377,15 +376,16 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray | pd.Series, Any
377376
"""
378377
buffers = col.get_buffers()
379378

380-
_, _, format_str, _ = col.dtype
381-
dbuf, dtype = buffers["data"]
379+
_, col_bit_width, format_str, _ = col.dtype
380+
dbuf, _ = buffers["data"]
382381
# Consider dtype being signed `int` to get the number of units elapsed since 01.01.1970
382+
383383
data = buffer_to_ndarray(
384384
dbuf,
385385
(
386-
DtypeKind.UINT,
387-
dtype[1],
388-
getattr(ArrowCTypes, f"UINT{dtype[1]}"),
386+
DtypeKind.INT,
387+
col_bit_width,
388+
getattr(ArrowCTypes, f"INT{col_bit_width}"),
389389
Endianness.NATIVE,
390390
),
391391
offset=col.offset,

pandas/tests/interchange/test_impl.py

+22
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
DtypeKind,
1919
)
2020
from pandas.core.interchange.from_dataframe import from_dataframe
21+
from pandas.core.interchange.utils import ArrowCTypes
2122

2223

2324
@pytest.fixture
@@ -340,3 +341,24 @@ def test_interchange_from_non_pandas_tz_aware(request):
340341
dtype="datetime64[us, Asia/Kathmandu]",
341342
)
342343
tm.assert_frame_equal(expected, result)
344+
345+
346+
def test_interchange_from_corrected_buffer_dtypes(monkeypatch) -> None:
347+
# https://github.com/pandas-dev/pandas/issues/54781
348+
df = pd.DataFrame({"a": ["foo", "bar"]}).__dataframe__()
349+
interchange = df.__dataframe__()
350+
column = interchange.get_column_by_name("a")
351+
buffers = column.get_buffers()
352+
buffers_data = buffers["data"]
353+
buffer_dtype = buffers_data[1]
354+
buffer_dtype = (
355+
DtypeKind.UINT,
356+
8,
357+
ArrowCTypes.UINT8,
358+
buffer_dtype[3],
359+
)
360+
buffers["data"] = (buffers_data[0], buffer_dtype)
361+
column.get_buffers = lambda: buffers
362+
interchange.get_column_by_name = lambda _: column
363+
monkeypatch.setattr(df, "__dataframe__", lambda allow_copy: interchange)
364+
pd.api.interchange.from_dataframe(df)

0 commit comments

Comments
 (0)