Skip to content

Commit 9876c64

Browse files
committed
use buffer dtype in interchange from_dataframe
1 parent c4efa92 commit 9876c64

File tree

1 file changed

+37
-23
lines changed

1 file changed

+37
-23
lines changed

pandas/core/interchange/from_dataframe.py

+37 -23
Original file line number | Diff line number | Diff line change
@@ -266,21 +266,29 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
266266

267267
assert buffers["offsets"], "String buffers must contain offsets"
268268
# Retrieve the data buffer containing the UTF-8 code units
269-
data_buff, protocol_data_dtype = buffers["data"]
270-
# We're going to reinterpret the buffer as uint8, so make sure we can do it safely
271-
assert protocol_data_dtype[1] == 8
272-
assert protocol_data_dtype[2] in (
273-
ArrowCTypes.STRING,
274-
ArrowCTypes.LARGE_STRING,
275-
) # format_str == utf-8
276-
# Convert the buffers to NumPy arrays. In order to go from STRING to
277-
# an equivalent ndarray, we claim that the buffer is uint8 (i.e., a byte array)
278-
data_dtype = (
279-
DtypeKind.UINT,
280-
8,
281-
ArrowCTypes.UINT8,
282-
Endianness.NATIVE,
283-
)
269+
data_buff, data_dtype = buffers["data"]
270+
271+
if (data_dtype[1] == 8) and (
272+
data_dtype[2]
273+
in (
274+
ArrowCTypes.STRING,
275+
ArrowCTypes.LARGE_STRING,
276+
)
277+
): # format_str == utf-8
278+
# temporary workaround to keep backwards compatibility due to
279+
# https://github.com/pandas-dev/pandas/issues/54781
280+
281+
# We're going to reinterpret the buffer as uint8, so make sure we can do it
282+
# safely
283+
284+
# Convert the buffers to NumPy arrays. In order to go from STRING to
285+
# an equivalent ndarray, we claim that the buffer is uint8 (i.e., a byte array)
286+
data_dtype = (
287+
DtypeKind.UINT,
288+
8,
289+
ArrowCTypes.UINT8,
290+
Endianness.NATIVE,
291+
)
284292
# Specify zero offset as we don't want to chunk the string data
285293
data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=data_buff.bufsize)
286294

@@ -378,16 +386,22 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray | pd.Series, Any
378386
buffers = col.get_buffers()
379387

380388
_, _, format_str, _ = col.dtype
381-
dbuf, dtype = buffers["data"]
382-
# Consider dtype being `uint` to get number of units passed since the 01.01.1970
383-
data = buffer_to_ndarray(
384-
dbuf,
385-
(
389+
dbuf, data_dtype = buffers["data"]
390+
391+
if data_dtype[0] == DtypeKind.DATETIME:
392+
# temporary workaround to keep backwards compatibility due to
393+
# https://github.com/pandas-dev/pandas/issues/54781
394+
# Consider dtype being `uint` to get number of units passed since the 01.01.1970
395+
data_dtype = (
386396
DtypeKind.UINT,
387-
dtype[1],
388-
getattr(ArrowCTypes, f"UINT{dtype[1]}"),
397+
data_dtype[1],
398+
getattr(ArrowCTypes, f"UINT{data_dtype[1]}"),
389399
Endianness.NATIVE,
390-
),
400+
)
401+
402+
data = buffer_to_ndarray(
403+
dbuf,
404+
data_dtype,
391405
offset=col.offset,
392406
length=col.size(),
393407
)

0 commit comments

Comments
 (0)