Skip to content

Commit 8d31eb0

Browse files
committed
use buffer dtype in interchange from_dataframe
1 parent c4efa92 commit 8d31eb0

File tree

1 file changed

+35
-21
lines changed

1 file changed

+35
-21
lines changed

pandas/core/interchange/from_dataframe.py

+35-21
Original file line numberDiff line numberDiff line change
@@ -266,23 +266,31 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
266266

267267
assert buffers["offsets"], "String buffers must contain offsets"
268268
# Retrieve the data buffer containing the UTF-8 code units
269-
data_buff, protocol_data_dtype = buffers["data"]
270-
# We're going to reinterpret the buffer as uint8, so make sure we can do it safely
271-
assert protocol_data_dtype[1] == 8
272-
assert protocol_data_dtype[2] in (
273-
ArrowCTypes.STRING,
274-
ArrowCTypes.LARGE_STRING,
275-
) # format_str == utf-8
276-
# Convert the buffers to NumPy arrays. In order to go from STRING to
277-
# an equivalent ndarray, we claim that the buffer is uint8 (i.e., a byte array)
278-
data_dtype = (
279-
DtypeKind.UINT,
280-
8,
281-
ArrowCTypes.UINT8,
282-
Endianness.NATIVE,
283-
)
269+
data_buff, dtype = buffers["data"]
270+
271+
if (dtype[1] == 8) and (
272+
dtype[2]
273+
in (
274+
ArrowCTypes.STRING,
275+
ArrowCTypes.LARGE_STRING,
276+
)
277+
): # format_str == utf-8
278+
# temporary workaround to keep backwards compatibility due to
279+
# https://github.com/pandas-dev/pandas/issues/54781
280+
281+
# We're going to reinterpret the buffer as uint8, so make sure we can do it
282+
# safely
283+
284+
# Convert the buffers to NumPy arrays. In order to go from STRING to
285+
# an equivalent ndarray, we claim that the buffer is uint8 (i.e., a byte array)
286+
dtype = (
287+
DtypeKind.UINT,
288+
8,
289+
ArrowCTypes.UINT8,
290+
Endianness.NATIVE,
291+
)
284292
# Specify zero offset as we don't want to chunk the string data
285-
data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=data_buff.bufsize)
293+
data = buffer_to_ndarray(data_buff, dtype, offset=0, length=data_buff.bufsize)
286294

287295
# Retrieve the offsets buffer containing the index offsets demarcating
288296
# the beginning and the ending of each string
@@ -379,15 +387,21 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray | pd.Series, Any
379387

380388
_, _, format_str, _ = col.dtype
381389
dbuf, dtype = buffers["data"]
382-
# Consider dtype being `uint` to get number of units passed since the 01.01.1970
383-
data = buffer_to_ndarray(
384-
dbuf,
385-
(
390+
391+
if dtype[0] == DtypeKind.DATETIME:
392+
# temporary workaround to keep backwards compatibility due to
393+
# https://github.com/pandas-dev/pandas/issues/54781
394+
# Consider dtype being `uint` to get number of units passed since the 01.01.1970
395+
dtype = (
386396
DtypeKind.UINT,
387397
dtype[1],
388398
getattr(ArrowCTypes, f"UINT{dtype[1]}"),
389399
Endianness.NATIVE,
390-
),
400+
)
401+
402+
data = buffer_to_ndarray(
403+
dbuf,
404+
dtype,
391405
offset=col.offset,
392406
length=col.size(),
393407
)

0 commit comments

Comments
 (0)