use buffer dtype in interchange from_dataframe

MarcoGorelli · MarcoGorelli · commit 8d31eb028f26 · 2023-09-21T12:54:06.000+02:00
diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py
@@ -266,23 +266,31 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
 
     assert buffers["offsets"], "String buffers must contain offsets"
     # Retrieve the data buffer containing the UTF-8 code units
-    data_buff, protocol_data_dtype = buffers["data"]
-    # We're going to reinterpret the buffer as uint8, so make sure we can do it safely
-    assert protocol_data_dtype[1] == 8
-    assert protocol_data_dtype[2] in (
-        ArrowCTypes.STRING,
-        ArrowCTypes.LARGE_STRING,
-    )  # format_str == utf-8
-    # Convert the buffers to NumPy arrays. In order to go from STRING to
-    # an equivalent ndarray, we claim that the buffer is uint8 (i.e., a byte array)
-    data_dtype = (
-        DtypeKind.UINT,
-        8,
-        ArrowCTypes.UINT8,
-        Endianness.NATIVE,
-    )
+    data_buff, dtype = buffers["data"]
+
+    if (dtype[1] == 8) and (
+        dtype[2]
+        in (
+            ArrowCTypes.STRING,
+            ArrowCTypes.LARGE_STRING,
+        )
+    ):  # format_str == utf-8
+        # temporary workaround to keep backwards compatibility due to
+        # https://github.com/pandas-dev/pandas/issues/54781
+
+        # We're going to reinterpret the buffer as uint8, so make sure we can do it
+        # safely
+
+        # Convert the buffers to NumPy arrays. In order to go from STRING to
+        # an equivalent ndarray, we claim that the buffer is uint8 (i.e., a byte array)
+        dtype = (
+            DtypeKind.UINT,
+            8,
+            ArrowCTypes.UINT8,
+            Endianness.NATIVE,
+        )
     # Specify zero offset as we don't want to chunk the string data
-    data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=data_buff.bufsize)
+    data = buffer_to_ndarray(data_buff, dtype, offset=0, length=data_buff.bufsize)
 
     # Retrieve the offsets buffer containing the index offsets demarcating
     # the beginning and the ending of each string
@@ -379,15 +387,21 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray | pd.Series, Any
 
     _, _, format_str, _ = col.dtype
     dbuf, dtype = buffers["data"]
-    # Consider dtype being `uint` to get number of units passed since the 01.01.1970
-    data = buffer_to_ndarray(
-        dbuf,
-        (
+
+    if dtype[0] == DtypeKind.DATETIME:
+        # temporary workaround to keep backwards compatibility due to
+        # https://github.com/pandas-dev/pandas/issues/54781
+        # Consider dtype being `uint` to get number of units passed since the 01.01.1970
+        dtype = (
             DtypeKind.UINT,
             dtype[1],
             getattr(ArrowCTypes, f"UINT{dtype[1]}"),
             Endianness.NATIVE,
-        ),
+        )
+
+    data = buffer_to_ndarray(
+        dbuf,
+        dtype,
         offset=col.offset,
         length=col.size(),
     )