use buffer dtype in interchange from_dataframe

MarcoGorelli · MarcoGorelli · commit 9876c64cd6bf · 2023-09-21T13:07:54.000+02:00
diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py
@@ -266,21 +266,29 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
 
     assert buffers["offsets"], "String buffers must contain offsets"
     # Retrieve the data buffer containing the UTF-8 code units
-    data_buff, protocol_data_dtype = buffers["data"]
-    # We're going to reinterpret the buffer as uint8, so make sure we can do it safely
-    assert protocol_data_dtype[1] == 8
-    assert protocol_data_dtype[2] in (
-        ArrowCTypes.STRING,
-        ArrowCTypes.LARGE_STRING,
-    )  # format_str == utf-8
-    # Convert the buffers to NumPy arrays. In order to go from STRING to
-    # an equivalent ndarray, we claim that the buffer is uint8 (i.e., a byte array)
-    data_dtype = (
-        DtypeKind.UINT,
-        8,
-        ArrowCTypes.UINT8,
-        Endianness.NATIVE,
-    )
+    data_buff, data_dtype = buffers["data"]
+
+    if (data_dtype[1] == 8) and (
+        data_dtype[2]
+        in (
+            ArrowCTypes.STRING,
+            ArrowCTypes.LARGE_STRING,
+        )
+    ):  # format_str == utf-8
+        # temporary workaround to keep backwards compatibility due to
+        # https://github.com/pandas-dev/pandas/issues/54781
+
+        # The condition above guarantees 8-bit UTF-8 string data, so the buffer
+        # can be safely reinterpreted as uint8
+
+        # Convert the buffers to NumPy arrays. In order to go from STRING to
+        # an equivalent ndarray, we claim that the buffer is uint8 (i.e., a byte array)
+        data_dtype = (
+            DtypeKind.UINT,
+            8,
+            ArrowCTypes.UINT8,
+            Endianness.NATIVE,
+        )
     # Specify zero offset as we don't want to chunk the string data
     data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=data_buff.bufsize)
 
@@ -378,16 +386,22 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray | pd.Series, Any
     buffers = col.get_buffers()
 
     _, _, format_str, _ = col.dtype
-    dbuf, dtype = buffers["data"]
-    # Consider dtype being `uint` to get number of units passed since the 01.01.1970
-    data = buffer_to_ndarray(
-        dbuf,
-        (
+    dbuf, data_dtype = buffers["data"]
+
+    if data_dtype[0] == DtypeKind.DATETIME:
+        # temporary workaround to keep backwards compatibility due to
+        # https://github.com/pandas-dev/pandas/issues/54781
+        # Treat the dtype as `uint` to get the number of units elapsed since 1970-01-01
+        data_dtype = (
             DtypeKind.UINT,
-            dtype[1],
-            getattr(ArrowCTypes, f"UINT{dtype[1]}"),
+            data_dtype[1],
+            getattr(ArrowCTypes, f"UINT{data_dtype[1]}"),
             Endianness.NATIVE,
-        ),
+        )
+
+    data = buffer_to_ndarray(
+        dbuf,
+        data_dtype,
         offset=col.offset,
         length=col.size(),
     )