From 3bf21ef45a090de52907ba224676996c45acb336 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Wed, 28 Feb 2024 18:58:13 +0000
Subject: [PATCH 1/5] BUG: interchange protocol with nullable datatypes a
 non-null validity provides nonsense results

---
 pandas/core/interchange/column.py     | 18 ++++++++++++++++-
 pandas/tests/interchange/test_impl.py | 29 +++++++++++++++++++++------
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py
index 7effc42d5ba28..e149a767ed0d9 100644
--- a/pandas/core/interchange/column.py
+++ b/pandas/core/interchange/column.py
@@ -190,6 +190,10 @@ def describe_categorical(self):
 
     @property
     def describe_null(self):
+        if isinstance(self._col.dtype, BaseMaskedDtype):
+            column_null_dtype = ColumnNullType.USE_BYTEMASK
+            null_value = 1
+            return column_null_dtype, null_value
         kind = self.dtype[0]
         try:
             null, value = _NULL_DESCRIPTION[kind]
@@ -298,7 +302,13 @@ def _get_data_buffer(
             DtypeKind.FLOAT,
             DtypeKind.BOOL,
         ):
-            np_arr = self._col.to_numpy()
+            arr = self._col.array
+            if isinstance(self._col.dtype, BaseMaskedDtype):
+                np_arr = arr._data
+            elif isinstance(self._col.dtype, ArrowDtype):
+                raise NotImplementedError("ArrowDtype not handled yet")
+            else:
+                np_arr = arr._ndarray
             buffer = PandasBuffer(np_arr, allow_copy=self._allow_copy)
             dtype = self.dtype
         elif self.dtype[0] == DtypeKind.CATEGORICAL:
@@ -341,6 +351,12 @@ def _get_validity_buffer(self) -> tuple[PandasBuffer, Any]:
         """
         null, invalid = self.describe_null
 
+        if isinstance(self._col.dtype, BaseMaskedDtype):
+            mask = self._col.array._mask
+            buffer = PandasBuffer(mask)
+            dtype = (DtypeKind.BOOL, 8, ArrowCTypes.BOOL, Endianness.NATIVE)
+            return buffer, dtype
+
         if self.dtype[0] == DtypeKind.STRING:
             # For now, use byte array as the mask.
             # TODO: maybe store as bit array to save space?..
diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py
index e4fa6e4451a4c..8519f3995f197 100644
--- a/pandas/tests/interchange/test_impl.py
+++ b/pandas/tests/interchange/test_impl.py
@@ -8,7 +8,6 @@
     is_ci_environment,
     is_platform_windows,
 )
-import pandas.util._test_decorators as td
 
 import pandas as pd
 import pandas._testing as tm
@@ -417,17 +416,35 @@ def test_non_str_names_w_duplicates():
         pd.api.interchange.from_dataframe(dfi, allow_copy=False)
 
 
-@pytest.mark.parametrize(
-    "dtype", ["Int8", pytest.param("Int8[pyarrow]", marks=td.skip_if_no("pyarrow"))]
-)
-def test_nullable_integers(dtype: str) -> None:
+def test_nullable_integers() -> None:
+    # https://github.com/pandas-dev/pandas/issues/55069
+    df = pd.DataFrame({"a": [1]}, dtype="Int8")
+    expected = pd.DataFrame({"a": [1]}, dtype="int8")
+    result = pd.api.interchange.from_dataframe(df.__dataframe__())
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/57664")
+def test_nullable_integers_pyarrow() -> None:
     # https://github.com/pandas-dev/pandas/issues/55069
-    df = pd.DataFrame({"a": [1]}, dtype=dtype)
+    df = pd.DataFrame({"a": [1]}, dtype="Int8[pyarrow]")
     expected = pd.DataFrame({"a": [1]}, dtype="int8")
     result = pd.api.interchange.from_dataframe(df.__dataframe__())
     tm.assert_frame_equal(result, expected)
 
 
+def test_nullable_integers_w_missing_values() -> None:
+    # https://github.com/pandas-dev/pandas/issues/57643
+    pytest.importorskip("pyarrow", "11.0.0")
+    import pyarrow.interchange as pai
+
+    df = pd.DataFrame({"a": [1, 2, None]}, dtype="Int64")
+    result = pai.from_dataframe(df.__dataframe__())["a"]
+    assert result[0].as_py() == 1
+    assert result[1].as_py() == 2
+    assert result[2].as_py() is None
+
+
 def test_empty_dataframe():
     # https://github.com/pandas-dev/pandas/issues/56700
     df = pd.DataFrame({"a": []}, dtype="int8")

From b99da09614c7d586c91980d3dbfd3770cf79453c Mon Sep 17 00:00:00 2001
From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Wed, 28 Feb 2024 19:30:43 +0000
Subject: [PATCH 2/5] whatsnew

---
 doc/source/whatsnew/v2.2.1.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst
index 310dd921e44f6..fb4ecd8008e69 100644
--- a/doc/source/whatsnew/v2.2.1.rst
+++ b/doc/source/whatsnew/v2.2.1.rst
@@ -19,6 +19,7 @@ Enhancements
 
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
+- :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pandas nullable on with missing values (:issue:`56702`)
 - Fixed memory leak in :func:`read_csv` (:issue:`57039`)
 - Fixed performance regression in :meth:`Series.combine_first` (:issue:`55845`)
 - Fixed regression causing overflow for near-minimum timestamps (:issue:`57150`)

From 9ccec2d5748fac979bc1236454ef18b9fdb77e47 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Wed, 28 Feb 2024 19:55:39 +0000
Subject: [PATCH 3/5] :label: typing

---
 pandas/core/interchange/column.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py
index e149a767ed0d9..bf20f0b5433cd 100644
--- a/pandas/core/interchange/column.py
+++ b/pandas/core/interchange/column.py
@@ -304,11 +304,11 @@ def _get_data_buffer(
         ):
             arr = self._col.array
             if isinstance(self._col.dtype, BaseMaskedDtype):
-                np_arr = arr._data
+                np_arr = arr._data  # type: ignore[attr-defined]
             elif isinstance(self._col.dtype, ArrowDtype):
                 raise NotImplementedError("ArrowDtype not handled yet")
             else:
-                np_arr = arr._ndarray
+                np_arr = arr._ndarray  # type: ignore[attr-defined]
             buffer = PandasBuffer(np_arr, allow_copy=self._allow_copy)
             dtype = self.dtype
         elif self.dtype[0] == DtypeKind.CATEGORICAL:
@@ -352,7 +352,7 @@ def _get_validity_buffer(self) -> tuple[PandasBuffer, Any]:
         null, invalid = self.describe_null
 
         if isinstance(self._col.dtype, BaseMaskedDtype):
-            mask = self._col.array._mask
+            mask = self._col.array._mask  # type: ignore[attr-defined]
             buffer = PandasBuffer(mask)
             dtype = (DtypeKind.BOOL, 8, ArrowCTypes.BOOL, Endianness.NATIVE)
             return buffer, dtype

From 8650b6fb482f9b2ea0e0d0bb0e717de0e0270c56 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Thu, 29 Feb 2024 08:43:31 +0000
Subject: [PATCH 4/5] parametrise over more types

---
 pandas/tests/interchange/test_impl.py | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py
index 8519f3995f197..94b2da894ad0f 100644
--- a/pandas/tests/interchange/test_impl.py
+++ b/pandas/tests/interchange/test_impl.py
@@ -433,15 +433,30 @@ def test_nullable_integers_pyarrow() -> None:
     tm.assert_frame_equal(result, expected)
 
 
-def test_nullable_integers_w_missing_values() -> None:
+@pytest.mark.parametrize(
+    ("data", "dtype", "expected_dtype"),
+    [
+        ([1, 2, None], "Int64", "int64"),
+        (
+            [1, 2, None],
+            "UInt64",
+            "uint64",
+        ),
+        ([1.0, 2.25, None], "Float32", "float32"),
+    ],
+)
+def test_pandas_nullable_w_missing_values(
+    data: list, dtype: str, expected_dtype: str
+) -> None:
     # https://github.com/pandas-dev/pandas/issues/57643
     pytest.importorskip("pyarrow", "11.0.0")
     import pyarrow.interchange as pai
 
-    df = pd.DataFrame({"a": [1, 2, None]}, dtype="Int64")
+    df = pd.DataFrame({"a": data}, dtype=dtype)
     result = pai.from_dataframe(df.__dataframe__())["a"]
-    assert result[0].as_py() == 1
-    assert result[1].as_py() == 2
+    assert result.type == expected_dtype
+    assert result[0].as_py() == data[0]
+    assert result[1].as_py() == data[1]
     assert result[2].as_py() is None
 
 

From b9c15c675c4feac016413b2616c5b0cfdc20b9d3 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Thu, 7 Mar 2024 12:55:05 +0000
Subject: [PATCH 5/5] move whatsnew

---
 doc/source/whatsnew/v2.2.1.rst | 1 -
 doc/source/whatsnew/v2.2.2.rst | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst
index fb4ecd8008e69..310dd921e44f6 100644
--- a/doc/source/whatsnew/v2.2.1.rst
+++ b/doc/source/whatsnew/v2.2.1.rst
@@ -19,7 +19,6 @@ Enhancements
 
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
-- :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pandas nullable on with missing values (:issue:`56702`)
 - Fixed memory leak in :func:`read_csv` (:issue:`57039`)
 - Fixed performance regression in :meth:`Series.combine_first` (:issue:`55845`)
 - Fixed regression causing overflow for near-minimum timestamps (:issue:`57150`)
diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst
index 058f7aebcd538..96f210ce6b7b9 100644
--- a/doc/source/whatsnew/v2.2.2.rst
+++ b/doc/source/whatsnew/v2.2.2.rst
@@ -13,6 +13,7 @@ including other versions of pandas.
 
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
+- :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when a column's type was a pandas nullable one with missing values (:issue:`56702`)
 -
 
 .. ---------------------------------------------------------------------------