Skip to content

Commit 3bf21ef

Browse files
committed
BUG: interchange protocol with nullable datatypes and non-null validity provides nonsense results
1 parent 69f03a3 commit 3bf21ef

File tree

2 files changed

+40
-7
lines changed

2 files changed

+40
-7
lines changed

pandas/core/interchange/column.py

+17-1
Original file line number | Diff line number | Diff line change
@@ -190,6 +190,10 @@ def describe_categorical(self):
190190

191191
@property
192192
def describe_null(self):
193+
if isinstance(self._col.dtype, BaseMaskedDtype):
194+
column_null_dtype = ColumnNullType.USE_BYTEMASK
195+
null_value = 1
196+
return column_null_dtype, null_value
193197
kind = self.dtype[0]
194198
try:
195199
null, value = _NULL_DESCRIPTION[kind]
@@ -298,7 +302,13 @@ def _get_data_buffer(
298302
DtypeKind.FLOAT,
299303
DtypeKind.BOOL,
300304
):
301-
np_arr = self._col.to_numpy()
305+
arr = self._col.array
306+
if isinstance(self._col.dtype, BaseMaskedDtype):
307+
np_arr = arr._data
308+
elif isinstance(self._col.dtype, ArrowDtype):
309+
raise NotImplementedError("ArrowDtype not handled yet")
310+
else:
311+
np_arr = arr._ndarray
302312
buffer = PandasBuffer(np_arr, allow_copy=self._allow_copy)
303313
dtype = self.dtype
304314
elif self.dtype[0] == DtypeKind.CATEGORICAL:
@@ -341,6 +351,12 @@ def _get_validity_buffer(self) -> tuple[PandasBuffer, Any]:
341351
"""
342352
null, invalid = self.describe_null
343353

354+
if isinstance(self._col.dtype, BaseMaskedDtype):
355+
mask = self._col.array._mask
356+
buffer = PandasBuffer(mask)
357+
dtype = (DtypeKind.BOOL, 8, ArrowCTypes.BOOL, Endianness.NATIVE)
358+
return buffer, dtype
359+
344360
if self.dtype[0] == DtypeKind.STRING:
345361
# For now, use byte array as the mask.
346362
# TODO: maybe store as bit array to save space?..

pandas/tests/interchange/test_impl.py

+23-6
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
is_ci_environment,
99
is_platform_windows,
1010
)
11-
import pandas.util._test_decorators as td
1211

1312
import pandas as pd
1413
import pandas._testing as tm
@@ -417,17 +416,35 @@ def test_non_str_names_w_duplicates():
417416
pd.api.interchange.from_dataframe(dfi, allow_copy=False)
418417

419418

420-
@pytest.mark.parametrize(
421-
"dtype", ["Int8", pytest.param("Int8[pyarrow]", marks=td.skip_if_no("pyarrow"))]
422-
)
423-
def test_nullable_integers(dtype: str) -> None:
419+
def test_nullable_integers() -> None:
420+
# https://github.com/pandas-dev/pandas/issues/55069
421+
df = pd.DataFrame({"a": [1]}, dtype="Int8")
422+
expected = pd.DataFrame({"a": [1]}, dtype="int8")
423+
result = pd.api.interchange.from_dataframe(df.__dataframe__())
424+
tm.assert_frame_equal(result, expected)
425+
426+
427+
@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/57664")
428+
def test_nullable_integers_pyarrow() -> None:
424429
# https://github.com/pandas-dev/pandas/issues/55069
425-
df = pd.DataFrame({"a": [1]}, dtype=dtype)
430+
df = pd.DataFrame({"a": [1]}, dtype="Int8[pyarrow]")
426431
expected = pd.DataFrame({"a": [1]}, dtype="int8")
427432
result = pd.api.interchange.from_dataframe(df.__dataframe__())
428433
tm.assert_frame_equal(result, expected)
429434

430435

436+
def test_nullable_integers_w_missing_values() -> None:
437+
# https://github.com/pandas-dev/pandas/issues/57643
438+
pytest.importorskip("pyarrow", "11.0.0")
439+
import pyarrow.interchange as pai
440+
441+
df = pd.DataFrame({"a": [1, 2, None]}, dtype="Int64")
442+
result = pai.from_dataframe(df.__dataframe__())["a"]
443+
assert result[0].as_py() == 1
444+
assert result[1].as_py() == 2
445+
assert result[2].as_py() is None
446+
447+
431448
def test_empty_dataframe():
432449
# https://github.com/pandas-dev/pandas/issues/56700
433450
df = pd.DataFrame({"a": []}, dtype="int8")

0 commit comments

Comments
 (0)