Skip to content

Commit 7212ecd

Browse files
ENH: support the Arrow PyCapsule Interface on pandas.DataFrame (export) (#56587)
* ENH: support the Arrow PyCapsule Interface on pandas.DataFrame
* expand documentation on how index is handled
1 parent e544b9f commit 7212ecd

File tree

4 files changed

+89
-2
lines changed

4 files changed

+89
-2
lines changed

pandas/compat/_optional.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,8 @@ def import_optional_dependency(
147147
The imported module, when found and the version is correct.
148148
None is returned when the package is not found and `errors`
149149
is False, or when the package's version is too old and `errors`
150-
is ``'warn'``.
150+
is ``'warn'`` or ``'ignore'``.
151151
"""
152-
153152
assert errors in {"warn", "raise", "ignore"}
154153

155154
package_name = INSTALL_MAPPING.get(name)
@@ -190,5 +189,7 @@ def import_optional_dependency(
190189
return None
191190
elif errors == "raise":
192191
raise ImportError(msg)
192+
else:
193+
return None
193194

194195
return module

pandas/core/frame.py

+27
Original file line numberDiff line numberDiff line change
@@ -988,6 +988,33 @@ def __dataframe_consortium_standard__(
988988
)
989989
return convert_to_standard_compliant_dataframe(self, api_version=api_version)
990990

991+
def __arrow_c_stream__(self, requested_schema=None):
    """
    Export the pandas DataFrame as an Arrow C stream PyCapsule.

    The DataFrame is first converted to a ``pyarrow.Table`` with
    ``pyarrow.Table.from_pandas`` and that table is then exported as a
    stream. The index is handled with the defaults of
    ``pyarrow.Table.from_pandas``, i.e. it is stored as a column except
    when it is a RangeIndex.
    This conversion is not necessarily zero-copy.

    Parameters
    ----------
    requested_schema : PyCapsule, default None
        The schema to which the dataframe should be cast, passed as a
        PyCapsule containing a C ArrowSchema representation of the
        requested schema.

    Returns
    -------
    PyCapsule
    """
    # pyarrow is optional; 14.0.0 is the minimum version requested here.
    pyarrow = import_optional_dependency("pyarrow", min_version="14.0.0")
    # Turn the schema capsule (if any) into a pyarrow.Schema so it can be
    # handed to Table.from_pandas.
    schema = (
        None
        if requested_schema is None
        else pyarrow.Schema._import_from_c_capsule(requested_schema)
    )
    return pyarrow.Table.from_pandas(self, schema=schema).__arrow_c_stream__()
1017+
9911018
# ----------------------------------------------------------------------
9921019

9931020
@property
+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import ctypes
2+
3+
import pytest
4+
5+
import pandas.util._test_decorators as td
6+
7+
import pandas as pd
8+
9+
pa = pytest.importorskip("pyarrow")
10+
11+
12+
@td.skip_if_no("pyarrow", min_version="14.0")
def test_dataframe_arrow_interface():
    """
    Check the stream capsule exported by ``DataFrame.__arrow_c_stream__``.

    The capsule must be valid under the name ``arrow_array_stream``, and
    ``pa.table`` must build the expected table from the DataFrame, both
    without and with an explicitly requested schema.
    """
    data = {"a": [1, 2, 3], "b": ["a", "b", "c"]}
    df = pd.DataFrame(data)

    # PyCapsule_IsValid returns 1 only for a capsule carrying this name.
    stream_capsule = df.__arrow_c_stream__()
    capsule_is_valid = ctypes.pythonapi.PyCapsule_IsValid(
        ctypes.py_object(stream_capsule), b"arrow_array_stream"
    )
    assert capsule_is_valid == 1

    # Default conversion: column types are inferred.
    expected = pa.table(data)
    result = pa.table(df)
    assert result.equals(expected)

    # Conversion with a requested schema: column "a" is narrowed to int8.
    target_schema = pa.schema([("a", pa.int8()), ("b", pa.string())])
    result = pa.table(df, schema=target_schema)
    assert result.equals(expected.cast(target_schema))
32+
33+
34+
@td.skip_if_no("pyarrow", min_version="15.0")
def test_dataframe_to_arrow():
    """
    Check that pyarrow can read a DataFrame through the Arrow stream export.

    ``pa.RecordBatchReader.from_stream`` is given the DataFrame directly,
    first without and then with a requested schema, and the data it yields
    is compared against the expected table.
    """
    df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})

    # from_stream returns a RecordBatchReader, not a Table; drain it with
    # read_all() so that the comparison below is a Table.equals call.
    table = pa.RecordBatchReader.from_stream(df).read_all()
    expected = pa.table({"a": [1, 2, 3], "b": ["a", "b", "c"]})
    assert table.equals(expected)

    schema = pa.schema([("a", pa.int8()), ("b", pa.string())])
    table = pa.RecordBatchReader.from_stream(df, schema=schema).read_all()
    expected = expected.cast(schema)
    assert table.equals(expected)

pandas/tests/test_optional_dependency.py

+14
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,20 @@ def test_bad_version(monkeypatch):
5050
result = import_optional_dependency("fakemodule")
5151
assert result is module
5252

53+
with pytest.raises(ImportError, match="Pandas requires version '1.1.0'"):
54+
import_optional_dependency("fakemodule", min_version="1.1.0")
55+
56+
with tm.assert_produces_warning(UserWarning):
57+
result = import_optional_dependency(
58+
"fakemodule", errors="warn", min_version="1.1.0"
59+
)
60+
assert result is None
61+
62+
result = import_optional_dependency(
63+
"fakemodule", errors="ignore", min_version="1.1.0"
64+
)
65+
assert result is None
66+
5367

5468
def test_submodule(monkeypatch):
5569
# Create a fake module with a submodule

0 commit comments

Comments
 (0)