Skip to content

Commit 160d7a1

Browse files
Backport PR #56587 on branch 2.2.x (ENH: support the Arrow PyCapsule Interface on pandas.DataFrame (export)) (#56944)
Backport PR #56587: ENH: support the Arrow PyCapsule Interface on pandas.DataFrame (export) Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent 74fa740 commit 160d7a1

File tree

4 files changed

+89
-2
lines changed

4 files changed

+89
-2
lines changed

pandas/compat/_optional.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,8 @@ def import_optional_dependency(
120120
The imported module, when found and the version is correct.
121121
None is returned when the package is not found and `errors`
122122
is False, or when the package's version is too old and `errors`
123-
is ``'warn'``.
123+
is ``'warn'`` or ``'ignore'``.
124124
"""
125-
126125
assert errors in {"warn", "raise", "ignore"}
127126

128127
package_name = INSTALL_MAPPING.get(name)
@@ -163,5 +162,7 @@ def import_optional_dependency(
163162
return None
164163
elif errors == "raise":
165164
raise ImportError(msg)
165+
else:
166+
return None
166167

167168
return module

pandas/core/frame.py

+27
Original file line numberDiff line numberDiff line change
@@ -987,6 +987,33 @@ def __dataframe_consortium_standard__(
987987
)
988988
return convert_to_standard_compliant_dataframe(self, api_version=api_version)
989989

990+
def __arrow_c_stream__(self, requested_schema=None):
    """
    Export the pandas DataFrame as an Arrow C stream PyCapsule.

    The DataFrame is first converted to a ``pyarrow.Table`` through
    ``pyarrow.Table.from_pandas`` and the capsule is produced by that
    table. Index handling therefore follows the defaults of
    ``pyarrow.Table.from_pandas`` (the index is stored as a column, except
    for a RangeIndex).
    This conversion is not necessarily zero-copy.

    Parameters
    ----------
    requested_schema : PyCapsule, default None
        The schema to which the dataframe should be cast, passed as a
        PyCapsule containing a C ArrowSchema representation of the
        requested schema.

    Returns
    -------
    PyCapsule
        The capsule returned by ``pyarrow.Table.__arrow_c_stream__``.
    """
    pa = import_optional_dependency("pyarrow", min_version="14.0.0")
    # A schema capsule, when given, must be turned into a pyarrow.Schema
    # before it can be handed to Table.from_pandas.
    target_schema = (
        None
        if requested_schema is None
        else pa.Schema._import_from_c_capsule(requested_schema)
    )
    return pa.Table.from_pandas(self, schema=target_schema).__arrow_c_stream__()
1016+
9901017
# ----------------------------------------------------------------------
9911018

9921019
@property
+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import ctypes
2+
3+
import pytest
4+
5+
import pandas.util._test_decorators as td
6+
7+
import pandas as pd
8+
9+
pa = pytest.importorskip("pyarrow")
10+
11+
12+
@td.skip_if_no("pyarrow", min_version="14.0")
def test_dataframe_arrow_interface():
    """
    Check ``DataFrame.__arrow_c_stream__`` and ``pa.table`` on a DataFrame.

    Verifies that the exported object is a valid PyCapsule named
    ``arrow_array_stream`` and that ``pa.table(df)`` matches the expected
    table, both without and with an explicit schema.
    """
    frame = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})

    # PyCapsule_IsValid returns 1 only when the object is a capsule whose
    # name matches the one passed in.
    stream_capsule = frame.__arrow_c_stream__()
    capsule_ok = ctypes.pythonapi.PyCapsule_IsValid(
        ctypes.py_object(stream_capsule), b"arrow_array_stream"
    )
    assert capsule_ok == 1

    # Default conversion: column types are left to pyarrow.
    converted = pa.table(frame)
    baseline = pa.table({"a": [1, 2, 3], "b": ["a", "b", "c"]})
    assert converted.equals(baseline)

    # Conversion with an explicit schema (int8 for column "a").
    narrow_schema = pa.schema([("a", pa.int8()), ("b", pa.string())])
    converted = pa.table(frame, schema=narrow_schema)
    assert converted.equals(baseline.cast(narrow_schema))
32+
33+
34+
@td.skip_if_no("pyarrow", min_version="15.0")
def test_dataframe_to_arrow():
    """
    Check that pyarrow can read a DataFrame through ``RecordBatchReader``.

    The DataFrame is passed directly to ``pa.RecordBatchReader.from_stream``
    and the resulting stream is compared with the expected table, both
    without and with an explicit schema.
    """
    df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})

    # from_stream returns a RecordBatchReader, which has no ``equals``
    # method; read the whole stream into a Table before comparing.
    table = pa.RecordBatchReader.from_stream(df).read_all()
    expected = pa.table({"a": [1, 2, 3], "b": ["a", "b", "c"]})
    assert table.equals(expected)

    schema = pa.schema([("a", pa.int8()), ("b", pa.string())])
    table = pa.RecordBatchReader.from_stream(df, schema=schema).read_all()
    expected = expected.cast(schema)
    assert table.equals(expected)

pandas/tests/test_optional_dependency.py

+14
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,20 @@ def test_bad_version(monkeypatch):
5050
result = import_optional_dependency("fakemodule")
5151
assert result is module
5252

53+
with pytest.raises(ImportError, match="Pandas requires version '1.1.0'"):
54+
import_optional_dependency("fakemodule", min_version="1.1.0")
55+
56+
with tm.assert_produces_warning(UserWarning):
57+
result = import_optional_dependency(
58+
"fakemodule", errors="warn", min_version="1.1.0"
59+
)
60+
assert result is None
61+
62+
result = import_optional_dependency(
63+
"fakemodule", errors="ignore", min_version="1.1.0"
64+
)
65+
assert result is None
66+
5367

5468
def test_submodule(monkeypatch):
5569
# Create a fake module with a submodule

0 commit comments

Comments
 (0)