Skip to content

Commit fb3646e

Browse files
support dataframe protocol (tested with Vaex)
This allows Plotly Express to accept any dataframe that supports the dataframe protocol. See https://data-apis.org/blog/dataframe_protocol_rfc/ and https://data-apis.org/dataframe-protocol/latest/index.html for details. The test includes an example with Vaex, which should work with vaexio/vaex#1509 (not yet released).
1 parent 0a83329 commit fb3646e

File tree

2 files changed

+18
-1
lines changed

2 files changed

+18
-1
lines changed

Diff for: packages/python/plotly/plotly/express/_core.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -1303,7 +1303,14 @@ def build_dataframe(args, constructor):
13031303
# Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.)
13041304
df_provided = args["data_frame"] is not None
13051305
if df_provided and not isinstance(args["data_frame"], pd.DataFrame):
1306-
args["data_frame"] = pd.DataFrame(args["data_frame"])
1306+
if hasattr(args["data_frame"], "__dataframe__"):
1307+
# Pandas does not implement a `from_dataframe` yet
1308+
# $ wget https://raw.githubusercontent.com/data-apis/dataframe-api/main/protocol/pandas_implementation.py
1309+
# $ export PYTHONPATH=`pwd`
1310+
import pandas_implementation
1311+
args["data_frame"] = pandas_implementation.from_dataframe(args["data_frame"])
1312+
else:
1313+
args["data_frame"] = pd.DataFrame(args["data_frame"])
13071314
df_input = args["data_frame"]
13081315

13091316
# now we handle special cases like wide-mode or x-xor-y specification

Diff for: packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py

+10
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,16 @@ def test_build_df_with_index():
233233
assert_frame_equal(tips.reset_index()[out["data_frame"].columns], out["data_frame"])
234234

235235

236+
def test_build_df_protocol():
237+
import vaex
238+
# take out the 'species' columns since the vaex implementation does not cover strings yet
239+
iris_pandas = px.data.iris()[["petal_width", "sepal_length"]]
240+
iris_vaex = vaex.from_pandas(iris_pandas)
241+
args = dict(data_frame=iris_vaex, x="petal_width", y="sepal_length")
242+
out = build_dataframe(args, go.Scatter)
243+
assert_frame_equal(iris_pandas.reset_index()[out["data_frame"].columns], out["data_frame"])
244+
245+
236246
def test_timezones():
237247
df = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]})
238248
df["date"] = pd.to_datetime(df["date"])

0 commit comments

Comments
 (0)