From a09f858b97386b397df488e8315e906c29991e04 Mon Sep 17 00:00:00 2001 From: "Maarten A. Breddels" Date: Thu, 16 Sep 2021 13:04:08 +0200 Subject: [PATCH 1/2] support dataframe protocol (tested with Vaex) This allows plotly express to take in any dataframe that supports the dataframe protocol, see: https://data-apis.org/blog/dataframe_protocol_rfc/ https://data-apis.org/dataframe-protocol/latest/index.html Test includes an example with vaex, which should work with https://github.com/vaexio/vaex/pull/1509/ (not yet released) --- packages/python/plotly/plotly/express/_core.py | 12 +++++++++++- .../tests/test_optional/test_px/test_px_input.py | 13 +++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index dd2d53be1ed..43895ace24d 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1303,7 +1303,17 @@ def build_dataframe(args, constructor): # Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.) 
df_provided = args["data_frame"] is not None if df_provided and not isinstance(args["data_frame"], pd.DataFrame): - args["data_frame"] = pd.DataFrame(args["data_frame"]) + if hasattr(args["data_frame"], "__dataframe__"): + # Pandas does not implement a `from_dataframe` yet + # $ wget https://raw.githubusercontent.com/data-apis/dataframe-api/main/protocol/pandas_implementation.py + # $ export PYTHONPATH=`pwd` + import pandas_implementation + + args["data_frame"] = pandas_implementation.from_dataframe( + args["data_frame"] + ) + else: + args["data_frame"] = pd.DataFrame(args["data_frame"]) df_input = args["data_frame"] # now we handle special cases like wide-mode or x-xor-y specification diff --git a/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py index 477e7dbcb04..1dbfc95dff9 100644 --- a/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py @@ -233,6 +233,19 @@ def test_build_df_with_index(): assert_frame_equal(tips.reset_index()[out["data_frame"].columns], out["data_frame"]) +def test_build_df_protocol(): + import vaex + + # take out the 'species' columns since the vaex implementation does not cover strings yet + iris_pandas = px.data.iris()[["petal_width", "sepal_length"]] + iris_vaex = vaex.from_pandas(iris_pandas) + args = dict(data_frame=iris_vaex, x="petal_width", y="sepal_length") + out = build_dataframe(args, go.Scatter) + assert_frame_equal( + iris_pandas.reset_index()[out["data_frame"].columns], out["data_frame"] + ) + + def test_timezones(): df = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]}) df["date"] = pd.to_datetime(df["date"]) From f727bfa0cfd7cb3dbd2d02bbc28ceba1c8db4628 Mon Sep 17 00:00:00 2001 From: "Maarten A. 
Breddels" Date: Fri, 30 Sep 2022 11:53:22 +0200 Subject: [PATCH 2/2] use pandas 1.5.0 to consume other dataframes --- .../python/plotly/plotly/express/_core.py | 19 +++++++++++-------- .../test_optional/test_px/test_px_input.py | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 43895ace24d..167c8e90f93 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1304,14 +1304,17 @@ def build_dataframe(args, constructor): df_provided = args["data_frame"] is not None if df_provided and not isinstance(args["data_frame"], pd.DataFrame): if hasattr(args["data_frame"], "__dataframe__"): - # Pandas does not implement a `from_dataframe` yet - # $ wget https://raw.githubusercontent.com/data-apis/dataframe-api/main/protocol/pandas_implementation.py - # $ export PYTHONPATH=`pwd` - import pandas_implementation - - args["data_frame"] = pandas_implementation.from_dataframe( - args["data_frame"] - ) + try: + import pandas.api.interchange + except ModuleNotFoundError: + raise NotImplementedError( + "The dataframe you provided supports the dataframe interchange " + "protocol, " + "but pandas 1.5.0 or greater is required to consume it." 
+ ) + df_not_pandas = args["data_frame"] + df_pandas = pandas.api.interchange.from_dataframe(df_not_pandas) + args["data_frame"] = df_pandas else: args["data_frame"] = pd.DataFrame(args["data_frame"]) df_input = args["data_frame"] diff --git a/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py index 1dbfc95dff9..b4fae3a6a5c 100644 --- a/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py @@ -236,7 +236,7 @@ def test_build_df_with_index(): def test_build_df_protocol(): import vaex - # take out the 'species' columns since the vaex implementation does not cover strings yet + # take out the 'species' column since there are still some issues with strings iris_pandas = px.data.iris()[["petal_width", "sepal_length"]] iris_vaex = vaex.from_pandas(iris_pandas) args = dict(data_frame=iris_vaex, x="petal_width", y="sepal_length")