diff --git a/CHANGELOG.md b/CHANGELOG.md index bbd674454de..9ba4bde693d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). ### Updated - Updated Plotly.js from version 2.24.1 to version 2.24.2. See the [plotly.js CHANGELOG](https://github.com/plotly/plotly.js/blob/master/CHANGELOG.md#2242----2023-06-09) for more information. These changes are reflected in the auto-generated `plotly.graph_objects` module. +- `px` methods now accept data-frame-like objects that support the [dataframe interchange protocol](https://data-apis.org/dataframe-protocol/latest/index.html), such as polars, vaex, modin, etc. This protocol takes priority over a `to_pandas` call, but will only be used if pandas>=2.0.2 is installed in the environment. ## [5.15.0] - 2023-06-08 diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index cd51b86e150..452c0a7ff79 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -7,6 +7,7 @@ from _plotly_utils.basevalidators import ColorscaleValidator from plotly.colors import qualitative, sequential import math +from packaging import version import pandas as pd import numpy as np @@ -1307,7 +1308,25 @@ def build_dataframe(args, constructor): # Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.) 
df_provided = args["data_frame"] is not None if df_provided and not isinstance(args["data_frame"], pd.DataFrame): - if hasattr(args["data_frame"], "to_pandas"): + if hasattr(args["data_frame"], "__dataframe__") and version.parse( + pd.__version__ + ) >= version.parse("2.0.2"): + import pandas.api.interchange + + df_not_pandas = args["data_frame"] + try: + df_pandas = pandas.api.interchange.from_dataframe(df_not_pandas) + except (ImportError, NotImplementedError) as exc: + # temporary workaround; developers of third-party libraries themselves + # should try a different implementation, if available. For example: + # def __dataframe__(self, ...): + # if not some_condition: + # self.to_pandas(...) + if not hasattr(df_not_pandas, "to_pandas"): + raise exc + df_pandas = df_not_pandas.to_pandas() + args["data_frame"] = df_pandas + elif hasattr(args["data_frame"], "to_pandas"): args["data_frame"] = args["data_frame"].to_pandas() else: args["data_frame"] = pd.DataFrame(args["data_frame"]) diff --git a/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py index 477e7dbcb04..1acbf3f1e64 100644 --- a/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py @@ -3,9 +3,25 @@ import numpy as np import pandas as pd import pytest +from packaging import version +import unittest.mock as mock from plotly.express._core import build_dataframe from pandas.testing import assert_frame_equal +# Fixtures +# -------- +@pytest.fixture +def add_interchange_module_for_old_pandas(): + if not hasattr(pd.api, "interchange"): + pd.api.interchange = mock.MagicMock() + # to make the following import work: `import pandas.api.interchange` + with mock.patch.dict( + "sys.modules", {"pandas.api.interchange": pd.api.interchange} + ): + yield + else: + yield + def test_numpy(): fig = px.scatter(x=[1, 2, 3], y=[2, 
3, 4], color=[1, 3, 9]) @@ -233,6 +249,47 @@ def test_build_df_with_index(): assert_frame_equal(tips.reset_index()[out["data_frame"].columns], out["data_frame"]) +def test_build_df_using_interchange_protocol_mock( + add_interchange_module_for_old_pandas, +): + class CustomDataFrame: + def __dataframe__(self): + pass + + input_dataframe = CustomDataFrame() + args = dict(data_frame=input_dataframe, x="petal_width", y="sepal_length") + + iris_pandas = px.data.iris() + + with mock.patch("pandas.__version__", "2.0.2"): + with mock.patch( + "pandas.api.interchange.from_dataframe", return_value=iris_pandas + ) as mock_from_dataframe: + build_dataframe(args, go.Scatter) + mock_from_dataframe.assert_called_once_with(input_dataframe) + + +@pytest.mark.skipif( + version.parse(pd.__version__) < version.parse("2.0.2"), + reason="plotly doesn't use a dataframe interchange protocol for pandas < 2.0.2", +) +@pytest.mark.parametrize("test_lib", ["vaex", "polars"]) +def test_build_df_from_vaex_and_polars(test_lib): + if test_lib == "vaex": + import vaex as lib + else: + import polars as lib + + # take out the 'species' columns since the vaex implementation does not cover strings yet + iris_pandas = px.data.iris()[["petal_width", "sepal_length"]] + iris_vaex = lib.from_pandas(iris_pandas) + args = dict(data_frame=iris_vaex, x="petal_width", y="sepal_length") + out = build_dataframe(args, go.Scatter) + assert_frame_equal( + iris_pandas.reset_index()[out["data_frame"].columns], out["data_frame"] + ) + + def test_timezones(): df = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]}) df["date"] = pd.to_datetime(df["date"]) diff --git a/packages/python/plotly/test_requirements/requirements_39_pandas_2_optional.txt b/packages/python/plotly/test_requirements/requirements_39_pandas_2_optional.txt index acb9752ac16..e7f16aeecc8 100644 --- a/packages/python/plotly/test_requirements/requirements_39_pandas_2_optional.txt +++ 
b/packages/python/plotly/test_requirements/requirements_39_pandas_2_optional.txt @@ -1,6 +1,6 @@ requests==2.25.1 tenacity==6.2.0 -pandas==2.0.1 +pandas==2.0.2 numpy==1.20.3 xarray==0.17.0 statsmodels @@ -19,3 +19,5 @@ matplotlib==2.2.3 scikit-image==0.18.1 psutil==5.7.0 kaleido +vaex +polars