From 9543afe1a48b7522efca47035f1a973797383451 Mon Sep 17 00:00:00 2001 From: alexcjohnson Date: Fri, 5 May 2023 23:01:43 -0400 Subject: [PATCH 1/4] escape unsafe chars in JSON for insertion in HTML, to avoid XSS --- packages/python/plotly/plotly/io/_json.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/packages/python/plotly/plotly/io/_json.py b/packages/python/plotly/plotly/io/_json.py index ae9935b1dda..45d9fb6790d 100644 --- a/packages/python/plotly/plotly/io/_json.py +++ b/packages/python/plotly/plotly/io/_json.py @@ -57,6 +57,23 @@ def coerce_to_strict(const): return const +_swap = ( + ("<", "\\u003c"), + (">", "\\u003e"), + ("/", "\\u002f"), + ("\u2028", "\\u2028"), + ("\u2029", "\\u2029"), +) + + +def _safe(json_str): + out = json_str + for unsafe_char, safe_char in _swap: + if unsafe_char in out: + out = out.replace(unsafe_char, safe_char) + return out + + def to_json_plotly(plotly_object, pretty=False, engine=None): """ Convert a plotly/Dash object to a JSON string representation @@ -120,7 +137,7 @@ def to_json_plotly(plotly_object, pretty=False, engine=None): from _plotly_utils.utils import PlotlyJSONEncoder - return json.dumps(plotly_object, cls=PlotlyJSONEncoder, **opts) + return _safe(json.dumps(plotly_object, cls=PlotlyJSONEncoder, **opts)) elif engine == "orjson": JsonConfig.validate_orjson() opts = orjson.OPT_NON_STR_KEYS | orjson.OPT_SERIALIZE_NUMPY @@ -136,7 +153,7 @@ def to_json_plotly(plotly_object, pretty=False, engine=None): # Try without cleaning try: - return orjson.dumps(plotly_object, option=opts).decode("utf8") + return _safe(orjson.dumps(plotly_object, option=opts).decode("utf8")) except TypeError: pass @@ -146,7 +163,7 @@ def to_json_plotly(plotly_object, pretty=False, engine=None): datetime_allowed=True, modules=modules, ) - return orjson.dumps(cleaned, option=opts).decode("utf8") + return _safe(orjson.dumps(cleaned, option=opts).decode("utf8")) def to_json(fig, validate=True, pretty=False, 
remove_uids=True, engine=None): From 115d1669386e6edeed2bda5e6dc908e46150e16b Mon Sep 17 00:00:00 2001 From: alexcjohnson Date: Mon, 8 May 2023 18:48:29 -0400 Subject: [PATCH 2/4] test for json sanitization --- packages/python/plotly/plotly/io/_json.py | 16 +++++++++----- .../tests/test_io/test_to_from_plotly_json.py | 22 +++++++++++++++++++ .../requirements_37_optional.txt | 1 + .../requirements_39_optional.txt | 1 + 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/packages/python/plotly/plotly/io/_json.py b/packages/python/plotly/plotly/io/_json.py index 45d9fb6790d..26ea14e063e 100644 --- a/packages/python/plotly/plotly/io/_json.py +++ b/packages/python/plotly/plotly/io/_json.py @@ -57,16 +57,18 @@ def coerce_to_strict(const): return const -_swap = ( +_swap_json = ( ("<", "\\u003c"), (">", "\\u003e"), ("/", "\\u002f"), +) +_swap_orjson = _swap_json + ( ("\u2028", "\\u2028"), ("\u2029", "\\u2029"), ) -def _safe(json_str): +def _safe(json_str, _swap): out = json_str for unsafe_char, safe_char in _swap: if unsafe_char in out: @@ -137,7 +139,9 @@ def to_json_plotly(plotly_object, pretty=False, engine=None): from _plotly_utils.utils import PlotlyJSONEncoder - return _safe(json.dumps(plotly_object, cls=PlotlyJSONEncoder, **opts)) + return _safe( + json.dumps(plotly_object, cls=PlotlyJSONEncoder, **opts), _swap_json + ) elif engine == "orjson": JsonConfig.validate_orjson() opts = orjson.OPT_NON_STR_KEYS | orjson.OPT_SERIALIZE_NUMPY @@ -153,7 +157,9 @@ def to_json_plotly(plotly_object, pretty=False, engine=None): # Try without cleaning try: - return _safe(orjson.dumps(plotly_object, option=opts).decode("utf8")) + return _safe( + orjson.dumps(plotly_object, option=opts).decode("utf8"), _swap_orjson + ) except TypeError: pass @@ -163,7 +169,7 @@ def to_json_plotly(plotly_object, pretty=False, engine=None): datetime_allowed=True, modules=modules, ) - return _safe(orjson.dumps(cleaned, option=opts).decode("utf8")) + return _safe(orjson.dumps(cleaned, 
option=opts).decode("utf8"), _swap_orjson) def to_json(fig, validate=True, pretty=False, remove_uids=True, engine=None): diff --git a/packages/python/plotly/plotly/tests/test_io/test_to_from_plotly_json.py b/packages/python/plotly/plotly/tests/test_io/test_to_from_plotly_json.py index e21b556c6b8..95e11f71ddb 100644 --- a/packages/python/plotly/plotly/tests/test_io/test_to_from_plotly_json.py +++ b/packages/python/plotly/plotly/tests/test_io/test_to_from_plotly_json.py @@ -221,3 +221,25 @@ def test_mixed_string_nonstring_key(engine, pretty): value = build_test_dict({0: 1, "a": 2}) result = pio.to_json_plotly(value, engine=engine) check_roundtrip(result, engine=engine, pretty=pretty) + + +def test_sanitize_json(engine): + layout = {"title": {"text": "</script>\u2028\u2029"}} + fig = go.Figure(layout=layout) + fig_json = pio.to_json_plotly(fig, engine=engine) + layout_2 = json.loads(fig_json)["layout"] + del layout_2["template"] + + assert layout == layout_2 + + replacements = { + "<": "\\u003c", + ">": "\\u003e", + "/": "\\u002f", + "\u2028": "\\u2028", + "\u2029": "\\u2029", + } + + for bad, good in replacements.items(): + assert bad not in fig_json + assert good in fig_json diff --git a/packages/python/plotly/test_requirements/requirements_37_optional.txt b/packages/python/plotly/test_requirements/requirements_37_optional.txt index 3fb4185e9c9..224fbf7db8b 100644 --- a/packages/python/plotly/test_requirements/requirements_37_optional.txt +++ b/packages/python/plotly/test_requirements/requirements_37_optional.txt @@ -19,3 +19,4 @@ matplotlib==2.2.3 scikit-image==0.14.4 psutil==5.7.0 kaleido +orjson==3.8.12 diff --git a/packages/python/plotly/test_requirements/requirements_39_optional.txt b/packages/python/plotly/test_requirements/requirements_39_optional.txt index 34b686ad024..eae8cd6d2ec 100644 --- a/packages/python/plotly/test_requirements/requirements_39_optional.txt +++ b/packages/python/plotly/test_requirements/requirements_39_optional.txt @@ -19,3 +19,4 @@ 
matplotlib==2.2.3 scikit-image==0.18.1 psutil==5.7.0 kaleido +orjson==3.8.12 From fabd54da24a7b5cca265cb36cfcf4bee2081a19f Mon Sep 17 00:00:00 2001 From: alexcjohnson Date: Mon, 8 May 2023 21:55:04 -0400 Subject: [PATCH 3/4] adjust old test for orjson datetime64 fixed precision --- .../plotly/tests/test_io/test_to_from_plotly_json.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packages/python/plotly/plotly/tests/test_io/test_to_from_plotly_json.py b/packages/python/plotly/plotly/tests/test_io/test_to_from_plotly_json.py index 95e11f71ddb..cdcd78a5a41 100644 --- a/packages/python/plotly/plotly/tests/test_io/test_to_from_plotly_json.py +++ b/packages/python/plotly/plotly/tests/test_io/test_to_from_plotly_json.py @@ -6,6 +6,7 @@ import pandas as pd import json import datetime +import re import sys from pytz import timezone from _plotly_utils.optional_imports import get_module @@ -201,6 +202,14 @@ def to_str(v): array_str = to_json_test(dt_values) expected = build_test_dict_string(array_str) + if orjson: + # orjson always serializes datetime64 to ns, but json will return either + # full seconds or microseconds, if the rest is zeros. + # we don't care about any trailing zeros + trailing_zeros = re.compile(r'[.]?0+"') + result = trailing_zeros.sub('"', result) + expected = trailing_zeros.sub('"', expected) + assert result == expected check_roundtrip(result, engine=engine, pretty=pretty) From b955f3532550200bf2ce396b1bda3aa43c2f7d47 Mon Sep 17 00:00:00 2001 From: alexcjohnson Date: Tue, 9 May 2023 12:44:27 -0400 Subject: [PATCH 4/4] changelog for JSON sanitizer --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f17a4a7e178..346dd66aa81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,8 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
### Fixed - Fixed another compatibility issue with Pandas 2.0, just affecting `px.*(line_close=True)` [[#4190](https://github.com/plotly/plotly.py/pull/4190)] - - Added some rounding to the `make_subplots` function to handle situations where the user-input specs cause the domain to exceed 1 by small amounts https://github.com/plotly/plotly.py/pull/4153 + - Added some rounding to the `make_subplots` function to handle situations where the user-input specs cause the domain to exceed 1 by small amounts [[#4153](https://github.com/plotly/plotly.py/pull/4153)] + - Sanitize JSON output to prevent an XSS vector when graphs are inserted directly into HTML [[#4196](https://github.com/plotly/plotly.py/pull/4196)] ## [5.14.1] - 2023-04-05