Skip to content

Don't lose python/name tags values in mkdocs.yml #7507

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 26 additions & 8 deletions readthedocs/config/tests/test_yaml_loader.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,38 @@
import textwrap
from readthedocs.doc_builder.backends.mkdocs import yaml_load_safely
from readthedocs.doc_builder.backends.mkdocs import (
yaml_dump_safely,
yaml_load_safely,
ProxyPythonName
)

content = '''
int: 3
float: !!float 3
function: !!python/name:python_function
other_function: !!python/name:module.other.function
unknown: !!python/module:python_module
'''


def test_yaml_load_safely():
expected = {
'int': 3,
'float': 3.0,
'function': ProxyPythonName('python_function'),
'other_function': ProxyPythonName('module.other.function'),
'unknown': None,
}

content = textwrap.dedent('''
int: 3
float: !!float 3
unknown: !!python/name:unknown_funtion
''')
data = yaml_load_safely(content)

assert data == expected
assert type(data['int']) is int
assert type(data['float']) is float
assert type(data['function']) is ProxyPythonName
assert type(data['other_function']) is ProxyPythonName
assert data['function'].value == 'python_function'
assert data['other_function'].value == 'module.other.function'


def test_yaml_dump_safely():
data = yaml_load_safely(content)
assert yaml_load_safely(yaml_dump_safely(data)) == data

58 changes: 49 additions & 9 deletions readthedocs/doc_builder/backends/mkdocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def append_conf(self):
user_config['theme'] = self.DEFAULT_THEME_NAME

# Write the modified mkdocs configuration
yaml.safe_dump(
yaml_dump_safely(
user_config,
open(self.yaml_file, 'w'),
)
Expand Down Expand Up @@ -326,27 +326,67 @@ class MkdocsHTML(BaseMkdocs):
build_dir = '_build/html'


class SafeLoaderIgnoreUnknown(yaml.SafeLoader): # pylint: disable=too-many-ancestors
# TODO: find a better way to integrate with MkDocs.
# See https://github.com/readthedocs/readthedocs.org/issues/7844


class ProxyPythonName(yaml.YAMLObject):
def __init__(self, value):
self.value = value

def __eq__(self, other):
return self.value == other.value


class SafeLoader(yaml.SafeLoader): # pylint: disable=too-many-ancestors

"""
YAML loader to ignore unknown tags.
Safe YAML loader.

This loader parses special ``!!python/name:`` tags without actually
importing or executing code. Every other special tag is ignored.

Borrowed from https://stackoverflow.com/a/57121993
Issue https://github.com/readthedocs/readthedocs.org/issues/7461
"""

def ignore_unknown(self, node): # pylint: disable=no-self-use, unused-argument
return None

def construct_python_name(self, suffix, node): # pylint: disable=no-self-use, unused-argument
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we need the same for others like: construct_python_module, construct_python_object and maybe others here as well?

https://github.com/yaml/pyyaml/blob/538b5c93f7d5dee40322893c1e524e94a4f8bbde/lib3/yaml/constructor.py#L572

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you think we should immediately add support for all python/* tags, then yes. I only added logic for the python/name one since it's the only one I use, and the only one that is mentioned in the original issue #6889.

Just tell me and I'll update the PR to add support for the other tags!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it depends on if it's common to have those... But I'd say it doesn't hurt to add them now

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are 16 more Python tags:

tag python
!!python/none None
!!python/bool bool
!!python/bytes (bytes in Python 3)
!!python/str str (str in Python 3)
!!python/unicode unicode (str in Python 3)
!!python/int int
!!python/long long (int in Python 3)
!!python/float float
!!python/complex complex
!!python/list list
!!python/tuple tuple
!!python/dict dict
!!python/name:module.name module.name
!!python/module:package.module package.module
!!python/object:module.cls module.cls instance
!!python/object/new:module.cls module.cls instance
!!python/object/apply:module.f value of f(...)

Taken from https://pyyaml.org/wiki/PyYAMLDocumentation#yaml-tags-and-python-types.

Honestly I'm a bit reluctant to add all these, especially at once. Some of these tags might be more complex to support (more than one suffix/value to store in a proxy object, multiple ways to write them, etc.). I think it would be best to add them one by one as people ask for them. We now know how, so it could be quick PRs.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it makes sense to add only what's required now to make MkDocs with plugins to work 👍

return ProxyPythonName(suffix)


class SafeDumper(yaml.SafeDumper):

SafeLoaderIgnoreUnknown.add_constructor(None, SafeLoaderIgnoreUnknown.ignore_unknown)
"""
Safe YAML dumper.

This dumper allows to avoid losing values of special tags that
were parsed by our safe loader.
"""

def represent_name(self, data):
return self.represent_scalar("tag:yaml.org,2002:python/name:" + data.value, "")


SafeLoader.add_multi_constructor("tag:yaml.org,2002:python/name:", SafeLoader.construct_python_name)
SafeLoader.add_constructor(None, SafeLoader.ignore_unknown)
SafeDumper.add_representer(ProxyPythonName, SafeDumper.represent_name)


def yaml_load_safely(content):
"""
Uses ``SafeLoaderIgnoreUnknown`` loader to skip unknown tags.
Uses ``SafeLoader`` loader to skip unknown tags.

When a YAML contains ``!!python/name:int`` it will complete ignore it an
return ``None`` for those fields instead of failing. We need this to avoid
executing random code, but still support these YAML files.
When a YAML contains ``!!python/name:int`` it will store the ``int``
suffix temporarily to be able to re-dump it later. We need this to avoid
executing random code, but still support these YAML files without
information loss.
"""
return yaml.load(content, Loader=SafeLoaderIgnoreUnknown)
return yaml.load(content, Loader=SafeLoader)


def yaml_dump_safely(content, stream=None):
"""Uses ``SafeDumper`` dumper to write YAML contents."""
return yaml.dump(content, stream=stream, Dumper=SafeDumper)
Comment on lines +390 to +392
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it may be good to have a test for this one as well to be sure that we are not transforming the YAML and it looks exactly the same after yaml_dump_safely, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True! I eventually added the __eq__ method to ProxyPythonName because it made the test super easy to write 🤣

def test_yaml_dump_safely():
    data = yaml_load_safely(content)
    assert yaml_load_safely(yaml_dump_safely(data)) == data

exactly the same after yaml_dump_safely

It's not the case, the indentation can change (we don't have indentation here though), and an empty string is added at the end of python/name tags (see #7461 (comment)), which is correct and expected, but different from the original input.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as the data is the same, I'm OK with that.

12 changes: 7 additions & 5 deletions readthedocs/rtd_tests/tests/test_doc_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from readthedocs.builds.constants import EXTERNAL
from readthedocs.builds.models import Version
from readthedocs.doc_builder.backends.mkdocs import MkdocsHTML, yaml_load_safely
from readthedocs.doc_builder.backends.mkdocs import MkdocsHTML, SafeDumper, yaml_load_safely
from readthedocs.doc_builder.backends.sphinx import (
BaseSphinx,
HtmlBuilder,
Expand Down Expand Up @@ -392,7 +392,7 @@ def test_get_theme_name_with_feature_flag(self, checkout_path, run):
}
builder.append_conf()

mock_yaml.safe_dump.assert_called_once_with(
mock_yaml.dump.assert_called_once_with(
{
'site_name': mock.ANY,
'docs_dir': mock.ANY,
Expand All @@ -401,7 +401,8 @@ def test_get_theme_name_with_feature_flag(self, checkout_path, run):
'google_analytics': mock.ANY,
'theme': 'readthedocs',
},
mock.ANY,
stream=mock.ANY,
Dumper=SafeDumper,
)
mock_yaml.reset_mock()

Expand All @@ -417,7 +418,7 @@ def test_get_theme_name_with_feature_flag(self, checkout_path, run):
}
builder.append_conf()

mock_yaml.safe_dump.assert_called_once_with(
mock_yaml.dump.assert_called_once_with(
{
'site_name': mock.ANY,
'docs_dir': mock.ANY,
Expand All @@ -426,7 +427,8 @@ def test_get_theme_name_with_feature_flag(self, checkout_path, run):
'google_analytics': mock.ANY,
'theme': 'customtheme',
},
mock.ANY,
stream=mock.ANY,
Dumper=SafeDumper,
)

@patch('readthedocs.doc_builder.base.BaseBuilder.run')
Expand Down