Title¶
\nThis is an example page used to test EmbedAPI parsing features.
\nSub-title¶
\nThis is a reference to Sub-title.
\ndiff --git a/.circleci/config.yml b/.circleci/config.yml index 25a50f29076..ea681b49c8d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -17,6 +17,16 @@ jobs: - run: pip install --user tox - run: tox -e py36,codecov + tests-embedapi: + docker: + - image: 'cimg/python:3.6' + steps: + - checkout + - run: git submodule sync + - run: git submodule update --init + - run: pip install --user tox + - run: tox -c tox.embedapi.ini + checks: docker: - image: 'cimg/python:3.6' @@ -45,3 +55,4 @@ workflows: jobs: - checks - tests + - tests-embedapi diff --git a/pytest.ini b/pytest.ini index 15e67f8a2d3..d06623bde72 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,9 +1,11 @@ [pytest] -addopts = --reuse-db --strict-markers +addopts = --strict-markers markers = search serve proxito + embed_api + sphinx python_files = tests.py test_*.py *_tests.py filterwarnings = # Ignore external dependencies warning deprecations @@ -13,3 +15,9 @@ filterwarnings = ignore:Pagination may yield inconsistent results with an unordered object_list.*:django.core.paginator.UnorderedObjectListWarning # docutils ignore:'U' mode is deprecated:DeprecationWarning + # slumber + ignore:Using 'method_whitelist' with Retry is deprecated and will be removed in v2.0.*:DeprecationWarning + # kombu + ignore:SelectableGroups dict interface is deprecated.*:DeprecationWarning + # django + ignore:Remove the context parameter from JSONField.*:django.utils.deprecation.RemovedInDjango30Warning \ No newline at end of file diff --git a/readthedocs/conftest.py b/readthedocs/conftest.py index 0dc0b840141..de296516abf 100644 --- a/readthedocs/conftest.py +++ b/readthedocs/conftest.py @@ -1,6 +1,12 @@ import pytest from rest_framework.test import APIClient + +pytest_plugins = ( + 'sphinx.testing.fixtures', +) + + @pytest.fixture def api_client(): return APIClient() diff --git a/readthedocs/embed/tests/test_links.py b/readthedocs/embed/tests/test_links.py index 8650a361282..aab2f81b230 100644 --- a/readthedocs/embed/tests/test_links.py +++ b/readthedocs/embed/tests/test_links.py @@ -3,7 +3,7 @@ import pytest from pyquery import PyQuery -from readthedocs.embed.views import clean_links +from readthedocs.embed.utils import clean_links URLData = namedtuple('URLData', ['docurl', 'href', 'expected']) diff --git a/readthedocs/embed/utils.py b/readthedocs/embed/utils.py index 95f8640749f..94ad78bfe59 100644 --- a/readthedocs/embed/utils.py +++ b/readthedocs/embed/utils.py @@ -1,5 +1,8 @@ """Embed utils.""" +from urllib.parse import urlparse +from pyquery import PyQuery as PQ # noqa + def recurse_while_none(element): """Recursively find the leaf node with the ``href`` attribute.""" @@ -10,3 +13,55 @@ def recurse_while_none(element): if not href: href = element.attrib.get('id') return {element.text: href} + + +def clean_links(obj, url, html_raw_response=False): + """ + Rewrite (internal) links to make them absolute. + + 1. external links are not changed + 2. prepend URL to links that are just fragments (e.g. #section) + 3. prepend URL (without filename) to internal relative links + """ + + # TODO: do not depend on PyQuery + obj = PQ(obj) + + if url is None: + return obj + + for link in obj.find('a'): + base_url = urlparse(url) + # We need to make all internal links, to be absolute + href = link.attrib['href'] + parsed_href = urlparse(href) + if parsed_href.scheme or parsed_href.path.startswith('/'): + # don't change external links + continue + + if not parsed_href.path and parsed_href.fragment: + # href="#section-link" + new_href = base_url.geturl() + href + link.attrib['href'] = new_href + continue + + if not base_url.path.endswith('/'): + # internal relative link + # href="../../another.html" and ``base_url`` is not HTMLDir + # (e.g. /en/latest/deep/internal/section/page.html) + # we want to remove the trailing filename (page.html) and use the rest as base URL + # The resulting absolute link should be + # https://slug.readthedocs.io/en/latest/deep/internal/section/../../another.html + + # remove the filename (page.html) from the original document URL (base_url) and, + path, _ = base_url.path.rsplit('/', 1) + # append the value of href (../../another.html) to the base URL. + base_url = base_url._replace(path=path + '/') + + new_href = base_url.geturl() + href + link.attrib['href'] = new_href + + if html_raw_response: + return obj.outerHtml() + + return obj diff --git a/readthedocs/embed/v3/__init__.py b/readthedocs/embed/v3/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/readthedocs/embed/v3/tests/__init__.py b/readthedocs/embed/v3/tests/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/readthedocs/embed/v3/tests/conftest.py b/readthedocs/embed/v3/tests/conftest.py new file mode 100644 index 00000000000..0efc9948632 --- /dev/null +++ b/readthedocs/embed/v3/tests/conftest.py @@ -0,0 +1,14 @@ +import os +import shutil +import pytest + +from .utils import srcdir + + +@pytest.fixture(autouse=True, scope='module') +def remove_sphinx_build_output(): + """Remove _build/ folder, if exist.""" + for path in (srcdir,): + build_path = os.path.join(path, '_build') + if os.path.exists(build_path): + shutil.rmtree(build_path) diff --git a/readthedocs/embed/v3/tests/examples/default/bibtex-cite.rst b/readthedocs/embed/v3/tests/examples/default/bibtex-cite.rst new file mode 100644 index 00000000000..bac1deac36c --- /dev/null +++ b/readthedocs/embed/v3/tests/examples/default/bibtex-cite.rst @@ -0,0 +1,9 @@ +sphinxcontrib-bibtex +==================== + +See https://sphinxcontrib-bibtex.readthedocs.io/en/latest/ for more information about how to use ``sphinxcontrib-bibtex``. + +See :cite:t:`1987:nelson` for an introduction to non-standard analysis. +Non-standard analysis is fun :cite:p:`1987:nelson`. + +.. bibliography:: diff --git a/readthedocs/embed/v3/tests/examples/default/chapter-i.rst b/readthedocs/embed/v3/tests/examples/default/chapter-i.rst new file mode 100644 index 00000000000..6bf55dad0f6 --- /dev/null +++ b/readthedocs/embed/v3/tests/examples/default/chapter-i.rst @@ -0,0 +1,11 @@ +:orphan: + +Chapter I +========= + +This is Chapter I. + +Section I +--------- + +This the Section I inside Chapter I. diff --git a/readthedocs/embed/v3/tests/examples/default/conf.py b/readthedocs/embed/v3/tests/examples/default/conf.py new file mode 100644 index 00000000000..b8fe3483942 --- /dev/null +++ b/readthedocs/embed/v3/tests/examples/default/conf.py @@ -0,0 +1,17 @@ +# conf.py to run tests +import sphinxcontrib.bibtex + +master_doc = 'index' +extensions = [ + 'sphinx.ext.autosectionlabel', + 'sphinxcontrib.bibtex', +] + +bibtex_bibfiles = ['refs.bib'] + +def setup(app): + app.add_object_type( + 'confval', # directivename + 'confval', # rolename + 'pair: %s; configuration value', # indextemplate + ) diff --git a/readthedocs/embed/v3/tests/examples/default/configuration.rst b/readthedocs/embed/v3/tests/examples/default/configuration.rst new file mode 100644 index 00000000000..7ac6465b9f0 --- /dev/null +++ b/readthedocs/embed/v3/tests/examples/default/configuration.rst @@ -0,0 +1,12 @@ +Configuration +============= + +Examples of configurations. + +.. confval:: config1 + + Description: This the description for config1 + + Default: ``'Default value for config'`` + + Type: bool diff --git a/readthedocs/embed/v3/tests/examples/default/glossary.rst b/readthedocs/embed/v3/tests/examples/default/glossary.rst new file mode 100644 index 00000000000..f8f50705e4d --- /dev/null +++ b/readthedocs/embed/v3/tests/examples/default/glossary.rst @@ -0,0 +1,9 @@ +Glossary +-------- + +Example using a ``:term:`` role :term:`Read the Docs`. + +.. glossary:: + + Read the Docs + Best company ever. diff --git a/readthedocs/embed/v3/tests/examples/default/index.rst b/readthedocs/embed/v3/tests/examples/default/index.rst new file mode 100644 index 00000000000..540bed0984c --- /dev/null +++ b/readthedocs/embed/v3/tests/examples/default/index.rst @@ -0,0 +1,9 @@ +Title +===== + +This is an example page used to test EmbedAPI parsing features. + +Sub-title +--------- + +This is a reference to :ref:`sub-title`. diff --git a/readthedocs/embed/v3/tests/examples/default/refs.bib b/readthedocs/embed/v3/tests/examples/default/refs.bib new file mode 100644 index 00000000000..8be9d662d21 --- /dev/null +++ b/readthedocs/embed/v3/tests/examples/default/refs.bib @@ -0,0 +1,6 @@ +@Book{1987:nelson, + author = {Edward Nelson}, + title = {Radically Elementary Probability Theory}, + publisher = {Princeton University Press}, + year = {1987} +} diff --git a/readthedocs/embed/v3/tests/test_basics.py b/readthedocs/embed/v3/tests/test_basics.py new file mode 100644 index 00000000000..56bf7e2b965 --- /dev/null +++ b/readthedocs/embed/v3/tests/test_basics.py @@ -0,0 +1,71 @@ +import pytest + +from django.conf import settings +from django.core.cache import cache +from django.urls import reverse + +from .utils import srcdir + + +@pytest.mark.django_db +@pytest.mark.embed_api +class TestEmbedAPIv3Basics: + + @pytest.fixture(autouse=True) + def setup_method(self, settings): + settings.USE_SUBDOMAIN = True + settings.PUBLIC_DOMAIN = 'readthedocs.io' + settings.RTD_EMBED_API_EXTERNAL_DOMAINS = ['docs.project.com'] + + self.api_url = reverse('embed_api_v3') + + yield + cache.clear() + + def test_not_url_query_argument(self, client): + params = {} + response = client.get(self.api_url, params) + assert response.status_code == 400 + assert response.json() == {'error': 'Invalid arguments. Please provide "url".'} + + def test_not_allowed_domain(self, client): + params = { + 'url': 'https://docs.notalloweddomain.com#title', + } + response = client.get(self.api_url, params) + assert response.status_code == 400 + assert response.json() == {'error': 'External domain not allowed. domain=docs.notalloweddomain.com'} + + def test_malformed_url(self, client): + params = { + 'url': 'https:///page.html#title', + } + response = client.get(self.api_url, params) + assert response.status_code == 400 + assert response.json() == {'error': f'The URL requested is malformed. url={params["url"]}'} + + def test_rate_limit_domain(self, client): + params = { + 'url': 'https://docs.project.com#title', + } + cache_key = 'embed-api-docs.project.com' + cache.set(cache_key, settings.RTD_EMBED_API_DOMAIN_RATE_LIMIT) + + response = client.get(self.api_url, params) + assert response.status_code == 429 + assert response.json() == {'error': 'Too many requests for this domain. domain=docs.project.com'} + + def test_infinite_redirect(self, client, requests_mock): + requests_mock.get( + 'https://docs.project.com', + status_code=302, + headers={ + 'Location': 'https://docs.project.com', + }, + ) + params = { + 'url': 'https://docs.project.com#title', + } + response = client.get(self.api_url, params) + assert response.status_code == 400 + assert response.json() == {'error': f'The URL requested generates too many redirects. url={params["url"]}'} diff --git a/readthedocs/embed/v3/tests/test_external_pages.py b/readthedocs/embed/v3/tests/test_external_pages.py new file mode 100644 index 00000000000..e1377da5f4c --- /dev/null +++ b/readthedocs/embed/v3/tests/test_external_pages.py @@ -0,0 +1,254 @@ +import docutils +import os + +import pytest +import sphinx + +from packaging.version import Version + +from django.conf import settings +from django.core.cache import cache +from django.urls import reverse + +from .utils import srcdir + + +@pytest.mark.django_db +@pytest.mark.embed_api +class TestEmbedAPIv3ExternalPages: + + @pytest.fixture(autouse=True) + def setup_method(self, settings): + settings.USE_SUBDOMAIN = True + settings.PUBLIC_DOMAIN = 'readthedocs.io' + settings.RTD_EMBED_API_EXTERNAL_DOMAINS = ['docs.project.com'] + + self.api_url = reverse('embed_api_v3') + + yield + cache.clear() + + @pytest.mark.sphinx('html', srcdir=srcdir, freshenv=True) + def test_default_main_section(self, app, client, requests_mock): + app.build() + path = app.outdir / 'index.html' + assert path.exists() is True + content = open(path).read() + requests_mock.get('https://docs.project.com', text=content) + + params = { + 'url': 'https://docs.project.com', + } + response = client.get(self.api_url, params) + assert response.status_code == 200 + + # The output is different because docutils is outputting this, + # and we're not sanitizing it, but just passing it through. + if Version(docutils.__version__) >= Version('0.17'): + content = '
This is an example page used to test EmbedAPI parsing features.
\nThis is a reference to Sub-title.
\nThis is a reference to Sub-title.
\nconfig1
¶config1
¶config1
¶config1
¶config1
¶Description: This the description for config1
\nDefault: \'Default value for config\'
Type: bool
\nconfig1
¶Description: This the description for config1
\nDefault: \'Default value for config\'
Type: bool
\nconfig1
¶Description: This the description for config1
\nDefault: \'Default value for config\'
Type: bool
\nDescription: This the description for config1
\nDefault: \'Default value for config\'
Type: bool
\nEdward Nelson. Radically Elementary Probability Theory. Princeton University Press, 1987.
\nBest company ever.
\nBest company ever.
\nThis is an example page used to test EmbedAPI parsing features.
\nThis is a reference to Sub-title.
\nconfig
+ # ¶Text with a description