readthedocs · humitos · Sep 21, 2021 · Jul 5, 2021 · Jul 6, 2021 · Jul 6, 2021
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -17,6 +17,16 @@ jobs:
       - run: pip install --user tox
       - run: tox -e py36,codecov
 
+  tests-embedapi:
+    docker:
+      - image: 'cimg/python:3.6'
+    steps:
+      - checkout
+      - run: git submodule sync
+      - run: git submodule update --init
+      - run: pip install --user tox
+      - run: tox -c tox.embedapi.ini
+
   checks:
     docker:
       - image: 'cimg/python:3.6'
@@ -45,3 +55,4 @@ workflows:
     jobs:
       - checks
       - tests
+      - tests-embedapi
diff --git a/pytest.ini b/pytest.ini
@@ -1,9 +1,11 @@
 [pytest]
-addopts = --reuse-db --strict-markers
+addopts = --strict-markers
 markers =
     search
     serve
     proxito
+    embed_api
+    sphinx
 python_files = tests.py test_*.py *_tests.py
 filterwarnings =
     # Ignore external dependencies warning deprecations
@@ -13,3 +15,9 @@ filterwarnings =
     ignore:Pagination may yield inconsistent results with an unordered object_list.*:django.core.paginator.UnorderedObjectListWarning
     # docutils
     ignore:'U' mode is deprecated:DeprecationWarning
+    # slumber
+    ignore:Using 'method_whitelist' with Retry is deprecated and will be removed in v2.0.*:DeprecationWarning
+    # kombu
+    ignore:SelectableGroups dict interface is deprecated.*:DeprecationWarning
+    # django
+    ignore:Remove the context parameter from JSONField.*:django.utils.deprecation.RemovedInDjango30Warning
diff --git a/readthedocs/conftest.py b/readthedocs/conftest.py
@@ -1,6 +1,12 @@
 import pytest
 from rest_framework.test import APIClient
 
+
+pytest_plugins = (
+    'sphinx.testing.fixtures',
+)
+
+
 @pytest.fixture
 def api_client():
+    """Return a new Django REST framework ``APIClient`` instance."""
     return APIClient()
diff --git a/readthedocs/embed/tests/test_links.py b/readthedocs/embed/tests/test_links.py
@@ -3,7 +3,7 @@
 import pytest
 from pyquery import PyQuery
 
-from readthedocs.embed.views import clean_links
+from readthedocs.embed.utils import clean_links
 
 URLData = namedtuple('URLData', ['docurl', 'href', 'expected'])
 

diff --git a/readthedocs/embed/utils.py b/readthedocs/embed/utils.py
@@ -1,5 +1,8 @@
 """Embed utils."""
 
+from urllib.parse import urlparse
+from pyquery import PyQuery as PQ  # noqa
+
 
 def recurse_while_none(element):
     """Recursively find the leaf node with the ``href`` attribute."""
@@ -10,3 +13,55 @@ def recurse_while_none(element):
     if not href:
         href = element.attrib.get('id')
     return {element.text: href}
+
+
+def clean_links(obj, url, html_raw_response=False):
+    """
+    Rewrite (internal) links to make them absolute.
+
+    1. external links (and links with an absolute path) are not changed
+    2. prepend URL to links that are just fragments (e.g. #section)
+    3. prepend URL (without filename) to internal relative links
+
+    ``<a>`` tags without an ``href`` attribute are skipped.
+
+    :param obj: HTML content; it is wrapped with ``PyQuery`` before processing
+    :param url: URL of the document ``obj`` belongs to,
+        or ``None`` to leave all the links untouched
+    :param html_raw_response: whether to return the outer HTML
+        instead of the ``PyQuery`` object
+    :returns: the ``PyQuery`` object, or the result of its ``outerHtml()``
+        when ``html_raw_response`` is truthy and ``url`` is not ``None``
+    """
+
+    # TODO: do not depend on PyQuery
+    obj = PQ(obj)
+
+    if url is None:
+        return obj
+
+    # The document URL is the same for every link: parse it only once.
+    document_url = urlparse(url)
+
+    # Base for relative links: the document URL without its trailing filename.
+    # ``rpartition`` (unlike unpacking the result of ``rsplit``) also copes
+    # with an empty path (e.g. https://docs.project.com), which becomes ``/``.
+    directory_url = document_url
+    if not document_url.path.endswith('/'):
+        # ``document_url`` is not HTMLDir
+        # (e.g. /en/latest/deep/internal/section/page.html):
+        # remove the filename (page.html) and use the rest as base URL
+        directory, _, _ = document_url.path.rpartition('/')
+        directory_url = document_url._replace(path=directory + '/')
+
+    for link in obj.find('a'):
+        href = link.attrib.get('href')
+        if href is None:
+            # <a> without ``href`` (e.g. a named anchor): nothing to rewrite
+            continue
+
+        parsed_href = urlparse(href)
+        if parsed_href.scheme or parsed_href.path.startswith('/'):
+            # don't change external links
+            continue
+
+        if not parsed_href.path and parsed_href.fragment:
+            # href="#section-link"
+            link.attrib['href'] = document_url.geturl() + href
+            continue
+
+        # internal relative link
+        # href="../../another.html" results in the absolute link
+        # https://slug.readthedocs.io/en/latest/deep/internal/section/../../another.html
+        link.attrib['href'] = directory_url.geturl() + href
+
+    if html_raw_response:
+        return obj.outerHtml()
+
+    return obj
diff --git a/readthedocs/embed/v3/__init__.py b/readthedocs/embed/v3/__init__.py
diff --git a/readthedocs/embed/v3/tests/__init__.py b/readthedocs/embed/v3/tests/__init__.py
diff --git a/readthedocs/embed/v3/tests/conftest.py b/readthedocs/embed/v3/tests/conftest.py
@@ -0,0 +1,14 @@
+import os
+import shutil
+import pytest
+
+from .utils import srcdir
+
+
+@pytest.fixture(autouse=True, scope='module')
+def remove_sphinx_build_output():
+    """Delete the ``_build/`` folder of every listed source directory, if it exists."""
+    source_directories = (srcdir,)
+    build_directories = [
+        os.path.join(directory, '_build')
+        for directory in source_directories
+    ]
+    for build_directory in build_directories:
+        if not os.path.exists(build_directory):
+            # nothing was built there (or it was already cleaned up)
+            continue
+        shutil.rmtree(build_directory)
diff --git a/readthedocs/embed/v3/tests/examples/default/bibtex-cite.rst b/readthedocs/embed/v3/tests/examples/default/bibtex-cite.rst
@@ -0,0 +1,9 @@
+sphinxcontrib-bibtex
+====================
+
+See https://sphinxcontrib-bibtex.readthedocs.io/en/latest/ for more information about how to use ``sphinxcontrib-bibtex``.
+
+See :cite:t:`1987:nelson` for an introduction to non-standard analysis.
+Non-standard analysis is fun :cite:p:`1987:nelson`.
+
+.. bibliography::
diff --git a/readthedocs/embed/v3/tests/examples/default/chapter-i.rst b/readthedocs/embed/v3/tests/examples/default/chapter-i.rst
@@ -0,0 +1,11 @@
+:orphan:
+
+Chapter I
+=========
+
+This is Chapter I.
+
+Section I
+---------
+
+This the Section I inside Chapter I.
diff --git a/readthedocs/embed/v3/tests/examples/default/conf.py b/readthedocs/embed/v3/tests/examples/default/conf.py
@@ -0,0 +1,17 @@
+# conf.py to run tests
+import sphinxcontrib.bibtex
+
+master_doc = 'index'
+extensions = [
+    'sphinx.ext.autosectionlabel',
+    'sphinxcontrib.bibtex',
+]
+
+bibtex_bibfiles = ['refs.bib']
+
+def setup(app):
+    """Register the ``confval`` object type (directive and role) on ``app``."""
+    object_type = 'confval'
+    app.add_object_type(
+        directivename=object_type,
+        rolename=object_type,
+        indextemplate='pair: %s; configuration value',
+    )
diff --git a/readthedocs/embed/v3/tests/examples/default/configuration.rst b/readthedocs/embed/v3/tests/examples/default/configuration.rst
@@ -0,0 +1,12 @@
+Configuration
+=============
+
+Examples of configurations.
+
+.. confval:: config1
+
+   Description: This the description for config1
+
+   Default: ``'Default value for config'``
+
+   Type: bool
diff --git a/readthedocs/embed/v3/tests/examples/default/glossary.rst b/readthedocs/embed/v3/tests/examples/default/glossary.rst
@@ -0,0 +1,9 @@
+Glossary
+--------
+
+Example using a ``:term:`` role :term:`Read the Docs`.
+
+.. glossary::
+
+    Read the Docs
+       Best company ever.
diff --git a/readthedocs/embed/v3/tests/examples/default/index.rst b/readthedocs/embed/v3/tests/examples/default/index.rst
@@ -0,0 +1,9 @@
+Title
+=====
+
+This is an example page used to test EmbedAPI parsing features.
+
+Sub-title
+---------
+
+This is a reference to :ref:`sub-title`.
diff --git a/readthedocs/embed/v3/tests/examples/default/refs.bib b/readthedocs/embed/v3/tests/examples/default/refs.bib
@@ -0,0 +1,6 @@
+@Book{1987:nelson,
+  author = {Edward Nelson},
+  title = {Radically Elementary Probability Theory},
+  publisher = {Princeton University Press},
+  year = {1987}
+}
diff --git a/readthedocs/embed/v3/tests/test_basics.py b/readthedocs/embed/v3/tests/test_basics.py
@@ -0,0 +1,71 @@
+import pytest
+
+from django.conf import settings
+from django.core.cache import cache
+from django.urls import reverse
+
+from .utils import srcdir
+
+
+@pytest.mark.django_db
+@pytest.mark.embed_api
+class TestEmbedAPIv3Basics:
+    """Check the error responses of the ``embed_api_v3`` endpoint for invalid requests."""
+
+    @pytest.fixture(autouse=True)
+    def setup_method(self, settings):
+        """
+        Override the settings used by these tests and resolve the endpoint URL.
+
+        The cache is cleared once the test finishes (code after ``yield``).
+        """
+        # NOTE: ``settings`` is the fixture argument here; it shadows the
+        # module-level ``django.conf.settings`` import inside this method.
+        settings.USE_SUBDOMAIN = True
+        settings.PUBLIC_DOMAIN = 'readthedocs.io'
+        settings.RTD_EMBED_API_EXTERNAL_DOMAINS = ['docs.project.com']
+
+        self.api_url = reverse('embed_api_v3')
+
+        yield
+        cache.clear()
+
+    def test_not_url_query_argument(self, client):
+        """A request without the ``url`` argument is answered with a 400 error."""
+        params = {}
+        response = client.get(self.api_url, params)
+        assert response.status_code == 400
+        assert response.json() == {'error': 'Invalid arguments. Please provide "url".'}
+
+    def test_not_allowed_domain(self, client):
+        """A domain not listed in ``RTD_EMBED_API_EXTERNAL_DOMAINS`` is answered with a 400 error."""
+        params = {
+            'url': 'https://docs.notalloweddomain.com#title',
+        }
+        response = client.get(self.api_url, params)
+        assert response.status_code == 400
+        assert response.json() == {'error': 'External domain not allowed. domain=docs.notalloweddomain.com'}
+
+    def test_malformed_url(self, client):
+        """A URL without a host is answered with a 400 error."""
+        params = {
+            # no host between ``https://`` and the path
+            'url': 'https:///page.html#title',
+        }
+        response = client.get(self.api_url, params)
+        assert response.status_code == 400
+        assert response.json() == {'error': f'The URL requested is malformed. url={params["url"]}'}
+
+    def test_rate_limit_domain(self, client):
+        """A domain whose cached counter equals the rate limit is answered with a 429 error."""
+        params = {
+            'url': 'https://docs.project.com#title',
+        }
+        # Pre-populate the cache entry for the domain with the limit value.
+        # ``settings`` is the module-level ``django.conf.settings`` import here.
+        cache_key = 'embed-api-docs.project.com'
+        cache.set(cache_key, settings.RTD_EMBED_API_DOMAIN_RATE_LIMIT)
+
+        response = client.get(self.api_url, params)
+        assert response.status_code == 429
+        assert response.json() == {'error': 'Too many requests for this domain. domain=docs.project.com'}
+
+    def test_infinite_redirect(self, client, requests_mock):
+        """A URL that redirects to itself is answered with a 400 error."""
+        # The mocked page answers with a 302 whose ``Location`` is the page itself.
+        requests_mock.get(
+            'https://docs.project.com',
+            status_code=302,
+            headers={
+                'Location': 'https://docs.project.com',
+            },
+        )
+        params = {
+            'url': 'https://docs.project.com#title',
+        }
+        response = client.get(self.api_url, params)
+        assert response.status_code == 400
+        assert response.json() == {'error': f'The URL requested generates too many redirects. url={params["url"]}'}