Skip to content

Commit d8651c1

Browse files
authored
Merge pull request #8521 from readthedocs/humitos/embed-api-v3-updates
EmbedAPIv3: updates after QA with sphinx-hoverxref
2 parents 658f825 + cc45e39 commit d8651c1

File tree

4 files changed

+44
-33
lines changed

4 files changed

+44
-33
lines changed

readthedocs/embed/v3/views.py

+34-31
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import requests
77

88
from selectolax.parser import HTMLParser
9-
from pyquery import PyQuery as PQ # noqa
109

1110
from django.conf import settings
1211
from django.core.cache import cache
@@ -71,12 +70,15 @@ def _download_page_content(self, url):
7170

7271
response = requests.get(url, timeout=settings.RTD_EMBED_API_DEFAULT_REQUEST_TIMEOUT)
7372
if response.ok:
73+
# NOTE: we use ``response.content`` to get its binary
74+
# representation. Then ``selectolax`` is in charge to auto-detect
75+
# its encoding. We trust more in selectolax for this than in requests.
7476
cache.set(
7577
cache_key,
76-
response.text,
78+
response.content,
7779
timeout=settings.RTD_EMBED_API_PAGE_CACHE_TIMEOUT,
7880
)
79-
return response.text
81+
return response.content
8082

8183
def _get_page_content_from_storage(self, project, version_slug, filename):
8284
version = get_object_or_404(
@@ -138,7 +140,11 @@ def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversio
138140

139141
node = None
140142
if fragment:
141-
selector = f'#{fragment}'
143+
# NOTE: we use the `[id=]` selector because using `#{id}` requires
144+
# escaping the selector since CSS does not support the same
145+
# characters as the `id=` HTML attribute
146+
# https://www.w3.org/TR/CSS21/syndata.html#value-def-identifier
147+
selector = f'[id="{fragment}"]'
142148
node = HTMLParser(page_content).css_first(selector)
143149
else:
144150
html = HTMLParser(page_content)
@@ -150,7 +156,10 @@ def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversio
150156
if doctool == 'sphinx':
151157
# Handle ``dt`` special cases
152158
if node.tag == 'dt':
153-
if 'glossary' in node.parent.attributes.get('class'):
159+
if any([
160+
'glossary' in node.parent.attributes.get('class'),
161+
'citation' in node.parent.attributes.get('class'),
162+
]):
154163
# Sphinx HTML structure for term glossary puts the ``id`` in the
155164
# ``dt`` element with the title of the term. In this case, we
156165
# return the parent node which contains the definition list
@@ -163,32 +172,26 @@ def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversio
163172
# ...
164173
# </dl>
165174

166-
# TODO: figure it out if it's needed to remove the siblings here
167-
# parent = node.parent
168-
# for n in parent.traverse():
169-
# if n not in (node, node.next):
170-
# n.remove()
171-
node = node.parent
172-
173-
elif 'citation' in node.parent.attributes.get('class'):
174-
# Sphinx HTML structure for sphinxcontrib-bibtex puts the ``id`` in the
175-
# ``dt`` element with the title of the cite. In this case, we
176-
# return the parent node which contains the definition list
177-
# and remove all ``dt/dd`` that are not the requested one
178-
179-
# Structure:
180-
# <dl class="citation">
181-
# <dt id="cite-id"><span><a>Title of the cite</a></span></dt>
182-
# <dd>Content of the cite</dd>
183-
# ...
184-
# </dl>
185-
186-
# TODO: figure it out if it's needed to remove the siblings here
187-
# parent = node.parent
188-
# for n in parent.traverse():
189-
# if n not in (node, node.next):
190-
# n.remove()
191-
node = node.parent
175+
parent_node = node.parent
176+
if 'glossary' in node.parent.attributes.get('class'):
177+
next_node = node.next
178+
179+
elif 'citation' in node.parent.attributes.get('class'):
180+
next_node = node.next.next
181+
182+
# Iterate over all the siblings (``.iter()``) of the parent
183+
# node and remove ``dt`` and ``dd`` that are not the ones
184+
# we are looking for. Then return the parent node as
185+
# result.
186+
#
187+
# Note that ``.iter()`` returns a generator and we modify
188+
# the HTML in-place, so we have to convert it to a list
189+
# before removing elements. Otherwise we break the
190+
# iteration before completing it
191+
for n in list(parent_node.iter()): # pylint: disable=invalid-name
192+
if n not in (node, next_node):
193+
n.remove()
194+
node = parent_node
192195

193196
else:
194197
# Sphinx HTML structure for definition list puts the ``id``

readthedocs/settings/base.py

+2
Original file line numberDiff line numberDiff line change
@@ -818,6 +818,8 @@ def DOCKER_LIMITS(self):
818818
r'docs\.python\.org',
819819
r'docs\.scipy\.org',
820820
r'docs\.sympy\.org',
821+
r'www.sphinx-doc.org',
822+
r'numpy\.org',
821823
]
822824
RTD_EMBED_API_PAGE_CACHE_TIMEOUT = 5 * 10
823825
RTD_EMBED_API_DEFAULT_REQUEST_TIMEOUT = 1

readthedocs/settings/docker_compose.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,12 @@ def RTD_EXT_THEME_DEV_SERVER(self):
6666
@property
6767
def RTD_EMBED_API_EXTERNAL_DOMAINS(self):
6868
domains = super().RTD_EMBED_API_EXTERNAL_DOMAINS
69-
domains.append(r'.*\.readthedocs\.io')
69+
domains.extend([
70+
r'.*\.readthedocs\.io',
71+
r'.*\.org\.readthedocs\.build',
72+
r'.*\.readthedocs-hosted\.com',
73+
r'.*\.com\.readthedocs\.build',
74+
])
7075
return domains
7176

7277
@property

tox.embedapi.ini

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[tox]
2-
envlist = sphinx-{18,20,21,22,23,24,30,31,32,33,34,35,40,41,latest}
2+
envlist = sphinx-{18,20,21,22,23,24,30,31,32,33,34,35,40,41,42,latest}
33

44
[testenv]
55
description = run test suite for the EmbedAPIv3
@@ -27,6 +27,7 @@ deps =
2727
sphinx-35: Sphinx~=3.5.0
2828
sphinx-40: Sphinx~=4.0.0
2929
sphinx-41: Sphinx~=4.1.0
30+
sphinx-42: Sphinx~=4.2.0
3031
sphinx-latest: Sphinx
3132
setenv =
3233
DJANGO_SETTINGS_MODULE=readthedocs.settings.test

0 commit comments

Comments
 (0)