Skip to content

Commit 48203f2

Browse files
authored
Addons + Proxito: return X-RTD-Resolver-Filename and inject via CF (#11100)
* Development: use `wrangler` locally (update NGINX/Dockerfile config) Related readthedocs/addons#217 * Add CF Worker .js script * Proxy JS files to Addons GitHub's repository * Point common to wrangler branch * Addons + Proxito: return `X-RTD-Resolver-Filename` and inject via CF Return `X-RTD-Resolver-Filename` from El Proxito and inject it as HTML `meta` tag from Cloudflare Worker. Required by readthedocs/addons#211 * Feedback from review * Validate the header value before injecting it * Initialize `request.unresolved_url` as `None` * Add test for longer filename * Inject the `unresolved_url` on 404 pages
1 parent 5279a90 commit 48203f2

File tree

5 files changed

+106
-41
lines changed

5 files changed

+106
-41
lines changed

dockerfiles/nginx/proxito.conf.template

+2
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ server {
7777
add_header X-RTD-Project-Method $rtd_project_method always;
7878
set $rtd_redirect $upstream_http_x_rtd_redirect;
7979
add_header X-RTD-Redirect $rtd_redirect always;
80+
set $rtd_resolver_filename $upstream_http_x_rtd_resolver_filename;
81+
add_header X-RTD-Resolver-Filename $rtd_resolver_filename always;
8082
set $cdn_cache_control $upstream_http_cdn_cache_control;
8183
add_header CDN-Cache-Control $cdn_cache_control always;
8284
set $cache_tag $upstream_http_cache_tag;

readthedocs/proxito/middleware.py

+25
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@
1515
)
1616
from django.conf import settings
1717
from django.core.exceptions import SuspiciousOperation
18+
from django.http.response import BadHeaderError, ResponseHeaders
1819
from django.shortcuts import redirect
1920
from django.urls import reverse
2021
from django.utils.deprecation import MiddlewareMixin
22+
from django.utils.html import escape
2123

2224
from readthedocs.builds.models import Version
2325
from readthedocs.core.unresolver import (
@@ -198,6 +200,7 @@ def _set_request_attributes(self, request, unresolved_domain):
198200
def process_request(self, request): # noqa
199201
# Initialize our custom request attributes.
200202
request.unresolved_domain = None
203+
request.unresolved_url = None
201204

202205
skip = any(request.path.startswith(reverse(view)) for view in self.skip_views)
203206
if skip:
@@ -367,11 +370,33 @@ def _get_https_redirect(self, request):
367370

368371
return None
369372

373+
def add_resolver_headers(self, request, response):
374+
if request.unresolved_url is not None:
375+
# TODO: add more ``X-RTD-Resolver-*`` headers
376+
header_value = escape(request.unresolved_url.filename)
377+
try:
378+
# Use Django internals to validate the header's value before injecting it.
379+
ResponseHeaders({})._convert_to_charset(
380+
header_value,
381+
"latin-1",
382+
mime_encode=True,
383+
)
384+
385+
response["X-RTD-Resolver-Filename"] = header_value
386+
except BadHeaderError:
387+
# Skip adding the header because it fails validation
388+
log.info(
389+
"Skip adding X-RTD-Resolver-Filename header due to invalid value.",
390+
filename=request.unresolved_url.filename,
391+
value=header_value,
392+
)
393+
370394
def process_response(self, request, response): # noqa
371395
self.add_proxito_headers(request, response)
372396
self.add_cache_headers(request, response)
373397
self.add_hsts_headers(request, response)
374398
self.add_user_headers(request, response)
375399
self.add_hosting_integrations_headers(request, response)
400+
self.add_resolver_headers(request, response)
376401
self.add_cors_headers(request, response)
377402
return response

readthedocs/proxito/tests/test_headers.py

+20
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,30 @@ def test_serve_headers(self):
5151
self.assertEqual(r["X-RTD-Project-Method"], "public_domain")
5252
self.assertEqual(r["X-RTD-Version"], "latest")
5353
self.assertEqual(r["X-RTD-version-Method"], "path")
54+
self.assertEqual(r["X-RTD-Resolver-Filename"], "/")
5455
self.assertEqual(
5556
r["X-RTD-Path"], "/proxito/media/html/project/latest/index.html"
5657
)
5758

59+
def test_serve_headers_with_path(self):
60+
r = self.client.get(
61+
"/en/latest/guides/jupyter/gallery.html",
62+
secure=True,
63+
headers={"host": "project.dev.readthedocs.io"},
64+
)
65+
self.assertEqual(r.status_code, 200)
66+
self.assertEqual(r["Cache-Tag"], "project,project:latest")
67+
self.assertEqual(r["X-RTD-Domain"], "project.dev.readthedocs.io")
68+
self.assertEqual(r["X-RTD-Project"], "project")
69+
self.assertEqual(r["X-RTD-Project-Method"], "public_domain")
70+
self.assertEqual(r["X-RTD-Version"], "latest")
71+
self.assertEqual(r["X-RTD-version-Method"], "path")
72+
self.assertEqual(r["X-RTD-Resolver-Filename"], "/guides/jupyter/gallery.html")
73+
self.assertEqual(
74+
r["X-RTD-Path"],
75+
"/proxito/media/html/project/latest/guides/jupyter/gallery.html",
76+
)
77+
5878
def test_subproject_serve_headers(self):
5979
r = self.client.get(
6080
"/projects/subproject/en/latest/",

readthedocs/proxito/tests/test_old_redirects.py

+1
Original file line numberDiff line numberDiff line change
@@ -1352,6 +1352,7 @@ def test_redirect_exact_with_wildcard_crossdomain(self):
13521352
r = self.client.get(url, headers={"host": "project.dev.readthedocs.io"})
13531353
self.assertEqual(r.status_code, 302, url)
13541354
self.assertEqual(r["Location"], expected_location, url)
1355+
self.assertNotIn("X-RTD-Resolver-Filename", r.headers)
13551356

13561357
def test_redirect_html_to_clean_url_crossdomain(self):
13571358
"""

readthedocs/proxito/views/serve.py

+58-41
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,10 @@ def serve_path(self, request, path):
265265
version = unresolved.version
266266
filename = unresolved.filename
267267

268+
# Inject the UnresolvedURL into the HttpRequest so we can access from the middleware.
269+
# We could resolve it again from the middleware, but we would duplicating DB queries.
270+
request.unresolved_url = unresolved
271+
268272
# Check if the old language code format was used, and redirect to the new one.
269273
# NOTE: we may have some false positives here, for example for an URL like:
270274
# /pt-br/latest/pt_BR/index.html, but our protection for infinite redirects
@@ -391,7 +395,7 @@ def get(self, request, proxito_path):
391395
the Docs default page (Maze Found) is rendered by Django and served.
392396
"""
393397
log.bind(proxito_path=proxito_path)
394-
log.debug('Executing 404 handler.')
398+
log.debug("Executing 404 handler.")
395399
unresolved_domain = request.unresolved_domain
396400
# We force all storage calls to use the external versions storage,
397401
# since we are serving an external version.
@@ -420,6 +424,11 @@ def get(self, request, proxito_path):
420424
path=proxito_path,
421425
append_indexhtml=False,
422426
)
427+
428+
# Inject the UnresolvedURL into the HttpRequest so we can access from the middleware.
429+
# We could resolve it again from the middleware, but we would duplicating DB queries.
430+
request.unresolved_url = unresolved
431+
423432
project = unresolved.project
424433
version = unresolved.version
425434
filename = unresolved.filename
@@ -708,11 +717,12 @@ def get(self, request):
708717
# Verify if the project is marked as spam and return a custom robots.txt
709718
if "readthedocsext.spamfighting" in settings.INSTALLED_APPS:
710719
from readthedocsext.spamfighting.utils import is_robotstxt_denied # noqa
720+
711721
if is_robotstxt_denied(project):
712722
return render(
713723
request,
714-
'robots.spam.txt',
715-
content_type='text/plain',
724+
"robots.spam.txt",
725+
content_type="text/plain",
716726
)
717727

718728
# Use the ``robots.txt`` file from the default version configured
@@ -747,33 +757,30 @@ def get(self, request):
747757
filename="robots.txt",
748758
check_if_exists=True,
749759
)
750-
log.info('Serving custom robots.txt file.')
760+
log.info("Serving custom robots.txt file.")
751761
return response
752762
except StorageFileNotFound:
753763
pass
754764

755765
# Serve default robots.txt
756-
sitemap_url = '{scheme}://{domain}/sitemap.xml'.format(
757-
scheme='https',
766+
sitemap_url = "{scheme}://{domain}/sitemap.xml".format(
767+
scheme="https",
758768
domain=project.subdomain(),
759769
)
760770
context = {
761-
'sitemap_url': sitemap_url,
762-
'hidden_paths': self._get_hidden_paths(project),
771+
"sitemap_url": sitemap_url,
772+
"hidden_paths": self._get_hidden_paths(project),
763773
}
764774
return render(
765775
request,
766-
'robots.txt',
776+
"robots.txt",
767777
context,
768-
content_type='text/plain',
778+
content_type="text/plain",
769779
)
770780

771781
def _get_hidden_paths(self, project):
772782
"""Get the absolute paths of the public hidden versions of `project`."""
773-
hidden_versions = (
774-
Version.internal.public(project=project)
775-
.filter(hidden=True)
776-
)
783+
hidden_versions = Version.internal.public(project=project).filter(hidden=True)
777784
resolver = Resolver()
778785
hidden_paths = [
779786
resolver.resolve_path(project, version_slug=version.slug)
@@ -846,8 +853,8 @@ def hreflang_formatter(lang):
846853
Use hyphen instead of underscore in language and country value.
847854
ref: https://en.wikipedia.org/wiki/Hreflang#Common_Mistakes
848855
"""
849-
if '_' in lang:
850-
return lang.replace('_', '-')
856+
if "_" in lang:
857+
return lang.replace("_", "-")
851858
return lang
852859

853860
def changefreqs_generator():
@@ -861,8 +868,8 @@ def changefreqs_generator():
861868
aggressive. If the tag is removed and a branch is created with the same
862869
name, we will want bots to revisit this.
863870
"""
864-
changefreqs = ['weekly', 'daily']
865-
yield from itertools.chain(changefreqs, itertools.repeat('monthly'))
871+
changefreqs = ["weekly", "daily"]
872+
yield from itertools.chain(changefreqs, itertools.repeat("monthly"))
866873

867874
project = request.unresolved_domain.project
868875
public_versions = Version.internal.public(
@@ -879,28 +886,34 @@ def changefreqs_generator():
879886
# We want stable with priority=1 and changefreq='weekly' and
880887
# latest with priority=0.9 and changefreq='daily'
881888
# More details on this: https://github.com/rtfd/readthedocs.org/issues/5447
882-
if (len(sorted_versions) >= 2 and sorted_versions[0].slug == LATEST and
883-
sorted_versions[1].slug == STABLE):
884-
sorted_versions[0], sorted_versions[1] = sorted_versions[1], sorted_versions[0]
889+
if (
890+
len(sorted_versions) >= 2
891+
and sorted_versions[0].slug == LATEST
892+
and sorted_versions[1].slug == STABLE
893+
):
894+
sorted_versions[0], sorted_versions[1] = (
895+
sorted_versions[1],
896+
sorted_versions[0],
897+
)
885898

886899
versions = []
887900
for version, priority, changefreq in zip(
888-
sorted_versions,
889-
priorities_generator(),
890-
changefreqs_generator(),
901+
sorted_versions,
902+
priorities_generator(),
903+
changefreqs_generator(),
891904
):
892905
element = {
893-
'loc': version.get_subdomain_url(),
894-
'priority': priority,
895-
'changefreq': changefreq,
896-
'languages': [],
906+
"loc": version.get_subdomain_url(),
907+
"priority": priority,
908+
"changefreq": changefreq,
909+
"languages": [],
897910
}
898911

899912
# Version can be enabled, but not ``built`` yet. We want to show the
900913
# link without a ``lastmod`` attribute
901-
last_build = version.builds.order_by('-date').first()
914+
last_build = version.builds.order_by("-date").first()
902915
if last_build:
903-
element['lastmod'] = last_build.date.isoformat()
916+
element["lastmod"] = last_build.date.isoformat()
904917

905918
resolver = Resolver()
906919
if project.translations.exists():
@@ -915,27 +928,31 @@ def changefreqs_generator():
915928
project=translation,
916929
version=translated_version,
917930
)
918-
element['languages'].append({
919-
'hreflang': hreflang_formatter(translation.language),
920-
'href': href,
921-
})
931+
element["languages"].append(
932+
{
933+
"hreflang": hreflang_formatter(translation.language),
934+
"href": href,
935+
}
936+
)
922937

923938
# Add itself also as protocol requires
924-
element['languages'].append({
925-
'hreflang': project.language,
926-
'href': element['loc'],
927-
})
939+
element["languages"].append(
940+
{
941+
"hreflang": project.language,
942+
"href": element["loc"],
943+
}
944+
)
928945

929946
versions.append(element)
930947

931948
context = {
932-
'versions': versions,
949+
"versions": versions,
933950
}
934951
return render(
935952
request,
936-
'sitemap.xml',
953+
"sitemap.xml",
937954
context,
938-
content_type='application/xml',
955+
content_type="application/xml",
939956
)
940957

941958
def _get_project(self):

0 commit comments

Comments
 (0)