Skip to content

Addons + Proxito: return X-RTD-Resolver-Filename and inject via CF #11100

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Feb 27, 2024
Merged
2 changes: 1 addition & 1 deletion common
2 changes: 2 additions & 0 deletions dockerfiles/Dockerfile.wrangler
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
FROM node:18.15
RUN npm install -g [email protected]
223 changes: 223 additions & 0 deletions dockerfiles/force-readthedocs-addons.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
/*

Script to inject the new Addons implementation on pages served by El Proxito.

This script is run on a Cloudflare Worker and modifies the HTML for two different purposes:

1. remove the old implementation of our flyout (``readthedocs-doc-embed.js`` and others)
2. inject the new addons implementation (``readthedocs-addons.js``) script

Currently, we are doing 1) only when users opt in to the new beta addons.
In the future, when our addons become stable, we will always remove the old implementation,
making all projects use the addons by default.

*/

// Small builders for the attribute selectors used below.
// Selector syntax reference:
// https://developers.cloudflare.com/workers/runtime-apis/html-rewriter/#selectors
const scriptBySrc = (src) => `script[src="${src}"]`;
const linkByHref = (href) => `link[href="${href}"]`;

// Base locations of the static files: proxied under "/_/" and on the assets domain.
const proxiedStatic = "/_/static";
const assetsStatic = "https://assets.readthedocs.org/static";

// "<script>" tag for "readthedocs-addons.js"; it is appended inside the "<head>"
const addonsJs = `<script async type="text/javascript" src="${proxiedStatic}/javascript/readthedocs-addons.js"></script>`;

// Selectors of the elements we want to remove (proxied variants)
const analyticsJs = scriptBySrc(
  `${proxiedStatic}/javascript/readthedocs-analytics.js`,
);
const docEmbedCss = linkByHref(`${proxiedStatic}/css/readthedocs-doc-embed.css`);
const docEmbedJs = scriptBySrc(
  `${proxiedStatic}/javascript/readthedocs-doc-embed.js`,
);

// Selectors of the elements we want to remove (assets domain variants)
const analyticsJsAssets = scriptBySrc(
  `${assetsStatic}/javascript/readthedocs-analytics.js`,
);
const docEmbedCssAssets = linkByHref(
  `${assetsStatic}/css/readthedocs-doc-embed.css`,
);
const docEmbedJsAssets = scriptBySrc(
  `${assetsStatic}/javascript/readthedocs-doc-embed.js`,
);
const docEmbedJsAssetsCore = scriptBySrc(
  `${assetsStatic}/core/js/readthedocs-doc-embed.js`,
);
const badgeOnlyCssAssets = linkByHref(`${assetsStatic}/css/badge_only.css`);
const badgeOnlyCssAssetsProxied = linkByHref(
  `${proxiedStatic}/css/badge_only.css`,
);

// Selectors that are not tied to a static file URL
const readthedocsExternalVersionWarning =
  "[role=main] > div:first-child > div:first-child.admonition.warning";
const readthedocsFlyout = "div.rst-versions";

// "readthedocsDataParse" is the "<script>" that calls:
//
//   READTHEDOCS_DATA = JSON.parse(document.getElementById('READTHEDOCS_DATA').innerHTML);
//
const readthedocsDataParse = "script[id=READTHEDOCS_DATA]:first-of-type";
const readthedocsData = "script[id=READTHEDOCS_DATA]";

// Worker entry point: answer every "fetch" event with whatever
// `handleRequest` resolves to for the incoming request.
addEventListener("fetch", (fetchEvent) => {
  fetchEvent.respondWith(handleRequest(fetchEvent.request));
});

/**
 * Fetch the original response and, based on the headers it carries, decide
 * whether to rewrite it before returning it.
 *
 * - HTML with `X-RTD-Force-Addons: true` and no hosting integrations:
 *   remove the old flyout/analytics assets and inject the new addons.
 * - HTML with `X-RTD-Hosting-Integrations: true` and addons not forced:
 *   only inject the new addons.
 * - JavaScript served from a path ending in `_static/searchtools.js` when
 *   either header is "true": re-enable Sphinx's default search.
 * - Anything else: return the original response untouched.
 *
 * @param {Request} request - the incoming request received by the worker
 * @returns {Promise<Response>} the original or the rewritten response
 */
async function handleRequest(request) {
  // perform the original request
  const originalResponse = await fetch(request);

  // get the content type of the response to manipulate the content only if it's HTML
  const contentType = originalResponse.headers.get("content-type") || "";
  const injectHostingIntegrations =
    originalResponse.headers.get("x-rtd-hosting-integrations") || "false";
  const forceAddons =
    originalResponse.headers.get("x-rtd-force-addons") || "false";

  // Log some debugging data
  console.log(`ContentType: ${contentType}`);
  console.log(`X-RTD-Force-Addons: ${forceAddons}`);
  console.log(`X-RTD-Hosting-Integrations: ${injectHostingIntegrations}`);

  // get project/version slug and resolver filename from headers injected by El Proxito
  const projectSlug = originalResponse.headers.get("x-rtd-project") || "";
  const versionSlug = originalResponse.headers.get("x-rtd-version") || "";
  const resolverFilename =
    originalResponse.headers.get("x-rtd-resolver-filename") || "";

  // check if the Content-Type is HTML, otherwise do not touch the markup
  if (contentType.includes("text/html")) {
    // Remove old implementation of our flyout and inject the new addons if the following conditions are met:
    //
    // - header `X-RTD-Force-Addons` is present (user opted-in into new beta addons)
    // - header `X-RTD-Hosting-Integrations` is not present (added automatically when using `build.commands`)
    //
    if (forceAddons === "true" && injectHostingIntegrations === "false") {
      return (
        new HTMLRewriter()
          .on(analyticsJs, new removeElement())
          .on(docEmbedCss, new removeElement())
          .on(docEmbedJs, new removeElement())
          .on(analyticsJsAssets, new removeElement())
          .on(docEmbedCssAssets, new removeElement())
          .on(docEmbedJsAssets, new removeElement())
          .on(docEmbedJsAssetsCore, new removeElement())
          .on(badgeOnlyCssAssets, new removeElement())
          .on(badgeOnlyCssAssetsProxied, new removeElement())
          .on(readthedocsExternalVersionWarning, new removeElement())
          .on(readthedocsFlyout, new removeElement())
          // NOTE: I wasn't able to reliably remove the "<script>" that parses
          // the "READTHEDOCS_DATA" defined previously, so we are keeping it for now.
          //
          // .on(readthedocsDataParse, new removeElement())
          // .on(readthedocsData, new removeElement())
          .on("head", new addPreloads())
          .on(
            "head",
            new addMetaTags(projectSlug, versionSlug, resolverFilename),
          )
          .transform(originalResponse)
      );
    }

    // Inject the new addons if the following conditions are met:
    //
    // - header `X-RTD-Hosting-Integrations` is present (added automatically when using `build.commands`)
    // - header `X-RTD-Force-Addons` is not present (user did not opt-in into new beta addons)
    //
    if (forceAddons === "false" && injectHostingIntegrations === "true") {
      return new HTMLRewriter()
        .on("head", new addPreloads())
        .on("head", new addMetaTags(projectSlug, versionSlug, resolverFilename))
        .transform(originalResponse);
    }
  }

  // Compare only the path of the response URL: a query string or fragment
  // (e.g. a cache-busting "?v=<hash>") must not prevent the match below.
  const responsePath = (originalResponse.url || "").split(/[?#]/, 1)[0];

  // Modify `_static/searchtools.js` to re-enable Sphinx's default search
  if (
    (contentType.includes("text/javascript") ||
      contentType.includes("application/javascript")) &&
    (injectHostingIntegrations === "true" || forceAddons === "true") &&
    responsePath.endsWith("_static/searchtools.js")
  ) {
    console.log("Modifying _static/searchtools.js");
    return handleSearchToolsJSRequest(originalResponse);
  }

  // if none of the previous conditions are met,
  // we return the response without modifying it
  return originalResponse;
}

/**
 * HTMLRewriter element handler that logs a few identifying attributes of the
 * matched element and then removes it from the document.
 */
class removeElement {
  element(node) {
    console.log(`Removing: ${node.tagName}`);
    // Log the attributes that help to identify what is being removed.
    for (const attributeName of ["href", "src", "id", "class"]) {
      console.log(
        `Attribute ${attributeName}: ${node.getAttribute(attributeName)}`,
      );
    }
    node.remove();
  }
}

/**
 * HTMLRewriter element handler that appends the `addonsJs` markup, as HTML,
 * to the end of the matched element.
 */
class addPreloads {
  element(target) {
    const asHtml = { html: true };
    console.log("addPreloads");
    target.append(addonsJs, asHtml);
  }
}

/**
 * Escape a value so it can be safely placed inside a double-quoted HTML attribute.
 *
 * @param {string} value - raw text (e.g. coming from a response header)
 * @returns {string} the text with `&`, `<`, `>`, `"` and `'` replaced by entities
 */
function escapeHtmlAttribute(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(value).replace(/[&<>"']/g, (character) => entities[character]);
}

/**
 * HTMLRewriter element handler that appends the `readthedocs-project-slug`,
 * `readthedocs-version-slug` and `readthedocs-resolver-filename` `<meta>` tags
 * to the matched element.
 *
 * The tags are only appended when both the project and the version slug are
 * non-empty. The values are HTML-escaped because they are inserted with
 * `{ html: true }`: an unescaped quote or angle bracket would otherwise
 * break out of the attribute and inject markup into the page.
 */
class addMetaTags {
  /**
   * @param {string} projectSlug - value for `readthedocs-project-slug`
   * @param {string} versionSlug - value for `readthedocs-version-slug`
   * @param {string} resolverFilename - value for `readthedocs-resolver-filename`
   */
  constructor(projectSlug, versionSlug, resolverFilename) {
    this.projectSlug = projectSlug;
    this.versionSlug = versionSlug;
    this.resolverFilename = resolverFilename;
  }

  element(element) {
    console.log(
      `addMetaTags. projectSlug=${this.projectSlug} versionSlug=${this.versionSlug} resolverFilename=${this.resolverFilename}`,
    );
    if (this.projectSlug && this.versionSlug) {
      const metaProject = `<meta name="readthedocs-project-slug" content="${escapeHtmlAttribute(this.projectSlug)}" />`;
      const metaVersion = `<meta name="readthedocs-version-slug" content="${escapeHtmlAttribute(this.versionSlug)}" />`;
      const metaResolverFilename = `<meta name="readthedocs-resolver-filename" content="${escapeHtmlAttribute(this.resolverFilename)}" />`;

      element.append(metaProject, { html: true });
      element.append(metaVersion, { html: true });
      element.append(metaResolverFilename, { html: true });
    }
  }
}

/*

Script to fix the old removal of the Sphinx search init.

Enabling addons breaks the default Sphinx search in old versions that cannot be rebuilt.
This is because we solved the problem in the `readthedocs-sphinx-ext` extension,
but since those versions can't be rebuilt, the fix does not apply there.

To solve the problem in these old versions, we are using a CF worker to apply that fix on-the-fly
at serving time on those old versions.

The fix basically replaces a Read the Docs comment in file `_static/searchtools.js`,
introduced by `readthedocs-sphinx-ext` to _disable the initialization of Sphinx search_,
with the real JavaScript to initialize the search, as Sphinx does by default.
(in other words, it _reverts_ the manipulation done by `readthedocs-sphinx-ext`)

*/

// Comment found in `_static/searchtools.js` where the Sphinx search initialization was removed.
const textToReplace = `/* Search initialization removed for Read the Docs */`;
// JavaScript that takes the place of that comment and initializes the search again.
const textReplacement = `
/* Search initialization manipulated by Read the Docs using Cloudflare Workers */
/* See https://github.com/readthedocs/addons/issues/219 for more information */

function initializeSearch() {
Search.init();
}

if (document.readyState !== "loading") {
initializeSearch();
}
else {
document.addEventListener("DOMContentLoaded", initializeSearch);
}
`;

/**
 * Return a copy of the `searchtools.js` response in which the first
 * occurrence of `textToReplace` is replaced by `textReplacement`.
 *
 * The status and headers of the original response are preserved, so the
 * patched file keeps its JavaScript `Content-Type` (a bare `new Response(text)`
 * would be served as `text/plain`) as well as any caching headers.
 * Responses that cannot carry a body (e.g. `304 Not Modified`) are returned
 * unchanged.
 *
 * @param {Response} originalResponse - response fetched for `_static/searchtools.js`
 * @returns {Promise<Response>} the patched response
 */
async function handleSearchToolsJSRequest(originalResponse) {
  // Statuses for which a Response is not allowed to have a body: nothing to patch.
  const nullBodyStatuses = [101, 204, 205, 304];
  if (nullBodyStatuses.includes(originalResponse.status)) {
    return originalResponse;
  }

  const content = await originalResponse.text();

  const headers = new Headers(originalResponse.headers);
  // The body length changes with the replacement, so the old value is stale.
  headers.delete("content-length");

  return new Response(content.replace(textToReplace, textReplacement), {
    status: originalResponse.status,
    statusText: originalResponse.statusText,
    headers,
  });
}
26 changes: 15 additions & 11 deletions dockerfiles/nginx/proxito.conf.template
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Proxito
server {
listen 80 default_server;
server_name $NGINX_PROXITO_SERVER_NAME;
listen 8080;
server_name proxito;

# Docker Compose's "logging.driver: none" is not working anymore.
# So, we are disabling the logs from NGINX directly.
Expand All @@ -20,13 +20,22 @@ server {
rewrite ^ http://$project--$version.org.dev.readthedocs.build/page/$path;
}

# Proxy the "readthedocs-addons.js" to be downloaded from GitHub
location /_/static/javascript/readthedocs-addons.js {
proxy_pass https://raw.githubusercontent.com/readthedocs/addons/$NGINX_ADDONS_GITHUB_TAG/dist/readthedocs-addons.js;
add_header Content-Type "text/javascript; charset=utf-8" always;
}

location /_/static/javascript/readthedocs-addons.js.map {
proxy_pass https://raw.githubusercontent.com/readthedocs/addons/$NGINX_ADDONS_GITHUB_TAG/dist/readthedocs-addons.js.map;
add_header Content-Type "text/javascript; charset=utf-8" always;
}

# Proxito doc serving
location / {
proxy_pass http://proxito:8000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Host $host;

proxy_intercept_errors on;
error_page 404 = @notfoundfallback;
Expand Down Expand Up @@ -66,6 +75,8 @@ server {
add_header X-RTD-Project-Method $rtd_project_method always;
set $rtd_redirect $upstream_http_x_rtd_redirect;
add_header X-RTD-Redirect $rtd_redirect always;
set $rtd_resolver_filename $upstream_http_x_rtd_resolver_filename;
add_header X-RTD-Resolver-Filename $rtd_resolver_filename always;
set $cdn_cache_control $upstream_http_cdn_cache_control;
add_header CDN-Cache-Control $cdn_cache_control always;
set $cache_tag $upstream_http_cache_tag;
Expand Down Expand Up @@ -110,13 +121,6 @@ server {
add_header X-RTD-Hosting-Integrations $rtd_hosting_integrations always;
set $rtd_force_addons $upstream_http_x_rtd_force_addons;
add_header X-RTD-Force-Addons $rtd_force_addons always;

# Inject our own script dynamically and project/version slugs into the HTML to emulate what CF worker does
# TODO: find a way to make this work _without_ running `npm run dev` from the `addons` repository
sub_filter '</head>' '<script async language="javascript" src="http://localhost:8000/readthedocs-addons.js"></script>\n<meta name="readthedocs-project-slug" content="$rtd_project" />\n<meta name="readthedocs-version-slug" content="$rtd_version" />\n</head>';
sub_filter_types text/html;
sub_filter_last_modified on;
sub_filter_once on;
}

# Serve 404 pages here
Expand Down
20 changes: 20 additions & 0 deletions dockerfiles/nginx/wrangler.conf.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Wrangler front-end
#
# Default server on port 80 that forwards every request to the "wrangler"
# upstream on port 8000.
# NOTE(review): presumably "wrangler" is the container running the Cloudflare
# Worker locally (see "Dockerfile.wrangler") -- confirm against the compose file.
server {
listen 80 default_server;
# The server name is taken from the same variable used for Proxito.
server_name $NGINX_PROXITO_SERVER_NAME;

# Docker Compose's "logging.driver: none" is not working anymore.
# So, we are disabling the logs from NGINX directly.
access_log off;

# Wrangler serving
location / {
proxy_pass http://wrangler:8000;
# Forward the original host and client address to the upstream.
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Host $host;

# Mark every response (including errors, because of "always") as served through this block.
add_header X-Served CF-Wrangler always;
}
}
22 changes: 20 additions & 2 deletions readthedocs/proxito/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
from readthedocs.core.unresolver import (
InvalidCustomDomainError,
InvalidExternalDomainError,
InvalidPathForVersionedProjectError,
InvalidSubdomainError,
InvalidXRTDSlugHeaderError,
SuspiciousHostnameError,
unresolve,
unresolver,
)
from readthedocs.core.utils import get_cache_tag
Expand Down Expand Up @@ -187,17 +189,19 @@ def add_cache_headers(self, request, response):
else:
cache_response(response, force=False)

def _set_request_attributes(self, request, unresolved_domain):
def _set_request_attributes(self, request, unresolved_domain, unresolved_url):
"""
Set attributes in the request from the unresolved domain.

- Set ``request.unresolved_domain`` to the unresolved domain.
"""
request.unresolved_domain = unresolved_domain
request.unresolved_url = unresolved_url

def process_request(self, request): # noqa
# Initialize our custom request attributes.
request.unresolved_domain = None
request.unresolved_url = None

skip = any(request.path.startswith(reverse(view)) for view in self.skip_views)
if skip:
Expand Down Expand Up @@ -229,7 +233,12 @@ def process_request(self, request): # noqa
except InvalidXRTDSlugHeaderError as exc:
raise SuspiciousOperation("Invalid X-RTD-Slug header.") from exc

self._set_request_attributes(request, unresolved_domain)
try:
unresolved_url = unresolve(request.build_absolute_uri())
except InvalidPathForVersionedProjectError:
unresolved_url = None

self._set_request_attributes(request, unresolved_domain, unresolved_url)

response = self._get_https_redirect(request)
if response:
Expand Down Expand Up @@ -367,11 +376,20 @@ def _get_https_redirect(self, request):

return None

def add_resolver_headers(self, request, response):
# TODO: find a better way to re-use the unresolved URL so we don't
# query the db multiple times on the same request.
# https://github.com/readthedocs/readthedocs.org/issues/10456
if request.unresolved_url is not None:
# TODO: add more ``X-RTD-Resolver-*`` headers
response["X-RTD-Resolver-Filename"] = request.unresolved_url.filename

def process_response(self, request, response): # noqa
self.add_proxito_headers(request, response)
self.add_cache_headers(request, response)
self.add_hsts_headers(request, response)
self.add_user_headers(request, response)
self.add_hosting_integrations_headers(request, response)
self.add_resolver_headers(request, response)
self.add_cors_headers(request, response)
return response
1 change: 1 addition & 0 deletions readthedocs/proxito/tests/test_headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def test_serve_headers(self):
self.assertEqual(r["X-RTD-Project-Method"], "public_domain")
self.assertEqual(r["X-RTD-Version"], "latest")
self.assertEqual(r["X-RTD-version-Method"], "path")
self.assertEqual(r["X-RTD-Resolver-Filename"], "/index.html")
self.assertEqual(
r["X-RTD-Path"], "/proxito/media/html/project/latest/index.html"
)
Expand Down