Skip to content

Proxito: pass unresolved domain in request #9982

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Feb 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions readthedocs/audit/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from readthedocs.acl.utils import get_auth_backend
from readthedocs.analytics.utils import get_client_ip
from readthedocs.projects.models import Project


class AuditLogManager(models.Manager):
Expand Down Expand Up @@ -50,9 +49,9 @@ def new(self, action, user=None, request=None, **kwargs):

# Fill the project from the request if available.
# This is frequently on actions generated from a subdomain.
project_slug = getattr(request, 'host_project_slug', None)
if 'project' not in kwargs and project_slug:
kwargs['project'] = Project.objects.filter(slug=project_slug).first()
unresolved_domain = getattr(request, "unresolved_domain", None)
if "project" not in kwargs and unresolved_domain:
kwargs["project"] = unresolved_domain.project

return self.create(
user=user,
Expand Down
20 changes: 18 additions & 2 deletions readthedocs/core/unresolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,22 @@ class UnresolvedDomain:
domain: Domain = None
external_version_slug: str = None

@property
def is_from_custom_domain(self):
return self.source == DomainSourceType.custom_domain

@property
def is_from_public_domain(self):
return self.source == DomainSourceType.public_domain

@property
def is_from_http_header(self):
return self.source == DomainSourceType.http_header

@property
def is_from_external_domain(self):
return self.source == DomainSourceType.external_domain


class Unresolver:
# This pattern matches:
Expand Down Expand Up @@ -136,7 +152,7 @@ def unresolve(self, url, append_indexhtml=True):
)

# Make sure we are serving the external version from the subdomain.
if unresolved_domain.source == DomainSourceType.external_domain and version:
if unresolved_domain.is_from_external_domain and version:
if unresolved_domain.external_version_slug != version.slug:
log.warning(
"Invalid version for external domain.",
Expand Down Expand Up @@ -166,7 +182,7 @@ def unresolve(self, url, append_indexhtml=True):
filename=filename,
parsed_url=parsed,
domain=unresolved_domain.domain,
external=unresolved_domain.source == DomainSourceType.external_domain,
external=unresolved_domain.is_from_external_domain,
)

@staticmethod
Expand Down
75 changes: 30 additions & 45 deletions readthedocs/proxito/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from django.utils.deprecation import MiddlewareMixin

from readthedocs.core.unresolver import (
DomainSourceType,
InvalidCustomDomainError,
InvalidExternalDomainError,
InvalidSubdomainError,
Expand Down Expand Up @@ -75,17 +74,13 @@ def add_proxito_headers(self, request, response):
if cache_tags:
response['Cache-Tag'] = ','.join(cache_tags)

if hasattr(request, 'rtdheader'):
response['X-RTD-Project-Method'] = 'rtdheader'
elif hasattr(request, 'subdomain'):
response['X-RTD-Project-Method'] = 'subdomain'
elif hasattr(request, 'cname'):
response['X-RTD-Project-Method'] = 'cname'

if hasattr(request, 'external_domain'):
response['X-RTD-Version-Method'] = 'domain'
else:
response['X-RTD-Version-Method'] = 'path'
unresolved_domain = request.unresolved_domain
if unresolved_domain:
response["X-RTD-Project-Method"] = unresolved_domain.source.name
if unresolved_domain.is_from_external_domain:
response["X-RTD-Version-Method"] = "domain"
else:
response["X-RTD-Version-Method"] = "path"

def add_user_headers(self, request, response):
"""
Expand All @@ -94,19 +89,21 @@ def add_user_headers(self, request, response):
The headers added come from ``projects.models.HTTPHeader`` associated
with the ``Domain`` object.
"""
if hasattr(request, 'domain'):
unresolved_domain = request.unresolved_domain
if unresolved_domain and unresolved_domain.is_from_custom_domain:
response_headers = [header.lower() for header in response.headers.keys()]
for http_header in request.domain.http_headers.all():
domain = unresolved_domain.domain
for http_header in domain.http_headers.all():
if http_header.name.lower() in response_headers:
log.error(
'Overriding an existing response HTTP header.',
http_header=http_header.name,
domain=request.domain.domain,
domain=domain.domain,
)
log.info(
'Adding custom response HTTP header.',
http_header=http_header.name,
domain=request.domain.domain,
domain=domain.domain,
)

if http_header.only_if_secure_request and not request.is_secure():
Expand All @@ -129,17 +126,20 @@ def add_hsts_headers(self, request, response):
# Only set the HSTS header if the request is over HTTPS
return response

host = request.get_host().lower().split(':')[0]
public_domain = settings.PUBLIC_DOMAIN.lower().split(':')[0]
hsts_header_values = []
if settings.PUBLIC_DOMAIN_USES_HTTPS and public_domain in host:
unresolved_domain = request.unresolved_domain
if (
settings.PUBLIC_DOMAIN_USES_HTTPS
and unresolved_domain
and unresolved_domain.is_from_public_domain
):
hsts_header_values = [
'max-age=31536000',
'includeSubDomains',
'preload',
]
elif hasattr(request, 'domain'):
domain = request.domain
elif unresolved_domain and unresolved_domain.is_from_custom_domain:
domain = unresolved_domain.domain
# TODO: migrate Domains with HSTS set using these fields to
# ``HTTPHeader`` and remove this chunk of code from here.
if domain.hsts_max_age:
Expand Down Expand Up @@ -169,10 +169,11 @@ def add_cache_headers(self, request, response):
See https://developers.cloudflare.com/cache/about/cdn-cache-control.
"""
header = "CDN-Cache-Control"
unresolved_domain = request.unresolved_domain
# Never trust projects resolving from the X-RTD-Slug header,
# we don't want to cache their content on domains from other
# projects, see GHSA-mp38-vprc-7hf5.
if hasattr(request, "rtdheader"):
if unresolved_domain and unresolved_domain.is_from_http_header:
response.headers[header] = "private"

if settings.ALLOW_PRIVATE_REPOS:
Expand All @@ -183,32 +184,18 @@ def _set_request_attributes(self, request, unresolved_domain):
"""
Set attributes in the request from the unresolved domain.

- If the project was extracted from the ``X-RTD-Slug`` header,
we set ``request.rtdheader`` to `True`.
- If the project was extracted from the public domain,
we set ``request.subdomain`` to `True`.
- If the project was extracted from a custom domain,
we set ``request.cname`` to `True`.
- Set ``request.unresolved_domain`` to the unresolved domain.
- If the domain needs to redirect, set the canonicalize attribute accordingly.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only the canonicalize attributes are set here now, but I think we should be able to move these to the serve_docs view, since there is the only place we make use of them, maybe the http to https redirect could be done at the middleware level.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we have a bigger object? I'd like to inject one and only one object to the request with a known structure. As an example:

class DataRequest:
  unresolved_domain
  canonicalize
  # ... others we will add in the future

What do you think?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are using these attributes in the docs view only, moving all the logic to the docs view serving should be easier

canonicalize_redirect_type = getattr(request, "canonicalize", None)
if canonicalize_redirect_type:
try:
# A canonical redirect can be cached, if we don't have information
# about the version, since the final URL will check for authz.
if not version and self._is_cache_enabled(final_project):
self.cache_request = True
return self.canonical_redirect(
request=request,
final_project=final_project,
version_slug=version_slug,
filename=filename,
redirect_type=canonicalize_redirect_type,
is_external_version=is_external,
)
except InfiniteRedirectException:
# Don't redirect in this case, since it would break things
pass

Once that is moved, not sure if there is anything else that we will need to inject in the request, but if we have the need, I'd be fine creating another object.

"""
# TODO: Set the unresolved domain in the request instead of each of these attributes.
source = unresolved_domain.source
request.unresolved_domain = unresolved_domain
project = unresolved_domain.project
if source == DomainSourceType.http_header:
request.rtdheader = True
elif source == DomainSourceType.custom_domain:
if unresolved_domain.is_from_custom_domain:
domain = unresolved_domain.domain
request.cname = True
request.domain = domain
if domain.https and not request.is_secure():
# Redirect HTTP -> HTTPS (302) for this custom domain.
log.debug("Proxito CNAME HTTPS Redirect.", domain=domain.domain)
request.canonicalize = constants.REDIRECT_HTTPS
elif source == DomainSourceType.external_domain:
request.external_domain = True
request.host_version_slug = unresolved_domain.external_version_slug
elif source == DomainSourceType.public_domain:
request.subdomain = True
elif unresolved_domain.is_from_public_domain:
canonical_domain = (
Domain.objects.filter(project=project)
.filter(canonical=True, https=True)
Expand All @@ -226,10 +213,11 @@ def _set_request_attributes(self, request, unresolved_domain):
project_slug=project.slug,
)
request.canonicalize = constants.REDIRECT_SUBPROJECT_MAIN_DOMAIN
else:
raise NotImplementedError

def process_request(self, request): # noqa
# Initialize our custom request attributes.
request.unresolved_domain = None

skip = any(
request.path.startswith(reverse(view))
for view in self.skip_views
Expand Down Expand Up @@ -292,9 +280,6 @@ def process_request(self, request): # noqa
project_slug=project.slug,
)

# Otherwise set the slug on the request
request.host_project_slug = request.slug = project.slug

# This is hacky because Django wants a module for the URLConf,
# instead of also accepting string
if project.urlconf:
Expand Down
60 changes: 32 additions & 28 deletions readthedocs/proxito/tests/test_headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,25 @@ def test_redirect_headers(self):
self.assertEqual(
r['Location'], 'https://project.dev.readthedocs.io/en/latest/',
)
self.assertEqual(r['Cache-Tag'], 'project')
self.assertEqual(r['X-RTD-Project'], 'project')
self.assertEqual(r['X-RTD-Project-Method'], 'subdomain')
self.assertEqual(r['X-RTD-Domain'], 'project.dev.readthedocs.io')
self.assertIsNone(r.get('X-RTD-Version'))
self.assertIsNone(r.get('X-RTD-Path'))
self.assertEqual(r["Cache-Tag"], "project")
self.assertEqual(r["X-RTD-Project"], "project")
self.assertEqual(r["X-RTD-Project-Method"], "public_domain")
self.assertEqual(r["X-RTD-Domain"], "project.dev.readthedocs.io")
self.assertIsNone(r.get("X-RTD-Version"))
self.assertIsNone(r.get("X-RTD-Path"))

def test_serve_headers(self):
r = self.client.get('/en/latest/', HTTP_HOST='project.dev.readthedocs.io')
self.assertEqual(r.status_code, 200)
self.assertEqual(r['Cache-Tag'], 'project,project:latest')
self.assertEqual(r['X-RTD-Domain'], 'project.dev.readthedocs.io')
self.assertEqual(r['X-RTD-Project'], 'project')
self.assertEqual(r['X-RTD-Project-Method'], 'subdomain')
self.assertEqual(r['X-RTD-Version'], 'latest')
self.assertEqual(r['X-RTD-version-Method'], 'path')
self.assertEqual(r['X-RTD-Path'], '/proxito/media/html/project/latest/index.html')
self.assertEqual(r["Cache-Tag"], "project,project:latest")
self.assertEqual(r["X-RTD-Domain"], "project.dev.readthedocs.io")
self.assertEqual(r["X-RTD-Project"], "project")
self.assertEqual(r["X-RTD-Project-Method"], "public_domain")
self.assertEqual(r["X-RTD-Version"], "latest")
self.assertEqual(r["X-RTD-version-Method"], "path")
self.assertEqual(
r["X-RTD-Path"], "/proxito/media/html/project/latest/index.html"
)

def test_subproject_serve_headers(self):
r = self.client.get('/projects/subproject/en/latest/', HTTP_HOST='project.dev.readthedocs.io')
Expand All @@ -49,7 +51,7 @@ def test_subproject_serve_headers(self):
# I think it's not accurate saying that it's `subdomain` the method
# that we use to get the project slug here, since it was in fact the
# URL's path but we don't have that feature built
self.assertEqual(r['X-RTD-Project-Method'], 'subdomain')
self.assertEqual(r["X-RTD-Project-Method"], "public_domain")

self.assertEqual(r['X-RTD-Version'], 'latest')
self.assertEqual(r['X-RTD-version-Method'], 'path')
Expand All @@ -58,13 +60,13 @@ def test_subproject_serve_headers(self):
def test_404_headers(self):
r = self.client.get('/foo/bar.html', HTTP_HOST='project.dev.readthedocs.io')
self.assertEqual(r.status_code, 404)
self.assertEqual(r['Cache-Tag'], 'project')
self.assertEqual(r['X-RTD-Domain'], 'project.dev.readthedocs.io')
self.assertEqual(r['X-RTD-Project'], 'project')
self.assertEqual(r['X-RTD-Project-Method'], 'subdomain')
self.assertEqual(r['X-RTD-version-Method'], 'path')
self.assertIsNone(r.get('X-RTD-Version'))
self.assertIsNone(r.get('X-RTD-Path'))
self.assertEqual(r["Cache-Tag"], "project")
self.assertEqual(r["X-RTD-Domain"], "project.dev.readthedocs.io")
self.assertEqual(r["X-RTD-Project"], "project")
self.assertEqual(r["X-RTD-Project-Method"], "public_domain")
self.assertEqual(r["X-RTD-version-Method"], "path")
self.assertIsNone(r.get("X-RTD-Version"))
self.assertIsNone(r.get("X-RTD-Path"))

def test_custom_domain_headers(self):
hostname = 'docs.random.com'
Expand All @@ -75,13 +77,15 @@ def test_custom_domain_headers(self):
)
r = self.client.get("/en/latest/", HTTP_HOST=hostname)
self.assertEqual(r.status_code, 200)
self.assertEqual(r['Cache-Tag'], 'project,project:latest')
self.assertEqual(r['X-RTD-Domain'], self.domain.domain)
self.assertEqual(r['X-RTD-Project'], self.project.slug)
self.assertEqual(r['X-RTD-Project-Method'], 'cname')
self.assertEqual(r['X-RTD-Version'], 'latest')
self.assertEqual(r['X-RTD-version-Method'], 'path')
self.assertEqual(r['X-RTD-Path'], '/proxito/media/html/project/latest/index.html')
self.assertEqual(r["Cache-Tag"], "project,project:latest")
self.assertEqual(r["X-RTD-Domain"], self.domain.domain)
self.assertEqual(r["X-RTD-Project"], self.project.slug)
self.assertEqual(r["X-RTD-Project-Method"], "custom_domain")
self.assertEqual(r["X-RTD-Version"], "latest")
self.assertEqual(r["X-RTD-version-Method"], "path")
self.assertEqual(
r["X-RTD-Path"], "/proxito/media/html/project/latest/index.html"
)

def test_footer_headers(self):
version = self.project.versions.get(slug=LATEST)
Expand Down
24 changes: 12 additions & 12 deletions readthedocs/proxito/tests/test_middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def test_proper_cname(self):
request = self.request(method='get', path=self.url, HTTP_HOST=domain)
res = self.run_middleware(request)
self.assertIsNone(res)
self.assertEqual(request.cname, True)
self.assertEqual(request.host_project_slug, 'pip')
self.assertTrue(request.unresolved_domain.is_from_custom_domain)
self.assertEqual(request.unresolved_domain.project, self.pip)

def test_proper_cname_https_upgrade(self):
cname = 'docs.random.com'
Expand Down Expand Up @@ -138,8 +138,8 @@ def test_proper_cname_uppercase(self):
get(Domain, project=self.pip, domain='docs.random.com')
request = self.request(method='get', path=self.url, HTTP_HOST='docs.RANDOM.COM')
self.run_middleware(request)
self.assertEqual(request.cname, True)
self.assertEqual(request.host_project_slug, 'pip')
self.assertTrue(request.unresolved_domain.is_from_custom_domain)
self.assertEqual(request.unresolved_domain.project, self.pip)

def test_invalid_cname(self):
self.assertFalse(Domain.objects.filter(domain='my.host.com').exists())
Expand All @@ -151,17 +151,17 @@ def test_invalid_cname(self):
def test_proper_subdomain(self):
request = self.request(method='get', path=self.url, HTTP_HOST='pip.dev.readthedocs.io')
self.run_middleware(request)
self.assertEqual(request.subdomain, True)
self.assertEqual(request.host_project_slug, 'pip')
self.assertTrue(request.unresolved_domain.is_from_public_domain)
self.assertEqual(request.unresolved_domain.project, self.pip)

@override_settings(PUBLIC_DOMAIN='foo.bar.readthedocs.io')
def test_subdomain_different_length(self):
request = self.request(
method='get', path=self.url, HTTP_HOST='pip.foo.bar.readthedocs.io'
)
self.run_middleware(request)
self.assertEqual(request.subdomain, True)
self.assertEqual(request.host_project_slug, 'pip')
self.assertTrue(request.unresolved_domain.is_from_public_domain)
self.assertEqual(request.unresolved_domain.project, self.pip)

def test_request_header(self):
get(
Expand All @@ -171,8 +171,8 @@ def test_request_header(self):
method='get', path=self.url, HTTP_HOST='some.random.com', HTTP_X_RTD_SLUG='pip'
)
self.run_middleware(request)
self.assertEqual(request.rtdheader, True)
self.assertEqual(request.host_project_slug, 'pip')
self.assertTrue(request.unresolved_domain.is_from_http_header)
self.assertEqual(request.unresolved_domain.project, self.pip)

def test_request_header_uppercase(self):
get(
Expand All @@ -183,8 +183,8 @@ def test_request_header_uppercase(self):
)
self.run_middleware(request)

self.assertEqual(request.rtdheader, True)
self.assertEqual(request.host_project_slug, 'pip')
self.assertTrue(request.unresolved_domain.is_from_http_header)
self.assertEqual(request.unresolved_domain.project, self.pip)

def test_request_header_not_allowed(self):
request = self.request(
Expand Down
Loading