diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py
index 15e04dcf92d..9fc3dd79e03 100644
--- a/readthedocs/core/unresolver.py
+++ b/readthedocs/core/unresolver.py
@@ -1,5 +1,6 @@
 import re
 from dataclasses import dataclass
+from enum import Enum, auto
 from urllib.parse import ParseResult, urlparse
 
 import structlog
@@ -8,7 +9,7 @@
 from readthedocs.builds.constants import EXTERNAL
 from readthedocs.builds.models import Version
 from readthedocs.constants import pattern_opts
-from readthedocs.projects.models import Domain, Project
+from readthedocs.projects.models import Domain, Feature, Project
 
 log = structlog.get_logger(__name__)
 
@@ -17,19 +18,29 @@ class UnresolverError(Exception):
     pass
 
 
-class SuspiciousHostnameError(UnresolverError):
+class InvalidXRTDSlugHeaderError(UnresolverError):
+    pass
+
+
+class DomainError(UnresolverError):
+    def __init__(self, domain):
+        super().__init__(domain)
+        self.domain = domain
+
+
+class SuspiciousHostnameError(DomainError):
     pass
 
 
-class InvalidSubdomainError(UnresolverError):
+class InvalidSubdomainError(DomainError):
     pass
 
 
-class InvalidExternalDomainError(UnresolverError):
+class InvalidExternalDomainError(DomainError):
     pass
 
 
-class InvalidCustomDomainError(UnresolverError):
+class InvalidCustomDomainError(DomainError):
     pass
 
 
@@ -53,6 +64,24 @@ class UnresolvedURL:
     external: bool = False
 
 
+class DomainSourceType(Enum):
+
+    """Where the domain was resolved from."""
+
+    custom_domain = auto()
+    public_domain = auto()
+    external_domain = auto()
+    http_header = auto()
+
+
+@dataclass(slots=True)
+class UnresolvedDomain:
+    source: DomainSourceType
+    project: Project
+    domain: Domain | None = None
+    external_version_slug: str | None = None
+
+
 class Unresolver:
     # This pattern matches:
     # - /en
@@ -98,19 +127,17 @@ def unresolve(self, url, append_indexhtml=True):
         """
         parsed = urlparse(url)
         domain = self.get_domain_from_host(parsed.netloc)
-        parent_project, domain_object, external_version_slug = self.unresolve_domain(
-            domain
-        )
+        unresolved_domain = self.unresolve_domain(domain)
 
         current_project, version, filename = self._unresolve_path(
-            parent_project=parent_project,
+            parent_project=unresolved_domain.project,
             path=parsed.path,
-            external_version_slug=external_version_slug,
+            external_version_slug=unresolved_domain.external_version_slug,
         )
 
         # Make sure we are serving the external version from the subdomain.
-        if external_version_slug and version:
-            if external_version_slug != version.slug:
+        if unresolved_domain.source == DomainSourceType.external_domain and version:
+            if unresolved_domain.external_version_slug != version.slug:
                 log.warning(
                     "Invalid version for external domain.",
                     domain=domain,
@@ -133,13 +160,13 @@ def unresolve(self, url, append_indexhtml=True):
             filename += "index.html"
 
         return UnresolvedURL(
-            parent_project=parent_project,
-            project=current_project or parent_project,
+            parent_project=unresolved_domain.project,
+            project=current_project or unresolved_domain.project,
             version=version,
             filename=filename,
             parsed_url=parsed,
-            domain=domain_object,
-            external=bool(external_version_slug),
+            domain=unresolved_domain.domain,
+            external=unresolved_domain.source == DomainSourceType.external_domain,
         )
 
     @staticmethod
@@ -325,8 +352,7 @@ def unresolve_domain(self, domain):
         Unresolve domain by extracting relevant information from it.
 
         :param str domain: Domain to extract the information from.
-        :returns: A tuple with: the project, domain object, and the
-         external version slug if the domain is from an external version.
+        :returns: An UnresolvedDomain object.
         """
         public_domain = self.get_domain_from_host(settings.PUBLIC_DOMAIN)
         external_domain = self.get_domain_from_host(
@@ -341,12 +367,15 @@ def unresolve_domain(self, domain):
             if public_domain == root_domain:
                 project_slug = subdomain
                 log.debug("Public domain.", domain=domain)
-                return self._resolve_project_slug(project_slug), None, None
+                return UnresolvedDomain(
+                    source=DomainSourceType.public_domain,
+                    project=self._resolve_project_slug(project_slug, domain),
+                )
 
             # NOTE: This can catch some possibly valid domains (docs.readthedocs.io.com)
             # for example, but these might be phishing, so let's block them for now.
             log.warning("Weird variation of our domain.", domain=domain)
-            raise SuspiciousHostnameError()
+            raise SuspiciousHostnameError(domain=domain)
 
         # Serve PR builds on external_domain host.
         if external_domain in domain:
@@ -354,17 +383,21 @@ def unresolve_domain(self, domain):
                 try:
                     project_slug, version_slug = subdomain.rsplit("--", maxsplit=1)
                     log.debug("External versions domain.", domain=domain)
-                    return self._resolve_project_slug(project_slug), None, version_slug
+                    return UnresolvedDomain(
+                        source=DomainSourceType.external_domain,
+                        project=self._resolve_project_slug(project_slug, domain),
+                        external_version_slug=version_slug,
+                    )
                 except ValueError:
                     log.info(
                         "Invalid format of external versions domain.", domain=domain
                     )
-                    raise InvalidExternalDomainError()
+                    raise InvalidExternalDomainError(domain=domain)
 
             # NOTE: This can catch some possibly valid domains (docs.readthedocs.build.com)
             # for example, but these might be phishing, so let's block them for now.
             log.warning("Weird variation of our domain.", domain=domain)
-            raise SuspiciousHostnameError()
+            raise SuspiciousHostnameError(domain=domain)
 
         # Custom domain.
         domain_object = (
@@ -372,17 +405,59 @@ def unresolve_domain(self, domain):
         )
         if not domain_object:
             log.info("Invalid domain.", domain=domain)
-            raise InvalidCustomDomainError()
+            raise InvalidCustomDomainError(domain=domain)
 
         log.debug("Custom domain.", domain=domain)
-        return domain_object.project, domain_object, None
+        return UnresolvedDomain(
+            source=DomainSourceType.custom_domain,
+            project=domain_object.project,
+            domain=domain_object,
+        )
 
-    def _resolve_project_slug(self, slug):
+    def _resolve_project_slug(self, slug, domain):
         """Get the project from the slug or raise an exception if not found."""
         try:
             return Project.objects.get(slug=slug)
         except Project.DoesNotExist:
-            raise InvalidSubdomainError()
+            raise InvalidSubdomainError(domain=domain)
+
+    def unresolve_domain_from_request(self, request):
+        """
+        Unresolve domain by extracting relevant information from the request.
+
+        We first check if the ``X-RTD-Slug`` header has been set for explicit
+        project mapping, otherwise we unresolve by calling `self.unresolve_domain`
+        on the host.
+
+        :param request: Request to extract the information from.
+        :returns: An UnresolvedDomain object.
+        """
+        host = self.get_domain_from_host(request.get_host())
+        log.bind(host=host)
+
+        # Explicit Project slug being passed in.
+        header_project_slug = request.headers.get("X-RTD-Slug", "").lower()
+        if header_project_slug:
+            project = Project.objects.filter(
+                slug=header_project_slug,
+                feature__feature_id=Feature.RESOLVE_PROJECT_FROM_HEADER,
+            ).first()
+            if project:
+                log.info(
+                    "Setting project based on X_RTD_SLUG header.",
+                    project_slug=project.slug,
+                )
+                return UnresolvedDomain(
+                    source=DomainSourceType.http_header,
+                    project=project,
+                )
+            log.warning(
+                "X-RTD-Header passed for project without it enabled.",
+                project_slug=header_project_slug,
+            )
+            raise InvalidXRTDSlugHeaderError
+
+        return self.unresolve_domain(host)
 
 
 unresolver = Unresolver()
diff --git a/readthedocs/proxito/middleware.py b/readthedocs/proxito/middleware.py
index 516d312acc5..a72a967857f 100644
--- a/readthedocs/proxito/middleware.py
+++ b/readthedocs/proxito/middleware.py
@@ -18,119 +18,19 @@
 from django.utils.deprecation import MiddlewareMixin
 
 from readthedocs.core.unresolver import (
+    DomainSourceType,
     InvalidCustomDomainError,
     InvalidExternalDomainError,
     InvalidSubdomainError,
+    InvalidXRTDSlugHeaderError,
     SuspiciousHostnameError,
     unresolver,
 )
 from readthedocs.core.utils import get_cache_tag
-from readthedocs.projects.models import Domain, Feature, Project, ProjectRelationship
+from readthedocs.projects.models import Domain, ProjectRelationship
 from readthedocs.proxito import constants
 
-log = structlog.get_logger(__name__)  # noqa
-
-
-def map_host_to_project(request):  # pylint: disable=too-many-return-statements
-    """
-    Take the request and map the host to the proper project.
-
-    We check, in order:
-
-    * The ``HTTP_X_RTD_SLUG`` host header for explicit Project mapping
-        - This sets ``request.rtdheader`` True
-    * The ``PUBLIC_DOMAIN`` where we can use the subdomain as the project name
-        - This sets ``request.subdomain`` True
-    * The hostname without port information, which maps to ``Domain`` objects
-        - This sets ``request.cname`` True
-    * The domain is the canonical one and using HTTPS if supported
-        - This sets ``request.canonicalize`` with the value as the reason
-    """
-
-    host = unresolver.get_domain_from_host(request.get_host())
-    log.bind(host=host)
-
-    # Explicit Project slug being passed in.
-    if "HTTP_X_RTD_SLUG" in request.META:
-        project_slug = request.headers["X-RTD-Slug"].lower()
-        project = Project.objects.filter(
-            slug=project_slug,
-            feature__feature_id=Feature.RESOLVE_PROJECT_FROM_HEADER,
-        ).first()
-        if project:
-            request.rtdheader = True
-            log.info(
-                "Setting project based on X_RTD_SLUG header.", project_slug=project_slug
-            )
-            return project
-        log.warning(
-            "X-RTD-Header passed for project without it enabled.",
-            project_slug=project_slug,
-        )
-        raise SuspiciousOperation("Invalid X-RTD-Slug header.")
-
-    try:
-        project, domain_object, external_version_slug = unresolver.unresolve_domain(
-            host
-        )
-    except SuspiciousHostnameError:
-        log.warning("Weird variation on our hostname.", host=host)
-        return render(
-            request,
-            "core/dns-404.html",
-            context={"host": host},
-            status=400,
-        )
-    except (InvalidSubdomainError, InvalidExternalDomainError):
-        log.debug("Invalid project set on the subdomain.")
-        raise Http404
-    except InvalidCustomDomainError:
-        # Some person is CNAMEing to us without configuring a domain - 404.
-        log.debug("CNAME 404.", host=host)
-        return render(request, "core/dns-404.html", context={"host": host}, status=404)
-
-    # Custom domain.
-    if domain_object:
-        request.cname = True
-        request.domain = domain_object
-        log.debug('Proxito CNAME.', host=host)
-
-        if domain_object.https and not request.is_secure():
-            # Redirect HTTP -> HTTPS (302) for this custom domain.
-            log.debug('Proxito CNAME HTTPS Redirect.', host=host)
-            request.canonicalize = constants.REDIRECT_HTTPS
-
-        # NOTE: consider redirecting non-canonical custom domains to the canonical one
-        # Whether that is another custom domain or the public domain
-
-        return project
-
-    # Pull request previews.
-    if external_version_slug:
-        request.external_domain = True
-        request.host_version_slug = external_version_slug
-        log.debug("Proxito External Version Domain.", host=host)
-        return project
-
-    # Normal doc serving.
-    request.subdomain = True
-    log.debug("Proxito Public Domain.", host=host)
-    if (
-        Domain.objects.filter(project=project)
-        .filter(
-            canonical=True,
-            https=True,
-        )
-        .exists()
-    ):
-        log.debug("Proxito Public Domain -> Canonical Domain Redirect.", host=host)
-        request.canonicalize = constants.REDIRECT_CANONICAL_CNAME
-    elif ProjectRelationship.objects.filter(child=project).exists():
-        log.debug(
-            "Proxito Public Domain -> Subproject Main Domain Redirect.", host=host
-        )
-        request.canonicalize = constants.REDIRECT_SUBPROJECT_MAIN_DOMAIN
-    return project
+log = structlog.get_logger(__name__)
 
 
 class ProxitoMiddleware(MiddlewareMixin):
@@ -279,6 +179,56 @@ def add_cache_headers(self, request, response):
             # Set the key to private only if it hasn't already been set by the view.
             response.headers.setdefault(header, "private")
 
+    def _set_request_attributes(self, request, unresolved_domain):
+        """
+        Set attributes in the request from the unresolved domain.
+
+        - If the project was extracted from the ``X-RTD-Slug`` header,
+          we set ``request.rtdheader`` to `True`.
+        - If the project was extracted from the public domain,
+          we set ``request.subdomain`` to `True`.
+        - If the project was extracted from a custom domain,
+          we set ``request.cname`` to `True`.
+        - If the domain needs to redirect, set the canonicalize attribute accordingly.
+        """
+        # TODO: Set the unresolved domain in the request instead of each of these attributes.
+        source = unresolved_domain.source
+        project = unresolved_domain.project
+        if source == DomainSourceType.http_header:
+            request.rtdheader = True
+        elif source == DomainSourceType.custom_domain:
+            domain = unresolved_domain.domain
+            request.cname = True
+            request.domain = domain
+            if domain.https and not request.is_secure():
+                # Redirect HTTP -> HTTPS (302) for this custom domain.
+                log.debug("Proxito CNAME HTTPS Redirect.", domain=domain.domain)
+                request.canonicalize = constants.REDIRECT_HTTPS
+        elif source == DomainSourceType.external_domain:
+            request.external_domain = True
+            request.host_version_slug = unresolved_domain.external_version_slug
+        elif source == DomainSourceType.public_domain:
+            request.subdomain = True
+            canonical_domain = (
+                Domain.objects.filter(project=project)
+                .filter(canonical=True, https=True)
+                .exists()
+            )
+            if canonical_domain:
+                log.debug(
+                    "Proxito Public Domain -> Canonical Domain Redirect.",
+                    project_slug=project.slug,
+                )
+                request.canonicalize = constants.REDIRECT_CANONICAL_CNAME
+            elif ProjectRelationship.objects.filter(child=project).exists():
+                log.debug(
+                    "Proxito Public Domain -> Subproject Main Domain Redirect.",
+                    project_slug=project.slug,
+                )
+                request.canonicalize = constants.REDIRECT_SUBPROJECT_MAIN_DOMAIN
+        else:
+            raise NotImplementedError
+
     def process_request(self, request):  # noqa
         skip = any(
             request.path.startswith(reverse(view))
@@ -293,11 +243,29 @@ def process_request(self, request):  # noqa
             log.debug('Not processing Proxito middleware')
             return None
 
-        ret = map_host_to_project(request)
+        try:
+            unresolved_domain = unresolver.unresolve_domain_from_request(request)
+        except SuspiciousHostnameError as exc:
+            log.warning("Weird variation on our hostname.", domain=exc.domain)
+            return render(
+                request,
+                "core/dns-404.html",
+                context={"host": exc.domain},
+                status=400,
+            )
+        except (InvalidSubdomainError, InvalidExternalDomainError):
+            log.debug("Invalid project set on the subdomain.")
+            raise Http404
+        except InvalidCustomDomainError as exc:
+            # Some person is CNAMEing to us without configuring a domain - 404.
+            log.debug("CNAME 404.", domain=exc.domain)
+            return render(
+                request, "core/dns-404.html", context={"host": exc.domain}, status=404
+            )
+        except InvalidXRTDSlugHeaderError:
+            raise SuspiciousOperation("Invalid X-RTD-Slug header.")
 
-        # Handle returning a response
-        if hasattr(ret, 'status_code'):
-            return ret
+        self._set_request_attributes(request, unresolved_domain)
 
         # Remove multiple slashes from URL's
         if '//' in request.path:
@@ -318,7 +286,7 @@ def process_request(self, request):  # noqa
                 )
                 return redirect(final_url)
 
-        project = ret
+        project = unresolved_domain.project
         log.debug(
             'Proxito Project.',
             project_slug=project.slug,