diff --git a/readthedocs/proxito/middleware.py b/readthedocs/proxito/middleware.py index 2e7954fbf93..a2a051cedfe 100644 --- a/readthedocs/proxito/middleware.py +++ b/readthedocs/proxito/middleware.py @@ -22,6 +22,64 @@ log = structlog.get_logger(__name__) # noqa +def _get_domain_from_host(host): + """ + Get the normalized domain from a hostname. + + A hostname can include the port. + """ + return host.lower().split(":")[0] + + +def _unresolve_domain(domain): + """ + Unresolve domain by extracting relevant information from it. + + :param str domain: Domain to extract the information from. + :returns: A tuple with: the project slug, domain object, and if the domain + is from an external version. + """ + public_domain = _get_domain_from_host(settings.PUBLIC_DOMAIN) + external_domain = _get_domain_from_host(settings.RTD_EXTERNAL_VERSION_DOMAIN) + + subdomain, *root_domain = domain.split(".", maxsplit=1) + root_domain = root_domain[0] if root_domain else "" + + if public_domain in domain: + # Serve from the PUBLIC_DOMAIN, ensuring it looks like `foo.PUBLIC_DOMAIN`. + if public_domain == root_domain: + project_slug = subdomain + log.info("Public domain.", domain=domain) + return project_slug, None, False + + # TODO: This can catch some possibly valid domains (docs.readthedocs.io.com) for example, + # but these might be phishing, so let's ignore them for now. + log.warning("Weird variation of our domain.", domain=domain) + return None, None, False + + # Serve PR builds on external_domain host. + if external_domain == root_domain: + try: + log.info("External versions domain.", domain=domain) + project_slug, _ = subdomain.rsplit("--", maxsplit=1) + return project_slug, None, True + except ValueError: + log.info("Invalid format of external versions domain.", domain=domain) + return None, None, False + + # Custom domain. + domain_object = ( + Domain.objects.filter(domain=domain).prefetch_related("project").first() + ) + if domain_object: + log.info("Custom domain.", domain=domain) + project_slug = domain_object.project.slug + return project_slug, domain_object, False + + log.info("Invalid domain.", domain=domain) + return None, None, None + + def map_host_to_project_slug(request): # pylint: disable=too-many-return-statements """ Take the request and map the host to the proper project slug. @@ -38,17 +96,11 @@ def map_host_to_project_slug(request): # pylint: disable=too-many-return-statem - This sets ``request.canonicalize`` with the value as the reason """ - host = request.get_host().lower().split(':')[0] - public_domain = settings.PUBLIC_DOMAIN.lower().split(':')[0] - external_domain = settings.RTD_EXTERNAL_VERSION_DOMAIN.lower().split(':')[0] - - host_parts = host.split('.') - public_domain_parts = public_domain.split('.') - external_domain_parts = external_domain.split('.') + host = _get_domain_from_host(request.get_host()) + public_domain = _get_domain_from_host(settings.PUBLIC_DOMAIN) + external_domain = _get_domain_from_host(settings.RTD_EXTERNAL_VERSION_DOMAIN) - project_slug = None - - # Explicit Project slug being passed in + # Explicit Project slug being passed in. if 'HTTP_X_RTD_SLUG' in request.META: project_slug = request.META['HTTP_X_RTD_SLUG'].lower() if Project.objects.filter(slug=project_slug).exists(): @@ -56,58 +108,29 @@ def map_host_to_project_slug(request): # pylint: disable=too-many-return-statem log.info('Setting project based on X_RTD_SLUG header.', project_slug=project_slug) return project_slug - if public_domain in host or host == 'proxito': - # Serve from the PUBLIC_DOMAIN, ensuring it looks like `foo.PUBLIC_DOMAIN` - if public_domain_parts == host_parts[1:]: - project_slug = host_parts[0] - request.subdomain = True - log.debug('Proxito Public Domain.', host=host) - if Domain.objects.filter(project__slug=project_slug).filter( - canonical=True, - https=True, - ).exists(): - log.debug('Proxito Public Domain -> Canonical Domain Redirect.', host=host) - request.canonicalize = constants.REDIRECT_CANONICAL_CNAME - elif ( - ProjectRelationship.objects. - filter(child__slug=project_slug).exists() - ): - log.debug('Proxito Public Domain -> Subproject Main Domain Redirect.', host=host) - request.canonicalize = constants.REDIRECT_SUBPROJECT_MAIN_DOMAIN - return project_slug - - # TODO: This can catch some possibly valid domains (docs.readthedocs.io.com) for example - # But these feel like they might be phishing, etc. so let's block them for now. - log.warning('Weird variation on our hostname.', host=host) - return render( - request, 'core/dns-404.html', context={'host': host}, status=400 - ) - - if external_domain in host: - # Serve custom versions on external-host-domain - if external_domain_parts == host_parts[1:]: - try: - project_slug, version_slug = host_parts[0].rsplit('--', 1) - request.external_domain = True - request.host_version_slug = version_slug - log.debug('Proxito External Version Domain.', host=host) - return project_slug - except ValueError: - log.warning('Weird variation on our hostname.', host=host) - return render( - request, 'core/dns-404.html', context={'host': host}, status=400 - ) + project_slug, domain_object, external = _unresolve_domain(host) + if not project_slug: + # Block domains that look like ours, may be phishing. + if external_domain in host or public_domain in host: + log.warning("Weird variation on our hostname.", host=host) + return render( + request, + "core/dns-404.html", + context={"host": host}, + status=400, + ) + # Some person is CNAMEing to us without configuring a domain - 404. + log.debug("CNAME 404.", host=host) + return render(request, "core/dns-404.html", context={"host": host}, status=404) - # Serve CNAMEs - domain = Domain.objects.filter(domain=host).first() - if domain: - project_slug = domain.project.slug + # Custom domain. + if domain_object: request.cname = True - request.domain = domain + request.domain = domain_object log.debug('Proxito CNAME.', host=host) - if domain.https and not request.is_secure(): - # Redirect HTTP -> HTTPS (302) for this custom domain + if domain_object.https and not request.is_secure(): + # Redirect HTTP -> HTTPS (302) for this custom domain. log.debug('Proxito CNAME HTTPS Redirect.', host=host) request.canonicalize = constants.REDIRECT_HTTPS @@ -116,11 +139,34 @@ def map_host_to_project_slug(request): # pylint: disable=too-many-return-statem return project_slug - # Some person is CNAMEing to us without configuring a domain - 404. - log.debug('CNAME 404.', host=host) - return render( - request, 'core/dns-404.html', context={'host': host}, status=404 - ) + # Pull request previews. + if external: + subdomain, _ = host.split(".", maxsplit=1) + _, version_slug = subdomain.rsplit("--", maxsplit=1) + request.external_domain = True + request.host_version_slug = version_slug + log.debug("Proxito External Version Domain.", host=host) + return project_slug + + # Normal doc serving. + request.subdomain = True + log.debug("Proxito Public Domain.", host=host) + if ( + Domain.objects.filter(project__slug=project_slug) + .filter( + canonical=True, + https=True, + ) + .exists() + ): + log.debug("Proxito Public Domain -> Canonical Domain Redirect.", host=host) + request.canonicalize = constants.REDIRECT_CANONICAL_CNAME + elif ProjectRelationship.objects.filter(child__slug=project_slug).exists(): + log.debug( + "Proxito Public Domain -> Subproject Main Domain Redirect.", host=host + ) + request.canonicalize = constants.REDIRECT_SUBPROJECT_MAIN_DOMAIN + return project_slug class ProxitoMiddleware(MiddlewareMixin):