Skip to content

Proxito: pass unresolved domain in request #9982

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Feb 14, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions readthedocs/audit/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from readthedocs.acl.utils import get_auth_backend
from readthedocs.analytics.utils import get_client_ip
from readthedocs.projects.models import Project


class AuditLogManager(models.Manager):
Expand Down Expand Up @@ -50,9 +49,9 @@ def new(self, action, user=None, request=None, **kwargs):

# Fill the project from the request if available.
# This is frequently on actions generated from a subdomain.
project_slug = getattr(request, 'host_project_slug', None)
if 'project' not in kwargs and project_slug:
kwargs['project'] = Project.objects.filter(slug=project_slug).first()
unresolved_domain = getattr(request, "unresolved_domain", None)
if "project" not in kwargs and unresolved_domain:
kwargs["project"] = unresolved_domain.project

return self.create(
user=user,
Expand Down
125 changes: 99 additions & 26 deletions readthedocs/core/unresolver.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
from dataclasses import dataclass
from enum import Enum, auto
from urllib.parse import ParseResult, urlparse

import structlog
Expand All @@ -17,19 +18,24 @@ class UnresolverError(Exception):
pass


class SuspiciousHostnameError(UnresolverError):
class DomainError(UnresolverError):

    """
    Base class for errors tied to a specific domain.

    :param domain: The domain that could not be unresolved.
    """

    def __init__(self, domain):
        # Forward the domain to ``Exception`` so that ``args`` and ``str()``
        # carry it; without this they are empty when the exception is
        # raised with ``domain=...`` as a keyword argument.
        super().__init__(domain)
        self.domain = domain


class SuspiciousHostnameError(DomainError):
pass


class InvalidSubdomainError(UnresolverError):
class InvalidSubdomainError(DomainError):
pass


class InvalidExternalDomainError(UnresolverError):
class InvalidExternalDomainError(DomainError):
pass


class InvalidCustomDomainError(UnresolverError):
class InvalidCustomDomainError(DomainError):
pass


Expand All @@ -53,6 +59,40 @@ class UnresolvedURL:
external: bool = False


class OriginType(Enum):

    """
    Where the project of an unresolved domain was resolved from.

    Only one of the members is a custom domain; the others cover the
    remaining ways a project is resolved.
    """

    # Resolved through a ``Domain`` object (custom domain).
    custom_domain = auto()
    # Resolved from a subdomain of the public domain.
    public_domain = auto()
    # Resolved from a subdomain of the external domain (PR builds).
    external_domain = auto()
    # Resolved from an explicit project slug passed in an HTTP header.
    http_header = auto()


@dataclass(slots=True)
class UnresolvedDomain:
origin: OriginType
project: Project
domain: Domain = None
external_version_slug: str = None

@property
def is_from_custom_domain(self):
return self.origin == OriginType.custom_domain

@property
def is_from_public_domain(self):
return self.origin == OriginType.public_domain

@property
def is_from_http_header(self):
return self.origin == OriginType.http_header

@property
def is_from_external_domain(self):
return self.origin == OriginType.external_domain


class Unresolver:
# This pattern matches:
# - /en
Expand Down Expand Up @@ -98,19 +138,17 @@ def unresolve(self, url, append_indexhtml=True):
"""
parsed = urlparse(url)
domain = self.get_domain_from_host(parsed.netloc)
parent_project, domain_object, external_version_slug = self.unresolve_domain(
domain
)
unresolved_domain = self.unresolve_domain(domain)

current_project, version, filename = self._unresolve_path(
parent_project=parent_project,
parent_project=unresolved_domain.project,
path=parsed.path,
external_version_slug=external_version_slug,
external_version_slug=unresolved_domain.external_version_slug,
)

# Make sure we are serving the external version from the subdomain.
if external_version_slug and version:
if external_version_slug != version.slug:
if unresolved_domain.is_from_external_domain and version:
if unresolved_domain.external_version_slug != version.slug:
log.warning(
"Invalid version for external domain.",
domain=domain,
Expand All @@ -133,13 +171,13 @@ def unresolve(self, url, append_indexhtml=True):
filename += "index.html"

return UnresolvedURL(
parent_project=parent_project,
project=current_project or parent_project,
parent_project=unresolved_domain.project,
project=current_project or unresolved_domain.project,
version=version,
filename=filename,
parsed_url=parsed,
domain=domain_object,
external=bool(external_version_slug),
domain=unresolved_domain.domain,
external=unresolved_domain.is_from_external_domain,
)

@staticmethod
Expand Down Expand Up @@ -325,8 +363,7 @@ def unresolve_domain(self, domain):
Unresolve domain by extracting relevant information from it.

:param str domain: Domain to extract the information from.
:returns: A tuple with: the project, domain object, and the
external version slug if the domain is from an external version.
:returns: A UnresolvedDomain object.
"""
public_domain = self.get_domain_from_host(settings.PUBLIC_DOMAIN)
external_domain = self.get_domain_from_host(
Expand All @@ -341,48 +378,84 @@ def unresolve_domain(self, domain):
if public_domain == root_domain:
project_slug = subdomain
log.debug("Public domain.", domain=domain)
return self._resolve_project_slug(project_slug), None, None
return UnresolvedDomain(
origin=OriginType.public_domain,
project=self._resolve_project_slug(project_slug, domain),
)

# NOTE: This can catch some possibly valid domains (docs.readthedocs.io.com)
# for example, but these might be phishing, so let's block them for now.
log.warning("Weird variation of our domain.", domain=domain)
raise SuspiciousHostnameError()
raise SuspiciousHostnameError(domain=domain)

# Serve PR builds on external_domain host.
if external_domain in domain:
if external_domain == root_domain:
try:
project_slug, version_slug = subdomain.rsplit("--", maxsplit=1)
log.debug("External versions domain.", domain=domain)
return self._resolve_project_slug(project_slug), None, version_slug
return UnresolvedDomain(
origin=OriginType.external_domain,
project=self._resolve_project_slug(project_slug, domain),
external_version_slug=version_slug,
)
except ValueError:
log.info(
"Invalid format of external versions domain.", domain=domain
)
raise InvalidExternalDomainError()
raise InvalidExternalDomainError(domain=domain)

# NOTE: This can catch some possibly valid domains (docs.readthedocs.build.com)
# for example, but these might be phishing, so let's block them for now.
log.warning("Weird variation of our domain.", domain=domain)
raise SuspiciousHostnameError()
raise SuspiciousHostnameError(domain=domain)

# Custom domain.
domain_object = (
Domain.objects.filter(domain=domain).select_related("project").first()
)
if not domain_object:
log.info("Invalid domain.", domain=domain)
raise InvalidCustomDomainError()
raise InvalidCustomDomainError(domain=domain)

log.debug("Custom domain.", domain=domain)
return domain_object.project, domain_object, None
return UnresolvedDomain(
origin=OriginType.custom_domain,
project=domain_object.project,
domain=domain_object,
)

def _resolve_project_slug(self, slug):
def _resolve_project_slug(self, slug, domain):
"""Get the project from the slug or raise an exception if not found."""
try:
return Project.objects.get(slug=slug)
except Project.DoesNotExist:
raise InvalidSubdomainError()
raise InvalidSubdomainError(domain=domain)

def unresolve_domain_from_request(self, request):
    """
    Unresolve domain by extracting relevant information from the request.

    We first check if the ``X-RTD-Slug`` header has been set for explicit
    project mapping, otherwise we unresolve by calling `self.unresolve_domain`
    on the host.

    :param request: Request to extract the information from.
    :returns: An UnresolvedDomain object.
    :raises DomainError: Propagated from `self.unresolve_domain` when the
     host can't be unresolved.
    """
    # Explicit Project slug being passed in.
    header_project_slug = request.headers.get("X-RTD-Slug", "").lower()
    if header_project_slug:
        project = Project.objects.filter(slug=header_project_slug).first()
        if project:
            log.info(
                "Setting project based on X_RTD_SLUG header.",
                project_slug=project.slug,
            )
            return UnresolvedDomain(origin=OriginType.http_header, project=project)
        # A header naming an unknown project is ignored:
        # we fall through and unresolve from the host instead.

    host = self.get_domain_from_host(request.get_host())
    # Call through ``self`` instead of the module-level ``unresolver``
    # singleton, so subclasses and other instances use their own method.
    return self.unresolve_domain(host)


unresolver = Unresolver()
Expand Down
Loading