Skip to content

Proxito: use unresolver in 404 handler #10074

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Mar 7, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 134 additions & 67 deletions readthedocs/core/unresolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import structlog
from django.conf import settings

from readthedocs.builds.constants import EXTERNAL
from readthedocs.builds.constants import EXTERNAL, INTERNAL
from readthedocs.builds.models import Version
from readthedocs.constants import pattern_opts
from readthedocs.projects.models import Domain, Feature, Project
Expand Down Expand Up @@ -44,6 +44,34 @@ class InvalidCustomDomainError(DomainError):
pass


class VersionNotFoundError(UnresolverError):
def __init__(self, project, version_slug, filename):
self.project = project
self.version_slug = version_slug
self.filename = filename


class TranslationNotFoundError(UnresolverError):
def __init__(self, project, language, filename, version_slug):
self.project = project
self.language = language
self.filename = filename
self.version_slug = version_slug


class UnresolvedPathError(UnresolverError):
def __init__(self, project, path):
self.project = project
self.path = path


class InvalidExternalVersionError(UnresolverError):
def __init__(self, project, version_slug, external_version_slug):
self.project = project
self.version_slug = version_slug
self.external_version_slug = external_version_slug


@dataclass(slots=True)
class UnresolvedURL:

Expand All @@ -57,9 +85,9 @@ class UnresolvedURL:
# It can be the same as parent_project.
project: Project

version: Version = None
filename: str = None
parsed_url: ParseResult = None
version: Version
filename: str
parsed_url: ParseResult
domain: Domain = None
external: bool = False

Expand All @@ -76,8 +104,11 @@ class DomainSourceType(Enum):

@dataclass(slots=True)
class UnresolvedDomain:
# The domain that was used to extract the information from.
source_domain: str
source: DomainSourceType
project: Project
# Domain object for custom domains.
domain: Domain = None
external_version_slug: str = None

Expand Down Expand Up @@ -141,46 +172,61 @@ def unresolve(self, url, append_indexhtml=True):
:param append_indexhtml: If `True` directories will be normalized
to end with ``/index.html``.
"""
parsed = urlparse(url)
domain = self.get_domain_from_host(parsed.netloc)
parsed_url = urlparse(url)
domain = self.get_domain_from_host(parsed_url.netloc)
unresolved_domain = self.unresolve_domain(domain)
return self._unresolve(
unresolved_domain=unresolved_domain,
parsed_url=parsed_url,
append_indexhtml=append_indexhtml,
)

def unresolve_path(self, unresolved_domain, path, append_indexhtml=True):
"""
Unresolve a path given a unresolved domain.

This is the same as the unresolve method,
but this method takes an unresolved domain
from unresolve_domain as input.

:param unresolved_domain: An UnresolvedDomain object.
:param path: Path to unresolve (this shouldn't include the protocol or querystrings).
:param append_indexhtml: If `True` directories will be normalized
to end with ``/index.html``.
"""
# We don't call unparse() on the path,
# since it could be parsed as a full URL if it starts with a protocol.
parsed_url = ParseResult(
scheme="", netloc="", path=path, params="", query="", fragment=""
)
return self._unresolve(
unresolved_domain=unresolved_domain,
parsed_url=parsed_url,
append_indexhtml=append_indexhtml,
)

def _unresolve(self, unresolved_domain, parsed_url, append_indexhtml):
"""
The actual unresolver.

Extracted into a separate method so it can be re-used by
the unresolve and unresolve_path methods.
"""
current_project, version, filename = self._unresolve_path(
parent_project=unresolved_domain.project,
path=parsed.path,
path=parsed_url.path,
external_version_slug=unresolved_domain.external_version_slug,
)

# Make sure we are serving the external version from the subdomain.
if unresolved_domain.is_from_external_domain and version:
if unresolved_domain.external_version_slug != version.slug:
log.warning(
"Invalid version for external domain.",
domain=domain,
version_slug=version.slug,
)
version = None
filename = None
elif not version.is_external:
log.warning(
"Attempt of serving a non-external version from RTD_EXTERNAL_VERSION_DOMAIN.",
domain=domain,
version_slug=version.slug,
version_type=version.type,
url=url,
)
version = None
filename = None

if append_indexhtml and filename and filename.endswith("/"):
if append_indexhtml and filename.endswith("/"):
filename += "index.html"

return UnresolvedURL(
parent_project=unresolved_domain.project,
project=current_project or unresolved_domain.project,
project=current_project,
version=version,
filename=filename,
parsed_url=parsed,
parsed_url=parsed_url,
domain=unresolved_domain.domain,
external=unresolved_domain.is_from_external_domain,
)
Expand All @@ -193,16 +239,17 @@ def _normalize_filename(filename):
filename = "/" + filename
return filename

def _match_multiversion_project(self, parent_project, path):
def _match_multiversion_project(
self, parent_project, path, external_version_slug=None
):
"""
Try to match a multiversion project.

If the translation exists, we return a result even if the version doesn't,
so the translation is taken as the current project (useful for 404 pages).
An exception is raised if we weren't able to find a matching version or language,
this exception has the current project (useful for 404 pages).

:returns: None or a tuple with the current project, version and file.
A tuple with only the project means we weren't able to find a version,
but the translation was correct.
:returns: A tuple with the current project, version and file.
Returns `None` if there isn't a total or partial match.
"""
match = self.multiversion_pattern.match(path)
if not match:
Expand All @@ -216,14 +263,29 @@ def _match_multiversion_project(self, parent_project, path):
project = parent_project
else:
project = parent_project.translations.filter(language=language).first()
if not project:
raise TranslationNotFoundError(
project=parent_project,
language=language,
filename=file,
version_slug=version_slug,
)

if project:
version = project.versions.filter(slug=version_slug).first()
if version:
return project, version, file
return project, None, None
if external_version_slug and external_version_slug != version_slug:
raise InvalidExternalVersionError(
project=project,
version_slug=version_slug,
external_version_slug=external_version_slug,
)

return None
manager = EXTERNAL if external_version_slug else INTERNAL
version = project.versions(manager=manager).filter(slug=version_slug).first()
if not version:
raise VersionNotFoundError(
project=project, version_slug=version_slug, filename=file
)

return project, version, file

def _match_subproject(self, parent_project, path, external_version_slug=None):
"""
Expand All @@ -232,12 +294,8 @@ def _match_subproject(self, parent_project, path, external_version_slug=None):
If the subproject exists, we try to resolve the rest of the path
with the subproject as the canonical project.

If the subproject exists, we return a result even if version doesn't,
so the subproject is taken as the current project (useful for 404 pages).

:returns: None or a tuple with the current project, version and file.
A tuple with only the project means we were able to find the subproject,
but we weren't able to resolve the rest of the path.
:returns: A tuple with the current project, version and file.
Returns `None` if there isn't a total or partial match.
"""
match = self.subproject_pattern.match(path)
if not match:
Expand All @@ -260,12 +318,7 @@ def _match_subproject(self, parent_project, path, external_version_slug=None):
check_subprojects=False,
external_version_slug=external_version_slug,
)
# If we got a valid response, return that,
# otherwise return the current subproject
# as the current project without a valid version or path.
if response:
return response
return subproject, None, None
return response
return None

def _match_single_version_project(
Expand All @@ -276,21 +329,30 @@ def _match_single_version_project(

By default any path will match. If `external_version_slug` is given,
that version is used instead of the project's default version.

An exception is raised if we weren't able to find a matching version,
this exception has the current project (useful for 404 pages).

:returns: A tuple with the current project, version and file.
Returns `None` if there isn't a total or partial match.
"""
file = self._normalize_filename(path)
if external_version_slug:
version = (
parent_project.versions(manager=EXTERNAL)
.filter(slug=external_version_slug)
.first()
)
version_slug = external_version_slug
manager = EXTERNAL
else:
version = parent_project.versions.filter(
slug=parent_project.default_version
).first()
if version:
return parent_project, version, file
return parent_project, None, None
version_slug = parent_project.default_version
manager = INTERNAL

version = (
parent_project.versions(manager=manager).filter(slug=version_slug).first()
)
if not version:
raise VersionNotFoundError(
project=parent_project, version_slug=version_slug, filename=file
)

return parent_project, version, file

def _unresolve_path(
self, parent_project, path, check_subprojects=True, external_version_slug=None
Expand Down Expand Up @@ -328,6 +390,7 @@ def _unresolve_path(
response = self._match_multiversion_project(
parent_project=parent_project,
path=path,
external_version_slug=external_version_slug,
)
if response:
return response
Expand All @@ -352,7 +415,7 @@ def _unresolve_path(
if response:
return response

return parent_project, None, None
raise UnresolvedPathError(project=parent_project, path=path)

@staticmethod
def get_domain_from_host(host):
Expand Down Expand Up @@ -384,6 +447,7 @@ def unresolve_domain(self, domain):
project_slug = subdomain
log.debug("Public domain.", domain=domain)
return UnresolvedDomain(
source_domain=domain,
source=DomainSourceType.public_domain,
project=self._resolve_project_slug(project_slug, domain),
)
Expand All @@ -400,6 +464,7 @@ def unresolve_domain(self, domain):
project_slug, version_slug = subdomain.rsplit("--", maxsplit=1)
log.debug("External versions domain.", domain=domain)
return UnresolvedDomain(
source_domain=domain,
source=DomainSourceType.external_domain,
project=self._resolve_project_slug(project_slug, domain),
external_version_slug=version_slug,
Expand All @@ -425,6 +490,7 @@ def unresolve_domain(self, domain):

log.debug("Custom domain.", domain=domain)
return UnresolvedDomain(
source_domain=domain,
source=DomainSourceType.custom_domain,
project=domain_object.project,
domain=domain_object,
Expand Down Expand Up @@ -464,6 +530,7 @@ def unresolve_domain_from_request(self, request):
project_slug=project.slug,
)
return UnresolvedDomain(
source_domain=host,
source=DomainSourceType.http_header,
project=project,
)
Expand Down
Loading