Skip to content

Proxito: adapt unresolver to make it usable for proxito #10037

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 2, 2023
207 changes: 135 additions & 72 deletions readthedocs/core/unresolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import structlog
from django.conf import settings

from readthedocs.builds.constants import EXTERNAL
from readthedocs.builds.constants import EXTERNAL, INTERNAL
from readthedocs.builds.models import Version
from readthedocs.constants import pattern_opts
from readthedocs.projects.models import Domain, Feature, Project
Expand Down Expand Up @@ -44,6 +44,33 @@ class InvalidCustomDomainError(DomainError):
pass


class VersionNotFoundError(UnresolverError):
def __init__(self, project, version_slug, filename):
self.project = project
self.version_slug = version_slug
self.filename = filename


class TranslationNotFoundError(UnresolverError):
def __init__(self, project, language, filename):
self.project = project
self.language = language
self.filename = filename


class UnresolvedPathError(UnresolverError):
def __init__(self, project, path):
self.project = project
self.path = path


class InvalidExternalVersionError(UnresolverError):
def __init__(self, project, version_slug, external_version_slug):
self.project = project
self.version_slug = version_slug
self.external_version_slug = external_version_slug


@dataclass(slots=True)
class UnresolvedURL:

Expand All @@ -57,9 +84,9 @@ class UnresolvedURL:
# It can be the same as parent_project.
project: Project

version: Version = None
filename: str = None
parsed_url: ParseResult = None
version: Version
filename: str
parsed_url: ParseResult
domain: Domain = None
external: bool = False

Expand All @@ -76,8 +103,11 @@ class DomainSourceType(Enum):

@dataclass(slots=True)
class UnresolvedDomain:
# The domain that was used to extract the information from.
source_domain: str
source: DomainSourceType
project: Project
# Domain object for custom domains.
domain: Domain = None
external_version_slug: str = None

Expand Down Expand Up @@ -130,7 +160,7 @@ class Unresolver:
re.VERBOSE,
)

def unresolve(self, url, append_indexhtml=True):
def unresolve_url(self, url, append_indexhtml=True):
"""
Turn a URL into the component parts that our views would use to process them.

Expand All @@ -141,46 +171,61 @@ def unresolve(self, url, append_indexhtml=True):
:param append_indexhtml: If `True` directories will be normalized
to end with ``/index.html``.
"""
parsed = urlparse(url)
domain = self.get_domain_from_host(parsed.netloc)
parsed_url = urlparse(url)
domain = self.get_domain_from_host(parsed_url.netloc)
unresolved_domain = self.unresolve_domain(domain)
return self._unresolve(
unresolved_domain=unresolved_domain,
parsed_url=parsed_url,
append_indexhtml=append_indexhtml,
)

def unresolve_path(self, unresolved_domain, path, append_indexhtml=True):
"""
Unresolve a path given a unresolved domain.

current_project, version, filename = self._unresolve_path(
This is the same as the unresolve method,
but this method takes an unresolved domain
from unresolve_domain as input.

:param unresolved_domain: An UnresolvedDomain object.
:param path: Path to unresolve (this shouldn't include the protocol or querystrings).
:param append_indexhtml: If `True` directories will be normalized
to end with ``/index.html``.
"""
# We don't call unparse() on the path,
# since it could be parsed as a full URL if it starts with a protocol.
parsed_url = ParseResult(
scheme="", netloc="", path=path, params="", query="", fragment=""
)
Comment on lines +196 to +200
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we validate that path comes in the exact shape we require here?

Copy link
Member Author

@stsewd stsewd Feb 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if I understand what you mean with shape, anything can be a path, this is the path as we receive it from proxito.

Copy link
Member

@humitos humitos Feb 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are expecting a string with a particular structure. For example, it can't be https://humitos.dev/page/, it has to be just /page/. I'm saying that we should validate the path to be what we expect to be instead of a random string.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what we expect is a random string

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the code comment you said that it should not start with a protocol, tho. I'm confused what are the correct values for path then. Can you expand on this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment says we can't use unparse() because the path could start with a protocol, or anything really that can confuse unparse() so we manually build the ParseResult class.

Copy link
Member

@humitos humitos Mar 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I got that. I'm not saying we have to use unparse(). I'm saying that we can validate the string to be exactly in the shape we need to be and raise an exception otherwise.

return self._unresolve(
unresolved_domain=unresolved_domain,
parsed_url=parsed_url,
append_indexhtml=append_indexhtml,
)

def _unresolve(self, unresolved_domain, parsed_url, append_indexhtml):
"""
The actual unresolver.

Extracted into a separate method so it can be re-used by
the unresolve and unresolve_path methods.
"""
current_project, version, filename = self._unresolve_path_with_parent_project(
parent_project=unresolved_domain.project,
path=parsed.path,
path=parsed_url.path,
external_version_slug=unresolved_domain.external_version_slug,
)

# Make sure we are serving the external version from the subdomain.
if unresolved_domain.is_from_external_domain and version:
if unresolved_domain.external_version_slug != version.slug:
log.warning(
"Invalid version for external domain.",
domain=domain,
version_slug=version.slug,
)
version = None
filename = None
elif not version.is_external:
log.warning(
"Attempt of serving a non-external version from RTD_EXTERNAL_VERSION_DOMAIN.",
domain=domain,
version_slug=version.slug,
version_type=version.type,
url=url,
)
version = None
filename = None

if append_indexhtml and filename and filename.endswith("/"):
if append_indexhtml and filename.endswith("/"):
filename += "index.html"

return UnresolvedURL(
parent_project=unresolved_domain.project,
project=current_project or unresolved_domain.project,
project=current_project,
version=version,
filename=filename,
parsed_url=parsed,
parsed_url=parsed_url,
domain=unresolved_domain.domain,
external=unresolved_domain.is_from_external_domain,
)
Expand All @@ -193,16 +238,17 @@ def _normalize_filename(filename):
filename = "/" + filename
return filename

def _match_multiversion_project(self, parent_project, path):
def _match_multiversion_project(
self, parent_project, path, external_version_slug=None
):
"""
Try to match a multiversion project.

If the translation exists, we return a result even if the version doesn't,
so the translation is taken as the current project (useful for 404 pages).
An exception is raised if we weren't able to find a matching version or language,
this exception has the current project (useful for 404 pages).

:returns: None or a tuple with the current project, version and file.
A tuple with only the project means we weren't able to find a version,
but the translation was correct.
:returns: A tuple with the current project, version and file.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this be an object? Maybe an Unresolved<Something> object to keep consistency with the other objects.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for internal usage only, they are always present.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fine. Even if it's internal. Using well-known objects makes the code easier to read and follow.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This 3 tuple elements is well-defined, I'd avoid creating data classes for every return type, if we have more than 3 things to return, then that would make sense for me.

Returns `None` if there isn't a total or partial match.
"""
match = self.multiversion_pattern.match(path)
if not match:
Expand All @@ -216,14 +262,26 @@ def _match_multiversion_project(self, parent_project, path):
project = parent_project
else:
project = parent_project.translations.filter(language=language).first()
if not project:
raise TranslationNotFoundError(
project=parent_project, language=language, filename=file
)

if project:
version = project.versions.filter(slug=version_slug).first()
if version:
return project, version, file
return project, None, None
if external_version_slug and external_version_slug != version_slug:
raise InvalidExternalVersionError(
project=project,
version_slug=version_slug,
external_version_slug=external_version_slug,
)

return None
manager = EXTERNAL if external_version_slug else INTERNAL
version = project.versions(manager=manager).filter(slug=version_slug).first()
if not version:
raise VersionNotFoundError(
project=project, version_slug=version_slug, filename=file
)

return project, version, file

def _match_subproject(self, parent_project, path, external_version_slug=None):
"""
Expand All @@ -232,12 +290,8 @@ def _match_subproject(self, parent_project, path, external_version_slug=None):
If the subproject exists, we try to resolve the rest of the path
with the subproject as the canonical project.

If the subproject exists, we return a result even if version doesn't,
so the subproject is taken as the current project (useful for 404 pages).

:returns: None or a tuple with the current project, version and file.
A tuple with only the project means we were able to find the subproject,
but we weren't able to resolve the rest of the path.
:returns: A tuple with the current project, version and file.
Returns `None` if there isn't a total or partial match.
"""
match = self.subproject_pattern.match(path)
if not match:
Expand All @@ -254,18 +308,13 @@ def _match_subproject(self, parent_project, path, external_version_slug=None):
# We use the subproject as the new parent project
# to resolve the rest of the path relative to it.
subproject = project_relationship.child
response = self._unresolve_path(
response = self._unresolve_path_with_parent_project(
parent_project=subproject,
path=file,
check_subprojects=False,
external_version_slug=external_version_slug,
)
# If we got a valid response, return that,
# otherwise return the current subproject
# as the current project without a valid version or path.
if response:
return response
return subproject, None, None
return response
return None

def _match_single_version_project(
Expand All @@ -276,23 +325,32 @@ def _match_single_version_project(

By default any path will match. If `external_version_slug` is given,
that version is used instead of the project's default version.

An exception is raised if we weren't able to find a matching version,
this exception has the current project (useful for 404 pages).

:returns: A tuple with the current project, version and file.
Returns `None` if there isn't a total or partial match.
"""
file = self._normalize_filename(path)
if external_version_slug:
version = (
parent_project.versions(manager=EXTERNAL)
.filter(slug=external_version_slug)
.first()
)
version_slug = external_version_slug
manager = EXTERNAL
else:
version = parent_project.versions.filter(
slug=parent_project.default_version
).first()
if version:
return parent_project, version, file
return parent_project, None, None
version_slug = parent_project.default_version
manager = INTERNAL

def _unresolve_path(
version = (
parent_project.versions(manager=manager).filter(slug=version_slug).first()
)
if not version:
raise VersionNotFoundError(
project=parent_project, version_slug=version_slug, filename=file
)

return parent_project, version, file

def _unresolve_path_with_parent_project(
self, parent_project, path, check_subprojects=True, external_version_slug=None
):
"""
Expand Down Expand Up @@ -328,6 +386,7 @@ def _unresolve_path(
response = self._match_multiversion_project(
parent_project=parent_project,
path=path,
external_version_slug=external_version_slug,
)
if response:
return response
Expand All @@ -352,7 +411,7 @@ def _unresolve_path(
if response:
return response

return parent_project, None, None
raise UnresolvedPathError(project=parent_project, path=path)

@staticmethod
def get_domain_from_host(host):
Expand Down Expand Up @@ -384,6 +443,7 @@ def unresolve_domain(self, domain):
project_slug = subdomain
log.debug("Public domain.", domain=domain)
return UnresolvedDomain(
source_domain=domain,
source=DomainSourceType.public_domain,
project=self._resolve_project_slug(project_slug, domain),
)
Expand All @@ -400,6 +460,7 @@ def unresolve_domain(self, domain):
project_slug, version_slug = subdomain.rsplit("--", maxsplit=1)
log.debug("External versions domain.", domain=domain)
return UnresolvedDomain(
source_domain=domain,
source=DomainSourceType.external_domain,
project=self._resolve_project_slug(project_slug, domain),
external_version_slug=version_slug,
Expand All @@ -425,6 +486,7 @@ def unresolve_domain(self, domain):

log.debug("Custom domain.", domain=domain)
return UnresolvedDomain(
source_domain=domain,
source=DomainSourceType.custom_domain,
project=domain_object.project,
domain=domain_object,
Expand Down Expand Up @@ -464,6 +526,7 @@ def unresolve_domain_from_request(self, request):
project_slug=project.slug,
)
return UnresolvedDomain(
source_domain=host,
source=DomainSourceType.http_header,
project=project,
)
Expand All @@ -477,4 +540,4 @@ def unresolve_domain_from_request(self, request):


unresolver = Unresolver()
unresolve = unresolver.unresolve
unresolve = unresolver.unresolve_url
Loading