-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New unresolver implementation #9500
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
e34fe11
6ac26b0
a732260
0672724
185771d
3d30d6b
fce39ac
f583d2c
789f74c
067367d
1cf32b4
53fa8aa
a62c149
63d72c3
aba6fc4
cfe16b7
6900724
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -1,99 +1,279 @@ | ||||||||||||
import structlog | ||||||||||||
from collections import namedtuple | ||||||||||||
import re | ||||||||||||
from dataclasses import dataclass | ||||||||||||
from urllib.parse import urlparse | ||||||||||||
|
||||||||||||
from django.test.client import RequestFactory | ||||||||||||
from django.urls import resolve as url_resolve | ||||||||||||
import structlog | ||||||||||||
from django.conf import settings | ||||||||||||
|
||||||||||||
from readthedocs.core.utils.extend import SettingsOverrideObject | ||||||||||||
from readthedocs.proxito.middleware import map_host_to_project_slug | ||||||||||||
from readthedocs.proxito.views.mixins import ServeDocsMixin | ||||||||||||
from readthedocs.proxito.views.utils import _get_project_data_from_request | ||||||||||||
from readthedocs.builds.models import Version | ||||||||||||
from readthedocs.constants import pattern_opts | ||||||||||||
from readthedocs.projects.models import Domain, Project | ||||||||||||
|
||||||||||||
log = structlog.get_logger(__name__) | ||||||||||||
|
||||||||||||
UnresolvedObject = namedtuple( | ||||||||||||
'Unresolved', 'project, language_slug, version_slug, filename, fragment') | ||||||||||||
|
||||||||||||
@dataclass(slots=True) | ||||||||||||
class UnresolvedURL: | ||||||||||||
|
||||||||||||
"""Dataclass with the parts of an unresolved URL.""" | ||||||||||||
|
||||||||||||
# This is the project that owns the domain, | ||||||||||||
# this usually is the parent project of a translation or subproject. | ||||||||||||
canonical_project: Project | ||||||||||||
|
||||||||||||
# The current project we are serving the docs from. | ||||||||||||
# It can be the same as canonical_project. | ||||||||||||
project: Project | ||||||||||||
|
||||||||||||
version: Version = None | ||||||||||||
filename: str = None | ||||||||||||
query: str = None | ||||||||||||
fragment: str = None | ||||||||||||
domain: Domain = None | ||||||||||||
external: bool = False | ||||||||||||
|
||||||||||||
|
||||||||||||
class UnresolverBase: | ||||||||||||
class Unresolver: | ||||||||||||
# This pattern matches: | ||||||||||||
# - /en | ||||||||||||
# - /en/ | ||||||||||||
# - /en/latest | ||||||||||||
# - /en/latest/ | ||||||||||||
# - /en/latest/file/name/ | ||||||||||||
multiversion_pattern = re.compile( | ||||||||||||
r"^/(?P<language>{lang_slug})(/((?P<version>{version_slug})(/(?P<file>{filename_slug}))?)?)?$".format( # noqa | ||||||||||||
**pattern_opts | ||||||||||||
) | ||||||||||||
stsewd marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||
) | ||||||||||||
|
||||||||||||
# This pattern matches: | ||||||||||||
# - /projects/subproject | ||||||||||||
# - /projects/subproject/ | ||||||||||||
# - /projects/subproject/file/name/ | ||||||||||||
subproject_pattern = re.compile( | ||||||||||||
r"^/projects/(?P<project>{project_slug}+)(/(?P<file>{filename_slug}))?$".format( | ||||||||||||
**pattern_opts | ||||||||||||
) | ||||||||||||
) | ||||||||||||
|
||||||||||||
def unresolve(self, url): | ||||||||||||
def unresolve(self, url, add_index=True): | ||||||||||||
stsewd marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||
""" | ||||||||||||
Turn a URL into the component parts that our views would use to process them. | ||||||||||||
|
||||||||||||
This is useful for lots of places, | ||||||||||||
like where we want to figure out exactly what file a URL maps to. | ||||||||||||
|
||||||||||||
:param url: Full URL to unresolve (including the protocol and domain part). | ||||||||||||
:param add_index: If `True` the filename will be normalized | ||||||||||||
to end with ``/index.html``. | ||||||||||||
""" | ||||||||||||
parsed = urlparse(url) | ||||||||||||
domain = parsed.netloc.split(':', 1)[0] | ||||||||||||
|
||||||||||||
# TODO: Make this not depend on the request object, | ||||||||||||
# but instead move all this logic here working on strings. | ||||||||||||
request = RequestFactory().get(path=parsed.path, HTTP_HOST=domain) | ||||||||||||
project_slug = map_host_to_project_slug(request) | ||||||||||||
domain = self.get_domain_from_host(parsed.netloc) | ||||||||||||
project_slug, domain_object, external = self.unresolve_domain(domain) | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are using I think it would be good to refactor this to use more descriptive names:
Probably makes sense to change the |
||||||||||||
if not project_slug: | ||||||||||||
return None | ||||||||||||
|
||||||||||||
# Handle returning a response | ||||||||||||
if hasattr(project_slug, 'status_code'): | ||||||||||||
canonical_project = Project.objects.filter(slug=project_slug).first() | ||||||||||||
if not canonical_project: | ||||||||||||
return None | ||||||||||||
|
||||||||||||
request.host_project_slug = request.slug = project_slug | ||||||||||||
return self.unresolve_from_request(request, url) | ||||||||||||
project, version, filename = self._unresolve_path( | ||||||||||||
canonical_project=canonical_project, | ||||||||||||
path=parsed.path, | ||||||||||||
) | ||||||||||||
|
||||||||||||
if add_index and filename and filename.endswith("/"): | ||||||||||||
filename += "index.html" | ||||||||||||
|
||||||||||||
def unresolve_from_request(self, request, path): | ||||||||||||
return UnresolvedURL( | ||||||||||||
canonical_project=canonical_project, | ||||||||||||
project=project or canonical_project, | ||||||||||||
version=version, | ||||||||||||
filename=filename, | ||||||||||||
query=parsed.query, | ||||||||||||
fragment=parsed.fragment, | ||||||||||||
domain=domain_object, | ||||||||||||
external=external, | ||||||||||||
) | ||||||||||||
|
||||||||||||
@staticmethod | ||||||||||||
def _normalize_filename(filename): | ||||||||||||
"""Normalize filename to always start with ``/``.""" | ||||||||||||
filename = filename or "/" | ||||||||||||
if not filename.startswith("/"): | ||||||||||||
filename = "/" + filename | ||||||||||||
return filename | ||||||||||||
|
||||||||||||
def _match_multiversion_project(self, canonical_project, path): | ||||||||||||
""" | ||||||||||||
Unresolve using a request. | ||||||||||||
Try to match a multiversion project. | ||||||||||||
|
||||||||||||
``path`` can be a full URL, but the domain will be ignored, | ||||||||||||
since that information is already in the request object. | ||||||||||||
If the translation exists, we return a result even if the version doesn't, | ||||||||||||
so the translation is taken as the canonical project (useful for 404 pages). | ||||||||||||
""" | ||||||||||||
stsewd marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||
match = self.multiversion_pattern.match(path) | ||||||||||||
if not match: | ||||||||||||
return None | ||||||||||||
|
||||||||||||
language = match.group("language") | ||||||||||||
version_slug = match.group("version") | ||||||||||||
file = self._normalize_filename(match.group("file")) | ||||||||||||
|
||||||||||||
if canonical_project.language == language: | ||||||||||||
project = canonical_project | ||||||||||||
else: | ||||||||||||
project = canonical_project.translations.filter(language=language).first() | ||||||||||||
|
||||||||||||
None is returned if the request isn't valid. | ||||||||||||
if project: | ||||||||||||
version = project.versions.filter(slug=version_slug).first() | ||||||||||||
if version: | ||||||||||||
return project, version, file | ||||||||||||
return project, None, None | ||||||||||||
|
||||||||||||
return None | ||||||||||||
|
||||||||||||
def _match_subproject(self, canonical_project, path): | ||||||||||||
""" | ||||||||||||
parsed = urlparse(path) | ||||||||||||
path = parsed.path | ||||||||||||
project_slug = getattr(request, 'host_project_slug', None) | ||||||||||||
Try to match a subproject. | ||||||||||||
|
||||||||||||
if not project_slug: | ||||||||||||
If the subproject exists, we try to resolve the rest of the path | ||||||||||||
with the subproject as the canonical project. | ||||||||||||
""" | ||||||||||||
match = self.subproject_pattern.match(path) | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Is this the longer term goal for customization? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, basically on each call to match, will be some extra logic to use the custom pattern from the project instead of the default one. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The same idea should be used for multiversion patterns 👍🏼 |
||||||||||||
if not match: | ||||||||||||
return None | ||||||||||||
|
||||||||||||
_, __, kwargs = url_resolve( | ||||||||||||
path, | ||||||||||||
urlconf='readthedocs.proxito.urls', | ||||||||||||
project_slug = match.group("project") | ||||||||||||
stsewd marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||
file = self._normalize_filename(match.group("file")) | ||||||||||||
project_relationship = ( | ||||||||||||
canonical_project.subprojects.filter(alias=project_slug) | ||||||||||||
.prefetch_related("child") | ||||||||||||
.first() | ||||||||||||
) | ||||||||||||
if project_relationship: | ||||||||||||
return self._unresolve_path( | ||||||||||||
canonical_project=project_relationship.child, | ||||||||||||
path=file, | ||||||||||||
check_subprojects=False, | ||||||||||||
) | ||||||||||||
return None | ||||||||||||
|
||||||||||||
mixin = ServeDocsMixin() | ||||||||||||
version_slug = mixin.get_version_from_host(request, kwargs.get('version_slug')) | ||||||||||||
def _match_single_version_project(self, canonical_project, path): | ||||||||||||
""" | ||||||||||||
Try to match a single version project. | ||||||||||||
|
||||||||||||
final_project, lang_slug, version_slug, filename = _get_project_data_from_request( # noqa | ||||||||||||
request, | ||||||||||||
project_slug=project_slug, | ||||||||||||
subproject_slug=kwargs.get('subproject_slug'), | ||||||||||||
lang_slug=kwargs.get('lang_slug'), | ||||||||||||
version_slug=version_slug, | ||||||||||||
filename=kwargs.get('filename', ''), | ||||||||||||
) | ||||||||||||
By default any path will match. | ||||||||||||
""" | ||||||||||||
file = self._normalize_filename(path) | ||||||||||||
version = canonical_project.versions.filter( | ||||||||||||
slug=canonical_project.default_version | ||||||||||||
).first() | ||||||||||||
if version: | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In what case will this be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, this is when a use doesn't have a default version or the default version is invalid |
||||||||||||
return canonical_project, version, file | ||||||||||||
return canonical_project, None, None | ||||||||||||
|
||||||||||||
# Handle our backend storage not supporting directory indexes, | ||||||||||||
# so we need to append index.html when appropriate. | ||||||||||||
if not filename or filename.endswith('/'): | ||||||||||||
# We need to add the index.html to find this actual file | ||||||||||||
filename += 'index.html' | ||||||||||||
|
||||||||||||
log.debug( | ||||||||||||
'Unresolver parsed.', | ||||||||||||
project_slug=final_project.slug, | ||||||||||||
lang_slug=lang_slug, | ||||||||||||
version_slug=version_slug, | ||||||||||||
filename=filename, | ||||||||||||
def _unresolve_path(self, canonical_project, path, check_subprojects=True): | ||||||||||||
""" | ||||||||||||
Unresolve `path` with `canonical_project` as base. | ||||||||||||
|
||||||||||||
If the returned project is `None`, then we weren't able to | ||||||||||||
unresolve the path into a project. | ||||||||||||
|
||||||||||||
If the returned version is `None`, then we weren't able to | ||||||||||||
unresolve the path into a valid version of the project. | ||||||||||||
|
||||||||||||
:param canonical_project: The project that owns the path. | ||||||||||||
:param path: The path to unresolve. | ||||||||||||
:param check_subprojects: If we should check for subprojects, | ||||||||||||
this is used to call this function recursively. | ||||||||||||
stsewd marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||
|
||||||||||||
:returns: A tuple with: project, version, and file name. | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be good to make this a dataclass or similar here as well and all these related methods. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is okay to have this as a tuple, they are internal methods, and we already have one dataclass for the final result. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Even if they are internal methods, real objects are a lot easier to read and follow while debugging/reading this code. |
||||||||||||
""" | ||||||||||||
# Multiversion project. | ||||||||||||
if not canonical_project.single_version: | ||||||||||||
response = self._match_multiversion_project( | ||||||||||||
canonical_project=canonical_project, | ||||||||||||
path=path, | ||||||||||||
) | ||||||||||||
if response: | ||||||||||||
return response | ||||||||||||
|
||||||||||||
# Subprojects. | ||||||||||||
if check_subprojects: | ||||||||||||
response = self._match_subproject( | ||||||||||||
canonical_project=canonical_project, | ||||||||||||
path=path, | ||||||||||||
) | ||||||||||||
if response: | ||||||||||||
return response | ||||||||||||
|
||||||||||||
# Single version project. | ||||||||||||
if canonical_project.single_version: | ||||||||||||
response = self._match_single_version_project( | ||||||||||||
canonical_project=canonical_project, | ||||||||||||
path=path, | ||||||||||||
) | ||||||||||||
if response: | ||||||||||||
return response | ||||||||||||
|
||||||||||||
return None, None, None | ||||||||||||
|
||||||||||||
@staticmethod | ||||||||||||
def get_domain_from_host(host): | ||||||||||||
""" | ||||||||||||
Get the normalized domain from a hostname. | ||||||||||||
|
||||||||||||
A hostname can include the port. | ||||||||||||
""" | ||||||||||||
return host.lower().split(":")[0] | ||||||||||||
|
||||||||||||
# TODO: make this a private method once | ||||||||||||
# proxito uses the unresolve method directly. | ||||||||||||
def unresolve_domain(self, domain): | ||||||||||||
""" | ||||||||||||
Unresolve domain by extracting relevant information from it. | ||||||||||||
|
||||||||||||
:param str domain: Domain to extract the information from. | ||||||||||||
:returns: A tuple with: the project slug, domain object, and if the domain | ||||||||||||
is from an external version. | ||||||||||||
""" | ||||||||||||
public_domain = self.get_domain_from_host(settings.PUBLIC_DOMAIN) | ||||||||||||
external_domain = self.get_domain_from_host( | ||||||||||||
settings.RTD_EXTERNAL_VERSION_DOMAIN | ||||||||||||
) | ||||||||||||
return UnresolvedObject(final_project, lang_slug, version_slug, filename, parsed.fragment) | ||||||||||||
|
||||||||||||
subdomain, *rest_of_domain = domain.split(".", maxsplit=1) | ||||||||||||
rest_of_domain = rest_of_domain[0] if rest_of_domain else "" | ||||||||||||
|
||||||||||||
if public_domain in domain: | ||||||||||||
# Serve from the PUBLIC_DOMAIN, ensuring it looks like `foo.PUBLIC_DOMAIN`. | ||||||||||||
if public_domain == rest_of_domain: | ||||||||||||
project_slug = subdomain | ||||||||||||
return project_slug, None, False | ||||||||||||
|
||||||||||||
class Unresolver(SettingsOverrideObject): | ||||||||||||
# TODO: This can catch some possibly valid domains (docs.readthedocs.io.com) | ||||||||||||
# for example, but these might be phishing, so let's ignore them for now. | ||||||||||||
return None, None, False | ||||||||||||
|
||||||||||||
if external_domain in domain: | ||||||||||||
# Serve custom versions on external-host-domain. | ||||||||||||
if external_domain == rest_of_domain: | ||||||||||||
try: | ||||||||||||
project_slug, _ = subdomain.rsplit("--", maxsplit=1) | ||||||||||||
return project_slug, None, True | ||||||||||||
except ValueError: | ||||||||||||
return None, None, False | ||||||||||||
|
||||||||||||
# Custom domain. | ||||||||||||
domain_object = ( | ||||||||||||
Domain.objects.filter(domain=domain).prefetch_related("project").first() | ||||||||||||
) | ||||||||||||
if domain_object: | ||||||||||||
project_slug = domain_object.project.slug | ||||||||||||
return project_slug, domain_object, False | ||||||||||||
|
||||||||||||
_default_class = UnresolverBase | ||||||||||||
return None, None, None | ||||||||||||
|
||||||||||||
|
||||||||||||
unresolver = Unresolver() | ||||||||||||
unresolve = unresolver.unresolve | ||||||||||||
unresolve_from_request = unresolver.unresolve_from_request |
Uh oh!
There was an error while loading. Please reload this page.