diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index d12c27a0818..7bf6bbe3004 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -6,7 +6,7 @@ import structlog from django.conf import settings -from readthedocs.builds.constants import EXTERNAL +from readthedocs.builds.constants import EXTERNAL, INTERNAL from readthedocs.builds.models import Version from readthedocs.constants import pattern_opts from readthedocs.projects.models import Domain, Feature, Project @@ -44,6 +44,33 @@ class InvalidCustomDomainError(DomainError): pass +class VersionNotFoundError(UnresolverError): + def __init__(self, project, version_slug, filename): + self.project = project + self.version_slug = version_slug + self.filename = filename + + +class TranslationNotFoundError(UnresolverError): + def __init__(self, project, language, filename): + self.project = project + self.language = language + self.filename = filename + + +class UnresolvedPathError(UnresolverError): + def __init__(self, project, path): + self.project = project + self.path = path + + +class InvalidExternalVersionError(UnresolverError): + def __init__(self, project, version_slug, external_version_slug): + self.project = project + self.version_slug = version_slug + self.external_version_slug = external_version_slug + + @dataclass(slots=True) class UnresolvedURL: @@ -57,9 +84,9 @@ class UnresolvedURL: # It can be the same as parent_project. project: Project - version: Version = None - filename: str = None - parsed_url: ParseResult = None + version: Version + filename: str + parsed_url: ParseResult domain: Domain = None external: bool = False @@ -76,8 +103,11 @@ class DomainSourceType(Enum): @dataclass(slots=True) class UnresolvedDomain: + # The domain that was used to extract the information from. + source_domain: str source: DomainSourceType project: Project + # Domain object for custom domains. domain: Domain = None external_version_slug: str = None @@ -130,7 +160,7 @@ class Unresolver: re.VERBOSE, ) - def unresolve(self, url, append_indexhtml=True): + def unresolve_url(self, url, append_indexhtml=True): """ Turn a URL into the component parts that our views would use to process them. @@ -141,46 +171,61 @@ def unresolve(self, url, append_indexhtml=True): :param append_indexhtml: If `True` directories will be normalized to end with ``/index.html``. """ - parsed = urlparse(url) - domain = self.get_domain_from_host(parsed.netloc) + parsed_url = urlparse(url) + domain = self.get_domain_from_host(parsed_url.netloc) unresolved_domain = self.unresolve_domain(domain) + return self._unresolve( + unresolved_domain=unresolved_domain, + parsed_url=parsed_url, + append_indexhtml=append_indexhtml, + ) + + def unresolve_path(self, unresolved_domain, path, append_indexhtml=True): + """ + Unresolve a path given a unresolved domain. - current_project, version, filename = self._unresolve_path( + This is the same as the unresolve method, + but this method takes an unresolved domain + from unresolve_domain as input. + + :param unresolved_domain: An UnresolvedDomain object. + :param path: Path to unresolve (this shouldn't include the protocol or querystrings). + :param append_indexhtml: If `True` directories will be normalized + to end with ``/index.html``. + """ + # We don't call unparse() on the path, + # since it could be parsed as a full URL if it starts with a protocol. + parsed_url = ParseResult( + scheme="", netloc="", path=path, params="", query="", fragment="" + ) + return self._unresolve( + unresolved_domain=unresolved_domain, + parsed_url=parsed_url, + append_indexhtml=append_indexhtml, + ) + + def _unresolve(self, unresolved_domain, parsed_url, append_indexhtml): + """ + The actual unresolver. + + Extracted into a separate method so it can be re-used by + the unresolve and unresolve_path methods. + """ + current_project, version, filename = self._unresolve_path_with_parent_project( parent_project=unresolved_domain.project, - path=parsed.path, + path=parsed_url.path, external_version_slug=unresolved_domain.external_version_slug, ) - # Make sure we are serving the external version from the subdomain. - if unresolved_domain.is_from_external_domain and version: - if unresolved_domain.external_version_slug != version.slug: - log.warning( - "Invalid version for external domain.", - domain=domain, - version_slug=version.slug, - ) - version = None - filename = None - elif not version.is_external: - log.warning( - "Attempt of serving a non-external version from RTD_EXTERNAL_VERSION_DOMAIN.", - domain=domain, - version_slug=version.slug, - version_type=version.type, - url=url, - ) - version = None - filename = None - - if append_indexhtml and filename and filename.endswith("/"): + if append_indexhtml and filename.endswith("/"): filename += "index.html" return UnresolvedURL( parent_project=unresolved_domain.project, - project=current_project or unresolved_domain.project, + project=current_project, version=version, filename=filename, - parsed_url=parsed, + parsed_url=parsed_url, domain=unresolved_domain.domain, external=unresolved_domain.is_from_external_domain, ) @@ -193,16 +238,17 @@ def _normalize_filename(filename): filename = "/" + filename return filename - def _match_multiversion_project(self, parent_project, path): + def _match_multiversion_project( + self, parent_project, path, external_version_slug=None + ): """ Try to match a multiversion project. - If the translation exists, we return a result even if the version doesn't, - so the translation is taken as the current project (useful for 404 pages). + An exception is raised if we weren't able to find a matching version or language, + this exception has the current project (useful for 404 pages). - :returns: None or a tuple with the current project, version and file. - A tuple with only the project means we weren't able to find a version, - but the translation was correct. + :returns: A tuple with the current project, version and file. + Returns `None` if there isn't a total or partial match. """ match = self.multiversion_pattern.match(path) if not match: @@ -216,14 +262,26 @@ def _match_multiversion_project(self, parent_project, path): project = parent_project else: project = parent_project.translations.filter(language=language).first() + if not project: + raise TranslationNotFoundError( + project=parent_project, language=language, filename=file + ) - if project: - version = project.versions.filter(slug=version_slug).first() - if version: - return project, version, file - return project, None, None + if external_version_slug and external_version_slug != version_slug: + raise InvalidExternalVersionError( + project=project, + version_slug=version_slug, + external_version_slug=external_version_slug, + ) - return None + manager = EXTERNAL if external_version_slug else INTERNAL + version = project.versions(manager=manager).filter(slug=version_slug).first() + if not version: + raise VersionNotFoundError( + project=project, version_slug=version_slug, filename=file + ) + + return project, version, file def _match_subproject(self, parent_project, path, external_version_slug=None): """ @@ -232,12 +290,8 @@ def _match_subproject(self, parent_project, path, external_version_slug=None): If the subproject exists, we try to resolve the rest of the path with the subproject as the canonical project. - If the subproject exists, we return a result even if version doesn't, - so the subproject is taken as the current project (useful for 404 pages). - - :returns: None or a tuple with the current project, version and file. - A tuple with only the project means we were able to find the subproject, - but we weren't able to resolve the rest of the path. + :returns: A tuple with the current project, version and file. + Returns `None` if there isn't a total or partial match. """ match = self.subproject_pattern.match(path) if not match: @@ -254,18 +308,13 @@ def _match_subproject(self, parent_project, path, external_version_slug=None): # We use the subproject as the new parent project # to resolve the rest of the path relative to it. subproject = project_relationship.child - response = self._unresolve_path( + response = self._unresolve_path_with_parent_project( parent_project=subproject, path=file, check_subprojects=False, external_version_slug=external_version_slug, ) - # If we got a valid response, return that, - # otherwise return the current subproject - # as the current project without a valid version or path. - if response: - return response - return subproject, None, None + return response return None def _match_single_version_project( @@ -276,23 +325,32 @@ def _match_single_version_project( By default any path will match. If `external_version_slug` is given, that version is used instead of the project's default version. + + An exception is raised if we weren't able to find a matching version, + this exception has the current project (useful for 404 pages). + + :returns: A tuple with the current project, version and file. + Returns `None` if there isn't a total or partial match. """ file = self._normalize_filename(path) if external_version_slug: - version = ( - parent_project.versions(manager=EXTERNAL) - .filter(slug=external_version_slug) - .first() - ) + version_slug = external_version_slug + manager = EXTERNAL else: - version = parent_project.versions.filter( - slug=parent_project.default_version - ).first() - if version: - return parent_project, version, file - return parent_project, None, None + version_slug = parent_project.default_version + manager = INTERNAL - def _unresolve_path( + version = ( + parent_project.versions(manager=manager).filter(slug=version_slug).first() + ) + if not version: + raise VersionNotFoundError( + project=parent_project, version_slug=version_slug, filename=file + ) + + return parent_project, version, file + + def _unresolve_path_with_parent_project( self, parent_project, path, check_subprojects=True, external_version_slug=None ): """ @@ -328,6 +386,7 @@ def _unresolve_path( response = self._match_multiversion_project( parent_project=parent_project, path=path, + external_version_slug=external_version_slug, ) if response: return response @@ -352,7 +411,7 @@ def _unresolve_path( if response: return response - return parent_project, None, None + raise UnresolvedPathError(project=parent_project, path=path) @staticmethod def get_domain_from_host(host): @@ -384,6 +443,7 @@ def unresolve_domain(self, domain): project_slug = subdomain log.debug("Public domain.", domain=domain) return UnresolvedDomain( + source_domain=domain, source=DomainSourceType.public_domain, project=self._resolve_project_slug(project_slug, domain), ) @@ -400,6 +460,7 @@ def unresolve_domain(self, domain): project_slug, version_slug = subdomain.rsplit("--", maxsplit=1) log.debug("External versions domain.", domain=domain) return UnresolvedDomain( + source_domain=domain, source=DomainSourceType.external_domain, project=self._resolve_project_slug(project_slug, domain), external_version_slug=version_slug, @@ -425,6 +486,7 @@ def unresolve_domain(self, domain): log.debug("Custom domain.", domain=domain) return UnresolvedDomain( + source_domain=domain, source=DomainSourceType.custom_domain, project=domain_object.project, domain=domain_object, @@ -464,6 +526,7 @@ def unresolve_domain_from_request(self, request): project_slug=project.slug, ) return UnresolvedDomain( + source_domain=host, source=DomainSourceType.http_header, project=project, ) @@ -477,4 +540,4 @@ def unresolve_domain_from_request(self, request): unresolver = Unresolver() -unresolve = unresolver.unresolve +unresolve = unresolver.unresolve_url diff --git a/readthedocs/rtd_tests/tests/test_unresolver.py b/readthedocs/rtd_tests/tests/test_unresolver.py index c6bdc44e8b7..482806fdf01 100644 --- a/readthedocs/rtd_tests/tests/test_unresolver.py +++ b/readthedocs/rtd_tests/tests/test_unresolver.py @@ -8,7 +8,11 @@ from readthedocs.core.unresolver import ( InvalidCustomDomainError, InvalidExternalDomainError, + InvalidExternalVersionError, SuspiciousHostnameError, + TranslationNotFoundError, + UnresolvedPathError, + VersionNotFoundError, unresolve, ) from readthedocs.projects.models import Domain, Project @@ -63,17 +67,44 @@ def test_file_with_trailing_slash(self): self.assertEqual(parts.version, self.version) self.assertEqual(parts.filename, "/foo/index.html") + def test_project_no_version_and_language(self): + with pytest.raises(UnresolvedPathError) as excinfo: + unresolve("https://pip.readthedocs.io/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.path, "/") + + with pytest.raises(UnresolvedPathError) as excinfo: + unresolve("https://pip.readthedocs.io/foo/bar") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.path, "/foo/bar") + + def test_subproject_no_version_and_language(self): + with pytest.raises(UnresolvedPathError) as excinfo: + unresolve("https://pip.readthedocs.io/projects/sub/") + exc = excinfo.value + self.assertEqual(exc.project, self.subproject) + self.assertEqual(exc.path, "/") + + with pytest.raises(UnresolvedPathError) as excinfo: + unresolve("https://pip.readthedocs.io/projects/sub/foo/bar") + exc = excinfo.value + self.assertEqual(exc.project, self.subproject) + self.assertEqual(exc.path, "/foo/bar") + def test_path_no_version(self): urls = [ "https://pip.readthedocs.io/en", "https://pip.readthedocs.io/en/", ] for url in urls: - parts = unresolve(url) - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve(url) + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version_slug, None) + self.assertEqual(exc.filename, "/") def test_unresolver_subproject(self): parts = unresolve("https://pip.readthedocs.io/projects/sub/ja/latest/foo.html") @@ -116,18 +147,22 @@ def test_unresolve_subproject_alias(self): self.assertEqual(parts.filename, "/index.html") def test_unresolve_subproject_invalid_version(self): - parts = unresolve("https://pip.readthedocs.io/projects/sub/ja/nothing/foo.html") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.subproject) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve("https://pip.readthedocs.io/projects/sub/ja/nothing/foo.html") + exc = excinfo.value + + self.assertEqual(exc.project, self.subproject) + self.assertEqual(exc.version_slug, "nothing") + self.assertEqual(exc.filename, "/foo.html") def test_unresolve_subproject_invalid_translation(self): - parts = unresolve("https://pip.readthedocs.io/projects/sub/es/latest/foo.html") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.subproject) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(TranslationNotFoundError) as excinfo: + unresolve("https://pip.readthedocs.io/projects/sub/es/latest/foo.html") + + exc = excinfo.value + self.assertEqual(exc.project, self.subproject) + self.assertEqual(exc.language, "es") + self.assertEqual(exc.filename, "/foo.html") def test_unresolver_translation(self): parts = unresolve("https://pip.readthedocs.io/ja/latest/foo.html") @@ -137,11 +172,12 @@ def test_unresolver_translation(self): self.assertEqual(parts.filename, "/foo.html") def test_unresolve_no_existing_translation(self): - parts = unresolve("https://pip.readthedocs.io/es/latest/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(TranslationNotFoundError) as excinfo: + unresolve("https://pip.readthedocs.io/es/latest/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.language, "es") + self.assertEqual(exc.filename, "/") def test_unresolver_custom_domain(self): self.domain = fixture.get( @@ -229,41 +265,49 @@ def test_external_version_single_version_project(self): def test_unexisting_external_version_single_version_project(self): self.pip.single_version = True self.pip.save() - parts = unresolve("https://pip--10.dev.readthedocs.build/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve("https://pip--10.dev.readthedocs.build/") + + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version_slug, "10") + self.assertEqual(exc.filename, "/") def test_non_external_version_single_version_project(self): self.pip.single_version = True self.pip.save() - parts = unresolve("https://pip--latest.dev.readthedocs.build/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve("https://pip--latest.dev.readthedocs.build/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version_slug, "latest") + self.assertEqual(exc.filename, "/") def test_unresolve_external_version_no_existing_version(self): - parts = unresolve("https://pip--10.dev.readthedocs.build/en/10/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve("https://pip--10.dev.readthedocs.build/en/10/") + + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version_slug, "10") + self.assertEqual(exc.filename, "/") def test_external_version_not_matching_final_version(self): - parts = unresolve("https://pip--10.dev.readthedocs.build/en/latest/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(InvalidExternalVersionError) as excinfo: + unresolve("https://pip--10.dev.readthedocs.build/en/latest/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version_slug, "latest") + self.assertEqual(exc.external_version_slug, "10") def test_normal_version_in_external_version_subdomain(self): - parts = unresolve("https://pip--latest.dev.readthedocs.build/en/latest/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve("https://pip--latest.dev.readthedocs.build/en/latest/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version_slug, "latest") + self.assertEqual(exc.filename, "/") def test_malformed_external_version(self): with pytest.raises(InvalidExternalDomainError):