From c50554400120e11c533e3b494dc1361f34225360 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 15 Feb 2023 19:15:19 -0500 Subject: [PATCH 01/14] Refactor unresolver --- readthedocs/core/unresolver.py | 170 +++++++++++++----- .../rtd_tests/tests/test_unresolver.py | 134 +++++++++----- 2 files changed, 212 insertions(+), 92 deletions(-) diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index d12c27a0818..f1361e32ef6 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -44,6 +44,33 @@ class InvalidCustomDomainError(DomainError): pass +class VersionNotFoundError(UnresolverError): + def __init__(self, project, version_slug, filename): + self.project = project + self.version_slug = version_slug + self.filename = filename + + +class TranslationNotFoundError(UnresolverError): + def __init__(self, project, language, filename): + self.project = project + self.language = language + self.filename = filename + + +class UnresolvedPathError(UnresolverError): + def __init__(self, project, path): + self.project = project + self.path = path + + +class InvalidExternalVersionError(UnresolverError): + def __init__(self, project, version, external_version_slug): + self.project = project + self.version = version + self.external_version_slug = external_version_slug + + @dataclass(slots=True) class UnresolvedURL: @@ -57,9 +84,9 @@ class UnresolvedURL: # It can be the same as parent_project. project: Project - version: Version = None - filename: str = None - parsed_url: ParseResult = None + version: Version + filename: str + parsed_url: ParseResult domain: Domain = None external: bool = False @@ -76,6 +103,8 @@ class DomainSourceType(Enum): @dataclass(slots=True) class UnresolvedDomain: + # The domain that was used to extract the information from. + source_domain: str source: DomainSourceType project: Project domain: Domain = None @@ -141,46 +170,88 @@ def unresolve(self, url, append_indexhtml=True): :param append_indexhtml: If `True` directories will be normalized to end with ``/index.html``. """ - parsed = urlparse(url) - domain = self.get_domain_from_host(parsed.netloc) + parsed_url = urlparse(url) + domain = self.get_domain_from_host(parsed_url.netloc) unresolved_domain = self.unresolve_domain(domain) + return self._unresolve( + unresolved_domain=unresolved_domain, + parsed_url=parsed_url, + append_indexhtml=append_indexhtml, + ) + + def unresolve_path(self, unresolved_domain, path, append_indexhtml=True): + """ + Unresolve a path given a unresolved domain. + + This is the same as the unresolve method, + but this method takes an unresolved domain + from unresolve_domain as input. + + :param unresolved_domain: An UnresolvedDomain object. + :param path: Path to unresolve (this shouldn't include the protocol or querystrings). + :param append_indexhtml: If `True` directories will be normalized + to end with ``/index.html``. + """ + # We don't call unparse() on the path, + # since it could be parsed as a full URL if it starts with a protocol. + parsed_url = ParseResult( + scheme="", netloc="", path=path, params="", query="", fragment="" + ) + return self._unresolve( + unresolved_domain=unresolved_domain, + parsed_url=parsed_url, + append_indexhtml=append_indexhtml, + ) + def _unresolve(self, unresolved_domain, parsed_url, append_indexhtml): + """ + The actual unresolver. + + Extracted into a separate method so it can be re-used by + the unresolve and unresolve_path methods. + """ current_project, version, filename = self._unresolve_path( parent_project=unresolved_domain.project, - path=parsed.path, + path=parsed_url.path, external_version_slug=unresolved_domain.external_version_slug, ) # Make sure we are serving the external version from the subdomain. - if unresolved_domain.is_from_external_domain and version: + if unresolved_domain.is_from_external_domain: + domain = unresolved_domain.source_domain if unresolved_domain.external_version_slug != version.slug: log.warning( "Invalid version for external domain.", domain=domain, version_slug=version.slug, ) - version = None - filename = None - elif not version.is_external: + raise InvalidExternalVersionError( + project=current_project, + version=version, + external_version_slug=unresolved_domain.external_version_slug, + ) + if not version.is_external: log.warning( "Attempt of serving a non-external version from RTD_EXTERNAL_VERSION_DOMAIN.", domain=domain, version_slug=version.slug, version_type=version.type, - url=url, ) - version = None - filename = None + raise InvalidExternalVersionError( + project=current_project, + version=version, + external_version_slug=unresolved_domain.external_version_slug, + ) - if append_indexhtml and filename and filename.endswith("/"): + if append_indexhtml and filename.endswith("/"): filename += "index.html" return UnresolvedURL( parent_project=unresolved_domain.project, - project=current_project or unresolved_domain.project, + project=current_project, version=version, filename=filename, - parsed_url=parsed, + parsed_url=parsed_url, domain=unresolved_domain.domain, external=unresolved_domain.is_from_external_domain, ) @@ -197,12 +268,10 @@ def _match_multiversion_project(self, parent_project, path): """ Try to match a multiversion project. - If the translation exists, we return a result even if the version doesn't, - so the translation is taken as the current project (useful for 404 pages). + An exception is raised if we weren't able to find a matching version or language, + this exception has the current project (useful for 404 pages). - :returns: None or a tuple with the current project, version and file. - A tuple with only the project means we weren't able to find a version, - but the translation was correct. + :returns: A tuple with the current project, version and file. """ match = self.multiversion_pattern.match(path) if not match: @@ -216,14 +285,18 @@ def _match_multiversion_project(self, parent_project, path): project = parent_project else: project = parent_project.translations.filter(language=language).first() + if not project: + raise TranslationNotFoundError( + project=parent_project, language=language, filename=file + ) - if project: - version = project.versions.filter(slug=version_slug).first() - if version: - return project, version, file - return project, None, None + version = project.versions.filter(slug=version_slug).first() + if not version: + raise VersionNotFoundError( + project=project, version_slug=version_slug, filename=file + ) - return None + return project, version, file def _match_subproject(self, parent_project, path, external_version_slug=None): """ @@ -232,12 +305,7 @@ def _match_subproject(self, parent_project, path, external_version_slug=None): If the subproject exists, we try to resolve the rest of the path with the subproject as the canonical project. - If the subproject exists, we return a result even if version doesn't, - so the subproject is taken as the current project (useful for 404 pages). - - :returns: None or a tuple with the current project, version and file. - A tuple with only the project means we were able to find the subproject, - but we weren't able to resolve the rest of the path. + :returns: A tuple with the current project, version and file. """ match = self.subproject_pattern.match(path) if not match: @@ -260,12 +328,7 @@ def _match_subproject(self, parent_project, path, external_version_slug=None): check_subprojects=False, external_version_slug=external_version_slug, ) - # If we got a valid response, return that, - # otherwise return the current subproject - # as the current project without a valid version or path. - if response: - return response - return subproject, None, None + return response return None def _match_single_version_project( @@ -276,21 +339,30 @@ def _match_single_version_project( By default any path will match. If `external_version_slug` is given, that version is used instead of the project's default version. + + An exception is raised if we weren't able to find a matching version, + this exception has the current project (useful for 404 pages). + + :returns: A tuple with the current project, version and file. """ file = self._normalize_filename(path) if external_version_slug: + version_slug = external_version_slug version = ( parent_project.versions(manager=EXTERNAL) - .filter(slug=external_version_slug) + .filter(slug=version_slug) .first() ) else: - version = parent_project.versions.filter( - slug=parent_project.default_version - ).first() - if version: - return parent_project, version, file - return parent_project, None, None + version_slug = parent_project.default_version + version = parent_project.versions.filter(slug=version_slug).first() + + if not version: + raise VersionNotFoundError( + project=parent_project, version_slug=version_slug, filename=file + ) + + return parent_project, version, file def _unresolve_path( self, parent_project, path, check_subprojects=True, external_version_slug=None @@ -352,7 +424,7 @@ def _unresolve_path( if response: return response - return parent_project, None, None + raise UnresolvedPathError(project=parent_project, path=path) @staticmethod def get_domain_from_host(host): @@ -384,6 +456,7 @@ def unresolve_domain(self, domain): project_slug = subdomain log.debug("Public domain.", domain=domain) return UnresolvedDomain( + source_domain=domain, source=DomainSourceType.public_domain, project=self._resolve_project_slug(project_slug, domain), ) @@ -400,6 +473,7 @@ def unresolve_domain(self, domain): project_slug, version_slug = subdomain.rsplit("--", maxsplit=1) log.debug("External versions domain.", domain=domain) return UnresolvedDomain( + source_domain=domain, source=DomainSourceType.external_domain, project=self._resolve_project_slug(project_slug, domain), external_version_slug=version_slug, @@ -425,6 +499,7 @@ def unresolve_domain(self, domain): log.debug("Custom domain.", domain=domain) return UnresolvedDomain( + source_domain=domain, source=DomainSourceType.custom_domain, project=domain_object.project, domain=domain_object, @@ -464,6 +539,7 @@ def unresolve_domain_from_request(self, request): project_slug=project.slug, ) return UnresolvedDomain( + source_domain=host, source=DomainSourceType.http_header, project=project, ) diff --git a/readthedocs/rtd_tests/tests/test_unresolver.py b/readthedocs/rtd_tests/tests/test_unresolver.py index c6bdc44e8b7..f95d72cb731 100644 --- a/readthedocs/rtd_tests/tests/test_unresolver.py +++ b/readthedocs/rtd_tests/tests/test_unresolver.py @@ -8,7 +8,11 @@ from readthedocs.core.unresolver import ( InvalidCustomDomainError, InvalidExternalDomainError, + InvalidExternalVersionError, SuspiciousHostnameError, + TranslationNotFoundError, + UnresolvedPathError, + VersionNotFoundError, unresolve, ) from readthedocs.projects.models import Domain, Project @@ -63,17 +67,44 @@ def test_file_with_trailing_slash(self): self.assertEqual(parts.version, self.version) self.assertEqual(parts.filename, "/foo/index.html") + def test_project_no_version_and_language(self): + with pytest.raises(UnresolvedPathError) as excinfo: + unresolve("https://pip.readthedocs.io/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.path, "/") + + with pytest.raises(UnresolvedPathError) as excinfo: + unresolve("https://pip.readthedocs.io/foo/bar") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.path, "/foo/bar") + + def test_subproject_no_version_and_language(self): + with pytest.raises(UnresolvedPathError) as excinfo: + unresolve("https://pip.readthedocs.io/projects/sub/") + exc = excinfo.value + self.assertEqual(exc.project, self.subproject) + self.assertEqual(exc.path, "/") + + with pytest.raises(UnresolvedPathError) as excinfo: + unresolve("https://pip.readthedocs.io/projects/sub/foo/bar") + exc = excinfo.value + self.assertEqual(exc.project, self.subproject) + self.assertEqual(exc.path, "/foo/bar") + def test_path_no_version(self): urls = [ "https://pip.readthedocs.io/en", "https://pip.readthedocs.io/en/", ] for url in urls: - parts = unresolve(url) - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve(url) + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version_slug, None) + self.assertEqual(exc.filename, "/") def test_unresolver_subproject(self): parts = unresolve("https://pip.readthedocs.io/projects/sub/ja/latest/foo.html") @@ -116,18 +147,22 @@ def test_unresolve_subproject_alias(self): self.assertEqual(parts.filename, "/index.html") def test_unresolve_subproject_invalid_version(self): - parts = unresolve("https://pip.readthedocs.io/projects/sub/ja/nothing/foo.html") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.subproject) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve("https://pip.readthedocs.io/projects/sub/ja/nothing/foo.html") + exc = excinfo.value + + self.assertEqual(exc.project, self.subproject) + self.assertEqual(exc.version_slug, "nothing") + self.assertEqual(exc.filename, "/foo.html") def test_unresolve_subproject_invalid_translation(self): - parts = unresolve("https://pip.readthedocs.io/projects/sub/es/latest/foo.html") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.subproject) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(TranslationNotFoundError) as excinfo: + unresolve("https://pip.readthedocs.io/projects/sub/es/latest/foo.html") + + exc = excinfo.value + self.assertEqual(exc.project, self.subproject) + self.assertEqual(exc.language, "es") + self.assertEqual(exc.filename, "/foo.html") def test_unresolver_translation(self): parts = unresolve("https://pip.readthedocs.io/ja/latest/foo.html") @@ -137,11 +172,12 @@ def test_unresolver_translation(self): self.assertEqual(parts.filename, "/foo.html") def test_unresolve_no_existing_translation(self): - parts = unresolve("https://pip.readthedocs.io/es/latest/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(TranslationNotFoundError) as excinfo: + unresolve("https://pip.readthedocs.io/es/latest/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.language, "es") + self.assertEqual(exc.filename, "/") def test_unresolver_custom_domain(self): self.domain = fixture.get( @@ -229,41 +265,49 @@ def test_external_version_single_version_project(self): def test_unexisting_external_version_single_version_project(self): self.pip.single_version = True self.pip.save() - parts = unresolve("https://pip--10.dev.readthedocs.build/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve("https://pip--10.dev.readthedocs.build/") + + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version_slug, "10") + self.assertEqual(exc.filename, "/") def test_non_external_version_single_version_project(self): self.pip.single_version = True self.pip.save() - parts = unresolve("https://pip--latest.dev.readthedocs.build/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve("https://pip--latest.dev.readthedocs.build/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version_slug, "latest") + self.assertEqual(exc.filename, "/") def test_unresolve_external_version_no_existing_version(self): - parts = unresolve("https://pip--10.dev.readthedocs.build/en/10/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve("https://pip--10.dev.readthedocs.build/en/10/") + + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version_slug, "10") + self.assertEqual(exc.filename, "/") def test_external_version_not_matching_final_version(self): - parts = unresolve("https://pip--10.dev.readthedocs.build/en/latest/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(InvalidExternalVersionError) as excinfo: + unresolve("https://pip--10.dev.readthedocs.build/en/latest/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version, self.version) + self.assertEqual(exc.external_version_slug, "10") def test_normal_version_in_external_version_subdomain(self): - parts = unresolve("https://pip--latest.dev.readthedocs.build/en/latest/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(InvalidExternalVersionError) as excinfo: + unresolve("https://pip--latest.dev.readthedocs.build/en/latest/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.external_version_slug, "latest") + self.assertEqual(exc.version, self.version) def test_malformed_external_version(self): with pytest.raises(InvalidExternalDomainError): From 14bdbd25144f1349c3b7f46c315368c40b3d7a77 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 21 Feb 2023 14:45:45 -0500 Subject: [PATCH 02/14] Move external version checks inside unresolver --- readthedocs/core/unresolver.py | 59 +++++++------------ .../rtd_tests/tests/test_unresolver.py | 8 +-- 2 files changed, 25 insertions(+), 42 deletions(-) diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index f1361e32ef6..7e909d8ead3 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -6,7 +6,7 @@ import structlog from django.conf import settings -from readthedocs.builds.constants import EXTERNAL +from readthedocs.builds.constants import EXTERNAL, INTERNAL from readthedocs.builds.models import Version from readthedocs.constants import pattern_opts from readthedocs.projects.models import Domain, Feature, Project @@ -65,9 +65,9 @@ def __init__(self, project, path): class InvalidExternalVersionError(UnresolverError): - def __init__(self, project, version, external_version_slug): + def __init__(self, project, version_slug, external_version_slug): self.project = project - self.version = version + self.version_slug = version_slug self.external_version_slug = external_version_slug @@ -216,33 +216,6 @@ def _unresolve(self, unresolved_domain, parsed_url, append_indexhtml): external_version_slug=unresolved_domain.external_version_slug, ) - # Make sure we are serving the external version from the subdomain. - if unresolved_domain.is_from_external_domain: - domain = unresolved_domain.source_domain - if unresolved_domain.external_version_slug != version.slug: - log.warning( - "Invalid version for external domain.", - domain=domain, - version_slug=version.slug, - ) - raise InvalidExternalVersionError( - project=current_project, - version=version, - external_version_slug=unresolved_domain.external_version_slug, - ) - if not version.is_external: - log.warning( - "Attempt of serving a non-external version from RTD_EXTERNAL_VERSION_DOMAIN.", - domain=domain, - version_slug=version.slug, - version_type=version.type, - ) - raise InvalidExternalVersionError( - project=current_project, - version=version, - external_version_slug=unresolved_domain.external_version_slug, - ) - if append_indexhtml and filename.endswith("/"): filename += "index.html" @@ -264,7 +237,9 @@ def _normalize_filename(filename): filename = "/" + filename return filename - def _match_multiversion_project(self, parent_project, path): + def _match_multiversion_project( + self, parent_project, path, external_version_slug=None + ): """ Try to match a multiversion project. @@ -290,7 +265,15 @@ def _match_multiversion_project(self, parent_project, path): project=parent_project, language=language, filename=file ) - version = project.versions.filter(slug=version_slug).first() + if external_version_slug and external_version_slug != version_slug: + raise InvalidExternalVersionError( + project=project, + version_slug=version_slug, + external_version_slug=external_version_slug, + ) + + manager = EXTERNAL if external_version_slug else INTERNAL + version = project.versions(manager=manager).filter(slug=version_slug).first() if not version: raise VersionNotFoundError( project=project, version_slug=version_slug, filename=file @@ -348,15 +331,14 @@ def _match_single_version_project( file = self._normalize_filename(path) if external_version_slug: version_slug = external_version_slug - version = ( - parent_project.versions(manager=EXTERNAL) - .filter(slug=version_slug) - .first() - ) + manager = EXTERNAL else: version_slug = parent_project.default_version - version = parent_project.versions.filter(slug=version_slug).first() + manager = INTERNAL + version = ( + parent_project.versions(manager=manager).filter(slug=version_slug).first() + ) if not version: raise VersionNotFoundError( project=parent_project, version_slug=version_slug, filename=file @@ -400,6 +382,7 @@ def _unresolve_path( response = self._match_multiversion_project( parent_project=parent_project, path=path, + external_version_slug=external_version_slug, ) if response: return response diff --git a/readthedocs/rtd_tests/tests/test_unresolver.py b/readthedocs/rtd_tests/tests/test_unresolver.py index f95d72cb731..482806fdf01 100644 --- a/readthedocs/rtd_tests/tests/test_unresolver.py +++ b/readthedocs/rtd_tests/tests/test_unresolver.py @@ -298,16 +298,16 @@ def test_external_version_not_matching_final_version(self): unresolve("https://pip--10.dev.readthedocs.build/en/latest/") exc = excinfo.value self.assertEqual(exc.project, self.pip) - self.assertEqual(exc.version, self.version) + self.assertEqual(exc.version_slug, "latest") self.assertEqual(exc.external_version_slug, "10") def test_normal_version_in_external_version_subdomain(self): - with pytest.raises(InvalidExternalVersionError) as excinfo: + with pytest.raises(VersionNotFoundError) as excinfo: unresolve("https://pip--latest.dev.readthedocs.build/en/latest/") exc = excinfo.value self.assertEqual(exc.project, self.pip) - self.assertEqual(exc.external_version_slug, "latest") - self.assertEqual(exc.version, self.version) + self.assertEqual(exc.version_slug, "latest") + self.assertEqual(exc.filename, "/") def test_malformed_external_version(self): with pytest.raises(InvalidExternalDomainError): From 36b9f02820ba79380b516acb7a28a3d87723670b Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 22 Feb 2023 17:11:42 -0500 Subject: [PATCH 03/14] Updates from review --- readthedocs/core/unresolver.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index 7e909d8ead3..52bea1a902e 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -107,6 +107,7 @@ class UnresolvedDomain: source_domain: str source: DomainSourceType project: Project + # Domain object for custom domains. domain: Domain = None external_version_slug: str = None @@ -247,6 +248,7 @@ def _match_multiversion_project( this exception has the current project (useful for 404 pages). :returns: A tuple with the current project, version and file. + Returns `None` if there isn't a total or partial match. """ match = self.multiversion_pattern.match(path) if not match: @@ -289,6 +291,7 @@ def _match_subproject(self, parent_project, path, external_version_slug=None): with the subproject as the canonical project. :returns: A tuple with the current project, version and file. + Returns `None` if there isn't a total or partial match. """ match = self.subproject_pattern.match(path) if not match: @@ -327,6 +330,7 @@ def _match_single_version_project( this exception has the current project (useful for 404 pages). :returns: A tuple with the current project, version and file. + Returns `None` if there isn't a total or partial match. """ file = self._normalize_filename(path) if external_version_slug: From 38496bc12431312bced90d1a5c48010eab64a7fd Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 27 Feb 2023 17:28:30 -0500 Subject: [PATCH 04/14] Proxito: use unresolved in 404 handler --- readthedocs/core/unresolver.py | 8 +- readthedocs/proxito/views/serve.py | 286 +++++++++++++++++++++-------- 2 files changed, 220 insertions(+), 74 deletions(-) diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index 52bea1a902e..b040e4adb51 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -52,10 +52,11 @@ def __init__(self, project, version_slug, filename): class TranslationNotFoundError(UnresolverError): - def __init__(self, project, language, filename): + def __init__(self, project, language, filename, version_slug): self.project = project self.language = language self.filename = filename + self.version_slug = version_slug class UnresolvedPathError(UnresolverError): @@ -264,7 +265,10 @@ def _match_multiversion_project( project = parent_project.translations.filter(language=language).first() if not project: raise TranslationNotFoundError( - project=parent_project, language=language, filename=file + project=parent_project, + language=language, + filename=file, + version_slug=version_slug, ) if external_version_slug and external_version_slug != version_slug: diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 2dccf3b813f..26eb4045ff3 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -289,16 +289,25 @@ def get(self, request, proxito_path, template_name='404.html'): * Handles directory indexing for URLs that don't end in a slash * Handles directory indexing for README.html (for now) + * Check for user redirects + * Record the broken link for analytics * Handles custom 404 serving For 404's, first search for a 404 page in the current version, then continues with the default version and finally, if none of them are found, the Read the Docs default page (Maze Found) is rendered by Django and served. """ - # pylint: disable=too-many-locals log.bind(proxito_path=proxito_path) log.debug('Executing 404 handler.') + unresolved_domain = request.unresolved_domain + + if unresolved_domain.is_from_external_domain: + self.version_type = EXTERNAL + if unresolved_domain.project.has_feature(Feature.USE_UNRESOLVER_WITH_PROXITO): + return self.get_using_unresolver(request, proxito_path) + + # Parse the URL using the normal urlconf, so we get proper subdomain/translation data _, __, kwargs = url_resolve( proxito_path, @@ -321,41 +330,21 @@ def get(self, request, proxito_path, template_name='404.html'): version_slug=version_slug, ) - if version_slug: - storage_root_path = final_project.get_storage_path( - type_="html", - version_slug=version_slug, - include_file=False, - version_type=self.version_type, - ) - - # First, check for dirhtml with slash - for tryfile in ("index.html", "README.html"): - storage_filename_path = build_media_storage.join( - storage_root_path, - f"{filename}/{tryfile}".lstrip("/"), - ) - log.debug("Trying index filename.") - if build_media_storage.exists(storage_filename_path): - log.info("Redirecting to index file.", tryfile=tryfile) - # Use urlparse so that we maintain GET args in our redirect - parts = urlparse(proxito_path) - if tryfile == "README.html": - new_path = parts.path.rstrip("/") + f"/{tryfile}" - else: - new_path = parts.path.rstrip("/") + "/" - - # `proxito_path` doesn't include query params.` - query = urlparse(request.get_full_path()).query - new_parts = parts._replace( - path=new_path, - query=query, - ) - redirect_url = new_parts.geturl() + version = Version.objects.filter( + project=final_project, slug=version_slug + ).first() - # TODO: decide if we need to check for infinite redirect here - # (from URL == to URL) - return HttpResponseRedirect(redirect_url) + # Check if an index file exists, redirect to it if so. + if version: + response = self._get_index_file_redirect( + request=request, + project=final_project, + version=version, + filename=filename, + full_path=proxito_path, + ) + if response: + return response # Check and perform redirects on 404 handler # NOTE: this redirect check must be done after trying files like @@ -375,21 +364,6 @@ def get(self, request, proxito_path, template_name='404.html'): # Continue with our normal 404 handling in this case pass - version = Version.objects.filter( - project=final_project, slug=version_slug - ).first() - - # If there are no redirect, - # try to serve the custom 404 of the current version (version_slug) - # Then, try to serve the custom 404 page for the default version - # (project.get_default_version()) - versions = [] - if version: - versions.append(version_slug) - default_version_slug = final_project.get_default_version() - if default_version_slug != version_slug: - versions.append(default_version_slug) - # Register 404 pages into our database for user's analytics self._register_broken_link( project=final_project, @@ -398,28 +372,13 @@ def get(self, request, proxito_path, template_name='404.html'): full_path=proxito_path, ) - for version_slug_404 in versions: - if not self.allowed_user(request, final_project, version_slug_404): - continue - - storage_root_path = final_project.get_storage_path( - type_='html', - version_slug=version_slug_404, - include_file=False, - version_type=self.version_type, - ) - tryfiles = ["404.html", "404/index.html"] - for tryfile in tryfiles: - storage_filename_path = build_media_storage.join(storage_root_path, tryfile) - if build_media_storage.exists(storage_filename_path): - log.info( - 'Serving custom 404.html page.', - version_slug_404=version_slug_404, - storage_filename_path=storage_filename_path, - ) - resp = HttpResponse(build_media_storage.open(storage_filename_path).read()) - resp.status_code = 404 - return resp + response = self._serve_custom_404_page( + request=request, + project=final_project, + version=version, + ) + if response: + return response raise Http404('No custom 404 page found.') @@ -461,6 +420,189 @@ def _register_broken_link(self, project, version, path, full_path): full_path=full_path, ) + def _serve_custom_404_page(self, request, project, version=None): + """ + Try to serve a custom 404 page from this project. + + If a version is given, try to serve the 404 page from that version first, + if it doesn't exist, try to serve the 404 page from the default version. + + We check for a 404.html or 404/index.html file. + + If a 404 page is found, we return a response with the content of that file, + `None` otherwise. + """ + current_version_slug = version.slug if version else None + versions_slug = [] + if current_version_slug: + versions_slug.append(current_version_slug) + + default_version_slug = project.get_default_version() + if default_version_slug != current_version_slug: + versions_slug.append(default_version_slug) + + for version_slug_404 in versions_slug: + if not self.allowed_user(request, project, version_slug_404): + continue + + storage_root_path = project.get_storage_path( + type_="html", + version_slug=version_slug_404, + include_file=False, + version_type=self.version_type, + ) + tryfiles = ["404.html", "404/index.html"] + for tryfile in tryfiles: + storage_filename_path = build_media_storage.join( + storage_root_path, tryfile + ) + if build_media_storage.exists(storage_filename_path): + log.info( + "Serving custom 404.html page.", + version_slug_404=version_slug_404, + storage_filename_path=storage_filename_path, + ) + resp = HttpResponse( + build_media_storage.open(storage_filename_path).read() + ) + resp.status_code = 404 + return resp + return None + + def _get_index_file_redirect(self, request, project, version, filename, full_path): + """Check if a file is a directory and redirect to its index/README file.""" + storage_root_path = project.get_storage_path( + type_="html", + version_slug=version.slug, + include_file=False, + version_type=self.version_type, + ) + + # First, check for dirhtml with slash + for tryfile in ("index.html", "README.html"): + storage_filename_path = build_media_storage.join( + storage_root_path, + f"{filename}/{tryfile}".lstrip("/"), + ) + log.debug("Trying index filename.") + if build_media_storage.exists(storage_filename_path): + log.info("Redirecting to index file.", tryfile=tryfile) + # Use urlparse so that we maintain GET args in our redirect + parts = urlparse(full_path) + if tryfile == "README.html": + new_path = parts.path.rstrip("/") + f"/{tryfile}" + else: + new_path = parts.path.rstrip("/") + "/" + + # `full_path` doesn't include query params.` + query = urlparse(request.get_full_path()).query + redirect_url = parts._replace( + path=new_path, + query=query, + ).geturl() + + # TODO: decide if we need to check for infinite redirect here + # (from URL == to URL) + return HttpResponseRedirect(redirect_url) + + return None + + def get_using_unresolver(self, request, path): + """ + 404 handler using the new proxito implementation. + + This is basically a copy of the get() method, but adapted to make use + of the unresolver to extract the current project, version, and file. + """ + unresolved_domain = request.unresolved_domain + project = None + version = None + filename = None + lang_slug = None + version_slug = None + try: + unresolved = unresolver.unresolve_path( + unresolved_domain=unresolved_domain, + path=path, + append_indexhtml=False, + ) + project = unresolved.project + version = unresolved.version + filename = unresolved.filename + lang_slug = project.language + version_slug = version.slug + except VersionNotFoundError as exc: + project = exc.project + lang_slug = project.language + version_slug = exc.version_slug + filename = exc.filename + except TranslationNotFoundError as exc: + project = exc.project + lang_slug = exc.language + version_slug = exc.version_slug + filename = exc.filename + except InvalidExternalVersionError as exc: + project = exc.project + except UnresolvedPathError as exc: + project = exc.project + filename = exc.path + + log.bind( + project_slug=project.slug, + version_slug=version_slug, + ) + + request.path_project_slug = project.slug + request.path_version_slug = version_slug + + if version: + response = self._get_index_file_redirect( + request=request, + project=project, + version=version, + filename=filename, + full_path=path, + ) + if response: + return response + + # Check and perform redirects on 404 handler + # NOTE: this redirect check must be done after trying files like + # ``index.html`` and ``README.html`` to emulate the behavior we had when + # serving directly from NGINX without passing through Python. + redirect_path, http_status = self.get_redirect( + project=project, + lang_slug=lang_slug, + version_slug=version_slug, + filename=filename, + full_path=path, + ) + if redirect_path and http_status: + try: + return self.get_redirect_response( + request, redirect_path, path, http_status + ) + except InfiniteRedirectException: + # Continue with our normal 404 handling in this case + pass + + # Register 404 pages into our database for user's analytics + self._register_broken_link( + project=project, + version=version, + path=filename, + full_path=path, + ) + + response = self._serve_custom_404_page( + request=request, + project=project, + version=version, + ) + if response: + return response + raise Http404("No custom 404 page found.") + class ServeError404(SettingsOverrideObject): _default_class = ServeError404Base From 69f4886bda3b21d57948481996a7fc3dcaed6ec4 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 28 Feb 2023 17:16:32 -0500 Subject: [PATCH 05/14] Updates from review --- readthedocs/core/unresolver.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index 52bea1a902e..7bf6bbe3004 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -160,7 +160,7 @@ class Unresolver: re.VERBOSE, ) - def unresolve(self, url, append_indexhtml=True): + def unresolve_url(self, url, append_indexhtml=True): """ Turn a URL into the component parts that our views would use to process them. @@ -211,7 +211,7 @@ def _unresolve(self, unresolved_domain, parsed_url, append_indexhtml): Extracted into a separate method so it can be re-used by the unresolve and unresolve_path methods. """ - current_project, version, filename = self._unresolve_path( + current_project, version, filename = self._unresolve_path_with_parent_project( parent_project=unresolved_domain.project, path=parsed_url.path, external_version_slug=unresolved_domain.external_version_slug, @@ -308,7 +308,7 @@ def _match_subproject(self, parent_project, path, external_version_slug=None): # We use the subproject as the new parent project # to resolve the rest of the path relative to it. subproject = project_relationship.child - response = self._unresolve_path( + response = self._unresolve_path_with_parent_project( parent_project=subproject, path=file, check_subprojects=False, @@ -350,7 +350,7 @@ def _match_single_version_project( return parent_project, version, file - def _unresolve_path( + def _unresolve_path_with_parent_project( self, parent_project, path, check_subprojects=True, external_version_slug=None ): """ @@ -540,4 +540,4 @@ def unresolve_domain_from_request(self, request): unresolver = Unresolver() -unresolve = unresolver.unresolve +unresolve = unresolver.unresolve_url From 21841125aefe90b7e57e6890a5d78ad5550b86bd Mon Sep 17 00:00:00 2001 From: Benjamin Bach Date: Wed, 1 Mar 2023 16:05:05 +0100 Subject: [PATCH 06/14] Missing imports --- readthedocs/proxito/views/serve.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 26eb4045ff3..887ec8202e6 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -25,6 +25,13 @@ from readthedocs.redirects.exceptions import InfiniteRedirectException from readthedocs.storage import build_media_storage +from ...core import unresolver +from ...core.unresolver import ( + InvalidExternalVersionError, + TranslationNotFoundError, + UnresolvedPathError, + VersionNotFoundError, +) from .mixins import ( InvalidPathError, ServeDocsMixin, From ce28e12af643433eb78db4e688c07addd4615975 Mon Sep 17 00:00:00 2001 From: Benjamin Bach Date: Wed, 1 Mar 2023 16:06:02 +0100 Subject: [PATCH 07/14] Linting: Too many blank lines --- readthedocs/proxito/views/serve.py | 1 - 1 file changed, 1 deletion(-) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 887ec8202e6..fb2a5afbd84 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -314,7 +314,6 @@ def get(self, request, proxito_path, template_name='404.html'): if unresolved_domain.project.has_feature(Feature.USE_UNRESOLVER_WITH_PROXITO): return self.get_using_unresolver(request, proxito_path) - # Parse the URL using the normal urlconf, so we get proper subdomain/translation data _, __, kwargs = url_resolve( proxito_path, From 05384be067d7e1fb8e7b1264f428a9939201231d Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 1 Mar 2023 20:21:39 -0500 Subject: [PATCH 08/14] Remove duplicated imports --- readthedocs/proxito/views/serve.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 6f8b13f9a77..f0fbf10898b 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -30,13 +30,6 @@ from readthedocs.redirects.exceptions import InfiniteRedirectException from readthedocs.storage import build_media_storage -from ...core import unresolver -from ...core.unresolver import ( - InvalidExternalVersionError, - TranslationNotFoundError, - UnresolvedPathError, - VersionNotFoundError, -) from .mixins import ( InvalidPathError, ServeDocsMixin, From 7af491af9a25ec26805d73a65743e25d40727fa5 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 1 Mar 2023 20:22:07 -0500 Subject: [PATCH 09/14] Black --- readthedocs/core/unresolver.py | 1 - 1 file changed, 1 deletion(-) diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index 8446b58b5b4..ca3d057cf7b 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -52,7 +52,6 @@ def __init__(self, project, version_slug, filename): class TranslationNotFoundError(UnresolverError): - def __init__(self, project, language, filename, version_slug): self.project = project self.language = language From 86f67b2958d1b400613db2ad227104b1c4c0ae61 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 1 Mar 2023 20:24:24 -0500 Subject: [PATCH 10/14] Fix exc --- readthedocs/proxito/views/serve.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index f0fbf10898b..412ea7d805c 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -463,7 +463,6 @@ def get(self, request, proxito_path, template_name='404.html'): log.debug('Executing 404 handler.') unresolved_domain = request.unresolved_domain - if unresolved_domain.is_from_external_domain: self.version_type = EXTERNAL if unresolved_domain.project.has_feature(Feature.USE_UNRESOLVER_WITH_PROXITO): @@ -704,7 +703,7 @@ def get_using_unresolver(self, request, path): filename = exc.filename except InvalidExternalVersionError as exc: project = exc.project - except UnresolvedPathError as exc: + except InvalidPathForVersionedProjectError as exc: project = exc.project filename = exc.path From 2d92061c2b11f55388a977a0653eed8a5bbd6d81 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Thu, 2 Mar 2023 18:11:07 -0500 Subject: [PATCH 11/14] Update tests --- .../proxito/tests/test_old_redirects.py | 42 ++++++++++++++++++- readthedocs/proxito/tests/test_redirects.py | 15 ++++++- readthedocs/proxito/tests/test_urls.py | 17 +++++++- 3 files changed, 71 insertions(+), 3 deletions(-) diff --git a/readthedocs/proxito/tests/test_old_redirects.py b/readthedocs/proxito/tests/test_old_redirects.py index aed84bbb6c8..cddd5ba4ae8 100644 --- a/readthedocs/proxito/tests/test_old_redirects.py +++ b/readthedocs/proxito/tests/test_old_redirects.py @@ -1039,7 +1039,16 @@ def test_redirect_prefix_crossdomain(self): self.assertEqual(r.status_code, 302, url) self.assertEqual(r["Location"], expected_location, url) - # These aren't even handled by Django. + def test_redirect_prefix_crossdomain_with_newline_chars(self): + fixture.get( + Redirect, + project=self.project, + redirect_type="prefix", + from_url="/", + ) + # We make use of Django's URL resolve in the current implementation, + # which doesn't handle these chars and raises an exception + # instead of redirecting. urls = [ # Trying to bypass the protocol check by including a `\n` char. "http://project.dev.readthedocs.io/http:/%0A/my.host/path.html", @@ -1135,3 +1144,34 @@ def setUp(self): default_true=True, future_default_true=True, ) + + def test_redirect_prefix_crossdomain_with_newline_chars(self): + """ + Overridden to make it compatible with the new implementation. + + In the new implementation we will correctly redirect, + instead of raising an exception. + """ + fixture.get( + Redirect, + project=self.project, + redirect_type="prefix", + from_url="/", + ) + urls = [ + ( + "http://project.dev.readthedocs.io/http:/%0A/my.host/path.html", + "http://project.dev.readthedocs.io/en/latest/http://my.host/path.html", + ), + ( + "http://project.dev.readthedocs.io/%0A/my.host/path.html", + "http://project.dev.readthedocs.io/en/latest/my.host/path.html", + ), + ] + for url, expected_location in urls: + r = self.client.get( + url, + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 302, url) + self.assertEqual(r["Location"], expected_location, url) diff --git a/readthedocs/proxito/tests/test_redirects.py b/readthedocs/proxito/tests/test_redirects.py index 1a1c5c5236d..a02f944f8c4 100644 --- a/readthedocs/proxito/tests/test_redirects.py +++ b/readthedocs/proxito/tests/test_redirects.py @@ -1,8 +1,9 @@ # Copied from .org test_redirects - import pytest from django.test import override_settings +from django_dynamic_fixture import get +from readthedocs.projects.models import Feature from readthedocs.proxito.constants import RedirectType from .base import BaseDocServing @@ -280,3 +281,15 @@ def test_slash_redirect(self): # WARNING # The test client strips multiple slashes at the front of the URL # Additional tests for this are in ``test_middleware:test_front_slash`` + + +class ProxitoV2RedirectTests(RedirectTests): + # TODO: remove this class once the new implementation is the default. + def setUp(self): + super().setUp() + get( + Feature, + feature_id=Feature.USE_UNRESOLVER_WITH_PROXITO, + default_true=True, + future_default_true=True, + ) diff --git a/readthedocs/proxito/tests/test_urls.py b/readthedocs/proxito/tests/test_urls.py index df958f9316b..6d8e8f1c6d1 100644 --- a/readthedocs/proxito/tests/test_urls.py +++ b/readthedocs/proxito/tests/test_urls.py @@ -3,8 +3,11 @@ """Test URL config.""" import pytest -from django.test import TestCase, override_settings +from django.test import TestCase from django.urls import resolve +from django_dynamic_fixture import get + +from readthedocs.projects.models import Feature @pytest.mark.proxito @@ -103,3 +106,15 @@ def test_single_version(self): 'filename': 'some/path/index.html', }, ) + + +class ProxitoV2TestSingleVersionURLs(TestSingleVersionURLs): + # TODO: remove this class once the new implementation is the default. + def setUp(self): + super().setUp() + get( + Feature, + feature_id=Feature.USE_UNRESOLVER_WITH_PROXITO, + default_true=True, + future_default_true=True, + ) From c01750334b380a391075e544dd13950d8e3a7c19 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Thu, 2 Mar 2023 18:28:45 -0500 Subject: [PATCH 12/14] Move check to new implementation only --- readthedocs/proxito/views/serve.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 412ea7d805c..96871371c75 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -463,8 +463,6 @@ def get(self, request, proxito_path, template_name='404.html'): log.debug('Executing 404 handler.') unresolved_domain = request.unresolved_domain - if unresolved_domain.is_from_external_domain: - self.version_type = EXTERNAL if unresolved_domain.project.has_feature(Feature.USE_UNRESOLVER_WITH_PROXITO): return self.get_using_unresolver(request, proxito_path) @@ -675,6 +673,11 @@ def get_using_unresolver(self, request, path): of the unresolver to extract the current project, version, and file. """ unresolved_domain = request.unresolved_domain + # We force all storage calls to use the external versions storage, + # since we are serving an external version. + if unresolved_domain.is_from_external_domain: + self.version_type = EXTERNAL + project = None version = None filename = None From be2284880baeaa85e73c2c1743df457de47cb0f5 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 6 Mar 2023 19:03:38 -0500 Subject: [PATCH 13/14] Updates from review --- readthedocs/proxito/views/serve.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 96871371c75..a6c880f6911 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -492,7 +492,9 @@ def get(self, request, proxito_path, template_name='404.html'): project=final_project, slug=version_slug ).first() - # Check if an index file exists, redirect to it if so. + # If we were able to resolve to a valid version, it means that the + # current file doesn't exist. So we check if we can redirect to its + # index file if it exists before doing anything else. if version: response = self._get_index_file_redirect( request=request, @@ -519,7 +521,9 @@ def get(self, request, proxito_path, template_name='404.html'): try: return self.get_redirect_response(request, redirect_path, proxito_path, http_status) except InfiniteRedirectException: - # Continue with our normal 404 handling in this case + # ``get_redirect_response`` raises this when it's redirecting back to itself. + # We can safely ignore it here because it's logged in ``canonical_redirect``, + # and we don't want to issue infinite redirects. pass # Register 404 pages into our database for user's analytics @@ -530,7 +534,7 @@ def get(self, request, proxito_path, template_name='404.html'): full_path=proxito_path, ) - response = self._serve_custom_404_page( + response = self._get_custom_404_page( request=request, project=final_project, version=version, @@ -578,7 +582,7 @@ def _register_broken_link(self, project, version, path, full_path): full_path=full_path, ) - def _serve_custom_404_page(self, request, project, version=None): + def _get_custom_404_page(self, request, project, version=None): """ Try to serve a custom 404 page from this project. @@ -683,6 +687,9 @@ def get_using_unresolver(self, request, path): filename = None lang_slug = None version_slug = None + # Try to map the current path to a project/version/filename. + # If that fails, we fill the variables with the information we have + # available in the exceptions. try: unresolved = unresolver.unresolve_path( unresolved_domain=unresolved_domain, @@ -715,9 +722,13 @@ def get_using_unresolver(self, request, path): version_slug=version_slug, ) + # TODO: find a better way to pass this to the middleware. request.path_project_slug = project.slug request.path_version_slug = version_slug + # If we were able to resolve to a valid version, it means that the + # current file doesn't exist. So we check if we can redirect to its + # index file if it exists before doing anything else. if version: response = self._get_index_file_redirect( request=request, @@ -746,7 +757,9 @@ def get_using_unresolver(self, request, path): request, redirect_path, path, http_status ) except InfiniteRedirectException: - # Continue with our normal 404 handling in this case + # ``get_redirect_response`` raises this when it's redirecting back to itself. + # We can safely ignore it here because it's logged in ``canonical_redirect``, + # and we don't want to issue infinite redirects. pass # Register 404 pages into our database for user's analytics @@ -757,7 +770,7 @@ def get_using_unresolver(self, request, path): full_path=path, ) - response = self._serve_custom_404_page( + response = self._get_custom_404_page( request=request, project=project, version=version, From 03bbc6d7aee143f01158333e47cbd30c8bd6254d Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 7 Mar 2023 11:33:48 -0500 Subject: [PATCH 14/14] Updates from review --- readthedocs/core/unresolver.py | 5 +++-- readthedocs/proxito/views/serve.py | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index ca3d057cf7b..1aa2986a34f 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -52,10 +52,11 @@ def __init__(self, project, version_slug, filename): class TranslationNotFoundError(UnresolverError): - def __init__(self, project, language, filename, version_slug): + def __init__(self, project, language, version_slug, filename): self.project = project self.language = language self.filename = filename + # The version doesn't exist, so we just have the slug. self.version_slug = version_slug @@ -267,8 +268,8 @@ def _match_multiversion_project( raise TranslationNotFoundError( project=parent_project, language=language, - filename=file, version_slug=version_slug, + filename=file, ) if external_version_slug and external_version_slug != version_slug: diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index a6c880f6911..92399ab5475 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -495,6 +495,7 @@ def get(self, request, proxito_path, template_name='404.html'): # If we were able to resolve to a valid version, it means that the # current file doesn't exist. So we check if we can redirect to its # index file if it exists before doing anything else. + # This is /en/latest/foo -> /en/latest/foo/index.html. if version: response = self._get_index_file_redirect( request=request, @@ -632,7 +633,15 @@ def _get_custom_404_page(self, request, project, version=None): return None def _get_index_file_redirect(self, request, project, version, filename, full_path): - """Check if a file is a directory and redirect to its index/README file.""" + """ + Check if a file is a directory and redirect to its index/README file. + + For example: + + - /en/latest/foo -> /en/latest/foo/index.html + - /en/latest/foo -> /en/latest/foo/README.html + - /en/latest/foo/ -> /en/latest/foo/README.html + """ storage_root_path = project.get_storage_path( type_="html", version_slug=version.slug, @@ -679,6 +688,9 @@ def get_using_unresolver(self, request, path): unresolved_domain = request.unresolved_domain # We force all storage calls to use the external versions storage, # since we are serving an external version. + # The version that results from the unresolve_path() call already is + # validated to use the correct manager, this is here to add defense in + # depth against serving the wrong version. if unresolved_domain.is_from_external_domain: self.version_type = EXTERNAL @@ -729,6 +741,7 @@ def get_using_unresolver(self, request, path): # If we were able to resolve to a valid version, it means that the # current file doesn't exist. So we check if we can redirect to its # index file if it exists before doing anything else. + # This is /en/latest/foo -> /en/latest/foo/index.html. if version: response = self._get_index_file_redirect( request=request,