From c50554400120e11c533e3b494dc1361f34225360 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 15 Feb 2023 19:15:19 -0500 Subject: [PATCH 01/13] Refactor unresolver --- readthedocs/core/unresolver.py | 170 +++++++++++++----- .../rtd_tests/tests/test_unresolver.py | 134 +++++++++----- 2 files changed, 212 insertions(+), 92 deletions(-) diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index d12c27a0818..f1361e32ef6 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -44,6 +44,33 @@ class InvalidCustomDomainError(DomainError): pass +class VersionNotFoundError(UnresolverError): + def __init__(self, project, version_slug, filename): + self.project = project + self.version_slug = version_slug + self.filename = filename + + +class TranslationNotFoundError(UnresolverError): + def __init__(self, project, language, filename): + self.project = project + self.language = language + self.filename = filename + + +class UnresolvedPathError(UnresolverError): + def __init__(self, project, path): + self.project = project + self.path = path + + +class InvalidExternalVersionError(UnresolverError): + def __init__(self, project, version, external_version_slug): + self.project = project + self.version = version + self.external_version_slug = external_version_slug + + @dataclass(slots=True) class UnresolvedURL: @@ -57,9 +84,9 @@ class UnresolvedURL: # It can be the same as parent_project. project: Project - version: Version = None - filename: str = None - parsed_url: ParseResult = None + version: Version + filename: str + parsed_url: ParseResult domain: Domain = None external: bool = False @@ -76,6 +103,8 @@ class DomainSourceType(Enum): @dataclass(slots=True) class UnresolvedDomain: + # The domain that was used to extract the information from. + source_domain: str source: DomainSourceType project: Project domain: Domain = None @@ -141,46 +170,88 @@ def unresolve(self, url, append_indexhtml=True): :param append_indexhtml: If `True` directories will be normalized to end with ``/index.html``. """ - parsed = urlparse(url) - domain = self.get_domain_from_host(parsed.netloc) + parsed_url = urlparse(url) + domain = self.get_domain_from_host(parsed_url.netloc) unresolved_domain = self.unresolve_domain(domain) + return self._unresolve( + unresolved_domain=unresolved_domain, + parsed_url=parsed_url, + append_indexhtml=append_indexhtml, + ) + + def unresolve_path(self, unresolved_domain, path, append_indexhtml=True): + """ + Unresolve a path given a unresolved domain. + + This is the same as the unresolve method, + but this method takes an unresolved domain + from unresolve_domain as input. + + :param unresolved_domain: An UnresolvedDomain object. + :param path: Path to unresolve (this shouldn't include the protocol or querystrings). + :param append_indexhtml: If `True` directories will be normalized + to end with ``/index.html``. + """ + # We don't call unparse() on the path, + # since it could be parsed as a full URL if it starts with a protocol. + parsed_url = ParseResult( + scheme="", netloc="", path=path, params="", query="", fragment="" + ) + return self._unresolve( + unresolved_domain=unresolved_domain, + parsed_url=parsed_url, + append_indexhtml=append_indexhtml, + ) + def _unresolve(self, unresolved_domain, parsed_url, append_indexhtml): + """ + The actual unresolver. + + Extracted into a separate method so it can be re-used by + the unresolve and unresolve_path methods. + """ current_project, version, filename = self._unresolve_path( parent_project=unresolved_domain.project, - path=parsed.path, + path=parsed_url.path, external_version_slug=unresolved_domain.external_version_slug, ) # Make sure we are serving the external version from the subdomain. - if unresolved_domain.is_from_external_domain and version: + if unresolved_domain.is_from_external_domain: + domain = unresolved_domain.source_domain if unresolved_domain.external_version_slug != version.slug: log.warning( "Invalid version for external domain.", domain=domain, version_slug=version.slug, ) - version = None - filename = None - elif not version.is_external: + raise InvalidExternalVersionError( + project=current_project, + version=version, + external_version_slug=unresolved_domain.external_version_slug, + ) + if not version.is_external: log.warning( "Attempt of serving a non-external version from RTD_EXTERNAL_VERSION_DOMAIN.", domain=domain, version_slug=version.slug, version_type=version.type, - url=url, ) - version = None - filename = None + raise InvalidExternalVersionError( + project=current_project, + version=version, + external_version_slug=unresolved_domain.external_version_slug, + ) - if append_indexhtml and filename and filename.endswith("/"): + if append_indexhtml and filename.endswith("/"): filename += "index.html" return UnresolvedURL( parent_project=unresolved_domain.project, - project=current_project or unresolved_domain.project, + project=current_project, version=version, filename=filename, - parsed_url=parsed, + parsed_url=parsed_url, domain=unresolved_domain.domain, external=unresolved_domain.is_from_external_domain, ) @@ -197,12 +268,10 @@ def _match_multiversion_project(self, parent_project, path): """ Try to match a multiversion project. - If the translation exists, we return a result even if the version doesn't, - so the translation is taken as the current project (useful for 404 pages). + An exception is raised if we weren't able to find a matching version or language, + this exception has the current project (useful for 404 pages). - :returns: None or a tuple with the current project, version and file. - A tuple with only the project means we weren't able to find a version, - but the translation was correct. + :returns: A tuple with the current project, version and file. """ match = self.multiversion_pattern.match(path) if not match: @@ -216,14 +285,18 @@ def _match_multiversion_project(self, parent_project, path): project = parent_project else: project = parent_project.translations.filter(language=language).first() + if not project: + raise TranslationNotFoundError( + project=parent_project, language=language, filename=file + ) - if project: - version = project.versions.filter(slug=version_slug).first() - if version: - return project, version, file - return project, None, None + version = project.versions.filter(slug=version_slug).first() + if not version: + raise VersionNotFoundError( + project=project, version_slug=version_slug, filename=file + ) - return None + return project, version, file def _match_subproject(self, parent_project, path, external_version_slug=None): """ @@ -232,12 +305,7 @@ def _match_subproject(self, parent_project, path, external_version_slug=None): If the subproject exists, we try to resolve the rest of the path with the subproject as the canonical project. - If the subproject exists, we return a result even if version doesn't, - so the subproject is taken as the current project (useful for 404 pages). - - :returns: None or a tuple with the current project, version and file. - A tuple with only the project means we were able to find the subproject, - but we weren't able to resolve the rest of the path. + :returns: A tuple with the current project, version and file. """ match = self.subproject_pattern.match(path) if not match: @@ -260,12 +328,7 @@ def _match_subproject(self, parent_project, path, external_version_slug=None): check_subprojects=False, external_version_slug=external_version_slug, ) - # If we got a valid response, return that, - # otherwise return the current subproject - # as the current project without a valid version or path. - if response: - return response - return subproject, None, None + return response return None def _match_single_version_project( @@ -276,21 +339,30 @@ def _match_single_version_project( By default any path will match. If `external_version_slug` is given, that version is used instead of the project's default version. + + An exception is raised if we weren't able to find a matching version, + this exception has the current project (useful for 404 pages). + + :returns: A tuple with the current project, version and file. """ file = self._normalize_filename(path) if external_version_slug: + version_slug = external_version_slug version = ( parent_project.versions(manager=EXTERNAL) - .filter(slug=external_version_slug) + .filter(slug=version_slug) .first() ) else: - version = parent_project.versions.filter( - slug=parent_project.default_version - ).first() - if version: - return parent_project, version, file - return parent_project, None, None + version_slug = parent_project.default_version + version = parent_project.versions.filter(slug=version_slug).first() + + if not version: + raise VersionNotFoundError( + project=parent_project, version_slug=version_slug, filename=file + ) + + return parent_project, version, file def _unresolve_path( self, parent_project, path, check_subprojects=True, external_version_slug=None @@ -352,7 +424,7 @@ def _unresolve_path( if response: return response - return parent_project, None, None + raise UnresolvedPathError(project=parent_project, path=path) @staticmethod def get_domain_from_host(host): @@ -384,6 +456,7 @@ def unresolve_domain(self, domain): project_slug = subdomain log.debug("Public domain.", domain=domain) return UnresolvedDomain( + source_domain=domain, source=DomainSourceType.public_domain, project=self._resolve_project_slug(project_slug, domain), ) @@ -400,6 +473,7 @@ def unresolve_domain(self, domain): project_slug, version_slug = subdomain.rsplit("--", maxsplit=1) log.debug("External versions domain.", domain=domain) return UnresolvedDomain( + source_domain=domain, source=DomainSourceType.external_domain, project=self._resolve_project_slug(project_slug, domain), external_version_slug=version_slug, @@ -425,6 +499,7 @@ def unresolve_domain(self, domain): log.debug("Custom domain.", domain=domain) return UnresolvedDomain( + source_domain=domain, source=DomainSourceType.custom_domain, project=domain_object.project, domain=domain_object, @@ -464,6 +539,7 @@ def unresolve_domain_from_request(self, request): project_slug=project.slug, ) return UnresolvedDomain( + source_domain=host, source=DomainSourceType.http_header, project=project, ) diff --git a/readthedocs/rtd_tests/tests/test_unresolver.py b/readthedocs/rtd_tests/tests/test_unresolver.py index c6bdc44e8b7..f95d72cb731 100644 --- a/readthedocs/rtd_tests/tests/test_unresolver.py +++ b/readthedocs/rtd_tests/tests/test_unresolver.py @@ -8,7 +8,11 @@ from readthedocs.core.unresolver import ( InvalidCustomDomainError, InvalidExternalDomainError, + InvalidExternalVersionError, SuspiciousHostnameError, + TranslationNotFoundError, + UnresolvedPathError, + VersionNotFoundError, unresolve, ) from readthedocs.projects.models import Domain, Project @@ -63,17 +67,44 @@ def test_file_with_trailing_slash(self): self.assertEqual(parts.version, self.version) self.assertEqual(parts.filename, "/foo/index.html") + def test_project_no_version_and_language(self): + with pytest.raises(UnresolvedPathError) as excinfo: + unresolve("https://pip.readthedocs.io/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.path, "/") + + with pytest.raises(UnresolvedPathError) as excinfo: + unresolve("https://pip.readthedocs.io/foo/bar") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.path, "/foo/bar") + + def test_subproject_no_version_and_language(self): + with pytest.raises(UnresolvedPathError) as excinfo: + unresolve("https://pip.readthedocs.io/projects/sub/") + exc = excinfo.value + self.assertEqual(exc.project, self.subproject) + self.assertEqual(exc.path, "/") + + with pytest.raises(UnresolvedPathError) as excinfo: + unresolve("https://pip.readthedocs.io/projects/sub/foo/bar") + exc = excinfo.value + self.assertEqual(exc.project, self.subproject) + self.assertEqual(exc.path, "/foo/bar") + def test_path_no_version(self): urls = [ "https://pip.readthedocs.io/en", "https://pip.readthedocs.io/en/", ] for url in urls: - parts = unresolve(url) - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve(url) + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version_slug, None) + self.assertEqual(exc.filename, "/") def test_unresolver_subproject(self): parts = unresolve("https://pip.readthedocs.io/projects/sub/ja/latest/foo.html") @@ -116,18 +147,22 @@ def test_unresolve_subproject_alias(self): self.assertEqual(parts.filename, "/index.html") def test_unresolve_subproject_invalid_version(self): - parts = unresolve("https://pip.readthedocs.io/projects/sub/ja/nothing/foo.html") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.subproject) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve("https://pip.readthedocs.io/projects/sub/ja/nothing/foo.html") + exc = excinfo.value + + self.assertEqual(exc.project, self.subproject) + self.assertEqual(exc.version_slug, "nothing") + self.assertEqual(exc.filename, "/foo.html") def test_unresolve_subproject_invalid_translation(self): - parts = unresolve("https://pip.readthedocs.io/projects/sub/es/latest/foo.html") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.subproject) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(TranslationNotFoundError) as excinfo: + unresolve("https://pip.readthedocs.io/projects/sub/es/latest/foo.html") + + exc = excinfo.value + self.assertEqual(exc.project, self.subproject) + self.assertEqual(exc.language, "es") + self.assertEqual(exc.filename, "/foo.html") def test_unresolver_translation(self): parts = unresolve("https://pip.readthedocs.io/ja/latest/foo.html") @@ -137,11 +172,12 @@ def test_unresolver_translation(self): self.assertEqual(parts.filename, "/foo.html") def test_unresolve_no_existing_translation(self): - parts = unresolve("https://pip.readthedocs.io/es/latest/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(TranslationNotFoundError) as excinfo: + unresolve("https://pip.readthedocs.io/es/latest/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.language, "es") + self.assertEqual(exc.filename, "/") def test_unresolver_custom_domain(self): self.domain = fixture.get( @@ -229,41 +265,49 @@ def test_external_version_single_version_project(self): def test_unexisting_external_version_single_version_project(self): self.pip.single_version = True self.pip.save() - parts = unresolve("https://pip--10.dev.readthedocs.build/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve("https://pip--10.dev.readthedocs.build/") + + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version_slug, "10") + self.assertEqual(exc.filename, "/") def test_non_external_version_single_version_project(self): self.pip.single_version = True self.pip.save() - parts = unresolve("https://pip--latest.dev.readthedocs.build/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve("https://pip--latest.dev.readthedocs.build/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version_slug, "latest") + self.assertEqual(exc.filename, "/") def test_unresolve_external_version_no_existing_version(self): - parts = unresolve("https://pip--10.dev.readthedocs.build/en/10/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(VersionNotFoundError) as excinfo: + unresolve("https://pip--10.dev.readthedocs.build/en/10/") + + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version_slug, "10") + self.assertEqual(exc.filename, "/") def test_external_version_not_matching_final_version(self): - parts = unresolve("https://pip--10.dev.readthedocs.build/en/latest/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(InvalidExternalVersionError) as excinfo: + unresolve("https://pip--10.dev.readthedocs.build/en/latest/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.version, self.version) + self.assertEqual(exc.external_version_slug, "10") def test_normal_version_in_external_version_subdomain(self): - parts = unresolve("https://pip--latest.dev.readthedocs.build/en/latest/") - self.assertEqual(parts.parent_project, self.pip) - self.assertEqual(parts.project, self.pip) - self.assertEqual(parts.version, None) - self.assertEqual(parts.filename, None) + with pytest.raises(InvalidExternalVersionError) as excinfo: + unresolve("https://pip--latest.dev.readthedocs.build/en/latest/") + exc = excinfo.value + self.assertEqual(exc.project, self.pip) + self.assertEqual(exc.external_version_slug, "latest") + self.assertEqual(exc.version, self.version) def test_malformed_external_version(self): with pytest.raises(InvalidExternalDomainError): From 7bc0505e9bfa1b784ccad4218121bd3a0f7481cc Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Thu, 16 Feb 2023 18:06:32 -0500 Subject: [PATCH 02/13] Proxito V2 --- readthedocs/projects/models.py | 5 ++ readthedocs/proxito/views/serve.py | 132 ++++++++++++++++++++++++++++- 2 files changed, 136 insertions(+), 1 deletion(-) diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index b7d80730c07..93fd436d30d 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -1824,6 +1824,7 @@ def add_features(sender, **kwargs): DISABLE_PAGEVIEWS = "disable_pageviews" DISABLE_SPHINX_DOMAINS = "disable_sphinx_domains" RESOLVE_PROJECT_FROM_HEADER = "resolve_project_from_header" + USE_UNRESOLVER_WITH_PROXITO = "use_unresolver_with_proxito" # Versions sync related features SKIP_SYNC_TAGS = 'skip_sync_tags' @@ -1939,6 +1940,10 @@ def add_features(sender, **kwargs): RESOLVE_PROJECT_FROM_HEADER, _("Allow usage of the X-RTD-Slug header"), ), + ( + USE_UNRESOLVER_WITH_PROXITO, + _("Use new unresolver implementation for serving documentation files."), + ), # Versions sync related features ( diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 2dccf3b813f..24ef26bd800 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -17,6 +17,13 @@ from readthedocs.builds.models import Version from readthedocs.core.mixins import CDNCacheControlMixin from readthedocs.core.resolver import resolve_path +from readthedocs.core.unresolver import ( + InvalidExternalVersionError, + TranslationNotFoundError, + UnresolvedPathError, + VersionNotFoundError, + unresolver, +) from readthedocs.core.utils.extend import SettingsOverrideObject from readthedocs.projects import constants from readthedocs.projects.models import Domain, Feature @@ -82,13 +89,17 @@ def get(self, lang_slug=None, version_slug=None, filename='', - ): # noqa + ): """ Take the incoming parsed URL's and figure out what file to serve. ``subproject_slash`` is used to determine if the subproject URL has a slash, so that we can decide if we need to serve docs or add a /. """ + unresolved_domain = request.unresolved_domain + if unresolved_domain.project.has_feature(Feature.USE_UNRESOLVER_WITH_PROXITO): + return self.get_using_unresolver(request) + version_slug = self.get_version_from_host(request, version_slug) final_project, lang_slug, version_slug, filename = _get_project_data_from_request( # noqa request, @@ -274,6 +285,125 @@ def _get_canonical_redirect_type(self, request): return None + def get_using_unresolver(self, request): + unresolved_domain = request.unresolved_domain + # TODO: capture this path in the URL. + path = request.path_info + try: + unresolved = unresolver.unresolve_path( + unresolved_domain=unresolved_domain, + path=path, + append_indexhtml=False, + ) + except (VersionNotFoundError, TranslationNotFoundError): + raise Http404 + except InvalidExternalVersionError: + raise Http404 + except UnresolvedPathError as exc: + # This exception happends if the project didn't have an explict version, + # this is a ``/ -> /en/latest`` or + # ``/projects/subproject/ -> /projects/subproject/en/latest/`` redirect. + project = exc.project + # A system redirect can be cached if we don't have information + # about the version, since the final URL will check for authz. + if self._is_cache_enabled(project): + self.cache_request = True + + if unresolved_domain.is_from_external_domain: + version_slug = unresolved_domain.external_version_slug + else: + version_slug = None + + return self.system_redirect( + request=request, + final_project=project, + version_slug=version_slug, + filename=exc.path, + is_external_version=unresolved_domain.is_from_external_domain, + ) + + project = unresolved.project + version = unresolved.version + filename = unresolved.filename + + if not version.active: + log.warning("Version is not active.") + raise Http404("Version is not active.") + + if self._is_cache_enabled(project) and version and not version.is_private: + # All public versions can be cached. + self.cache_request = True + + log.bind(cache_request=self.cache_request) + log.debug("Serving docs.") + + # Verify if the project is marked as spam and return a 401 in that case + spam_response = self._spam_response(request, project) + if spam_response: + return spam_response + + # Handle requests that need canonicalizing (eg. HTTP -> HTTPS, redirect to canonical domain) + redirect_type = self._get_canonical_redirect_type(request) + if redirect_type: + # A canonical redirect can be cached, if we don't have information + # about the version, since the final URL will check for authz. + if not version and self._is_cache_enabled(project): + self.cache_request = True + try: + return self.canonical_redirect( + request=request, + final_project=project, + version_slug=version.slug, + filename=filename, + redirect_type=redirect_type, + is_external_version=unresolved.external, + ) + except InfiniteRedirectException: + # Don't redirect in this case, since it would break things + pass + + # Trailing slash redirect. + if filename == "/" and not path.endswith("/"): + return self.system_redirect( + request=request, + final_project=project, + version_slug=version.slug, + filename=filename, + is_external_version=unresolved_domain.is_from_external_domain, + ) + + redirect_path, http_status = self.get_redirect( + project=project, + lang_slug=project.language, + version_slug=version.slug, + filename=unresolved.filename, + full_path=request.path, + forced_only=True, + ) + if redirect_path and http_status: + log.bind(forced_redirect=True) + try: + return self.get_redirect_response( + request=request, + redirect_path=redirect_path, + proxito_path=request.path, + http_status=http_status, + ) + except InfiniteRedirectException: + # Continue with our normal serve. + pass + + # Check user permissions and return an unauthed response if needed + if not self.allowed_user(request, project, version.slug): + return self.get_unauthed_response(request, project) + + return self._serve_docs( + request=request, + project=project, + version=version, + filename=unresolved.filename, + ) + class ServeDocs(SettingsOverrideObject): _default_class = ServeDocsBase From 14bdbd25144f1349c3b7f46c315368c40b3d7a77 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 21 Feb 2023 14:45:45 -0500 Subject: [PATCH 03/13] Move external version checks inside unresolver --- readthedocs/core/unresolver.py | 59 +++++++------------ .../rtd_tests/tests/test_unresolver.py | 8 +-- 2 files changed, 25 insertions(+), 42 deletions(-) diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index f1361e32ef6..7e909d8ead3 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -6,7 +6,7 @@ import structlog from django.conf import settings -from readthedocs.builds.constants import EXTERNAL +from readthedocs.builds.constants import EXTERNAL, INTERNAL from readthedocs.builds.models import Version from readthedocs.constants import pattern_opts from readthedocs.projects.models import Domain, Feature, Project @@ -65,9 +65,9 @@ def __init__(self, project, path): class InvalidExternalVersionError(UnresolverError): - def __init__(self, project, version, external_version_slug): + def __init__(self, project, version_slug, external_version_slug): self.project = project - self.version = version + self.version_slug = version_slug self.external_version_slug = external_version_slug @@ -216,33 +216,6 @@ def _unresolve(self, unresolved_domain, parsed_url, append_indexhtml): external_version_slug=unresolved_domain.external_version_slug, ) - # Make sure we are serving the external version from the subdomain. - if unresolved_domain.is_from_external_domain: - domain = unresolved_domain.source_domain - if unresolved_domain.external_version_slug != version.slug: - log.warning( - "Invalid version for external domain.", - domain=domain, - version_slug=version.slug, - ) - raise InvalidExternalVersionError( - project=current_project, - version=version, - external_version_slug=unresolved_domain.external_version_slug, - ) - if not version.is_external: - log.warning( - "Attempt of serving a non-external version from RTD_EXTERNAL_VERSION_DOMAIN.", - domain=domain, - version_slug=version.slug, - version_type=version.type, - ) - raise InvalidExternalVersionError( - project=current_project, - version=version, - external_version_slug=unresolved_domain.external_version_slug, - ) - if append_indexhtml and filename.endswith("/"): filename += "index.html" @@ -264,7 +237,9 @@ def _normalize_filename(filename): filename = "/" + filename return filename - def _match_multiversion_project(self, parent_project, path): + def _match_multiversion_project( + self, parent_project, path, external_version_slug=None + ): """ Try to match a multiversion project. @@ -290,7 +265,15 @@ def _match_multiversion_project(self, parent_project, path): project=parent_project, language=language, filename=file ) - version = project.versions.filter(slug=version_slug).first() + if external_version_slug and external_version_slug != version_slug: + raise InvalidExternalVersionError( + project=project, + version_slug=version_slug, + external_version_slug=external_version_slug, + ) + + manager = EXTERNAL if external_version_slug else INTERNAL + version = project.versions(manager=manager).filter(slug=version_slug).first() if not version: raise VersionNotFoundError( project=project, version_slug=version_slug, filename=file @@ -348,15 +331,14 @@ def _match_single_version_project( file = self._normalize_filename(path) if external_version_slug: version_slug = external_version_slug - version = ( - parent_project.versions(manager=EXTERNAL) - .filter(slug=version_slug) - .first() - ) + manager = EXTERNAL else: version_slug = parent_project.default_version - version = parent_project.versions.filter(slug=version_slug).first() + manager = INTERNAL + version = ( + parent_project.versions(manager=manager).filter(slug=version_slug).first() + ) if not version: raise VersionNotFoundError( project=parent_project, version_slug=version_slug, filename=file @@ -400,6 +382,7 @@ def _unresolve_path( response = self._match_multiversion_project( parent_project=parent_project, path=path, + external_version_slug=external_version_slug, ) if response: return response diff --git a/readthedocs/rtd_tests/tests/test_unresolver.py b/readthedocs/rtd_tests/tests/test_unresolver.py index f95d72cb731..482806fdf01 100644 --- a/readthedocs/rtd_tests/tests/test_unresolver.py +++ b/readthedocs/rtd_tests/tests/test_unresolver.py @@ -298,16 +298,16 @@ def test_external_version_not_matching_final_version(self): unresolve("https://pip--10.dev.readthedocs.build/en/latest/") exc = excinfo.value self.assertEqual(exc.project, self.pip) - self.assertEqual(exc.version, self.version) + self.assertEqual(exc.version_slug, "latest") self.assertEqual(exc.external_version_slug, "10") def test_normal_version_in_external_version_subdomain(self): - with pytest.raises(InvalidExternalVersionError) as excinfo: + with pytest.raises(VersionNotFoundError) as excinfo: unresolve("https://pip--latest.dev.readthedocs.build/en/latest/") exc = excinfo.value self.assertEqual(exc.project, self.pip) - self.assertEqual(exc.external_version_slug, "latest") - self.assertEqual(exc.version, self.version) + self.assertEqual(exc.version_slug, "latest") + self.assertEqual(exc.filename, "/") def test_malformed_external_version(self): with pytest.raises(InvalidExternalDomainError): From f540ffa6ff0accd3fad95a75257ff8f05b3598f2 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 21 Feb 2023 18:03:52 -0500 Subject: [PATCH 04/13] Updates --- readthedocs/proxito/views/serve.py | 72 ++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 24ef26bd800..8154827d0d5 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -295,25 +295,55 @@ def get_using_unresolver(self, request): path=path, append_indexhtml=False, ) - except (VersionNotFoundError, TranslationNotFoundError): - raise Http404 - except InvalidExternalVersionError: + except (VersionNotFoundError, TranslationNotFoundError, InvalidExternalVersionError) as exc: + # TODO: find a better way to pass this to the middleware. + request.path_project_slug = exc.project.slug raise Http404 except UnresolvedPathError as exc: - # This exception happends if the project didn't have an explict version, - # this is a ``/ -> /en/latest`` or - # ``/projects/subproject/ -> /projects/subproject/en/latest/`` redirect. project = exc.project - # A system redirect can be cached if we don't have information - # about the version, since the final URL will check for authz. - if self._is_cache_enabled(project): - self.cache_request = True - if unresolved_domain.is_from_external_domain: version_slug = unresolved_domain.external_version_slug else: version_slug = None + # TODO: find a better way to pass this to the middleware. + request.path_project_slug = project.slug + request.path_version_slug = version_slug + + # Before doing anything else, we need to check for canonical redirects. + redirect_type = self._get_canonical_redirect_type(request) + if redirect_type: + # A canonical redirect can be cached, if we don't have information + # about the version, since the final URL will check for authz. + if self._is_cache_enabled(project): + self.cache_request = True + try: + return self.canonical_redirect( + request=request, + final_project=project, + version_slug=version_slug, + filename=path, + redirect_type=redirect_type, + is_external_version=unresolved_domain.is_from_external_domain, + ) + except InfiniteRedirectException: + # Don't redirect in this case, since it would break things + self.cache_request = False + + # If the path is not empty, the path doesn't resolve to a proper file. + if exc.path not in ["", "/"]: + raise Http404 + + # When the path is empty, the project didn't have an explicit version, + # so we need to redirect to the default version. + # This is `/ -> /en/latest/` or + # `/projects/subproject/ -> /projects/subproject/en/latest/`. + + # A system redirect can be cached if we don't have information + # about the version, since the final URL will check for authz. + if self._is_cache_enabled(project): + self.cache_request = True + return self.system_redirect( request=request, final_project=project, @@ -326,11 +356,15 @@ def get_using_unresolver(self, request): version = unresolved.version filename = unresolved.filename + # TODO: find a better way to pass this to the middleware. + request.path_project_slug = project.slug + request.path_version_slug = version.slug + if not version.active: log.warning("Version is not active.") raise Http404("Version is not active.") - if self._is_cache_enabled(project) and version and not version.is_private: + if self._is_cache_enabled(project) and not version.is_private: # All public versions can be cached. self.cache_request = True @@ -342,13 +376,9 @@ def get_using_unresolver(self, request): if spam_response: return spam_response - # Handle requests that need canonicalizing (eg. HTTP -> HTTPS, redirect to canonical domain) + # Check for canonical redirects before serving the file. redirect_type = self._get_canonical_redirect_type(request) if redirect_type: - # A canonical redirect can be cached, if we don't have information - # about the version, since the final URL will check for authz. - if not version and self._is_cache_enabled(project): - self.cache_request = True try: return self.canonical_redirect( request=request, @@ -363,6 +393,11 @@ def get_using_unresolver(self, request): pass # Trailing slash redirect. + # We don't want to serve documentation at: + # - `/en/latest` + # - `/projects/subproject/en/latest` + # - `/projects/subproject` + # These paths need to end with an slash. if filename == "/" and not path.endswith("/"): return self.system_redirect( request=request, @@ -372,6 +407,7 @@ def get_using_unresolver(self, request): is_external_version=unresolved_domain.is_from_external_domain, ) + # Check for forced redirects. redirect_path, http_status = self.get_redirect( project=project, lang_slug=project.language, @@ -393,7 +429,7 @@ def get_using_unresolver(self, request): # Continue with our normal serve. pass - # Check user permissions and return an unauthed response if needed + # Check user permissions and return an unauthed response if needed. if not self.allowed_user(request, project, version.slug): return self.get_unauthed_response(request, project) From dcf0b66a911bfb327fc848b79873d3ee97f1eb5d Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 22 Feb 2023 11:43:37 -0500 Subject: [PATCH 05/13] Tests --- readthedocs/proxito/tests/test_full.py | 44 ++++++++++++++++++ readthedocs/proxito/tests/test_headers.py | 14 +++++- readthedocs/proxito/tests/test_middleware.py | 11 +++++ .../proxito/tests/test_old_redirects.py | 46 +++++++++++++++++++ 4 files changed, 114 insertions(+), 1 deletion(-) diff --git a/readthedocs/proxito/tests/test_full.py b/readthedocs/proxito/tests/test_full.py index 2d7e890f86c..06e6f69f187 100644 --- a/readthedocs/proxito/tests/test_full.py +++ b/readthedocs/proxito/tests/test_full.py @@ -332,6 +332,17 @@ def test_custom_domain_response_hsts(self): ) +class ProxitoV2TestFullDocServing(TestFullDocServing): + def setUp(self): + super().setUp() + get( + Feature, + feature_id=Feature.USE_UNRESOLVER_WITH_PROXITO, + default_true=True, + future_default_true=True, + ) + + class TestDocServingBackends(BaseDocServing): # Test that nginx and python backends both work @@ -471,6 +482,17 @@ def test_track_downloads(self, is_audit_enabled): self.assertEqual(log.action, AuditLog.DOWNLOAD) +class ProxitoV2TestDocServingBackends(TestDocServingBackends): + def setUp(self): + super().setUp() + get( + Feature, + feature_id=Feature.USE_UNRESOLVER_WITH_PROXITO, + default_true=True, + future_default_true=True, + ) + + @override_settings( PYTHON_MEDIA=False, PUBLIC_DOMAIN='readthedocs.io', @@ -1249,6 +1271,17 @@ def test_serve_invalid_static_file(self, staticfiles_storage): self.assertEqual(resp.status_code, 404) +class ProxitoV2TestAdditionalDocViews(TestAdditionalDocViews): + def setUp(self): + super().setUp() + get( + Feature, + feature_id=Feature.USE_UNRESOLVER_WITH_PROXITO, + default_true=True, + future_default_true=True, + ) + + @override_settings( ALLOW_PRIVATE_REPOS=True, PUBLIC_DOMAIN='dev.readthedocs.io', @@ -1495,3 +1528,14 @@ def test_cache_on_plan(self): # Add project to plan, so we're using that to enable CDN self.organization.projects.add(self.project) self._test_cache_control_header_project(expected_value="public") + + +class ProxitoV2TestCDNCache(TestCDNCache): + def setUp(self): + super().setUp() + get( + Feature, + feature_id=Feature.USE_UNRESOLVER_WITH_PROXITO, + default_true=True, + future_default_true=True, + ) diff --git a/readthedocs/proxito/tests/test_headers.py b/readthedocs/proxito/tests/test_headers.py index 35ff8ef523d..07e9952cb58 100644 --- a/readthedocs/proxito/tests/test_headers.py +++ b/readthedocs/proxito/tests/test_headers.py @@ -1,9 +1,10 @@ import django_dynamic_fixture as fixture from django.test import override_settings from django.urls import reverse +from django_dynamic_fixture import get from readthedocs.builds.constants import LATEST -from readthedocs.projects.models import Domain, HTTPHeader +from readthedocs.projects.models import Domain, Feature, HTTPHeader from .base import BaseDocServing @@ -143,3 +144,14 @@ def test_cache_headers_private_projects(self): r = self.client.get('/en/latest/', HTTP_HOST='project.dev.readthedocs.io') self.assertEqual(r.status_code, 200) self.assertEqual(r['CDN-Cache-Control'], 'private') + + +class ProxitoV2HeaderTests(ProxitoHeaderTests): + def setUp(self): + super().setUp() + get( + Feature, + feature_id=Feature.USE_UNRESOLVER_WITH_PROXITO, + default_true=True, + future_default_true=True, + ) diff --git a/readthedocs/proxito/tests/test_middleware.py b/readthedocs/proxito/tests/test_middleware.py index c4408e94ec8..43d0d5ae371 100644 --- a/readthedocs/proxito/tests/test_middleware.py +++ b/readthedocs/proxito/tests/test_middleware.py @@ -255,6 +255,17 @@ def test_front_slash_url(self): ) +class ProxitoV2MiddlewareTests(MiddlewareTests): + def setUp(self): + super().setUp() + get( + Feature, + feature_id=Feature.USE_UNRESOLVER_WITH_PROXITO, + default_true=True, + future_default_true=True, + ) + + @pytest.mark.proxito @override_settings(PUBLIC_DOMAIN='dev.readthedocs.io') class MiddlewareURLConfTests(TestCase): diff --git a/readthedocs/proxito/tests/test_old_redirects.py b/readthedocs/proxito/tests/test_old_redirects.py index ba6625f817f..b6532a0fb36 100644 --- a/readthedocs/proxito/tests/test_old_redirects.py +++ b/readthedocs/proxito/tests/test_old_redirects.py @@ -14,6 +14,7 @@ from django.http import Http404 from django.test.utils import override_settings from django.urls import Resolver404 +from django_dynamic_fixture import get from readthedocs.builds.models import Version from readthedocs.projects.models import Feature @@ -153,6 +154,17 @@ def test_root_redirect_with_query_params(self): ) +class ProxitoV2InternalRedirectTests(InternalRedirectTests): + def setUp(self): + super().setUp() + get( + Feature, + feature_id=Feature.USE_UNRESOLVER_WITH_PROXITO, + default_true=True, + future_default_true=True, + ) + + # Use ``PYTHON_MEDIA`` here to raise a 404 when trying to serve the file # from disk and execute the code for the handler404 (the file does not # exist). On production, this will happen when trying to serve from @@ -649,6 +661,17 @@ def test_not_found_page_without_trailing_slash(self): ) +class ProxitoV2UserRedirectTests(UserRedirectTests): + def setUp(self): + super().setUp() + get( + Feature, + feature_id=Feature.USE_UNRESOLVER_WITH_PROXITO, + default_true=True, + future_default_true=True, + ) + + @override_settings(PUBLIC_DOMAIN="dev.readthedocs.io") class UserForcedRedirectTests(BaseDocServing): def test_no_forced_redirect(self): @@ -943,6 +966,18 @@ def test_redirect_with_301_status(self): "http://project.dev.readthedocs.io/en/latest/tutorial/install.html", ) + +class ProxitoV2UserForcedRedirectTests(UserForcedRedirectTests): + def setUp(self): + super().setUp() + get( + Feature, + feature_id=Feature.USE_UNRESOLVER_WITH_PROXITO, + default_true=True, + future_default_true=True, + ) + + @override_settings( PYTHON_MEDIA=True, PUBLIC_DOMAIN="dev.readthedocs.io", @@ -1085,3 +1120,14 @@ def test_redirect_sphinx_html_crossdomain(self): ) self.assertEqual(r.status_code, 302, url) self.assertEqual(r["Location"], expected_location, url) + + +class ProxitoV2UserRedirectCrossdomainTest(UserRedirectCrossdomainTest): + def setUp(self): + super().setUp() + get( + Feature, + feature_id=Feature.USE_UNRESOLVER_WITH_PROXITO, + default_true=True, + future_default_true=True, + ) From 774fce2b44afbdd920602fd3431b1bc9a2be684e Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 22 Feb 2023 12:33:27 -0500 Subject: [PATCH 06/13] Black --- readthedocs/proxito/views/serve.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 8154827d0d5..84f5d7c14bf 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -80,16 +80,16 @@ def get(self, request, subproject_slug=None, filename=""): class ServeDocsBase(CDNCacheControlMixin, ServeRedirectMixin, ServeDocsMixin, View): - - def get(self, - request, - project_slug=None, - subproject_slug=None, - subproject_slash=None, - lang_slug=None, - version_slug=None, - filename='', - ): + def get( + self, + request, + project_slug=None, + subproject_slug=None, + subproject_slash=None, + lang_slug=None, + version_slug=None, + filename="", + ): """ Take the incoming parsed URL's and figure out what file to serve. @@ -295,7 +295,11 @@ def get_using_unresolver(self, request): path=path, append_indexhtml=False, ) - except (VersionNotFoundError, TranslationNotFoundError, InvalidExternalVersionError) as exc: + except ( + VersionNotFoundError, + TranslationNotFoundError, + InvalidExternalVersionError, + ) as exc: # TODO: find a better way to pass this to the middleware. request.path_project_slug = exc.project.slug raise Http404 From 36b9f02820ba79380b516acb7a28a3d87723670b Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 22 Feb 2023 17:11:42 -0500 Subject: [PATCH 07/13] Updates from review --- readthedocs/core/unresolver.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index 7e909d8ead3..52bea1a902e 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -107,6 +107,7 @@ class UnresolvedDomain: source_domain: str source: DomainSourceType project: Project + # Domain object for custom domains. domain: Domain = None external_version_slug: str = None @@ -247,6 +248,7 @@ def _match_multiversion_project( this exception has the current project (useful for 404 pages). :returns: A tuple with the current project, version and file. + Returns `None` if there isn't a total or partial match. """ match = self.multiversion_pattern.match(path) if not match: @@ -289,6 +291,7 @@ def _match_subproject(self, parent_project, path, external_version_slug=None): with the subproject as the canonical project. :returns: A tuple with the current project, version and file. + Returns `None` if there isn't a total or partial match. """ match = self.subproject_pattern.match(path) if not match: @@ -327,6 +330,7 @@ def _match_single_version_project( this exception has the current project (useful for 404 pages). :returns: A tuple with the current project, version and file. + Returns `None` if there isn't a total or partial match. """ file = self._normalize_filename(path) if external_version_slug: From cd2ae183fd3e28f85c2ba8414ca6faf27cbc0b13 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 22 Feb 2023 18:28:21 -0500 Subject: [PATCH 08/13] Small updates --- readthedocs/proxito/views/serve.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 84f5d7c14bf..f6387d9ac09 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -166,7 +166,8 @@ def get( ) except InfiniteRedirectException: # Don't redirect in this case, since it would break things - pass + # Disable caching, since we failed to redirect. + self.cache_request = False # Handle a / redirect when we aren't a single version if all([ @@ -286,6 +287,12 @@ def _get_canonical_redirect_type(self, request): return None def get_using_unresolver(self, request): + """ + Resolve the current request using the new proxito implementation. + + This is basically a copy of the get() method, + but adapted to make use of the unresolved to extract the current project, version, and file. + """ unresolved_domain = request.unresolved_domain # TODO: capture this path in the URL. path = request.path_info @@ -331,7 +338,8 @@ def get_using_unresolver(self, request): is_external_version=unresolved_domain.is_from_external_domain, ) except InfiniteRedirectException: - # Don't redirect in this case, since it would break things + # Don't redirect in this case, since it would break things. + # Disable caching, since we failed to redirect. self.cache_request = False # If the path is not empty, the path doesn't resolve to a proper file. @@ -393,7 +401,7 @@ def get_using_unresolver(self, request): is_external_version=unresolved.external, ) except InfiniteRedirectException: - # Don't redirect in this case, since it would break things + # Don't redirect in this case, since it would break things. pass # Trailing slash redirect. From db3b96822b4b64eff89d236bc3f7ea14648df4e2 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 22 Feb 2023 19:19:59 -0500 Subject: [PATCH 09/13] Normalize path --- readthedocs/core/unresolver.py | 5 ++++- readthedocs/proxito/views/serve.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index 7e909d8ead3..e6b303ec8b7 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -407,7 +407,10 @@ def _unresolve_path( if response: return response - raise UnresolvedPathError(project=parent_project, path=path) + raise UnresolvedPathError( + project=parent_project, + path=self._normalize_filename(path), + ) @staticmethod def get_domain_from_host(host): diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index f6387d9ac09..867eb8a8a02 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -343,7 +343,7 @@ def get_using_unresolver(self, request): self.cache_request = False # If the path is not empty, the path doesn't resolve to a proper file. - if exc.path not in ["", "/"]: + if exc.path != "/": raise Http404 # When the path is empty, the project didn't have an explicit version, From 5fbcb0318ce1b62c532987b20321f8219c4f9979 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 27 Feb 2023 10:51:16 -0500 Subject: [PATCH 10/13] Put todo in tests --- readthedocs/proxito/tests/test_full.py | 4 ++++ readthedocs/proxito/tests/test_headers.py | 1 + readthedocs/proxito/tests/test_middleware.py | 1 + readthedocs/proxito/tests/test_old_redirects.py | 4 ++++ 4 files changed, 10 insertions(+) diff --git a/readthedocs/proxito/tests/test_full.py b/readthedocs/proxito/tests/test_full.py index 06e6f69f187..4482dc14794 100644 --- a/readthedocs/proxito/tests/test_full.py +++ b/readthedocs/proxito/tests/test_full.py @@ -333,6 +333,7 @@ def test_custom_domain_response_hsts(self): class ProxitoV2TestFullDocServing(TestFullDocServing): + # TODO: remove this class once the new implementation is the default. def setUp(self): super().setUp() get( @@ -483,6 +484,7 @@ def test_track_downloads(self, is_audit_enabled): class ProxitoV2TestDocServingBackends(TestDocServingBackends): + # TODO: remove this class once the new implementation is the default. def setUp(self): super().setUp() get( @@ -1272,6 +1274,7 @@ def test_serve_invalid_static_file(self, staticfiles_storage): class ProxitoV2TestAdditionalDocViews(TestAdditionalDocViews): + # TODO: remove this class once the new implementation is the default. def setUp(self): super().setUp() get( @@ -1531,6 +1534,7 @@ def test_cache_on_plan(self): class ProxitoV2TestCDNCache(TestCDNCache): + # TODO: remove this class once the new implementation is the default. def setUp(self): super().setUp() get( diff --git a/readthedocs/proxito/tests/test_headers.py b/readthedocs/proxito/tests/test_headers.py index 07e9952cb58..d2ea5e1a2c8 100644 --- a/readthedocs/proxito/tests/test_headers.py +++ b/readthedocs/proxito/tests/test_headers.py @@ -147,6 +147,7 @@ def test_cache_headers_private_projects(self): class ProxitoV2HeaderTests(ProxitoHeaderTests): + # TODO: remove this class once the new implementation is the default. def setUp(self): super().setUp() get( diff --git a/readthedocs/proxito/tests/test_middleware.py b/readthedocs/proxito/tests/test_middleware.py index 43d0d5ae371..b2ccc5bc4a8 100644 --- a/readthedocs/proxito/tests/test_middleware.py +++ b/readthedocs/proxito/tests/test_middleware.py @@ -256,6 +256,7 @@ def test_front_slash_url(self): class ProxitoV2MiddlewareTests(MiddlewareTests): + # TODO: remove this class once the new implementation is the default. def setUp(self): super().setUp() get( diff --git a/readthedocs/proxito/tests/test_old_redirects.py b/readthedocs/proxito/tests/test_old_redirects.py index b6532a0fb36..aed84bbb6c8 100644 --- a/readthedocs/proxito/tests/test_old_redirects.py +++ b/readthedocs/proxito/tests/test_old_redirects.py @@ -155,6 +155,7 @@ def test_root_redirect_with_query_params(self): class ProxitoV2InternalRedirectTests(InternalRedirectTests): + # TODO: remove this class once the new implementation is the default. def setUp(self): super().setUp() get( @@ -662,6 +663,7 @@ def test_not_found_page_without_trailing_slash(self): class ProxitoV2UserRedirectTests(UserRedirectTests): + # TODO: remove this class once the new implementation is the default. def setUp(self): super().setUp() get( @@ -968,6 +970,7 @@ def test_redirect_with_301_status(self): class ProxitoV2UserForcedRedirectTests(UserForcedRedirectTests): + # TODO: remove this class once the new implementation is the default. def setUp(self): super().setUp() get( @@ -1123,6 +1126,7 @@ def test_redirect_sphinx_html_crossdomain(self): class ProxitoV2UserRedirectCrossdomainTest(UserRedirectCrossdomainTest): + # TODO: remove this class once the new implementation is the default. def setUp(self): super().setUp() get( From 69f4886bda3b21d57948481996a7fc3dcaed6ec4 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 28 Feb 2023 17:16:32 -0500 Subject: [PATCH 11/13] Updates from review --- readthedocs/core/unresolver.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index 52bea1a902e..7bf6bbe3004 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -160,7 +160,7 @@ class Unresolver: re.VERBOSE, ) - def unresolve(self, url, append_indexhtml=True): + def unresolve_url(self, url, append_indexhtml=True): """ Turn a URL into the component parts that our views would use to process them. @@ -211,7 +211,7 @@ def _unresolve(self, unresolved_domain, parsed_url, append_indexhtml): Extracted into a separate method so it can be re-used by the unresolve and unresolve_path methods. """ - current_project, version, filename = self._unresolve_path( + current_project, version, filename = self._unresolve_path_with_parent_project( parent_project=unresolved_domain.project, path=parsed_url.path, external_version_slug=unresolved_domain.external_version_slug, @@ -308,7 +308,7 @@ def _match_subproject(self, parent_project, path, external_version_slug=None): # We use the subproject as the new parent project # to resolve the rest of the path relative to it. subproject = project_relationship.child - response = self._unresolve_path( + response = self._unresolve_path_with_parent_project( parent_project=subproject, path=file, check_subprojects=False, @@ -350,7 +350,7 @@ def _match_single_version_project( return parent_project, version, file - def _unresolve_path( + def _unresolve_path_with_parent_project( self, parent_project, path, check_subprojects=True, external_version_slug=None ): """ @@ -540,4 +540,4 @@ def unresolve_domain_from_request(self, request): unresolver = Unresolver() -unresolve = unresolver.unresolve +unresolve = unresolver.unresolve_url From bdfc2c7eb80caac06041074a6f51c60ce5a00983 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 1 Mar 2023 19:41:27 -0500 Subject: [PATCH 12/13] Match new changes --- readthedocs/proxito/views/serve.py | 86 +++++++++++------------------- 1 file changed, 32 insertions(+), 54 deletions(-) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index cd078bcc8bc..9c3a58131d9 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -300,17 +300,29 @@ def get_using_unresolver(self, request): unresolved_domain = request.unresolved_domain # TODO: capture this path in the URL. path = request.path_info + + # We force all storage calls to use the external versions storage, + # since we are serving an external version. + if unresolved_domain.is_from_external_domain: + self.version_type = EXTERNAL + try: unresolved = unresolver.unresolve_path( unresolved_domain=unresolved_domain, path=path, append_indexhtml=False, ) - except ( - VersionNotFoundError, - TranslationNotFoundError, - InvalidExternalVersionError, - ) as exc: + except VersionNotFoundError as exc: + # TODO: find a better way to pass this to the middleware. + request.path_project_slug = exc.project.slug + request.path_version_slug = exc.version_slug + raise Http404 + except InvalidExternalVersionError as exc: + # TODO: find a better way to pass this to the middleware. + request.path_project_slug = exc.project.slug + request.path_version_slug = exc.external_version_slug + raise Http404 + except TranslationNotFoundError as exc: # TODO: find a better way to pass this to the middleware. request.path_project_slug = exc.project.slug raise Http404 @@ -325,27 +337,6 @@ def get_using_unresolver(self, request): request.path_project_slug = project.slug request.path_version_slug = version_slug - # Before doing anything else, we need to check for canonical redirects. - redirect_type = self._get_canonical_redirect_type(request) - if redirect_type: - # A canonical redirect can be cached, if we don't have information - # about the version, since the final URL will check for authz. - if self._is_cache_enabled(project): - self.cache_request = True - try: - return self.canonical_redirect( - request=request, - final_project=project, - version_slug=version_slug, - filename=path, - redirect_type=redirect_type, - is_external_version=unresolved_domain.is_from_external_domain, - ) - except InfiniteRedirectException: - # Don't redirect in this case, since it would break things. - # Disable caching, since we failed to redirect. - self.cache_request = False - # If the path is not empty, the path doesn't resolve to a proper file. if exc.path != "/": raise Http404 @@ -354,12 +345,6 @@ def get_using_unresolver(self, request): # so we need to redirect to the default version. # This is `/ -> /en/latest/` or # `/projects/subproject/ -> /projects/subproject/en/latest/`. - - # A system redirect can be cached if we don't have information - # about the version, since the final URL will check for authz. - if self._is_cache_enabled(project): - self.cache_request = True - return self.system_redirect( request=request, final_project=project, @@ -372,6 +357,13 @@ def get_using_unresolver(self, request): version = unresolved.version filename = unresolved.filename + log.bind( + project_slug=project.slug, + version_slug=version.slug, + filename=filename, + external=unresolved_domain.is_from_external_domain, + ) + # TODO: find a better way to pass this to the middleware. request.path_project_slug = project.slug request.path_version_slug = version.slug @@ -380,34 +372,20 @@ def get_using_unresolver(self, request): log.warning("Version is not active.") raise Http404("Version is not active.") - if self._is_cache_enabled(project) and not version.is_private: - # All public versions can be cached. - self.cache_request = True + # All public versions can be cached. + self.cache_response = version.is_public - log.bind(cache_request=self.cache_request) + log.bind(cache_response=self.cache_response) log.debug("Serving docs.") # Verify if the project is marked as spam and return a 401 in that case spam_response = self._spam_response(request, project) if spam_response: + # If a project was marked as spam, + # all of their responses can be cached. + self.cache_response = True return spam_response - # Check for canonical redirects before serving the file. - redirect_type = self._get_canonical_redirect_type(request) - if redirect_type: - try: - return self.canonical_redirect( - request=request, - final_project=project, - version_slug=version.slug, - filename=filename, - redirect_type=redirect_type, - is_external_version=unresolved.external, - ) - except InfiniteRedirectException: - # Don't redirect in this case, since it would break things. - pass - # Trailing slash redirect. # We don't want to serve documentation at: # - `/en/latest` @@ -428,7 +406,7 @@ def get_using_unresolver(self, request): project=project, lang_slug=project.language, version_slug=version.slug, - filename=unresolved.filename, + filename=filename, full_path=request.path, forced_only=True, ) @@ -453,7 +431,7 @@ def get_using_unresolver(self, request): request=request, project=project, version=version, - filename=unresolved.filename, + filename=filename, ) From 1f6738ce542e4d0d225d19991c413c3ef6b04d17 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 1 Mar 2023 19:59:35 -0500 Subject: [PATCH 13/13] Updates from review --- readthedocs/core/unresolver.py | 4 +-- readthedocs/proxito/views/serve.py | 36 ++++++++++--------- .../rtd_tests/tests/test_unresolver.py | 10 +++--- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index fb66d986268..731e8fe47f3 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -58,7 +58,7 @@ def __init__(self, project, language, filename): self.filename = filename -class UnresolvedPathError(UnresolverError): +class InvalidPathForVersionedProjectError(UnresolverError): def __init__(self, project, path): self.project = project self.path = path @@ -411,7 +411,7 @@ def _unresolve_path_with_parent_project( if response: return response - raise UnresolvedPathError( + raise InvalidPathForVersionedProjectError( project=parent_project, path=self._normalize_filename(path), ) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 9c3a58131d9..2ea3f53c12f 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -17,8 +17,8 @@ from readthedocs.core.resolver import resolve_path from readthedocs.core.unresolver import ( InvalidExternalVersionError, + InvalidPathForVersionedProjectError, TranslationNotFoundError, - UnresolvedPathError, VersionNotFoundError, unresolver, ) @@ -298,7 +298,8 @@ def get_using_unresolver(self, request): but adapted to make use of the unresolved to extract the current project, version, and file. """ unresolved_domain = request.unresolved_domain - # TODO: capture this path in the URL. + # TODO: We shouldn't use path_info to the get the proxito path, + # it should be captured in proxito/urls.py. path = request.path_info # We force all storage calls to use the external versions storage, @@ -326,7 +327,7 @@ def get_using_unresolver(self, request): # TODO: find a better way to pass this to the middleware. request.path_project_slug = exc.project.slug raise Http404 - except UnresolvedPathError as exc: + except InvalidPathForVersionedProjectError as exc: project = exc.project if unresolved_domain.is_from_external_domain: version_slug = unresolved_domain.external_version_slug @@ -337,21 +338,20 @@ def get_using_unresolver(self, request): request.path_project_slug = project.slug request.path_version_slug = version_slug - # If the path is not empty, the path doesn't resolve to a proper file. - if exc.path != "/": - raise Http404 + if exc.path == "/": + # When the path is empty, the project didn't have an explicit version, + # so we need to redirect to the default version. + # This is `/ -> /en/latest/` or + # `/projects/subproject/ -> /projects/subproject/en/latest/`. + return self.system_redirect( + request=request, + final_project=project, + version_slug=version_slug, + filename=exc.path, + is_external_version=unresolved_domain.is_from_external_domain, + ) - # When the path is empty, the project didn't have an explicit version, - # so we need to redirect to the default version. - # This is `/ -> /en/latest/` or - # `/projects/subproject/ -> /projects/subproject/en/latest/`. - return self.system_redirect( - request=request, - final_project=project, - version_slug=version_slug, - filename=exc.path, - is_external_version=unresolved_domain.is_from_external_domain, - ) + raise Http404 project = unresolved.project version = unresolved.version @@ -393,6 +393,8 @@ def get_using_unresolver(self, request): # - `/projects/subproject` # These paths need to end with an slash. if filename == "/" and not path.endswith("/"): + # TODO: We could avoid calling the resolver, + # and just redirect to the same path with a slash. return self.system_redirect( request=request, final_project=project, diff --git a/readthedocs/rtd_tests/tests/test_unresolver.py b/readthedocs/rtd_tests/tests/test_unresolver.py index 482806fdf01..9708a523919 100644 --- a/readthedocs/rtd_tests/tests/test_unresolver.py +++ b/readthedocs/rtd_tests/tests/test_unresolver.py @@ -9,9 +9,9 @@ InvalidCustomDomainError, InvalidExternalDomainError, InvalidExternalVersionError, + InvalidPathForVersionedProjectError, SuspiciousHostnameError, TranslationNotFoundError, - UnresolvedPathError, VersionNotFoundError, unresolve, ) @@ -68,26 +68,26 @@ def test_file_with_trailing_slash(self): self.assertEqual(parts.filename, "/foo/index.html") def test_project_no_version_and_language(self): - with pytest.raises(UnresolvedPathError) as excinfo: + with pytest.raises(InvalidPathForVersionedProjectError) as excinfo: unresolve("https://pip.readthedocs.io/") exc = excinfo.value self.assertEqual(exc.project, self.pip) self.assertEqual(exc.path, "/") - with pytest.raises(UnresolvedPathError) as excinfo: + with pytest.raises(InvalidPathForVersionedProjectError) as excinfo: unresolve("https://pip.readthedocs.io/foo/bar") exc = excinfo.value self.assertEqual(exc.project, self.pip) self.assertEqual(exc.path, "/foo/bar") def test_subproject_no_version_and_language(self): - with pytest.raises(UnresolvedPathError) as excinfo: + with pytest.raises(InvalidPathForVersionedProjectError) as excinfo: unresolve("https://pip.readthedocs.io/projects/sub/") exc = excinfo.value self.assertEqual(exc.project, self.subproject) self.assertEqual(exc.path, "/") - with pytest.raises(UnresolvedPathError) as excinfo: + with pytest.raises(InvalidPathForVersionedProjectError) as excinfo: unresolve("https://pip.readthedocs.io/projects/sub/foo/bar") exc = excinfo.value self.assertEqual(exc.project, self.subproject)