diff --git a/readthedocs/proxito/views/mixins.py b/readthedocs/proxito/views/mixins.py index a87e40be3c0..67b4f703d1d 100644 --- a/readthedocs/proxito/views/mixins.py +++ b/readthedocs/proxito/views/mixins.py @@ -1,9 +1,14 @@ import logging import mimetypes +from urllib.parse import urlparse, urlunparse from django.conf import settings from django.core.files.storage import get_storage_class -from django.http import HttpResponse +from django.http import ( + HttpResponse, + HttpResponseRedirect, + HttpResponsePermanentRedirect, +) from django.shortcuts import render from django.utils.encoding import iri_to_uri from django.views.static import serve @@ -79,3 +84,47 @@ def _serve_401(self, request, project): res.status_code = 401 log.debug('Unauthorized access to %s documentation', project.slug) return res + + +class ServeRedirectMixin: + + def get_redirect(self, project, lang_slug, version_slug, filename, full_path): + """ + Check for a redirect for this project that matches ``full_path``. + + :returns: the path to redirect the request and its status code + :rtype: tuple + """ + redirect_path, http_status = project.redirects.get_redirect_path_with_status( + language=lang_slug, + version_slug=version_slug, + path=filename, + full_path=full_path, + ) + return redirect_path, http_status + + def get_redirect_response(self, request, redirect_path, http_status): + """ + Build the response for the ``redirect_path`` and its ``http_status``. + + :returns: redirect respose with the correct path + :rtype: HttpResponseRedirect or HttpResponsePermanentRedirect + """ + schema, netloc, path, params, query, fragments = urlparse(request.path) + new_path = urlunparse((schema, netloc, redirect_path, params, query, fragments)) + + # Re-use the domain and protocol used in the current request. + # Redirects shouldn't change the domain, version or language. + # However, if the new_path is already an absolute URI, just use it + new_path = request.build_absolute_uri(new_path) + log.info( + 'Redirecting: from=%s to=%s http_status=%s', + request.build_absolute_uri(), + new_path, + http_status, + ) + + if http_status and http_status == 301: + return HttpResponsePermanentRedirect(new_path) + + return HttpResponseRedirect(new_path) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 10b238d2522..e55d30be42c 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -20,7 +20,7 @@ from readthedocs.projects import constants from readthedocs.projects.templatetags.projects_tags import sort_version_aware -from .mixins import ServeDocsMixin +from .mixins import ServeDocsMixin, ServeRedirectMixin from .decorators import map_project_slug from .redirects import redirect_project_slug @@ -30,7 +30,7 @@ log = logging.getLogger(__name__) # noqa -class ServeDocsBase(ServeDocsMixin, View): +class ServeDocsBase(ServeRedirectMixin, ServeDocsMixin, View): def get(self, request, @@ -77,11 +77,16 @@ def get(self, ) raise Http404('Invalid URL for project with versions') - # TODO: Redirects need to be refactored before we can turn them on - # They currently do 1 request per redirect that exists for the project - # path, http_status = final_project.redirects.get_redirect_path_with_status( - # language=lang_slug, version_slug=version_slug, path=filename + # TODO: un-comment when ready to perform redirect here + # redirect_path, http_status = self.get_redirect( + # final_project, + # lang_slug, + # version_slug, + # filename, + # request.path, # ) + # if redirect_path and http_status: + # return self.get_redirect_response(request, redirect_path, http_status) # Check user permissions and return an unauthed response if needed if not self.allowed_user(request, final_project, version_slug): @@ -107,7 +112,7 @@ class ServeDocs(SettingsOverrideObject): _default_class = ServeDocsBase -class ServeError404Base(View): +class ServeError404Base(ServeRedirectMixin, View): def get(self, request, proxito_path, template_name='404.html'): """ @@ -138,6 +143,17 @@ def get(self, request, proxito_path, template_name='404.html'): filename=kwargs.get('filename', ''), ) + # Check and perform redirects on 404 handler + redirect_path, http_status = self.get_redirect( + final_project, + lang_slug, + version_slug, + filename, + request.path, + ) + if redirect_path and http_status: + return self.get_redirect_response(request, redirect_path, http_status) + storage_root_path = final_project.get_storage_path( type_='html', version_slug=version_slug, diff --git a/readthedocs/redirects/querysets.py b/readthedocs/redirects/querysets.py index 155ec4762fb..b549e250daf 100644 --- a/readthedocs/redirects/querysets.py +++ b/readthedocs/redirects/querysets.py @@ -1,9 +1,10 @@ """Queryset for the redirects app.""" from django.db import models +from django.db.models import Value, CharField, IntegerField, Q, F, ExpressionWrapper +from django.db.models.functions import Substr, Length from readthedocs.core.utils.extend import SettingsOverrideObject -from readthedocs.projects import constants class RedirectQuerySetBase(models.QuerySet): @@ -25,8 +26,80 @@ def api(self, user=None, detail=True): queryset = self._add_user_repos(queryset, user) return queryset - def get_redirect_path_with_status(self, path, language=None, version_slug=None): - for redirect in self.select_related('project'): + def get_redirect_path_with_status(self, path, full_path=None, language=None, version_slug=None): + # add extra fields with the ``path`` and ``full_path`` to perform a + # filter at db level instead with Python + queryset = self.annotate( + path=Value( + path, + output_field=CharField(), + ), + full_path=Value( + full_path, + output_field=CharField(), + ), + + from_url_length=ExpressionWrapper( + Length('from_url'), + output_field=IntegerField(), + ), + + # 1-indexed + from_url_without_rest=Substr( + 'from_url', + 1, + F('from_url_length') - 5, # Strip "$rest" + output_field=CharField(), + ), + + # 1-indexed + full_path_without_rest=Substr( + 'full_path', + 1, + F('from_url_length') - 5, # Strip "$rest" + output_field=CharField(), + ), + ) + prefix = Q( + redirect_type='prefix', + path__startswith=F('from_url'), + ) + page = Q( + redirect_type='page', + path__iexact=F('from_url'), + ) + exact = ( + Q( + redirect_type='exact', + from_url__endswith='$rest', + # This works around a bug in Django doing a substr and an endswith, + # so instead we do 2 substrs and an exact + # https://code.djangoproject.com/ticket/29155 + full_path_without_rest=F('from_url_without_rest'), + ) | Q( + redirect_type='exact', + full_path__iexact=F('from_url'), + ) + ) + sphinx_html = ( + Q( + redirect_type='sphinx_html', + path__endswith='/', + ) | Q( + redirect_type='sphinx_html', + path__endswith='/index.html', + ) + ) + sphinx_htmldir = Q( + redirect_type='sphinx_htmldir', + path__endswith='.html', + ) + + # There should be one and only one redirect returned by this query. I + # can't think in a case where there can be more at this point. I'm + # leaving the loop just in case for now + queryset = queryset.filter(prefix | page | exact | sphinx_html | sphinx_htmldir) + for redirect in queryset.select_related('project'): new_path = redirect.get_redirect_path( path=path, language=language, diff --git a/readthedocs/redirects/utils.py b/readthedocs/redirects/utils.py index d14d512b7fc..f0698fdb21c 100644 --- a/readthedocs/redirects/utils.py +++ b/readthedocs/redirects/utils.py @@ -84,7 +84,7 @@ def get_redirect_response(request, full_path): language, version_slug, path = language_and_version_from_path(path) path, http_status = project.redirects.get_redirect_path_with_status( - path=path, language=language, version_slug=version_slug + path=path, full_path=full_path, language=language, version_slug=version_slug ) if path is None: