From fd66a6a83f310eb11a22aab6d16eb52928a165de Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Thu, 21 Nov 2019 14:45:47 +0100 Subject: [PATCH 01/11] Perform redirects at DB level These queries a little more complex but they allow us to rely on the database to perform the filter and check for all the redirect in a single query than fetching all the instances of the Redirect objects and iterating over them. This will perform only one query to the DB and the result will be the redirect we need to apply or no redirects at all. --- readthedocs/redirects/querysets.py | 53 ++++++++++++++++++++++++++++-- readthedocs/redirects/utils.py | 2 +- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/readthedocs/redirects/querysets.py b/readthedocs/redirects/querysets.py index 155ec4762fb..f7743fb2545 100644 --- a/readthedocs/redirects/querysets.py +++ b/readthedocs/redirects/querysets.py @@ -1,9 +1,9 @@ """Queryset for the redirects app.""" from django.db import models +from django.db.models import Value, CharField, Q, F from readthedocs.core.utils.extend import SettingsOverrideObject -from readthedocs.projects import constants class RedirectQuerySetBase(models.QuerySet): @@ -25,8 +25,55 @@ def api(self, user=None, detail=True): queryset = self._add_user_repos(queryset, user) return queryset - def get_redirect_path_with_status(self, path, language=None, version_slug=None): - for redirect in self.select_related('project'): + def get_redirect_path_with_status(self, path, full_path=None, language=None, version_slug=None): + # add extra fields with the ``path`` and ``full_path`` to perform a + # filter at db level instead with Python + queryset = self.annotate( + path=Value( + path, + output_field=CharField(), + ), + full_path=Value( + full_path, + output_field=CharField(), + ), + ) + prefix = Q( + redirect_type='prefix', + path__startswith=F('from_url'), + ) + page = Q( + redirect_type='page', + path__iexact=F('from_url'), + ) + exact = ( + Q( + redirect_type='exact', + from_url__endswith='$rest', # Python implementation does "in" + ) | Q( + redirect_type='exact', + full_path__iexact=F('from_url'), + ) + ) + sphinx_html = ( + Q( + redirect_type='sphinx_html', + path__endswith='/', + ) | Q( + redirect_type='sphinx_html', + path__endswith='/index.html', + ) + ) + sphinx_htmldir = Q( + redirect_type='sphinx_html', + path__endswith='.html', + ) + + # There should be one and only one redirect returned by this query. I + # can't think in a case where there can be more at this point. I'm + # leaving the loop just in case for now + queryset = queryset.filter(prefix | page | exact | sphinx_html | sphinx_htmldir) + for redirect in queryset.select_related('project'): new_path = redirect.get_redirect_path( path=path, language=language, diff --git a/readthedocs/redirects/utils.py b/readthedocs/redirects/utils.py index d14d512b7fc..f0698fdb21c 100644 --- a/readthedocs/redirects/utils.py +++ b/readthedocs/redirects/utils.py @@ -84,7 +84,7 @@ def get_redirect_response(request, full_path): language, version_slug, path = language_and_version_from_path(path) path, http_status = project.redirects.get_redirect_path_with_status( - path=path, language=language, version_slug=version_slug + path=path, full_path=full_path, language=language, version_slug=version_slug ) if path is None: From ca3f3f9a4405f5e1be9324ab6de73de01f6e17c2 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Thu, 21 Nov 2019 15:55:27 +0100 Subject: [PATCH 02/11] Trigger redirect logic from El Proxito Adapt El Proxito `serve_docs` to detect if there is any redirect configured on the project that matches the URL and return the proper redirect in that case. --- readthedocs/proxito/views.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/readthedocs/proxito/views.py b/readthedocs/proxito/views.py index ddf684fb9e4..82b797c6982 100644 --- a/readthedocs/proxito/views.py +++ b/readthedocs/proxito/views.py @@ -5,11 +5,11 @@ import mimetypes import os from functools import wraps -from urllib.parse import urlparse +from urllib.parse import urlparse, urlunparse from django.conf import settings from django.core.files.storage import get_storage_class -from django.http import Http404, HttpResponse, HttpResponseRedirect +from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponsePermanentRedirect from django.shortcuts import get_object_or_404, render from django.urls import resolve as url_resolve from django.utils.encoding import iri_to_uri @@ -245,11 +245,23 @@ def serve_docs( ) raise Http404('Invalid URL for project with versions') - # TODO: Redirects need to be refactored before we can turn them on - # They currently do 1 request per redirect that exists for the project - # path, http_status = final_project.redirects.get_redirect_path_with_status( - # language=lang_slug, version_slug=version_slug, path=filename - # ) + full_path = request.path + redirect_path, http_status = final_project.redirects.get_redirect_path_with_status( + language=lang_slug, version_slug=version_slug, path=filename, full_path=full_path, + ) + if redirect_path is not None: + schema, netloc, path, params, query, fragments = urlparse(full_path) + new_path = urlunparse((schema, netloc, redirect_path, params, query, fragments)) + + # Re-use the domain and protocol used in the current request. + # Redirects shouldn't change the domain, version or language. + # However, if the new_path is already an absolute URI, just use it + new_path = request.build_absolute_uri(new_path) + + if http_status and http_status == 301: + return HttpResponsePermanentRedirect(new_path) + + return HttpResponseRedirect(new_path) # Don't do auth checks # try: From 03ce54a7faecfe672a9c221b1997725c4a37d237 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Thu, 21 Nov 2019 16:16:09 +0100 Subject: [PATCH 03/11] Use Func(function='replace') for exact redirect `Func(function='replace')` allows us to modify a field in the row. We use it to remove the `$rest` part of the URL and be able to compare the beginning of the from URL (without the `$rest`) against the `full_path`. --- readthedocs/redirects/querysets.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/readthedocs/redirects/querysets.py b/readthedocs/redirects/querysets.py index f7743fb2545..59bfaf12576 100644 --- a/readthedocs/redirects/querysets.py +++ b/readthedocs/redirects/querysets.py @@ -1,7 +1,7 @@ """Queryset for the redirects app.""" from django.db import models -from django.db.models import Value, CharField, Q, F +from django.db.models import Value, CharField, Q, F, Func from readthedocs.core.utils.extend import SettingsOverrideObject @@ -37,6 +37,16 @@ def get_redirect_path_with_status(self, path, full_path=None, language=None, ver full_path, output_field=CharField(), ), + + # NOTE: using replace here could take some time if there are a lot + # of redirect for this project. + from_url_without_rest=Func( + F('from_url'), + Value('$rest'), + Value(''), + # This could be done with ``Replace`` once on Django 2.2 + function='replace', + ), ) prefix = Q( redirect_type='prefix', @@ -50,6 +60,7 @@ def get_redirect_path_with_status(self, path, full_path=None, language=None, ver Q( redirect_type='exact', from_url__endswith='$rest', # Python implementation does "in" + full_path__startswith=F('from_url_without_rest'), ) | Q( redirect_type='exact', full_path__iexact=F('from_url'), From 49f6425fd26eabe14f837939c3f268fce2dafffb Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Fri, 22 Nov 2019 12:05:02 +0100 Subject: [PATCH 04/11] Try approach with substr instead of replace --- readthedocs/redirects/querysets.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/readthedocs/redirects/querysets.py b/readthedocs/redirects/querysets.py index 59bfaf12576..18c1ca73c31 100644 --- a/readthedocs/redirects/querysets.py +++ b/readthedocs/redirects/querysets.py @@ -1,7 +1,8 @@ """Queryset for the redirects app.""" from django.db import models -from django.db.models import Value, CharField, Q, F, Func +from django.db.models import Value, CharField, Q, F, ExpressionWrapper +from django.db.models.functions import Substr, Length from readthedocs.core.utils.extend import SettingsOverrideObject @@ -38,14 +39,16 @@ def get_redirect_path_with_status(self, path, full_path=None, language=None, ver output_field=CharField(), ), - # NOTE: using replace here could take some time if there are a lot - # of redirect for this project. - from_url_without_rest=Func( - F('from_url'), - Value('$rest'), - Value(''), - # This could be done with ``Replace`` once on Django 2.2 - function='replace', + from_length=ExpressionWrapper( + Length('from_url'), + output_field=CharField() + ), + + # 1-indexed + from_url_without_rest=Substr( + 'from_url', + 1, + F('from_length') - 5 # Strip "$rest" ), ) prefix = Q( From b29f745c6228d240301aed33b2f2e30b31735220 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Fri, 22 Nov 2019 12:28:48 +0100 Subject: [PATCH 05/11] Fix query to do an exact compares --- readthedocs/redirects/querysets.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/readthedocs/redirects/querysets.py b/readthedocs/redirects/querysets.py index 18c1ca73c31..4be5058c267 100644 --- a/readthedocs/redirects/querysets.py +++ b/readthedocs/redirects/querysets.py @@ -50,6 +50,13 @@ def get_redirect_path_with_status(self, path, full_path=None, language=None, ver 1, F('from_length') - 5 # Strip "$rest" ), + + # 1-indexed + full_path_without_rest=Substr( + 'full_path', + 1, + F('from_length') - 5 # Strip "$rest" + ), ) prefix = Q( redirect_type='prefix', @@ -62,8 +69,11 @@ def get_redirect_path_with_status(self, path, full_path=None, language=None, ver exact = ( Q( redirect_type='exact', - from_url__endswith='$rest', # Python implementation does "in" - full_path__startswith=F('from_url_without_rest'), + from_url__endswith='$rest', + # This works around a bug in Django doing a substr and an endswith, + # so instead we do 2 substrs and an exact + # https://code.djangoproject.com/ticket/29155 + full_path_without_rest=F('from_url_without_rest'), ) | Q( redirect_type='exact', full_path__iexact=F('from_url'), From 6b44283b559e2b27304d698d7933a6b462de895e Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Sun, 24 Nov 2019 12:47:26 +0100 Subject: [PATCH 06/11] Define proper output_field on annotation --- readthedocs/redirects/querysets.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/readthedocs/redirects/querysets.py b/readthedocs/redirects/querysets.py index 4be5058c267..1ea106e989f 100644 --- a/readthedocs/redirects/querysets.py +++ b/readthedocs/redirects/querysets.py @@ -1,7 +1,7 @@ """Queryset for the redirects app.""" from django.db import models -from django.db.models import Value, CharField, Q, F, ExpressionWrapper +from django.db.models import Value, CharField, IntegerField, Q, F, ExpressionWrapper from django.db.models.functions import Substr, Length from readthedocs.core.utils.extend import SettingsOverrideObject @@ -41,21 +41,23 @@ def get_redirect_path_with_status(self, path, full_path=None, language=None, ver from_length=ExpressionWrapper( Length('from_url'), - output_field=CharField() + output_field=IntegerField(), ), # 1-indexed from_url_without_rest=Substr( 'from_url', 1, - F('from_length') - 5 # Strip "$rest" + F('from_length') - 5, # Strip "$rest" + output_field=CharField(), ), # 1-indexed full_path_without_rest=Substr( 'full_path', 1, - F('from_length') - 5 # Strip "$rest" + F('from_length') - 5, # Strip "$rest" + output_field=CharField(), ), ) prefix = Q( From ed587cf5a83063186eccb8313247df7d6c5623d6 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Sun, 24 Nov 2019 12:47:39 +0100 Subject: [PATCH 07/11] Fix redirect_type for htmldir --- readthedocs/redirects/querysets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/redirects/querysets.py b/readthedocs/redirects/querysets.py index 1ea106e989f..7057320b901 100644 --- a/readthedocs/redirects/querysets.py +++ b/readthedocs/redirects/querysets.py @@ -91,7 +91,7 @@ def get_redirect_path_with_status(self, path, full_path=None, language=None, ver ) ) sphinx_htmldir = Q( - redirect_type='sphinx_html', + redirect_type='sphinx_htmldir', path__endswith='.html', ) From 662ceb8a771f242f39a0657c6c6c0881e661ca23 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Sun, 24 Nov 2019 12:49:00 +0100 Subject: [PATCH 08/11] Rename from_length to from_url_length --- readthedocs/redirects/querysets.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/readthedocs/redirects/querysets.py b/readthedocs/redirects/querysets.py index 7057320b901..b549e250daf 100644 --- a/readthedocs/redirects/querysets.py +++ b/readthedocs/redirects/querysets.py @@ -39,7 +39,7 @@ def get_redirect_path_with_status(self, path, full_path=None, language=None, ver output_field=CharField(), ), - from_length=ExpressionWrapper( + from_url_length=ExpressionWrapper( Length('from_url'), output_field=IntegerField(), ), @@ -48,7 +48,7 @@ def get_redirect_path_with_status(self, path, full_path=None, language=None, ver from_url_without_rest=Substr( 'from_url', 1, - F('from_length') - 5, # Strip "$rest" + F('from_url_length') - 5, # Strip "$rest" output_field=CharField(), ), @@ -56,7 +56,7 @@ def get_redirect_path_with_status(self, path, full_path=None, language=None, ver full_path_without_rest=Substr( 'full_path', 1, - F('from_length') - 5, # Strip "$rest" + F('from_url_length') - 5, # Strip "$rest" output_field=CharField(), ), ) From 45ed0173451cb23e0027903407be759f81251203 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Sun, 24 Nov 2019 14:37:22 +0100 Subject: [PATCH 09/11] Log redirects --- readthedocs/proxito/views/serve.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index f2f749d6db3..3fb12fd0855 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -89,6 +89,11 @@ def get(self, # Redirects shouldn't change the domain, version or language. # However, if the new_path is already an absolute URI, just use it new_path = request.build_absolute_uri(new_path) + log.info( + 'Redirecting to: http_status=%s url=%s', + http_status, + new_path, + ) if http_status and http_status == 301: return HttpResponsePermanentRedirect(new_path) From e387a641e3b26aa30515729dc204be18d355c2cd Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Sun, 24 Nov 2019 14:45:46 +0100 Subject: [PATCH 10/11] Full log for redirect --- readthedocs/proxito/views/serve.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index cad5605d53b..9f4b25bab58 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -90,9 +90,10 @@ def get(self, # However, if the new_path is already an absolute URI, just use it new_path = request.build_absolute_uri(new_path) log.info( - 'Redirecting to: http_status=%s url=%s', - http_status, + 'Redirecting: from=%s to=%s http_status=%s', + request.build_absolute_uri(), new_path, + http_status, ) if http_status and http_status == 301: From aaaa584a0fde5d2c45bc2548788db9b6f9cff528 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 25 Nov 2019 14:25:54 +0100 Subject: [PATCH 11/11] Perform redirects on El Proxito 404 handler We are not prepared yet to check and perform the redirects on El Proxito serve docs. So, we are moving it to the 404 handler for now. Once we want to activate it, we can just uncomment the lines on serve docs. --- readthedocs/proxito/views/mixins.py | 51 ++++++++++++++++++++++++++- readthedocs/proxito/views/serve.py | 54 ++++++++++++++--------------- 2 files changed, 76 insertions(+), 29 deletions(-) diff --git a/readthedocs/proxito/views/mixins.py b/readthedocs/proxito/views/mixins.py index a87e40be3c0..67b4f703d1d 100644 --- a/readthedocs/proxito/views/mixins.py +++ b/readthedocs/proxito/views/mixins.py @@ -1,9 +1,14 @@ import logging import mimetypes +from urllib.parse import urlparse, urlunparse from django.conf import settings from django.core.files.storage import get_storage_class -from django.http import HttpResponse +from django.http import ( + HttpResponse, + HttpResponseRedirect, + HttpResponsePermanentRedirect, +) from django.shortcuts import render from django.utils.encoding import iri_to_uri from django.views.static import serve @@ -79,3 +84,47 @@ def _serve_401(self, request, project): res.status_code = 401 log.debug('Unauthorized access to %s documentation', project.slug) return res + + +class ServeRedirectMixin: + + def get_redirect(self, project, lang_slug, version_slug, filename, full_path): + """ + Check for a redirect for this project that matches ``full_path``. + + :returns: the path to redirect the request and its status code + :rtype: tuple + """ + redirect_path, http_status = project.redirects.get_redirect_path_with_status( + language=lang_slug, + version_slug=version_slug, + path=filename, + full_path=full_path, + ) + return redirect_path, http_status + + def get_redirect_response(self, request, redirect_path, http_status): + """ + Build the response for the ``redirect_path`` and its ``http_status``. + + :returns: redirect respose with the correct path + :rtype: HttpResponseRedirect or HttpResponsePermanentRedirect + """ + schema, netloc, path, params, query, fragments = urlparse(request.path) + new_path = urlunparse((schema, netloc, redirect_path, params, query, fragments)) + + # Re-use the domain and protocol used in the current request. + # Redirects shouldn't change the domain, version or language. + # However, if the new_path is already an absolute URI, just use it + new_path = request.build_absolute_uri(new_path) + log.info( + 'Redirecting: from=%s to=%s http_status=%s', + request.build_absolute_uri(), + new_path, + http_status, + ) + + if http_status and http_status == 301: + return HttpResponsePermanentRedirect(new_path) + + return HttpResponseRedirect(new_path) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 9f4b25bab58..e55d30be42c 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -3,11 +3,11 @@ import itertools import logging import os -from urllib.parse import urlparse, urlunparse +from urllib.parse import urlparse from django.conf import settings from django.core.files.storage import get_storage_class -from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponsePermanentRedirect +from django.http import Http404, HttpResponse, HttpResponseRedirect from django.shortcuts import render from django.urls import resolve as url_resolve from django.utils.decorators import method_decorator @@ -20,7 +20,7 @@ from readthedocs.projects import constants from readthedocs.projects.templatetags.projects_tags import sort_version_aware -from .mixins import ServeDocsMixin +from .mixins import ServeDocsMixin, ServeRedirectMixin from .decorators import map_project_slug from .redirects import redirect_project_slug @@ -30,7 +30,7 @@ log = logging.getLogger(__name__) # noqa -class ServeDocsBase(ServeDocsMixin, View): +class ServeDocsBase(ServeRedirectMixin, ServeDocsMixin, View): def get(self, request, @@ -77,29 +77,16 @@ def get(self, ) raise Http404('Invalid URL for project with versions') - full_path = request.path - redirect_path, http_status = final_project.redirects.get_redirect_path_with_status( - language=lang_slug, version_slug=version_slug, path=filename, full_path=full_path, - ) - if redirect_path is not None: - schema, netloc, path, params, query, fragments = urlparse(full_path) - new_path = urlunparse((schema, netloc, redirect_path, params, query, fragments)) - - # Re-use the domain and protocol used in the current request. - # Redirects shouldn't change the domain, version or language. - # However, if the new_path is already an absolute URI, just use it - new_path = request.build_absolute_uri(new_path) - log.info( - 'Redirecting: from=%s to=%s http_status=%s', - request.build_absolute_uri(), - new_path, - http_status, - ) - - if http_status and http_status == 301: - return HttpResponsePermanentRedirect(new_path) - - return HttpResponseRedirect(new_path) + # TODO: un-comment when ready to perform redirect here + # redirect_path, http_status = self.get_redirect( + # final_project, + # lang_slug, + # version_slug, + # filename, + # request.path, + # ) + # if redirect_path and http_status: + # return self.get_redirect_response(request, redirect_path, http_status) # Check user permissions and return an unauthed response if needed if not self.allowed_user(request, final_project, version_slug): @@ -125,7 +112,7 @@ class ServeDocs(SettingsOverrideObject): _default_class = ServeDocsBase -class ServeError404Base(View): +class ServeError404Base(ServeRedirectMixin, View): def get(self, request, proxito_path, template_name='404.html'): """ @@ -156,6 +143,17 @@ def get(self, request, proxito_path, template_name='404.html'): filename=kwargs.get('filename', ''), ) + # Check and perform redirects on 404 handler + redirect_path, http_status = self.get_redirect( + final_project, + lang_slug, + version_slug, + filename, + request.path, + ) + if redirect_path and http_status: + return self.get_redirect_response(request, redirect_path, http_status) + storage_root_path = final_project.get_storage_path( type_='html', version_slug=version_slug,