From d88c1877475792093684c7042069b689e2a40e09 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Fri, 13 Mar 2020 19:06:43 -0700 Subject: [PATCH 1/2] Revert commit 88c7640ad16fa26f18c8988ff48323b99d4e25aa This commit reverts 88c7640ad16fa26f18c8988ff48323b99d4e25aa from PR https://github.com/readthedocs/readthedocs.org/pull/6430/s We found a problem with `from_url` that are smaller than 5 char length. I'm reverting this here and adding a new commit that will workaround this problem. --- readthedocs/redirects/querysets.py | 76 +++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/readthedocs/redirects/querysets.py b/readthedocs/redirects/querysets.py index 1d7fb15e7fe..b549e250daf 100644 --- a/readthedocs/redirects/querysets.py +++ b/readthedocs/redirects/querysets.py @@ -1,6 +1,8 @@ """Queryset for the redirects app.""" from django.db import models +from django.db.models import Value, CharField, IntegerField, Q, F, ExpressionWrapper +from django.db.models.functions import Substr, Length from readthedocs.core.utils.extend import SettingsOverrideObject @@ -25,7 +27,79 @@ def api(self, user=None, detail=True): return queryset def get_redirect_path_with_status(self, path, full_path=None, language=None, version_slug=None): - for redirect in self.select_related('project'): + # add extra fields with the ``path`` and ``full_path`` to perform a + # filter at db level instead with Python + queryset = self.annotate( + path=Value( + path, + output_field=CharField(), + ), + full_path=Value( + full_path, + output_field=CharField(), + ), + + from_url_length=ExpressionWrapper( + Length('from_url'), + output_field=IntegerField(), + ), + + # 1-indexed + from_url_without_rest=Substr( + 'from_url', + 1, + F('from_url_length') - 5, # Strip "$rest" + output_field=CharField(), + ), + + # 1-indexed + full_path_without_rest=Substr( + 'full_path', + 1, + F('from_url_length') - 5, # Strip "$rest" + output_field=CharField(), + ), + ) + prefix = Q( + redirect_type='prefix', + path__startswith=F('from_url'), + ) + page = Q( + redirect_type='page', + path__iexact=F('from_url'), + ) + exact = ( + Q( + redirect_type='exact', + from_url__endswith='$rest', + # This works around a bug in Django doing a substr and an endswith, + # so instead we do 2 substrs and an exact + # https://code.djangoproject.com/ticket/29155 + full_path_without_rest=F('from_url_without_rest'), + ) | Q( + redirect_type='exact', + full_path__iexact=F('from_url'), + ) + ) + sphinx_html = ( + Q( + redirect_type='sphinx_html', + path__endswith='/', + ) | Q( + redirect_type='sphinx_html', + path__endswith='/index.html', + ) + ) + sphinx_htmldir = Q( + redirect_type='sphinx_htmldir', + path__endswith='.html', + ) + + # There should be one and only one redirect returned by this query. I + # can't think in a case where there can be more at this point. I'm + # leaving the loop just in case for now + queryset = queryset.filter(prefix | page | exact | sphinx_html | sphinx_htmldir) + for redirect in queryset.select_related('project'): new_path = redirect.get_redirect_path( path=path, language=language, From dcc682b72195ac660f689f95a29c71bd54babc33 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Fri, 13 Mar 2020 19:10:25 -0700 Subject: [PATCH 2/2] Ignore failing redirects (from_url smaller than 5 chars long) We first perform a `.count()` query to check if the DB SQL query will fail. If it fails, we ignore the `exact` Q object and we continue with the redirects check ignoring all the Exact Redirects for this project. This query will *only fail* if there is at least one Exact Redirect with less than 5 chars long on this project. This is because we are substracting the `$rest` part from the `from_url` field and we can't end up with a negative number. --- readthedocs/redirects/querysets.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/readthedocs/redirects/querysets.py b/readthedocs/redirects/querysets.py index b549e250daf..5f690ccb909 100644 --- a/readthedocs/redirects/querysets.py +++ b/readthedocs/redirects/querysets.py @@ -1,11 +1,15 @@ """Queryset for the redirects app.""" -from django.db import models +import logging + +from django.db import models, DataError from django.db.models import Value, CharField, IntegerField, Q, F, ExpressionWrapper from django.db.models.functions import Substr, Length from readthedocs.core.utils.extend import SettingsOverrideObject +log = logging.getLogger(__name__) + class RedirectQuerySetBase(models.QuerySet): @@ -95,10 +99,24 @@ def get_redirect_path_with_status(self, path, full_path=None, language=None, ver path__endswith='.html', ) + try: + # Using the ``exact`` Q object we created here could cause an + # execption because it uses ``from_url_without_rest`` and + # ``full_path_without_rest`` which could produce a database error + # ("negative substring length not allowed") when ``from_url``'s + # length is less than 5 ('$rest' string substracted from it). We + # perform a query using ``exact`` here and if it fail we catch it + # and just ignore these redirects for this project. + queryset = queryset.filter(prefix | page | exact | sphinx_html | sphinx_htmldir) + queryset.select_related('project').count() + except DataError: + # Fallback to the query without using ``exact`` on it + log.warning('Failing Exact Redirects on this project. Ignoring them.') + queryset = queryset.filter(prefix | page | sphinx_html | sphinx_htmldir) + # There should be one and only one redirect returned by this query. I # can't think in a case where there can be more at this point. I'm # leaving the loop just in case for now - queryset = queryset.filter(prefix | page | exact | sphinx_html | sphinx_htmldir) for redirect in queryset.select_related('project'): new_path = redirect.get_redirect_path( path=path,