Skip to content

Perform redirects at DB level #6398

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Nov 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion readthedocs/proxito/views/mixins.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import logging
import mimetypes
from urllib.parse import urlparse, urlunparse

from django.conf import settings
from django.core.files.storage import get_storage_class
from django.http import HttpResponse
from django.http import (
HttpResponse,
HttpResponseRedirect,
HttpResponsePermanentRedirect,
)
from django.shortcuts import render
from django.utils.encoding import iri_to_uri
from django.views.static import serve
Expand Down Expand Up @@ -79,3 +84,47 @@ def _serve_401(self, request, project):
res.status_code = 401
log.debug('Unauthorized access to %s documentation', project.slug)
return res


class ServeRedirectMixin:

def get_redirect(self, project, lang_slug, version_slug, filename, full_path):
"""
Check for a redirect for this project that matches ``full_path``.

:returns: the path to redirect the request and its status code
:rtype: tuple
"""
redirect_path, http_status = project.redirects.get_redirect_path_with_status(
language=lang_slug,
version_slug=version_slug,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not quite sure why we need the lang_slug and version_slug here. It feels a bit weird. I looked through the code and they're used a couple places, but they don't seem necessary. I wonder if we can just remove it and simplify the redirects code accordingly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't take a deep look at this on purpose. I didn't want to refactor all this without being sure that this is the direction we want to move forward but besides, because we are changing the whole logic of redirects and I felt it was too much. I'm opening an issue to track this refactor.

path=filename,
full_path=full_path,
)
return redirect_path, http_status

def get_redirect_response(self, request, redirect_path, http_status):
"""
Build the response for the ``redirect_path`` and its ``http_status``.

:returns: redirect respose with the correct path
:rtype: HttpResponseRedirect or HttpResponsePermanentRedirect
"""
schema, netloc, path, params, query, fragments = urlparse(request.path)
new_path = urlunparse((schema, netloc, redirect_path, params, query, fragments))

# Re-use the domain and protocol used in the current request.
# Redirects shouldn't change the domain, version or language.
# However, if the new_path is already an absolute URI, just use it
new_path = request.build_absolute_uri(new_path)
log.info(
'Redirecting: from=%s to=%s http_status=%s',
request.build_absolute_uri(),
new_path,
http_status,
)

if http_status and http_status == 301:
return HttpResponsePermanentRedirect(new_path)

return HttpResponseRedirect(new_path)
30 changes: 23 additions & 7 deletions readthedocs/proxito/views/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from readthedocs.projects import constants
from readthedocs.projects.templatetags.projects_tags import sort_version_aware

from .mixins import ServeDocsMixin
from .mixins import ServeDocsMixin, ServeRedirectMixin

from .decorators import map_project_slug
from .redirects import redirect_project_slug
Expand All @@ -30,7 +30,7 @@
log = logging.getLogger(__name__) # noqa


class ServeDocsBase(ServeDocsMixin, View):
class ServeDocsBase(ServeRedirectMixin, ServeDocsMixin, View):

def get(self,
request,
Expand Down Expand Up @@ -77,11 +77,16 @@ def get(self,
)
raise Http404('Invalid URL for project with versions')

# TODO: Redirects need to be refactored before we can turn them on
# They currently do 1 request per redirect that exists for the project
# path, http_status = final_project.redirects.get_redirect_path_with_status(
# language=lang_slug, version_slug=version_slug, path=filename
# TODO: un-comment when ready to perform redirect here
# redirect_path, http_status = self.get_redirect(
# final_project,
# lang_slug,
# version_slug,
# filename,
# request.path,
# )
# if redirect_path and http_status:
# return self.get_redirect_response(request, redirect_path, http_status)

# Check user permissions and return an unauthed response if needed
if not self.allowed_user(request, final_project, version_slug):
Expand All @@ -107,7 +112,7 @@ class ServeDocs(SettingsOverrideObject):
_default_class = ServeDocsBase


class ServeError404Base(View):
class ServeError404Base(ServeRedirectMixin, View):

def get(self, request, proxito_path, template_name='404.html'):
"""
Expand Down Expand Up @@ -138,6 +143,17 @@ def get(self, request, proxito_path, template_name='404.html'):
filename=kwargs.get('filename', ''),
)

# Check and perform redirects on 404 handler
redirect_path, http_status = self.get_redirect(
final_project,
lang_slug,
version_slug,
filename,
request.path,
)
if redirect_path and http_status:
return self.get_redirect_response(request, redirect_path, http_status)

storage_root_path = final_project.get_storage_path(
type_='html',
version_slug=version_slug,
Expand Down
79 changes: 76 additions & 3 deletions readthedocs/redirects/querysets.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
"""Queryset for the redirects app."""

from django.db import models
from django.db.models import Value, CharField, IntegerField, Q, F, ExpressionWrapper
from django.db.models.functions import Substr, Length

from readthedocs.core.utils.extend import SettingsOverrideObject
from readthedocs.projects import constants


class RedirectQuerySetBase(models.QuerySet):
Expand All @@ -25,8 +26,80 @@ def api(self, user=None, detail=True):
queryset = self._add_user_repos(queryset, user)
return queryset

def get_redirect_path_with_status(self, path, language=None, version_slug=None):
for redirect in self.select_related('project'):
def get_redirect_path_with_status(self, path, full_path=None, language=None, version_slug=None):
# add extra fields with the ``path`` and ``full_path`` to perform a
# filter at db level instead with Python
queryset = self.annotate(
path=Value(
path,
output_field=CharField(),
),
full_path=Value(
full_path,
output_field=CharField(),
),

from_url_length=ExpressionWrapper(
Length('from_url'),
output_field=IntegerField(),
),

# 1-indexed
from_url_without_rest=Substr(
'from_url',
1,
F('from_url_length') - 5, # Strip "$rest"
output_field=CharField(),
),

# 1-indexed
full_path_without_rest=Substr(
'full_path',
1,
F('from_url_length') - 5, # Strip "$rest"
output_field=CharField(),
),
)
prefix = Q(
redirect_type='prefix',
path__startswith=F('from_url'),
)
page = Q(
redirect_type='page',
path__iexact=F('from_url'),
)
exact = (
Q(
redirect_type='exact',
from_url__endswith='$rest',
# This works around a bug in Django doing a substr and an endswith,
# so instead we do 2 substrs and an exact
# https://code.djangoproject.com/ticket/29155
full_path_without_rest=F('from_url_without_rest'),
) | Q(
redirect_type='exact',
full_path__iexact=F('from_url'),
)
)
sphinx_html = (
Q(
redirect_type='sphinx_html',
path__endswith='/',
) | Q(
redirect_type='sphinx_html',
path__endswith='/index.html',
)
)
sphinx_htmldir = Q(
redirect_type='sphinx_htmldir',
path__endswith='.html',
)

# There should be one and only one redirect returned by this query. I
# can't think in a case where there can be more at this point. I'm
# leaving the loop just in case for now
queryset = queryset.filter(prefix | page | exact | sphinx_html | sphinx_htmldir)
for redirect in queryset.select_related('project'):
new_path = redirect.get_redirect_path(
path=path,
language=language,
Expand Down
2 changes: 1 addition & 1 deletion readthedocs/redirects/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def get_redirect_response(request, full_path):
language, version_slug, path = language_and_version_from_path(path)

path, http_status = project.redirects.get_redirect_path_with_status(
path=path, language=language, version_slug=version_slug
path=path, full_path=full_path, language=language, version_slug=version_slug
Copy link
Member Author

@humitos humitos Nov 21, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know why we weren't passing this full_path already. We are re-calculating it for one of the cases at redirect_exact method.

Passing it at this point, allows us to use it as a value to compare at db level instead from Python itself.

)

if path is None:
Expand Down