Skip to content

Denormalize from_url_without_rest onto the redirects model #6780

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions readthedocs/redirects/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class RedirectAdmin(admin.ModelAdmin):
'from_url',
'to_url',
)
readonly_fields = ('from_url_without_rest',)


admin.site.register(Redirect, RedirectAdmin)
38 changes: 38 additions & 0 deletions readthedocs/redirects/migrations/0004_denormalize-from-url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.26 on 2020-03-14 14:15
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file should be generated with out current version of django

from __future__ import unicode_literals

from django.db import migrations, models


def forward(apps, schema_editor):
"""Calculate new ``from_url_without_rest`` attribute."""
Redirect = apps.get_model('redirects', 'Redirect')

queryset = Redirect.objects.filter(
redirect_type='exact',
from_url__endswith='$rest',
)
for redirect in queryset:
redirect.from_url_without_rest = redirect.from_url.replace('$rest', '') or None
redirect.save()

def backward(apps, schema_editor):
# just no-op
pass


class Migration(migrations.Migration):

dependencies = [
('redirects', '0003_add_default_redirect_http_status_to_302'),
]

operations = [
migrations.AddField(
model_name='redirect',
name='from_url_without_rest',
field=models.CharField(blank=True, db_index=True, help_text='Only for internal querying use', max_length=255, null=True),
),
migrations.RunPython(forward, backward),
]
15 changes: 15 additions & 0 deletions readthedocs/redirects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,16 @@ class Redirect(models.Model):
blank=True,
)

# We are denormalizing the database here to easily query for Exact Redirects
# with ``$rest`` on them from El Proxito
from_url_without_rest = models.CharField(
max_length=255,
db_index=True,
help_text='Only for internal querying use',
blank=True,
null=True,
)

to_url = models.CharField(
_('To URL'),
max_length=255,
Expand All @@ -102,6 +112,11 @@ class Meta:
verbose_name_plural = _('redirects')
ordering = ('-update_dt',)

def save(self, *args, **kwargs): # pylint: disable=arguments-differ
if self.redirect_type == 'exact' and '$rest' in self.from_url:
self.from_url_without_rest = self.from_url.replace('$rest', '')
super().save(*args, **kwargs)

def __str__(self):
redirect_text = '{type}: {from_to_url}'
if self.redirect_type in ['prefix', 'page', 'exact']:
Expand Down
56 changes: 55 additions & 1 deletion readthedocs/redirects/querysets.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
"""Queryset for the redirects app."""

import logging

from django.db import models
from django.db.models import Value, CharField, Q, F

from readthedocs.core.utils.extend import SettingsOverrideObject

log = logging.getLogger(__name__)


class RedirectQuerySetBase(models.QuerySet):

Expand All @@ -25,7 +30,56 @@ def api(self, user=None, detail=True):
return queryset

def get_redirect_path_with_status(self, path, full_path=None, language=None, version_slug=None):
for redirect in self.select_related('project'):
# add extra fields with the ``path`` and ``full_path`` to perform a
# filter at db level instead with Python
queryset = self.annotate(
path=Value(
path,
output_field=CharField(),
),
full_path=Value(
full_path,
output_field=CharField(),
),
)
prefix = Q(
redirect_type='prefix',
path__startswith=F('from_url'),
)
page = Q(
redirect_type='page',
path__exact=F('from_url'),
)
exact = (
Q(
redirect_type='exact',
from_url__endswith='$rest',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can test using from_url_without_rest__isnull=False, which may be faster than this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The downside of this, is that it depends on how the from_url_without_rest ends up after the migration and the handling. We strictly depend on setting it correctly.

full_path__startswith=F('from_url_without_rest'),
) | Q(
redirect_type='exact',
full_path__exact=F('from_url'),
)
)
sphinx_html = (
Q(
redirect_type='sphinx_html',
path__endswith='/',
) | Q(
redirect_type='sphinx_html',
path__endswith='/index.html',
)
)
sphinx_htmldir = Q(
redirect_type='sphinx_htmldir',
path__endswith='.html',
)

queryset = queryset.filter(prefix | page | exact | sphinx_html | sphinx_htmldir)

# There should be one and only one redirect returned by this query. I
# can't think in a case where there can be more at this point. I'm
Comment on lines +79 to +80
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A user could create several redirects with the same value, or with $rest in different positions, like /en/latest/foo/$rest and /en/latest/foo/bar/$rest

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably we should sort the result by creation date if we don't do that already

Copy link
Member

@humitos humitos Mar 20, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

like /en/latest/foo/$rest and /en/latest/foo/bar/$rest

Good point. Although, the first one contains the second one in this case, so it does not make too much sense at this point.

We could implement a new feature that the one that matches more gets priority, though. But that's a different story.

# leaving the loop just in case for now
for redirect in queryset.select_related('project'):
new_path = redirect.get_redirect_path(
path=path,
language=language,
Expand Down