Skip to content

Add Initial Modeling with Through Model and Data Migration for RemoteRepository Model #7536

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

128 changes: 128 additions & 0 deletions readthedocs/oauth/migrations/0011_add_remote_relation_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Generated by Django 2.2.16 on 2020-10-10 14:55

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion

import django_extensions.db.fields
import jsonfield.fields


class Migration(migrations.Migration):

dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('oauth', '0010_index_full_name'),
]

operations = [
migrations.SeparateDatabaseAndState(
database_operations=[
migrations.RunSQL(
sql='ALTER TABLE oauth_remoterepository_users RENAME TO oauth_remoterelation',
reverse_sql='ALTER TABLE oauth_remoterelation RENAME TO oauth_remoterepository_users',
),
],
state_operations=[
migrations.CreateModel(
name='RemoteRelation',
fields=[
(
'id',
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name='ID',
),
),
(
'user',
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name='remote_relations',
to=settings.AUTH_USER_MODEL
),
),
(
'remoterepository',
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name='remote_relations',
to='oauth.RemoteRepository'
),
),
],
),
migrations.AlterField(
model_name='remoterepository',
name='users',
field=models.ManyToManyField(
related_name='oauth_repositories',
through='oauth.RemoteRelation',
to=settings.AUTH_USER_MODEL,
verbose_name='Users'
),
),
migrations.AlterUniqueTogether(
name='remoterelation',
unique_together={('remoterepository', 'user')},
),
],
),
migrations.AddField(
model_name='remoterelation',
name='account',
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name='remote_relations',
to='socialaccount.SocialAccount',
verbose_name='Connected account'
),
),
migrations.AddField(
model_name='remoterelation',
name='active',
field=models.BooleanField(
default=False,
verbose_name='Active'
),
),
migrations.AddField(
model_name='remoterelation',
name='admin',
field=models.BooleanField(
default=False,
verbose_name='Has admin privilege'
),
),
migrations.AddField(
model_name='remoterelation',
name='json',
field=jsonfield.fields.JSONField(
default=dict,
verbose_name='Serialized API response'
),
preserve_default=False,
),
migrations.AddField(
model_name='remoterelation',
name='created',
field=django_extensions.db.fields.CreationDateTimeField(
auto_now_add=True,
default=django.utils.timezone.now,
verbose_name='created',
),
preserve_default=False,
),
migrations.AddField(
model_name='remoterelation',
name='modified',
field=django_extensions.db.fields.ModificationDateTimeField(
auto_now=True,
verbose_name='modified'
),
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Generated by Django 2.2.16 on 2020-10-10 15:00

from itertools import islice
import json
import logging

from django.db import migrations
from django.utils import timezone

log = logging.getLogger(__name__)


def move_data_to_remote_relations(apps, schema_editor):
RemoteRelation = apps.get_model('oauth', 'RemoteRelation')

def remote_relations_generator(relations, batch_size):
for relation in relations.iterator(chunk_size=batch_size):
relation.account_id = relation.remoterepository.account_id
relation.active = relation.remoterepository.active
relation.admin = relation.remoterepository.admin
relation.created = relation.remoterepository.pub_date
relation.modified = relation.remoterepository.modified_date

try:
relation.json = json.loads(relation.remoterepository.json)
except json.decoder.JSONDecodeError:
log.warning(
'Could not migrate json data for remote_repository=%s',
relation.remoterepository_id
)

yield relation

relations_queryset = RemoteRelation.objects.filter(
user__last_login__gte=timezone.now() - timezone.timedelta(days=30)
).select_related(
'remoterepository'
).only(
'account_id', 'active', 'admin', 'created',
'modified', 'json', 'remoterepository__account_id',
'remoterepository__active', 'remoterepository__admin',
'remoterepository__pub_date', 'remoterepository__json',
'remoterepository__modified_date'
)

batch_size = 1000
remote_relations = remote_relations_generator(
relations_queryset, batch_size
)

# Follows Example from django docs
# https://docs.djangoproject.com/en/2.2/ref/models/querysets/#bulk-create
while True:
batch = list(islice(remote_relations, batch_size))

if not batch:
break

RemoteRelation.objects.bulk_update(
batch,
[
'account_id', 'active',
'admin', 'json',
'created', 'modified',
],
batch_size
)


class Migration(migrations.Migration):

dependencies = [
('oauth', '0011_add_remote_relation_model'),
]

operations = [
migrations.RunPython(move_data_to_remote_relations),
]
33 changes: 32 additions & 1 deletion readthedocs/oauth/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@

import json

from allauth.socialaccount.models import SocialAccount
from django.contrib.auth.models import User
from django.core.validators import URLValidator
from django.db import models
from django.db.models import Q
from django.urls import reverse
from django.utils.translation import ugettext_lazy as _

from allauth.socialaccount.models import SocialAccount
from django_extensions.db.models import TimeStampedModel
from jsonfield import JSONField

from readthedocs.projects.constants import REPO_CHOICES
from readthedocs.projects.models import Project

Expand Down Expand Up @@ -90,6 +93,7 @@ class RemoteRepository(models.Model):
User,
verbose_name=_('Users'),
related_name='oauth_repositories',
through='RemoteRelation'
)
account = models.ForeignKey(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably add the remote_id field here as well and populate it with the id that it's in remoterepository.json.

If we are not doing it in this PR, we should create an issue to keep track of. We will need it to remove the usage of full_name in many places.

I created a project at https://github.com/orgs/readthedocs/projects/74?add_cards_query=is%3Aopen where we can track all of the related PR and issues. Let me know if you have permissions.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay sure, maybe we can do data migration for this and #7536 (comment) first.

This PR will remove the json field from RemoteRepository model so I think its better to do this first in a different PR or make it the first migration on this PR.
@humitos What do you say?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I created a project at https://github.com/orgs/readthedocs/projects/74?add_cards_query=is%3Aopen where we can track all of the related PR and issues. Let me know if you have permissions.

@humitos I can not see some of the cards maybe they are from a private repo or something, otherwise it all good 👍

SocialAccount,
Expand Down Expand Up @@ -206,3 +210,30 @@ def matches(self, user):
},
),
} for project in projects]


class RemoteRelation(TimeStampedModel):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may need to rename this model to something like RemoteRepositoryRelation because we will also need a relation model for RemoteOrganization as well and we will have conflicting names.

Does it make sense to add make remoterepository field nullable and add remoteorganization field as well to this RemoteRelation model and use it for both? It seems it's exactly the same data but RemoteOrganization does not has admin field.

remoterepository = models.ForeignKey(
RemoteRepository,
related_name='remote_relations',
on_delete=models.CASCADE
)
user = models.ForeignKey(
User,
related_name='remote_relations',
on_delete=models.CASCADE
)
account = models.ForeignKey(
SocialAccount,
verbose_name=_('Connected account'),
related_name='remote_relations',
null=True,
blank=True,
on_delete=models.SET_NULL,
)
active = models.BooleanField(_('Active'), default=False)
admin = models.BooleanField(_('Has admin privilege'), default=False)
json = JSONField(_('Serialized API response'))

class Meta:
unique_together = (('remoterepository', 'user'),)