Skip to content

Save config on build model #4749

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 32 commits into from
Nov 7, 2018
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
6891db8
New field
stsewd Oct 11, 2018
b1b3620
Change API
stsewd Oct 11, 2018
27d8251
Implement as_dict to config
stsewd Oct 11, 2018
ab5ea04
One more test for the api
stsewd Oct 12, 2018
5f4fb8c
Save config on build
stsewd Oct 12, 2018
9d70812
Simple query
stsewd Oct 12, 2018
13abeda
Test for version.config property
stsewd Oct 12, 2018
98d26a4
Use callable as default
stsewd Oct 15, 2018
660c03a
Update migration
stsewd Oct 23, 2018
d29b6d9
Save on model only if there is a difference
stsewd Oct 23, 2018
7fc1e2b
Merge branch 'master' into save-config-on-build
stsewd Oct 23, 2018
8f62f03
More tests
stsewd Oct 23, 2018
76929ac
Linter
stsewd Oct 23, 2018
7ade872
One more test
stsewd Oct 23, 2018
6f8becc
Better approach for save config
stsewd Oct 23, 2018
07efa1b
Tests
stsewd Oct 23, 2018
715e5ed
Fix test for API
stsewd Oct 23, 2018
dab5860
Update docstrings
stsewd Oct 23, 2018
5aacd63
Merge branch 'master' into save-config-on-build
stsewd Oct 23, 2018
822b793
Check for previous only on first creation or if the object hast changed
stsewd Nov 2, 2018
8c60760
Merge branch 'master' into save-config-on-build
stsewd Nov 2, 2018
7ed110d
Rebase migration
stsewd Nov 2, 2018
2ffdbf6
Use just patch
stsewd Nov 2, 2018
5cce5d8
Docstring
stsewd Nov 5, 2018
9d8266f
Don't reference to empty builds
stsewd Nov 5, 2018
28cd935
Fix varname
stsewd Nov 5, 2018
433b709
Add test
stsewd Nov 5, 2018
b88837d
Docstring
stsewd Nov 5, 2018
4723705
Merge branch 'master' into save-config-on-build
stsewd Nov 5, 2018
043f4e6
Don't expose `_config` on the API
stsewd Nov 6, 2018
79fbb8e
Refactor as feedback
stsewd Nov 7, 2018
f33d54a
Merge branch 'master' into save-config-on-build
stsewd Nov 7, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions readthedocs/builds/migrations/0005_add_config_field.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.13 on 2018-10-22 19:37
from __future__ import unicode_literals

from django.db import migrations, models
import jsonfield.fields
import readthedocs.builds.version_slug


class Migration(migrations.Migration):

dependencies = [
('builds', '0004_add-apiversion-proxy-model'),
]

operations = [
migrations.AddField(
model_name='build',
name='config',
field=jsonfield.fields.JSONField(default=dict, verbose_name='Configuration used in the build'),
),
migrations.AlterField(
model_name='build',
name='error',
field=models.TextField(blank=True, default='', verbose_name='Error'),
),
migrations.AlterField(
model_name='build',
name='output',
field=models.TextField(blank=True, default='', verbose_name='Output'),
),
migrations.AlterField(
model_name='build',
name='state',
field=models.CharField(choices=[('triggered', 'Triggered'), ('cloning', 'Cloning'), ('installing', 'Installing'), ('building', 'Building'), ('finished', 'Finished')], default='finished', max_length=55, verbose_name='State'),
),
migrations.AlterField(
model_name='build',
name='type',
field=models.CharField(choices=[('html', 'HTML'), ('pdf', 'PDF'), ('epub', 'Epub'), ('man', 'Manpage'), ('dash', 'Dash')], default='html', max_length=55, verbose_name='Type'),
),
migrations.AlterField(
model_name='version',
name='privacy_level',
field=models.CharField(choices=[('public', 'Public'), ('protected', 'Protected'), ('private', 'Private')], default='public', help_text='Level of privacy for this Version.', max_length=20, verbose_name='Privacy Level'),
),
migrations.AlterField(
model_name='version',
name='slug',
field=readthedocs.builds.version_slug.VersionSlugField(db_index=True, max_length=255, populate_from='verbose_name', verbose_name='Slug'),
),
migrations.AlterField(
model_name='version',
name='type',
field=models.CharField(choices=[('branch', 'Branch'), ('tag', 'Tag'), ('unknown', 'Unknown')], default='unknown', max_length=20, verbose_name='Type'),
),
migrations.AlterField(
model_name='versionalias',
name='from_slug',
field=models.CharField(default='', max_length=255, verbose_name='From slug'),
),
migrations.AlterField(
model_name='versionalias',
name='to_slug',
field=models.CharField(blank=True, default='', max_length=255, verbose_name='To slug'),
),
]
89 changes: 81 additions & 8 deletions readthedocs/builds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,57 @@
"""Models for the builds app."""

from __future__ import (
absolute_import, division, print_function, unicode_literals)
absolute_import,
division,
print_function,
unicode_literals,
)

import logging
import os.path
import re
from builtins import object
from shutil import rmtree

from builtins import object
from django.conf import settings
from django.core.urlresolvers import reverse
from django.db import models
from django.utils import timezone
from django.utils.encoding import python_2_unicode_compatible
from django.utils.translation import ugettext_lazy as _
from django.utils.translation import ugettext
from django.utils.translation import ugettext_lazy as _
from guardian.shortcuts import assign
from jsonfield import JSONField
from taggit.managers import TaggableManager

from readthedocs.core.utils import broadcast
from readthedocs.projects.constants import (
BITBUCKET_URL, GITHUB_URL, GITLAB_URL, PRIVACY_CHOICES, PRIVATE)
BITBUCKET_URL,
GITHUB_URL,
GITLAB_URL,
PRIVACY_CHOICES,
PRIVATE,
)
from readthedocs.projects.models import APIProject, Project

from .constants import (
BRANCH, BUILD_STATE, BUILD_STATE_FINISHED, BUILD_TYPES, LATEST,
NON_REPOSITORY_VERSIONS, STABLE, TAG, VERSION_TYPES)
BRANCH,
BUILD_STATE,
BUILD_STATE_FINISHED,
BUILD_TYPES,
LATEST,
NON_REPOSITORY_VERSIONS,
STABLE,
TAG,
VERSION_TYPES,
)
from .managers import VersionManager
from .querysets import BuildQuerySet, RelatedBuildQuerySet, VersionQuerySet
from .utils import (
get_bitbucket_username_repo, get_github_username_repo,
get_gitlab_username_repo)
get_bitbucket_username_repo,
get_github_username_repo,
get_gitlab_username_repo,
)
from .version_slug import VersionSlugField

DEFAULT_VERSION_PRIVACY_LEVEL = getattr(
Expand Down Expand Up @@ -109,6 +130,16 @@ def __str__(self):
pk=self.pk,
))

@property
def config(self):
"""Returns the configuration used in the last successful build."""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to explain a little more what this returns and what's its type. Probably, it's good to use :returns: and :rtype: in the docstring. We do have many different types of Config objects and this will confuse us in the near future. In fact, I think this is not returning a Config object, but a simple dict 😕

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, it's just a dict

last_build = (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder how long this query will take. Our build table is pretty large, but perhaps the filtering will make it a bit faster. Feels like something we might want to denormalize if we use it a lot.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't use this, for now, we'll see if we can cache this maybe

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method is only used in the tests. Do we need this at all? Did you add it as a helper for debugging or so? Does it worth to keep it here? Where do you think it will be used?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will be used on the api and resolver, it'll be used on the second part of this PR

self.builds.filter(state='finished', success=True)
.order_by('-date')
.first()
)
return last_build.get_config()

@property
def commit_name(self):
"""
Expand Down Expand Up @@ -448,6 +479,7 @@ class Build(models.Model):
exit_code = models.IntegerField(_('Exit code'), null=True, blank=True)
commit = models.CharField(
_('Commit'), max_length=255, null=True, blank=True)
config = JSONField(_('Configuration used in the build'), default=dict)

length = models.IntegerField(_('Build Length'), null=True, blank=True)

Expand All @@ -466,6 +498,47 @@ class Meta(object):
get_latest_by = 'date'
index_together = [['version', 'state', 'type']]

@property
def previous(self):
"""Returns the previous build to this one."""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm trying to be more explicit here,

Return the previous build to the current Build (matching the project and version)

date = self.date or timezone.now()
if self.project is not None and self.version is not None:
return (
Build.objects
.filter(
project=self.project,
version=self.version,
date__lt=date,
)
.order_by('-date')
.first()
)
return None

def get_config(self):
"""Get the config from correct object."""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are not going to access the config from this object directly, I suggest to use _config for the model field, and make this method a @property called just config. This way, we will avoid potential future problems and mistakes.

The docstring could be more explicit.

Get the config used for this build.

Since we are saving the config into the JSON field only when it differs from the previous one,
this helper returns the correct JSON used for this particular Build object
(it could be stored in this object or one of the previous ones)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the idea of using _config, also I'd add a @property.set, to set _config through just config. The weird part, would be on the API, but I think we can handle that 🤔

if '__config' in self.config:
return Build.objects.get(pk=self.config['__config']).config
return self.config

def save(self, *args, **kwargs): # noqa
"""
Save object.

To save space on the db we only save the config if it's different
from the previous one.

If the config is the same, we save the pk of the object
that has the **real** config under the ``__config`` key.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The __config field could be more explicit also, like __config_used__build_pk.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, it would be good if this is a CONSTANT defined in the class, instead something that it's written many times.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I'm not convinced with the name either, not sure about __config_used__build_pk. I don't have better ideas for the name though :/

"""
previous = self.previous
if previous is not None and self.config == previous.get_config():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know how deep the == operator works with dicts?

Also, I think this comparison needs to be a method because it will be more explicit. Like, has_config_changed or similar.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

previous_pk = previous.pk
if '__config' in previous.config:
previous_pk = previous.config['__config']
self.config = {'__config': previous_pk}
super(Build, self).save(*args, **kwargs)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a little worried that we're running this on each Build save. We do this a number of times during a documentation build, and this will be adding a decent number of queries (eg. self.previous is a query, as is previous.config).

Is there a way to set the _config just once when we have changed it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think there is an is_changed method on django, but I think we can do this using an internal variable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, I'm just trying to think if there is a way to run this logic just when we are trying to set a config. Perhaps it belongs somewhere else?


def __str__(self):
return ugettext(
'Build {project} for {usernames} ({pk})'.format(
Expand Down
15 changes: 15 additions & 0 deletions readthedocs/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,21 @@ def python_full_version(self):
)
return ver

def as_dict(self):
attributes = [
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this logic should live somewhere else, as a PUBLIC_ATTRIBUTES or something that is more likely to be updated when changed/added?

'version', 'formats', 'python',
'conda', 'build', 'doctype',
'sphinx', 'mkdocs', 'submodules',
]
config = {}
for name in attributes:
attr = getattr(self, name)
if hasattr(attr, '_asdict'):
config[name] = attr._asdict()
else:
config[name] = attr
return config

def __getattr__(self, name):
"""Raise an error for unknown attributes."""
raise ConfigOptionNotSupportedError(name)
Expand Down
98 changes: 98 additions & 0 deletions readthedocs/config/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,60 @@ def test_config_filenames_regex(correct_config_filename):
assert re.match(CONFIG_FILENAME_REGEX, correct_config_filename)


def test_as_dict(tmpdir):
apply_fs(tmpdir, {'requirements.txt': ''})
build = get_build_config(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name of the variable is tricky. This is not a build object, but a config object.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a BuildConfig object, for historical reasons it was left like that p: I mean, we are using that variable name in all the tests of the config module, not sure if we want to rename all of them.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK.

Feel free to do whatever you prefer. In case you think this should be renamed, open an issue in the Cleanup milestone for the "future".

{
'version': 1,
'formats': ['pdf'],
'python': {
'version': 3.5,
},
'requirements_file': 'requirements.txt',
},
get_env_config({
'defaults': {
'doctype': 'sphinx',
'sphinx_configuration': None,
},
}),
source_file=str(tmpdir.join('readthedocs.yml')),
)
build.validate()
expected_dict = {
'version': '1',
'formats': ['pdf'],
'python': {
'version': 3.5,
'requirements': 'requirements.txt',
'install_with_pip': False,
'install_with_setup': False,
'extra_requirements': [],
'use_system_site_packages': False,
},
'build': {
'image': 'readthedocs/build:2.0',
},
'conda': None,
'sphinx': {
'builder': 'sphinx',
'configuration': None,
'fail_on_warning': False,
},
'mkdocs': {
'configuration': None,
'fail_on_warning': False,
},
'doctype': 'sphinx',
'submodules': {
'include': ALL,
'exclude': [],
'recursive': True,
},
}
assert build.as_dict() == expected_dict


class TestBuildConfigV2(object):

def get_build_config(self, config, env_config=None,
Expand Down Expand Up @@ -1804,3 +1858,47 @@ def test_pop_config_raise_exception(self):
build.pop_config('one.four', raise_ex=True)
assert excinfo.value.value == 'four'
assert excinfo.value.code == VALUE_NOT_FOUND

def test_as_dict(self, tmpdir):
apply_fs(tmpdir, {'requirements.txt': ''})
build = self.get_build_config(
{
'version': 2,
'formats': ['pdf'],
'python': {
'version': 3.6,
'requirements': 'requirements.txt',
},
},
source_file=str(tmpdir.join('readthedocs.yml')),
)
build.validate()
expected_dict = {
'version': '2',
'formats': ['pdf'],
'python': {
'version': 3.6,
'requirements': str(tmpdir.join('requirements.txt')),
'install_with_pip': False,
'install_with_setup': False,
'extra_requirements': [],
'use_system_site_packages': False,
},
'build': {
'image': 'readthedocs/build:latest',
},
'conda': None,
'sphinx': {
'builder': 'sphinx',
'configuration': None,
'fail_on_warning': False,
},
'mkdocs': None,
'doctype': 'sphinx',
'submodules': {
'include': [],
'exclude': ALL,
'recursive': False,
},
}
assert build.as_dict() == expected_dict
24 changes: 20 additions & 4 deletions readthedocs/projects/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,8 +405,10 @@ def run_setup(self, record=True):
raise YAMLParseError(
YAMLParseError.GENERIC_WITH_PARSE_EXCEPTION.format(
exception=str(e),
))
)
)

self.save_build_config()
self.additional_vcs_operations()

if self.setup_env.failure or self.config is None:
Expand Down Expand Up @@ -529,9 +531,14 @@ def get_build(build_pk):
build = {}
if build_pk:
build = api_v2.build(build_pk).get()
return dict((key, val) for (key, val) in list(build.items())
if key not in ['project', 'version', 'resource_uri',
'absolute_uri'])
private_keys = [
'project', 'version', 'resource_uri', 'absolute_uri'
]
return {
key: val
for key, val in build.items()
if key not in private_keys
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The blocks are the same, just refactored it to be more readable


def setup_vcs(self):
"""
Expand Down Expand Up @@ -592,6 +599,15 @@ def set_valid_clone(self):
self.project.has_valid_clone = True
self.version.project.has_valid_clone = True

def save_build_config(self):
"""Save config in the build object."""
pk = self.build['id']
build_data = api_v2.build(pk).get()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels a bit overkill. Can we not use the patch method to just update the config, without having to get() it previously? Similar to: https://github.com/rtfd/readthedocs.org/blob/master/readthedocs/projects/tasks.py#L608

config = self.config.as_dict()
build_data['config'] = config
api_v2.build(pk).put(build_data)
self.build['config'] = config
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we are going to use this here, but just in case, it also helps when testing this, write a test for this was hard 😞

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, testing just about everything about our builds is tricky :/


def update_app_instances(self, html=False, localmedia=False, search=False,
pdf=False, epub=False):
"""
Expand Down
1 change: 1 addition & 0 deletions readthedocs/restapi/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ class BuildSerializer(serializers.ModelSerializer):
version_slug = serializers.ReadOnlyField(source='version.slug')
docs_url = serializers.ReadOnlyField(source='version.get_absolute_url')
state_display = serializers.ReadOnlyField(source='get_state_display')
config = serializers.JSONField(required=False)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need this because of rpkilby/jsonfield#188 (comment)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably should be a code comment


class Meta(object):
model = Build
Expand Down
Loading