Skip to content

Commit c3ace24

Browse files
authored
Collect build data (#9113)
1 parent ec19706 commit c3ace24

File tree

20 files changed

+828
-45
lines changed

20 files changed

+828
-45
lines changed

readthedocs/api/v2/views/model_views.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
"""Endpoints for listing Projects, Versions, Builds, etc."""
22

33
import json
4-
import structlog
54

5+
import structlog
66
from allauth.socialaccount.models import SocialAccount
77
from django.conf import settings
88
from django.db.models import BooleanField, Case, Value, When

readthedocs/builds/admin.py

+4-35
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,7 @@
11
"""Django admin interface for `~builds.models.Build` and related models."""
22

3-
import json
43
from django.contrib import admin, messages
5-
from django.utils.safestring import mark_safe
6-
from polymorphic.admin import (
7-
PolymorphicChildModelAdmin,
8-
PolymorphicParentModelAdmin,
9-
)
10-
11-
from pygments import highlight
12-
from pygments.lexers import JsonLexer
13-
from pygments.formatters import HtmlFormatter
4+
from polymorphic.admin import PolymorphicChildModelAdmin, PolymorphicParentModelAdmin
145

156
from readthedocs.builds.models import (
167
Build,
@@ -20,33 +11,11 @@
2011
VersionAutomationRule,
2112
)
2213
from readthedocs.core.utils import trigger_build
14+
from readthedocs.core.utils.admin import pretty_json_field
2315
from readthedocs.projects.models import HTMLFile
2416
from readthedocs.search.utils import _indexing_helper
2517

2618

27-
def _pretty_config(instance):
28-
"""
29-
Function to display pretty version of our data.
30-
31-
Thanks to PyDanny: https://www.pydanny.com/pretty-formatting-json-django-admin.html
32-
"""
33-
34-
# Convert the data to sorted, indented JSON
35-
response = json.dumps(instance.config, sort_keys=True, indent=2)
36-
37-
# Get the Pygments formatter
38-
formatter = HtmlFormatter()
39-
40-
# Highlight the data
41-
response = highlight(response, JsonLexer(), formatter)
42-
43-
# Get the stylesheet
44-
style = "<style>" + formatter.get_style_defs() + "</style><br>"
45-
46-
# Safe the output
47-
return mark_safe(style + response)
48-
49-
5019
class BuildCommandResultInline(admin.TabularInline):
5120
model = BuildCommandResult
5221
fields = ('command', 'exit_code', 'output')
@@ -96,7 +65,7 @@ def version_slug(self, obj):
9665
return obj.version.slug
9766

9867
def pretty_config(self, instance):
99-
return _pretty_config(instance)
68+
return pretty_json_field(instance, "config")
10069

10170
pretty_config.short_description = 'Config File'
10271

@@ -123,7 +92,7 @@ def project_slug(self, obj):
12392
return obj.project.slug
12493

12594
def pretty_config(self, instance):
126-
return _pretty_config(instance)
95+
return pretty_json_field(instance, "config")
12796

12897
pretty_config.short_description = 'Config File'
12998

readthedocs/config/config.py

+1
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ class BuildConfigBase:
180180
def __init__(self, env_config, raw_config, source_file):
181181
self.env_config = env_config
182182
self._raw_config = copy.deepcopy(raw_config)
183+
self.source_config = copy.deepcopy(raw_config)
183184
self.source_file = source_file
184185
if os.path.isdir(self.source_file):
185186
self.base_path = self.source_file

readthedocs/core/db.py

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
"""
2+
Custom database routers.
3+
4+
https://docs.djangoproject.com/en/4.0/topics/db/multi-db/#automatic-database-routing
5+
"""
6+
7+
from collections import defaultdict
8+
9+
10+
class MapAppsRouter:
11+
12+
"""
13+
Router to map Django applications to a specific database.
14+
15+
:py:attr:`apps_to_db` is used to map an application to a database,
16+
if an application isn't listed here, it will use the ``default`` database.
17+
"""
18+
19+
def __init__(self):
20+
self.apps_to_db = defaultdict(lambda: "default")
21+
self.apps_to_db.update({"telemetry": "telemetry"})
22+
23+
def db_for_read(self, model, **hints):
24+
return self.apps_to_db[model._meta.app_label]
25+
26+
def db_for_write(self, model, **hints):
27+
return self.apps_to_db[model._meta.app_label]
28+
29+
def allow_relation(self, obj1, obj2, **hints):
30+
return (
31+
self.apps_to_db[obj1._meta.app_label]
32+
== self.apps_to_db[obj2._meta.app_label]
33+
)
34+
35+
def allow_migrate(self, db, app_label, model_name=None, **hints):
36+
return self.apps_to_db[app_label] == db

readthedocs/core/utils/admin.py

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import json
2+
3+
from django.utils.safestring import mark_safe
4+
from pygments import highlight
5+
from pygments.formatters import HtmlFormatter
6+
from pygments.lexers import JsonLexer
7+
8+
9+
def pretty_json_field(instance, field):
    """
    Render a JSON attribute of ``instance`` as highlighted HTML for the admin.

    The value of ``instance.<field>`` is dumped as sorted, 2-space indented JSON,
    highlighted with Pygments, and returned together with the formatter's
    stylesheet as a string marked safe for templates.

    Thanks to PyDanny: https://www.pydanny.com/pretty-formatting-json-django-admin.html
    """
    # Serialize first: sorted keys keep the output stable between renders
    raw_value = getattr(instance, field)
    dumped = json.dumps(raw_value, sort_keys=True, indent=2)

    # One formatter instance produces both the markup and its CSS
    html_formatter = HtmlFormatter()
    highlighted = highlight(dumped, JsonLexer(), html_formatter)

    # Inline the stylesheet right before the highlighted block
    stylesheet = "<style>" + html_formatter.get_style_defs() + "</style><br>"

    # Mark the combined markup as safe so the admin doesn't escape it
    return mark_safe(stylesheet + highlighted)

readthedocs/projects/tasks/builds.py

+44-6
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
YAMLParseError,
4747
)
4848
from readthedocs.storage import build_media_storage
49+
from readthedocs.telemetry.collectors import BuildDataCollector
50+
from readthedocs.telemetry.tasks import save_build_data
4951
from readthedocs.worker import app
5052

5153
from ..exceptions import (
@@ -346,6 +348,8 @@ def before_start(self, task_id, args, kwargs):
346348
# Reset any previous build error reported to the user
347349
self.data.build['error'] = ''
348350

351+
self.data.build_data = None
352+
349353
# Also note there are builds that are triggered without a commit
350354
# because they just build the latest commit for that version
351355
self.data.build_commit = kwargs.get('build_commit')
@@ -540,6 +544,7 @@ def after_return(self, status, retval, task_id, args, kwargs, einfo):
540544
self.data.build['length'] = (timezone.now() - self.data.start_time).seconds
541545

542546
self.update_build(BUILD_STATE_FINISHED)
547+
self.save_build_data()
543548

544549
build_complete.send(sender=Build, build=self.data.build)
545550

@@ -595,13 +600,46 @@ def execute(self):
595600
# ``__exit__``
596601
self.data.build_director.create_build_environment()
597602
with self.data.build_director.build_environment:
598-
# Installing
599-
self.update_build(state=BUILD_STATE_INSTALLING)
600-
self.data.build_director.setup_environment()
603+
try:
604+
# Installing
605+
self.update_build(state=BUILD_STATE_INSTALLING)
606+
self.data.build_director.setup_environment()
607+
608+
# Building
609+
self.update_build(state=BUILD_STATE_BUILDING)
610+
self.data.build_director.build()
611+
finally:
612+
self.data.build_data = self.collect_build_data()
613+
614+
def collect_build_data(self):
615+
"""
616+
Collect data from the current build.
601617
602-
# Building
603-
self.update_build(state=BUILD_STATE_BUILDING)
604-
self.data.build_director.build()
618+
The data is collected from inside the container,
619+
so this must be called before killing the container.
620+
"""
621+
try:
622+
return BuildDataCollector(
623+
self.data.build_director.build_environment
624+
).collect()
625+
except Exception:
626+
log.exception("Error while collecting build data")
627+
628+
def save_build_data(self):
    """
    Queue the task that stores the data collected from the build.

    This must be called after the build has finished updating its state,
    otherwise some attributes like ``length`` won't be available.

    Any error is logged and swallowed, and nothing is queued
    when no build data was collected.
    """
    try:
        collected = self.data.build_data
        if not collected:
            return
        save_build_data.delay(build_id=self.data.build_pk, data=collected)
    except Exception:
        log.exception("Error while saving build data")
605643

606644
@staticmethod
607645
def get_project(project_pk):

readthedocs/projects/tests/test_build_tasks.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,12 @@
1717
from readthedocs.projects.exceptions import RepositoryError
1818
from readthedocs.projects.models import EnvironmentVariable, Project, WebHookEvent
1919
from readthedocs.projects.tasks.builds import sync_repository_task, update_docs_task
20+
from readthedocs.telemetry.models import BuildData
2021

2122
from .mockers import BuildEnvironmentMocker
2223

2324

24-
@pytest.mark.django_db
25+
@pytest.mark.django_db(databases="__all__")
2526
class BuildEnvironmentBase:
2627

2728
# NOTE: `load_yaml_config` maybe be moved to the setup and assign to self.
@@ -258,6 +259,8 @@ def test_successful_build(
258259
}
259260
)
260261

262+
assert not BuildData.objects.all().exists()
263+
261264
self._trigger_update_docs_task()
262265

263266
# It has to be called twice, ``before_start`` and ``after_return``
@@ -393,6 +396,8 @@ def test_successful_build(
393396
"error": "",
394397
}
395398

399+
assert BuildData.objects.all().exists()
400+
396401
self.mocker.mocks["build_media_storage"].sync_directory.assert_has_calls(
397402
[
398403
mock.call(mock.ANY, "html/project/latest"),
@@ -418,6 +423,8 @@ def test_failed_build(
418423
send_external_build_status,
419424
build_complete,
420425
):
426+
assert not BuildData.objects.all().exists()
427+
421428
# Force an exception from the execution of the task. We don't really
422429
# care "where" it was raised: setup, build, syncing directories, etc
423430
execute.side_effect = Exception('Force and exception here.')
@@ -449,6 +456,10 @@ def test_failed_build(
449456
build=mock.ANY,
450457
)
451458

459+
# The build data is None (we are failing the build before the environment is created)
460+
# and the task won't be run.
461+
assert not BuildData.objects.all().exists()
462+
452463
# Test we are updating the DB by calling the API with the updated build object
453464
api_request = self.requests_mock.request_history[
454465
-1

readthedocs/settings/base.py

+2
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ def SESSION_COOKIE_SAMESITE(self):
128128
DONT_HIT_API = False
129129
DONT_HIT_DB = True
130130
RTD_SAVE_BUILD_COMMANDS_TO_STORAGE = False
131+
DATABASE_ROUTERS = ['readthedocs.core.db.MapAppsRouter']
131132

132133
USER_MATURITY_DAYS = 7
133134

@@ -199,6 +200,7 @@ def INSTALLED_APPS(self): # noqa
199200
'readthedocs.sphinx_domains',
200201
'readthedocs.search',
201202
'readthedocs.embed',
203+
'readthedocs.telemetry',
202204

203205
# allauth
204206
'allauth',

readthedocs/settings/dev.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@ def DATABASES(self): # noqa
1818
'default': {
1919
'ENGINE': 'django.db.backends.sqlite3',
2020
'NAME': os.path.join(self.SITE_ROOT, 'dev.db'),
21-
}
21+
},
22+
'telemetry': {
23+
'ENGINE': 'django.db.backends.sqlite3',
24+
'NAME': os.path.join(self.SITE_ROOT, 'telemetry.dev.db'),
25+
},
2226
}
2327

2428
DONT_HIT_DB = False

readthedocs/settings/docker_compose.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,15 @@ def DATABASES(self): # noqa
121121
"PASSWORD": os.environ.get("DB_PWD", "docs_pwd"),
122122
"HOST": os.environ.get("DB_HOST", "database"),
123123
"PORT": "",
124-
}
124+
},
125+
"telemetry": {
126+
"ENGINE": "django.db.backends.postgresql_psycopg2",
127+
"NAME": "telemetry",
128+
"USER": os.environ.get("DB_USER", "docs_user"),
129+
"PASSWORD": os.environ.get("DB_PWD", "docs_pwd"),
130+
"HOST": os.environ.get("DB_HOST", "database"),
131+
"PORT": "",
132+
},
125133
}
126134

127135
def show_debug_toolbar(request):

readthedocs/telemetry/__init__.py

Whitespace-only changes.

readthedocs/telemetry/admin.py

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
"""Telemetry admin."""
2+
3+
4+
from django.contrib import admin
5+
6+
from readthedocs.core.utils.admin import pretty_json_field
7+
from readthedocs.telemetry.models import BuildData
8+
9+
10+
@admin.register(BuildData)
class BuildDataAdmin(admin.ModelAdmin):

    """Admin for ``BuildData`` where every listed field is read-only."""

    readonly_fields = ("created", "modified", "pretty_data")
    fields = (
        "created",
        "modified",
        "pretty_data",
    )

    # pylint: disable=no-self-use
    def pretty_data(self, instance):
        """Return the ``data`` field of ``instance`` rendered by ``pretty_json_field``."""
        return pretty_json_field(instance, "data")

readthedocs/telemetry/apps.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"""
2+
Telemetry application.
3+
4+
Collect relevant data to be analyzed later.
5+
"""
6+
7+
from django.apps import AppConfig
8+
9+
10+
class TelemetryConfig(AppConfig):

    """Application configuration for ``readthedocs.telemetry``."""

    name = "readthedocs.telemetry"
    default_auto_field = "django.db.models.BigAutoField"

    def ready(self):
        """Import the telemetry tasks module when the application is ready."""
        # NOTE(review): presumably imported only for its side effects
        # (e.g. registering the tasks) -- confirm.
        import readthedocs.telemetry.tasks  # noqa

0 commit comments

Comments
 (0)