Skip to content

Collect build data #9113

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
May 18, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion readthedocs/api/v2/views/model_views.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Endpoints for listing Projects, Versions, Builds, etc."""

import json
import structlog

import structlog
from allauth.socialaccount.models import SocialAccount
from django.conf import settings
from django.db.models import BooleanField, Case, Value, When
Expand All @@ -19,6 +19,7 @@
from readthedocs.oauth.services import GitHubService, registry
from readthedocs.projects.models import Domain, Project
from readthedocs.storage import build_commands_storage
from readthedocs.telemetry.models import BuildData

from ..permissions import APIPermission, APIRestrictedPermission, IsOwner
from ..serializers import (
Expand Down Expand Up @@ -285,6 +286,17 @@ def reset(self, request, **kwargs):
instance.reset()
return Response(status=status.HTTP_204_NO_CONTENT)

@decorators.action(
    methods=["post"],
    detail=True,
    permission_classes=[permissions.IsAdminUser],
)
def telemetry(self, request, **kwargs):
    """
    Store the telemetry payload posted for a single build.

    The build is resolved with ``self.get_object()`` and handed, together
    with the raw request data, to ``BuildData.objects.collect``.
    An empty ``204 No Content`` response is returned.
    """
    target_build = self.get_object()
    BuildData.objects.collect(target_build, request.data)
    return Response(status=status.HTTP_204_NO_CONTENT)


class BuildCommandViewSet(DisableListEndpoint, UserSelectViewSet):
parser_classes = [JSONParser, MultiPartParser]
Expand Down
39 changes: 4 additions & 35 deletions readthedocs/builds/admin.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,7 @@
"""Django admin interface for `~builds.models.Build` and related models."""

import json
from django.contrib import admin, messages
from django.utils.safestring import mark_safe
from polymorphic.admin import (
PolymorphicChildModelAdmin,
PolymorphicParentModelAdmin,
)

from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import HtmlFormatter
from polymorphic.admin import PolymorphicChildModelAdmin, PolymorphicParentModelAdmin

from readthedocs.builds.models import (
Build,
Expand All @@ -20,33 +11,11 @@
VersionAutomationRule,
)
from readthedocs.core.utils import trigger_build
from readthedocs.core.utils.admin import pretty_json_field
from readthedocs.projects.models import HTMLFile
from readthedocs.search.utils import _indexing_helper


def _pretty_config(instance):
    """
    Render ``instance.config`` as syntax-highlighted JSON for the admin.

    Thanks to PyDanny: https://www.pydanny.com/pretty-formatting-json-django-admin.html
    """
    # Sorted keys and a two-space indent give a stable, readable dump.
    config_json = json.dumps(instance.config, sort_keys=True, indent=2)

    # The same formatter highlights the JSON and provides the matching CSS.
    html_formatter = HtmlFormatter()
    highlighted = highlight(config_json, JsonLexer(), html_formatter)
    stylesheet = "<style>" + html_formatter.get_style_defs() + "</style><br>"

    # Mark the stylesheet plus highlighted markup as safe HTML.
    return mark_safe(stylesheet + highlighted)


class BuildCommandResultInline(admin.TabularInline):
model = BuildCommandResult
fields = ('command', 'exit_code', 'output')
Expand Down Expand Up @@ -96,7 +65,7 @@ def version_slug(self, obj):
return obj.version.slug

def pretty_config(self, instance):
return _pretty_config(instance)
return pretty_json_field(instance, "config")

pretty_config.short_description = 'Config File'

Expand All @@ -123,7 +92,7 @@ def project_slug(self, obj):
return obj.project.slug

def pretty_config(self, instance):
return _pretty_config(instance)
return pretty_json_field(instance, "config")

pretty_config.short_description = 'Config File'

Expand Down
1 change: 1 addition & 0 deletions readthedocs/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ class BuildConfigBase:
def __init__(self, env_config, raw_config, source_file):
self.env_config = env_config
self._raw_config = copy.deepcopy(raw_config)
self.source_config = copy.deepcopy(raw_config)
self.source_file = source_file
if os.path.isdir(self.source_file):
self.base_path = self.source_file
Expand Down
28 changes: 28 additions & 0 deletions readthedocs/core/utils/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import json

from django.utils.safestring import mark_safe
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import JsonLexer


def pretty_json_field(instance, field):
    """
    Return highlighted HTML for the JSON stored in ``field`` of ``instance``.

    ``field`` is the attribute name that is read from ``instance``.

    Thanks to PyDanny: https://www.pydanny.com/pretty-formatting-json-django-admin.html
    """
    # Sorted keys and a fixed indent keep the rendered JSON stable and readable.
    raw_value = getattr(instance, field)
    json_text = json.dumps(raw_value, sort_keys=True, indent=2)

    # One formatter is used both to highlight the JSON and to emit its CSS.
    html_formatter = HtmlFormatter()
    markup = highlight(json_text, JsonLexer(), html_formatter)
    css = html_formatter.get_style_defs()

    # Mark the stylesheet plus highlighted markup as safe HTML.
    return mark_safe("<style>" + css + "</style><br>" + markup)
46 changes: 40 additions & 6 deletions readthedocs/projects/tasks/builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
YAMLParseError,
)
from readthedocs.storage import build_media_storage
from readthedocs.telemetry.collectors import BuildDataCollector
from readthedocs.worker import app

from ..exceptions import (
Expand Down Expand Up @@ -341,6 +342,8 @@ def before_start(self, task_id, args, kwargs):
# Reset any previous build error reported to the user
self.data.build['error'] = ''

self.data.build_data = None

# Also note there are builds that are triggered without a commit
# because they just build the latest commit for that version
self.data.build_commit = kwargs.get('build_commit')
Expand Down Expand Up @@ -535,6 +538,7 @@ def after_return(self, status, retval, task_id, args, kwargs, einfo):
self.data.build['length'] = (timezone.now() - self.data.start_time).seconds

self.update_build(BUILD_STATE_FINISHED)
self.upload_build_data()

build_complete.send(sender=Build, build=self.data.build)

Expand Down Expand Up @@ -590,13 +594,43 @@ def execute(self):
# ``__exit__``
self.data.build_director.create_build_environment()
with self.data.build_director.build_environment:
# Installing
self.update_build(state=BUILD_STATE_INSTALLING)
self.data.build_director.setup_environment()
try:
# Installing
self.update_build(state=BUILD_STATE_INSTALLING)
self.data.build_director.setup_environment()

# Building
self.update_build(state=BUILD_STATE_BUILDING)
self.data.build_director.build()
finally:
self.data.build_data = self.collect_build_data()

def collect_build_data(self):
    """
    Gather data about the build that is currently running.

    The data is collected from inside the container,
    so this must be called before killing the container.

    Any error raised while collecting is logged and not re-raised;
    ``None`` is returned in that case.
    """
    try:
        collector = BuildDataCollector(
            self.data.build_director.build_environment
        )
        return collector.collect()
    except Exception:
        log.exception("Error while collecting build data")
    return None

def upload_build_data(self):
    """
    Upload data collected from the build after the build has ended.

    This must be called after the build has finished updating its state,
    otherwise some attributes like ``length`` won't be available.

    Nothing is sent when ``self.data.build_data`` is empty or ``None``.
    Any error raised while posting is logged and not re-raised.
    """
    try:
        if self.data.build_data:
            api_v2.build(self.data.build_pk).telemetry.post(self.data.build_data)
    except Exception:
        log.exception("Error while uploading build data")

@staticmethod
def get_project(project_pk):
Expand Down
9 changes: 9 additions & 0 deletions readthedocs/projects/tests/test_build_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,9 @@ def test_successful_build(
"error": "",
}

request = self.requests_mock.request_history[10]
assert request.path == "/api/v2/build/1/telemetry/"

self.mocker.mocks["build_media_storage"].sync_directory.assert_has_calls(
[
mock.call(mock.ANY, "html/project/latest"),
Expand All @@ -407,6 +410,7 @@ def test_successful_build(

@mock.patch("readthedocs.projects.tasks.builds.build_complete")
@mock.patch("readthedocs.projects.tasks.builds.send_external_build_status")
@mock.patch("readthedocs.projects.tasks.builds.UpdateDocsTask.upload_build_data")
@mock.patch("readthedocs.projects.tasks.builds.UpdateDocsTask.execute")
@mock.patch("readthedocs.projects.tasks.builds.UpdateDocsTask.send_notifications")
@mock.patch("readthedocs.projects.tasks.builds.clean_build")
Expand All @@ -415,6 +419,7 @@ def test_failed_build(
clean_build,
send_notifications,
execute,
upload_build_data,
send_external_build_status,
build_complete,
):
Expand Down Expand Up @@ -449,6 +454,10 @@ def test_failed_build(
build=mock.ANY,
)

# The build data is None (we are failing the build before the environment is created)
# and the API won't be hit, but we can test that the method was called at least.
upload_build_data.assert_called_once()

# Test we are updating the DB by calling the API with the updated build object
api_request = self.requests_mock.request_history[
-1
Expand Down
66 changes: 31 additions & 35 deletions readthedocs/rtd_tests/tests/test_privacy_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,7 @@
from readthedocs.core.utils.tasks import TaskNoPermission
from readthedocs.integrations.models import HttpExchange, Integration
from readthedocs.oauth.models import RemoteOrganization, RemoteRepository
from readthedocs.projects.models import (
Domain,
EnvironmentVariable,
Project,
WebHook,
)
from readthedocs.projects.models import Domain, EnvironmentVariable, Project, WebHook
from readthedocs.rtd_tests.utils import create_user


Expand Down Expand Up @@ -399,35 +394,36 @@ def setUp(self):
'api_webhook_stripe': {},
}
self.response_data = {
'domain-list': {'status_code': 410},
'buildcommandresult-list': {'status_code': 410},
'build-concurrent': {'status_code': 403},
'build-list': {'status_code': 410},
'build-reset': {'status_code': 403},
'project-sync-versions': {'status_code': 403},
'project-token': {'status_code': 403},
'emailhook-list': {'status_code': 403},
'emailhook-detail': {'status_code': 403},
'embed': {'status_code': 400},
'docurl': {'status_code': 400},
'cname': {'status_code': 400},
'index_search': {'status_code': 403},
'api_search': {'status_code': 400},
'api_project_search': {'status_code': 400},
'api_section_search': {'status_code': 400},
'api_sync_remote_repositories': {'status_code': 403},
'api_webhook': {'status_code': 405},
'api_webhook_github': {'status_code': 405},
'api_webhook_gitlab': {'status_code': 405},
'api_webhook_bitbucket': {'status_code': 405},
'api_webhook_generic': {'status_code': 403},
'api_webhook_stripe': {'status_code': 405},
'sphinxdomain-detail': {'status_code': 404},
'project-list': {'status_code': 410},
'remoteorganization-detail': {'status_code': 404},
'remoterepository-detail': {'status_code': 404},
'remoteaccount-detail': {'status_code': 404},
'version-list': {'status_code': 410},
"domain-list": {"status_code": 410},
"buildcommandresult-list": {"status_code": 410},
"build-concurrent": {"status_code": 403},
"build-telemetry": {"status_code": 403},
"build-list": {"status_code": 410},
"build-reset": {"status_code": 403},
"project-sync-versions": {"status_code": 403},
"project-token": {"status_code": 403},
"emailhook-list": {"status_code": 403},
"emailhook-detail": {"status_code": 403},
"embed": {"status_code": 400},
"docurl": {"status_code": 400},
"cname": {"status_code": 400},
"index_search": {"status_code": 403},
"api_search": {"status_code": 400},
"api_project_search": {"status_code": 400},
"api_section_search": {"status_code": 400},
"api_sync_remote_repositories": {"status_code": 403},
"api_webhook": {"status_code": 405},
"api_webhook_github": {"status_code": 405},
"api_webhook_gitlab": {"status_code": 405},
"api_webhook_bitbucket": {"status_code": 405},
"api_webhook_generic": {"status_code": 403},
"api_webhook_stripe": {"status_code": 405},
"sphinxdomain-detail": {"status_code": 404},
"project-list": {"status_code": 410},
"remoteorganization-detail": {"status_code": 404},
"remoterepository-detail": {"status_code": 404},
"remoteaccount-detail": {"status_code": 404},
"version-list": {"status_code": 410},
}


Expand Down
1 change: 1 addition & 0 deletions readthedocs/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ def INSTALLED_APPS(self): # noqa
'readthedocs.sphinx_domains',
'readthedocs.search',
'readthedocs.embed',
'readthedocs.telemetry',

# allauth
'allauth',
Expand Down
Empty file.
18 changes: 18 additions & 0 deletions readthedocs/telemetry/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Telemetry admin."""


from django.contrib import admin

from readthedocs.core.utils.admin import pretty_json_field
from readthedocs.telemetry.models import BuildData


@admin.register(BuildData)
class BuildDataAdmin(admin.ModelAdmin):

    """Admin for ``BuildData`` that only shows a read-only, pretty-printed view of ``data``."""

    readonly_fields = ("pretty_data",)
    fields = ("pretty_data",)

    # pylint: disable=no-self-use
    def pretty_data(self, instance):
        """Return the ``data`` attribute of ``instance`` rendered by ``pretty_json_field``."""
        return pretty_json_field(instance, "data")
12 changes: 12 additions & 0 deletions readthedocs/telemetry/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""
Telemetry application.

Collect relevant data to be analyzed later.
"""

from django.apps import AppConfig


class TelemetryConfig(AppConfig):

    """Django application configuration for ``readthedocs.telemetry``."""

    name = "readthedocs.telemetry"
    default_auto_field = "django.db.models.BigAutoField"
Loading