Skip to content

APIv2: better build command sanitization #10025

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 15, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 4 additions & 27 deletions readthedocs/api/v2/serializers.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
"""Defines serializers for each of our models."""

import re

from allauth.socialaccount.models import SocialAccount
from django.conf import settings
from rest_framework import serializers

from readthedocs.api.v2.utils import sanitize_build_command
from readthedocs.builds.models import Build, BuildCommandResult, Version
from readthedocs.oauth.models import RemoteOrganization, RemoteRepository
from readthedocs.projects.models import Domain, Project
Expand Down Expand Up @@ -157,31 +156,9 @@ class BuildCommandReadOnlySerializer(BuildCommandSerializer):
command = serializers.SerializerMethodField()

def get_command(self, obj):
project_slug = obj.build.version.project.slug
version_slug = obj.build.version.slug
docroot = settings.DOCROOT.rstrip("/") # remove trailing '/'

# Remove Docker hash from DOCROOT when running it locally
# DOCROOT contains the Docker container hash (e.g. b7703d1b5854).
# We have to remove it from the DOCROOT it self since it changes each time
# we spin up a new Docker instance locally.
container_hash = "/"
if settings.RTD_DOCKER_COMPOSE:
docroot = re.sub("/[0-9a-z]+/?$", "", settings.DOCROOT, count=1)
container_hash = "/([0-9a-z]+/)?"

command = obj.command
regex = f"{docroot}{container_hash}{project_slug}/envs/{version_slug}(/bin/)?"
command = re.sub(regex, "", command, count=1)

# Remove explicit variable names we use to run commands,
# since users don't care about these.
regex = r"^\$READTHEDOCS_VIRTUALENV_PATH/bin/"
command = re.sub(regex, "", command, count=1)

regex = r"^\$CONDA_ENVS_PATH/\\$CONDA_DEFAULT_ENV/bin/"
command = re.sub(regex, "", command, count=1)
return command
return sanitize_build_command(
obj.command, obj.build.project.slug, obj.build.version.slug
)


class BuildSerializer(serializers.ModelSerializer):
Expand Down
32 changes: 32 additions & 0 deletions readthedocs/api/v2/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Utility functions that are used by both views and celery tasks."""

import itertools
import re

import structlog
from django.conf import settings
from rest_framework.pagination import PageNumberPagination

from readthedocs.builds.constants import (
Expand Down Expand Up @@ -260,6 +262,36 @@ def run_automation_rules(project, added_versions, deleted_active_versions):
rule.run(version)


def sanitize_build_command(command, project_slug, version_slug):
"""
Sanitize the build command to be shown to users.

It removes internal variables and long paths to make them nicer.
"""
docroot = settings.DOCROOT.rstrip("/") # remove trailing '/'

# Remove Docker hash from DOCROOT when running it locally
# DOCROOT contains the Docker container hash (e.g. b7703d1b5854).
# We have to remove it from the DOCROOT it self since it changes each time
# we spin up a new Docker instance locally.
container_hash = "/"
if settings.RTD_DOCKER_COMPOSE:
docroot = re.sub("/[0-9a-z]+/?$", "", settings.DOCROOT, count=1)
container_hash = "/([0-9a-z]+/)?"

regex = f"{docroot}{container_hash}{project_slug}/envs/{version_slug}(/bin/)?"
command = re.sub(regex, "", command, count=1)

# Remove explicit variable names we use to run commands,
# since users don't care about these.
regex = r"^\$READTHEDOCS_VIRTUALENV_PATH/bin/"
command = re.sub(regex, "", command, count=1)

regex = r"^\$CONDA_ENVS_PATH/\$CONDA_DEFAULT_ENV/bin/"
command = re.sub(regex, "", command, count=1)
return command


class RemoteOrganizationPagination(PageNumberPagination):
page_size = 25

Expand Down
9 changes: 9 additions & 0 deletions readthedocs/api/v2/views/model_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from rest_framework.renderers import BaseRenderer, JSONRenderer
from rest_framework.response import Response

from readthedocs.api.v2.utils import sanitize_build_command
from readthedocs.builds.constants import INTERNAL
from readthedocs.builds.models import Build, BuildCommandResult, Version
from readthedocs.oauth.models import RemoteOrganization, RemoteRepository
Expand Down Expand Up @@ -282,6 +283,14 @@ def retrieve(self, *args, **kwargs):
try:
json_resp = build_commands_storage.open(storage_path).read()
data['commands'] = json.loads(json_resp)

# Sanitize commands in the same way than when returning them using the serializer
for buildcommand in data["commands"]:
buildcommand["command"] = sanitize_build_command(
buildcommand["command"],
instance.project.slug,
instance.version.slug,
)
except Exception:
log.exception(
'Failed to read build data from storage.',
Expand Down
29 changes: 24 additions & 5 deletions readthedocs/rtd_tests/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,8 +484,20 @@ def test_make_build_commands(self):
'/api/v2/command/',
{
"build": build["id"],
"command": "echo test",
"description": "foo",
"command": "$CONDA_ENVS_PATH/$CONDA_DEFAULT_ENV/bin/python -m sphinx",
"description": "Conda and Sphinx command",
"exit_code": 0,
"start_time": start_time,
"end_time": end_time,
},
format="json",
)
resp = client.post(
"/api/v2/command/",
{
"build": build["id"],
"command": "$READTHEDOCS_VIRTUALENV_PATH/bin/python -m sphinx",
"description": "Python and Sphinx command",
"exit_code": 0,
"start_time": start_time,
"end_time": end_time,
Expand All @@ -496,10 +508,12 @@ def test_make_build_commands(self):
resp = client.get('/api/v2/build/%s/' % build['id'])
self.assertEqual(resp.status_code, 200)
build = resp.data
self.assertEqual(len(build["commands"]), 1)
self.assertEqual(build["commands"][0]["command"], "echo test")
self.assertEqual(len(build["commands"]), 2)
self.assertEqual(build["commands"][0]["command"], "python -m sphinx")
self.assertEqual(build["commands"][0]["run_time"], 5)
self.assertEqual(build["commands"][0]["description"], "foo")
self.assertEqual(
build["commands"][0]["description"], "Conda and Sphinx command"
)
self.assertEqual(build["commands"][0]["exit_code"], 0)
self.assertEqual(
dateutil.parser.parse(build["commands"][0]["start_time"]), start_time
Expand All @@ -508,6 +522,11 @@ def test_make_build_commands(self):
dateutil.parser.parse(build["commands"][0]["end_time"]), end_time
)

self.assertEqual(build["commands"][1]["command"], "python -m sphinx")
self.assertEqual(
build["commands"][1]["description"], "Python and Sphinx command"
)

def test_get_raw_log_success(self):
project = Project.objects.get(pk=1)
version = project.versions.first()
Expand Down