From faec2a295ba6b42763ed9d05b13b9ff58d371d74 Mon Sep 17 00:00:00 2001
From: Manuel Kaufmann
Date: Wed, 11 Jan 2023 15:18:31 +0100
Subject: [PATCH 01/19] Build: standardize output directory for artifacts

Always expose the final artifacts under the `_readthedocs/<format>/`
directory, where `<format>` can be one of the following:

- html
- htmlzip
- pdf
- epub
- json

This allows all users of the platform to have the same behavior, no matter
whether they are using the default Sphinx/MkDocs builders or a custom one
using `build.commands`.

Besides, this commit removes the "intermediary" directory used _before_
uploading the files to S3. This allows users to modify the output files
directly from `_readthedocs/` using `build.jobs.post_build`, for example.

- `BaseBuilder.type` is removed since it was not required anymore.
- There is no `old_artifact_path` anymore since there is no intermediary
  folder.
- The `sphinx-build` command is executed from the path where the repository
  is cloned, instead of from the directory where the `conf.py` lives. This
  simplifies the code and lets us use a relative path for the build output
  directory (e.g. `sphinx-build -b html ... docs/ _readthedocs/html`).
- A new `BaseSphinx._post_build` method was added to clean up the output
  directory (leaving only one .epub, .zip and .pdf file before moving
  forward).
- These changes should allow us to support _all formats_ for
  `build.commands`. We would need to check if these standard directories
  exist, set these formats in the version and upload their content.
- The upload step now validates that there is one and only one PDF, ePUB
  and ZIP file when they are built. Currently, we only support one file per
  version. In the future we could remove this restriction. It fails
  silently for now, which is not great. We need to refactor this code a
  little to fail the build properly and communicate this to the user. I
  think it's _the correct way_ of doing it.
- This could also allow us to support other ways to build PDF (SimplePDF or
  `tectonic`, see https://github.com/readthedocs/readthedocs.org/issues/9598)
  and also ZIP (e.g. zundler, see
  https://github.com/readthedocs/meta/issues/77#issuecomment-1301927685).

Closes https://github.com/readthedocs/readthedocs.org/issues/9179
---
 readthedocs/doc_builder/backends/mkdocs.py |   9 +-
 readthedocs/doc_builder/backends/sphinx.py | 264 +++++++++------------
 readthedocs/doc_builder/base.py            |  38 +--
 readthedocs/doc_builder/director.py        |  16 +-
 readthedocs/projects/models.py             |   4 +-
 readthedocs/projects/tasks/builds.py       |  35 ++-
 6 files changed, 157 insertions(+), 209 deletions(-)

diff --git a/readthedocs/doc_builder/backends/mkdocs.py b/readthedocs/doc_builder/backends/mkdocs.py
index 832301ec847..355b95a7043 100644
--- a/readthedocs/doc_builder/backends/mkdocs.py
+++ b/readthedocs/doc_builder/backends/mkdocs.py
@@ -46,10 +46,6 @@ class BaseMkdocs(BaseBuilder):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.yaml_file = self.get_yaml_config()
-        self.old_artifact_path = os.path.join(
-            os.path.dirname(self.yaml_file),
-            self.build_dir,
-        )
 
         # README: historically, the default theme was ``readthedocs`` but in
         # https://github.com/rtfd/readthedocs.org/pull/4556 we change it to
@@ -343,9 +339,8 @@ def get_theme_name(self, mkdocs_config):
 
 
 class MkdocsHTML(BaseMkdocs):
-    type = 'mkdocs'
-    builder = 'build'
-    build_dir = '_build/html'
+    builder = "build"
+    build_dir = "_readthedocs/html"
 
 
 # TODO: find a better way to integrate with MkDocs.
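
As an illustration of the layout this patch standardizes, here is a minimal
sketch (not part of the patch itself) of where each format ends up inside the
cloned repository. The checkout path below is a made-up example; the real one
comes from `Project.checkout_path()`:

    import os

    # Hypothetical checkout location; in the actual code this value comes
    # from Project.checkout_path() for the version being built.
    checkout_path = "/home/docs/checkouts/project/latest"

    # After this patch, every format is written to "_readthedocs/<format>"
    # inside the clone, and uploaded to storage straight from there.
    for format_ in ("html", "htmlzip", "pdf", "epub", "json"):
        print(os.path.join(checkout_path, "_readthedocs", format_))
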
diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py index 9f5712b6388..8f34a7f7fc3 100644 --- a/readthedocs/doc_builder/backends/sphinx.py +++ b/readthedocs/doc_builder/backends/sphinx.py @@ -3,8 +3,11 @@ .. _Sphinx: http://www.sphinx-doc.org/ """ + import itertools import os +import shutil +import tempfile import zipfile from glob import glob from pathlib import Path @@ -18,13 +21,13 @@ from readthedocs.api.v2.client import api from readthedocs.builds import utils as version_utils -from readthedocs.core.utils.filesystem import safe_copytree, safe_open, safe_rmtree +from readthedocs.core.utils.filesystem import safe_open, safe_rmtree from readthedocs.projects.constants import PUBLIC from readthedocs.projects.exceptions import ProjectConfigurationError, UserFileNotFound from readthedocs.projects.models import Feature from readthedocs.projects.utils import safe_write -from ..base import BaseBuilder, restoring_chdir +from ..base import BaseBuilder from ..constants import PDF_RE from ..environments import BuildCommand, DockerBuildCommand from ..exceptions import BuildUserError @@ -37,6 +40,8 @@ class BaseSphinx(BaseBuilder): """The parent for most sphinx builders.""" + sphinx_doctrees_dir = "_build/doctrees" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.config_file = self.config.sphinx.configuration @@ -48,16 +53,8 @@ def __init__(self, *args, **kwargs): self.project_path, self.config_file, ) - self.old_artifact_path = os.path.join( - os.path.dirname(self.config_file), - self.sphinx_build_dir, - ) except ProjectConfigurationError: - docs_dir = self.docs_dir() - self.old_artifact_path = os.path.join( - docs_dir, - self.sphinx_build_dir, - ) + pass def _write_config(self, master_doc='index'): """Create ``conf.py`` if it doesn't exist.""" @@ -264,7 +261,6 @@ def append_conf(self): ) def build(self): - self.clean() project = self.project build_command = [ *self.get_sphinx_cmd(), @@ -273,22 +269,34 @@ def build(self): *self.sphinx_parallel_arg(), ] if self.config.sphinx.fail_on_warning: - build_command.extend(['-W', '--keep-going']) - build_command.extend([ - '-b', - self.sphinx_builder, - '-d', - '_build/doctrees', - '-D', - 'language={lang}'.format(lang=project.language), - '.', - self.sphinx_build_dir, - ]) + build_command.extend(["-W", "--keep-going"]) + build_command.extend( + [ + "-b", + self.sphinx_builder, + "-d", + self.sphinx_doctrees_dir, + "-D", + f"language={project.language}", + # Execute the command at the clone directory, + # but use `docs/` instead of `.` for the Sphinx path + os.path.dirname( + os.path.relpath( + self.config_file, + self.project_path, + ), + ), + self.sphinx_build_dir, + ] + ) cmd_ret = self.run( *build_command, - cwd=os.path.dirname(self.config_file), bin_path=self.python_env.venv_bin(), + cwd=self.project_path, ) + + self._post_build() + return cmd_ret.successful def get_sphinx_cmd(self): @@ -337,8 +345,10 @@ def venv_sphinx_supports_latexmk(self): class HtmlBuilder(BaseSphinx): - type = 'sphinx' - sphinx_build_dir = '_build/html' + # Build directory is relative to the path where ``sphinx-build`` command is executed from. 
+ # Currently, we are executing it form the project's path + # (from inside where the repository was clonned) + sphinx_build_dir = "_readthedocs/html" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -346,31 +356,8 @@ def __init__(self, *args, **kwargs): if self.project.has_feature(Feature.USE_SPHINX_BUILDERS): self.sphinx_builder = 'html' - def move(self, **__): - super().move() - # Copy JSON artifacts to its own directory - # to keep compatibility with the older builder. - json_path = os.path.abspath( - os.path.join(self.old_artifact_path, '..', 'json'), - ) - json_path_target = self.project.artifact_path( - version=self.version.slug, - type_='sphinx_search', - ) - if os.path.exists(json_path): - if os.path.exists(json_path_target): - safe_rmtree(json_path_target) - log.debug('Copying json on the local filesystem') - safe_copytree( - json_path, - json_path_target, - ) - else: - log.warning('Not moving json because the build dir is unknown.',) - class HtmlDirBuilder(HtmlBuilder): - type = 'sphinx_htmldir' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -380,7 +367,6 @@ def __init__(self, *args, **kwargs): class SingleHtmlBuilder(HtmlBuilder): - type = 'sphinx_singlehtml' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -390,65 +376,62 @@ def __init__(self, *args, **kwargs): class LocalMediaBuilder(BaseSphinx): - type = 'sphinx_localmedia' sphinx_builder = 'readthedocssinglehtmllocalmedia' - sphinx_build_dir = '_build/localmedia' - - @restoring_chdir - def move(self, **__): - if not os.path.exists(self.old_artifact_path): - log.warning('Not moving localmedia because the build dir is unknown.') - return + sphinx_build_dir = "_readthedocs/htmlzip" - log.debug('Creating zip file from path.', path=self.old_artifact_path) + def _post_build(self): + """Internal post build to create the ZIP file from the HTML output.""" + sphinx_build_dir = os.path.join(self.project_path, self.sphinx_build_dir) + temp_zip_file = tempfile.mktemp(suffix=".zip") target_file = os.path.join( - self.target, - '{}.zip'.format(self.project.slug), + sphinx_build_dir, + f"{self.project.slug}.zip", ) - if not os.path.exists(self.target): - os.makedirs(self.target) - if os.path.exists(target_file): - os.remove(target_file) - - # Create a .zip file - os.chdir(self.old_artifact_path) - archive = zipfile.ZipFile(target_file, 'w') - for root, __, files in os.walk('.'): + + archive = zipfile.ZipFile(temp_zip_file, "w") + for root, __, files in os.walk(sphinx_build_dir): for fname in files: to_write = os.path.join(root, fname) archive.write( filename=to_write, arcname=os.path.join( - '{}-{}'.format(self.project.slug, self.version.slug), - to_write, + f"{self.project.slug}-{self.version.slug}", + os.path.relpath( + to_write, + sphinx_build_dir, + ), ), ) archive.close() + safe_rmtree(sphinx_build_dir) + os.makedirs(sphinx_build_dir) + shutil.move(temp_zip_file, target_file) + class EpubBuilder(BaseSphinx): - type = 'sphinx_epub' - sphinx_builder = 'epub' - sphinx_build_dir = '_build/epub' - - def move(self, **__): - from_globs = glob(os.path.join(self.old_artifact_path, '*.epub')) - if not os.path.exists(self.target): - os.makedirs(self.target) - if from_globs: - from_file = from_globs[0] - to_file = os.path.join( - self.target, - '{}.epub'.format(self.project.slug), - ) - self.run( - 'mv', - '-f', - from_file, - to_file, - cwd=self.project_path, - ) + sphinx_builder = "epub" + sphinx_build_dir = "_readthedocs/epub" + + def _post_build(self): + """Internal 
post build to cleanup EPUB output directory and leave only one .epub file.""" + sphinx_build_dir = os.path.join(self.project_path, self.sphinx_build_dir) + temp_epub_file = tempfile.mktemp(suffix=".epub") + target_file = os.path.join( + sphinx_build_dir, + f"{self.project.slug}.epub", + ) + + epub_sphinx_filepaths = glob(os.path.join(sphinx_build_dir, "*.epub")) + if epub_sphinx_filepaths: + # NOTE: we currently support only one .epub per version + epub_filepath = epub_sphinx_filepaths[0] + + shutil.move(epub_filepath, temp_epub_file) + safe_rmtree(sphinx_build_dir) + os.makedirs(sphinx_build_dir) + shutil.move(temp_epub_file, target_file) class LatexBuildCommand(BuildCommand): @@ -479,30 +462,36 @@ class PdfBuilder(BaseSphinx): """Builder to generate PDF documentation.""" - type = 'sphinx_pdf' - sphinx_build_dir = '_build/latex' + sphinx_build_dir = "_readthedocs/pdf" + sphinx_builder = "latex" pdf_file_name = None def build(self): - self.clean() - cwd = os.path.dirname(self.config_file) - - # Default to this so we can return it always. self.run( *self.get_sphinx_cmd(), - '-b', - 'latex', + # New arguments for standardization + "-T", + "-E", *self.sphinx_parallel_arg(), - '-D', - 'language={lang}'.format(lang=self.project.language), - '-d', - '_build/doctrees', - '.', - '_build/latex', - cwd=cwd, + "-b", + self.sphinx_builder, + "-d", + self.sphinx_doctrees_dir, + "-D", + f"language={self.project.language}", + # Execute the command at the clone directory, + # but use `docs/` instead of `.` for the Sphinx path + os.path.dirname( + os.path.relpath( + self.config_file, + self.project_path, + ), + ), + self.sphinx_build_dir, + cwd=self.project_path, bin_path=self.python_env.venv_bin(), ) - latex_cwd = os.path.join(cwd, '_build', 'latex') + latex_cwd = os.path.join(self.project_path, self.sphinx_build_dir) tex_files = glob(os.path.join(latex_cwd, '*.tex')) if not tex_files: @@ -512,9 +501,12 @@ def build(self): # Build PDF with ``latexmk`` if Sphinx supports it, otherwise fallback # to ``pdflatex`` to support old versions if self.venv_sphinx_supports_latexmk(): - return self._build_latexmk(cwd, latex_cwd) + success = self._build_latexmk(self.project_path, latex_cwd) + else: + success = self._build_pdflatex(tex_files, latex_cwd) - return self._build_pdflatex(tex_files, latex_cwd) + self._post_build() + return success def _build_latexmk(self, cwd, latex_cwd): # These steps are copied from the Makefile generated by Sphinx >= 1.6 @@ -632,40 +624,20 @@ def _build_pdflatex(self, tex_files, latex_cwd): pdf_commands.append(cmd_ret) return all(cmd.successful for cmd in pdf_commands) - def move(self, **__): - if not os.path.exists(self.target): - os.makedirs(self.target) - - exact = os.path.join( - self.old_artifact_path, - '{}.pdf'.format(self.project.slug), - ) - exact_upper = os.path.join( - self.old_artifact_path, - '{}.pdf'.format(self.project.slug.capitalize()), + def _post_build(self): + """Internal post build to cleanup PDF output directory and leave only one .pdf file.""" + # TODO: merge this with ePUB since it's pretty much the same + sphinx_build_dir = os.path.join(self.project_path, self.sphinx_build_dir) + temp_pdf_file = tempfile.mktemp(suffix=".pdf") + target_file = os.path.join( + sphinx_build_dir, + self.pdf_file_name, ) - if self.pdf_file_name and os.path.exists(self.pdf_file_name): - from_file = self.pdf_file_name - if os.path.exists(exact): - from_file = exact - elif os.path.exists(exact_upper): - from_file = exact_upper - else: - from_globs = 
glob(os.path.join(self.old_artifact_path, '*.pdf')) - if from_globs: - from_file = max(from_globs, key=os.path.getmtime) - else: - from_file = None - if from_file: - to_file = os.path.join( - self.target, - '{}.pdf'.format(self.project.slug), - ) - self.run( - 'mv', - '-f', - from_file, - to_file, - cwd=self.project_path, - ) + # NOTE: we currently support only one .pdf per version + pdf_sphinx_filepath = os.path.join(sphinx_build_dir, self.pdf_file_name) + if os.path.exists(pdf_sphinx_filepath): + shutil.move(pdf_sphinx_filepath, temp_pdf_file) + safe_rmtree(sphinx_build_dir) + os.makedirs(sphinx_build_dir) + shutil.move(temp_pdf_file, target_file) diff --git a/readthedocs/doc_builder/base.py b/readthedocs/doc_builder/base.py index 42f5f0acd65..5b08525567d 100644 --- a/readthedocs/doc_builder/base.py +++ b/readthedocs/doc_builder/base.py @@ -1,12 +1,11 @@ """Base classes for Builders.""" import os -import shutil from functools import wraps import structlog -from readthedocs.core.utils.filesystem import safe_copytree, safe_open, safe_rmtree +from readthedocs.core.utils.filesystem import safe_open from readthedocs.projects.models import Feature log = structlog.get_logger(__name__) @@ -27,15 +26,9 @@ def decorator(*args, **kw): class BaseBuilder: - """ - The Base for all Builders. Defines the API for subclasses. - - Expects subclasses to define ``old_artifact_path``, which points at the - directory where artifacts should be copied from. - """ + """The Base for all Builders. Defines the API for subclasses.""" ignore_patterns = [] - old_artifact_path = None def __init__(self, build_env, python_env): self.build_env = build_env @@ -44,10 +37,6 @@ def __init__(self, build_env, python_env): self.project = build_env.project self.config = python_env.config if python_env else None self.project_path = self.project.checkout_path(self.version.slug) - self.target = self.project.artifact_path( - version=self.version.slug, - type_=self.type, - ) def get_final_doctype(self): """Some builders may have a different doctype at build time.""" @@ -60,27 +49,8 @@ def build(self): """Do the actual building of the documentation.""" raise NotImplementedError - def move(self, **__): - """Move the generated documentation to its artifact directory.""" - if os.path.exists(self.old_artifact_path): - if os.path.exists(self.target): - safe_rmtree(self.target) - log.debug('Copying output type on the local filesystem.', output_type=self.type) - log.debug('Ignoring patterns.', patterns=self.ignore_patterns) - safe_copytree( - self.old_artifact_path, - self.target, - ignore=shutil.ignore_patterns(*self.ignore_patterns), - ) - else: - log.warning('Not moving docs because the build dir is unknown.') - - def clean(self, **__): - """Clean the path where documentation will be built.""" - # NOTE: this shouldn't be needed. We are always CLEAN_AFTER_BUILD now - if os.path.exists(self.old_artifact_path): - safe_rmtree(self.old_artifact_path) - log.info('Removing old artifact path.', path=self.old_artifact_path) + def _post_build(self): + """Execute extra steps (e.g. 
create ZIP, rename PDF, etc) after building if required.""" def docs_dir(self, docs_dir=None, **__): """Handle creating a custom docs_dir if it doesn't exist.""" diff --git a/readthedocs/doc_builder/director.py b/readthedocs/doc_builder/director.py index c8d60a6bd3b..ed576f51e60 100644 --- a/readthedocs/doc_builder/director.py +++ b/readthedocs/doc_builder/director.py @@ -6,12 +6,11 @@ from django.utils.translation import gettext_lazy as _ from readthedocs.builds.constants import EXTERNAL -from readthedocs.core.utils.filesystem import safe_copytree from readthedocs.doc_builder.config import load_yaml_config from readthedocs.doc_builder.exceptions import BuildUserError from readthedocs.doc_builder.loader import get_builder_class from readthedocs.doc_builder.python_environments import Conda, Virtualenv -from readthedocs.projects.constants import BUILD_COMMANDS_OUTPUT_PATH_HTML, GENERIC +from readthedocs.projects.constants import BUILD_COMMANDS_OUTPUT_PATH_HTML from readthedocs.projects.exceptions import RepositoryError from readthedocs.projects.models import Feature from readthedocs.projects.signals import after_build, before_build, before_vcs @@ -362,17 +361,10 @@ def run_build_commands(self): record=False, ) - # Copy files to artifacts path so they are uploaded to S3 - target = self.data.project.artifact_path( - version=self.data.version.slug, - type_=GENERIC, - ) - artifacts_path = os.path.join(cwd, BUILD_COMMANDS_OUTPUT_PATH_HTML) - if not os.path.exists(artifacts_path): + html_output_path = os.path.join(cwd, BUILD_COMMANDS_OUTPUT_PATH_HTML) + if not os.path.exists(html_output_path): raise BuildUserError(BuildUserError.BUILD_COMMANDS_WITHOUT_OUTPUT) - safe_copytree(artifacts_path, target) - # Update the `Version.documentation_type` to match the doctype defined # by the config file. When using `build.commands` it will be `GENERIC` self.data.version.documentation_type = self.data.config.doctype @@ -536,8 +528,6 @@ def build_docs_class(self, builder_class): self.data.version.documentation_type = builder.get_final_doctype() success = builder.build() - builder.move() - return success def get_vcs_env_vars(self): diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index c05d9eb4388..017a6710ab7 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -817,8 +817,8 @@ def full_doc_path(self, version=LATEST): return doc_base def artifact_path(self, type_, version=LATEST): - """The path to the build html docs in the project.""" - return os.path.join(self.doc_path, 'artifacts', version, type_) + """The path to the build docs output for the project.""" + return os.path.join(self.checkout_path(version=version), "_readthedocs", type_) def conf_file(self, version=LATEST): """Find a ``conf.py`` file in the project checkout.""" diff --git a/readthedocs/projects/tasks/builds.py b/readthedocs/projects/tasks/builds.py index f5a72d2a0c3..ff6b98541ff 100644 --- a/readthedocs/projects/tasks/builds.py +++ b/readthedocs/projects/tasks/builds.py @@ -4,6 +4,7 @@ This includes fetching repository code, cleaning ``conf.py`` files, and rebuilding documentation. 
""" +import os import signal import socket from collections import defaultdict @@ -800,32 +801,52 @@ def store_build_artifacts( # HTML media if html: - types_to_copy.append(('html', self.data.config.doctype)) + types_to_copy.append("html") # Search media (JSON) if search: - types_to_copy.append(('json', 'sphinx_search')) + types_to_copy.append("json") if localmedia: - types_to_copy.append(('htmlzip', 'sphinx_localmedia')) + types_to_copy.append("htmlzip") else: types_to_delete.append('htmlzip') if pdf: - types_to_copy.append(('pdf', 'sphinx_pdf')) + types_to_copy.append("pdf") else: types_to_delete.append('pdf') if epub: - types_to_copy.append(('epub', 'sphinx_epub')) + types_to_copy.append("epub") else: types_to_delete.append('epub') - for media_type, build_type in types_to_copy: + for media_type in types_to_copy: + # NOTE: here is where we get the correct FROM path to upload from_path = self.data.version.project.artifact_path( version=self.data.version.slug, - type_=build_type, + type_=media_type, ) + + # Check if there are multiple files on source directories. + # These output format does not support multiple files yet. + # In case multiple files are found, the upload for this format is not performed. + # + # TODO: we should fail the build for these cases and clearly communicate this. + # to the user. To do this, we need to call this method (``store_build_artifacts``) + # since the ``execute`` method. + # It will allow us to just `raise BuildUserError` and handle it at + # Celery `on_failure` handler. + if media_type in ("htmlzip", "epub", "pdf"): + if len(os.listdir(from_path)) > 1: + log.exception( + "Multiple files are not supported for this format. " + "Skipping this output format.", + output_format=media_type, + ) + continue + to_path = self.data.version.project.get_storage_path( type_=media_type, version_slug=self.data.version.slug, From 0666df6b30458c7bfc1d16829cc0cbfa3503056a Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 11 Jan 2023 17:05:57 +0100 Subject: [PATCH 02/19] Build: decideif there is an output type based on the path existence --- readthedocs/doc_builder/director.py | 13 ++-- readthedocs/projects/tasks/builds.py | 90 ++++++++++++---------------- 2 files changed, 42 insertions(+), 61 deletions(-) diff --git a/readthedocs/doc_builder/director.py b/readthedocs/doc_builder/director.py index ed576f51e60..fedd6ed7f63 100644 --- a/readthedocs/doc_builder/director.py +++ b/readthedocs/doc_builder/director.py @@ -180,11 +180,11 @@ def build(self): self.run_build_job("pre_build") - self.data.outcomes["html"] = self.build_html() - self.data.outcomes["search"] = self.is_type_sphinx() - self.data.outcomes["localmedia"] = self.build_htmlzip() - self.data.outcomes["pdf"] = self.build_pdf() - self.data.outcomes["epub"] = self.build_epub() + # Build all formats + self.build_html() + self.build_htmlzip() + self.build_pdf() + self.build_epub() self.run_build_job("post_build") @@ -369,9 +369,6 @@ def run_build_commands(self): # by the config file. When using `build.commands` it will be `GENERIC` self.data.version.documentation_type = self.data.config.doctype - # Mark HTML as the only outcome available for this type of builder - self.data.outcomes["html"] = True - def install_build_tools(self): """ Install all ``build.tools`` defined by the user in the config file. 
diff --git a/readthedocs/projects/tasks/builds.py b/readthedocs/projects/tasks/builds.py index ff6b98541ff..53e3cf91a52 100644 --- a/readthedocs/projects/tasks/builds.py +++ b/readthedocs/projects/tasks/builds.py @@ -7,7 +7,6 @@ import os import signal import socket -from collections import defaultdict from dataclasses import dataclass, field import structlog @@ -109,8 +108,6 @@ class TaskData: # Dictionary returned from the API. build: dict = field(default_factory=dict) - # If HTML, PDF, ePub, etc formats were built. - outcomes: dict = field(default_factory=lambda: defaultdict(lambda: False)) # Build data for analytics (telemetry). build_data: dict = field(default_factory=dict) @@ -515,28 +512,38 @@ def on_failure(self, exc, task_id, args, kwargs, einfo): self.data.build['success'] = False def on_success(self, retval, task_id, args, kwargs): - html = self.data.outcomes['html'] - search = self.data.outcomes['search'] - localmedia = self.data.outcomes['localmedia'] - pdf = self.data.outcomes['pdf'] - epub = self.data.outcomes['epub'] - time_before_store_build_artifacts = timezone.now() # Store build artifacts to storage (local or cloud storage) - self.store_build_artifacts( - html=html, - search=search, - localmedia=localmedia, - pdf=pdf, - epub=epub, - ) + # + # TODO: move ``store_build_artifacts`` inside ``execute`` so we can + # handle exceptions properly on ``on_failure`` + self.store_build_artifacts() log.info( "Store build artifacts finished.", time=(timezone.now() - time_before_store_build_artifacts).seconds, ) - # NOTE: we are updating the db version instance *only* when # HTML are built successfully. + html = os.path.exists( + self.data.project.artifact_path( + version=self.data.version.slug, type_="html" + ) + ) + localmedia = os.path.exists( + self.data.project.artifact_path( + version=self.data.version.slug, type_="htmlzip" + ) + ) + pdf = os.path.exists( + self.data.project.artifact_path(version=self.data.version.slug, type_="pdf") + ) + epub = os.path.exists( + self.data.project.artifact_path( + version=self.data.version.slug, type_="epub" + ) + ) + + # NOTE: we are updating the db version instance *only* when if html: try: api_v2.version(self.data.version.pk).patch( @@ -768,14 +775,7 @@ def set_valid_clone(self): self.data.project.has_valid_clone = True self.data.version.project.has_valid_clone = True - def store_build_artifacts( - self, - html=False, - localmedia=False, - search=False, - pdf=False, - epub=False, - ): + def store_build_artifacts(self): """ Save build artifacts to "storage" using Django's storage API. @@ -783,12 +783,6 @@ def store_build_artifacts( such as S3, Azure storage or Google Cloud Storage. Remove build artifacts of types not included in this build (PDF, ePub, zip only). 
- - :param html: whether to save HTML output - :param localmedia: whether to save localmedia (htmlzip) output - :param search: whether to save search artifacts - :param pdf: whether to save PDF output - :param epub: whether to save ePub output """ log.info('Writing build artifacts to media storage') # NOTE: I don't remember why we removed this state from the Build @@ -799,28 +793,18 @@ def store_build_artifacts( types_to_copy = [] types_to_delete = [] - # HTML media - if html: - types_to_copy.append("html") - - # Search media (JSON) - if search: - types_to_copy.append("json") - - if localmedia: - types_to_copy.append("htmlzip") - else: - types_to_delete.append('htmlzip') - - if pdf: - types_to_copy.append("pdf") - else: - types_to_delete.append('pdf') - - if epub: - types_to_copy.append("epub") - else: - types_to_delete.append('epub') + artifact_types = ("html", "json", "htmlzip", "pdf", "epub") + for artifact_type in artifact_types: + if os.path.exists( + self.data.project.artifact_path( + version=self.data.version.slug, + type_=artifact_type, + ) + ): + types_to_copy.append(artifact_type) + # Never delete HTML nor JSON (search index) + elif artifact_type not in ("html", "json"): + types_to_delete.append(artifact_type) for media_type in types_to_copy: # NOTE: here is where we get the correct FROM path to upload From b494c648d76a9b05a5a93cb1c454c6ddcaeceed2 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 11 Jan 2023 19:51:51 +0100 Subject: [PATCH 03/19] Test: update build tests to match changes --- readthedocs/projects/models.py | 7 +- readthedocs/projects/tasks/builds.py | 2 +- .../projects/tests/test_build_tasks.py | 126 ++++++++++++------ 3 files changed, 91 insertions(+), 44 deletions(-) diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 017a6710ab7..68042b4aa7e 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -817,7 +817,12 @@ def full_doc_path(self, version=LATEST): return doc_base def artifact_path(self, type_, version=LATEST): - """The path to the build docs output for the project.""" + """ + The path to the build docs output for the project. + + :param type_: one of `html`, `json`, `htmlzip`, `pdf`, `epub`. + :param version: slug of the version. 
+ """ return os.path.join(self.checkout_path(version=version), "_readthedocs", type_) def conf_file(self, version=LATEST): diff --git a/readthedocs/projects/tasks/builds.py b/readthedocs/projects/tasks/builds.py index 53e3cf91a52..2585834e03a 100644 --- a/readthedocs/projects/tasks/builds.py +++ b/readthedocs/projects/tasks/builds.py @@ -829,7 +829,7 @@ def store_build_artifacts(self): "Skipping this output format.", output_format=media_type, ) - continue + continue to_path = self.data.version.project.get_storage_path( type_=media_type, diff --git a/readthedocs/projects/tests/test_build_tasks.py b/readthedocs/projects/tests/test_build_tasks.py index 18afa6460b0..eeca38365d5 100644 --- a/readthedocs/projects/tests/test_build_tasks.py +++ b/readthedocs/projects/tests/test_build_tasks.py @@ -118,8 +118,8 @@ def test_build_sphinx_formats(self, load_yaml_config, formats, builders): "_build/doctrees", "-D", "language=en", - ".", - "_build/html", + "docs", + "_readthedocs/html", cwd=mock.ANY, bin_path=mock.ANY, ) @@ -139,8 +139,8 @@ def test_build_sphinx_formats(self, load_yaml_config, formats, builders): "_build/doctrees", "-D", "language=en", - ".", - "_build/html", + "docs", + "_readthedocs/html", cwd=mock.ANY, bin_path=mock.ANY, ) @@ -197,6 +197,11 @@ def test_build_updates_documentation_type(self, load_yaml_config): } ) + # Create the artifact paths, so it's detected by the builder + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="html")) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="epub")) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="pdf")) + self._trigger_update_docs_task() # Update version state @@ -205,8 +210,8 @@ def test_build_updates_documentation_type(self, load_yaml_config): assert self.requests_mock.request_history[7].json() == { "built": True, "documentation_type": "mkdocs", - "has_pdf": False, - "has_epub": False, + "has_pdf": True, + "has_epub": True, "has_htmlzip": False, } @@ -292,8 +297,14 @@ def test_get_env_vars(self, load_yaml_config, build_environment, config, externa @mock.patch("readthedocs.projects.tasks.builds.UpdateDocsTask.send_notifications") @mock.patch("readthedocs.projects.tasks.builds.clean_build") @mock.patch("readthedocs.doc_builder.director.load_yaml_config") + @mock.patch("readthedocs.doc_builder.backends.sphinx.PdfBuilder._post_build") + @mock.patch("readthedocs.doc_builder.backends.sphinx.LocalMediaBuilder._post_build") + @mock.patch("readthedocs.doc_builder.backends.sphinx.EpubBuilder._post_build") def test_successful_build( self, + epub_post_build, + htmlzip_post_build, + pdf_post_build, load_yaml_config, clean_build, send_notifications, @@ -313,6 +324,15 @@ def test_successful_build( assert not BuildData.objects.all().exists() + # Create the artifact paths, so it's detected by the builder + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="html")) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="json")) + os.makedirs( + self.project.artifact_path(version=self.version.slug, type_="htmlzip") + ) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="epub")) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="pdf")) + self._trigger_update_docs_task() # It has to be called twice, ``before_start`` and ``after_return`` @@ -528,7 +548,16 @@ def test_failed_build( } @mock.patch("readthedocs.doc_builder.director.load_yaml_config") - def test_build_commands_executed(self, 
load_yaml_config): + @mock.patch("readthedocs.doc_builder.backends.sphinx.PdfBuilder._post_build") + @mock.patch("readthedocs.doc_builder.backends.sphinx.LocalMediaBuilder._post_build") + @mock.patch("readthedocs.doc_builder.backends.sphinx.EpubBuilder._post_build") + def test_build_commands_executed( + self, + epub_post_build, + htmlzip_post_build, + pdf_post_build, + load_yaml_config, + ): load_yaml_config.return_value = self._config_file( { "version": 2, @@ -539,6 +568,15 @@ def test_build_commands_executed(self, load_yaml_config): } ) + # Create the artifact paths, so it's detected by the builder + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="html")) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="json")) + os.makedirs( + self.project.artifact_path(version=self.version.slug, type_="htmlzip") + ) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="epub")) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="pdf")) + self._trigger_update_docs_task() self.mocker.mocks["git.Backend.run"].assert_has_calls( @@ -612,8 +650,8 @@ def test_build_commands_executed(self, load_yaml_config): "_build/doctrees", "-D", "language=en", - ".", - "_build/html", + "docs", + "_readthedocs/html", cwd=mock.ANY, bin_path=mock.ANY, ), @@ -629,8 +667,8 @@ def test_build_commands_executed(self, load_yaml_config): "_build/doctrees", "-D", "language=en", - ".", - "_build/localmedia", + "docs", + "_readthedocs/htmlzip", cwd=mock.ANY, bin_path=mock.ANY, ), @@ -638,14 +676,16 @@ def test_build_commands_executed(self, load_yaml_config): mock.ANY, "-m", "sphinx", + "-T", + "-E", "-b", "latex", - "-D", - "language=en", "-d", "_build/doctrees", - ".", - "_build/latex", + "-D", + "language=en", + "docs", + "_readthedocs/pdf", cwd=mock.ANY, bin_path=mock.ANY, ), @@ -659,16 +699,6 @@ def test_build_commands_executed(self, load_yaml_config): shell=True, record=False, ), - mock.call( - "mv", - "-f", - "output.file", - # TODO: take a look at - # https://callee.readthedocs.io/en/latest/reference/strings.html#callee.strings.EndsWith - # to match `project.pdf` - mock.ANY, - cwd=mock.ANY, - ), mock.call( mock.ANY, "-m", @@ -681,23 +711,35 @@ def test_build_commands_executed(self, load_yaml_config): "_build/doctrees", "-D", "language=en", - ".", - "_build/epub", + "docs", + "_readthedocs/epub", cwd=mock.ANY, bin_path=mock.ANY, ), - mock.call( - "mv", - "-f", - "output.file", - # TODO: take a look at - # https://callee.readthedocs.io/en/latest/reference/strings.html#callee.strings.EndsWith - # to match `project.epub` - mock.ANY, - cwd=mock.ANY, - ), # FIXME: I think we are hitting this issue here: # https://github.com/pytest-dev/pytest-mock/issues/234 + mock.call("lsb_release", "--description", record=False, demux=True), + mock.call("python", "--version", record=False, demux=True), + mock.call( + "dpkg-query", + "--showformat", + "${package} ${version}\\n", + "--show", + record=False, + demux=True, + ), + mock.call( + "python", + "-m", + "pip", + "list", + "--pre", + "--local", + "--format", + "json", + record=False, + demux=True, + ), ] ) @@ -1141,8 +1183,8 @@ def test_sphinx_fail_on_warning(self, load_yaml_config): "_build/doctrees", "-D", "language=en", - ".", - "_build/html", + "docs", + "_readthedocs/html", cwd=mock.ANY, bin_path=mock.ANY, ), @@ -1172,7 +1214,7 @@ def test_mkdocs_fail_on_warning(self, load_yaml_config): "build", "--clean", "--site-dir", - "_build/html", + "_readthedocs/html", "--config-file", 
"docs/mkdocs.yaml", "--strict", # fail on warning flag @@ -1520,8 +1562,8 @@ def test_sphinx_builder(self, load_yaml_config, value, command): "_build/doctrees", "-D", "language=en", - ".", - "_build/html", + "docs", + "_readthedocs/html", cwd=mock.ANY, bin_path=mock.ANY, ), From d2bf3f0704a6c28665b6e73f11d2b2dbd1a8378a Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Thu, 12 Jan 2023 10:32:48 +0100 Subject: [PATCH 04/19] Test: check if the file exist before continue --- readthedocs/doc_builder/backends/sphinx.py | 5 +++++ readthedocs/projects/tests/test_build_tasks.py | 12 ------------ 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py index 8f34a7f7fc3..5d74c9050f8 100644 --- a/readthedocs/doc_builder/backends/sphinx.py +++ b/readthedocs/doc_builder/backends/sphinx.py @@ -626,6 +626,11 @@ def _build_pdflatex(self, tex_files, latex_cwd): def _post_build(self): """Internal post build to cleanup PDF output directory and leave only one .pdf file.""" + + if not self.pdf_file_name: + log.info("PDF file was not generated/found.") + return + # TODO: merge this with ePUB since it's pretty much the same sphinx_build_dir = os.path.join(self.project_path, self.sphinx_build_dir) temp_pdf_file = tempfile.mktemp(suffix=".pdf") diff --git a/readthedocs/projects/tests/test_build_tasks.py b/readthedocs/projects/tests/test_build_tasks.py index eeca38365d5..02a73882163 100644 --- a/readthedocs/projects/tests/test_build_tasks.py +++ b/readthedocs/projects/tests/test_build_tasks.py @@ -297,14 +297,8 @@ def test_get_env_vars(self, load_yaml_config, build_environment, config, externa @mock.patch("readthedocs.projects.tasks.builds.UpdateDocsTask.send_notifications") @mock.patch("readthedocs.projects.tasks.builds.clean_build") @mock.patch("readthedocs.doc_builder.director.load_yaml_config") - @mock.patch("readthedocs.doc_builder.backends.sphinx.PdfBuilder._post_build") - @mock.patch("readthedocs.doc_builder.backends.sphinx.LocalMediaBuilder._post_build") - @mock.patch("readthedocs.doc_builder.backends.sphinx.EpubBuilder._post_build") def test_successful_build( self, - epub_post_build, - htmlzip_post_build, - pdf_post_build, load_yaml_config, clean_build, send_notifications, @@ -548,14 +542,8 @@ def test_failed_build( } @mock.patch("readthedocs.doc_builder.director.load_yaml_config") - @mock.patch("readthedocs.doc_builder.backends.sphinx.PdfBuilder._post_build") - @mock.patch("readthedocs.doc_builder.backends.sphinx.LocalMediaBuilder._post_build") - @mock.patch("readthedocs.doc_builder.backends.sphinx.EpubBuilder._post_build") def test_build_commands_executed( self, - epub_post_build, - htmlzip_post_build, - pdf_post_build, load_yaml_config, ): load_yaml_config.return_value = self._config_file( From 2ab29baa38376e875e976a7d97cc0f90fa4dfed1 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Thu, 12 Jan 2023 10:34:12 +0100 Subject: [PATCH 05/19] Build: simplify the code by running the commands inside the container --- readthedocs/doc_builder/backends/sphinx.py | 88 +++++++++++++--------- 1 file changed, 54 insertions(+), 34 deletions(-) diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py index 5d74c9050f8..5127429814e 100644 --- a/readthedocs/doc_builder/backends/sphinx.py +++ b/readthedocs/doc_builder/backends/sphinx.py @@ -6,9 +6,6 @@ import itertools import os -import shutil -import tempfile -import zipfile from glob import glob from pathlib import Path @@ -21,7 
+18,7 @@ from readthedocs.api.v2.client import api from readthedocs.builds import utils as version_utils -from readthedocs.core.utils.filesystem import safe_open, safe_rmtree +from readthedocs.core.utils.filesystem import safe_open from readthedocs.projects.constants import PUBLIC from readthedocs.projects.exceptions import ProjectConfigurationError, UserFileNotFound from readthedocs.projects.models import Feature @@ -382,31 +379,26 @@ class LocalMediaBuilder(BaseSphinx): def _post_build(self): """Internal post build to create the ZIP file from the HTML output.""" sphinx_build_dir = os.path.join(self.project_path, self.sphinx_build_dir) - temp_zip_file = tempfile.mktemp(suffix=".zip") + temp_zip_file = f"/tmp/{self.project.slug}-{self.version.slug}.zip" target_file = os.path.join( sphinx_build_dir, f"{self.project.slug}.zip", ) - archive = zipfile.ZipFile(temp_zip_file, "w") - for root, __, files in os.walk(sphinx_build_dir): - for fname in files: - to_write = os.path.join(root, fname) - archive.write( - filename=to_write, - arcname=os.path.join( - f"{self.project.slug}-{self.version.slug}", - os.path.relpath( - to_write, - sphinx_build_dir, - ), - ), - ) - archive.close() - - safe_rmtree(sphinx_build_dir) - os.makedirs(sphinx_build_dir) - shutil.move(temp_zip_file, target_file) + # FIXME: ``arcname`` is invalid with this command still, but it's close + self.run( + "zip", + "-r", + temp_zip_file, + self.sphinx_build_dir, + cwd=self.project_path, + record=False, + ) + self.run("rm", "-r", self.sphinx_build_dir, cwd=self.project_path, record=False) + self.run( + "mkdir", "-p", self.sphinx_build_dir, cwd=self.project_path, record=False + ) + self.run("mv", temp_zip_file, target_file, cwd=self.project_path, record=False) class EpubBuilder(BaseSphinx): @@ -417,7 +409,7 @@ class EpubBuilder(BaseSphinx): def _post_build(self): """Internal post build to cleanup EPUB output directory and leave only one .epub file.""" sphinx_build_dir = os.path.join(self.project_path, self.sphinx_build_dir) - temp_epub_file = tempfile.mktemp(suffix=".epub") + temp_epub_file = f"/tmp/{self.project.slug}-{self.version.slug}.epub" target_file = os.path.join( sphinx_build_dir, f"{self.project.slug}.epub", @@ -428,10 +420,22 @@ def _post_build(self): # NOTE: we currently support only one .epub per version epub_filepath = epub_sphinx_filepaths[0] - shutil.move(epub_filepath, temp_epub_file) - safe_rmtree(sphinx_build_dir) - os.makedirs(sphinx_build_dir) - shutil.move(temp_epub_file, target_file) + self.run( + "mv", epub_filepath, temp_epub_file, cwd=self.project_path, record=False + ) + self.run( + "rm", "-r", self.sphinx_build_dir, cwd=self.project_path, record=False + ) + self.run( + "mkdir", + "-p", + self.sphinx_build_dir, + cwd=self.project_path, + record=False, + ) + self.run( + "mv", temp_epub_file, target_file, cwd=self.project_path, record=False + ) class LatexBuildCommand(BuildCommand): @@ -633,7 +637,7 @@ def _post_build(self): # TODO: merge this with ePUB since it's pretty much the same sphinx_build_dir = os.path.join(self.project_path, self.sphinx_build_dir) - temp_pdf_file = tempfile.mktemp(suffix=".pdf") + temp_pdf_file = f"/tmp/{self.project.slug}-{self.version.slug}.pdf" target_file = os.path.join( sphinx_build_dir, self.pdf_file_name, @@ -642,7 +646,23 @@ def _post_build(self): # NOTE: we currently support only one .pdf per version pdf_sphinx_filepath = os.path.join(sphinx_build_dir, self.pdf_file_name) if os.path.exists(pdf_sphinx_filepath): - shutil.move(pdf_sphinx_filepath, temp_pdf_file) - 
safe_rmtree(sphinx_build_dir) - os.makedirs(sphinx_build_dir) - shutil.move(temp_pdf_file, target_file) + self.run( + "mv", + pdf_sphinx_filepath, + temp_pdf_file, + cwd=self.project_path, + record=False, + ) + self.run( + "rm", "-r", self.sphinx_build_dir, cwd=self.project_path, record=False + ) + self.run( + "mkdir", + "-p", + self.sphinx_build_dir, + cwd=self.project_path, + record=False, + ) + self.run( + "mv", temp_pdf_file, target_file, cwd=self.project_path, record=False + ) From faa611fad689675f81101ea643770a6b669bf529 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Thu, 12 Jan 2023 10:45:13 +0100 Subject: [PATCH 06/19] Test: add checks for more commands --- .../projects/tests/test_build_tasks.py | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/readthedocs/projects/tests/test_build_tasks.py b/readthedocs/projects/tests/test_build_tasks.py index 02a73882163..5899815335f 100644 --- a/readthedocs/projects/tests/test_build_tasks.py +++ b/readthedocs/projects/tests/test_build_tasks.py @@ -660,6 +660,27 @@ def test_build_commands_executed( cwd=mock.ANY, bin_path=mock.ANY, ), + mock.call( + "zip", + "-r", + "/tmp/project-latest.zip", + "_readthedocs/htmlzip", + cwd=mock.ANY, + record=False, + ), + mock.call( + "rm", "-r", "_readthedocs/htmlzip", cwd=mock.ANY, record=False + ), + mock.call( + "mkdir", "-p", "_readthedocs/htmlzip", cwd=mock.ANY, record=False + ), + mock.call( + "mv", + "/tmp/project-latest.zip", + mock.ANY, + cwd=mock.ANY, + record=False, + ), mock.call( mock.ANY, "-m", @@ -677,6 +698,8 @@ def test_build_commands_executed( cwd=mock.ANY, bin_path=mock.ANY, ), + # NOTE: pdf `mv` commands and others are not here because the + # PDF resulting file is not found in the process (`_post_build`) mock.call( mock.ANY, "-c", @@ -704,6 +727,24 @@ def test_build_commands_executed( cwd=mock.ANY, bin_path=mock.ANY, ), + mock.call( + "mv", + mock.ANY, + "/tmp/project-latest.epub", + cwd=mock.ANY, + record=False, + ), + mock.call("rm", "-r", "_readthedocs/epub", cwd=mock.ANY, record=False), + mock.call( + "mkdir", "-p", "_readthedocs/epub", cwd=mock.ANY, record=False + ), + mock.call( + "mv", + "/tmp/project-latest.epub", + mock.ANY, + cwd=mock.ANY, + record=False, + ), # FIXME: I think we are hitting this issue here: # https://github.com/pytest-dev/pytest-mock/issues/234 mock.call("lsb_release", "--description", record=False, demux=True), From c80416a627a83b6aeca3bbd26ad534e0e43cd43f Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Tue, 17 Jan 2023 13:11:02 +0100 Subject: [PATCH 07/19] Storage: use constants to make explicit artifact types --- readthedocs/builds/constants.py | 16 ++++++++++++++++ readthedocs/projects/tasks/builds.py | 7 ++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/readthedocs/builds/constants.py b/readthedocs/builds/constants.py index f5bf898bce0..94c3da021ca 100644 --- a/readthedocs/builds/constants.py +++ b/readthedocs/builds/constants.py @@ -148,3 +148,19 @@ MAX_BUILD_COMMAND_SIZE = 1000000 # This keeps us under Azure's upload limit LOCK_EXPIRE = 60 * 180 # Lock expires in 3 hours + +# All artifact types supported by Read the Docs. +# They match the output directory (`_readthedocs/`) +ARTIFACT_TYPES = ( + "html", + "json", + "htmlzip", + "pdf", + "epub", +) +# Artifacts that are not deleted when uploading to the storage, +# even if they weren't re-built in the build process. 
+UNDELETABLE_ARTIFACT_TYPES = ( + "html", + "json", +) diff --git a/readthedocs/projects/tasks/builds.py b/readthedocs/projects/tasks/builds.py index 2585834e03a..ed4707f16ae 100644 --- a/readthedocs/projects/tasks/builds.py +++ b/readthedocs/projects/tasks/builds.py @@ -18,6 +18,7 @@ from readthedocs.api.v2.client import api as api_v2 from readthedocs.builds import tasks as build_tasks from readthedocs.builds.constants import ( + ARTIFACT_TYPES, BUILD_FINAL_STATES, BUILD_STATE_BUILDING, BUILD_STATE_CLONING, @@ -28,6 +29,7 @@ BUILD_STATUS_FAILURE, BUILD_STATUS_SUCCESS, EXTERNAL, + UNDELETABLE_ARTIFACT_TYPES, ) from readthedocs.builds.models import APIVersion, Build from readthedocs.builds.signals import build_complete @@ -793,8 +795,7 @@ def store_build_artifacts(self): types_to_copy = [] types_to_delete = [] - artifact_types = ("html", "json", "htmlzip", "pdf", "epub") - for artifact_type in artifact_types: + for artifact_type in ARTIFACT_TYPES: if os.path.exists( self.data.project.artifact_path( version=self.data.version.slug, @@ -803,7 +804,7 @@ def store_build_artifacts(self): ): types_to_copy.append(artifact_type) # Never delete HTML nor JSON (search index) - elif artifact_type not in ("html", "json"): + elif artifact_type not in UNDELETABLE_ARTIFACT_TYPES: types_to_delete.append(artifact_type) for media_type in types_to_copy: From 67f0958e52acef47e6665430ffc1ec1c47092a4b Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Tue, 17 Jan 2023 13:12:29 +0100 Subject: [PATCH 08/19] PDF builder: raise an error if the PDF file is not found --- readthedocs/doc_builder/backends/sphinx.py | 4 ++-- readthedocs/doc_builder/exceptions.py | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py index 5127429814e..4b08dd40437 100644 --- a/readthedocs/doc_builder/backends/sphinx.py +++ b/readthedocs/doc_builder/backends/sphinx.py @@ -19,6 +19,7 @@ from readthedocs.api.v2.client import api from readthedocs.builds import utils as version_utils from readthedocs.core.utils.filesystem import safe_open +from readthedocs.doc_builder.exceptions import PDFNotFound from readthedocs.projects.constants import PUBLIC from readthedocs.projects.exceptions import ProjectConfigurationError, UserFileNotFound from readthedocs.projects.models import Feature @@ -632,8 +633,7 @@ def _post_build(self): """Internal post build to cleanup PDF output directory and leave only one .pdf file.""" if not self.pdf_file_name: - log.info("PDF file was not generated/found.") - return + raise PDFNotFound() # TODO: merge this with ePUB since it's pretty much the same sphinx_build_dir = os.path.join(self.project_path, self.sphinx_build_dir) diff --git a/readthedocs/doc_builder/exceptions.py b/readthedocs/doc_builder/exceptions.py index 3f0e0c308f6..d3730baa617 100644 --- a/readthedocs/doc_builder/exceptions.py +++ b/readthedocs/doc_builder/exceptions.py @@ -66,6 +66,12 @@ class BuildCancelled(BuildUserError): state = BUILD_STATE_CANCELLED +class PDFNotFound(BuildUserError): + message = gettext_noop( + 'PDF file was not generated/found in "_readthedocs/pdf" output directory.' + ) + + class MkDocsYAMLParseError(BuildUserError): GENERIC_WITH_PARSE_EXCEPTION = gettext_noop( 'Problem parsing MkDocs YAML configuration. 
{exception}', From f54ca20c3d0426ae71882310e41e18f929d5ca0e Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Tue, 17 Jan 2023 13:17:32 +0100 Subject: [PATCH 09/19] Build: use `relative_output_dir` and `absolute_output_dir` --- readthedocs/doc_builder/backends/sphinx.py | 96 +++++++++++++--------- 1 file changed, 56 insertions(+), 40 deletions(-) diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py index 4b08dd40437..4c6aa6e7d62 100644 --- a/readthedocs/doc_builder/backends/sphinx.py +++ b/readthedocs/doc_builder/backends/sphinx.py @@ -40,9 +40,16 @@ class BaseSphinx(BaseBuilder): sphinx_doctrees_dir = "_build/doctrees" + # Output directory relative to where the repository was cloned + # (e.g. "_readthedocs/") + relative_output_dir = "" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.config_file = self.config.sphinx.configuration + self.absolute_output_dir = os.path.abspath( + os.path.join(self.project_path, self.relative_output_dir) + ) try: if not self.config_file: self.config_file = self.project.conf_file(self.version.slug) @@ -343,10 +350,7 @@ def venv_sphinx_supports_latexmk(self): class HtmlBuilder(BaseSphinx): - # Build directory is relative to the path where ``sphinx-build`` command is executed from. - # Currently, we are executing it form the project's path - # (from inside where the repository was clonned) - sphinx_build_dir = "_readthedocs/html" + relative_output_dir = "_readthedocs/html" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -375,15 +379,18 @@ def __init__(self, *args, **kwargs): class LocalMediaBuilder(BaseSphinx): sphinx_builder = 'readthedocssinglehtmllocalmedia' - sphinx_build_dir = "_readthedocs/htmlzip" + relative_output_dir = "_readthedocs/htmlzip" def _post_build(self): """Internal post build to create the ZIP file from the HTML output.""" - sphinx_build_dir = os.path.join(self.project_path, self.sphinx_build_dir) - temp_zip_file = f"/tmp/{self.project.slug}-{self.version.slug}.zip" - target_file = os.path.join( - sphinx_build_dir, - f"{self.project.slug}.zip", + target_file = os.path.abspath( + os.path.join( + self.absolute_output_dir, + # TODO: shouldn't this name include the name of the version as well? + # It seems we were using the project's slug previously. + # So, keeping it like that for now until we decide make that adjustment. 
+ f"{self.project.slug}.zip", + ) ) # FIXME: ``arcname`` is invalid with this command still, but it's close @@ -405,18 +412,17 @@ def _post_build(self): class EpubBuilder(BaseSphinx): sphinx_builder = "epub" - sphinx_build_dir = "_readthedocs/epub" + relative_output_dir = "_readthedocs/epub" def _post_build(self): """Internal post build to cleanup EPUB output directory and leave only one .epub file.""" - sphinx_build_dir = os.path.join(self.project_path, self.sphinx_build_dir) temp_epub_file = f"/tmp/{self.project.slug}-{self.version.slug}.epub" target_file = os.path.join( - sphinx_build_dir, + self.absolute_output_dir, f"{self.project.slug}.epub", ) - epub_sphinx_filepaths = glob(os.path.join(sphinx_build_dir, "*.epub")) + epub_sphinx_filepaths = glob(os.path.join(self.absolute_output_dir, "*.epub")) if epub_sphinx_filepaths: # NOTE: we currently support only one .epub per version epub_filepath = epub_sphinx_filepaths[0] @@ -425,12 +431,16 @@ def _post_build(self): "mv", epub_filepath, temp_epub_file, cwd=self.project_path, record=False ) self.run( - "rm", "-r", self.sphinx_build_dir, cwd=self.project_path, record=False + "rm", + "--recursive", + self.relative_output_dir, + cwd=self.project_path, + record=False, ) self.run( "mkdir", - "-p", - self.sphinx_build_dir, + "--parents", + self.relative_output_dir, cwd=self.project_path, record=False, ) @@ -467,7 +477,7 @@ class PdfBuilder(BaseSphinx): """Builder to generate PDF documentation.""" - sphinx_build_dir = "_readthedocs/pdf" + relative_output_dir = "_readthedocs/pdf" sphinx_builder = "latex" pdf_file_name = None @@ -496,9 +506,8 @@ def build(self): cwd=self.project_path, bin_path=self.python_env.venv_bin(), ) - latex_cwd = os.path.join(self.project_path, self.sphinx_build_dir) - tex_files = glob(os.path.join(latex_cwd, '*.tex')) + tex_files = glob(os.path.join(self.absolute_output_dir, '*.tex')) if not tex_files: raise BuildUserError('No TeX files were found') @@ -506,20 +515,19 @@ def build(self): # Build PDF with ``latexmk`` if Sphinx supports it, otherwise fallback # to ``pdflatex`` to support old versions if self.venv_sphinx_supports_latexmk(): - success = self._build_latexmk(self.project_path, latex_cwd) + success = self._build_latexmk(self.project_path) else: - success = self._build_pdflatex(tex_files, latex_cwd) + success = self._build_pdflatex(tex_files) self._post_build() return success - def _build_latexmk(self, cwd, latex_cwd): + def _build_latexmk(self, cwd): # These steps are copied from the Makefile generated by Sphinx >= 1.6 # https://github.com/sphinx-doc/sphinx/blob/master/sphinx/texinputs/Makefile_t - latex_path = Path(latex_cwd) images = [] - for extension in ('png', 'gif', 'jpg', 'jpeg'): - images.extend(latex_path.glob(f'*.{extension}')) + for extension in ("png", "gif", "jpg", "jpeg"): + images.extend(Path(self.absolute_output_dir).glob(f"*.{extension}")) # FIXME: instead of checking by language here, what we want to check if # ``latex_engine`` is ``platex`` @@ -535,7 +543,7 @@ def _build_latexmk(self, cwd, latex_cwd): self.run( 'extractbb', image.name, - cwd=latex_cwd, + cwd=self.absolute_output_dir, record=False, ) @@ -546,7 +554,7 @@ def _build_latexmk(self, cwd, latex_cwd): self.run( 'cat', rcfile, - cwd=latex_cwd, + cwd=self.absolute_output_dir, ) if self.build_env.command_class == DockerBuildCommand: @@ -574,22 +582,27 @@ def _build_latexmk(self, cwd, latex_cwd): cls=latex_class, cmd=cmd, warn_only=True, - cwd=latex_cwd, + cwd=self.absolute_output_dir, ) self.pdf_file_name = 
f'{self.project.slug}.pdf' return cmd_ret.successful - def _build_pdflatex(self, tex_files, latex_cwd): + def _build_pdflatex(self, tex_files): pdflatex_cmds = [ ['pdflatex', '-interaction=nonstopmode', tex_file] for tex_file in tex_files ] # yapf: disable makeindex_cmds = [ [ - 'makeindex', '-s', 'python.ist', '{}.idx'.format( - os.path.splitext(os.path.relpath(tex_file, latex_cwd))[0], + "makeindex", + "-s", + "python.ist", + "{}.idx".format( + os.path.splitext( + os.path.relpath(tex_file, self.absolute_output_dir) + )[0], ), ] for tex_file in tex_files @@ -604,7 +617,7 @@ def _build_pdflatex(self, tex_files, latex_cwd): cmd_ret = self.build_env.run_command_class( cls=latex_class, cmd=cmd, - cwd=latex_cwd, + cwd=self.absolute_output_dir, warn_only=True, ) pdf_commands.append(cmd_ret) @@ -612,7 +625,7 @@ def _build_pdflatex(self, tex_files, latex_cwd): cmd_ret = self.build_env.run_command_class( cls=latex_class, cmd=cmd, - cwd=latex_cwd, + cwd=self.absolute_output_dir, warn_only=True, ) pdf_commands.append(cmd_ret) @@ -620,7 +633,7 @@ def _build_pdflatex(self, tex_files, latex_cwd): cmd_ret = self.build_env.run_command_class( cls=latex_class, cmd=cmd, - cwd=latex_cwd, + cwd=self.absolute_output_dir, warn_only=True, ) pdf_match = PDF_RE.search(cmd_ret.output) @@ -636,15 +649,14 @@ def _post_build(self): raise PDFNotFound() # TODO: merge this with ePUB since it's pretty much the same - sphinx_build_dir = os.path.join(self.project_path, self.sphinx_build_dir) temp_pdf_file = f"/tmp/{self.project.slug}-{self.version.slug}.pdf" target_file = os.path.join( - sphinx_build_dir, + self.absolute_output_dir, self.pdf_file_name, ) # NOTE: we currently support only one .pdf per version - pdf_sphinx_filepath = os.path.join(sphinx_build_dir, self.pdf_file_name) + pdf_sphinx_filepath = os.path.join(self.absolute_output_dir, self.pdf_file_name) if os.path.exists(pdf_sphinx_filepath): self.run( "mv", @@ -654,12 +666,16 @@ def _post_build(self): record=False, ) self.run( - "rm", "-r", self.sphinx_build_dir, cwd=self.project_path, record=False + "rm", + "-r", + self.relative_output_dir, + cwd=self.project_path, + record=False, ) self.run( "mkdir", "-p", - self.sphinx_build_dir, + self.relative_output_dir, cwd=self.project_path, record=False, ) From ad270e95b498388a3fef7134001955f2cff8b353 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Tue, 17 Jan 2023 13:19:10 +0100 Subject: [PATCH 10/19] Builder: execute `sphinx-build` from the same directory as `conf.py` I proposed to changed this CWD to be the one where the repository was cloned, but that could lead to some unexpected behavior. So, it's better to be conservative here and keep the CWD as it was. There are some small downsides for this decision, but it's better than breaking people's builds :) --- readthedocs/doc_builder/backends/sphinx.py | 38 ++++++++++++---------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py index 4c6aa6e7d62..3f32f5d8fb9 100644 --- a/readthedocs/doc_builder/backends/sphinx.py +++ b/readthedocs/doc_builder/backends/sphinx.py @@ -283,21 +283,22 @@ def build(self): self.sphinx_doctrees_dir, "-D", f"language={project.language}", - # Execute the command at the clone directory, - # but use `docs/` instead of `.` for the Sphinx path - os.path.dirname( - os.path.relpath( - self.config_file, - self.project_path, - ), + # Sphinx's source directory (SOURCEDIR). 
+            # We are executing this command at the location of the `conf.py` file (CWD).
+            # TODO: ideally we should execute it from where the repository was cloned,
+            # but that could lead to unexpected behavior for some users:
+            # https://github.com/readthedocs/readthedocs.org/pull/9888#issuecomment-1384649346
+            ".",
+            # Sphinx's output build directory (OUTPUTDIR)
+            os.path.relpath(
+                self.absolute_output_dir, os.path.dirname(self.config_file)
             ),
-            self.sphinx_build_dir,
             ]
         )
         cmd_ret = self.run(
             *build_command,
             bin_path=self.python_env.venv_bin(),
-            cwd=self.project_path,
+            cwd=os.path.dirname(self.config_file),
         )

         self._post_build()
@@ -494,16 +495,17 @@ def build(self):
             self.sphinx_doctrees_dir,
             "-D",
             f"language={self.project.language}",
-            # Execute the command at the clone directory,
-            # but use `docs/` instead of `.` for the Sphinx path
-            os.path.dirname(
-                os.path.relpath(
-                    self.config_file,
-                    self.project_path,
-                ),
+            # Sphinx's source directory (SOURCEDIR).
+            # We are executing this command at the location of the `conf.py` file (CWD).
+            # TODO: ideally we should execute it from where the repository was cloned,
+            # but that could lead to unexpected behavior for some users:
+            # https://github.com/readthedocs/readthedocs.org/pull/9888#issuecomment-1384649346
+            ".",
+            # Sphinx's output build directory (OUTPUTDIR)
+            os.path.relpath(
+                self.absolute_output_dir, os.path.dirname(self.config_file)
             ),
-            self.sphinx_build_dir,
-            cwd=self.project_path,
+            cwd=os.path.dirname(self.config_file),
             bin_path=self.python_env.venv_bin(),
         )

From da328fd8bcfb4bdd11e9a65ca097a8f0f5b16d6a Mon Sep 17 00:00:00 2001
From: Manuel Kaufmann 
Date: Tue, 17 Jan 2023 13:21:05 +0100
Subject: [PATCH 11/19] HTMLZip build: use `zip` inside the Docker container
 to build it
---
 readthedocs/doc_builder/backends/sphinx.py | 31 ++++++++++++++++------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py
index 3f32f5d8fb9..628718c78e3 100644
--- a/readthedocs/doc_builder/backends/sphinx.py
+++ b/readthedocs/doc_builder/backends/sphinx.py
@@ -394,20 +394,35 @@ def _post_build(self):
             )
         )

-        # FIXME: ``arcname`` is invalid with this command still, but it's close
+        # Move the directory into a temporal directory,
+        # so we can rename the directory for zip to use
+        # that prefix when zipping the files (arcname).
+        mktemp = self.run("mktemp", "--directory", record=False)
+        tmp_dir = Path(mktemp.output.strip())
+        dirname = f"{self.project.slug}-{self.version.slug}"
         self.run(
-            "zip",
-            "-r",
-            temp_zip_file,
-            self.sphinx_build_dir,
+            "mv",
+            self.relative_output_dir,
+            str(tmp_dir / dirname),
             cwd=self.project_path,
             record=False,
         )
-        self.run("rm", "-r", self.sphinx_build_dir, cwd=self.project_path, record=False)
         self.run(
-            "mkdir", "-p", self.sphinx_build_dir, cwd=self.project_path, record=False
+            "mkdir",
+            "--parents",
+            self.relative_output_dir,
+            cwd=self.project_path,
+            record=False,
+        )
+        self.run(
+            "zip",
+            "--recurse-paths",  # Include all files and directories.
+            "--symlinks",  # Don't resolve symlinks.
+            target_file,
+            dirname,
+            cwd=str(tmp_dir),
+            record=False,
         )
-        self.run("mv", temp_zip_file, target_file, cwd=self.project_path, record=False)


 class EpubBuilder(BaseSphinx):

From 782179fe38bb71438cd3729a22e8fdd1908d7cca Mon Sep 17 00:00:00 2001
From: Manuel Kaufmann 
Date: Tue, 17 Jan 2023 13:21:47 +0100
Subject: [PATCH 12/19] Minor changes about docstring and final dot in a
 sentence :)
---
 readthedocs/doc_builder/backends/sphinx.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py
index 628718c78e3..c7febeb445b 100644
--- a/readthedocs/doc_builder/backends/sphinx.py
+++ b/readthedocs/doc_builder/backends/sphinx.py
@@ -38,6 +38,8 @@ class BaseSphinx(BaseBuilder):

     """The parent for most sphinx builders."""

+    # Sphinx reads and parses all source files before it can write
+    # an output file; the parsed source files are cached as "doctree pickles".
     sphinx_doctrees_dir = "_build/doctrees"

     # Output directory relative to where the repository was cloned
@@ -500,7 +502,6 @@ class PdfBuilder(BaseSphinx):
     def build(self):
         self.run(
             *self.get_sphinx_cmd(),
-            # New arguments for standardization
             "-T",
             "-E",
             *self.sphinx_parallel_arg(),
@@ -526,7 +527,7 @@ def build(self):

         tex_files = glob(os.path.join(self.absolute_output_dir, '*.tex'))
         if not tex_files:
-            raise BuildUserError('No TeX files were found')
+            raise BuildUserError("No TeX files were found.")

         # Run LaTeX -> PDF conversions
         # Build PDF with ``latexmk`` if Sphinx supports it, otherwise fallback

From c999e55e3d62c67183651cfb16c8199915ab1bb4 Mon Sep 17 00:00:00 2001
From: Manuel Kaufmann 
Date: Tue, 17 Jan 2023 14:58:08 +0100
Subject: [PATCH 13/19] Test: adapt them to match the path and argument
 changes
---
 readthedocs/projects/tests/mockers.py          |  5 ++
 .../projects/tests/test_build_tasks.py         | 67 +++++++++++--------
 2 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/readthedocs/projects/tests/mockers.py b/readthedocs/projects/tests/mockers.py
index 39649efc03d..e700bd66663 100644
--- a/readthedocs/projects/tests/mockers.py
+++ b/readthedocs/projects/tests/mockers.py
@@ -53,6 +53,11 @@ def _mock_artifact_builders(self):
         #     'readthedocs.doc_builder.backends.sphinx.PdfBuilder.build',
         # )

+        self.patches["builder.pdf.PdfBuilder.pdf_file_name"] = mock.patch(
+            "readthedocs.doc_builder.backends.sphinx.PdfBuilder.pdf_file_name",
+            "project-slug.pdf",
+        )
+
         self.patches['builder.pdf.LatexBuildCommand.run'] = mock.patch(
             'readthedocs.doc_builder.backends.sphinx.LatexBuildCommand.run',
             return_value=mock.MagicMock(output='stdout', successful=True)
diff --git a/readthedocs/projects/tests/test_build_tasks.py b/readthedocs/projects/tests/test_build_tasks.py
index 5899815335f..55cae9ffec0 100644
--- a/readthedocs/projects/tests/test_build_tasks.py
+++ b/readthedocs/projects/tests/test_build_tasks.py
@@ -118,8 +118,8 @@ def test_build_sphinx_formats(self, load_yaml_config, formats, builders):
             "_build/doctrees",
             "-D",
             "language=en",
-            "docs",
-            "_readthedocs/html",
+            ".",
+            "../_readthedocs/html",
             cwd=mock.ANY,
             bin_path=mock.ANY,
         )
@@ -139,8 +139,8 @@ def test_build_sphinx_formats(self, load_yaml_config, formats, builders):
             "_build/doctrees",
             "-D",
             "language=en",
-            "docs",
-            "_readthedocs/html",
+            ".",
+            "../_readthedocs/html",
             cwd=mock.ANY,
             bin_path=mock.ANY,
         )
@@ -638,8 +638,8 @@ def test_build_commands_executed(
                 "_build/doctrees",
                 "-D",
                 "language=en",
-                "docs",
-                "_readthedocs/html",
+                ".",
+                "../_readthedocs/html",
                 cwd=mock.ANY,
bin_path=mock.ANY, ), @@ -655,28 +655,35 @@ def test_build_commands_executed( "_build/doctrees", "-D", "language=en", - "docs", - "_readthedocs/htmlzip", + ".", + "../_readthedocs/htmlzip", cwd=mock.ANY, bin_path=mock.ANY, ), mock.call( - "zip", - "-r", - "/tmp/project-latest.zip", - "_readthedocs/htmlzip", - cwd=mock.ANY, + "mktemp", + "--directory", record=False, ), mock.call( - "rm", "-r", "_readthedocs/htmlzip", cwd=mock.ANY, record=False + "mv", + mock.ANY, + mock.ANY, + cwd=mock.ANY, + record=False, ), mock.call( - "mkdir", "-p", "_readthedocs/htmlzip", cwd=mock.ANY, record=False + "mkdir", + "--parents", + mock.ANY, + cwd=mock.ANY, + record=False, ), mock.call( - "mv", - "/tmp/project-latest.zip", + "zip", + "--recurse-paths", + "--symlinks", + mock.ANY, mock.ANY, cwd=mock.ANY, record=False, @@ -693,8 +700,8 @@ def test_build_commands_executed( "_build/doctrees", "-D", "language=en", - "docs", - "_readthedocs/pdf", + ".", + "../_readthedocs/pdf", cwd=mock.ANY, bin_path=mock.ANY, ), @@ -722,8 +729,8 @@ def test_build_commands_executed( "_build/doctrees", "-D", "language=en", - "docs", - "_readthedocs/epub", + ".", + "../_readthedocs/epub", cwd=mock.ANY, bin_path=mock.ANY, ), @@ -734,9 +741,15 @@ def test_build_commands_executed( cwd=mock.ANY, record=False, ), - mock.call("rm", "-r", "_readthedocs/epub", cwd=mock.ANY, record=False), mock.call( - "mkdir", "-p", "_readthedocs/epub", cwd=mock.ANY, record=False + "rm", "--recursive", "_readthedocs/epub", cwd=mock.ANY, record=False + ), + mock.call( + "mkdir", + "--parents", + "_readthedocs/epub", + cwd=mock.ANY, + record=False, ), mock.call( "mv", @@ -1212,8 +1225,8 @@ def test_sphinx_fail_on_warning(self, load_yaml_config): "_build/doctrees", "-D", "language=en", - "docs", - "_readthedocs/html", + ".", + "../_readthedocs/html", cwd=mock.ANY, bin_path=mock.ANY, ), @@ -1591,8 +1604,8 @@ def test_sphinx_builder(self, load_yaml_config, value, command): "_build/doctrees", "-D", "language=en", - "docs", - "_readthedocs/html", + ".", + "../_readthedocs/html", cwd=mock.ANY, bin_path=mock.ANY, ), From 8195ecb00d7e9caa6b83ef822416982ad98f4346 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Tue, 17 Jan 2023 15:03:32 +0100 Subject: [PATCH 14/19] pre-commit missing changes --- readthedocs/doc_builder/backends/sphinx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py index c7febeb445b..10fc06df53e 100644 --- a/readthedocs/doc_builder/backends/sphinx.py +++ b/readthedocs/doc_builder/backends/sphinx.py @@ -525,7 +525,7 @@ def build(self): bin_path=self.python_env.venv_bin(), ) - tex_files = glob(os.path.join(self.absolute_output_dir, '*.tex')) + tex_files = glob(os.path.join(self.absolute_output_dir, "*.tex")) if not tex_files: raise BuildUserError("No TeX files were found.") From 1f50a3bba0fb1823da71f248b5d85a4f8b4f818b Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 18 Jan 2023 12:15:43 +0100 Subject: [PATCH 15/19] Sphinx builder: better default --- readthedocs/doc_builder/backends/sphinx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py index 10fc06df53e..57d7f02eba0 100644 --- a/readthedocs/doc_builder/backends/sphinx.py +++ b/readthedocs/doc_builder/backends/sphinx.py @@ -44,7 +44,7 @@ class BaseSphinx(BaseBuilder): # Output directory relative to where the repository was cloned # (e.g. 
"_readthedocs/") - relative_output_dir = "" + relative_output_dir = None def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) From a88bf14fdce1f1527862aa9e05d0114a46a89dc9 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 18 Jan 2023 12:16:36 +0100 Subject: [PATCH 16/19] Update readthedocs/doc_builder/backends/sphinx.py Co-authored-by: Eric Holscher <25510+ericholscher@users.noreply.github.com> --- readthedocs/doc_builder/backends/sphinx.py | 1 + 1 file changed, 1 insertion(+) diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py index 57d7f02eba0..ba7327a96d0 100644 --- a/readthedocs/doc_builder/backends/sphinx.py +++ b/readthedocs/doc_builder/backends/sphinx.py @@ -396,6 +396,7 @@ def _post_build(self): ) ) + # **SECURITY CRITICAL: Advisory GHSA-hqwg-gjqw-h5wg** # Move the directory into a temporal directory, # so we can rename the directory for zip to use # that prefix when zipping the files (arcname). From 280e8e4e40e8c1b119b04d6cf1304636187e150a Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 18 Jan 2023 12:17:15 +0100 Subject: [PATCH 17/19] Update readthedocs/projects/tests/test_build_tasks.py Co-authored-by: Eric Holscher <25510+ericholscher@users.noreply.github.com> --- readthedocs/projects/tests/test_build_tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/readthedocs/projects/tests/test_build_tasks.py b/readthedocs/projects/tests/test_build_tasks.py index 55cae9ffec0..cffc3a6db72 100644 --- a/readthedocs/projects/tests/test_build_tasks.py +++ b/readthedocs/projects/tests/test_build_tasks.py @@ -197,7 +197,8 @@ def test_build_updates_documentation_type(self, load_yaml_config): } ) - # Create the artifact paths, so it's detected by the builder + # Create the artifact paths, so that `store_build_artifacts` + # properly runs: https://github.com/readthedocs/readthedocs.org/blob/faa611fad689675f81101ea643770a6b669bf529/readthedocs/projects/tasks/builds.py#L798-L804 os.makedirs(self.project.artifact_path(version=self.version.slug, type_="html")) os.makedirs(self.project.artifact_path(version=self.version.slug, type_="epub")) os.makedirs(self.project.artifact_path(version=self.version.slug, type_="pdf")) From e78442ad5cdebca999b3880dca598c95f822fc52 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 18 Jan 2023 12:52:54 +0100 Subject: [PATCH 18/19] Minor changes to fix tests --- readthedocs/doc_builder/backends/sphinx.py | 2 +- readthedocs/rtd_tests/tests/test_doc_builder.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py index ba7327a96d0..e5126a54c20 100644 --- a/readthedocs/doc_builder/backends/sphinx.py +++ b/readthedocs/doc_builder/backends/sphinx.py @@ -556,7 +556,7 @@ def _build_latexmk(self, cwd): # step. I don't know exactly why but most of the documentation that # I read differentiate this language from the others. I suppose # it's because it mix kanji (Chinese) with its own symbols. 
-        pdfs = latex_path.glob('*.pdf')
+        pdfs = Path(self.absolute_output_dir).glob("*.pdf")

         for image in itertools.chain(images, pdfs):
             self.run(
diff --git a/readthedocs/rtd_tests/tests/test_doc_builder.py b/readthedocs/rtd_tests/tests/test_doc_builder.py
index e61587b3378..52e641a1764 100644
--- a/readthedocs/rtd_tests/tests/test_doc_builder.py
+++ b/readthedocs/rtd_tests/tests/test_doc_builder.py
@@ -51,6 +51,7 @@ def setUp(self):

         BaseSphinx.type = 'base'
         BaseSphinx.sphinx_build_dir = tempfile.mkdtemp()
+        BaseSphinx.relative_output_dir = "_readthedocs/"

     @patch('readthedocs.doc_builder.backends.sphinx.BaseSphinx.docs_dir')
     @patch('readthedocs.projects.models.Project.checkout_path')

From c301c0dcbcb7110b0b0dc2a6d04a702cdf76fb04 Mon Sep 17 00:00:00 2001
From: Manuel Kaufmann 
Date: Wed, 18 Jan 2023 13:16:59 +0100
Subject: [PATCH 19/19] Docstring: explain why there is exception handling at
 this place
---
 readthedocs/doc_builder/backends/sphinx.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py
index e5126a54c20..f0c65313860 100644
--- a/readthedocs/doc_builder/backends/sphinx.py
+++ b/readthedocs/doc_builder/backends/sphinx.py
@@ -61,6 +61,18 @@ def __init__(self, *args, **kwargs):
                 self.config_file,
             )
         except ProjectConfigurationError:
+            # NOTE: the exception handling here is weird to me.
+            # We are raising this exception from inside `project.conf_file` when:
+            # - the repository has multiple config files (none of them with `doc` in their filename)
+            # - there is no config file at all
+            #
+            # IMO, if there are multiple config files,
+            # the build should fail immediately, communicating this to the user.
+            # This can be achieved by not handling the exception here
+            # and letting the Celery `on_failure` handler deal with it.
+            #
+            # In case there is no config file, we should continue the build
+            # because Read the Docs will automatically create one for it.
             pass

     def _write_config(self, master_doc='index'):
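
For reference, the way the standardized output paths used throughout these patches fit together can be illustrated with a small, self-contained sketch. This is not part of the patch series: the class name and the example paths are made up, and it assumes `absolute_output_dir` is simply the checkout path joined with `relative_output_dir` (which is what the `../_readthedocs/html` values expected by the tests imply):

    import os


    class OutputDirSketch:
        # Output directory relative to where the repository was cloned.
        relative_output_dir = "_readthedocs/html"

        def __init__(self, project_path, config_file):
            self.project_path = project_path  # hypothetical checkout path
            self.config_file = config_file    # hypothetical conf.py location

        @property
        def absolute_output_dir(self):
            # Assumption: checkout path joined with the relative output directory.
            return os.path.join(self.project_path, self.relative_output_dir)

        def sphinx_outputdir_argument(self):
            # OUTPUTDIR passed to ``sphinx-build``, relative to the directory
            # where ``conf.py`` lives (the CWD used to run the command).
            return os.path.relpath(
                self.absolute_output_dir,
                os.path.dirname(self.config_file),
            )


    sketch = OutputDirSketch(
        project_path="/tmp/checkouts/project",
        config_file="/tmp/checkouts/project/docs/conf.py",
    )
    print(sketch.sphinx_outputdir_argument())  # prints: ../_readthedocs/html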