diff --git a/readthedocs/builds/constants.py b/readthedocs/builds/constants.py index f5bf898bce0..94c3da021ca 100644 --- a/readthedocs/builds/constants.py +++ b/readthedocs/builds/constants.py @@ -148,3 +148,19 @@ MAX_BUILD_COMMAND_SIZE = 1000000 # This keeps us under Azure's upload limit LOCK_EXPIRE = 60 * 180 # Lock expires in 3 hours + +# All artifact types supported by Read the Docs. +# They match the output directory (`_readthedocs/`) +ARTIFACT_TYPES = ( + "html", + "json", + "htmlzip", + "pdf", + "epub", +) +# Artifacts that are not deleted when uploading to the storage, +# even if they weren't re-built in the build process. +UNDELETABLE_ARTIFACT_TYPES = ( + "html", + "json", +) diff --git a/readthedocs/doc_builder/backends/mkdocs.py b/readthedocs/doc_builder/backends/mkdocs.py index 832301ec847..355b95a7043 100644 --- a/readthedocs/doc_builder/backends/mkdocs.py +++ b/readthedocs/doc_builder/backends/mkdocs.py @@ -46,10 +46,6 @@ class BaseMkdocs(BaseBuilder): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.yaml_file = self.get_yaml_config() - self.old_artifact_path = os.path.join( - os.path.dirname(self.yaml_file), - self.build_dir, - ) # README: historically, the default theme was ``readthedocs`` but in # https://github.com/rtfd/readthedocs.org/pull/4556 we change it to @@ -343,9 +339,8 @@ def get_theme_name(self, mkdocs_config): class MkdocsHTML(BaseMkdocs): - type = 'mkdocs' - builder = 'build' - build_dir = '_build/html' + builder = "build" + build_dir = "_readthedocs/html" # TODO: find a better way to integrate with MkDocs. diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py index dabccd823e8..f0c65313860 100644 --- a/readthedocs/doc_builder/backends/sphinx.py +++ b/readthedocs/doc_builder/backends/sphinx.py @@ -3,6 +3,7 @@ .. 
_Sphinx: http://www.sphinx-doc.org/ """ + import itertools import os from glob import glob @@ -17,13 +18,14 @@ from readthedocs.api.v2.client import api from readthedocs.builds import utils as version_utils -from readthedocs.core.utils.filesystem import safe_copytree, safe_open, safe_rmtree +from readthedocs.core.utils.filesystem import safe_open +from readthedocs.doc_builder.exceptions import PDFNotFound from readthedocs.projects.constants import PUBLIC from readthedocs.projects.exceptions import ProjectConfigurationError, UserFileNotFound from readthedocs.projects.models import Feature from readthedocs.projects.utils import safe_write -from ..base import BaseBuilder, restoring_chdir +from ..base import BaseBuilder from ..constants import PDF_RE from ..environments import BuildCommand, DockerBuildCommand from ..exceptions import BuildUserError @@ -36,9 +38,20 @@ class BaseSphinx(BaseBuilder): """The parent for most sphinx builders.""" + # Sphinx reads and parses all source files before it can write + # an output file, the parsed source files are cached as "doctree pickles". + sphinx_doctrees_dir = "_build/doctrees" + + # Output directory relative to where the repository was cloned + # (e.g. "_readthedocs/") + relative_output_dir = None + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.config_file = self.config.sphinx.configuration + self.absolute_output_dir = os.path.abspath( + os.path.join(self.project_path, self.relative_output_dir) + ) try: if not self.config_file: self.config_file = self.project.conf_file(self.version.slug) @@ -47,16 +60,20 @@ def __init__(self, *args, **kwargs): self.project_path, self.config_file, ) - self.old_artifact_path = os.path.join( - os.path.dirname(self.config_file), - self.sphinx_build_dir, - ) except ProjectConfigurationError: - docs_dir = self.docs_dir() - self.old_artifact_path = os.path.join( - docs_dir, - self.sphinx_build_dir, - ) + # NOTE: this exception handling here is weird to me. 
+ # We are raising this exception from inside `project.confi_file` when: + # - the repository has multiple config files (none of them with `doc` in its filename) + # - there is no config file at all + # + # IMO, if there are multiple config files, + # the build should fail immediately communicating this to the user. + # This can be achived by unhandle the exception here + # and leaving `on_failure` Celery handle to deal with it. + # + # In case there is no config file, we should continue the build + # because Read the Docs will automatically create one for it. + pass def _write_config(self, master_doc='index'): """Create ``conf.py`` if it doesn't exist.""" @@ -263,7 +280,6 @@ def append_conf(self): ) def build(self): - self.clean() project = self.project build_command = [ *self.get_sphinx_cmd(), @@ -272,22 +288,35 @@ def build(self): *self.sphinx_parallel_arg(), ] if self.config.sphinx.fail_on_warning: - build_command.extend(['-W', '--keep-going']) - build_command.extend([ - '-b', - self.sphinx_builder, - '-d', - '_build/doctrees', - '-D', - 'language={lang}'.format(lang=project.language), - '.', - self.sphinx_build_dir, - ]) + build_command.extend(["-W", "--keep-going"]) + build_command.extend( + [ + "-b", + self.sphinx_builder, + "-d", + self.sphinx_doctrees_dir, + "-D", + f"language={project.language}", + # Sphinx's source directory (SOURCEDIR). + # We are executing this command at the location of the `conf.py` file (CWD). 
+ # TODO: ideally we should execute it from where the repository was clonned, + # but that could lead unexpected behavior to some users: + # https://github.com/readthedocs/readthedocs.org/pull/9888#issuecomment-1384649346 + ".", + # Sphinx's output build directory (OUTPUTDIR) + os.path.relpath( + self.absolute_output_dir, os.path.dirname(self.config_file) + ), + ] + ) cmd_ret = self.run( *build_command, - cwd=os.path.dirname(self.config_file), bin_path=self.python_env.venv_bin(), + cwd=os.path.dirname(self.config_file), ) + + self._post_build() + return cmd_ret.successful def get_sphinx_cmd(self): @@ -336,8 +365,7 @@ def venv_sphinx_supports_latexmk(self): class HtmlBuilder(BaseSphinx): - type = 'sphinx' - sphinx_build_dir = '_build/html' + relative_output_dir = "_readthedocs/html" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -345,31 +373,8 @@ def __init__(self, *args, **kwargs): if self.project.has_feature(Feature.USE_SPHINX_BUILDERS): self.sphinx_builder = 'html' - def move(self, **__): - super().move() - # Copy JSON artifacts to its own directory - # to keep compatibility with the older builder. 
- json_path = os.path.abspath( - os.path.join(self.old_artifact_path, '..', 'json'), - ) - json_path_target = self.project.artifact_path( - version=self.version.slug, - type_='sphinx_search', - ) - if os.path.exists(json_path): - if os.path.exists(json_path_target): - safe_rmtree(json_path_target) - log.debug('Copying json on the local filesystem') - safe_copytree( - json_path, - json_path_target, - ) - else: - log.warning('Not moving json because the build dir is unknown.',) - class HtmlDirBuilder(HtmlBuilder): - type = 'sphinx_htmldir' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -379,7 +384,6 @@ def __init__(self, *args, **kwargs): class SingleHtmlBuilder(HtmlBuilder): - type = 'sphinx_singlehtml' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -389,34 +393,42 @@ def __init__(self, *args, **kwargs): class LocalMediaBuilder(BaseSphinx): - type = 'sphinx_localmedia' sphinx_builder = 'readthedocssinglehtmllocalmedia' - sphinx_build_dir = '_build/localmedia' - - @restoring_chdir - def move(self, **__): - if not os.path.exists(self.old_artifact_path): - log.warning('Not moving localmedia because the build dir is unknown.') - return - - log.debug('Creating zip file from path.', path=self.old_artifact_path) - target_file = os.path.join( - self.target, - '{}.zip'.format(self.project.slug), + relative_output_dir = "_readthedocs/htmlzip" + + def _post_build(self): + """Internal post build to create the ZIP file from the HTML output.""" + target_file = os.path.abspath( + os.path.join( + self.absolute_output_dir, + # TODO: shouldn't this name include the name of the version as well? + # It seems we were using the project's slug previously. + # So, keeping it like that for now until we decide make that adjustment. 
+ f"{self.project.slug}.zip", + ) ) - if not os.path.exists(self.target): - os.makedirs(self.target) - if os.path.exists(target_file): - os.remove(target_file) + # **SECURITY CRITICAL: Advisory GHSA-hqwg-gjqw-h5wg** # Move the directory into a temporal directory, # so we can rename the directory for zip to use - # that prefix when zipping the files. - result = self.run("mktemp", "--directory") - tmp_dir = Path(result.output.strip()) + # that prefix when zipping the files (arcname). + mktemp = self.run("mktemp", "--directory", record=False) + tmp_dir = Path(mktemp.output.strip()) dirname = f"{self.project.slug}-{self.version.slug}" - self.run("mv", self.old_artifact_path, str(tmp_dir / dirname)) - # Create a .zip file + self.run( + "mv", + self.relative_output_dir, + str(tmp_dir / dirname), + cwd=self.project_path, + record=False, + ) + self.run( + "mkdir", + "--parents", + self.relative_output_dir, + cwd=self.project_path, + record=False, + ) self.run( "zip", "--recurse-paths", # Include all files and directories. 
@@ -424,31 +436,47 @@ def move(self, **__): target_file, dirname, cwd=str(tmp_dir), + record=False, ) class EpubBuilder(BaseSphinx): - type = 'sphinx_epub' - sphinx_builder = 'epub' - sphinx_build_dir = '_build/epub' - - def move(self, **__): - from_globs = glob(os.path.join(self.old_artifact_path, '*.epub')) - if not os.path.exists(self.target): - os.makedirs(self.target) - if from_globs: - from_file = from_globs[0] - to_file = os.path.join( - self.target, - '{}.epub'.format(self.project.slug), + sphinx_builder = "epub" + relative_output_dir = "_readthedocs/epub" + + def _post_build(self): + """Internal post build to cleanup EPUB output directory and leave only one .epub file.""" + temp_epub_file = f"/tmp/{self.project.slug}-{self.version.slug}.epub" + target_file = os.path.join( + self.absolute_output_dir, + f"{self.project.slug}.epub", + ) + + epub_sphinx_filepaths = glob(os.path.join(self.absolute_output_dir, "*.epub")) + if epub_sphinx_filepaths: + # NOTE: we currently support only one .epub per version + epub_filepath = epub_sphinx_filepaths[0] + + self.run( + "mv", epub_filepath, temp_epub_file, cwd=self.project_path, record=False ) self.run( - 'mv', - '-f', - from_file, - to_file, + "rm", + "--recursive", + self.relative_output_dir, cwd=self.project_path, + record=False, + ) + self.run( + "mkdir", + "--parents", + self.relative_output_dir, + cwd=self.project_path, + record=False, + ) + self.run( + "mv", temp_epub_file, target_file, cwd=self.project_path, record=False ) @@ -480,50 +508,57 @@ class PdfBuilder(BaseSphinx): """Builder to generate PDF documentation.""" - type = 'sphinx_pdf' - sphinx_build_dir = '_build/latex' + relative_output_dir = "_readthedocs/pdf" + sphinx_builder = "latex" pdf_file_name = None def build(self): - self.clean() - cwd = os.path.dirname(self.config_file) - - # Default to this so we can return it always. 
self.run( *self.get_sphinx_cmd(), - '-b', - 'latex', + "-T", + "-E", *self.sphinx_parallel_arg(), - '-D', - 'language={lang}'.format(lang=self.project.language), - '-d', - '_build/doctrees', - '.', - '_build/latex', - cwd=cwd, + "-b", + self.sphinx_builder, + "-d", + self.sphinx_doctrees_dir, + "-D", + f"language={self.project.language}", + # Sphinx's source directory (SOURCEDIR). + # We are executing this command at the location of the `conf.py` file (CWD). + # TODO: ideally we should execute it from where the repository was clonned, + # but that could lead unexpected behavior to some users: + # https://github.com/readthedocs/readthedocs.org/pull/9888#issuecomment-1384649346 + ".", + # Sphinx's output build directory (OUTPUTDIR) + os.path.relpath( + self.absolute_output_dir, os.path.dirname(self.config_file) + ), + cwd=os.path.dirname(self.config_file), bin_path=self.python_env.venv_bin(), ) - latex_cwd = os.path.join(cwd, '_build', 'latex') - tex_files = glob(os.path.join(latex_cwd, '*.tex')) + tex_files = glob(os.path.join(self.absolute_output_dir, "*.tex")) if not tex_files: - raise BuildUserError('No TeX files were found') + raise BuildUserError("No TeX files were found.") # Run LaTeX -> PDF conversions # Build PDF with ``latexmk`` if Sphinx supports it, otherwise fallback # to ``pdflatex`` to support old versions if self.venv_sphinx_supports_latexmk(): - return self._build_latexmk(cwd, latex_cwd) + success = self._build_latexmk(self.project_path) + else: + success = self._build_pdflatex(tex_files) - return self._build_pdflatex(tex_files, latex_cwd) + self._post_build() + return success - def _build_latexmk(self, cwd, latex_cwd): + def _build_latexmk(self, cwd): # These steps are copied from the Makefile generated by Sphinx >= 1.6 # https://github.com/sphinx-doc/sphinx/blob/master/sphinx/texinputs/Makefile_t - latex_path = Path(latex_cwd) images = [] - for extension in ('png', 'gif', 'jpg', 'jpeg'): - 
images.extend(latex_path.glob(f'*.{extension}')) + for extension in ("png", "gif", "jpg", "jpeg"): + images.extend(Path(self.absolute_output_dir).glob(f"*.{extension}")) # FIXME: instead of checking by language here, what we want to check if # ``latex_engine`` is ``platex`` @@ -533,13 +568,13 @@ def _build_latexmk(self, cwd, latex_cwd): # step. I don't know exactly why but most of the documentation that # I read differentiate this language from the others. I suppose # it's because it mix kanji (Chinese) with its own symbols. - pdfs = latex_path.glob('*.pdf') + pdfs = Path(self.absolute_output_dir).glob("*.pdf") for image in itertools.chain(images, pdfs): self.run( 'extractbb', image.name, - cwd=latex_cwd, + cwd=self.absolute_output_dir, record=False, ) @@ -550,7 +585,7 @@ def _build_latexmk(self, cwd, latex_cwd): self.run( 'cat', rcfile, - cwd=latex_cwd, + cwd=self.absolute_output_dir, ) if self.build_env.command_class == DockerBuildCommand: @@ -578,22 +613,27 @@ def _build_latexmk(self, cwd, latex_cwd): cls=latex_class, cmd=cmd, warn_only=True, - cwd=latex_cwd, + cwd=self.absolute_output_dir, ) self.pdf_file_name = f'{self.project.slug}.pdf' return cmd_ret.successful - def _build_pdflatex(self, tex_files, latex_cwd): + def _build_pdflatex(self, tex_files): pdflatex_cmds = [ ['pdflatex', '-interaction=nonstopmode', tex_file] for tex_file in tex_files ] # yapf: disable makeindex_cmds = [ [ - 'makeindex', '-s', 'python.ist', '{}.idx'.format( - os.path.splitext(os.path.relpath(tex_file, latex_cwd))[0], + "makeindex", + "-s", + "python.ist", + "{}.idx".format( + os.path.splitext( + os.path.relpath(tex_file, self.absolute_output_dir) + )[0], ), ] for tex_file in tex_files @@ -608,7 +648,7 @@ def _build_pdflatex(self, tex_files, latex_cwd): cmd_ret = self.build_env.run_command_class( cls=latex_class, cmd=cmd, - cwd=latex_cwd, + cwd=self.absolute_output_dir, warn_only=True, ) pdf_commands.append(cmd_ret) @@ -616,7 +656,7 @@ def _build_pdflatex(self, tex_files, latex_cwd): 
cmd_ret = self.build_env.run_command_class( cls=latex_class, cmd=cmd, - cwd=latex_cwd, + cwd=self.absolute_output_dir, warn_only=True, ) pdf_commands.append(cmd_ret) @@ -624,7 +664,7 @@ def _build_pdflatex(self, tex_files, latex_cwd): cmd_ret = self.build_env.run_command_class( cls=latex_class, cmd=cmd, - cwd=latex_cwd, + cwd=self.absolute_output_dir, warn_only=True, ) pdf_match = PDF_RE.search(cmd_ret.output) @@ -633,40 +673,43 @@ def _build_pdflatex(self, tex_files, latex_cwd): pdf_commands.append(cmd_ret) return all(cmd.successful for cmd in pdf_commands) - def move(self, **__): - if not os.path.exists(self.target): - os.makedirs(self.target) + def _post_build(self): + """Internal post build to cleanup PDF output directory and leave only one .pdf file.""" - exact = os.path.join( - self.old_artifact_path, - '{}.pdf'.format(self.project.slug), - ) - exact_upper = os.path.join( - self.old_artifact_path, - '{}.pdf'.format(self.project.slug.capitalize()), + if not self.pdf_file_name: + raise PDFNotFound() + + # TODO: merge this with ePUB since it's pretty much the same + temp_pdf_file = f"/tmp/{self.project.slug}-{self.version.slug}.pdf" + target_file = os.path.join( + self.absolute_output_dir, + self.pdf_file_name, ) - if self.pdf_file_name and os.path.exists(self.pdf_file_name): - from_file = self.pdf_file_name - if os.path.exists(exact): - from_file = exact - elif os.path.exists(exact_upper): - from_file = exact_upper - else: - from_globs = glob(os.path.join(self.old_artifact_path, '*.pdf')) - if from_globs: - from_file = max(from_globs, key=os.path.getmtime) - else: - from_file = None - if from_file: - to_file = os.path.join( - self.target, - '{}.pdf'.format(self.project.slug), + # NOTE: we currently support only one .pdf per version + pdf_sphinx_filepath = os.path.join(self.absolute_output_dir, self.pdf_file_name) + if os.path.exists(pdf_sphinx_filepath): + self.run( + "mv", + pdf_sphinx_filepath, + temp_pdf_file, + cwd=self.project_path, + record=False, + ) + 
self.run( + "rm", + "-r", + self.relative_output_dir, + cwd=self.project_path, + record=False, ) self.run( - 'mv', - '-f', - from_file, - to_file, + "mkdir", + "-p", + self.relative_output_dir, cwd=self.project_path, + record=False, + ) + self.run( + "mv", temp_pdf_file, target_file, cwd=self.project_path, record=False ) diff --git a/readthedocs/doc_builder/base.py b/readthedocs/doc_builder/base.py index 42f5f0acd65..5b08525567d 100644 --- a/readthedocs/doc_builder/base.py +++ b/readthedocs/doc_builder/base.py @@ -1,12 +1,11 @@ """Base classes for Builders.""" import os -import shutil from functools import wraps import structlog -from readthedocs.core.utils.filesystem import safe_copytree, safe_open, safe_rmtree +from readthedocs.core.utils.filesystem import safe_open from readthedocs.projects.models import Feature log = structlog.get_logger(__name__) @@ -27,15 +26,9 @@ def decorator(*args, **kw): class BaseBuilder: - """ - The Base for all Builders. Defines the API for subclasses. - - Expects subclasses to define ``old_artifact_path``, which points at the - directory where artifacts should be copied from. - """ + """The Base for all Builders. 
Defines the API for subclasses.""" ignore_patterns = [] - old_artifact_path = None def __init__(self, build_env, python_env): self.build_env = build_env @@ -44,10 +37,6 @@ def __init__(self, build_env, python_env): self.project = build_env.project self.config = python_env.config if python_env else None self.project_path = self.project.checkout_path(self.version.slug) - self.target = self.project.artifact_path( - version=self.version.slug, - type_=self.type, - ) def get_final_doctype(self): """Some builders may have a different doctype at build time.""" @@ -60,27 +49,8 @@ def build(self): """Do the actual building of the documentation.""" raise NotImplementedError - def move(self, **__): - """Move the generated documentation to its artifact directory.""" - if os.path.exists(self.old_artifact_path): - if os.path.exists(self.target): - safe_rmtree(self.target) - log.debug('Copying output type on the local filesystem.', output_type=self.type) - log.debug('Ignoring patterns.', patterns=self.ignore_patterns) - safe_copytree( - self.old_artifact_path, - self.target, - ignore=shutil.ignore_patterns(*self.ignore_patterns), - ) - else: - log.warning('Not moving docs because the build dir is unknown.') - - def clean(self, **__): - """Clean the path where documentation will be built.""" - # NOTE: this shouldn't be needed. We are always CLEAN_AFTER_BUILD now - if os.path.exists(self.old_artifact_path): - safe_rmtree(self.old_artifact_path) - log.info('Removing old artifact path.', path=self.old_artifact_path) + def _post_build(self): + """Execute extra steps (e.g. 
create ZIP, rename PDF, etc) after building if required.""" def docs_dir(self, docs_dir=None, **__): """Handle creating a custom docs_dir if it doesn't exist.""" diff --git a/readthedocs/doc_builder/director.py b/readthedocs/doc_builder/director.py index c8d60a6bd3b..fedd6ed7f63 100644 --- a/readthedocs/doc_builder/director.py +++ b/readthedocs/doc_builder/director.py @@ -6,12 +6,11 @@ from django.utils.translation import gettext_lazy as _ from readthedocs.builds.constants import EXTERNAL -from readthedocs.core.utils.filesystem import safe_copytree from readthedocs.doc_builder.config import load_yaml_config from readthedocs.doc_builder.exceptions import BuildUserError from readthedocs.doc_builder.loader import get_builder_class from readthedocs.doc_builder.python_environments import Conda, Virtualenv -from readthedocs.projects.constants import BUILD_COMMANDS_OUTPUT_PATH_HTML, GENERIC +from readthedocs.projects.constants import BUILD_COMMANDS_OUTPUT_PATH_HTML from readthedocs.projects.exceptions import RepositoryError from readthedocs.projects.models import Feature from readthedocs.projects.signals import after_build, before_build, before_vcs @@ -181,11 +180,11 @@ def build(self): self.run_build_job("pre_build") - self.data.outcomes["html"] = self.build_html() - self.data.outcomes["search"] = self.is_type_sphinx() - self.data.outcomes["localmedia"] = self.build_htmlzip() - self.data.outcomes["pdf"] = self.build_pdf() - self.data.outcomes["epub"] = self.build_epub() + # Build all formats + self.build_html() + self.build_htmlzip() + self.build_pdf() + self.build_epub() self.run_build_job("post_build") @@ -362,24 +361,14 @@ def run_build_commands(self): record=False, ) - # Copy files to artifacts path so they are uploaded to S3 - target = self.data.project.artifact_path( - version=self.data.version.slug, - type_=GENERIC, - ) - artifacts_path = os.path.join(cwd, BUILD_COMMANDS_OUTPUT_PATH_HTML) - if not os.path.exists(artifacts_path): + html_output_path = 
os.path.join(cwd, BUILD_COMMANDS_OUTPUT_PATH_HTML) + if not os.path.exists(html_output_path): raise BuildUserError(BuildUserError.BUILD_COMMANDS_WITHOUT_OUTPUT) - safe_copytree(artifacts_path, target) - # Update the `Version.documentation_type` to match the doctype defined # by the config file. When using `build.commands` it will be `GENERIC` self.data.version.documentation_type = self.data.config.doctype - # Mark HTML as the only outcome available for this type of builder - self.data.outcomes["html"] = True - def install_build_tools(self): """ Install all ``build.tools`` defined by the user in the config file. @@ -536,8 +525,6 @@ def build_docs_class(self, builder_class): self.data.version.documentation_type = builder.get_final_doctype() success = builder.build() - builder.move() - return success def get_vcs_env_vars(self): diff --git a/readthedocs/doc_builder/exceptions.py b/readthedocs/doc_builder/exceptions.py index 3f0e0c308f6..d3730baa617 100644 --- a/readthedocs/doc_builder/exceptions.py +++ b/readthedocs/doc_builder/exceptions.py @@ -66,6 +66,12 @@ class BuildCancelled(BuildUserError): state = BUILD_STATE_CANCELLED +class PDFNotFound(BuildUserError): + message = gettext_noop( + 'PDF file was not generated/found in "_readthedocs/pdf" output directory.' + ) + + class MkDocsYAMLParseError(BuildUserError): GENERIC_WITH_PARSE_EXCEPTION = gettext_noop( 'Problem parsing MkDocs YAML configuration. {exception}', diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index c05d9eb4388..68042b4aa7e 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -817,8 +817,13 @@ def full_doc_path(self, version=LATEST): return doc_base def artifact_path(self, type_, version=LATEST): - """The path to the build html docs in the project.""" - return os.path.join(self.doc_path, 'artifacts', version, type_) + """ + The path to the build docs output for the project. + + :param type_: one of `html`, `json`, `htmlzip`, `pdf`, `epub`. 
+ :param version: slug of the version. + """ + return os.path.join(self.checkout_path(version=version), "_readthedocs", type_) def conf_file(self, version=LATEST): """Find a ``conf.py`` file in the project checkout.""" diff --git a/readthedocs/projects/tasks/builds.py b/readthedocs/projects/tasks/builds.py index f5a72d2a0c3..ed4707f16ae 100644 --- a/readthedocs/projects/tasks/builds.py +++ b/readthedocs/projects/tasks/builds.py @@ -4,9 +4,9 @@ This includes fetching repository code, cleaning ``conf.py`` files, and rebuilding documentation. """ +import os import signal import socket -from collections import defaultdict from dataclasses import dataclass, field import structlog @@ -18,6 +18,7 @@ from readthedocs.api.v2.client import api as api_v2 from readthedocs.builds import tasks as build_tasks from readthedocs.builds.constants import ( + ARTIFACT_TYPES, BUILD_FINAL_STATES, BUILD_STATE_BUILDING, BUILD_STATE_CLONING, @@ -28,6 +29,7 @@ BUILD_STATUS_FAILURE, BUILD_STATUS_SUCCESS, EXTERNAL, + UNDELETABLE_ARTIFACT_TYPES, ) from readthedocs.builds.models import APIVersion, Build from readthedocs.builds.signals import build_complete @@ -108,8 +110,6 @@ class TaskData: # Dictionary returned from the API. build: dict = field(default_factory=dict) - # If HTML, PDF, ePub, etc formats were built. - outcomes: dict = field(default_factory=lambda: defaultdict(lambda: False)) # Build data for analytics (telemetry). 
build_data: dict = field(default_factory=dict) @@ -514,28 +514,38 @@ def on_failure(self, exc, task_id, args, kwargs, einfo): self.data.build['success'] = False def on_success(self, retval, task_id, args, kwargs): - html = self.data.outcomes['html'] - search = self.data.outcomes['search'] - localmedia = self.data.outcomes['localmedia'] - pdf = self.data.outcomes['pdf'] - epub = self.data.outcomes['epub'] - time_before_store_build_artifacts = timezone.now() # Store build artifacts to storage (local or cloud storage) - self.store_build_artifacts( - html=html, - search=search, - localmedia=localmedia, - pdf=pdf, - epub=epub, - ) + # + # TODO: move ``store_build_artifacts`` inside ``execute`` so we can + # handle exceptions properly on ``on_failure`` + self.store_build_artifacts() log.info( "Store build artifacts finished.", time=(timezone.now() - time_before_store_build_artifacts).seconds, ) - # NOTE: we are updating the db version instance *only* when # HTML are built successfully. + html = os.path.exists( + self.data.project.artifact_path( + version=self.data.version.slug, type_="html" + ) + ) + localmedia = os.path.exists( + self.data.project.artifact_path( + version=self.data.version.slug, type_="htmlzip" + ) + ) + pdf = os.path.exists( + self.data.project.artifact_path(version=self.data.version.slug, type_="pdf") + ) + epub = os.path.exists( + self.data.project.artifact_path( + version=self.data.version.slug, type_="epub" + ) + ) + + # NOTE: we are updating the db version instance *only* when if html: try: api_v2.version(self.data.version.pk).patch( @@ -767,14 +777,7 @@ def set_valid_clone(self): self.data.project.has_valid_clone = True self.data.version.project.has_valid_clone = True - def store_build_artifacts( - self, - html=False, - localmedia=False, - search=False, - pdf=False, - epub=False, - ): + def store_build_artifacts(self): """ Save build artifacts to "storage" using Django's storage API. 
@@ -782,12 +785,6 @@ def store_build_artifacts( such as S3, Azure storage or Google Cloud Storage. Remove build artifacts of types not included in this build (PDF, ePub, zip only). - - :param html: whether to save HTML output - :param localmedia: whether to save localmedia (htmlzip) output - :param search: whether to save search artifacts - :param pdf: whether to save PDF output - :param epub: whether to save ePub output """ log.info('Writing build artifacts to media storage') # NOTE: I don't remember why we removed this state from the Build @@ -798,34 +795,43 @@ def store_build_artifacts( types_to_copy = [] types_to_delete = [] - # HTML media - if html: - types_to_copy.append(('html', self.data.config.doctype)) - - # Search media (JSON) - if search: - types_to_copy.append(('json', 'sphinx_search')) - - if localmedia: - types_to_copy.append(('htmlzip', 'sphinx_localmedia')) - else: - types_to_delete.append('htmlzip') - - if pdf: - types_to_copy.append(('pdf', 'sphinx_pdf')) - else: - types_to_delete.append('pdf') - - if epub: - types_to_copy.append(('epub', 'sphinx_epub')) - else: - types_to_delete.append('epub') + for artifact_type in ARTIFACT_TYPES: + if os.path.exists( + self.data.project.artifact_path( + version=self.data.version.slug, + type_=artifact_type, + ) + ): + types_to_copy.append(artifact_type) + # Never delete HTML nor JSON (search index) + elif artifact_type not in UNDELETABLE_ARTIFACT_TYPES: + types_to_delete.append(artifact_type) - for media_type, build_type in types_to_copy: + for media_type in types_to_copy: + # NOTE: here is where we get the correct FROM path to upload from_path = self.data.version.project.artifact_path( version=self.data.version.slug, - type_=build_type, + type_=media_type, ) + + # Check if there are multiple files on source directories. + # These output formats do not support multiple files yet. + # In case multiple files are found, the upload for this format is not performed. 
+ # + # TODO: we should fail the build for these cases and clearly communicate this + # to the user. To do this, we need to call this method (``store_build_artifacts``) + # from the ``execute`` method. + # It will allow us to just `raise BuildUserError` and handle it at + # Celery `on_failure` handler. + if media_type in ("htmlzip", "epub", "pdf"): + if len(os.listdir(from_path)) > 1: + log.exception( + "Multiple files are not supported for this format. " + "Skipping this output format.", + output_format=media_type, + ) + continue + to_path = self.data.version.project.get_storage_path( type_=media_type, version_slug=self.data.version.slug, diff --git a/readthedocs/projects/tests/mockers.py b/readthedocs/projects/tests/mockers.py index 39649efc03d..e700bd66663 100644 --- a/readthedocs/projects/tests/mockers.py +++ b/readthedocs/projects/tests/mockers.py @@ -53,6 +53,11 @@ def _mock_artifact_builders(self): # 'readthedocs.doc_builder.backends.sphinx.PdfBuilder.build', # ) + self.patches["builder.pdf.PdfBuilder.pdf_file_name"] = mock.patch( + "readthedocs.doc_builder.backends.sphinx.PdfBuilder.pdf_file_name", + "project-slug.pdf", + ) + self.patches['builder.pdf.LatexBuildCommand.run'] = mock.patch( 'readthedocs.doc_builder.backends.sphinx.LatexBuildCommand.run', return_value=mock.MagicMock(output='stdout', successful=True) diff --git a/readthedocs/projects/tests/test_build_tasks.py b/readthedocs/projects/tests/test_build_tasks.py index 18afa6460b0..cffc3a6db72 100644 --- a/readthedocs/projects/tests/test_build_tasks.py +++ b/readthedocs/projects/tests/test_build_tasks.py @@ -119,7 +119,7 @@ def test_build_sphinx_formats(self, load_yaml_config, formats, builders): "-D", "language=en", ".", - "_build/html", + "../_readthedocs/html", cwd=mock.ANY, bin_path=mock.ANY, ) @@ -140,7 +140,7 @@ def test_build_sphinx_formats(self, load_yaml_config, formats, builders): "-D", "language=en", ".", - "_build/html", + "../_readthedocs/html", cwd=mock.ANY, bin_path=mock.ANY, ) @@ 
-197,6 +197,12 @@ def test_build_updates_documentation_type(self, load_yaml_config): } ) + # Create the artifact paths, so that `store_build_artifacts` + # properly runs: https://github.com/readthedocs/readthedocs.org/blob/faa611fad689675f81101ea643770a6b669bf529/readthedocs/projects/tasks/builds.py#L798-L804 + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="html")) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="epub")) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="pdf")) + self._trigger_update_docs_task() # Update version state @@ -205,8 +211,8 @@ def test_build_updates_documentation_type(self, load_yaml_config): assert self.requests_mock.request_history[7].json() == { "built": True, "documentation_type": "mkdocs", - "has_pdf": False, - "has_epub": False, + "has_pdf": True, + "has_epub": True, "has_htmlzip": False, } @@ -313,6 +319,15 @@ def test_successful_build( assert not BuildData.objects.all().exists() + # Create the artifact paths, so it's detected by the builder + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="html")) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="json")) + os.makedirs( + self.project.artifact_path(version=self.version.slug, type_="htmlzip") + ) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="epub")) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="pdf")) + self._trigger_update_docs_task() # It has to be called twice, ``before_start`` and ``after_return`` @@ -528,7 +543,10 @@ def test_failed_build( } @mock.patch("readthedocs.doc_builder.director.load_yaml_config") - def test_build_commands_executed(self, load_yaml_config): + def test_build_commands_executed( + self, + load_yaml_config, + ): load_yaml_config.return_value = self._config_file( { "version": 2, @@ -539,6 +557,15 @@ def test_build_commands_executed(self, load_yaml_config): } ) + # 
Create the artifact paths, so it's detected by the builder + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="html")) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="json")) + os.makedirs( + self.project.artifact_path(version=self.version.slug, type_="htmlzip") + ) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="epub")) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="pdf")) + self._trigger_update_docs_task() self.mocker.mocks["git.Backend.run"].assert_has_calls( @@ -613,7 +640,7 @@ def test_build_commands_executed(self, load_yaml_config): "-D", "language=en", ".", - "_build/html", + "../_readthedocs/html", cwd=mock.ANY, bin_path=mock.ANY, ), @@ -630,25 +657,57 @@ def test_build_commands_executed(self, load_yaml_config): "-D", "language=en", ".", - "_build/localmedia", + "../_readthedocs/htmlzip", cwd=mock.ANY, bin_path=mock.ANY, ), + mock.call( + "mktemp", + "--directory", + record=False, + ), + mock.call( + "mv", + mock.ANY, + mock.ANY, + cwd=mock.ANY, + record=False, + ), + mock.call( + "mkdir", + "--parents", + mock.ANY, + cwd=mock.ANY, + record=False, + ), + mock.call( + "zip", + "--recurse-paths", + "--symlinks", + mock.ANY, + mock.ANY, + cwd=mock.ANY, + record=False, + ), mock.call( mock.ANY, "-m", "sphinx", + "-T", + "-E", "-b", "latex", - "-D", - "language=en", "-d", "_build/doctrees", + "-D", + "language=en", ".", - "_build/latex", + "../_readthedocs/pdf", cwd=mock.ANY, bin_path=mock.ANY, ), + # NOTE: pdf `mv` commands and others are not here because the + # PDF resulting file is not found in the process (`_post_build`) mock.call( mock.ANY, "-c", @@ -659,16 +718,6 @@ def test_build_commands_executed(self, load_yaml_config): shell=True, record=False, ), - mock.call( - "mv", - "-f", - "output.file", - # TODO: take a look at - # https://callee.readthedocs.io/en/latest/reference/strings.html#callee.strings.EndsWith - # to match `project.pdf` - mock.ANY, - 
cwd=mock.ANY, - ), mock.call( mock.ANY, "-m", @@ -682,22 +731,58 @@ def test_build_commands_executed(self, load_yaml_config): "-D", "language=en", ".", - "_build/epub", + "../_readthedocs/epub", cwd=mock.ANY, bin_path=mock.ANY, ), mock.call( "mv", - "-f", - "output.file", - # TODO: take a look at - # https://callee.readthedocs.io/en/latest/reference/strings.html#callee.strings.EndsWith - # to match `project.epub` + mock.ANY, + "/tmp/project-latest.epub", + cwd=mock.ANY, + record=False, + ), + mock.call( + "rm", "--recursive", "_readthedocs/epub", cwd=mock.ANY, record=False + ), + mock.call( + "mkdir", + "--parents", + "_readthedocs/epub", + cwd=mock.ANY, + record=False, + ), + mock.call( + "mv", + "/tmp/project-latest.epub", mock.ANY, cwd=mock.ANY, + record=False, ), # FIXME: I think we are hitting this issue here: # https://github.com/pytest-dev/pytest-mock/issues/234 + mock.call("lsb_release", "--description", record=False, demux=True), + mock.call("python", "--version", record=False, demux=True), + mock.call( + "dpkg-query", + "--showformat", + "${package} ${version}\\n", + "--show", + record=False, + demux=True, + ), + mock.call( + "python", + "-m", + "pip", + "list", + "--pre", + "--local", + "--format", + "json", + record=False, + demux=True, + ), ] ) @@ -1142,7 +1227,7 @@ def test_sphinx_fail_on_warning(self, load_yaml_config): "-D", "language=en", ".", - "_build/html", + "../_readthedocs/html", cwd=mock.ANY, bin_path=mock.ANY, ), @@ -1172,7 +1257,7 @@ def test_mkdocs_fail_on_warning(self, load_yaml_config): "build", "--clean", "--site-dir", - "_build/html", + "_readthedocs/html", "--config-file", "docs/mkdocs.yaml", "--strict", # fail on warning flag @@ -1521,7 +1606,7 @@ def test_sphinx_builder(self, load_yaml_config, value, command): "-D", "language=en", ".", - "_build/html", + "../_readthedocs/html", cwd=mock.ANY, bin_path=mock.ANY, ), diff --git a/readthedocs/rtd_tests/tests/test_doc_builder.py b/readthedocs/rtd_tests/tests/test_doc_builder.py 
index e61587b3378..52e641a1764 100644 --- a/readthedocs/rtd_tests/tests/test_doc_builder.py +++ b/readthedocs/rtd_tests/tests/test_doc_builder.py @@ -51,6 +51,7 @@ def setUp(self): BaseSphinx.type = 'base' BaseSphinx.sphinx_build_dir = tempfile.mkdtemp() + BaseSphinx.relative_output_dir = "_readthedocs/" @patch('readthedocs.doc_builder.backends.sphinx.BaseSphinx.docs_dir') @patch('readthedocs.projects.models.Project.checkout_path')