From a1293344eee7f7c6b831bc4d6c9536f147f45f51 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Fri, 13 Dec 2024 15:13:03 -0500 Subject: [PATCH] WIP: migrate config validation to pydantic NOTE: DO NOT REVIEW I'm just playing around with how the validation with pydantic will look. I'll use this as a reference to make an incremental migration. --- readthedocs/config/config.py | 664 ++++------------------------------- readthedocs/config/models.py | 264 +++++++++++++- 2 files changed, 322 insertions(+), 606 deletions(-) diff --git a/readthedocs/config/config.py b/readthedocs/config/config.py index e1cbbd60b33..7812bcb07cf 100644 --- a/readthedocs/config/config.py +++ b/readthedocs/config/config.py @@ -2,42 +2,21 @@ import copy import os -import re from contextlib import contextmanager from functools import lru_cache from django.conf import settings -from pydantic import BaseModel +from pydantic import BaseModel, ValidationError -from readthedocs.config.utils import list_to_dict +from readthedocs.config.models import BuildConfig as BuildConfigModel from readthedocs.core.utils.filesystem import safe_open from readthedocs.projects.constants import GENERIC from .exceptions import ConfigError, ConfigValidationError from .find import find_one -from .models import ( - BuildJobs, - BuildJobsBuildTypes, - BuildTool, - BuildWithOs, - Conda, - Mkdocs, - Python, - PythonInstall, - Search, - Sphinx, - Submodules, -) +from .models import PythonInstall from .parser import ParseError, parse -from .validation import ( - validate_bool, - validate_choice, - validate_dict, - validate_list, - validate_path, - validate_path_pattern, - validate_string, -) +from .validation import validate_dict, validate_path __all__ = ( "ALL", @@ -101,9 +80,10 @@ def __init__(self, raw_config, source_file, base_path=None): self.base_path = os.path.dirname(self.source_file) self._config = {} + self._build_config = None @contextmanager - def catch_validation_error(self, key): + def catch_validation_error(self, key=None): """Catch a ``ConfigValidationError`` and raises a ``ConfigError`` error.""" # NOTE: I don't like too much this pattern of re-raising an exception via a context manager. # I think we should raise the exception where it happens, instead of encapsulating all of them.
@@ -118,7 +98,7 @@ def catch_validation_error(self, key): format_values = getattr(error, "format_values", {}) format_values.update( { - "key": key, + "key": key if key else error.format_values.get("key"), "value": error.format_values.get("value"), "source_file": os.path.relpath(self.source_file, self.base_path), } @@ -174,11 +154,11 @@ def validate(self): @property def is_using_conda(self): - return self.python_interpreter in ("conda", "mamba") + return self._build_config.is_using_conda @property def is_using_build_commands(self): - return self.build.commands != [] + return self._build_config.build.commands is not None @property def is_using_setup_py_install(self): @@ -190,14 +170,7 @@ def is_using_setup_py_install(self): @property def python_interpreter(self): - tool = self.build.tools.get("python") - if tool and tool.version.startswith("mamba"): - return "mamba" - if tool and tool.version.startswith("miniconda"): - return "conda" - if tool: - return "python" - return None + return self._build_config.python_interpreter @property def docker_image(self): @@ -237,592 +210,109 @@ def settings(self): return settings.RTD_DOCKER_BUILD_SETTINGS def validate(self): - """Validates and process ``raw_config``.""" - self._config["formats"] = self.validate_formats() - - # This should be called before ``validate_python`` and ``validate_conda`` - self._config["build"] = self.validate_build() - - self._config["conda"] = self.validate_conda() - self._config["python"] = self.validate_python() - # Call this before validate sphinx and mkdocs - self.validate_doc_types() - self._config["mkdocs"] = self.validate_mkdocs() - self._config["sphinx"] = self.validate_sphinx() - self._config["submodules"] = self.validate_submodules() - self._config["search"] = self.validate_search() - self.validate_keys() - - def validate_formats(self): - """ - Validates that formats contains only valid formats. - - The ``ALL`` keyword can be used to indicate that all formats are used. - We ignore the default values here. - """ - formats = self.pop_config("formats", []) - if formats == ALL: - return self.valid_formats - with self.catch_validation_error("formats"): - validate_list(formats) - for format_ in formats: - validate_choice(format_, self.valid_formats) - return formats - - def validate_conda(self): - """Validates the conda key.""" - raw_conda = self._raw_config.get("conda") - if raw_conda is None: - if self.is_using_conda and not self.is_using_build_commands: - raise ConfigError( - message_id=ConfigError.CONDA_KEY_REQUIRED, - format_values={"key": "conda"}, - ) - return None - - with self.catch_validation_error("conda"): - validate_dict(raw_conda) - - conda = {} - with self.catch_validation_error("conda.environment"): - environment = self.pop_config("conda.environment", raise_ex=True) - conda["environment"] = validate_path(environment, self.base_path) - return conda - - # TODO: rename these methods to call them just `validate_build_config` - def validate_build_config_with_os(self): + with self.catch_validation_error(): + try: + self._build_config = BuildConfigModel(**self._raw_config) + except ValidationError as exc: + raise self._cast_pydantic_error(exc.errors()[0]) + + # Normalize paths. 
+ if self._build_config.conda: + self._build_config.conda.environment = validate_path(self._build_config.conda.environment, self.base_path) + if self._build_config.python: + for install in self._build_config.python.install: + if isinstance(install, PythonInstall): + install.path = validate_path(install.path, self.base_path) + else: + install.requirements = validate_path(install.requirements, self.base_path) + + if self._build_config.sphinx and self._build_config.sphinx.configuration: + self._build_config.sphinx.configuration = validate_path(self._build_config.sphinx.configuration, self.base_path) + + if self._build_config.mkdocs and self._build_config.mkdocs.configuration: + self._build_config.mkdocs.configuration = validate_path(self._build_config.mkdocs.configuration, self.base_path) + + def _cast_pydantic_error(self, error): """ - Validates the build object (new format). - - At least one element must be provided in ``build.tools``. - """ - build = {} - with self.catch_validation_error("build.os"): - build_os = self.pop_config("build.os", raise_ex=True) - build["os"] = validate_choice(build_os, self.settings["os"].keys()) - - tools = {} - with self.catch_validation_error("build.tools"): - tools = self.pop_config("build.tools") - if tools: - validate_dict(tools) - for tool in tools.keys(): - validate_choice(tool, self.settings["tools"].keys()) - - jobs = {} - with self.catch_validation_error("build.jobs"): - # FIXME: should we use `default={}` or kept the `None` here and - # shortcircuit the rest of the logic? - jobs = self.pop_config("build.jobs", default={}) - validate_dict(jobs) - # NOTE: besides validating that each key is one of the expected - # ones, we could validate the value of each of them is a list of - # commands. However, I don't think we should validate the "command" - # looks like a real command. - valid_jobs = list(BuildJobs.model_fields.keys()) - for job in jobs.keys(): - validate_choice(job, valid_jobs) - - commands = [] - with self.catch_validation_error("build.commands"): - commands = self.pop_config("build.commands", default=[]) - validate_list(commands) - - if not (tools or commands): - raise ConfigError( - message_id=ConfigError.NOT_BUILD_TOOLS_OR_COMMANDS, - format_values={ - "key": "build", - }, - ) - - if commands and jobs: - raise ConfigError( - message_id=ConfigError.BUILD_JOBS_AND_COMMANDS, - format_values={ - "key": "build", - }, - ) - - build["jobs"] = {} - - with self.catch_validation_error("build.jobs.build"): - build["jobs"]["build"] = self.validate_build_jobs_build(jobs) - # Remove the build.jobs.build key from the build.jobs dict, - # since it's the only key that should be a dictionary, - # it was already validated above.
- jobs.pop("build", None) - - for job, job_commands in jobs.items(): - with self.catch_validation_error(f"build.jobs.{job}"): - build["jobs"][job] = [ - validate_string(job_command) - for job_command in validate_list(job_commands) - ] - - build["commands"] = [] - for command in commands: - with self.catch_validation_error("build.commands"): - build["commands"].append(validate_string(command)) - - build["tools"] = {} - if tools: - for tool, version in tools.items(): - with self.catch_validation_error(f"build.tools.{tool}"): - build["tools"][tool] = validate_choice( - version, - self.settings["tools"][tool].keys(), - ) - - build["apt_packages"] = self.validate_apt_packages() - return build - - def validate_build_jobs_build(self, build_jobs): - result = {} - build_jobs_build = build_jobs.get("build", {}) - validate_dict(build_jobs_build) - - allowed_build_types = list(BuildJobsBuildTypes.model_fields.keys()) - for build_type, build_commands in build_jobs_build.items(): - validate_choice(build_type, allowed_build_types) - if build_type != "html" and build_type not in self.formats: - raise ConfigError( - message_id=ConfigError.BUILD_JOBS_BUILD_TYPE_MISSING_IN_FORMATS, - format_values={ - "build_type": build_type, - }, - ) - with self.catch_validation_error(f"build.jobs.build.{build_type}"): - result[build_type] = [ - validate_string(build_command) - for build_command in validate_list(build_commands) - ] - - return result - - def validate_apt_packages(self): - apt_packages = [] - with self.catch_validation_error("build.apt_packages"): - raw_packages = self._raw_config.get("build", {}).get("apt_packages", []) - validate_list(raw_packages) - # Transform to a dict, so is easy to validate individual entries. - self._raw_config.setdefault("build", {})["apt_packages"] = list_to_dict( - raw_packages - ) - - apt_packages = [ - self.validate_apt_package(index) for index in range(len(raw_packages)) - ] - if not raw_packages: - self.pop_config("build.apt_packages") - - return apt_packages - - def validate_build(self): - raw_build = self._raw_config.get("build", {}) - with self.catch_validation_error("build"): - validate_dict(raw_build) - return self.validate_build_config_with_os() - - def validate_apt_package(self, index): + All possible types of errors that can be found at https://docs.pydantic.dev/latest/errors/validation_errors/. """ - Validate the package name to avoid injections of extra options. - - We validate that they aren't interpreted as an option or file. - See https://manpages.ubuntu.com/manpages/xenial/man8/apt-get.8.html - and https://www.debian.org/doc/manuals/debian-reference/ch02.en.html#_debian_package_file_names # noqa - for allowed chars in packages names. - """ - key = f"build.apt_packages.{index}" - package = self.pop_config(key) - with self.catch_validation_error(key): - validate_string(package) - package = package.strip() - invalid_starts = [ - # Don't allow extra options. - "-", - # Don't allow to install from a path. - "/", - ".", - ] - for start in invalid_starts: - if package.startswith(start): - raise ConfigError( - message_id=ConfigError.APT_INVALID_PACKAGE_NAME_PREFIX, - format_values={ - "prefix": start, - "package": package, - "key": key, - }, - ) - - # List of valid chars in packages names. 
- pattern = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9.+-]*$") - if not pattern.match(package): - raise ConfigError( - message_id=ConfigError.APT_INVALID_PACKAGE_NAME, - format_values={ - "package": package, - "key": key, - }, - ) - return package - - def validate_python(self): - """ - Validates the python key. - - validate_build should be called before this, since it initialize the - build.image attribute. - - .. note:: - - ``version`` can be a string or number type. - - ``extra_requirements`` needs to be used with ``install: 'pip'``. - """ - raw_python = self._raw_config.get("python", {}) - with self.catch_validation_error("python"): - validate_dict(raw_python) - - python = {} - with self.catch_validation_error("python.install"): - raw_install = self._raw_config.get("python", {}).get("install", []) - validate_list(raw_install) - if raw_install: - # Transform to a dict, so it's easy to validate extra keys. - self._raw_config.setdefault("python", {})["install"] = list_to_dict( - raw_install - ) - else: - self.pop_config("python.install") - - raw_install = self._raw_config.get("python", {}).get("install", []) - python["install"] = [ - self.validate_python_install(index) for index in range(len(raw_install)) - ] - - return python - - def validate_python_install(self, index): - """Validates the python.install.{index} key.""" - python_install = {} - key = "python.install.{}".format(index) - raw_install = self._raw_config["python"]["install"][str(index)] - with self.catch_validation_error(key): - validate_dict(raw_install) - - if "requirements" in raw_install: - requirements_key = key + ".requirements" - with self.catch_validation_error(requirements_key): - requirements = validate_path( - self.pop_config(requirements_key), - self.base_path, - ) - python_install["requirements"] = requirements - elif "path" in raw_install: - path_key = key + ".path" - with self.catch_validation_error(path_key): - path = validate_path( - self.pop_config(path_key), - self.base_path, - ) - python_install["path"] = path - - method_key = key + ".method" - with self.catch_validation_error(method_key): - method = validate_choice( - self.pop_config(method_key, PIP), - self.valid_install_method, - ) - python_install["method"] = method - - extra_req_key = key + ".extra_requirements" - with self.catch_validation_error(extra_req_key): - extra_requirements = validate_list( - self.pop_config(extra_req_key, []), - ) - if extra_requirements and python_install["method"] != PIP: - raise ConfigError( - message_id=ConfigError.USE_PIP_FOR_EXTRA_REQUIREMENTS, - ) - python_install["extra_requirements"] = extra_requirements + key = ".".join(str(part) for part in error['loc']) + message_id = ConfigError.GENERIC + context = {} + if error['type'] == 'missing': + message_id = ConfigValidationError.VALUE_NOT_FOUND + elif error['type'] in ('model_type', 'dict_type'): + message_id = ConfigValidationError.INVALID_DICT + elif error['type'] == 'extra_forbidden': + message_id = ConfigError.INVALID_KEY_NAME + elif error['type'] == 'list_type': + message_id = ConfigValidationError.INVALID_LIST + elif error['type'] == 'enum': + message_id = ConfigValidationError.INVALID_CHOICE + context["expected"] = error['ctx']['expected'] else: - raise ConfigError( - message_id=ConfigError.PIP_PATH_OR_REQUIREMENT_REQUIRED, - format_values={ - "key": key, - }, - ) - - return python_install - - def validate_doc_types(self): - """ - Validates that the user only have one type of documentation.
- - This should be called before validating ``sphinx`` or ``mkdocs`` to - avoid innecessary validations. - """ - with self.catch_validation_error("."): - if "sphinx" in self._raw_config and "mkdocs" in self._raw_config: - raise ConfigError( - message_id=ConfigError.SPHINX_MKDOCS_CONFIG_TOGETHER, - ) - - def validate_mkdocs(self): - """ - Validates the mkdocs key. - - It makes sure we are using an existing configuration file. - """ - raw_mkdocs = self._raw_config.get("mkdocs") - if raw_mkdocs is None: - return None - - with self.catch_validation_error("mkdocs"): - validate_dict(raw_mkdocs) - - mkdocs = {} - with self.catch_validation_error("mkdocs.configuration"): - configuration = self.pop_config("mkdocs.configuration", None) - if configuration is not None: - configuration = validate_path(configuration, self.base_path) - mkdocs["configuration"] = configuration - - with self.catch_validation_error("mkdocs.fail_on_warning"): - fail_on_warning = self.pop_config("mkdocs.fail_on_warning", False) - mkdocs["fail_on_warning"] = validate_bool(fail_on_warning) - - return mkdocs - - def validate_sphinx(self): - """ - Validates the sphinx key. - - It makes sure we are using an existing configuration file. - - .. note:: - It should be called after ``validate_mkdocs``. That way - we can default to sphinx if ``mkdocs`` is not given. - """ - raw_sphinx = self._raw_config.get("sphinx") - if raw_sphinx is None: - if self.mkdocs is None: - raw_sphinx = {} - else: - return None - - with self.catch_validation_error("sphinx"): - validate_dict(raw_sphinx) - - sphinx = {} - with self.catch_validation_error("sphinx.builder"): - builder = validate_choice( - self.pop_config("sphinx.builder", "html"), - self.valid_sphinx_builders.keys(), - ) - sphinx["builder"] = self.valid_sphinx_builders[builder] - - with self.catch_validation_error("sphinx.configuration"): - configuration = self.pop_config( - "sphinx.configuration", - ) - if configuration is not None: - configuration = validate_path(configuration, self.base_path) - sphinx["configuration"] = configuration - - with self.catch_validation_error("sphinx.fail_on_warning"): - fail_on_warning = self.pop_config("sphinx.fail_on_warning", False) - sphinx["fail_on_warning"] = validate_bool(fail_on_warning) - - return sphinx - - def validate_submodules(self): - """ - Validates the submodules key. - - - We can use the ``ALL`` keyword in include or exclude. - - We can't exclude and include submodules at the same time. 
- """ - raw_submodules = self._raw_config.get("submodules", {}) - with self.catch_validation_error("submodules"): - validate_dict(raw_submodules) - - submodules = {} - with self.catch_validation_error("submodules.include"): - include = self.pop_config("submodules.include", []) - if include != ALL: - include = [ - validate_string(submodule) for submodule in validate_list(include) - ] - submodules["include"] = include - - with self.catch_validation_error("submodules.exclude"): - default = [] if submodules["include"] else ALL - exclude = self.pop_config("submodules.exclude", default) - if exclude != ALL: - exclude = [ - validate_string(submodule) for submodule in validate_list(exclude) - ] - submodules["exclude"] = exclude - - with self.catch_validation_error("submodules"): - is_including = bool(submodules["include"]) - is_excluding = submodules["exclude"] == ALL or bool(submodules["exclude"]) - if is_including and is_excluding: - raise ConfigError( - message_id=ConfigError.SUBMODULES_INCLUDE_EXCLUDE_TOGETHER, - ) - - with self.catch_validation_error("submodules.recursive"): - recursive = self.pop_config("submodules.recursive", False) - submodules["recursive"] = validate_bool(recursive) - - return submodules - - def validate_search(self): - """ - Validates the search key. - - - ``ranking`` is a map of path patterns to a rank. - - ``ignore`` is a list of patterns. - - The path pattern supports basic globs (*, ?, [seq]). - - The rank can be a integer number between -10 and 10. - """ - raw_search = self._raw_config.get("search", {}) - with self.catch_validation_error("search"): - validate_dict(raw_search) - - search = {} - with self.catch_validation_error("search.ranking"): - ranking = self.pop_config("search.ranking", {}) - validate_dict(ranking) - - valid_rank_range = list(range(-10, 10 + 1)) - - final_ranking = {} - for pattern, rank in ranking.items(): - pattern = validate_path_pattern(pattern) - validate_choice(rank, valid_rank_range) - final_ranking[pattern] = rank - - search["ranking"] = final_ranking - - with self.catch_validation_error("search.ignore"): - ignore_default = [ - "search.html", - "search/index.html", - "404.html", - "404/index.html", - ] - search_ignore = self.pop_config("search.ignore", ignore_default) - validate_list(search_ignore) - - final_ignore = [validate_path_pattern(pattern) for pattern in search_ignore] - search["ignore"] = final_ignore - - return search - - def validate_keys(self): - """ - Checks that we don't have extra keys (invalid ones). - - This should be called after all the validations are done and all keys - are popped from `self._raw_config`. - """ - # The version key isn't popped, but it's - # validated in `load`. - self.pop_config("version", None) - wrong_key = ".".join(self._get_extra_key(self._raw_config)) - if wrong_key: - raise ConfigError( - message_id=ConfigError.INVALID_KEY_NAME, - format_values={ - "key": wrong_key, - }, - ) - - def _get_extra_key(self, value): - """ - Get the extra keyname (list form) of a dict object. - - If there is more than one extra key, the first one is returned. - - Example:: - - { - 'key': { - 'name': 'inner', - } - } + message_id = ConfigError.GENERIC + context["message"] = error['msg'] + # TODO: log to sentry as error, so we can implement all possible errors. + + # If there is an error inside formats, the whole key would be something like + # formats.list[str-enum[Formats]].0, we don't want to show that. 
+ if key.startswith('formats.'): + key = 'formats' + + return ConfigValidationError( + message_id=message_id, + format_values={ + "key": key, + **context, + }, + ) - Will return `['key', 'name']`. - """ - if isinstance(value, dict) and value: - key_name = next(iter(value)) - return [key_name] + self._get_extra_key(value[key_name]) - return [] + def validate(self): + """Validates and processes ``raw_config``.""" + return self.validate_with_pydantic() @property def formats(self): - return self._config["formats"] + return self._build_config.formats @property def conda(self): - if self._config["conda"]: - return Conda(**self._config["conda"]) - return None + return self._build_config.conda @property @lru_cache(maxsize=1) def build(self): - build = self._config["build"] - tools = { - tool: BuildTool( - version=version, - full_version=self.settings["tools"][tool][version], - ) - for tool, version in build["tools"].items() - } - return BuildWithOs( - os=build["os"], - tools=tools, - jobs=BuildJobs(**build["jobs"]), - commands=build["commands"], - apt_packages=build["apt_packages"], - ) + return self._build_config.build @property def python(self): - return Python(**self._config["python"]) + return self._build_config.python @property def sphinx(self): - if self._config["sphinx"]: - return Sphinx(**self._config["sphinx"]) - return None + return self._build_config.sphinx @property def mkdocs(self): - if self._config["mkdocs"]: - return Mkdocs(**self._config["mkdocs"]) - return None + return self._build_config.mkdocs @property def doctype(self): - if "commands" in self._config["build"] and self._config["build"]["commands"]: + if self._build_config.build.commands is not None: return GENERIC if self.mkdocs: return "mkdocs" - return self.sphinx.builder + return self.valid_sphinx_builders[self.sphinx.builder] @property def submodules(self): - return Submodules(**self._config["submodules"]) + return self._build_config.submodules @property def search(self): - return Search(**self._config["search"]) + return self._build_config.search def load(path, readthedocs_yaml_path=None): diff --git a/readthedocs/config/models.py b/readthedocs/config/models.py index 5d63e11ad90..195c82a1f4a 100644 --- a/readthedocs/config/models.py +++ b/readthedocs/config/models.py @@ -8,17 +8,29 @@ but we aren't using it yet, and instead we are doing the validation in a separate step.
""" + +import re +from enum import Enum from typing import Literal -from pydantic import BaseModel +from django.conf import settings +from pydantic import BaseModel, ConfigDict, field_validator, model_validator + +from readthedocs.config.exceptions import ConfigError +from readthedocs.config.validation import validate_choice, validate_path_pattern + + +class Parent(BaseModel): + model_config = ConfigDict(extra="forbid") -class BuildTool(BaseModel): +# TODO: remove this +class BuildTool(Parent): version: str full_version: str -class BuildJobsBuildTypes(BaseModel): +class BuildJobsBuildTypes(Parent): """Object used for `build.jobs.build` key.""" html: list[str] | None = None @@ -27,7 +39,7 @@ class BuildJobsBuildTypes(BaseModel): htmlzip: list[str] | None = None -class BuildJobs(BaseModel): +class BuildJobs(Parent): """Object used for `build.jobs` key.""" pre_checkout: list[str] = [] @@ -46,52 +58,152 @@ class BuildJobs(BaseModel): # TODO: rename this class to `Build` -class BuildWithOs(BaseModel): +class BuildWithOs(Parent): os: str - tools: dict[str, BuildTool] - jobs: BuildJobs = BuildJobs() + tools: dict[str, str] + jobs: BuildJobs | None = None apt_packages: list[str] = [] - commands: list[str] = [] + commands: list[str] | None = None + + @field_validator("os") + @classmethod + def validate_os(cls, value): + validate_choice(value, settings.RTD_DOCKER_BUILD_SETTINGS["os"].keys()) + return value + + @field_validator("tools") + @classmethod + def validate_tools(cls, value): + tools = {} + docker_settings = settings.RTD_DOCKER_BUILD_SETTINGS + for tool, version in value.items(): + validate_choice(tool, docker_settings["tools"].keys()) + validate_choice( + version, docker_settings["tools"][tool].keys() + ) + tools[tool] = BuildTool(version=version, full_version=docker_settings["tools"][tool][version]) + return tools + + @field_validator("apt_packages") + @classmethod + def validate_apt_packages(cls, value): + return [cls.validate_apt_package(package) for package in value] + + @classmethod + def validate_apt_package(cls, package): + """ + Validate the package name to avoid injections of extra options. + We validate that they aren't interpreted as an option or file. + See https://manpages.ubuntu.com/manpages/xenial/man8/apt-get.8.html + and https://www.debian.org/doc/manuals/debian-reference/ch02.en.html#_debian_package_file_names # noqa + for allowed chars in packages names. + """ + package = package.strip() + invalid_starts = [ + # Don't allow extra options. + "-", + # Don't allow to install from a path. + "/", + ".", + ] + for start in invalid_starts: + if package.startswith(start): + raise ConfigError( + message_id=ConfigError.APT_INVALID_PACKAGE_NAME_PREFIX, + format_values={ + "prefix": start, + "package": package, + }, + ) -class PythonInstallRequirements(BaseModel): + # List of valid chars in packages names. 
+ pattern = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9.+-]*$") + if not pattern.match(package): + raise ConfigError( + message_id=ConfigError.APT_INVALID_PACKAGE_NAME, + format_values={ + "package": package, + }, + ) + return package + + @model_validator(mode="after") + def validate_jobs_and_commands_cant_be_used_together(self): + if self.jobs and self.commands: + raise ConfigError( + message_id=ConfigError.BUILD_JOBS_AND_COMMANDS, + format_values={ + "key": "build", + }, + ) + return self + + @model_validator(mode="after") + def validate_tools_or_commands_are_used(self): + if not self.tools and not self.commands: + raise ConfigError( + message_id=ConfigError.NOT_BUILD_TOOLS_OR_COMMANDS, + format_values={ + "key": "build", + }, + ) + return self + + +class PythonInstallRequirements(Parent): requirements: str -class PythonInstall(BaseModel): +class PythonInstall(Parent): path: str method: Literal["pip", "setuptools"] = "pip" extra_requirements: list[str] = [] -class Python(BaseModel): +class Python(Parent): install: list[PythonInstall | PythonInstallRequirements] = [] -class Conda(BaseModel): +class Conda(Parent): environment: str -class Sphinx(BaseModel): + +class Sphinx(Parent): configuration: str | None - # NOTE: This is how we save the object in the DB, - # the actual options for users are "html", "htmldir", "singlehtml". - builder: Literal["sphinx", "sphinx_htmldir", "sphinx_singlehtml"] = "sphinx" + builder: Literal["html", "dirhtml", "singlehtml"] = "html" fail_on_warning: bool = False + @field_validator("builder", mode="before") + @classmethod + def validate_builder(cls, value): + # This is to keep compatibility with the old configuration. + if value == "htmldir": + return "dirhtml" + return value + -class Mkdocs(BaseModel): +class Mkdocs(Parent): configuration: str | None fail_on_warning: bool = False -class Submodules(BaseModel): +class Submodules(Parent): include: list[str] | Literal["all"] = [] exclude: list[str] | Literal["all"] = [] recursive: bool = False + @model_validator(mode="after") + def validate_include_exclude_together(self): + if self.include and self.exclude: + raise ConfigError( + message_id=ConfigError.SUBMODULES_INCLUDE_EXCLUDE_TOGETHER, + ) + return self -class Search(BaseModel): + +class Search(Parent): ranking: dict[str, int] = {} ignore: list[str] = [ "search.html", @@ -99,3 +211,117 @@ class Search(BaseModel): "404.html", "404/index.html", ] + + @field_validator("ranking") + @classmethod + def validate_ranking(cls, value): + valid_rank_range = list(range(-10, 10 + 1)) + final_ranking = {} + for pattern, rank in value.items(): + pattern = validate_path_pattern(pattern) + validate_choice(rank, valid_rank_range) + final_ranking[pattern] = rank + return final_ranking + + @field_validator("ignore") + @classmethod + def validate_ignore(cls, value): + return [validate_path_pattern(pattern) for pattern in value] + + +# TODO: replace with StrEnum when we upgrade to Python 3.11. 
+class Formats(str, Enum): + pdf = "pdf" + epub = "epub" + htmlzip = "htmlzip" + + +class BuildConfig(Parent): + + version: Literal[2, "2"] + formats: list[Formats] | Literal["all"] = [] + build: BuildWithOs + + conda: Conda | None = None + python: Python | None = None + + sphinx: Sphinx | None = None + mkdocs: Mkdocs | None = None + submodules: Submodules = Submodules() + search: Search = Search() + + @field_validator("formats", mode="before") + @classmethod + def validate_formats(cls, value): + if value == "all": + return [Formats.pdf, Formats.epub, Formats.htmlzip] + return value + + @model_validator(mode="after") + def validate_formats_matches_build_overrides(self): + if not self.build.jobs: + return self + if self.build.jobs.build.pdf is not None and Formats.pdf not in self.formats: + raise ConfigError( + message_id=ConfigError.BUILD_JOBS_BUILD_TYPE_MISSING_IN_FORMATS, + format_values={ + "build_type": "pdf", + }, + ) + if self.build.jobs.build.epub is not None and Formats.epub not in self.formats: + raise ConfigError( + message_id=ConfigError.BUILD_JOBS_BUILD_TYPE_MISSING_IN_FORMATS, + format_values={ + "build_type": "epub", + }, + ) + if ( + self.build.jobs.build.htmlzip is not None + and Formats.htmlzip not in self.formats + ): + raise ConfigError( + message_id=ConfigError.BUILD_JOBS_BUILD_TYPE_MISSING_IN_FORMATS, + format_values={ + "build_type": "htmlzip", + }, + ) + return self + + @model_validator(mode="after") + def validate_sphinx_and_mkdocs_cant_be_used_together(self): + if self.sphinx and self.mkdocs: + raise ConfigError( + message_id=ConfigError.SPHINX_MKDOCS_CONFIG_TOGETHER, + ) + return self + + @model_validator(mode="after") + def validate_conda_if_using_conda_in_build_tools(self): + if self.conda is None and self.is_using_conda and self.build.commands is None: + raise ConfigError( + message_id=ConfigError.CONDA_KEY_REQUIRED, + format_values={"key": "conda"}, + ) + return self + + @property + def is_using_conda(self): + return self.python_interpreter in ["mamba", "conda"] + + @property + def python_interpreter(self): + tool = self.build.tools.get("python") + if not tool: + return None + if tool.startswith("mamba"): + return "mamba" + if tool.startswith("miniconda"): + return "conda" + return "python" + + +def load(d): + try: + return BuildConfig(**d) + except Exception as e: + return e
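For reference, here is a small standalone sketch (not part of the patch) of the pattern the new code relies on: a base model with extra="forbid" so unknown keys are rejected, a model-level validator for cross-field rules, and flattening pydantic's error ``loc`` into a dotted key the way ``_cast_pydantic_error`` does. The ``Build``/``Config`` models, field names, and the error message below are illustrative only, not the real Read the Docs models.

from pydantic import BaseModel, ConfigDict, ValidationError, model_validator


class Parent(BaseModel):
    # Unknown keys raise an ``extra_forbidden`` error instead of being ignored.
    model_config = ConfigDict(extra="forbid")


class Build(Parent):
    os: str
    tools: dict[str, str] = {}
    commands: list[str] | None = None

    @model_validator(mode="after")
    def validate_tools_or_commands_are_used(self):
        # Cross-field rule, similar in spirit to NOT_BUILD_TOOLS_OR_COMMANDS above.
        if not self.tools and not self.commands:
            raise ValueError("either build.tools or build.commands is required")
        return self


class Config(Parent):
    version: int
    build: Build


raw = {"version": 2, "build": {"os": "ubuntu-24.04", "toolz": {"python": "3.12"}}}
try:
    Config(**raw)
except ValidationError as exc:
    error = exc.errors()[0]
    # error["type"] is "extra_forbidden" and error["loc"] is ("build", "toolz"),
    # which flattens to the dotted key "build.toolz" for the user-facing message.
    key = ".".join(str(part) for part in error["loc"])
    print(key, error["type"], error["msg"])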