From dddf5b48a10799d98a396b3f56403b408c257f17 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Fri, 22 Nov 2024 16:00:52 -0800 Subject: [PATCH 1/2] Add test filesection indexer This is a test implementation of storing the section data in a similar way to filetreediff. The goal would be to build a command palette that initially lets users do some basic filtering of just the filenames. --- readthedocs/filesections/__init__.py | 50 +++++++++++++++++++++++++ readthedocs/filesections/dataclasses.py | 38 +++++++++++++++++++ readthedocs/projects/constants.py | 2 + readthedocs/projects/tasks/search.py | 39 +++++++++++++++++++ readthedocs/proxito/views/hosting.py | 36 ++++++++++++++++++ 5 files changed, 165 insertions(+) create mode 100644 readthedocs/filesections/__init__.py create mode 100644 readthedocs/filesections/dataclasses.py diff --git a/readthedocs/filesections/__init__.py b/readthedocs/filesections/__init__.py new file mode 100644 index 00000000000..0c7cb060c8a --- /dev/null +++ b/readthedocs/filesections/__init__.py @@ -0,0 +1,50 @@ +""" +Module for the file sections feature. + +This feature is used to store the title and path name of each page in the index. 
+""" + +import json +import logging + +from readthedocs.builds.models import Version +from readthedocs.filesections.dataclasses import FileSectionManifest +from readthedocs.projects.constants import MEDIA_TYPE_SECTIONS +from readthedocs.storage import build_media_storage + +SECTION_MANIFEST_FILE_NAME = "sections_manifest.json" + +log = logging.getLogger(__name__) + + +def get_section_manifest(version: Version) -> FileSectionManifest | None: + storage_path = version.project.get_storage_path( + type_=MEDIA_TYPE_SECTIONS, + version_slug=version.slug, + include_file=False, + version_type=version.type, + ) + manifest_path = build_media_storage.join(storage_path, SECTION_MANIFEST_FILE_NAME) + try: + with build_media_storage.open(manifest_path) as manifest_file: + manifest = json.load(manifest_file) + log.info(f"Loaded section manifest from {manifest_path}") + except FileNotFoundError: + log.warning(f"Section manifest not found at {manifest_path}") + return None + + return FileSectionManifest.from_dict(manifest) + + +def write_section_manifest(version: Version, manifest: FileSectionManifest): + storage_path = version.project.get_storage_path( + type_=MEDIA_TYPE_SECTIONS, + version_slug=version.slug, + include_file=False, + version_type=version.type, + ) + manifest_path = build_media_storage.join(storage_path, SECTION_MANIFEST_FILE_NAME) + with build_media_storage.open(manifest_path, "w") as f: + manifest_dict = manifest.as_dict() + log.info(f"Writing section manifest: {manifest_dict}") + json.dump(manifest_dict, f) diff --git a/readthedocs/filesections/dataclasses.py b/readthedocs/filesections/dataclasses.py new file mode 100644 index 00000000000..649602f72de --- /dev/null +++ b/readthedocs/filesections/dataclasses.py @@ -0,0 +1,38 @@ +from dataclasses import asdict, dataclass + + +@dataclass(slots=True) +class FileSection: + id: str + title: str + + +@dataclass(slots=True) +class Page: + path: str + sections: list[FileSection] + + +@dataclass(slots=True) +class 
FileSectionManifest: + build: int + pages: list[Page] + + def __init__(self, build_id: int, pages: list[Page]): + self.build = build_id + self.pages = pages + + @classmethod + def from_dict(cls, data: dict) -> "FileSectionManifest": + build_id = data["build"] + pages = [ + Page( + path=page["path"], + sections=[FileSection(**section) for section in page["sections"]], + ) + for page in data["pages"] + ] + return cls(build_id, pages) + + def as_dict(self) -> dict: + return asdict(self) diff --git a/readthedocs/projects/constants.py b/readthedocs/projects/constants.py index eae06bd9e8f..f2b48fc982a 100644 --- a/readthedocs/projects/constants.py +++ b/readthedocs/projects/constants.py @@ -35,6 +35,7 @@ MEDIA_TYPE_HTMLZIP = "htmlzip" MEDIA_TYPE_JSON = "json" MEDIA_TYPE_DIFF = "diff" +MEDIA_TYPE_SECTIONS = "sections" DOWNLOADABLE_MEDIA_TYPES = ( MEDIA_TYPE_PDF, MEDIA_TYPE_EPUB, @@ -47,6 +48,7 @@ MEDIA_TYPE_HTMLZIP, MEDIA_TYPE_JSON, MEDIA_TYPE_DIFF, + MEDIA_TYPE_SECTIONS, ) BUILD_COMMANDS_OUTPUT_PATH = "_readthedocs/" diff --git a/readthedocs/projects/tasks/search.py b/readthedocs/projects/tasks/search.py index 74cbb3adfa8..9d450587fd1 100644 --- a/readthedocs/projects/tasks/search.py +++ b/readthedocs/projects/tasks/search.py @@ -4,6 +4,8 @@ from readthedocs.builds.constants import BUILD_STATE_FINISHED, INTERNAL, LATEST from readthedocs.builds.models import Build, Version +from readthedocs.filesections import write_section_manifest +from readthedocs.filesections.dataclasses import FileSection, FileSectionManifest, Page from readthedocs.filetreediff import write_manifest from readthedocs.filetreediff.dataclasses import FileTreeDiffFile, FileTreeDiffManifest from readthedocs.projects.models import Feature, HTMLFile, Project @@ -142,6 +144,36 @@ def collect(self, sync_id: int): write_manifest(self.version, manifest) +class FileSectionIndexer(Indexer): + def __init__(self, version: Version, build: Build): + self.version = version + self.build = build + self.pages = [] + + 
def process(self, html_file: HTMLFile, sync_id: int): + log.debug("Processing file for sections", path=html_file.path) + processed_json = html_file.processed_json + if processed_json: + sections = [ + FileSection( + id=section["id"], + title=section["title"], + ) + for section in processed_json.get("sections", []) + ] + self.pages.append(Page(path=html_file.path, sections=sections)) + log.debug("Finished processing file for sections", path=html_file.path) + + def collect(self, sync_id: int): + log.debug("Collecting sections for manifest", build_id=self.build.id) + manifest = FileSectionManifest( + build_id=self.build.id, + pages=self.pages, + ) + write_section_manifest(self.version, manifest) + log.debug("Finished collecting sections for manifest", build_id=self.build.id) + + def _get_indexers(*, version: Version, build: Build, search_index_name=None): build_config = build.config or {} search_config = build_config.get("search", {}) @@ -182,6 +214,13 @@ def _get_indexers(*, version: Version, build: Build, search_index_name=None): version=version, ) indexers.append(index_file_indexer) + + file_section_indexer = FileSectionIndexer( + version=version, + build=build, + ) + indexers.append(file_section_indexer) + return indexers diff --git a/readthedocs/proxito/views/hosting.py b/readthedocs/proxito/views/hosting.py index 4315de518d4..eed5e17d69f 100644 --- a/readthedocs/proxito/views/hosting.py +++ b/readthedocs/proxito/views/hosting.py @@ -23,6 +23,7 @@ from readthedocs.core.resolver import Resolver from readthedocs.core.unresolver import UnresolverError, unresolver from readthedocs.core.utils.extend import SettingsOverrideObject +from readthedocs.filesections import get_section_manifest from readthedocs.filetreediff import get_diff from readthedocs.projects.constants import ( ADDONS_FLYOUT_SORTING_CALVER, @@ -535,6 +536,9 @@ def _v1(self, project, version, build, filename, url, request): "filetreediff": { "enabled": False, }, + "filesections": { + "enabled": False, + 
}, }, } @@ -548,6 +552,13 @@ def _v1(self, project, version, build, filename, url, request): if response: data["addons"]["filetreediff"].update(response) + sections_response = self._get_filesections_response( + project=project, + version=version, + ) + if sections_response: + data["addons"]["filesections"].update(sections_response) + # Show the subprojects filter on the parent project and subproject # TODO: Remove these queries and try to find a way to get this data # from the resolver, which has already done these queries. @@ -711,6 +722,31 @@ def _get_filetreediff_response(self, *, request, project, version, resolver): }, } + def _get_filesections_response(self, *, project, version): + """ + Get the file sections response for the given version. + """ + manifest = get_section_manifest(version) + if not manifest: + return None + + return { + "enabled": True, + "sections": [ + { + "path": page.path, + "sections": [ + { + "id": section.id, + "title": section.title, + } + for section in page.sections + ], + } + for page in manifest.pages + ], + } + def _v2(self, project, version, build, filename, url, user): return { "api_version": "2", From 60ae1de6bd5ab7d42abc8cb6d95171a5fe54abc6 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Sun, 24 Nov 2024 14:46:56 -0800 Subject: [PATCH 2/2] Change response name to pages --- readthedocs/proxito/views/hosting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/proxito/views/hosting.py b/readthedocs/proxito/views/hosting.py index eed5e17d69f..563a562c016 100644 --- a/readthedocs/proxito/views/hosting.py +++ b/readthedocs/proxito/views/hosting.py @@ -732,7 +732,7 @@ def _get_filesections_response(self, *, project, version): return { "enabled": True, - "sections": [ + "pages": [ { "path": page.path, "sections": [