Skip to content

Search: refactor serializer's context #9624

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 28 additions & 16 deletions readthedocs/search/api/v2/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,11 @@ class PageSearchSerializer(serializers.Serializer):
"""
Page serializer.

If ``projects_data`` is passed into the context, the serializer
will try to use that to generate the link before querying the database.
It's a dictionary mapping the project slug to a ProjectData object.
If ``projects`` is passed in the constructor, the serializer
will pre-generate a cache with that information,
this is to avoid querying the database again for each result.

:param projects: A list of tuples of project and version.
"""

type = serializers.CharField(default="page", source=None, read_only=True)
Expand All @@ -76,6 +78,28 @@ class PageSearchSerializer(serializers.Serializer):
highlights = PageHighlightSerializer(source="meta.highlight", default=dict)
blocks = serializers.SerializerMethodField()

def __init__(self, *args, projects=None, **kwargs):
if projects:
context = kwargs.setdefault("context", {})
context["projects_data"] = {
project.slug: self._build_project_data(project, version.slug)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's better if we pass the project and the version object, in particular if it's a private method and won't be used from outside. Then we can consume whatever is required from the inner method.

The API is easier to use and it's easier to think about while following the code, IMO.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, the similar version of this method (_get_project_data) was receiving the objects as I'm suggesting here :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, now I remember why this was changed, I was about to use it in this same class, to avoid duplicating the logic at

if project:
docs_url = project.get_docs_url(version_slug=obj.version)
project_alias = project.superprojects.values_list(
"alias", flat=True
).first()
projects_data = self.context.setdefault("projects_data", {})
version_data = VersionData(
slug=obj.version,
docs_url=docs_url,
)
projects_data[obj.project] = ProjectData(
alias=project_alias,
version=version_data,
)
return projects_data[obj.project]

for project, version in projects
}
super().__init__(*args, **kwargs)

def _build_project_data(self, project, version_slug):
"""Build a `ProjectData` object given a project and its version."""
url = project.get_docs_url(version_slug=version_slug)
project_alias = project.superprojects.values_list("alias", flat=True).first()
version_data = VersionData(
slug=version_slug,
docs_url=url,
)
return ProjectData(
alias=project_alias,
version=version_data,
)

def _get_project_data(self, obj):
"""
Get and cache the project data.
Expand All @@ -91,20 +115,8 @@ def _get_project_data(self, obj):

project = Project.objects.filter(slug=obj.project).first()
if project:
docs_url = project.get_docs_url(version_slug=obj.version)
project_alias = project.superprojects.values_list(
"alias", flat=True
).first()

projects_data = self.context.setdefault("projects_data", {})
version_data = VersionData(
slug=obj.version,
docs_url=docs_url,
)
projects_data[obj.project] = ProjectData(
alias=project_alias,
version=version_data,
)
projects_data[obj.project] = self._build_project_data(project, obj.version)
return projects_data[obj.project]
return None

Expand Down
76 changes: 12 additions & 64 deletions readthedocs/search/api/v2/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@
from readthedocs.projects.models import Feature, Project
from readthedocs.search import tasks
from readthedocs.search.api.pagination import SearchPagination
from readthedocs.search.api.v2.serializers import (
PageSearchSerializer,
ProjectData,
VersionData,
)
from readthedocs.search.api.v2.serializers import PageSearchSerializer
from readthedocs.search.faceted_search import PageSearch

log = structlog.get_logger(__name__)
Expand Down Expand Up @@ -80,45 +76,15 @@ def _validate_query_params(self):
raise ValidationError(errors)

@lru_cache(maxsize=1)
def _get_all_projects_data(self):
"""
Return a dictionary of the project itself and all its subprojects.

Example:

.. code::

{
"requests": ProjectData(
alias='alias',
version=VersionData(
"latest",
"https://requests.readthedocs.io/en/latest/",
),
),
"requests-oauth": ProjectData(
alias=None,
version=VersionData(
"latest",
"https://requests-oauth.readthedocs.io/en/latest/",
),
),
}

.. note:: The response is cached into the instance.

:rtype: A dictionary of project slugs mapped to a `VersionData` object.
"""
def _get_projects_to_search(self):
"""Get all projects to search."""
main_version = self._get_version()
main_project = self._get_project()

if not self._has_permission(self.request.user, main_version):
return {}

projects_data = {
main_project.slug: self._get_project_data(main_project, main_version),
}
return []

projects_to_search = [(main_project, main_version)]
subprojects = Project.objects.filter(superprojects__parent_id=main_project.id)
for subproject in subprojects:
version = self._get_project_version(
Expand All @@ -136,24 +102,9 @@ def _get_all_projects_data(self):
)

if version and self._has_permission(self.request.user, version):
projects_data[subproject.slug] = self._get_project_data(
subproject, version
)
projects_to_search.append((subproject, version))

return projects_data

def _get_project_data(self, project, version):
"""Build a `ProjectData` object given a project and its version."""
url = project.get_docs_url(version_slug=version.slug)
project_alias = project.superprojects.values_list("alias", flat=True).first()
version_data = VersionData(
slug=version.slug,
docs_url=url,
)
return ProjectData(
alias=project_alias,
version=version_data,
)
return projects_to_search

def _get_project_version(self, project, version_slug, include_hidden=True):
"""
Expand Down Expand Up @@ -219,8 +170,8 @@ def get_queryset(self):
is compatible with DRF's paginator.
"""
projects = {
project: project_data.version.slug
for project, project_data in self._get_all_projects_data().items()
project.slug: version.slug
for project, version in self._get_projects_to_search()
}
# Check to avoid searching all projects in case it's empty.
if not projects:
Expand All @@ -236,11 +187,6 @@ def get_queryset(self):
)
return queryset

def get_serializer_context(self):
context = super().get_serializer_context()
context["projects_data"] = self._get_all_projects_data()
return context

def get(self, request, *args, **kwargs):
self._validate_query_params()
result = self.list()
Expand All @@ -255,7 +201,9 @@ def list(self):
self.request,
view=self,
)
serializer = self.get_serializer(page, many=True)
serializer = self.get_serializer(
page, many=True, projects=self._get_projects_to_search()
)
return self.paginator.get_paginated_response(serializer.data)


Expand Down
4 changes: 2 additions & 2 deletions readthedocs/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,9 @@ def test_doc_search_unexisting_version(self, api_client, project):
resp = self.get_search(api_client, search_params)
assert resp.status_code == 404

@mock.patch.object(PageSearchAPIView, '_get_all_projects_data', dict)
@mock.patch.object(PageSearchAPIView, "_get_projects_to_search", list)
def test_get_all_projects_returns_empty_results(self, api_client, project):
"""If there is a case where `_get_all_projects` returns empty, we could be querying all projects."""
"""If there is a case where `_get_projects_to_search` returns empty, we could be querying all projects."""

# `documentation` word is present both in `kuma` and `docs` files
# and not in `pipeline`, so search with this phrase but filter through project
Expand Down
25 changes: 1 addition & 24 deletions readthedocs/search/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
from readthedocs.projects.models import Feature, Project
from readthedocs.search.api.v2.serializers import (
PageSearchSerializer,
ProjectData,
ProjectSearchSerializer,
VersionData,
)
from readthedocs.search.faceted_search import ALL_FACETS, PageSearch, ProjectSearch

Expand Down Expand Up @@ -91,26 +89,6 @@ def _get_project(self, project_slug):
project = get_object_or_404(queryset, slug=project_slug)
return project

def _get_project_data(self, project, version_slug):
docs_url = project.get_docs_url(version_slug=version_slug)
version_data = VersionData(
slug=version_slug,
docs_url=docs_url,
)
project_data = {
project.slug: ProjectData(
alias=None,
version=version_data,
)
}
return project_data

def get_serializer_context(self, project, version_slug):
context = {
'projects_data': self._get_project_data(project, version_slug),
}
return context

def get(self, request, project_slug):
project_obj = self._get_project(project_slug)
use_advanced_query = not project_obj.has_feature(
Expand All @@ -132,8 +110,7 @@ def get(self, request, project_slug):
use_advanced_query=use_advanced_query,
)

context = self.get_serializer_context(project_obj, user_input.version)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Passing the context here was saving just one query, and to pass it again we will need to query the version (we have only the slug), so we are just omitting the projects parameter here for now (we would use it again when using the new search)

results = PageSearchSerializer(results, many=True, context=context).data
results = PageSearchSerializer(results, many=True).data

template_context = user_input._asdict()
template_context.update({
Expand Down