-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Search: Use serializers to parse search results #7157
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a035ba2
c57df3f
3c0d2a7
078adf6
1cc6d1a
388ce8c
b2bed6d
db8e203
1197370
7ac6194
445bfc2
5ec5e36
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
""" | ||
Serializers for the ES's search result object. | ||
|
||
.. note:: | ||
Some fields are re-named to make their meaning more clear. | ||
They should be renamed in the ES index too. | ||
""" | ||
|
||
import itertools | ||
from operator import attrgetter | ||
|
||
from django.shortcuts import get_object_or_404 | ||
from rest_framework import serializers | ||
|
||
from readthedocs.core.resolver import resolve | ||
from readthedocs.projects.models import Project | ||
|
||
|
||
class ProjectHighlightSerializer(serializers.Serializer): | ||
|
||
name = serializers.ListField(child=serializers.CharField(), default=list) | ||
slug = serializers.ListField(child=serializers.CharField(), default=list) | ||
description = serializers.ListField(child=serializers.CharField(), default=list) | ||
|
||
|
||
class ProjectSearchSerializer(serializers.Serializer): | ||
|
||
type = serializers.CharField(default='project', source=None, read_only=True) | ||
name = serializers.CharField() | ||
slug = serializers.CharField() | ||
link = serializers.CharField(source='url') | ||
highlight = ProjectHighlightSerializer(source='meta.highlight', default=dict) | ||
|
||
|
||
class PageHighlightSerializer(serializers.Serializer): | ||
|
||
title = serializers.ListField(child=serializers.CharField(), default=list) | ||
|
||
|
||
class PageSearchSerializer(serializers.Serializer): | ||
|
||
type = serializers.CharField(default='page', source=None, read_only=True) | ||
project = serializers.CharField() | ||
version = serializers.CharField() | ||
title = serializers.CharField() | ||
link = serializers.SerializerMethodField() | ||
highlight = PageHighlightSerializer(source='meta.highlight', default=dict) | ||
blocks = serializers.SerializerMethodField() | ||
|
||
def get_link(self, obj): | ||
# TODO: optimize this to not query the db for each result. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are already doing one query per result, so this isn't something I'm introducing in this PR. We can fix this when merging the serializer from the api. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's doing more than 1 query if we're calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
My plan was to so something similar to subprojects, fetch the project once and resolve the main domain for that project and then pass it down here. |
||
# TODO: return an relative URL when this is called from the indoc search. | ||
project = Project.objects.filter(slug=obj.project).first() | ||
if project: | ||
return resolve( | ||
stsewd marked this conversation as resolved.
Show resolved
Hide resolved
|
||
project=project, | ||
version_slug=obj.version, | ||
filename=obj.full_path, | ||
) | ||
return None | ||
|
||
def get_blocks(self, obj): | ||
"""Combine and sort inner results (domains and sections).""" | ||
serializers = { | ||
'domain': DomainSearchSerializer, | ||
'section': SectionSearchSerializer, | ||
} | ||
|
||
inner_hits = obj.meta.inner_hits | ||
sections = inner_hits.sections or [] | ||
domains = inner_hits.domains or [] | ||
|
||
# Make them identifiable before merging them | ||
for s in sections: | ||
s.type = 'section' | ||
for d in domains: | ||
d.type = 'domain' | ||
|
||
sorted_results = sorted( | ||
itertools.chain(sections, domains), | ||
key=attrgetter('_score'), | ||
reverse=True, | ||
) | ||
sorted_results = [ | ||
serializers[hit.type](hit).data | ||
for hit in sorted_results | ||
] | ||
return sorted_results | ||
|
||
|
||
class DomainHighlightSerializer(serializers.Serializer): | ||
|
||
""" | ||
Serializer for domain results. | ||
|
||
.. note:: | ||
|
||
We override the `to_representation` method instead of declaring each field | ||
because serializers don't play nice with keys that include `.`. | ||
""" | ||
|
||
def to_representation(self, instance): | ||
return { | ||
'name': getattr(instance, 'domains.name', []), | ||
'docstring': getattr(instance, 'domains.docstrings', []), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is named |
||
} | ||
|
||
|
||
class DomainSearchSerializer(serializers.Serializer): | ||
|
||
type = serializers.CharField(default='domain', source=None, read_only=True) | ||
role = serializers.CharField(source='_source.role_name') | ||
name = serializers.CharField(source='_source.name') | ||
id = serializers.CharField(source='_source.anchor') | ||
stsewd marked this conversation as resolved.
Show resolved
Hide resolved
|
||
docstring = serializers.CharField(source='_source.docstrings') | ||
highlight = DomainHighlightSerializer(default=dict) | ||
|
||
|
||
class SectionHighlightSerializer(serializers.Serializer): | ||
|
||
""" | ||
Serializer for section results. | ||
|
||
.. note:: | ||
|
||
We override the `to_representation` method instead of declaring each field | ||
because serializers don't play nice with keys that include `.`. | ||
""" | ||
|
||
def to_representation(self, instance): | ||
return { | ||
'title': getattr(instance, 'sections.title', []), | ||
'content': getattr(instance, 'sections.content', []), | ||
} | ||
|
||
|
||
class SectionSearchSerializer(serializers.Serializer): | ||
|
||
type = serializers.CharField(default='section', source=None, read_only=True) | ||
id = serializers.CharField(source='_source.id') | ||
title = serializers.CharField(source='_source.title') | ||
content = serializers.CharField(source='_source.content') | ||
highlight = SectionHighlightSerializer(default=dict) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,14 @@ | |
get_search_query_from_project_file, | ||
) | ||
|
||
OLD_TYPES = { | ||
'domain': 'domains', | ||
'section': 'sections', | ||
} | ||
OLD_FIELDS = { | ||
'docstring': 'docstrings', | ||
} | ||
|
||
Comment on lines
+26
to
+33
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can be removed once we start using the same serializer in the api |
||
|
||
@pytest.mark.django_db | ||
@pytest.mark.search | ||
|
@@ -43,7 +51,7 @@ def test_search_works_with_title_query(self, api_client, project, page_num): | |
query = get_search_query_from_project_file( | ||
project_slug=project.slug, | ||
page_num=page_num, | ||
data_type='title' | ||
field='title' | ||
) | ||
|
||
version = project.versions.all().first() | ||
|
@@ -77,10 +85,12 @@ def test_search_works_with_sections_and_domains_query( | |
page_num, | ||
data_type | ||
): | ||
type, field = data_type.split('.') | ||
query = get_search_query_from_project_file( | ||
project_slug=project.slug, | ||
page_num=page_num, | ||
data_type=data_type | ||
type=type, | ||
field=field, | ||
) | ||
version = project.versions.all().first() | ||
search_params = { | ||
|
@@ -105,10 +115,11 @@ def test_search_works_with_sections_and_domains_query( | |
|
||
inner_hit_0 = inner_hits[0] # first inner_hit | ||
|
||
expected_type = data_type.split('.')[0] # can be "sections" or "domains" | ||
assert inner_hit_0['type'] == expected_type | ||
old_type = OLD_TYPES.get(type, type) | ||
assert inner_hit_0['type'] == old_type | ||
|
||
highlight = inner_hit_0['highlight'][data_type] | ||
old_field = old_type + '.' + OLD_FIELDS.get(field, field) | ||
highlight = inner_hit_0['highlight'][old_field] | ||
assert ( | ||
len(highlight) == 1 | ||
), 'number_of_fragments is set to 1' | ||
|
Uh oh!
There was an error while loading. Please reload this page.