From 38cb041544063dc4ac17e71edb90dccae4db4e68 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 3 Nov 2020 17:50:53 -0500 Subject: [PATCH] Search: exclude some fields from source results This should hopefully make search a little faster, since we don't make use of these fields in the serializer. The big ones are sections and domains: we were fetching all of them for each page, instead of just the ones that matched. Also fixed a bug: we were omitting the description from the project in the serializer. --- readthedocs/search/faceted_search.py | 9 ++++++++- readthedocs/search/serializers.py | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index c7305ae71cd..37277a40ea4 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -28,6 +28,9 @@ class RTDFacetedSearch(FacetedSearch): operators = [] + # Sources to be excluded from results. + excludes = [] + _highlight_options = { 'encoder': 'html', 'number_of_fragments': 1, @@ -201,7 +204,7 @@ def query(self, search, query): * Adds HTML encoding of results to avoid XSS issues. 
""" search = search.highlight_options(**self._highlight_options) - search = search.source(exclude=['content', 'headers']) + search = search.source(excludes=self.excludes) queries = self._get_queries( query=query, @@ -220,6 +223,7 @@ class ProjectSearchBase(RTDFacetedSearch): index = ProjectDocument._index._name fields = ('name^10', 'slug^5', 'description') operators = ['and', 'or'] + excludes = ['users', 'language'] class PageSearchBase(RTDFacetedSearch): @@ -248,6 +252,8 @@ class PageSearchBase(RTDFacetedSearch): # the score of and should be higher as it satisfies both or and and operators = ['and', 'or'] + excludes = ['rank', 'sections', 'domains', 'commit', 'build'] + def total_count(self): """Returns the total count of results of the current query.""" s = self.build_search() @@ -261,6 +267,7 @@ def total_count(self): def query(self, search, query): """Manipulates the query to support nested queries and a custom rank for pages.""" search = search.highlight_options(**self._highlight_options) + search = search.source(excludes=self.excludes) queries = self._get_queries( query=query, diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py index 01b04eca56e..c6758b529a4 100644 --- a/readthedocs/search/serializers.py +++ b/readthedocs/search/serializers.py @@ -37,6 +37,7 @@ class ProjectSearchSerializer(serializers.Serializer): name = serializers.CharField() slug = serializers.CharField() link = serializers.CharField(source='url') + description = serializers.CharField() highlights = ProjectHighlightSerializer(source='meta.highlight', default=dict)