Search: remove old endpoint #7414

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account


Merged · 3 commits · Oct 12, 2020

3 changes: 1 addition & 2 deletions readthedocs/api/v2/proxied_urls.py
@@ -13,8 +13,7 @@
 
 api_footer_urls = [
     url(r'footer_html/', ProxiedFooterHTML.as_view(), name='footer_html'),
-    url(r'docsearch/$', ProxiedPageSearchAPIView.as_view(), name='doc_search'),
-    url(r'search/$', ProxiedPageSearchAPIView.as_view(new_api=True), name='search_api'),
+    url(r'search/$', ProxiedPageSearchAPIView.as_view(), name='search_api'),
 ]
 
 urlpatterns = api_footer_urls
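
Note: with `docsearch/` removed, in-page search on documentation domains has a single proxied route. A minimal sketch of querying it, assuming the `/_/` proxy prefix used on docs domains plus a hypothetical domain, project, and response shape:

import requests

# Hypothetical docs domain; the proxied API is assumed to be mounted
# under the /_/ prefix served on documentation domains.
API_URL = "https://docs.example.com/_/api/v2/search/"

response = requests.get(
    API_URL,
    params={"project": "example", "version": "latest", "q": "install"},
)
response.raise_for_status()
# Assumes the standard DRF paginated shape with a "results" list.
for result in response.json()["results"]:
    print(result["title"])
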
82 changes: 2 additions & 80 deletions readthedocs/search/api.py
@@ -1,24 +1,18 @@
-import itertools
 import logging
-import re
 from collections import namedtuple
 from math import ceil
 
 from django.shortcuts import get_object_or_404
 from django.utils import timezone
 from django.utils.translation import ugettext as _
-from rest_framework import serializers
 from rest_framework.exceptions import NotFound, ValidationError
 from rest_framework.generics import GenericAPIView
 from rest_framework.pagination import PageNumberPagination
 from rest_framework.response import Response
 from rest_framework.utils.urls import remove_query_param, replace_query_param
 
 from readthedocs.api.v2.permissions import IsAuthorizedToViewVersion
 from readthedocs.builds.models import Version
-from readthedocs.projects.constants import MKDOCS, SPHINX_HTMLDIR
 from readthedocs.projects.models import Feature, Project
-from readthedocs.search import tasks, utils
+from readthedocs.search import tasks
 from readthedocs.search.faceted_search import PageSearch
 
 from .serializers import PageSearchSerializer
@@ -122,67 +116,6 @@ def paginate_queryset(self, queryset, request, view=None):
         return result
 
 
-class OldPageSearchSerializer(serializers.Serializer):
-
-    """
-    Serializer for page search results.
-
-    .. note::
-
-        This serializer is deprecated in favor of
-        `readthedocs.search.serializers.PageSearchSerializer`.
-    """
-
-    project = serializers.CharField()
-    version = serializers.CharField()
-    title = serializers.CharField()
-    path = serializers.CharField()
-    full_path = serializers.CharField()
-    link = serializers.SerializerMethodField()
-    highlight = serializers.SerializerMethodField()
-    inner_hits = serializers.SerializerMethodField()
-
-    def get_link(self, obj):
-        project_data = self.context['projects_data'].get(obj.project)
-        if not project_data:
-            return None
-
-        docs_url, doctype = project_data
-        path = obj.full_path
-
-        # Generate an appropriate link for the doctypes that use htmldir,
-        # and always end it with / so it goes directly to proxito.
-        if doctype in {SPHINX_HTMLDIR, MKDOCS}:
-            new_path = re.sub('(^|/)index.html$', '/', path)
-            # docs_url already ends with /,
-            # so path doesn't need to start with /.
-            path = new_path.lstrip('/')
-
-        return docs_url + path
-
-    def get_highlight(self, obj):
-        highlight = getattr(obj.meta, 'highlight', None)
-        if highlight:
-            ret = highlight.to_dict()
-            log.debug('API Search highlight [Page title]: %s', ret)
-            return ret
-
-    def get_inner_hits(self, obj):
-        inner_hits = getattr(obj.meta, 'inner_hits', None)
-        if inner_hits:
-            sections = inner_hits.sections or []
-            domains = inner_hits.domains or []
-            all_results = itertools.chain(sections, domains)
-
-            sorted_results = utils._get_sorted_results(
-                results=all_results,
-                source_key='_source',
-            )
-
-            log.debug('[API] Sorted Results: %s', sorted_results)
-            return sorted_results
-
-
 class PageSearchAPIView(GenericAPIView):
 
     """
@@ -194,12 +127,6 @@ class PageSearchAPIView(GenericAPIView):
     - project
     - version
 
-    Optional params from the view:
-
-    - new_api (true/false): Make use of the new stable API.
-      Defaults to false. Remove after a couple of days/weeks
-      and always use the new API.
-
     .. note::
 
        The methods `_get_project` and `_get_version`
@@ -209,7 +136,7 @@ class PageSearchAPIView(GenericAPIView):
     http_method_names = ['get']
     permission_classes = [IsAuthorizedToViewVersion]
     pagination_class = SearchPagination
-    new_api = False
+    serializer_class = PageSearchSerializer
 
     def _get_project(self):
         cache_key = '_cached_project'
@@ -371,11 +298,6 @@ def get_queryset(self):
         )
         return queryset
 
-    def get_serializer_class(self):
-        if self.new_api:
-            return PageSearchSerializer
-        return OldPageSearchSerializer
-
     def get_serializer_context(self):
         context = super().get_serializer_context()
         context['projects_data'] = self._get_all_projects_data()
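
Dropping the `get_serializer_class` override works because DRF's stock implementation already returns the class-level attribute, and with the `new_api` toggle gone there is nothing left to branch on. A simplified sketch of the relevant default, condensed from `rest_framework.generics.GenericAPIView`:

from rest_framework import views


class GenericAPIView(views.APIView):
    serializer_class = None

    def get_serializer_class(self):
        # Falls back to the attribute set on the view, i.e.
        # `serializer_class = PageSearchSerializer` above.
        assert self.serializer_class is not None, (
            "'%s' should either include a `serializer_class` attribute, "
            "or override the `get_serializer_class()` method."
            % self.__class__.__name__
        )
        return self.serializer_class
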
21 changes: 4 additions & 17 deletions readthedocs/search/utils.py
@@ -1,15 +1,13 @@
"""Utilities related to reading and generating indexable search content."""

import logging
from operator import attrgetter

from django.utils import timezone
from django_elasticsearch_dsl.apps import DEDConfig
from django_elasticsearch_dsl.registries import registry

from readthedocs.projects.models import HTMLFile


log = logging.getLogger(__name__)


@@ -116,7 +114,10 @@ def _indexing_helper(html_objs_qs, wipe=False):
     else, html_objs are indexed.
     """
     from readthedocs.search.documents import PageDocument
-    from readthedocs.search.tasks import index_objects_to_es, delete_objects_in_es
+    from readthedocs.search.tasks import (
+        delete_objects_in_es,
+        index_objects_to_es,
+    )
 
     if html_objs_qs:
         obj_ids = []
@@ -140,20 +141,6 @@ def _indexing_helper(html_objs_qs, wipe=False):
         delete_objects_in_es.delay(**kwargs)
 
 
-def _get_sorted_results(results, source_key='_source'):
-    """Sort results according to their score and returns results as list."""
-    sorted_results = [
-        {
-            'type': hit._nested.field,
-            source_key: hit._source.to_dict(),
-            'highlight': hit.highlight.to_dict() if hasattr(hit, 'highlight') else {}
-        }
-        for hit in sorted(results, key=attrgetter('_score'), reverse=True)
-    ]
-
-    return sorted_results
-
-
 def _last_30_days_iter():
     """Returns iterator for previous 30 days (including today)."""
     thirty_days_ago = timezone.now().date() - timezone.timedelta(days=30)
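
For context, the deleted `_get_sorted_results` helper merged section and domain inner hits and returned them ordered by Elasticsearch relevance score. A toy illustration of that ordering, with a hypothetical `Hit` class standing in for real ES hit objects:

import itertools
from dataclasses import dataclass
from operator import attrgetter


@dataclass
class Hit:
    """Hypothetical stand-in for an Elasticsearch hit."""
    _score: float
    title: str


sections = [Hit(3.2, "Installation"), Hit(1.1, "Changelog")]
domains = [Hit(2.7, "settings.DEBUG")]

# Highest-scoring hits first, regardless of which list they came from.
merged = sorted(
    itertools.chain(sections, domains),
    key=attrgetter("_score"),
    reverse=True,
)
print([h.title for h in merged])  # ['Installation', 'settings.DEBUG', 'Changelog']
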
5 changes: 2 additions & 3 deletions readthedocs/urls.py
@@ -49,9 +49,8 @@

 api_urls = [
     url(r'^api/v2/', include('readthedocs.api.v2.urls')),
-    # Keep the `doc_search` at root level, so the test does not fail for other API
-    url(r'^api/v2/docsearch/$', PageSearchAPIView.as_view(), name='doc_search'),
-    url(r'^api/v2/search/$', PageSearchAPIView.as_view(new_api=True), name='search_api'),
+    # Keep `search_api` at root level, so the test does not fail for other API
+    url(r'^api/v2/search/$', PageSearchAPIView.as_view(), name='search_api'),
     url(
         r'^api-auth/',
         include('rest_framework.urls', namespace='rest_framework')
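
Callers that resolved the old URL name need the same rename. A hypothetical test-style snippet against the remaining route (placeholder project and version; a real test would create fixtures first):

from django.test import Client
from django.urls import reverse

# `doc_search` no longer resolves; lookups must use `search_api`.
url = reverse("search_api")  # -> "/api/v2/search/"
client = Client()
response = client.get(url, {"project": "docs", "version": "latest", "q": "install"})
print(response.status_code)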