
Commit 4dc3e35

fixup and adding test
committed · 1 parent bb4e5aa · commit 4dc3e35

9 files changed: +205 additions, -24 deletions

conftest.py

Lines changed: 6 additions & 0 deletions

@@ -1,6 +1,7 @@
 import logging

 import pytest
+from rest_framework.test import APIClient


 def pytest_addoption(parser):
@@ -17,3 +18,8 @@ def pytest_configure(config):
 @pytest.fixture(autouse=True)
 def settings_modification(settings):
     settings.CELERY_ALWAYS_EAGER = True
+
+
+@pytest.fixture
+def api_client():
+    return APIClient()
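
The new `api_client` fixture simply instantiates DRF's `APIClient`, so the API tests added in this commit can hit endpoints without building a client by hand. A minimal sketch of a test using it (the `doc_search` URL name comes from the `restapi/urls.py` change below; the query parameters mirror `test_api.py`, and a live Elasticsearch index with fixtures is assumed):

import pytest
from django.core.urlresolvers import reverse


@pytest.mark.django_db
def test_doc_search_endpoint_answers(api_client):
    # api_client is the fixture defined in conftest.py above (a DRF APIClient).
    url = reverse('doc_search')
    resp = api_client.get(url, {'project': 'kuma', 'version': 'latest', 'query': 'github'})
    # With an empty index this may return zero results; the endpoint should still answer 200.
    assert resp.status_code == 200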

readthedocs/restapi/urls.py

Lines changed: 7 additions & 10 deletions

@@ -11,6 +11,7 @@
 from readthedocs.restapi.views import (
     core_views, footer_views, search_views, task_views, integrations
 )
+from readthedocs.search.api import PageSearchAPIView

 from .views.model_views import (BuildViewSet, BuildCommandViewSet,
                                 ProjectViewSet, NotificationViewSet,
@@ -85,19 +86,15 @@
         name='api_webhook'),
 ]

+api_search_urls = [
+    url(r'^docsearch/$', PageSearchAPIView.as_view(), name='doc_search'),
+]
+
 urlpatterns += function_urls
-urlpatterns += search_urls
 urlpatterns += task_urls
+urlpatterns += search_urls
 urlpatterns += integration_urls
-
-try:
-    from readthedocsext.search.docsearch import DocSearch
-    api_search_urls = [
-        url(r'^docsearch/$', DocSearch.as_view(), name='doc_search'),
-    ]
-    urlpatterns += api_search_urls
-except ImportError:
-    pass
+urlpatterns += api_search_urls

 try:
     from readthedocsext.donate.restapi.urls import urlpatterns as sustainability_urls

readthedocs/search/api.py

Lines changed: 7 additions & 1 deletion

@@ -12,6 +12,12 @@ class PageSearchAPIView(generics.ListAPIView):
     serializer_class = PageSearchSerializer

     def get_queryset(self):
-        query = self.request.query_params.get('query')
+        """Return an Elasticsearch DSL Search object instead of a Django QuerySet.
+
+        Django QuerySets and elasticsearch-dsl ``Search`` objects follow a similar pattern,
+        so for searching it is possible to return a ``Search`` object instead of a queryset.
+        The ``filter_backends`` and ``pagination_class`` are compatible with ``Search``.
+        """
+        query = self.request.query_params.get('query', '')
         queryset = PageDocument.search(query=query)
         return queryset
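
A rough sketch of how the surrounding view plausibly fits together, given the docstring above. The concrete `filter_backends` and `pagination_class` values shown here are assumptions (this hunk only shows `serializer_class` and `get_queryset()`), with `SearchFilterBackend` borrowed from the `filters.py` change below:

from rest_framework import generics
from rest_framework.pagination import PageNumberPagination

from readthedocs.search.documents import PageDocument
from readthedocs.search.filters import SearchFilterBackend
from readthedocs.search.serializers import PageSearchSerializer


class PageSearchAPIView(generics.ListAPIView):
    # Assumed wiring: the backend filters the Search object, the paginator slices it.
    filter_backends = [SearchFilterBackend]
    pagination_class = PageNumberPagination
    serializer_class = PageSearchSerializer

    def get_queryset(self):
        query = self.request.query_params.get('query', '')
        # PageDocument.search() returns an elasticsearch-dsl Search, not a QuerySet.
        return PageDocument.search(query=query)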

readthedocs/search/documents.py

Lines changed: 3 additions & 3 deletions

@@ -87,12 +87,12 @@ def faceted_search(cls, query, projects_list=None, versions_list=None, using=Non
         return FileSearch(**kwargs)

     @classmethod
-    def search(cls, using=None, index=None, **kwargs):
-        es_search = super(PageDocument, cls).search(using=using, index=index)
+    def search(cls, *args, **kwargs):
         query = kwargs.pop('query')
+        es_search = super(PageDocument, cls).search(*args, **kwargs)
         es_query = cls.get_es_query(query=query)

-        es_search = es_search.query(es_query)
+        es_search = es_search.query(es_query).highlight('content')
         return es_search

     @classmethod
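
With `.highlight('content')` applied, Elasticsearch returns the matching fragments on each hit under `meta.highlight`, which is what the serializer change below reads. A hedged illustration of consuming it (query text and output are invented):

results = PageDocument.search(query='read the docs').execute()
for hit in results:
    fragments = getattr(hit.meta, 'highlight', None)
    if fragments:
        # e.g. {'content': ['... <em>Read</em> <em>the</em> <em>Docs</em> hosts your docs ...']}
        print(hit.title, fragments.to_dict())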

readthedocs/search/filters.py

Lines changed: 10 additions & 5 deletions

@@ -4,12 +4,17 @@


 class SearchFilterBackend(filters.BaseFilterBackend):
-    """
-    Filter search result with project
-    """

-    def filter_queryset(self, request, queryset, view):
+    """Filter search results by project."""
+
+    def filter_queryset(self, request, es_search, view):
+        """Overwrite the method to be compatible with Elasticsearch DSL ``Search`` objects."""
         project_slug = request.query_params.get('project')
+        version_slug = request.query_params.get('version')
         project_slug_list = get_project_slug_list_or_404(project_slug=project_slug,
                                                          user=request.user)
-        return queryset.filter('terms', project=project_slug_list)
+        # An Elasticsearch ``terms`` query takes multiple values as a list,
+        # while a ``term`` query takes a single value.
+        filtered_es_search = (es_search.filter('terms', project=project_slug_list)
+                              .filter('term', version=version_slug))
+        return filtered_es_search
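
The `terms`-versus-`term` comment is the key detail: the project filter has to accept the parent project plus its subprojects (a list of slugs), while the version filter matches a single slug. A small elasticsearch-dsl sketch of that distinction (index name and slugs are illustrative):

from elasticsearch_dsl import Search

es_search = Search(index='page')
es_search = es_search.filter('terms', project=['kuma', 'kuma-docs'])  # any slug in the list
es_search = es_search.filter('term', version='latest')                # exactly this slug
print(es_search.to_dict())  # the bool/filter body that gets sent to Elasticsearch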

readthedocs/search/serializers.py

Lines changed: 6 additions & 2 deletions

@@ -3,6 +3,10 @@

 class PageSearchSerializer(serializers.Serializer):
     title = serializers.CharField()
-    headers = serializers.ListField()
-    content = serializers.CharField()
     path = serializers.CharField()
+    highlight = serializers.SerializerMethodField()
+
+    def get_highlight(self, obj):
+        highlight = getattr(obj.meta, 'highlight', None)
+        if highlight:
+            return highlight.to_dict()
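
`get_highlight` pulls the fragments stored on `obj.meta` by the `highlight('content')` call in documents.py; when a hit has no highlight the field serializes as `None`. A sketch of what one serialized result might look like (field values are invented):

expected_result = {
    'title': 'Read the Docs features',
    'path': 'features',
    # Fragments come back from Elasticsearch with matches wrapped in <em> tags.
    'highlight': {'content': ['... full text <em>search</em> across your documentation ...']},
}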

readthedocs/search/tests/test_api.py

Lines changed: 164 additions & 0 deletions

@@ -0,0 +1,164 @@
+import pytest
+from django.core.urlresolvers import reverse
+
+from readthedocs.search.tests.utils import get_search_query_from_project_file
+
+
+@pytest.mark.django_db
+@pytest.mark.search
+class TestPageSearch(object):
+    url = reverse('doc_search')
+
+    @pytest.mark.parametrize('data_type', ['content', 'headers', 'title'])
+    @pytest.mark.parametrize('page_num', [0, 1])
+    def test_search_works(self, api_client, project, data_type, page_num):
+        query = get_search_query_from_project_file(project_slug=project.slug, page_num=page_num,
+                                                   data_type=data_type)
+
+        version = project.versions.all()[0]
+        url = reverse('doc_search')
+        resp = api_client.get(url, {'project': project.slug, 'version': version.slug, 'query': query})
+        data = resp.data
+        assert len(data['results']) == 1
+
+    # @pytest.mark.parametrize('case', ['upper', 'lower', 'title'])
+    # def test_file_search_case_insensitive(self, client, project, case):
+    #     """Check File search is case insensitive
+    #
+    #     It tests with uppercase, lowercase and camelcase
+    #     """
+    #     query_text = get_search_query_from_project_file(project_slug=project.slug)
+    #
+    #     cased_query = getattr(query_text, case)
+    #     query = cased_query()
+    #
+    #     result, _ = self._get_search_result(url=self.url, client=client,
+    #                                         search_params={'q': query, 'type': 'file'})
+    #
+    #     assert len(result) == 1
+    #     # Check the actual text is in the result, not the cased one
+    #     assert query_text in result.text()
+    #
+    # def test_file_search_exact_match(self, client, project):
+    #     """Check quoted query match exact phrase
+    #
+    #     Making a query with quoted text like ``"foo bar"`` should match
+    #     exactly ``foo bar`` phrase.
+    #     """
+    #
+    #     # `Github` word is present both in `kuma` and `pipeline` files
+    #     # But the phrase Github can is available only in kuma docs.
+    #     # So search with this phrase to check
+    #     query = r'"GitHub can"'
+    #
+    #     result, _ = self._get_search_result(url=self.url, client=client,
+    #                                         search_params={'q': query, 'type': 'file'})
+    #
+    #     assert len(result) == 1
+    #
+    # def test_page_search_not_return_removed_page(self, client, project):
+    #     """Check removed page are not in the search index"""
+    #     query = get_search_query_from_project_file(project_slug=project.slug)
+    #     # Make a query to check it returns result
+    #     result, _ = self._get_search_result(url=self.url, client=client,
+    #                                         search_params={'q': query, 'type': 'file'})
+    #     assert len(result) == 1
+    #
+    #     # Delete all the HTML files of the project
+    #     HTMLFile.objects.filter(project=project).delete()
+    #     # Run the query again and this time there should not be any result
+    #     result, _ = self._get_search_result(url=self.url, client=client,
+    #                                         search_params={'q': query, 'type': 'file'})
+    #     assert len(result) == 0
+    #
+    # def test_file_search_show_projects(self, client, all_projects):
+    #     """Test that search result page shows list of projects while searching for files"""
+    #
+    #     # `Github` word is present both in `kuma` and `pipeline` files
+    #     # so search with this phrase
+    #     result, page = self._get_search_result(url=self.url, client=client,
+    #                                            search_params={'q': 'GitHub', 'type': 'file'})
+    #
+    #     # There should be 2 search result
+    #     assert len(result) == 2
+    #
+    #     # there should be 2 projects in the left side column
+    #     content = page.find('.navigable .project-list')
+    #     assert len(content) == 2
+    #     text = content.text()
+    #
+    #     # kuma and pipeline should be there
+    #     assert 'kuma' and 'pipeline' in text
+    #
+    # def test_file_search_filter_by_project(self, client):
+    #     """Test that search result are filtered according to project"""
+    #
+    #     # `Github` word is present both in `kuma` and `pipeline` files
+    #     # so search with this phrase but filter through `kuma` project
+    #     search_params = {'q': 'GitHub', 'type': 'file', 'project': 'kuma'}
+    #     result, page = self._get_search_result(url=self.url, client=client,
+    #                                            search_params=search_params)
+    #
+    #     # There should be 1 search result as we have filtered
+    #     assert len(result) == 1
+    #     content = page.find('.navigable .project-list')
+    #
+    #     # kuma should should be there only
+    #     assert 'kuma' in result.text()
+    #     assert 'pipeline' not in result.text()
+    #
+    #     # But there should be 2 projects in the left side column
+    #     # as the query is present in both projects
+    #     content = page.find('.navigable .project-list')
+    #     if len(content) != 2:
+    #         pytest.xfail("failing because currently all projects are not showing in project list")
+    #     else:
+    #         assert 'kuma' and 'pipeline' in content.text()
+    #
+    # @pytest.mark.xfail(reason="Versions are not showing correctly! Fixme while rewrite!")
+    # def test_file_search_show_versions(self, client, all_projects, es_index, settings):
+    #     # override the settings to index all versions
+    #     settings.INDEX_ONLY_LATEST = False
+    #
+    #     project = all_projects[0]
+    #     # Create some versions of the project
+    #     versions = [G(Version, project=project) for _ in range(3)]
+    #
+    #     query = get_search_query_from_project_file(project_slug=project.slug)
+    #
+    #     result, page = self._get_search_result(url=self.url, client=client,
+    #                                            search_params={'q': query, 'type': 'file'})
+    #
+    #     # There should be only one result because by default
+    #     # only latest version result should be there
+    #     assert len(result) == 1
+    #
+    #     content = page.find('.navigable .version-list')
+    #     # There should be total 4 versions
+    #     # one is latest, and other 3 that we created above
+    #     assert len(content) == 4
+    #
+    #     project_versions = [v.slug for v in versions] + [LATEST]
+    #     content_versions = []
+    #     for element in content:
+    #         text = element.text_content()
+    #         # strip and split to keep the version slug only
+    #         slug = text.strip().split('\n')[0]
+    #         content_versions.append(slug)
+    #
+    #     assert sorted(project_versions) == sorted(content_versions)
+    #
+    # def test_file_search_subprojects(self, client, all_projects, es_index):
+    #     """File search should return results from subprojects also"""
+    #     project = all_projects[0]
+    #     subproject = all_projects[1]
+    #     # Add another project as subproject of the project
+    #     project.add_subproject(subproject)
+    #
+    #     # Now search with subproject content but explicitly filter by the parent project
+    #     query = get_search_query_from_project_file(project_slug=subproject.slug)
+    #     search_params = {'q': query, 'type': 'file', 'project': project.slug}
+    #     result, page = self._get_search_result(url=self.url, client=client,
+    #                                            search_params=search_params)
+    #
+    #     assert len(result) == 1

readthedocs/search/utils.py

Lines changed: 2 additions & 1 deletion

@@ -313,7 +313,8 @@ def parse_sections(documentation_type, content):
 # TODO: Rewrite all the views using this in Class Based View,
 # and move this function to a mixin
 def get_project_slug_list_or_404(project_slug, user):
-    """Return list of subproject's slug including own slug.
+    """
+    Return list of subproject's slug including own slug.
     If the project is not available to user, redirect to 404
     """
     queryset = Project.objects.api(user).only('slug')

readthedocs/urls.py

Lines changed: 0 additions & 2 deletions

@@ -22,7 +22,6 @@
     do_not_track,
 )
 from readthedocs.search import views as search_views
-from readthedocs.search.api import PageSearchAPIView

 v1_api = Api(api_name='v1')
 v1_api.register(UserResource())
@@ -67,7 +66,6 @@
     url(r'^api/', include(v1_api.urls)),
     url(r'^api/v2/', include('readthedocs.restapi.urls')),
     url(r'^api-auth/', include('rest_framework.urls', namespace='rest_framework')),
-    url(r'^api/search/', PageSearchAPIView.as_view()),
 ]

 i18n_urls = [
