diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py
index 91798fec8bb..fc4ce42de21 100644
--- a/readthedocs/search/api.py
+++ b/readthedocs/search/api.py
@@ -1,6 +1,7 @@
 import itertools
 import logging
 
+from django.utils import timezone
 from rest_framework import generics, serializers
 from rest_framework.exceptions import ValidationError
 from rest_framework.pagination import PageNumberPagination
@@ -160,8 +161,11 @@ def list(self, request, *args, **kwargs):
 
         project_slug = self.request.query_params.get('project', None)
         version_slug = self.request.query_params.get('version', None)
-        query = self.request.query_params.get('q', '')
         total_results = response.data.get('count', 0)
+        time = timezone.now()
+
+        query = self.request.query_params.get('q', '')
+        query = query.lower().strip()
 
         # record the search query with a celery task
         tasks.record_search_query.delay(
@@ -169,6 +173,7 @@ def list(self, request, *args, **kwargs):
             version_slug,
             query,
             total_results,
+            time,
         )
 
         return response
diff --git a/readthedocs/search/tasks.py b/readthedocs/search/tasks.py
index 0057f90b497..1a91e78f3b3 100644
--- a/readthedocs/search/tasks.py
+++ b/readthedocs/search/tasks.py
@@ -142,20 +142,64 @@ def delete_old_search_queries_from_db():
 
 
 @app.task(queue='web')
-def record_search_query(project_slug, version_slug, query, total_results):
-    """Record search query in database."""
-    if not project_slug or not version_slug or not query or not total_results:
+def record_search_query(project_slug, version_slug, query, total_results, time=None):
+    """
+    Record/update search query in database.
+
+    A query that extends one recorded for the same project and version
+    within the previous 10 seconds updates that row instead of adding one.
+
+    :param project_slug: slug of the project that was searched
+    :param version_slug: slug of the version that was searched
+    :param query: search text
+    :param total_results: number of results the search returned
+    :param time: when the search was made, as a datetime or an
+        ISO 8601 string; defaults to the current time
+    """
+    if not project_slug or not version_slug or not query:
         log.debug(
             'Not recording the search query. Passed arguments: '
-            'project_slug: %s, version_slug: %s, query: %s, total_results: %s' % (
-                project_slug, version_slug, query, total_results
+            'project_slug: %s, version_slug: %s, query: %s, total_results: %s, time: %s' % (
+                project_slug, version_slug, query, total_results, time
             )
         )
         return
 
-    project_qs = Project.objects.filter(slug=project_slug)
+    if time is None:
+        time = timezone.now()
+    elif isinstance(time, str):
+        # A JSON-serialized Celery message can carry the datetime as an ISO 8601 string.
+        from django.utils.dateparse import parse_datetime
+        time = parse_datetime(time)
+
+    before_10_sec = time - timezone.timedelta(seconds=10)
+    partial_query_qs = SearchQuery.objects.filter(
+        project__slug=project_slug,
+        version__slug=version_slug,
+        created__gte=before_10_sec,
+    ).order_by('-created')
+
+    # check if partial query exists,
+    # if yes, then just update the object.
+    for partial_query in partial_query_qs.iterator():
+        if query.startswith(partial_query.query):
+            partial_query.created = time
+            partial_query.query = query
+            partial_query.save()
+            return
+
+    # don't record query with zero results.
+    if not total_results:
+        log.debug(
+            'Not recording search query because of zero results. Passed arguments: '
+            'project_slug: %s, version_slug: %s, query: %s, total_results: %s, time: %s' % (
+                project_slug, version_slug, query, total_results, time
+            )
+        )
+        return
 
-    if not project_qs.exists():
+    project = Project.objects.filter(slug=project_slug).first()
+    if not project:
         log.debug(
             'Not recording the search query because project does not exist. '
             'project_slug: %s' % (
@@ -164,15 +208,24 @@ def record_search_query(project_slug, version_slug, query, total_results):
         )
         return
 
-    project = project_qs.first()
     version_qs = Version.objects.filter(project=project, slug=version_slug)
 
     if not version_qs.exists():
+        log.debug(
+            'Not recording the search query because version does not exist. '
+            'project_slug: %s, version_slug: %s' % (
+                project_slug, version_slug
+            )
+        )
         return
 
     version = version_qs.first()
-    SearchQuery.objects.create(
+
+    # make a new SearchQuery object.
+    obj = SearchQuery.objects.create(
         project=project,
         version=version,
         query=query,
     )
+    obj.created = time
+    obj.save()
diff --git a/readthedocs/search/tests/test_search_tasks.py b/readthedocs/search/tests/test_search_tasks.py
index 11f189f8d74..86d2742ee67 100644
--- a/readthedocs/search/tests/test_search_tasks.py
+++ b/readthedocs/search/tests/test_search_tasks.py
@@ -1,5 +1,6 @@
 """Tests for search tasks."""
 
+import mock
 import pytest
 
 from django.urls import reverse
@@ -43,6 +44,48 @@ def test_search_query_recorded_when_results_not_zero(self, api_client):
             SearchQuery.objects.all().count() == 1
         ), 'there should be 1 obj since a search is made which returns one result.'
 
+    def test_partial_queries_are_not_recorded(self, api_client):
+        """Test if partial queries are not recorded."""
+
+        assert (
+            SearchQuery.objects.all().count() == 0
+        ), 'no SearchQuery should be present if there is no search made.'
+
+        time = timezone.now()
+        search_params = {'q': 'stack', 'project': 'docs', 'version': 'latest'}
+
+        with mock.patch('django.utils.timezone.now') as test_time:
+            test_time.return_value = time
+            resp = api_client.get(self.url, search_params)
+            assert resp.status_code == 200
+
+        assert (
+            SearchQuery.objects.all().count() == 1
+        ), 'one SearchQuery should be present'
+
+        # update the time and the search query and make another search request
+        time = time + timezone.timedelta(seconds=2)
+        search_params['q'] = 'stack over'
+        with mock.patch('django.utils.timezone.now') as test_time:
+            test_time.return_value = time
+            resp = api_client.get(self.url, search_params)
+            assert resp.status_code == 200
+
+        # update the time and the search query and make another search request
+        time = time + timezone.timedelta(seconds=2)
+        search_params['q'] = 'stack overflow'
+        with mock.patch('django.utils.timezone.now') as test_time:
+            test_time.return_value = time
+            resp = api_client.get(self.url, search_params)
+            assert resp.status_code == 200
+
+        assert (
+            SearchQuery.objects.all().count() == 1
+        ), 'one SearchQuery should be present'
+        assert (
+            SearchQuery.objects.all().first().query == 'stack overflow'
+        ), 'one SearchQuery should be there because partial queries gets updated'
+
     def test_search_query_not_recorded_when_results_are_zero(self, api_client):
         """Test that search queries are not recorded when they have zero results."""
 