Skip to content

Commit 38143e3

Browse files
ericholscher safwanrahman
authored and committed
Merge pull request readthedocs#4292 from safwanrahman/exact_match
[Fix readthedocs#2457] Implement exact match search
2 parents 24f1404 + 4ac0993 commit 38143e3

File tree

7 files changed

+87
-5
lines changed

7 files changed

+87
-5
lines changed

readthedocs/search/faceted_search.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from elasticsearch_dsl import FacetedSearch, TermsFacet
2+
from elasticsearch_dsl.query import SimpleQueryString, Bool
23

34

45
class RTDFacetedSearch(FacetedSearch):
@@ -29,3 +30,21 @@ class FileSearch(RTDFacetedSearch):
2930
'project': TermsFacet(field='project'),
3031
'version': TermsFacet(field='version')
3132
}
33+
34+
def query(self, search, query):
35+
"""Add query part to ``search``"""
36+
if query:
37+
all_queries = []
38+
39+
# Need to search for both 'AND' and 'OR' operations
40+
# Documents matching the AND query also match the OR query, so their scores add up and they rank first
41+
for operator in ['AND', 'OR']:
42+
query_string = SimpleQueryString(query=query, fields=self.fields,
43+
default_operator=operator)
44+
all_queries.append(query_string)
45+
46+
# Run a bool query with ``should`` so it returns results where either of the queries matches
47+
bool_query = Bool(should=all_queries)
48+
search = search.query(bool_query)
49+
50+
return search

readthedocs/search/tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def es_index():
2020

2121

2222
@pytest.fixture(autouse=True)
23-
def all_projects(es_index, mock_processed_json):
23+
def all_projects(es_index, mock_processed_json, db):
2424
projects_list = []
2525
for project_slug in ALL_PROJECTS:
2626
project = G(Project, slug=project_slug, name=project_slug)

readthedocs/search/tests/data/docs/story.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
{
2-
"content": "ReadtheDocsPhilosophy\nRead the Docs is Open Source software. We have licensed the code base as MIT, which provides almost no restrictions on the use of the code.\nHowever, as a project there are things that we care about more than others. We built Read the Docs to support in the Open Source community. The code is open for people to contribute to, so that they may build features into https://readthedocs.org that they want. We also believe sharing the code openly is a valuable learning tool, especially for demonsrating how to collaborate and maintain an enormous website.\nOfficial Support\nThe time of the core developers of Read the Docs is limited. We provide official support for the following things:\nLocal development on the Python code base\nUsage of https://readthedocs.org for Open Source projects\nBug fixes in the code base, as it applies to running it on https://readthedocs.org\nUnsupported\nThere are use cases that we don\u2019t support, because it doesn\u2019t further our goal of promoting in the Open Source Community.\nWe do not support:\nSpecific usage of Sphinx and Mkdocs, that don\u2019t affect our hosting\nCustom s of Read the Docs at your company\n of Read the Docs on other platforms\nAny issues outside of the Read the Docs Python Code\nRationale\nRead the Docs was founded to improve in the Open Source Community. We fully recognize and allow the code to be used for internal installs at companies, but we will not spend our time supporting it. Our time is limited, and we want to spend it on the mission that we set out to originally support.\nIf you feel strongly about installing Read the Docs internal to a company, we will happily link to third party resources on this topic. Please open an issue with a proposal if you want to take on this task.",
2+
"content": "ReadtheDocsPhilosophy\nRead the Docs is Open Source software. We have licensed the code base as MIT, which provides almost no restrictions on the use of the code.\nHowever, as a project there are things that we care about more than others. We built Read the Docs to support in the Open Source community. The code is open for people to contribute to, so that they may build features into https://readthedocs.org that they want. We also believe sharing the code openly is a valuable learning tool, especially for demonsrating how to collaborate and maintain an enormous website.\nOfficial website Support\nThe time of the core developers of Read the Docs is limited. We provide official developers support for the following things:\nLocal development on the Python code base\nUsage of https://readthedocs.org for Open Source projects\nBug fixes in the code base, as it applies to running it on https://readthedocs.org\nUnsupported\nThere are use cases that we don\u2019t support, because it doesn\u2019t further our goal of promoting in the Open Source Community.\nWe do not support:\nSpecific usage of Sphinx and Mkdocs, that don\u2019t affect our hosting\nCustom s of Read the Docs at your company\n of Read the Docs on other platforms\nAny issues outside of the Read the Docs Python Code\nRationale\nRead the Docs was founded to improve in the Open Source Community. We fully recognize and allow the code to be used for internal installs at companies, but we will not spend our time supporting it. Our time is limited, and we want to spend it on the mission that we set out to originally support.\nIf you feel strongly about installing Read the Docs internal to a company, we will happily link to third party resources on this topic. Please open an issue with a proposal if you want to take on this task.",
33
"headers": [
4-
"Official Support",
54
"Unsupported",
65
"Rationale"
76
],

readthedocs/search/tests/data/pipeline/installation.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"content": "PipelineInstallation Either check out Pipeline from GitHub or to pull a release off PyPI\npip install django-pipeline\nAdd \u2018pipeline\u2019 to your INSTALLED_APPS\nINSTALLED_APPS = ( 'pipeline', )\nUse a pipeline storage for STATICFILES_STORAGE\nSTATICFILES_STORAGE = 'pipeline.storage.PipelineCachedStorage'\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nNote\nYou need to use Django>=1.7 to be able to use this version of pipeline.\nUpgrading from 1.3\nTo upgrade from pipeline 1.3, you will need to follow these steps:\nUpdate templates to use the new syntax\n{# pipeline<1.4 #} {% load compressed %} {% compressed_js 'group' %} {% compressed_css 'group' %}\n{# pipeline>=1.4 #} {% load pipeline %} {% javascript 'group' %} {% stylesheet 'group' %}\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nUpgrading from 1.5\nTo upgrade from pipeline 1.5, you will need update all your PIPELINE_* settings and move them under the new PIPELINE setting. See Configuration.\nRecommendations\nPipeline\u2019s default CSS and JS compressor is Yuglify. Yuglify wraps UglifyJS and cssmin, applying the default YUI configurations to them. It can be downloaded from: https://github.com/yui/yuglify/.\nIf you do not install yuglify, make sure to disable the compressor in your settings.",
2+
"content": "PipelineInstallation Official Either check out Pipeline from GitHub or to pull a release off PyPI\npip install django-pipeline\nAdd \u2018pipeline\u2019 to your INSTALLED_APPS\nINSTALLED_APPS = ( 'pipeline', )\nUse a pipeline storage for STATICFILES_STORAGE\nSTATICFILES_STORAGE = 'pipeline.storage.PipelineCachedStorage'\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nNote\nYou need to use Django>=1.7 to be able to use this version of pipeline.\nUpgrading from 1.3\nTo upgrade from pipeline 1.3, you will need to follow these steps:\nUpdate templates to use the new syntax\n{# pipeline<1.4 #} {% load compressed %} {% compressed_js 'group' %} {% compressed_css 'group' %}\n{# pipeline>=1.4 #} {% load pipeline %} {% javascript 'group' %} {% stylesheet 'group' %}\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nUpgrading from 1.5\nTo upgrade from pipeline 1.5, you will need update all your PIPELINE_* settings and move them under the new PIPELINE setting. See Configuration.\nRecommendations\nPipeline\u2019s default CSS and JS compressor is Yuglify. Yuglify wraps UglifyJS and cssmin, applying the default YUI configurations to them. It can be downloaded from: https://github.com/yui/yuglify/.\nIf you do not install yuglify, make sure to disable the compressor in your settings.",
33
"headers": [
44
"Installation",
55
"Upgrading from 1.3",
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import pytest
2+
3+
from readthedocs.search.documents import PageDocument
4+
5+
6+
class TestFileSearch(object):
7+
8+
@pytest.mark.parametrize('case', ['upper', 'lower', 'title'])
9+
def test_search_exact_match(self, client, project, case):
10+
"""Check quoted query match exact phrase with case insensitively
11+
12+
Making a query with quoted text like ``"foo bar"`` should match
13+
exactly ``foo bar`` or ``Foo Bar`` etc
14+
"""
15+
# `Github` word is present both in `kuma` and `pipeline` files
16+
# But the phrase `GitHub can` is available only in the kuma docs.
17+
# So search with this phrase to check
18+
query_text = r'"GitHub can"'
19+
cased_query = getattr(query_text, case)
20+
query = cased_query()
21+
22+
page_search = PageDocument.faceted_search(query=query)
23+
results = page_search.execute()
24+
25+
assert len(results) == 1
26+
assert results[0]['project'] == 'kuma'
27+
assert results[0]['path'] == 'documentation'
28+
29+
def test_search_combined_result(self, client, project):
30+
"""Check search result are combined of both `AND` and `OR` operator
31+
32+
If query is `Foo Bar` then the result should be as following order:
33+
34+
- Where both `Foo` and `Bar` are present
35+
- Where `Foo` or `Bar` is present
36+
"""
37+
query = 'Official Support'
38+
page_search = PageDocument.faceted_search(query=query)
39+
results = page_search.execute()
40+
assert len(results) == 3
41+
42+
result_paths = [r.path for r in results]
43+
# ``open-source-philosophy`` page has both ``Official Support`` words
44+
# ``docker`` page has ``Support`` word
45+
# ``installation`` page has ``Official`` word
46+
expected_paths = ['open-source-philosophy', 'docker', 'installation']
47+
48+
assert result_paths == expected_paths

readthedocs/search/tests/test_views.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,23 @@ def test_file_search_case_insensitive(self, client, project, case):
101101
# Check the actual text is in the result, not the cased one
102102
assert query_text in result.text()
103103

104+
def test_file_search_exact_match(self, client, project):
105+
"""Check quoted query match exact phrase
106+
107+
Making a query with quoted text like ``"foo bar"`` should match
108+
exactly ``foo bar`` phrase.
109+
"""
110+
111+
# `Github` word is present both in `kuma` and `pipeline` files
112+
# But the phrase `GitHub can` is available only in the kuma docs.
113+
# So search with this phrase to check
114+
query = r'"GitHub can"'
115+
116+
result, _ = self._get_search_result(url=self.url, client=client,
117+
search_params={'q': query, 'type': 'file'})
118+
119+
assert len(result) == 1
120+
104121
def test_page_search_not_return_removed_page(self, client, project):
105122
"""Check removed page are not in the search index"""
106123
query = get_search_query_from_project_file(project_slug=project.slug)

tox.ini

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ setenv =
1515
DJANGO_SETTINGS_MODULE=readthedocs.settings.test
1616
LANG=C
1717
LC_CTYPE=C.UTF-8
18-
DJANGO_SETTINGS_SKIP_LOCAL=True
1918
deps = -r{toxinidir}/requirements/testing.txt
2019
changedir = {toxinidir}/readthedocs
2120
commands =

0 commit comments

Comments
 (0)