4
4
from elasticsearch import Elasticsearch
5
5
from elasticsearch_dsl import FacetedSearch , TermsFacet
6
6
from elasticsearch_dsl .faceted_search import NestedFacet
7
- from elasticsearch_dsl .query import Bool , Match , Nested , SimpleQueryString
7
+ from elasticsearch_dsl .query import Bool , MultiMatch , Nested , SimpleQueryString
8
8
9
9
from readthedocs .core .utils .extend import SettingsOverrideObject
10
10
from readthedocs .search .documents import PageDocument , ProjectDocument
@@ -27,17 +27,21 @@ class RTDFacetedSearch(FacetedSearch):
27
27
'post_tags' : ['</span>' ],
28
28
}
29
29
30
- def __init__ (self , query = None , filters = None , user = None , ** kwargs ):
30
+ def __init__ (self , query = None , filters = None , user = None , use_advanced_query = True , ** kwargs ):
31
31
"""
32
32
Pass in a user in order to filter search results by privacy.
33
33
34
+ If `use_advanced_query` is `True`,
35
+ force to always use `SimpleQueryString` for the text query object.
36
+
34
37
.. warning::
35
38
36
39
The `self.user` and `self.filter_by_user` attributes
37
40
aren't currently used on the .org, but are used on the .com.
38
41
"""
39
42
self .user = user
40
43
self .filter_by_user = kwargs .pop ('filter_by_user' , True )
44
+ self .use_advanced_query = use_advanced_query
41
45
42
46
# Hack a fix to our broken connection pooling
43
47
# This creates a new connection on every request,
@@ -55,6 +59,49 @@ def __init__(self, query=None, filters=None, user=None, **kwargs):
55
59
}
56
60
super ().__init__ (query = query , filters = valid_filters , ** kwargs )
57
61
62
def _get_text_query(self, *, query, fields, operator):
    """
    Build the text query object used to search ``fields`` for ``query``.

    Two kinds of query objects can be returned:

    - SimpleQueryString: exposes a syntax that lets advanced users
      manipulate the results explicitly. It is used when
      ``self.use_advanced_query`` is set, or when the query text itself
      appears to use that syntax.
    - MultiMatch: gives us more control over the results (fuzziness is
      set to ``"AUTO"``), for a better experience with simple queries.

    All arguments are keyword-only; ``operator`` is forwarded as
    ``default_operator`` for SimpleQueryString and as ``operator``
    for MultiMatch.
    """
    # The instance flag is checked first, so the query text is only
    # inspected when advanced mode isn't already forced.
    wants_advanced = self.use_advanced_query or self._is_advanced_query(query)

    if not wants_advanced:
        return MultiMatch(
            query=query,
            fields=fields,
            operator=operator,
            fuzziness="AUTO",
        )

    return SimpleQueryString(
        query=query,
        fields=fields,
        default_operator=operator,
    )
85
+
86
+ def _is_advanced_query (self , query ):
87
+ """
88
+ Check if query looks like to be using the syntax from a simple query string.
89
+
90
+ .. note::
91
+
92
+ We don't check if the syntax is valid.
93
+ The tokens used aren't very common in a normal query, so checking if
94
+ the query contains any of them should be enough to determinate if
95
+ it's an advanced query.
96
+
97
+ Simple query syntax:
98
+
99
+ https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html#simple-query-string-syntax
100
+ """
101
+ tokens = {'+' , '|' , '-' , '"' , '*' , '(' , ')' , '~' }
102
+ query_tokens = set (query )
103
+ return not tokens .isdisjoint (query_tokens )
104
+
58
105
def query (self , search , query ):
59
106
"""
60
107
Add query part to ``search`` when needed.
@@ -71,10 +118,11 @@ def query(self, search, query):
71
118
72
119
# need to search for both 'and' and 'or' operations
73
120
# the score of and should be higher as it satisfies both or and and
74
-
75
121
for operator in self .operators :
76
- query_string = SimpleQueryString (
77
- query = query , fields = self .fields , default_operator = operator
122
+ query_string = self ._get_text_query (
123
+ query = query ,
124
+ fields = self .fields ,
125
+ operator = operator ,
78
126
)
79
127
all_queries .append (query_string )
80
128
@@ -135,13 +183,12 @@ def query(self, search, query):
135
183
136
184
# match query for the title (of the page) field.
137
185
for operator in self .operators :
138
- all_queries .append (
139
- SimpleQueryString (
140
- query = query ,
141
- fields = self .fields ,
142
- default_operator = operator
143
- )
186
+ query_string = self ._get_text_query (
187
+ query = query ,
188
+ fields = self .fields ,
189
+ operator = operator ,
144
190
)
191
+ all_queries .append (query_string )
145
192
146
193
# nested query for search in sections
147
194
sections_nested_query = self .generate_nested_query (
@@ -186,10 +233,10 @@ def generate_nested_query(self, query, path, fields, inner_hits):
186
233
queries = []
187
234
188
235
for operator in self .operators :
189
- query_string = SimpleQueryString (
236
+ query_string = self . _get_text_query (
190
237
query = query ,
191
238
fields = fields ,
192
- default_operator = operator
239
+ operator = operator ,
193
240
)
194
241
queries .append (query_string )
195
242
0 commit comments