-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Upgrade search app to Elastic Search 5.4 #3373
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
f291550
a922986
b7e1189
848ee3c
e5c42c0
4cf429f
1291961
a4208de
ec992c3
8200055
d0a8ce2
0c538f8
75cf46f
f2d0758
ede0827
87c1c9d
213a390
9dafb8b
07be69d
dedf97f
24f9776
ba8c675
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,7 @@ | |
import datetime | ||
|
||
from elasticsearch import Elasticsearch, exceptions | ||
from elasticsearch.helpers import bulk_index | ||
from elasticsearch.helpers import bulk | ||
|
||
from django.conf import settings | ||
|
||
|
@@ -48,8 +48,6 @@ def get_settings(self, settings_override=None): | |
'number_of_replicas': settings.ES_DEFAULT_NUM_REPLICAS, | ||
'number_of_shards': settings.ES_DEFAULT_NUM_SHARDS, | ||
'refresh_interval': '5s', | ||
'store.compress.tv': True, | ||
'store.compress.stored': True, | ||
'analysis': self.get_analysis(), | ||
} | ||
if settings_override: | ||
|
@@ -139,7 +137,7 @@ def bulk_index(self, data, index=None, chunk_size=500, parent=None, | |
docs.append(doc) | ||
|
||
# TODO: This doesn't work with the new ES setup. | ||
bulk_index(self.es, docs, chunk_size=chunk_size) | ||
bulk(self.es, docs, chunk_size=chunk_size) | ||
|
||
def index_document(self, data, index=None, parent=None, routing=None): | ||
doc = self.extract_document(data) | ||
|
@@ -220,25 +218,24 @@ def get_mapping(self): | |
# Disable _all field to reduce index size. | ||
'_all': {'enabled': False}, | ||
'properties': { | ||
'id': {'type': 'long'}, | ||
'name': {'type': 'string', 'analyzer': 'default_icu'}, | ||
'description': {'type': 'string', 'analyzer': 'default_icu'}, | ||
|
||
'slug': {'type': 'string', 'index': 'not_analyzed'}, | ||
'lang': {'type': 'string', 'index': 'not_analyzed'}, | ||
'tags': {'type': 'string', 'index': 'not_analyzed'}, | ||
'privacy': {'type': 'string', 'index': 'not_analyzed'}, | ||
'id': {'type': 'keyword'}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is not There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe the new ES doesn't let these vary types, or something. I was getting an error, and it doesn't really need to be an ID, so we will just use it as a string like the others. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ericholscher Seems like they support it Without ID, what can be here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It does seem strange that we're coercing to string here, but I'm not familiar enough with ES to say why token/string is better. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not that it's better, it just doesn't work otherwise. I don't fully understand it either. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It throws this error: |
||
'name': {'type': 'text', 'analyzer': 'default_icu'}, | ||
'description': {'type': 'text', 'analyzer': 'default_icu'}, | ||
|
||
'slug': {'type': 'keyword'}, | ||
'lang': {'type': 'keyword'}, | ||
'tags': {'type': 'keyword'}, | ||
'privacy': {'type': 'keyword'}, | ||
'author': { | ||
'type': 'string', | ||
'type': 'text', | ||
'analyzer': 'default_icu', | ||
'fields': { | ||
'raw': { | ||
'type': 'string', | ||
'index': 'not_analyzed', | ||
'type': 'keyword', | ||
}, | ||
}, | ||
}, | ||
'url': {'type': 'string', 'index': 'not_analyzed'}, | ||
'url': {'type': 'keyword'}, | ||
# Add a weight field to enhance relevancy scoring. | ||
'weight': {'type': 'float'}, | ||
} | ||
|
@@ -273,19 +270,19 @@ def get_mapping(self): | |
# Disable _all field to reduce index size. | ||
'_all': {'enabled': False}, | ||
# Associate a page with a project. | ||
'_parent': {'type': self._parent}, | ||
# '_parent': {'type': self._parent}, | ||
'properties': { | ||
'id': {'type': 'string', 'index': 'not_analyzed'}, | ||
'sha': {'type': 'string', 'index': 'not_analyzed'}, | ||
'project': {'type': 'string', 'index': 'not_analyzed'}, | ||
'version': {'type': 'string', 'index': 'not_analyzed'}, | ||
'path': {'type': 'string', 'index': 'not_analyzed'}, | ||
'taxonomy': {'type': 'string', 'index': 'not_analyzed'}, | ||
'commit': {'type': 'string', 'index': 'not_analyzed'}, | ||
|
||
'title': {'type': 'string', 'analyzer': 'default_icu'}, | ||
'headers': {'type': 'string', 'analyzer': 'default_icu'}, | ||
'content': {'type': 'string', 'analyzer': 'default_icu'}, | ||
'id': {'type': 'keyword'}, | ||
'sha': {'type': 'keyword'}, | ||
'project': {'type': 'keyword'}, | ||
'version': {'type': 'keyword'}, | ||
'path': {'type': 'keyword'}, | ||
'taxonomy': {'type': 'keyword'}, | ||
'commit': {'type': 'keyword'}, | ||
|
||
'title': {'type': 'text', 'analyzer': 'default_icu'}, | ||
'headers': {'type': 'text', 'analyzer': 'default_icu'}, | ||
'content': {'type': 'text', 'analyzer': 'default_icu'}, | ||
# Add a weight field to enhance relevancy scoring. | ||
'weight': {'type': 'float'}, | ||
} | ||
|
@@ -321,7 +318,7 @@ def get_mapping(self): | |
# Disable _all field to reduce index size. | ||
'_all': {'enabled': False}, | ||
# Associate a section with a page. | ||
'_parent': {'type': self._parent}, | ||
# '_parent': {'type': self._parent}, | ||
# Commenting this out until we need it. | ||
# 'suggest': { | ||
# "type": "completion", | ||
|
@@ -330,18 +327,18 @@ def get_mapping(self): | |
# "payloads": True, | ||
# }, | ||
'properties': { | ||
'id': {'type': 'string', 'index': 'not_analyzed'}, | ||
'project': {'type': 'string', 'index': 'not_analyzed'}, | ||
'version': {'type': 'string', 'index': 'not_analyzed'}, | ||
'path': {'type': 'string', 'index': 'not_analyzed'}, | ||
'page_id': {'type': 'string', 'index': 'not_analyzed'}, | ||
'commit': {'type': 'string', 'index': 'not_analyzed'}, | ||
'title': {'type': 'string', 'analyzer': 'default_icu'}, | ||
'content': {'type': 'string', 'analyzer': 'default_icu'}, | ||
'id': {'type': 'keyword'}, | ||
'project': {'type': 'keyword'}, | ||
'version': {'type': 'keyword'}, | ||
'path': {'type': 'keyword'}, | ||
'page_id': {'type': 'keyword'}, | ||
'commit': {'type': 'keyword'}, | ||
'title': {'type': 'text', 'analyzer': 'default_icu'}, | ||
'content': {'type': 'text', 'analyzer': 'default_icu'}, | ||
'blocks': { | ||
'type': 'object', | ||
'properties': { | ||
'code': {'type': 'string', 'analyzer': 'default_icu'} | ||
'code': {'type': 'text', 'analyzer': 'default_icu'} | ||
} | ||
}, | ||
# Add a weight field to enhance relevancy scoring. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -116,6 +116,7 @@ def INSTALLED_APPS(self): # noqa | |
if donate: | ||
apps.append('django_countries') | ||
apps.append('readthedocsext.donate') | ||
apps.append('readthedocsext.search') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this under There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I fixed that here, actually: https://github.com/rtfd/readthedocs.org/pull/3356/files#diff-5973fe59a4d52278d55f884d145bd5d7R11 :) Waiting on it to get merged. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Merged #3356, this can be rebased |
||
return apps | ||
|
||
TEMPLATE_LOADERS = ( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -86,9 +86,13 @@ | |
|
||
if 'readthedocsext.donate' in settings.INSTALLED_APPS: | ||
# Include donation URL's | ||
groups.append([ | ||
url(r'^sustainability/', include('readthedocsext.donate.urls')), | ||
]) | ||
groups.insert(0, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why this change? Seems to be the same, anyway. |
||
[url(r'^sustainability/', include('readthedocsext.donate.urls'))] | ||
) | ||
for num, _url in enumerate(rtd_urls): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same question again. This is only if the donate app is installed, why? I don't find the relationship between donate app and search app. |
||
if _url and hasattr(_url, 'name') and _url.name == 'search': | ||
rtd_urls[num] = \ | ||
url(r'^search/', 'readthedocsext.search.mainsearch.elastic_search', name='search') | ||
if not getattr(settings, 'USE_SUBDOMAIN', False) or settings.DEBUG: | ||
groups.insert(0, docs_urls) | ||
if getattr(settings, 'ALLOW_ADMIN', True): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removing this comma produces tests to fail... I had the same problem and I ended up writing
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the test should be fixed. Its expecting a tuple but it should get the slug in sting
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@safwanrahman agreed -- want to PR it to master, and I'll merge it in here?