-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Upgrade Elasticsearch to version 6.x #4211
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
3c41b42
6410495
272b50a
b8f1a06
6c430e5
035c312
746b378
de47978
ab6fffb
3523fab
9a5b0ed
e9b1c03
37f6936
f730556
05f5e05
0965a94
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
from django_elasticsearch_dsl import DocType, Index, fields | ||
|
||
from readthedocs.projects.models import Project | ||
|
||
from readthedocs.search.faceted_search import ProjectSearch | ||
|
||
project_index = Index('project') | ||
|
||
project_index.settings( | ||
number_of_shards=1, | ||
number_of_replicas=0 | ||
) | ||
|
||
|
||
@project_index.doc_type | ||
class ProjectDocument(DocType): | ||
|
||
class Meta(object): | ||
model = Project | ||
fields = ('name', 'slug', 'description') | ||
|
||
url = fields.TextField() | ||
users = fields.NestedField(properties={ | ||
'username': fields.TextField(), | ||
'id': fields.IntegerField(), | ||
}) | ||
language = fields.KeywordField() | ||
|
||
def prepare_url(self, instance): | ||
return instance.get_absolute_url() | ||
|
||
@classmethod | ||
def faceted_search(cls, query, language=None, using=None, index=None): | ||
kwargs = { | ||
'using': using or cls._doc_type.using, | ||
'index': index or cls._doc_type.index, | ||
'doc_types': [cls], | ||
'model': cls._doc_type.model, | ||
'query': query | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this logic required? It seems a bit heavy/complex. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think, to keep aligned with the |
||
|
||
if language: | ||
kwargs['filters'] = {'language': language} | ||
|
||
return ProjectSearch(**kwargs) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from elasticsearch_dsl import FacetedSearch, TermsFacet | ||
|
||
|
||
class ProjectSearch(FacetedSearch): | ||
fields = ['name^5', 'description'] | ||
facets = { | ||
'language': TermsFacet(field='language') | ||
} | ||
|
||
def __init__(self, using, index, doc_types, model, **kwargs): | ||
self.using = using | ||
self.index = index | ||
self.doc_types = doc_types | ||
self._model = model | ||
super(ProjectSearch, self).__init__(**kwargs) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,6 @@ | |
import datetime | ||
|
||
from elasticsearch import Elasticsearch, exceptions | ||
from elasticsearch.helpers import bulk_index | ||
|
||
from django.conf import settings | ||
|
||
|
@@ -143,7 +142,7 @@ def bulk_index(self, data, index=None, chunk_size=500, parent=None, | |
docs.append(doc) | ||
|
||
# TODO: This doesn't work with the new ES setup. | ||
bulk_index(self.es, docs, chunk_size=chunk_size) | ||
# bulk_index(self.es, docs, chunk_size=chunk_size) | ||
|
||
def index_document(self, data, index=None, parent=None, routing=None): | ||
doc = self.extract_document(data) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Guessing this entire file and other related code should be deleted? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, it needs to be deleted. I will delete it once I implement the file searching functionality! |
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
from random import shuffle | ||
|
||
import pytest | ||
from django.core.management import call_command | ||
from django_dynamic_fixture import G | ||
|
||
from readthedocs.projects.models import Project | ||
|
@@ -16,27 +17,17 @@ def mock_elastic_index(mocker): | |
mocker.patch.object(Index, '_index', index_name.lower()) | ||
|
||
|
||
@pytest.fixture(autouse=True) | ||
def es_index(mock_elastic_index): | ||
# Create the index. | ||
index = Index() | ||
index_name = index.timestamped_index() | ||
index.create_index(index_name) | ||
index.update_aliases(index_name) | ||
# Update mapping | ||
proj = ProjectIndex() | ||
proj.put_mapping() | ||
page = PageIndex() | ||
page.put_mapping() | ||
sec = SectionIndex() | ||
sec.put_mapping() | ||
|
||
yield index | ||
index.delete_index(index_name=index_name) | ||
@pytest.fixture() | ||
def es_index(): | ||
call_command('search_index', '--delete', '-f') | ||
call_command('search_index', '--create') | ||
|
||
yield | ||
call_command('search_index', '--delete', '-f') | ||
|
||
|
||
@pytest.fixture | ||
def all_projects(): | ||
def all_projects(es_index): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Where does this get passed in? Is it automatically calling the above fixture based on name? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually, it's pytest's dependency injection. So if you have a fixture name |
||
projects = [G(Project, slug=project_slug, name=project_slug) for project_slug in ALL_PROJECTS] | ||
shuffle(projects) | ||
return projects | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ | |
|
||
from readthedocs.builds.constants import LATEST | ||
from readthedocs.search import lib as search_lib | ||
from readthedocs.search.documents import ProjectDocument | ||
|
||
log = logging.getLogger(__name__) | ||
LOG_TEMPLATE = u'(Elastic Search) [{user}:{type}] [{project}:{version}:{language}] {msg}' | ||
|
@@ -45,14 +46,18 @@ def elastic_search(request): | |
|
||
if user_input.query: | ||
if user_input.type == 'project': | ||
results = search_lib.search_project( | ||
request, user_input.query, language=user_input.language) | ||
project_search = ProjectDocument.faceted_search(query=user_input.query, | ||
language=user_input.language) | ||
response = project_search.execute() | ||
results = response.hits | ||
facets = response.facets | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this used? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes. It's used for showing the facet (language) in project search results. |
||
elif user_input.type == 'file': | ||
results = search_lib.search_file( | ||
request, user_input.query, project_slug=user_input.project, | ||
version_slug=user_input.version, taxonomy=user_input.taxonomy) | ||
|
||
if results: | ||
# TODO: Temporary until finishing search upgrade for files | ||
if results and user_input.type == 'file': | ||
# pre and post 1.0 compat | ||
for num, hit in enumerate(results['hits']['hits']): | ||
for key, val in list(hit['fields'].items()): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Interesting :)