Skip to content

ES: update dependencies #7408

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions readthedocs/search/documents.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging

from django.conf import settings
from django_elasticsearch_dsl import DocType, Index, fields
from django_elasticsearch_dsl import Document, Index, fields
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious whether we're getting much value from the Django integration here. It might make sense to look into using elasticsearch_dsl directly. I think we've disabled most of the Django-specific integration, so that might make our code easier to manage and a lot more explicit.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I saw that too. The main feature of the Django integration is that it syncs the ES index when the models change, but we don't make use of that. I'll make a note.

from elasticsearch import Elasticsearch

from readthedocs.projects.models import HTMLFile, Project
Expand All @@ -28,8 +28,8 @@ def update(self, *args, **kwargs):
super().update(*args, **kwargs)


@project_index.doc_type
class ProjectDocument(RTDDocTypeMixin, DocType):
@project_index.document
class ProjectDocument(RTDDocTypeMixin, Document):

# Metadata
url = fields.TextField(attr='get_absolute_url')
Expand All @@ -43,14 +43,14 @@ class ProjectDocument(RTDDocTypeMixin, DocType):

modified_model_field = 'modified_date'

class Meta:
class Django:
model = Project
fields = ('name', 'slug', 'description')
ignore_signals = True


@page_index.doc_type
class PageDocument(RTDDocTypeMixin, DocType):
@page_index.document
class PageDocument(RTDDocTypeMixin, Document):

# Metadata
project = fields.KeywordField(attr='project.slug')
Expand Down Expand Up @@ -88,7 +88,7 @@ class PageDocument(RTDDocTypeMixin, DocType):

modified_model_field = 'modified_date'

class Meta:
class Django:
model = HTMLFile
fields = ('commit', 'build')
ignore_signals = True
Expand Down
4 changes: 2 additions & 2 deletions readthedocs/search/faceted_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def query(self, search, query):
class ProjectSearchBase(RTDFacetedSearch):
facets = {'language': TermsFacet(field='language')}
doc_types = [ProjectDocument]
index = ProjectDocument._doc_type.index
index = ProjectDocument._index._name
fields = ('name^10', 'slug^5', 'description')
operators = ['and', 'or']

Expand All @@ -163,7 +163,7 @@ class PageSearchBase(RTDFacetedSearch):
),
}
doc_types = [PageDocument]
index = PageDocument._doc_type.index
index = PageDocument._index._name

# Boosting for these fields needs to be close enough
# to be re-boosted by the page rank.
Expand Down
14 changes: 9 additions & 5 deletions readthedocs/search/management/commands/reindex_elasticsearch.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
import datetime
import logging

from celery import chord, chain
from celery import chain, chord
from django.apps import apps
from django.conf import settings
from django.core.management import BaseCommand
from django.utils import timezone
from django_elasticsearch_dsl.registries import registry

from ...tasks import (index_objects_to_es, switch_es_index, create_new_es_index,
index_missing_objects)
from ...tasks import (
create_new_es_index,
index_missing_objects,
index_objects_to_es,
switch_es_index,
)

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -64,11 +68,11 @@ def _run_reindex_tasks(self, models, queue):
app_label = queryset.model._meta.app_label
model_name = queryset.model.__name__

index_name = doc._doc_type.index
index_name = doc._index._name
new_index_name = "{}_{}".format(index_name, timestamp)
# Set index temporarily for indexing,
# this will only get set during the running of this command
doc._doc_type.index = new_index_name
doc._index._name = new_index_name

pre_index_task = create_new_es_index.si(app_label=app_label,
model_name=model_name,
Expand Down
20 changes: 10 additions & 10 deletions readthedocs/search/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def get_blocks(self, obj):

sorted_results = sorted(
itertools.chain(sections, domains),
key=attrgetter('_score'),
key=attrgetter('meta.score'),
reverse=True,
)
sorted_results = [
Expand All @@ -157,11 +157,11 @@ def get_content(self, obj):
class DomainSearchSerializer(serializers.Serializer):

type = serializers.CharField(default='domain', source=None, read_only=True)
role = serializers.CharField(source='_source.role_name')
name = serializers.CharField(source='_source.name')
id = serializers.CharField(source='_source.anchor')
content = serializers.CharField(source='_source.docstrings')
highlights = DomainHighlightSerializer(source='highlight', default=dict)
role = serializers.CharField(source='role_name')
name = serializers.CharField()
id = serializers.CharField(source='anchor')
content = serializers.CharField(source='docstrings')
highlights = DomainHighlightSerializer(source='meta.highlight', default=dict)


class SectionHighlightSerializer(serializers.Serializer):
Expand All @@ -181,7 +181,7 @@ def get_content(self, obj):
class SectionSearchSerializer(serializers.Serializer):

type = serializers.CharField(default='section', source=None, read_only=True)
id = serializers.CharField(source='_source.id')
title = serializers.CharField(source='_source.title')
content = serializers.CharField(source='_source.content')
highlights = SectionHighlightSerializer(source='highlight', default=dict)
id = serializers.CharField()
title = serializers.CharField()
content = serializers.CharField()
highlights = SectionHighlightSerializer(source='meta.highlight', default=dict)
15 changes: 9 additions & 6 deletions readthedocs/search/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from readthedocs.projects.models import Project
from readthedocs.search.models import SearchQuery
from readthedocs.worker import app
from .utils import _get_index, _get_document

from .utils import _get_document, _get_index

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -44,17 +45,19 @@ def index_objects_to_es(

if index_name:
# Hack the index name temporarily for reindexing tasks
old_index_name = document._doc_type.index
document._doc_type.index = index_name
old_index_name = document._index._name
document._index._name = index_name
log.info('Replacing index name %s with %s', old_index_name, index_name)

log.info("Indexing model: %s, '%s' objects", model.__name__, queryset.count())
doc_obj.update(queryset.iterator())

if index_name:
log.info('Undoing index replacement, settings %s with %s',
document._doc_type.index, old_index_name)
document._doc_type.index = old_index_name
log.info(
'Undoing index replacement, settings %s with %s',
document._index._name, old_index_name,
)
document._index._name = old_index_name


@app.task(queue='web')
Expand Down
9 changes: 6 additions & 3 deletions readthedocs/search/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def _get_index(indices, index_name):
:return: DED Index
"""
for index in indices:
if str(index) == index_name:
if index._name == index_name:
return index


Expand All @@ -116,7 +116,10 @@ def _indexing_helper(html_objs_qs, wipe=False):
else, html_objs are indexed.
"""
from readthedocs.search.documents import PageDocument
from readthedocs.search.tasks import index_objects_to_es, delete_objects_in_es
from readthedocs.search.tasks import (
delete_objects_in_es,
index_objects_to_es,
)

if html_objs_qs:
obj_ids = []
Expand Down Expand Up @@ -148,7 +151,7 @@ def _get_sorted_results(results, source_key='_source'):
source_key: hit._source.to_dict(),
'highlight': hit.highlight.to_dict() if hasattr(hit, 'highlight') else {}
}
for hit in sorted(results, key=attrgetter('_score'), reverse=True)
for hit in sorted(results, key=attrgetter('meta.score'), reverse=True)
]

return sorted_results
Expand Down
9 changes: 5 additions & 4 deletions readthedocs/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,16 +581,17 @@ def DOCKER_LIMITS(self):
ES_INDEXES = {
'project': {
'name': 'project_index',
'settings': {'number_of_shards': 1,
'number_of_replicas': 1
}
'settings': {
'number_of_shards': 1,
'number_of_replicas': 1
},
},
'page': {
'name': 'page_index',
'settings': {
'number_of_shards': 1,
'number_of_replicas': 1,
}
},
},
}

Expand Down
19 changes: 2 additions & 17 deletions requirements/pip.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,23 +47,8 @@ GitPython==3.1.7

# Search
elasticsearch==6.8.1 # pyup: <7.0.0


# elasticsearch-dsl==6.3.1 produces this error
# File "/home/travis/build/rtfd/readthedocs.org/.tox/py36/lib/python3.6/site-packages/django_elasticsearch_dsl/documents.py", line 8, in <module>
# from elasticsearch_dsl.document import DocTypeMeta as DSLDocTypeMeta
# ImportError: cannot import name 'DocTypeMeta'
#
# Commit 97e3f75 adds the NestedFacet
git+https://github.com/elastic/elasticsearch-dsl-py@97e3f756a8cacd1c863d3ced3d17abcafbb0f85e#egg=elasticsearch-dsl==6.1.1

# django-elasticsearch-dsl==6.4.1 produces this error
# File "/home/travis/build/readthedocs/readthedocs.org/.tox/py36/lib/python3.6/site-packages/django_elasticsearch_dsl/__init__.py", line 3, in <module>
# from .documents import DocType # noqa
# File "/home/travis/build/readthedocs/readthedocs.org/.tox/py36/lib/python3.6/site-packages/django_elasticsearch_dsl/documents.py", line 7, in <module>
# from elasticsearch_dsl import Document as DSLDocument
# ImportError: cannot import name 'Document'
django-elasticsearch-dsl==0.5.1 # pyup: ignore
elasticsearch-dsl==6.4.0 # pyup: <7.0
django-elasticsearch-dsl==6.4.2 # pyup: <7.0
selectolax==0.2.7

# Ignoring orjson for now because it makes Travis fail
Expand Down