From bb8189608b7b547a0e9c2854b37911987b2abcf8 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 24 Aug 2020 19:02:56 -0500 Subject: [PATCH] ES: update dependencies Getting ready to upgrade to the latest version --- readthedocs/search/documents.py | 14 ++++++------- readthedocs/search/faceted_search.py | 4 ++-- .../commands/reindex_elasticsearch.py | 14 ++++++++----- readthedocs/search/serializers.py | 20 +++++++++---------- readthedocs/search/tasks.py | 15 ++++++++------ readthedocs/search/utils.py | 9 ++++++--- readthedocs/settings/base.py | 9 +++++---- requirements/pip.txt | 19 ++---------------- 8 files changed, 50 insertions(+), 54 deletions(-) diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py index bb064339816..237574d7223 100644 --- a/readthedocs/search/documents.py +++ b/readthedocs/search/documents.py @@ -1,7 +1,7 @@ import logging from django.conf import settings -from django_elasticsearch_dsl import DocType, Index, fields +from django_elasticsearch_dsl import Document, Index, fields from elasticsearch import Elasticsearch from readthedocs.projects.models import HTMLFile, Project @@ -28,8 +28,8 @@ def update(self, *args, **kwargs): super().update(*args, **kwargs) -@project_index.doc_type -class ProjectDocument(RTDDocTypeMixin, DocType): +@project_index.document +class ProjectDocument(RTDDocTypeMixin, Document): # Metadata url = fields.TextField(attr='get_absolute_url') @@ -43,14 +43,14 @@ class ProjectDocument(RTDDocTypeMixin, DocType): modified_model_field = 'modified_date' - class Meta: + class Django: model = Project fields = ('name', 'slug', 'description') ignore_signals = True -@page_index.doc_type -class PageDocument(RTDDocTypeMixin, DocType): +@page_index.document +class PageDocument(RTDDocTypeMixin, Document): # Metadata project = fields.KeywordField(attr='project.slug') @@ -88,7 +88,7 @@ class PageDocument(RTDDocTypeMixin, DocType): modified_model_field = 'modified_date' - class Meta: + class Django: model = HTMLFile fields = ('commit', 'build') ignore_signals = True diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index 62a21ab417c..32b544fdb4d 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -148,7 +148,7 @@ def query(self, search, query): class ProjectSearchBase(RTDFacetedSearch): facets = {'language': TermsFacet(field='language')} doc_types = [ProjectDocument] - index = ProjectDocument._doc_type.index + index = ProjectDocument._index._name fields = ('name^10', 'slug^5', 'description') operators = ['and', 'or'] @@ -163,7 +163,7 @@ class PageSearchBase(RTDFacetedSearch): ), } doc_types = [PageDocument] - index = PageDocument._doc_type.index + index = PageDocument._index._name # boosting for these fields need to be close enough # to be re-boosted by the page rank. diff --git a/readthedocs/search/management/commands/reindex_elasticsearch.py b/readthedocs/search/management/commands/reindex_elasticsearch.py index 7c0ea6982cf..f4a8fc97681 100644 --- a/readthedocs/search/management/commands/reindex_elasticsearch.py +++ b/readthedocs/search/management/commands/reindex_elasticsearch.py @@ -1,15 +1,19 @@ import datetime import logging -from celery import chord, chain +from celery import chain, chord from django.apps import apps from django.conf import settings from django.core.management import BaseCommand from django.utils import timezone from django_elasticsearch_dsl.registries import registry -from ...tasks import (index_objects_to_es, switch_es_index, create_new_es_index, - index_missing_objects) +from ...tasks import ( + create_new_es_index, + index_missing_objects, + index_objects_to_es, + switch_es_index, +) log = logging.getLogger(__name__) @@ -64,11 +68,11 @@ def _run_reindex_tasks(self, models, queue): app_label = queryset.model._meta.app_label model_name = queryset.model.__name__ - index_name = doc._doc_type.index + index_name = doc._index._name new_index_name = "{}_{}".format(index_name, timestamp) # Set index temporarily for indexing, # this will only get set during the running of this command - doc._doc_type.index = new_index_name + doc._index._name = new_index_name pre_index_task = create_new_es_index.si(app_label=app_label, model_name=model_name, diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py index 557eabc7075..01b04eca56e 100644 --- a/readthedocs/search/serializers.py +++ b/readthedocs/search/serializers.py @@ -130,7 +130,7 @@ def get_blocks(self, obj): sorted_results = sorted( itertools.chain(sections, domains), - key=attrgetter('_score'), + key=attrgetter('meta.score'), reverse=True, ) sorted_results = [ @@ -157,11 +157,11 @@ def get_content(self, obj): class DomainSearchSerializer(serializers.Serializer): type = serializers.CharField(default='domain', source=None, read_only=True) - role = serializers.CharField(source='_source.role_name') - name = serializers.CharField(source='_source.name') - id = serializers.CharField(source='_source.anchor') - content = serializers.CharField(source='_source.docstrings') - highlights = DomainHighlightSerializer(source='highlight', default=dict) + role = serializers.CharField(source='role_name') + name = serializers.CharField() + id = serializers.CharField(source='anchor') + content = serializers.CharField(source='docstrings') + highlights = DomainHighlightSerializer(source='meta.highlight', default=dict) class SectionHighlightSerializer(serializers.Serializer): @@ -181,7 +181,7 @@ def get_content(self, obj): class SectionSearchSerializer(serializers.Serializer): type = serializers.CharField(default='section', source=None, read_only=True) - id = serializers.CharField(source='_source.id') - title = serializers.CharField(source='_source.title') - content = serializers.CharField(source='_source.content') - highlights = SectionHighlightSerializer(source='highlight', default=dict) + id = serializers.CharField() + title = serializers.CharField() + content = serializers.CharField() + highlights = SectionHighlightSerializer(source='meta.highlight', default=dict) diff --git a/readthedocs/search/tasks.py b/readthedocs/search/tasks.py index cd82f8fe25e..6d827c8398a 100644 --- a/readthedocs/search/tasks.py +++ b/readthedocs/search/tasks.py @@ -9,7 +9,8 @@ from readthedocs.projects.models import Project from readthedocs.search.models import SearchQuery from readthedocs.worker import app -from .utils import _get_index, _get_document + +from .utils import _get_document, _get_index log = logging.getLogger(__name__) @@ -44,17 +45,19 @@ def index_objects_to_es( if index_name: # Hack the index name temporarily for reindexing tasks - old_index_name = document._doc_type.index - document._doc_type.index = index_name + old_index_name = document._index._name + document._index._name = index_name log.info('Replacing index name %s with %s', old_index_name, index_name) log.info("Indexing model: %s, '%s' objects", model.__name__, queryset.count()) doc_obj.update(queryset.iterator()) if index_name: - log.info('Undoing index replacement, settings %s with %s', - document._doc_type.index, old_index_name) - document._doc_type.index = old_index_name + log.info( + 'Undoing index replacement, settings %s with %s', + document._index._name, old_index_name, + ) + document._index._name = old_index_name @app.task(queue='web') diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py index 95f7dd316ba..0091a6c1525 100644 --- a/readthedocs/search/utils.py +++ b/readthedocs/search/utils.py @@ -89,7 +89,7 @@ def _get_index(indices, index_name): :return: DED Index """ for index in indices: - if str(index) == index_name: + if index._name == index_name: return index @@ -116,7 +116,10 @@ def _indexing_helper(html_objs_qs, wipe=False): else, html_objs are indexed. """ from readthedocs.search.documents import PageDocument - from readthedocs.search.tasks import index_objects_to_es, delete_objects_in_es + from readthedocs.search.tasks import ( + delete_objects_in_es, + index_objects_to_es, + ) if html_objs_qs: obj_ids = [] @@ -148,7 +151,7 @@ def _get_sorted_results(results, source_key='_source'): source_key: hit._source.to_dict(), 'highlight': hit.highlight.to_dict() if hasattr(hit, 'highlight') else {} } - for hit in sorted(results, key=attrgetter('_score'), reverse=True) + for hit in sorted(results, key=attrgetter('meta.score'), reverse=True) ] return sorted_results diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py index e6e6592467f..33bf954efa9 100644 --- a/readthedocs/settings/base.py +++ b/readthedocs/settings/base.py @@ -581,16 +581,17 @@ def DOCKER_LIMITS(self): ES_INDEXES = { 'project': { 'name': 'project_index', - 'settings': {'number_of_shards': 1, - 'number_of_replicas': 1 - } + 'settings': { + 'number_of_shards': 1, + 'number_of_replicas': 1 + }, }, 'page': { 'name': 'page_index', 'settings': { 'number_of_shards': 1, 'number_of_replicas': 1, - } + }, }, } diff --git a/requirements/pip.txt b/requirements/pip.txt index 01adbe80d27..d0d27e17994 100644 --- a/requirements/pip.txt +++ b/requirements/pip.txt @@ -47,23 +47,8 @@ GitPython==3.1.7 # Search elasticsearch==6.8.1 # pyup: <7.0.0 - - -# elasticsearch-dsl==6.3.1 produces this error -# File "/home/travis/build/rtfd/readthedocs.org/.tox/py36/lib/python3.6/site-packages/django_elasticsearch_dsl/documents.py", line 8, in -# from elasticsearch_dsl.document import DocTypeMeta as DSLDocTypeMeta -# ImportError: cannot import name 'DocTypeMeta' -# -# Commit 97e3f75 adds the NestedFacet -git+https://github.com/elastic/elasticsearch-dsl-py@97e3f756a8cacd1c863d3ced3d17abcafbb0f85e#egg=elasticsearch-dsl==6.1.1 - -# django-elasticsearch-dsl==6.4.1 produces this error -# File "/home/travis/build/readthedocs/readthedocs.org/.tox/py36/lib/python3.6/site-packages/django_elasticsearch_dsl/__init__.py", line 3, in -# from .documents import DocType # noqa -# File "/home/travis/build/readthedocs/readthedocs.org/.tox/py36/lib/python3.6/site-packages/django_elasticsearch_dsl/documents.py", line 7, in -# from elasticsearch_dsl import Document as DSLDocument -# ImportError: cannot import name 'Document' -django-elasticsearch-dsl==0.5.1 # pyup: ignore +elasticsearch-dsl==6.4.0 # pyup: <7.0 +django-elasticsearch-dsl==6.4.2 # pyup: <7.0 selectolax==0.2.6 # Ignoring orjson for now because it makes Travis to fail