diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py
index f50150c389f..d2724a099f8 100644
--- a/readthedocs/search/documents.py
+++ b/readthedocs/search/documents.py
@@ -3,6 +3,8 @@
 from django.conf import settings
 from django_elasticsearch_dsl import DocType, Index, fields
 
+from elasticsearch import Elasticsearch
+
 from readthedocs.projects.models import HTMLFile, Project
 from readthedocs.sphinx_domains.models import SphinxDomain
 
@@ -22,8 +24,19 @@
 log = logging.getLogger(__name__)
 
 
+class RTDDocTypeMixin:
+
+    def update(self, *args, **kwargs):
+        # Hack a fix to our broken connection pooling
+        # This creates a new connection on every request,
+        # but actually works :)
+        log.info('Hacking Elastic indexing to fix connection pooling')
+        self.using = Elasticsearch(**settings.ELASTICSEARCH_DSL['default'])
+        super().update(*args, **kwargs)
+
+
 @domain_index.doc_type
-class SphinxDomainDocument(DocType):
+class SphinxDomainDocument(RTDDocTypeMixin, DocType):
     project = fields.KeywordField(attr='project.slug')
     version = fields.KeywordField(attr='version.slug')
     role_name = fields.KeywordField(attr='role_name')
@@ -63,7 +76,7 @@ def get_queryset(self):
 
 
 @project_index.doc_type
-class ProjectDocument(DocType):
+class ProjectDocument(RTDDocTypeMixin, DocType):
 
     # Metadata
     url = fields.TextField(attr='get_absolute_url')
@@ -97,7 +110,7 @@ def faceted_search(cls, query, user, language=None):
 
 
 @page_index.doc_type
-class PageDocument(DocType):
+class PageDocument(RTDDocTypeMixin, DocType):
 
     # Metadata
     project = fields.KeywordField(attr='project.slug')
diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py
index 8e58aa165e1..9fed27de679 100644
--- a/readthedocs/search/faceted_search.py
+++ b/readthedocs/search/faceted_search.py
@@ -1,8 +1,11 @@
 import logging
 
+from elasticsearch import Elasticsearch
 from elasticsearch_dsl import FacetedSearch, TermsFacet
 from elasticsearch_dsl.query import Bool, SimpleQueryString
 
+from django.conf import settings
+
 from readthedocs.core.utils.extend import SettingsOverrideObject
 from readthedocs.search.documents import (
     PageDocument,
@@ -40,6 +43,12 @@ def __init__(self, user, **kwargs):
             if f in kwargs:
                 del kwargs[f]
 
+        # Hack a fix to our broken connection pooling
+        # This creates a new connection on every request,
+        # but actually works :)
+        log.info('Hacking Elastic to fix search connection pooling')
+        self.using = Elasticsearch(**settings.ELASTICSEARCH_DSL['default'])
+
         super().__init__(**kwargs)
 
     def query(self, search, query):
diff --git a/readthedocs/search/signals.py b/readthedocs/search/signals.py
index d3ec9c64cfd..4ba1dde0e17 100644
--- a/readthedocs/search/signals.py
+++ b/readthedocs/search/signals.py
@@ -61,14 +61,17 @@ def remove_indexed_file(sender, instance_list, **kwargs):
 
     if version and commit:
         # Sanity check by deleting all old files not in this commit
-        log.info('Deleting old commits from search index')
-        document().search().filter(
-            'term', version=version.slug,
-        ).filter(
-            'term', project=version.project.slug,
-        ).exclude(
-            'term', commit=commit,
-        ).delete()
+        try:
+            log.info('Deleting old commits from search index')
+            document().search().filter(
+                'term', version=version.slug,
+            ).filter(
+                'term', project=version.project.slug,
+            ).exclude(
+                'term', commit=commit,
+            ).delete()
+        except Exception:
+            log.warning('Unable to delete a subset of files. Continuing.', exc_info=True)
 
 
 @receiver(post_save, sender=Project)
diff --git a/readthedocs/search/tasks.py b/readthedocs/search/tasks.py
index 89e8a5dac80..0924b028263 100644
--- a/readthedocs/search/tasks.py
+++ b/readthedocs/search/tasks.py
@@ -60,7 +60,11 @@ def delete_objects_in_es(app_label, model_name, document_class, objects_id):
     queryset = doc_obj.get_queryset()
     queryset = queryset.filter(id__in=objects_id)
     log.info("Deleting model: %s, '%s' objects", model.__name__, queryset.count())
-    doc_obj.update(queryset.iterator(), action='delete')
+    try:
+        # This is a common case that we should be handling a better way
+        doc_obj.update(queryset.iterator(), action='delete')
+    except Exception:
+        log.warning('Unable to delete a subset of files. Continuing.', exc_info=True)
 
 
 @app.task(queue='web')
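
A note on why assigning a client instance to `using` works at all: elasticsearch-dsl accepts either a registered connection alias (a string) or a concrete client object wherever `using` is consulted, and `connections.get_connection()` returns any non-string argument unchanged. A minimal standalone sketch of that pass-through follows; the inline `ELASTICSEARCH_DSL` dict and hosts value are assumed stand-ins for the Django setting referenced in the diff, not RTD's actual config.

```python
# Sketch only: illustrates the ``using`` pass-through the hack relies on.
# The settings dict below is an assumed stand-in for Django's
# settings.ELASTICSEARCH_DSL['default']; the hosts value is made up.
from elasticsearch import Elasticsearch
from elasticsearch_dsl.connections import connections

ELASTICSEARCH_DSL = {'default': {'hosts': 'localhost:9200'}}

# Build a fresh client, the same way the mixin and the FacetedSearch
# __init__ above do on every request.
client = Elasticsearch(**ELASTICSEARCH_DSL['default'])

# get_connection() treats any non-string ``alias`` as an already-built
# client and returns it as-is, so searches and bulk calls made with this
# client never touch the shared pooled connection.
assert connections.get_connection(client) is client
```

The trade-off, as the inline comments say, is a new connection on every request. The `try`/`except` blocks in signals.py and tasks.py are the same spirit of fix: log the failure with `exc_info` and continue, rather than letting a failed subset of index deletes abort the whole operation.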