Skip to content

Upgrade Elasticsearch to 5.X #3787

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 32 additions & 22 deletions readthedocs/core/management/commands/reindex_elasticsearch.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""Reindex Elastic Search indexes"""
# -*- coding: utf-8 -*-
"""Reindex Elastic Search indexes."""

from __future__ import (
absolute_import, division, print_function, unicode_literals)

from __future__ import absolute_import
import logging
import socket
from optparse import make_option

from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError

from readthedocs.builds.constants import LATEST
from readthedocs.builds.models import Version
Expand All @@ -23,34 +25,42 @@ class Command(BaseCommand):
dest='project',
default='',
help='Project to index'),
make_option('-l',
dest='only_latest',
default=False,
action='store_true',
help='Only index latest'),
)

def handle(self, *args, **options):
"""Build/index all versions or a single project's version"""
"""Build/index all versions or a single project's version."""
project = options['project']
only_latest = options['only_latest']

queryset = Version.objects.all()
queryset = Version.objects.filter(active=True)

if project:
queryset = queryset.filter(project__slug=project)
if not queryset.exists():
raise CommandError(
'No project with slug: {slug}'.format(slug=project))
log.info("Building all versions for %s", project)
elif getattr(settings, 'INDEX_ONLY_LATEST', True):
u'No project with slug: {slug}'.format(slug=project))
log.info(u'Building all versions for %s', project)
if only_latest:
log.warning('Indexing only latest')
queryset = queryset.filter(slug=LATEST)

for version in queryset:
log.info("Reindexing %s", version)
try:
commit = version.project.vcs_repo(version.slug).commit
except: # pylint: disable=bare-except
# An exception can be thrown here in production, but it's not
# documented what the exception here is
commit = None

for version_pk, version_slug, project_slug in queryset.values_list(
'pk', 'slug', 'project__slug'):
log.info(u'Reindexing %s:%s' % (project_slug, version_slug))
try:
update_search(version.pk, commit,
delete_non_commit_files=False)
update_search.apply_async(
kwargs=dict(
version_pk=version_pk,
commit='reindex',
delete_non_commit_files=False
),
priority=0,
queue=socket.gethostname()
)
except Exception:
log.exception('Reindex failed for %s', version)
log.exception(u'Reindexing failed for %s:%s' % (project_slug, version_slug))
19 changes: 10 additions & 9 deletions readthedocs/projects/views/public.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,10 @@ def elastic_project_search(request, project_slug):
{'match': {'title': {'query': query, 'boost': 10}}},
{'match': {'headers': {'query': query, 'boost': 5}}},
{'match': {'content': {'query': query}}},
],
'filter': [
{'term': {'project': project_slug}},
{'term': {'version': version_slug}},
]
}
},
Expand All @@ -315,13 +319,7 @@ def elastic_project_search(request, project_slug):
'content': {},
}
},
'fields': ['title', 'project', 'version', 'path'],
'filter': {
'and': [
{'term': {'project': project_slug}},
{'term': {'version': version_slug}},
]
},
'_source': ['title', 'project', 'version', 'path'],
'size': 50, # TODO: Support pagination.
}

Expand All @@ -335,9 +333,12 @@ def elastic_project_search(request, project_slug):
if results:
# pre and post 1.0 compat
for num, hit in enumerate(results['hits']['hits']):
for key, val in list(hit['fields'].items()):
for key, val in list(hit['_source'].items()):
if isinstance(val, list):
results['hits']['hits'][num]['fields'][key] = val[0]
results['hits']['hits'][num]['_source'][key] = val[0]
# we cannot render attributes starting with an underscore
hit['fields'] = hit['_source']
del hit['_source']

return render(
request,
Expand Down
2 changes: 1 addition & 1 deletion readthedocs/restapi/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
url(r'index_search/',
search_views.index_search,
name='index_search'),
url(r'search/$', views.search_views.search, name='api_search'),
url(r'^search/$', views.search_views.search, name='api_search'),
url(r'search/project/$',
search_views.project_search,
name='api_project_search'),
Expand Down
3 changes: 1 addition & 2 deletions readthedocs/restapi/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,11 @@ def index_search_request(
for route in routes:
section_obj.bulk_index(
section_index_list,
parent=page_id,
routing=route,
)

for route in routes:
page_obj.bulk_index(index_list, parent=project.slug, routing=route)
page_obj.bulk_index(index_list, routing=route)

if delete:
log.info('Deleting files not in commit: %s', commit)
Expand Down
9 changes: 6 additions & 3 deletions readthedocs/restapi/views/search_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def index_search(request):

utils.index_search_request(
version=version, page_list=data['page_list'], commit=commit,
project_scale=project_scale, page_scale=page_scale)
project_scale=project_scale, page_scale=page_scale, section=False)

return Response({'indexed': True})

Expand Down Expand Up @@ -64,7 +64,7 @@ def search(request):
# Supplement result paths with domain information on project
hits = results.get('hits', {}).get('hits', [])
for (n, hit) in enumerate(hits):
fields = hit.get('fields', {})
fields = hit.get('_source', {})
search_project = fields.get('project')[0]
search_version = fields.get('version')[0]
path = fields.get('path')[0]
Expand All @@ -77,9 +77,12 @@ def search(request):
)
except ProjectRelationship.DoesNotExist:
pass
results['hits']['hits'][n]['fields']['link'] = (
results['hits']['hits'][n]['_source']['link'] = (
canonical_url + path
)
# we cannot render attributes starting with an underscore
results['hits']['hits'][n]['fields'] = results['hits']['hits'][n]['_source']
del results['hits']['hits'][n]['_source']

return Response({'results': results})

Expand Down
160 changes: 160 additions & 0 deletions readthedocs/rtd_tests/mocks/search_mock_responses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# Canned search response used as a mock for project searches.
#
# The value is a raw JSON document kept as a string (note the JSON literal
# ``false``), so it has to be parsed (e.g. with ``json.loads``) before it can
# be used as a dict -- how callers consume it is not visible here.
#
# Contents: a single hit of ``_type`` "project" in the "readthedocs" index
# (the "pip" project, with a highlighted ``name``) and one ``language``
# aggregation holding a single "en" bucket.
# NOTE(review): presumably mirrors the Elasticsearch 5.x response layout
# (``_source`` instead of ``fields``) -- confirm against a real cluster.
search_project_response = """
{
"took": 17,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1.8232156,
"hits": [
{
"_index": "readthedocs",
"_type": "project",
"_id": "6",
"_score": 1.8232156,
"_source": {
"name": "Pip",
"description": "",
"lang": "en",
"url": "/projects/pip/",
"slug": "pip"
},
"highlight": {
"name": [
"<em>Pip</em>"
]
}
}
]
},
"aggregations": {
"language": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "en",
"doc_count": 1
}
]
}
}
}
"""

# Canned search response used as a mock for file (page) searches.
#
# Like the project response, this is a raw JSON document stored as a string
# and must be parsed before use -- how callers consume it is not visible here.
#
# Contents: three hits of ``_type`` "page" in the "readthedocs" index, all for
# project "prova" / version "latest" and all routed (``_routing``) by the
# project slug. Each hit carries ``path``, ``project``, ``title`` and
# ``version`` in ``_source`` plus ``highlight`` fragments; the last hit only
# has a ``content`` highlight. Aggregations are provided for ``project``,
# ``taxonomy`` (empty buckets) and ``version``.
# NOTE(review): presumably mirrors the Elasticsearch 5.x response layout
# (``_source`` instead of ``fields``) -- confirm against a real cluster.
search_file_response = """
{
"took": 27,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 6.989019,
"hits": [
{
"_index": "readthedocs",
"_type": "page",
"_id": "AWKuy4jp-H7vMtbTbHP5",
"_score": 6.989019,
"_routing": "prova",
"_source": {
"path": "_docs/cap2",
"project": "prova",
"title": "Capitolo 2",
"version": "latest"
},
"highlight": {
"headers": [
"<em>Capitolo</em> 2"
],
"title": [
"<em>Capitolo</em> 2"
],
"content": [
"<em>Capitolo</em> 2 In questo <em>capitolo</em>, vengono trattate"
]
}
},
{
"_index": "readthedocs",
"_type": "page",
"_id": "AWKuy4jp-H7vMtbTbHP4",
"_score": 6.973402,
"_routing": "prova",
"_source": {
"path": "_docs/cap1",
"project": "prova",
"title": "Capitolo 1",
"version": "latest"
},
"highlight": {
"headers": [
"<em>Capitolo</em> 1"
],
"title": [
"<em>Capitolo</em> 1"
],
"content": [
"<em>Capitolo</em> 1 In questo <em>capitolo</em>, le funzioni principali"
]
}
},
{
"_index": "readthedocs",
"_type": "page",
"_id": "AWKuy4jp-H7vMtbTbHP3",
"_score": 0.2017303,
"_routing": "prova",
"_source": {
"path": "index",
"project": "prova",
"title": "Titolo del documento",
"version": "latest"
},
"highlight": {
"content": [
"Titolo del documento Nel <em>Capitolo</em> 1 Nel <em>Capitolo</em> 2"
]
}
}
]
},
"aggregations": {
"project": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "prova",
"doc_count": 3
}
]
},
"taxonomy": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
},
"version": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "latest",
"doc_count": 3
}
]
}
}
}
"""
Loading