diff --git a/readthedocs/core/management/commands/reindex_elasticsearch.py b/readthedocs/core/management/commands/reindex_elasticsearch.py
index a2bce6df840..24927a02f3e 100644
--- a/readthedocs/core/management/commands/reindex_elasticsearch.py
+++ b/readthedocs/core/management/commands/reindex_elasticsearch.py
@@ -1,12 +1,14 @@
-"""Reindex Elastic Search indexes"""
+# -*- coding: utf-8 -*-
+"""Reindex Elastic Search indexes."""
+
+from __future__ import (
+ absolute_import, division, print_function, unicode_literals)
-from __future__ import absolute_import
import logging
+import socket
from optparse import make_option
-from django.core.management.base import BaseCommand
-from django.core.management.base import CommandError
-from django.conf import settings
+from django.core.management.base import BaseCommand, CommandError
from readthedocs.builds.constants import LATEST
from readthedocs.builds.models import Version
@@ -23,34 +25,42 @@ class Command(BaseCommand):
dest='project',
default='',
help='Project to index'),
+ make_option('-l',
+ dest='only_latest',
+ default=False,
+ action='store_true',
+ help='Only index latest'),
)
def handle(self, *args, **options):
- """Build/index all versions or a single project's version"""
+ """Build/index all versions or a single project's version."""
project = options['project']
+ only_latest = options['only_latest']
- queryset = Version.objects.all()
+ queryset = Version.objects.filter(active=True)
if project:
queryset = queryset.filter(project__slug=project)
if not queryset.exists():
raise CommandError(
- 'No project with slug: {slug}'.format(slug=project))
- log.info("Building all versions for %s", project)
- elif getattr(settings, 'INDEX_ONLY_LATEST', True):
+ u'No project with slug: {slug}'.format(slug=project))
+ log.info(u'Building all versions for %s', project)
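+ # The new -l flag replaces the old INDEX_ONLY_LATEST settings check.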
+ if only_latest:
+ log.warning('Indexing only latest')
queryset = queryset.filter(slug=LATEST)
- for version in queryset:
- log.info("Reindexing %s", version)
- try:
- commit = version.project.vcs_repo(version.slug).commit
- except: # pylint: disable=bare-except
- # An exception can be thrown here in production, but it's not
- # documented what the exception here is
- commit = None
-
+ for version_pk, version_slug, project_slug in queryset.values_list(
+ 'pk', 'slug', 'project__slug'):
+ log.info(u'Reindexing %s:%s', project_slug, version_slug)
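+ # Dispatch reindexing as a Celery task on this host's queue, rather than
+ # calling update_search synchronously as before.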
try:
- update_search(version.pk, commit,
- delete_non_commit_files=False)
+ update_search.apply_async(
+ kwargs=dict(
+ version_pk=version_pk,
+ commit='reindex',
+ delete_non_commit_files=False
+ ),
+ priority=0,
+ queue=socket.gethostname()
+ )
except Exception:
- log.exception('Reindex failed for {}'.format(version))
+ log.exception(u'Reindexing failed for %s:%s', project_slug, version_slug)
diff --git a/readthedocs/projects/migrations/0022_add-view-data.py b/readthedocs/projects/migrations/0022_add-view-data.py
new file mode 100644
index 00000000000..e9fee53cca7
--- /dev/null
+++ b/readthedocs/projects/migrations/0022_add-view-data.py
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.9.12 on 2017-12-11 13:05
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('projects', '0021_add-webhook-deprecation-feature'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='domain',
+ name='canonical',
+ field=models.BooleanField(default=False, help_text='This Domain is the primary one where the documentation is served from'),
+ ),
+ migrations.AlterField(
+ model_name='domain',
+ name='count',
+ field=models.IntegerField(default=0, help_text='Number of times this domain has been hit'),
+ ),
+ migrations.AlterField(
+ model_name='project',
+ name='allow_promos',
+ field=models.BooleanField(default=True, help_text='If unchecked, users will still see community ads.', verbose_name='Allow paid advertising'),
+ ),
+ migrations.AlterField(
+ model_name='project',
+ name='comment_moderation',
+ field=models.BooleanField(default=False, verbose_name='Comment Moderation'),
+ ),
+ migrations.AlterField(
+ model_name='project',
+ name='conf_py_file',
+ field=models.CharField(blank=True, default=b'', help_text='Path from project root to conf.py file (ex. docs/conf.py). Leave blank if you want us to find it for you.', max_length=255, verbose_name='Python configuration file'),
+ ),
+ migrations.AlterField(
+ model_name='project',
+ name='has_valid_webhook',
+ field=models.BooleanField(default=False, help_text='This project has been built with a webhook'),
+ ),
+ migrations.AlterField(
+ model_name='project',
+ name='programming_language',
+ field=models.CharField(blank=True, choices=[('words', 'Only Words'), ('py', 'Python'), ('js', 'JavaScript'), ('php', 'PHP'), ('ruby', 'Ruby'), ('perl', 'Perl'), ('java', 'Java'), ('go', 'Go'), ('julia', 'Julia'), ('c', 'C'), ('csharp', 'C#'), ('cpp', 'C++'), ('objc', 'Objective-C'), ('other', 'Other')], default=b'words', help_text='The primary programming language the project is written in.', max_length=20, verbose_name='Programming Language'),
+ ),
+ ]
diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py
index 9d6fbd19229..9fc1a36b84d 100644
--- a/readthedocs/restapi/urls.py
+++ b/readthedocs/restapi/urls.py
@@ -47,7 +47,7 @@
url(r'index_search/',
search_views.index_search,
name='index_search'),
- url(r'search/$', views.search_views.search, name='api_search'),
+ url(r'^search/$', views.search_views.search, name='api_search'),
url(r'search/project/$',
search_views.project_search,
name='api_project_search'),
diff --git a/readthedocs/restapi/utils.py b/readthedocs/restapi/utils.py
index b80190bc427..7f6d87ad68f 100644
--- a/readthedocs/restapi/utils.py
+++ b/readthedocs/restapi/utils.py
@@ -1,6 +1,9 @@
+# -*- coding: utf-8 -*-
"""Utility functions that are used by both views and celery tasks."""
-from __future__ import absolute_import
+from __future__ import (
+ absolute_import, division, print_function, unicode_literals)
+
import hashlib
import logging
@@ -37,7 +40,7 @@ def sync_versions(project, versions, type): # pylint: disable=redefined-builtin
type=type,
machine=False,
)
- log.info("(Sync Versions) Updated Version: [%s=%s] ",
+ log.info('(Sync Versions) Updated Version: [%s=%s] ',
version['verbose_name'], version['identifier'])
else:
# New Version
@@ -49,7 +52,7 @@ def sync_versions(project, versions, type): # pylint: disable=redefined-builtin
)
added.add(created_version.slug)
if added:
- log.info("(Sync Versions) Added Versions: [%s] ", ' '.join(added))
+ log.info('(Sync Versions) Added Versions: [%s] ', ' '.join(added))
return added
@@ -70,14 +73,14 @@ def delete_versions(project, version_data):
if to_delete_qs.count():
ret_val = {obj.slug for obj in to_delete_qs}
- log.info("(Sync Versions) Deleted Versions: [%s]", ' '.join(ret_val))
+ log.info('(Sync Versions) Deleted Versions: [%s]', ' '.join(ret_val))
to_delete_qs.delete()
return ret_val
return set()
def index_search_request(version, page_list, commit, project_scale, page_scale,
- section=True, delete=True):
+ section=False, delete=True):
"""
Update search indexes with build output JSON.
@@ -89,7 +92,7 @@ def index_search_request(version, page_list, commit, project_scale, page_scale,
project = version.project
log_msg = ' '.join([page['path'] for page in page_list])
- log.info("Updating search index: project=%s pages=[%s]",
+ log.info('Updating search index: project=%s pages=[%s]',
project.slug, log_msg)
project_obj = ProjectIndex()
@@ -112,7 +115,7 @@ def index_search_request(version, page_list, commit, project_scale, page_scale,
routes = [project.slug]
routes.extend([p.parent.slug for p in project.superprojects.all()])
for page in page_list:
- log.debug("Indexing page: %s:%s", project.slug, page['path'])
+ log.debug('Indexing page: %s:%s', project.slug, page['path'])
to_hash = '-'.join([project.slug, version.slug, page['path']])
page_id = hashlib.md5(to_hash.encode('utf-8')).hexdigest()
index_list.append({
@@ -142,25 +145,24 @@ def index_search_request(version, page_list, commit, project_scale, page_scale,
'weight': page_scale,
})
for route in routes:
- section_obj.bulk_index(section_index_list, parent=page_id,
- routing=route)
+ section_obj.bulk_index(section_index_list, routing=route)
for route in routes:
- page_obj.bulk_index(index_list, parent=project.slug, routing=route)
+ page_obj.bulk_index(index_list, routing=route)
if delete:
- log.info("Deleting files not in commit: %s", commit)
+ log.info('Deleting files not in commit: %s', commit)
# TODO: AK Make sure this works
delete_query = {
- "query": {
- "bool": {
- "must": [
- {"term": {"project": project.slug, }},
- {"term": {"version": version.slug, }},
+ 'query': {
+ 'bool': {
+ 'must': [
+ {'term': {'project': project.slug, }},
+ {'term': {'version': version.slug, }},
],
- "must_not": {
- "term": {
- "commit": commit
+ 'must_not': {
+ 'term': {
+ 'commit': commit
}
}
}
diff --git a/readthedocs/restapi/views/search_views.py b/readthedocs/restapi/views/search_views.py
index abe36174097..1db28af08e2 100644
--- a/readthedocs/restapi/views/search_views.py
+++ b/readthedocs/restapi/views/search_views.py
@@ -32,7 +32,7 @@ def index_search(request):
utils.index_search_request(
version=version, page_list=data['page_list'], commit=commit,
- project_scale=project_scale, page_scale=page_scale)
+ project_scale=project_scale, page_scale=page_scale, section=False)
return Response({'indexed': True})
diff --git a/readthedocs/search/indexes.py b/readthedocs/search/indexes.py
index 1b2ede6aaa9..378774e3b9e 100644
--- a/readthedocs/search/indexes.py
+++ b/readthedocs/search/indexes.py
@@ -19,7 +19,7 @@
import datetime
from elasticsearch import Elasticsearch, exceptions
-from elasticsearch.helpers import bulk_index
+from elasticsearch.helpers import bulk
from django.conf import settings
@@ -48,8 +48,6 @@ def get_settings(self, settings_override=None):
'number_of_replicas': settings.ES_DEFAULT_NUM_REPLICAS,
'number_of_shards': settings.ES_DEFAULT_NUM_SHARDS,
'refresh_interval': '5s',
- 'store.compress.tv': True,
- 'store.compress.stored': True,
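+ # The 'store.compress.*' settings no longer exist in recent Elasticsearch releases.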
'analysis': self.get_analysis(),
}
if settings_override:
@@ -76,7 +74,7 @@ def get_analysis(self):
analyzers['default_icu'] = {
'type': 'custom',
'tokenizer': 'icu_tokenizer',
- 'filter': ['word_delimiter', 'icu_folding', 'icu_normalizer'],
+ 'filter': ['custom_word_delimiter', 'icu_folding', 'icu_normalizer', 'lowercase'],
}
# Customize the word_delimiter filter to set various options.
@@ -139,7 +137,7 @@ def bulk_index(self, data, index=None, chunk_size=500, parent=None,
docs.append(doc)
# TODO: This doesn't work with the new ES setup.
- bulk_index(self.es, docs, chunk_size=chunk_size)
+ bulk(self.es, docs, chunk_size=chunk_size)
def index_document(self, data, index=None, parent=None, routing=None):
doc = self.extract_document(data)
@@ -220,25 +218,24 @@ def get_mapping(self):
# Disable _all field to reduce index size.
'_all': {'enabled': False},
'properties': {
- 'id': {'type': 'long'},
- 'name': {'type': 'string', 'analyzer': 'default_icu'},
- 'description': {'type': 'string', 'analyzer': 'default_icu'},
-
- 'slug': {'type': 'string', 'index': 'not_analyzed'},
- 'lang': {'type': 'string', 'index': 'not_analyzed'},
- 'tags': {'type': 'string', 'index': 'not_analyzed'},
- 'privacy': {'type': 'string', 'index': 'not_analyzed'},
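+ # ES 5.x mappings: 'string' fields marked 'not_analyzed' become 'keyword',
+ # and analyzed 'string' fields become 'text'.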
+ 'id': {'type': 'keyword'},
+ 'name': {'type': 'text', 'analyzer': 'default_icu'},
+ 'description': {'type': 'text', 'analyzer': 'default_icu'},
+
+ 'slug': {'type': 'keyword'},
+ 'lang': {'type': 'keyword'},
+ 'tags': {'type': 'keyword'},
+ 'privacy': {'type': 'keyword'},
'author': {
- 'type': 'string',
+ 'type': 'text',
'analyzer': 'default_icu',
'fields': {
'raw': {
- 'type': 'string',
- 'index': 'not_analyzed',
+ 'type': 'keyword',
},
},
},
- 'url': {'type': 'string', 'index': 'not_analyzed'},
+ 'url': {'type': 'keyword'},
# Add a weight field to enhance relevancy scoring.
'weight': {'type': 'float'},
}
@@ -273,19 +270,19 @@ def get_mapping(self):
# Disable _all field to reduce index size.
'_all': {'enabled': False},
# Associate a page with a project.
- '_parent': {'type': self._parent},
+ # '_parent': {'type': self._parent},
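+ # Parent/child mapping is disabled for now; documents are still routed by project slug.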
'properties': {
- 'id': {'type': 'string', 'index': 'not_analyzed'},
- 'sha': {'type': 'string', 'index': 'not_analyzed'},
- 'project': {'type': 'string', 'index': 'not_analyzed'},
- 'version': {'type': 'string', 'index': 'not_analyzed'},
- 'path': {'type': 'string', 'index': 'not_analyzed'},
- 'taxonomy': {'type': 'string', 'index': 'not_analyzed'},
- 'commit': {'type': 'string', 'index': 'not_analyzed'},
-
- 'title': {'type': 'string', 'analyzer': 'default_icu'},
- 'headers': {'type': 'string', 'analyzer': 'default_icu'},
- 'content': {'type': 'string', 'analyzer': 'default_icu'},
+ 'id': {'type': 'keyword'},
+ 'sha': {'type': 'keyword'},
+ 'project': {'type': 'keyword'},
+ 'version': {'type': 'keyword'},
+ 'path': {'type': 'keyword'},
+ 'taxonomy': {'type': 'keyword'},
+ 'commit': {'type': 'keyword'},
+
+ 'title': {'type': 'text', 'analyzer': 'default_icu'},
+ 'headers': {'type': 'text', 'analyzer': 'default_icu'},
+ 'content': {'type': 'text', 'analyzer': 'default_icu'},
# Add a weight field to enhance relevancy scoring.
'weight': {'type': 'float'},
}
@@ -321,7 +318,7 @@ def get_mapping(self):
# Disable _all field to reduce index size.
'_all': {'enabled': False},
# Associate a section with a page.
- '_parent': {'type': self._parent},
+ # '_parent': {'type': self._parent},
# Commenting this out until we need it.
# 'suggest': {
# "type": "completion",
@@ -330,18 +327,18 @@ def get_mapping(self):
# "payloads": True,
# },
'properties': {
- 'id': {'type': 'string', 'index': 'not_analyzed'},
- 'project': {'type': 'string', 'index': 'not_analyzed'},
- 'version': {'type': 'string', 'index': 'not_analyzed'},
- 'path': {'type': 'string', 'index': 'not_analyzed'},
- 'page_id': {'type': 'string', 'index': 'not_analyzed'},
- 'commit': {'type': 'string', 'index': 'not_analyzed'},
- 'title': {'type': 'string', 'analyzer': 'default_icu'},
- 'content': {'type': 'string', 'analyzer': 'default_icu'},
+ 'id': {'type': 'keyword'},
+ 'project': {'type': 'keyword'},
+ 'version': {'type': 'keyword'},
+ 'path': {'type': 'keyword'},
+ 'page_id': {'type': 'keyword'},
+ 'commit': {'type': 'keyword'},
+ 'title': {'type': 'text', 'analyzer': 'default_icu'},
+ 'content': {'type': 'text', 'analyzer': 'default_icu'},
'blocks': {
'type': 'object',
'properties': {
- 'code': {'type': 'string', 'analyzer': 'default_icu'}
+ 'code': {'type': 'text', 'analyzer': 'default_icu'}
}
},
# Add a weight field to enhance relevancy scoring.
diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py
index 1c33ed11cf0..d462528a259 100644
--- a/readthedocs/settings/base.py
+++ b/readthedocs/settings/base.py
@@ -115,6 +115,7 @@ def INSTALLED_APPS(self): # noqa
if ext:
apps.append('django_countries')
apps.append('readthedocsext.donate')
+ apps.append('readthedocsext.search')
apps.append('readthedocsext.embed')
return apps
diff --git a/readthedocs/urls.py b/readthedocs/urls.py
index b8068ae1f52..6b7bdfaeebe 100644
--- a/readthedocs/urls.py
+++ b/readthedocs/urls.py
@@ -87,15 +87,18 @@
if 'readthedocsext.donate' in settings.INSTALLED_APPS:
# Include donation URL's
groups.append([
- url(r'^sustainability/', include('readthedocsext.donate.urls')),
+ url(r'^sustainability/', include('readthedocsext.donate.urls'))
])
-
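+# Replace the default 'search' URL with the view from readthedocsext.search when installed.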
+if 'readthedocsext.search' in settings.INSTALLED_APPS:
+ for num, _url in enumerate(rtd_urls):
+ if _url and hasattr(_url, 'name') and _url.name == 'search':
+ rtd_urls[num] = url(
+ r'^search/', 'readthedocsext.search.mainsearch.elastic_search', name='search')
if 'readthedocsext.embed' in settings.INSTALLED_APPS:
api_urls.insert(
0,
url(r'^api/v1/embed/', include('readthedocsext.embed.urls'))
)
-
if not getattr(settings, 'USE_SUBDOMAIN', False) or settings.DEBUG:
groups.insert(0, docs_urls)
if getattr(settings, 'ALLOW_ADMIN', True):
diff --git a/requirements/pip.txt b/requirements/pip.txt
index ca40caf724a..9100adbccd0 100644
--- a/requirements/pip.txt
+++ b/requirements/pip.txt
@@ -36,8 +36,7 @@ dnspython==1.15.0
httplib2==0.7.7
# Search
-elasticsearch==1.5.0
-pyelasticsearch==0.7.1
+elasticsearch==5.5.1
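+# pyelasticsearch is dropped; indexing now goes through the official client's bulk helper.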
pyquery==1.2.2
# Utils