Skip to content

Commit 5ce11e9

Browse files
committed
Make indexing work for SphinxDomain objects.
1 parent 46fdd3c commit 5ce11e9

File tree

10 files changed

+96
-52
lines changed

10 files changed

+96
-52
lines changed

readthedocs/projects/models.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from django.db import models
1313
from django.urls import NoReverseMatch, reverse
1414
from django.utils.translation import ugettext_lazy as _
15+
from django.utils.functional import cached_property
1516
from django_extensions.db.models import TimeStampedModel
1617
from guardian.shortcuts import assign
1718
from six.moves import shlex_quote
@@ -1183,7 +1184,7 @@ def get_processed_json(self):
11831184
'sections': [],
11841185
}
11851186

1186-
@property
1187+
@cached_property
11871188
def processed_json(self):
11881189
return self.get_processed_json()
11891190

readthedocs/projects/tasks.py

Lines changed: 36 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
)
6161
from readthedocs.doc_builder.loader import get_builder_class
6262
from readthedocs.doc_builder.python_environments import Conda, Virtualenv
63+
from readthedocs.search.documents import PageDocument, SphinxDomainDocument
6364
from readthedocs.sphinx_domains.models import SphinxDomain
6465
from readthedocs.projects.models import APIProject
6566
from readthedocs.restapi.client import api as api_v2
@@ -1213,13 +1214,12 @@ def fileify(version_pk, commit):
12131214
project = version.project
12141215

12151216
if not commit:
1216-
log.info(
1217+
log.warning(
12171218
LOG_TEMPLATE.format(
12181219
project=project.slug,
12191220
version=version.slug,
12201221
msg=(
1221-
'Imported File not being built because no commit '
1222-
'information'
1222+
'Search index not being built because no commit information'
12231223
),
12241224
),
12251225
)
@@ -1234,16 +1234,15 @@ def fileify(version_pk, commit):
12341234
msg='Creating ImportedFiles',
12351235
),
12361236
)
1237-
_manage_imported_files(version, path, commit)
1238-
_update_intersphinx_data(version, path, commit)
1239-
else:
1240-
log.info(
1241-
LOG_TEMPLATE.format(
1242-
project=project.slug,
1243-
version=version.slug,
1244-
msg='No ImportedFile files',
1245-
),
1246-
)
1237+
try:
1238+
_manage_imported_files(version, path, commit)
1239+
except Exception:
1240+
log.exception('Failed during ImportedFile creation')
1241+
1242+
try:
1243+
_update_intersphinx_data(version, path, commit)
1244+
except Exception:
1245+
log.exception('Failed during SphinxDomain creation')
12471246

12481247

12491248
def _update_intersphinx_data(version, path, commit):
@@ -1286,6 +1285,8 @@ class MockApp:
12861285
def warn(self, msg):
12871286
log.warning('Sphinx MockApp: %s', msg)
12881287

1288+
created_sphinx_domains = []
1289+
12891290
invdata = intersphinx.fetch_inventory(MockApp(), '', object_file)
12901291
for key, value in sorted(invdata.items() or {}):
12911292
domain, _type = key.split(':')
@@ -1302,7 +1303,7 @@ def warn(self, msg):
13021303
else:
13031304
doc_name, anchor = url, ''
13041305
display_name = einfo[3]
1305-
obj, _ = SphinxDomain.objects.get_or_create(
1306+
obj, created = SphinxDomain.objects.get_or_create(
13061307
project=version.project,
13071308
version=version,
13081309
domain=domain,
@@ -1317,9 +1318,25 @@ def warn(self, msg):
13171318
if obj.commit != commit:
13181319
obj.commit = commit
13191320
obj.save()
1320-
SphinxDomain.objects.filter(project=version.project,
1321-
version=version
1322-
).exclude(commit=commit).delete()
1321+
created_sphinx_domains.append(obj)
1322+
1323+
# Send bulk_post_create signal for bulk indexing to Elasticsearch
1324+
bulk_post_create.send(sender=SphinxDomainDocument, instance_list=created_sphinx_domains)
1325+
1326+
# Delete the SphinxDomain objects from the previous commit first and
1327+
# send bulk_post_delete signal for bulk removing from Elasticsearch
1328+
delete_queryset = (
1329+
SphinxDomain.objects.filter(project=version.project,
1330+
version=version
1331+
).exclude(commit=commit)
1332+
)
1333+
# Keep the objects in memory so they can be passed to the signal
1334+
instance_list = list(delete_queryset)
1335+
# Always pass a list of instances, not a queryset.
1336+
bulk_post_delete.send(sender=SphinxDomainDocument, instance_list=instance_list)
1337+
1338+
# Delete SphinxDomain objects left over from previous commits
1339+
delete_queryset.delete()
13231340

13241341

13251342
def _manage_imported_files(version, path, commit):
@@ -1367,7 +1384,7 @@ def _manage_imported_files(version, path, commit):
13671384
created_html_files.append(obj)
13681385

13691386
# Send bulk_post_create signal for bulk indexing to Elasticsearch
1370-
bulk_post_create.send(sender=HTMLFile, instance_list=created_html_files)
1387+
bulk_post_create.send(sender=PageDocument, instance_list=created_html_files)
13711388

13721389
# Delete the HTMLFile first from previous commit and
13731390
# send bulk_post_delete signal for bulk removing from Elasticsearch
@@ -1377,10 +1394,8 @@ def _manage_imported_files(version, path, commit):
13771394
)
13781395
# Keep the objects in memory so they can be passed to the signal
13791396
instance_list = list(delete_queryset)
1380-
# Safely delete from database
1381-
delete_queryset.delete()
13821397
# Always pass a list of instances, not a queryset.
1383-
bulk_post_delete.send(sender=HTMLFile, instance_list=instance_list)
1398+
bulk_post_delete.send(sender=PageDocument, instance_list=instance_list)
13841399

13851400
# Delete ImportedFiles from previous versions
13861401
delete_queryset.delete()

readthedocs/search/documents.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,23 @@ class SphinxDomainDocument(DocType):
2727
project = fields.KeywordField(attr='project.slug')
2828
version = fields.KeywordField(attr='version.slug')
2929
role_name = fields.KeywordField(attr='role_name')
30+
31+
# For linking to the URL
32+
doc_name = fields.KeywordField(attr='doc_name')
3033
anchor = fields.KeywordField(attr='anchor')
31-
doc_display = fields.KeywordField(attr='doc_display')
32-
type_display = fields.KeywordField(attr='type_display')
34+
35+
# For showing in the search result
36+
type_display = fields.TextField(attr='type_display')
37+
doc_display = fields.TextField(attr='doc_display')
38+
# Simple analyzer breaks on `.`,
39+
# otherwise search results are too strict for this use case
3340
name = fields.TextField(attr='name', analyzer='simple')
41+
display_name = fields.TextField(attr='display_name', analyzer='simple')
3442

3543
modified_model_field = 'modified'
3644

3745
class Meta(object):
3846
model = SphinxDomain
39-
fields = ('display_name', 'doc_name')
4047
ignore_signals = True
4148

4249
def get_queryset(self):

readthedocs/search/faceted_search.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ class DomainSearchBase(RTDFacetedSearch):
104104
}
105105
doc_types = [SphinxDomainDocument]
106106
index = SphinxDomainDocument._doc_type.index
107-
fields = ('display_name^5', 'name', 'project', 'type_display')
107+
fields = ('display_name^5', 'name^3', 'project^3', 'type_display')
108108
operators = ['and']
109109

110110

@@ -152,7 +152,8 @@ class AllSearch(RTDFacetedSearch):
152152
'version': TermsFacet(field='version'),
153153
'language': TermsFacet(field='language'),
154154
'role_name': TermsFacet(field='role_name'),
155-
'index': TermsFacet(field='_index'),
155+
# Need to improve UX here for exposing to users
156+
# 'index': TermsFacet(field='_index'),
156157
}
157158
doc_types = [SphinxDomainDocument, PageDocument, ProjectDocument]
158159
index = [SphinxDomainDocument._doc_type.index,

readthedocs/search/signals.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,19 @@
55
from django.dispatch import receiver
66
from django_elasticsearch_dsl.apps import DEDConfig
77

8-
from readthedocs.projects.models import HTMLFile, Project
8+
from readthedocs.projects.models import Project
99
from readthedocs.projects.signals import bulk_post_create, bulk_post_delete
1010
from readthedocs.search.tasks import delete_objects_in_es, index_objects_to_es
1111

1212

13-
@receiver(bulk_post_create, sender=HTMLFile)
14-
def index_html_file(instance_list, **_):
13+
@receiver(bulk_post_create)
14+
def index_indexed_file(sender, instance_list, **_):
1515
"""Handle indexing from the build process."""
16-
from readthedocs.search.documents import PageDocument
16+
model = sender._doc_type.model
1717
kwargs = {
18-
'app_label': HTMLFile._meta.app_label,
19-
'model_name': HTMLFile.__name__,
20-
'document_class': str(PageDocument),
18+
'app_label': model._meta.app_label,
19+
'model_name': model.__name__,
20+
'document_class': str(sender),
2121
'objects_id': [obj.id for obj in instance_list],
2222
}
2323

@@ -26,14 +26,14 @@ def index_html_file(instance_list, **_):
2626
index_objects_to_es(**kwargs)
2727

2828

29-
@receiver(bulk_post_delete, sender=HTMLFile)
30-
def remove_html_file(instance_list, **_):
29+
@receiver(bulk_post_delete)
30+
def remove_indexed_file(sender, instance_list, **_):
3131
"""Remove deleted files from the build process."""
32-
from readthedocs.search.documents import PageDocument
32+
model = sender._doc_type.model
3333
kwargs = {
34-
'app_label': HTMLFile._meta.app_label,
35-
'model_name': HTMLFile.__name__,
36-
'document_class': str(PageDocument),
34+
'app_label': model._meta.app_label,
35+
'model_name': model.__name__,
36+
'document_class': str(sender),
3737
'objects_id': [obj.id for obj in instance_list],
3838
}
3939

readthedocs/search/views.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,13 @@ def elastic_search(request, project_slug=None):
4040
:param project_slug: Sent when the view is a project search
4141
"""
4242

43-
request_type = request.GET.get('type', 'project')
44-
4543
if project_slug:
4644
queryset = Project.objects.protected(request.user)
4745
project_obj = get_object_or_404(queryset, slug=project_slug)
48-
request_type = 'all'
4946

5047
user_input = UserInput(
5148
query=request.GET.get('q'),
52-
type=request_type,
49+
type=request.GET.get('type', 'file'),
5350
project=project_slug or request.GET.get('project'),
5451
version=request.GET.get('version', LATEST),
5552
taxonomy=request.GET.get('taxonomy'),

readthedocs/settings/base.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,6 @@ def USE_PROMOS(self): # noqa
328328

329329
# CORS
330330
CORS_ORIGIN_REGEX_WHITELIST = (
331-
'(.*)localhost(.*)',
332331
'^http://(.+)\.readthedocs\.io$',
333332
'^https://(.+)\.readthedocs\.io$'
334333
)
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# -*- coding: utf-8 -*-
2+
# Generated by Django 1.11.20 on 2019-03-01 14:25
3+
from __future__ import unicode_literals
4+
5+
from django.db import migrations, models
6+
7+
8+
class Migration(migrations.Migration):
9+
10+
dependencies = [
11+
('sphinx_domains', '0002_increase_max_length'),
12+
]
13+
14+
operations = [
15+
migrations.AddField(
16+
model_name='sphinxdomain',
17+
name='doc_display',
18+
field=models.CharField(max_length=4092, null=True, verbose_name='Doc Display'),
19+
),
20+
migrations.AddField(
21+
model_name='sphinxdomain',
22+
name='type_display',
23+
field=models.CharField(max_length=4092, null=True, verbose_name='Type Display'),
24+
),
25+
]

readthedocs/sphinx_domains/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ class SphinxDomain(TimeStampedModel):
5353
type_display = models.CharField(
5454
_('Type Display'),
5555
max_length=4092,
56+
null=True,
5657
)
5758
doc_name = models.CharField(
5859
_('Doc Name'),
@@ -61,6 +62,7 @@ class SphinxDomain(TimeStampedModel):
6162
doc_display = models.CharField(
6263
_('Doc Display'),
6364
max_length=4092,
65+
null=True,
6466
)
6567
anchor = models.CharField(
6668
_('Anchor'),

readthedocs/templates/search/elastic_search.html

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,6 @@
1313

1414
{% endblock %}
1515

16-
{% block extra_scripts %}
17-
<script type="text/javascript" src="{% static 'search/readthedocs-client.js' %}"></script>
18-
<!--<script type="text/javascript" src="{% static 'search/search-embed.js' %}"></script>-->
19-
{% endblock %}
20-
2116
{% block project_editing %}
2217
{% if project_obj %}
2318
{% with search_active="active" project=project_obj %}
@@ -31,10 +26,12 @@
3126
<div class="navigable">
3227
<ul>
3328
<h5>{% trans 'Object Type' %}</h5>
34-
<li class="{% if type == 'all' %}active{% endif %}"><a href="?{% url_replace request 'type' 'all' %}">{% trans 'All' %}</a></li>
29+
{% if not project_obj %}
3530
<li class="{% if type == 'project' %}active{% endif %}"><a href="?{% url_replace request 'type' 'project' %}">{% trans 'Projects' %}</a></li>
31+
{% endif %}
3632
<li class="{% if type == 'file' %}active{% endif %}"><a href="?{% url_replace request 'type' 'file' %}">{% trans 'Files' %}</a></li>
3733
<li class="{% if type == 'domain' %}active{% endif %}"><a href="?{% url_replace request 'type' 'domain' %}">{% trans 'Code API' %}</a></li>
34+
{# <li class="{% if type == 'all' %}active{% endif %}"><a href="?{% url_replace request 'type' 'all' %}">{% trans 'All' %}</a></li> #}
3835

3936
<hr>
4037

@@ -195,7 +192,7 @@ <h3>
195192
{% elif 'domain' in result.meta.index %}
196193

197194
<a href="{% doc_url result.project|get_project result.version result.doc_name %}?highlight={{ query }}#{{ result.anchor }}">
198-
{{ result.project }} - {{ result.name }}
195+
{{ result.project }} - {% if result.meta.highlight.name|length %} {{ result.meta.highlight.name.0|safe }} {% else %} {{ result.name }} {% endif %}
199196
</a>
200197
<p class="fragment">
201198
{{ result.type_display|capfirst }} in {{ result.doc_display }}

0 commit comments

Comments
 (0)