Skip to content

Commit 8d7942b

Browse files
ericholscher safwanrahman
authored and committed
Merge pull request readthedocs#4211 from safwanrahman/search
Upgrade Elasticsearch to version 6.x
2 parents dfdf4df + 0965a94 commit 8d7942b

29 files changed

+443
-173
lines changed

.travis.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ python:
44
- 3.6
55
sudo: false
66
env:
7-
- ES_VERSION=1.3.9 ES_DOWNLOAD_URL=https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz
7+
- ES_VERSION=6.2.4 ES_DOWNLOAD_URL=https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz
88
matrix:
99
include:
1010
- python: 2.7
@@ -42,3 +42,4 @@ notifications:
4242
branches:
4343
only:
4444
- master
45+
- search_upgrade

readthedocs/projects/admin.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
from .forms import FeatureForm
1717
from .models import (Project, ImportedFile, Feature,
18-
ProjectRelationship, EmailHook, WebHook, Domain)
18+
ProjectRelationship, EmailHook, WebHook, Domain, HTMLFile)
1919
from .notifications import ResourceUsageNotification
2020
from .tasks import remove_dir
2121

@@ -206,3 +206,4 @@ def project_count(self, feature):
206206
admin.site.register(Feature, FeatureAdmin)
207207
admin.site.register(EmailHook)
208208
admin.site.register(WebHook)
209+
admin.site.register(HTMLFile)

readthedocs/projects/apps.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,6 @@ class ProjectsConfig(AppConfig):
99
def ready(self):
1010
from readthedocs.projects import tasks
1111
from readthedocs.worker import app
12+
1213
app.tasks.register(tasks.SyncRepositoryTask)
1314
app.tasks.register(tasks.UpdateDocsTask)

readthedocs/projects/managers.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from django.db import models
2+
3+
4+
class HTMLFileManager(models.Manager):

    """Manager whose queryset keeps only rows with a ``name`` ending in ``.html``."""

    def get_queryset(self):
        """Return the parent manager's queryset filtered on ``name__endswith='.html'``."""
        base_queryset = super(HTMLFileManager, self).get_queryset()
        return base_queryset.filter(name__endswith='.html')
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# -*- coding: utf-8 -*-
2+
# Generated by Django 1.9.13 on 2018-06-18 16:45
3+
from __future__ import unicode_literals
4+
5+
from django.db import migrations, models
6+
7+
8+
class Migration(migrations.Migration):

    """
    Create the ``HTMLFile`` proxy model and re-declare several ``Project`` fields.

    The first operation creates ``HTMLFile`` as a proxy (no fields of its own)
    on top of ``projects.importedfile``. The remaining operations are
    ``AlterField`` calls on the ``project`` model for ``comment_moderation``,
    ``documentation_type``, ``language``, ``privacy_level``,
    ``python_interpreter`` and ``version_privacy_level``.
    """

    # Must run after the previous migration of the ``projects`` app.
    dependencies = [
        ('projects', '0025_show-version-warning-existing-projects'),
    ]

    operations = [
        # Proxy model: declares no fields, only a new model class based on
        # ``projects.importedfile``.
        migrations.CreateModel(
            name='HTMLFile',
            fields=[
            ],
            options={
                'proxy': True,
            },
            bases=('projects.importedfile',),
        ),
        migrations.AlterField(
            model_name='project',
            name='comment_moderation',
            field=models.BooleanField(default=False, verbose_name='Comment Moderation'),
        ),
        migrations.AlterField(
            model_name='project',
            name='documentation_type',
            field=models.CharField(choices=[('auto', 'Automatically Choose'), ('sphinx', 'Sphinx Html'), ('mkdocs', 'Mkdocs (Markdown)'), ('sphinx_htmldir', 'Sphinx HtmlDir'), ('sphinx_singlehtml', 'Sphinx Single Page HTML')], default='sphinx', help_text='Type of documentation you are building. <a href="http://www.sphinx-doc.org/en/stable/builders.html#sphinx.builders.html.DirectoryHTMLBuilder">More info</a>.', max_length=20, verbose_name='Documentation type'),
        ),
        migrations.AlterField(
            model_name='project',
            name='language',
            field=models.CharField(choices=[('aa', 'Afar'), ('ab', 'Abkhaz'), ('af', 'Afrikaans'), ('am', 'Amharic'), ('ar', 'Arabic'), ('as', 'Assamese'), ('ay', 'Aymara'), ('az', 'Azerbaijani'), ('ba', 'Bashkir'), ('be', 'Belarusian'), ('bg', 'Bulgarian'), ('bh', 'Bihari'), ('bi', 'Bislama'), ('bn', 'Bengali'), ('bo', 'Tibetan'), ('br', 'Breton'), ('ca', 'Catalan'), ('co', 'Corsican'), ('cs', 'Czech'), ('cy', 'Welsh'), ('da', 'Danish'), ('de', 'German'), ('dz', 'Dzongkha'), ('el', 'Greek'), ('en', 'English'), ('eo', 'Esperanto'), ('es', 'Spanish'), ('et', 'Estonian'), ('eu', 'Basque'), ('fa', 'Iranian'), ('fi', 'Finnish'), ('fj', 'Fijian'), ('fo', 'Faroese'), ('fr', 'French'), ('fy', 'Western Frisian'), ('ga', 'Irish'), ('gd', 'Scottish Gaelic'), ('gl', 'Galician'), ('gn', 'Guarani'), ('gu', 'Gujarati'), ('ha', 'Hausa'), ('hi', 'Hindi'), ('he', 'Hebrew'), ('hr', 'Croatian'), ('hu', 'Hungarian'), ('hy', 'Armenian'), ('ia', 'Interlingua'), ('id', 'Indonesian'), ('ie', 'Interlingue'), ('ik', 'Inupiaq'), ('is', 'Icelandic'), ('it', 'Italian'), ('iu', 'Inuktitut'), ('ja', 'Japanese'), ('jv', 'Javanese'), ('ka', 'Georgian'), ('kk', 'Kazakh'), ('kl', 'Kalaallisut'), ('km', 'Khmer'), ('kn', 'Kannada'), ('ko', 'Korean'), ('ks', 'Kashmiri'), ('ku', 'Kurdish'), ('ky', 'Kyrgyz'), ('la', 'Latin'), ('ln', 'Lingala'), ('lo', 'Lao'), ('lt', 'Lithuanian'), ('lv', 'Latvian'), ('mg', 'Malagasy'), ('mi', 'Maori'), ('mk', 'Macedonian'), ('ml', 'Malayalam'), ('mn', 'Mongolian'), ('mr', 'Marathi'), ('ms', 'Malay'), ('mt', 'Maltese'), ('my', 'Burmese'), ('na', 'Nauru'), ('ne', 'Nepali'), ('nl', 'Dutch'), ('no', 'Norwegian'), ('oc', 'Occitan'), ('om', 'Oromo'), ('or', 'Oriya'), ('pa', 'Panjabi'), ('pl', 'Polish'), ('ps', 'Pashto'), ('pt', 'Portuguese'), ('qu', 'Quechua'), ('rm', 'Romansh'), ('rn', 'Kirundi'), ('ro', 'Romanian'), ('ru', 'Russian'), ('rw', 'Kinyarwanda'), ('sa', 'Sanskrit'), ('sd', 'Sindhi'), ('sg', 'Sango'), ('si', 'Sinhala'), ('sk', 'Slovak'), ('sl', 'Slovenian'), ('sm', 'Samoan'), ('sn', 'Shona'), ('so', 'Somali'), ('sq', 'Albanian'), ('sr', 'Serbian'), ('ss', 'Swati'), ('st', 'Southern Sotho'), ('su', 'Sudanese'), ('sv', 'Swedish'), ('sw', 'Swahili'), ('ta', 'Tamil'), ('te', 'Telugu'), ('tg', 'Tajik'), ('th', 'Thai'), ('ti', 'Tigrinya'), ('tk', 'Turkmen'), ('tl', 'Tagalog'), ('tn', 'Tswana'), ('to', 'Tonga'), ('tr', 'Turkish'), ('ts', 'Tsonga'), ('tt', 'Tatar'), ('tw', 'Twi'), ('ug', 'Uyghur'), ('uk', 'Ukrainian'), ('ur', 'Urdu'), ('uz', 'Uzbek'), ('vi', 'Vietnamese'), ('vo', 'Volapuk'), ('wo', 'Wolof'), ('xh', 'Xhosa'), ('yi', 'Yiddish'), ('yo', 'Yoruba'), ('za', 'Zhuang'), ('zh', 'Chinese'), ('zu', 'Zulu'), ('nb_NO', 'Norwegian Bokmal'), ('pt_BR', 'Brazilian Portuguese'), ('es_MX', 'Mexican Spanish'), ('uk_UA', 'Ukrainian'), ('zh_CN', 'Simplified Chinese'), ('zh_TW', 'Traditional Chinese')], default='en', help_text="The language the project documentation is rendered in. Note: this affects your project's URL.", max_length=20, verbose_name='Language'),
        ),
        migrations.AlterField(
            model_name='project',
            name='privacy_level',
            field=models.CharField(choices=[('public', 'Public'), ('protected', 'Protected'), ('private', 'Private')], default='public', help_text='Level of privacy that you want on the repository. Protected means public but not in listings.', max_length=20, verbose_name='Privacy Level'),
        ),
        migrations.AlterField(
            model_name='project',
            name='python_interpreter',
            field=models.CharField(choices=[('python', 'CPython 2.x'), ('python3', 'CPython 3.x')], default='python', help_text='The Python interpreter used to create the virtual environment.', max_length=20, verbose_name='Python Interpreter'),
        ),
        migrations.AlterField(
            model_name='project',
            name='version_privacy_level',
            field=models.CharField(choices=[('public', 'Public'), ('protected', 'Protected'), ('private', 'Private')], default='public', help_text='Default level of privacy you want on built versions of documentation.', max_length=20, verbose_name='Version Privacy Level'),
        ),
    ]

readthedocs/projects/models.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@
77
import fnmatch
88
import logging
99
import os
10-
from builtins import object # pylint: disable=redefined-builtin
1110

11+
from builtins import object # pylint: disable=redefined-builtin
1212
from django.conf import settings
1313
from django.contrib.auth.models import User
1414
from django.core.urlresolvers import NoReverseMatch, reverse
1515
from django.db import models
1616
from django.utils.encoding import python_2_unicode_compatible
17+
from django.utils.functional import cached_property
1718
from django.utils.translation import ugettext_lazy as _
1819
from future.backports.urllib.parse import urlparse # noqa
1920
from guardian.shortcuts import assign
@@ -24,6 +25,7 @@
2425
from readthedocs.core.utils import broadcast, slugify
2526
from readthedocs.projects import constants
2627
from readthedocs.projects.exceptions import ProjectConfigurationError
28+
from readthedocs.projects.managers import HTMLFileManager
2729
from readthedocs.projects.querysets import (
2830
ChildRelatedProjectQuerySet, FeatureQuerySet, ProjectQuerySet,
2931
RelatedProjectQuerySet)
@@ -32,6 +34,7 @@
3234
from readthedocs.projects.version_handling import (
3335
determine_stable_version, version_windows)
3436
from readthedocs.restapi.client import api
37+
from readthedocs.search.parse_json import process_file
3538
from readthedocs.vcs_support.backends import backend_cls
3639
from readthedocs.vcs_support.utils import Lock, NonBlockingLock
3740

@@ -916,6 +919,40 @@ def __str__(self):
916919
return '%s: %s' % (self.name, self.project)
917920

918921

922+
class HTMLFile(ImportedFile):

    """
    Proxy model over ``ImportedFile`` for HTML files.

    It exists so that only the HTML files are tracked for search indexing;
    rows are selected through ``HTMLFileManager``.
    """

    class Meta(object):
        proxy = True

    objects = HTMLFileManager()

    @cached_property
    def json_file_path(self):
        """
        Path of the ``.fjson`` file that corresponds to this HTML file.

        The extension of ``self.path`` is swapped for ``.fjson`` and the
        result is joined onto the project's production media path requested
        with ``type_='json'`` for this file's version slug.
        """
        stem, _extension = os.path.splitext(self.path)
        json_directory = self.project.get_production_media_path(
            type_='json',
            version_slug=self.version.slug,
            include_file=False,
        )
        return os.path.join(json_directory, stem + '.fjson')

    def get_processed_json(self):
        """Run ``process_file`` on ``json_file_path`` and return its result."""
        return process_file(self.json_file_path)

    @cached_property
    def processed_json(self):
        """Cached result of :meth:`get_processed_json`."""
        return self.get_processed_json()
954+
955+
919956
class Notification(models.Model):
920957
project = models.ForeignKey(Project,
921958
related_name='%(class)s_notifications')

readthedocs/projects/signals.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
# -*- coding: utf-8 -*-
21
"""Project signals"""
32

43
from __future__ import absolute_import
54
import django.dispatch
5+
from django.dispatch import receiver
6+
7+
from readthedocs.oauth.utils import attach_webhook
68

79

810
before_vcs = django.dispatch.Signal(providing_args=["version"])
@@ -14,3 +16,12 @@
1416
project_import = django.dispatch.Signal(providing_args=["project"])
1517

1618
files_changed = django.dispatch.Signal(providing_args=["project", "files"])
19+
20+
21+
@receiver(project_import)
def handle_project_import(sender, **kwargs):
    """
    Add post-commit hook on project import.

    Receiver for the ``project_import`` signal: the signal's ``sender`` is
    passed to ``attach_webhook`` as the project, together with the optional
    ``request`` keyword argument (``None`` when the signal did not send one).
    """
    attach_webhook(project=sender, request=kwargs.get('request'))

readthedocs/projects/tasks.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
absolute_import, division, print_function, unicode_literals)
1010

1111
import datetime
12+
import fnmatch
1213
import hashlib
1314
import json
1415
import logging
@@ -30,7 +31,7 @@
3031

3132
from .constants import LOG_TEMPLATE
3233
from .exceptions import RepositoryError
33-
from .models import ImportedFile, Project, Domain, Feature
34+
from .models import ImportedFile, Project, Domain, Feature, HTMLFile
3435
from .signals import before_vcs, after_vcs, before_build, after_build, files_changed
3536
from readthedocs.builds.constants import (
3637
BUILD_STATE_BUILDING, BUILD_STATE_CLONING, BUILD_STATE_FINISHED,
@@ -987,18 +988,24 @@ def _manage_imported_files(version, path, commit):
987988
changed_files = set()
988989
for root, __, filenames in os.walk(path):
989990
for filename in filenames:
991+
if fnmatch.fnmatch(filename, '*.html'):
992+
model_class = HTMLFile
993+
else:
994+
model_class = ImportedFile
995+
990996
dirpath = os.path.join(root.replace(path, '').lstrip('/'),
991997
filename.lstrip('/'))
992998
full_path = os.path.join(root, filename)
993999
md5 = hashlib.md5(open(full_path, 'rb').read()).hexdigest()
9941000
try:
995-
obj, __ = ImportedFile.objects.get_or_create(
1001+
# pylint: disable=unpacking-non-sequence
1002+
obj, __ = model_class.objects.get_or_create(
9961003
project=version.project,
9971004
version=version,
9981005
path=dirpath,
9991006
name=filename,
10001007
)
1001-
except ImportedFile.MultipleObjectsReturned:
1008+
except model_class.MultipleObjectsReturned:
10021009
log.warning('Error creating ImportedFile')
10031010
continue
10041011
if obj.md5 != md5:
@@ -1007,6 +1014,12 @@ def _manage_imported_files(version, path, commit):
10071014
if obj.commit != commit:
10081015
obj.commit = commit
10091016
obj.save()
1017+
1018+
# Delete the HTMLFile first from previous versions
1019+
HTMLFile.objects.filter(project=version.project,
1020+
version=version
1021+
).exclude(commit=commit).delete()
1022+
10101023
# Delete ImportedFiles from previous versions
10111024
ImportedFile.objects.filter(project=version.project,
10121025
version=version
@@ -1188,7 +1201,6 @@ def sync_callback(_, version_pk, commit, *args, **kwargs):
11881201
The first argument is the result from previous tasks, which we discard.
11891202
"""
11901203
fileify(version_pk, commit=commit)
1191-
update_search(version_pk, commit=commit)
11921204

11931205

11941206
@app.task()

readthedocs/projects/utils.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,21 @@ def version_from_slug(slug, version):
3232
return v
3333

3434

35-
def find_file(filename):
35+
def find_file(basename, pattern, path):
    """
    Recursively find matching file.

    Walks ``path`` with ``os.walk`` and returns the first file encountered
    whose name matches ``pattern`` and whose name without its last extension
    equals ``basename``.

    :param basename: Basename of a file to match
    :param pattern: Pattern to match
    :param path: the directory to search for the file
    :returns: path of matching file, or ``None`` when no file matches
    """
    for root, _dirnames, files in os.walk(path):
        # Narrow to names matching the pattern first, then compare stems.
        for filename in fnmatch.filter(files, pattern):
            if os.path.splitext(filename)[0] == basename:
                return os.path.join(root, filename)
    # Make the "not found" outcome explicit instead of falling off the end.
    return None
4750

4851

4952
def run(*commands):

readthedocs/search/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Names of HTML pages listed for exclusion from search.
# NOTE(review): presumably read by the search indexing code to skip these
# generated pages -- confirm against the callers.
SEARCH_EXCLUDED_FILE = ['search.html', 'genindex.html', 'py-modindex.html']

0 commit comments

Comments
 (0)