
[Fix #4247] deleting old search code #4635


Merged: 1 commit, Sep 17, 2018
36 changes: 0 additions & 36 deletions readthedocs/projects/tasks.py
@@ -50,8 +50,6 @@
from readthedocs.doc_builder.python_environments import Conda, Virtualenv
from readthedocs.projects.models import APIProject
from readthedocs.restapi.client import api as api_v2
from readthedocs.restapi.utils import index_search_request
from readthedocs.search.parse_json import process_all_json_files
from readthedocs.vcs_support import utils as vcs_support_utils
from readthedocs.worker import app
from .constants import LOG_TEMPLATE
@@ -902,40 +900,6 @@ def move_files(version_pk, hostname, html=False, localmedia=False, search=False,
        Syncer.copy(from_path, to_path, host=hostname)


@app.task(queue='web')
def update_search(version_pk, commit, delete_non_commit_files=True):
    """
    Task to update search indexes.

    :param version_pk: Version id to update
    :param commit: Commit that updated index
    :param delete_non_commit_files: Delete files not in commit from index
    """
    version = Version.objects.get(pk=version_pk)

    if version.project.is_type_sphinx:
        page_list = process_all_json_files(version, build_dir=False)
    else:
        log.debug('Unknown documentation type: %s',
                  version.project.documentation_type)
        return

    log_msg = ' '.join([page['path'] for page in page_list])
    log.info("(Search Index) Sending Data: %s [%s]", version.project.slug,
             log_msg)
    index_search_request(
        version=version,
        page_list=page_list,
        commit=commit,
        project_scale=0,
        page_scale=0,
        # Don't index sections to speed up indexing.
        # They aren't currently exposed anywhere.
        section=False,
        delete=delete_non_commit_files,
    )


@app.task(queue='web')
def symlink_project(project_pk):
    project = Project.objects.get(pk=project_pk)
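
For reference, a task registered with @app.task(queue='web'), like the removed update_search above, would have been dispatched through Celery's standard delay/apply_async API. A minimal sketch, with placeholder argument values that are not taken from this PR:

# Illustrative dispatch of the removed task via standard Celery APIs;
# the argument values are placeholders, not from this PR.
from readthedocs.projects.tasks import update_search

update_search.delay(
    version_pk=42,                  # placeholder Version pk
    commit='deadbeef',              # placeholder commit sha
    delete_non_commit_files=True,   # drop stale pages from the index
)
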
1 change: 0 additions & 1 deletion readthedocs/projects/views/public.py
@@ -28,7 +28,6 @@
from readthedocs.builds.views import BuildTriggerMixin
from readthedocs.projects.models import ImportedFile, Project
from readthedocs.search.documents import PageDocument
from readthedocs.search.indexes import PageIndex
from readthedocs.search.views import LOG_TEMPLATE

from .base import ProjectOnboardMixin
29 changes: 1 addition & 28 deletions readthedocs/restapi/urls.py
@@ -14,15 +14,7 @@
from rest_framework import routers

from readthedocs.constants import pattern_opts
from readthedocs.restapi import views
from readthedocs.restapi.views import (
    core_views,
    footer_views,
    integrations,
    search_views,
    task_views,
)

from readthedocs.restapi.views import (core_views, footer_views, task_views, integrations)
from .views.model_views import (
    BuildCommandViewSet,
    BuildViewSet,
@@ -69,24 +61,6 @@
    url(r'footer_html/', footer_views.footer_html, name='footer_html'),
]

search_urls = [
    url(
        r'index_search/',
        search_views.index_search,
        name='index_search',
    ),
    url(r'^search/$', views.search_views.search, name='api_search'),
    url(r'search/project/$',
        search_views.project_search,
        name='api_project_search',
    ),
    url(
        r'search/section/$',
        search_views.section_search,
        name='api_section_search',
    ),
]

task_urls = [
    url(
        r'jobs/status/(?P<task_id>[^/]+)/',
@@ -138,7 +112,6 @@

urlpatterns += function_urls
urlpatterns += task_urls
urlpatterns += search_urls
urlpatterns += integration_urls


115 changes: 0 additions & 115 deletions readthedocs/restapi/utils.py
@@ -4,7 +4,6 @@
from __future__ import (
    absolute_import, division, print_function, unicode_literals)

import hashlib
import logging

from rest_framework.pagination import PageNumberPagination
@@ -13,7 +12,6 @@
                                          NON_REPOSITORY_VERSIONS, STABLE,
                                          STABLE_VERBOSE_NAME)
from readthedocs.builds.models import Version
from readthedocs.search.indexes import PageIndex, ProjectIndex, SectionIndex

log = logging.getLogger(__name__)

@@ -154,119 +152,6 @@ def delete_versions(project, version_data):
    return set()


def index_search_request(
        version, page_list, commit, project_scale, page_scale, section=True,
        delete=True):
    """
    Update search indexes with build output JSON.

    In order to keep sub-projects all indexed on the same shard, indexes will be
    updated using the parent project's slug as the routing value.
    """
    # TODO refactor this function
    # pylint: disable=too-many-locals
    project = version.project

    log_msg = ' '.join([page['path'] for page in page_list])
    log.info(
        'Updating search index: project=%s pages=[%s]',
        project.slug,
        log_msg,
    )

    project_obj = ProjectIndex()
    project_obj.index_document(
        data={
            'id': project.pk,
            'name': project.name,
            'slug': project.slug,
            'description': project.description,
            'lang': project.language,
            'author': [user.username for user in project.users.all()],
            'url': project.get_absolute_url(),
            'tags': None,
            'weight': project_scale,
        })

    page_obj = PageIndex()
    section_obj = SectionIndex()
    index_list = []
    section_index_list = []
    routes = [project.slug]
    routes.extend([p.parent.slug for p in project.superprojects.all()])
    for page in page_list:
        log.debug('Indexing page: %s:%s', project.slug, page['path'])
        to_hash = '-'.join([project.slug, version.slug, page['path']])
        page_id = hashlib.md5(to_hash.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })
        if section:
            for sect in page['sections']:
                id_to_hash = '-'.join([
                    project.slug,
                    version.slug,
                    page['path'],
                    sect['id'],
                ])
                section_index_list.append({
                    'id': (hashlib.md5(id_to_hash.encode('utf-8')).hexdigest()),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': sect['id'],
                    'title': sect['title'],
                    'content': sect['content'],
                    'weight': page_scale,
                })
            for route in routes:
                section_obj.bulk_index(
                    section_index_list,
                    parent=page_id,
                    routing=route,
                )

    for route in routes:
        page_obj.bulk_index(index_list, parent=project.slug, routing=route)

    if delete:
        log.info('Deleting files not in commit: %s', commit)
        # TODO: AK Make sure this works
        delete_query = {
            'query': {
                'bool': {
                    'must': [
                        {
                            'term': {
                                'project': project.slug,
                            },
                        },
                        {
                            'term': {
                                'version': version.slug,
                            },
                        },
                    ],
                    'must_not': {
                        'term': {
                            'commit': commit,
                        },
                    },
                },
            },
        }
        page_obj.delete_document(body=delete_query)


class RemoteOrganizationPagination(PageNumberPagination):
    page_size = 25

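
The must/must_not query in the removed index_search_request drops pages whose stored commit no longer matches the current build. A standalone sketch of the same delete-by-query pattern with the elasticsearch client, where the index name, client setup, and argument values are illustrative assumptions rather than anything defined in this PR:

# Standalone sketch of the stale-page cleanup above; index name, client
# configuration, and the values below are illustrative assumptions.
from elasticsearch import Elasticsearch

es = Elasticsearch()
es.delete_by_query(
    index='readthedocs',
    body={
        'query': {
            'bool': {
                'must': [
                    {'term': {'project': 'my-project'}},
                    {'term': {'version': 'latest'}},
                ],
                # Remove documents not re-indexed by the current commit.
                'must_not': {'term': {'commit': 'abc123'}},
            },
        },
    },
)
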
151 changes: 0 additions & 151 deletions readthedocs/restapi/views/search_views.py

This file was deleted.
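
The deleted module provided the index_search, search, project_search, and section_search API views wired up in search_urls above. Searches now go through the document-based index instead; a minimal sketch of querying it, assuming the PageDocument imported in readthedocs/projects/views/public.py follows the usual django-elasticsearch-dsl API and keeps the title/headers/content, project, and version fields seen in the removed indexing code (the helper itself is hypothetical, not part of this PR):

# Hypothetical helper sketching a query against the replacement index;
# field names are assumed from the removed page-indexing code.
from readthedocs.search.documents import PageDocument

def search_pages(query, project_slug, version_slug):
    search = (
        PageDocument.search()
        .filter('term', project=project_slug)
        .filter('term', version=version_slug)
        .query('multi_match', query=query,
               fields=['title', 'headers', 'content'])
    )
    return search.execute()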
