Skip to content

Commit de4ffb1

Browse files
author
CM Lubinski
committed
Split index_search_request into several functions.
By splitting the function up, we not only resolve pyflakes issues around local variables, we also make this function easier to test. I don't like the SectionIndexer approach, but there was a chunk of state (section_index_list) which was extended in each loop iteration. To recreate that functionality while limiting the number of lines in each loop, the state is moved into a separate object. If the original logic was faulty, this could be much more legible.
1 parent c20edee commit de4ffb1

File tree

1 file changed

+91
-55
lines changed

1 file changed

+91
-55
lines changed

readthedocs/restapi/utils.py

Lines changed: 91 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -78,19 +78,13 @@ def delete_versions(project, version_data):
7878
return set()
7979

8080

81-
def index_search_request(version, page_list, commit, project_scale, page_scale,
82-
section=True, delete=True):
83-
"""Update search indexes with build output JSON
84-
85-
In order to keep sub-projects all indexed on the same shard, indexes will be
86-
updated using the parent project's slug as the routing value.
87-
"""
88-
project = version.project
89-
90-
log_msg = ' '.join([page['path'] for page in page_list])
81+
def log_page_list(project, page_list):
82+
log_msg = ' '.join(page['path'] for page in page_list)
9183
log.info("Updating search index: project=%s pages=[%s]",
9284
project.slug, log_msg)
9385

86+
87+
def index_project(project, project_scale):
9488
project_obj = ProjectIndex()
9589
project_obj.index_document(data={
9690
'id': project.pk,
@@ -104,18 +98,76 @@ def index_search_request(version, page_list, commit, project_scale, page_scale,
10498
'weight': project_scale,
10599
})
106100

101+
102+
def index_pages(routes, index_list, project):
107103
page_obj = PageIndex()
108-
section_obj = SectionIndex()
109-
index_list = []
110-
section_index_list = []
111-
routes = [project.slug]
112-
routes.extend([p.parent.slug for p in project.superprojects.all()])
104+
for route in routes:
105+
page_obj.bulk_index(index_list, parent=project.slug, routing=route)
106+
107+
108+
def delete_pages(version, commit, project):
109+
page_obj = PageIndex()
110+
log.info("Deleting files not in commit: %s", commit)
111+
# TODO: AK Make sure this works
112+
delete_query = {
113+
"query": {
114+
"bool": {
115+
"must": [
116+
{"term": {"project": project.slug, }},
117+
{"term": {"version": version.slug, }},
118+
],
119+
"must_not": {
120+
"term": {
121+
"commit": commit
122+
}
123+
}
124+
}
125+
}
126+
}
127+
page_obj.delete_document(body=delete_query)
128+
129+
130+
class SectionIndexer(object):
131+
132+
"""Stores section metadata. Wraps state between calls."""
133+
134+
def __init__(self):
135+
self.section_index_list = []
136+
self.section_obj = SectionIndex()
137+
138+
def __call__(self, page, project, version, page_scale, page_id, routes):
139+
for section in page['sections']:
140+
self.section_index_list.append({
141+
'id': (hashlib
142+
.md5('-'.join([project.slug, version.slug,
143+
page['path'], section['id']]))
144+
.hexdigest()),
145+
'project': project.slug,
146+
'version': version.slug,
147+
'path': page['path'],
148+
'page_id': section['id'],
149+
'title': section['title'],
150+
'content': section['content'],
151+
'weight': page_scale,
152+
})
153+
for route in routes:
154+
self.section_obj.bulk_index(self.section_index_list, parent=page_id,
155+
routing=route)
156+
157+
158+
def generate_index_list(version, project, page_list, commit, project_scale,
159+
page_scale, routes, index_sections):
160+
"""Generate an index dict per page.
161+
162+
If requested, also index the relevant page sections.
163+
"""
164+
section_indexer = SectionIndexer()
113165
for page in page_list:
114166
log.debug("Indexing page: %s:%s", project.slug, page['path'])
115167
page_id = (hashlib
116168
.md5('-'.join([project.slug, version.slug, page['path']]))
117169
.hexdigest())
118-
index_list.append({
170+
yield {
119171
'id': page_id,
120172
'project': project.slug,
121173
'version': version.slug,
@@ -126,45 +178,29 @@ def index_search_request(version, page_list, commit, project_scale, page_scale,
126178
'taxonomy': None,
127179
'commit': commit,
128180
'weight': page_scale + project_scale,
129-
})
130-
if section:
131-
for sect in page['sections']:
132-
section_index_list.append({
133-
'id': (hashlib
134-
.md5('-'.join([project.slug, version.slug,
135-
page['path'], sect['id']]))
136-
.hexdigest()),
137-
'project': project.slug,
138-
'version': version.slug,
139-
'path': page['path'],
140-
'page_id': sect['id'],
141-
'title': sect['title'],
142-
'content': sect['content'],
143-
'weight': page_scale,
144-
})
145-
for route in routes:
146-
section_obj.bulk_index(section_index_list, parent=page_id,
147-
routing=route)
181+
}
182+
if index_sections:
183+
section_indexer(page, project, version, page_scale, routes, page_id)
148184

149-
for route in routes:
150-
page_obj.bulk_index(index_list, parent=project.slug, routing=route)
185+
186+
def index_search_request(version, page_list, commit, project_scale, page_scale,
187+
section=True, delete=True):
188+
"""Update search indexes with build output JSON
189+
190+
In order to keep sub-projects all indexed on the same shard, indexes will be
191+
updated using the parent project's slug as the routing value.
192+
"""
193+
project = version.project
194+
log_page_list(project, page_list)
195+
routes = [project.slug]
196+
routes.extend(p.parent.slug for p in project.superprojects.all())
197+
198+
index_project(project, project_scale)
199+
index_list = list(generate_index_list(
200+
version, project, page_list, commit, project_scale, page_scale,
201+
routes, index_sections=section
202+
))
203+
index_pages(routes, index_list, project)
151204

152205
if delete:
153-
log.info("Deleting files not in commit: %s", commit)
154-
# TODO: AK Make sure this works
155-
delete_query = {
156-
"query": {
157-
"bool": {
158-
"must": [
159-
{"term": {"project": project.slug, }},
160-
{"term": {"version": version.slug, }},
161-
],
162-
"must_not": {
163-
"term": {
164-
"commit": commit
165-
}
166-
}
167-
}
168-
}
169-
}
170-
page_obj.delete_document(body=delete_query)
206+
delete_pages(version, commit, project)

0 commit comments

Comments
 (0)