@@ -78,19 +78,13 @@ def delete_versions(project, version_data):
78
78
return set ()
79
79
80
80
81
- def index_search_request (version , page_list , commit , project_scale , page_scale ,
82
- section = True , delete = True ):
83
- """Update search indexes with build output JSON
84
-
85
- In order to keep sub-projects all indexed on the same shard, indexes will be
86
- updated using the parent project's slug as the routing value.
87
- """
88
- project = version .project
89
-
90
- log_msg = ' ' .join ([page ['path' ] for page in page_list ])
81
+ def log_page_list (project , page_list ):
82
+ log_msg = ' ' .join (page ['path' ] for page in page_list )
91
83
log .info ("Updating search index: project=%s pages=[%s]" ,
92
84
project .slug , log_msg )
93
85
86
+
87
+ def index_project (project , project_scale ):
94
88
project_obj = ProjectIndex ()
95
89
project_obj .index_document (data = {
96
90
'id' : project .pk ,
@@ -104,18 +98,76 @@ def index_search_request(version, page_list, commit, project_scale, page_scale,
104
98
'weight' : project_scale ,
105
99
})
106
100
101
+
102
+ def index_pages (routes , index_list , project ):
107
103
page_obj = PageIndex ()
108
- section_obj = SectionIndex ()
109
- index_list = []
110
- section_index_list = []
111
- routes = [project .slug ]
112
- routes .extend ([p .parent .slug for p in project .superprojects .all ()])
104
+ for route in routes :
105
+ page_obj .bulk_index (index_list , parent = project .slug , routing = route )
106
+
107
+
108
+ def delete_pages (version , commit , project ):
109
+ page_obj = PageIndex ()
110
+ log .info ("Deleting files not in commit: %s" , commit )
111
+ # TODO: AK Make sure this works
112
+ delete_query = {
113
+ "query" : {
114
+ "bool" : {
115
+ "must" : [
116
+ {"term" : {"project" : project .slug , }},
117
+ {"term" : {"version" : version .slug , }},
118
+ ],
119
+ "must_not" : {
120
+ "term" : {
121
+ "commit" : commit
122
+ }
123
+ }
124
+ }
125
+ }
126
+ }
127
+ page_obj .delete_document (body = delete_query )
128
+
129
+
130
+ class SectionIndexer (object ):
131
+
132
+ """Stores section metadata. Wraps state between calls."""
133
+
134
+ def __init__ (self ):
135
+ self .section_index_list = []
136
+ self .section_obj = SectionIndex ()
137
+
138
+ def __call__ (self , page , project , version , page_scale , page_id , routes ):
139
+ for section in page ['sections' ]:
140
+ self .section_index_list .append ({
141
+ 'id' : (hashlib
142
+ .md5 ('-' .join ([project .slug , version .slug ,
143
+ page ['path' ], section ['id' ]]))
144
+ .hexdigest ()),
145
+ 'project' : project .slug ,
146
+ 'version' : version .slug ,
147
+ 'path' : page ['path' ],
148
+ 'page_id' : section ['id' ],
149
+ 'title' : section ['title' ],
150
+ 'content' : section ['content' ],
151
+ 'weight' : page_scale ,
152
+ })
153
+ for route in routes :
154
+ self .section_obj .bulk_index (self .section_index_list , parent = page_id ,
155
+ routing = route )
156
+
157
+
158
+ def generate_index_list (version , project , page_list , commit , project_scale ,
159
+ page_scale , routes , index_sections ):
160
+ """Generate an index dict per page.
161
+
162
+ If requested, also index the relevant page sections.
163
+ """
164
+ section_indexer = SectionIndexer ()
113
165
for page in page_list :
114
166
log .debug ("Indexing page: %s:%s" , project .slug , page ['path' ])
115
167
page_id = (hashlib
116
168
.md5 ('-' .join ([project .slug , version .slug , page ['path' ]]))
117
169
.hexdigest ())
118
- index_list . append ( {
170
+ yield {
119
171
'id' : page_id ,
120
172
'project' : project .slug ,
121
173
'version' : version .slug ,
@@ -126,45 +178,29 @@ def index_search_request(version, page_list, commit, project_scale, page_scale,
126
178
'taxonomy' : None ,
127
179
'commit' : commit ,
128
180
'weight' : page_scale + project_scale ,
129
- })
130
- if section :
131
- for sect in page ['sections' ]:
132
- section_index_list .append ({
133
- 'id' : (hashlib
134
- .md5 ('-' .join ([project .slug , version .slug ,
135
- page ['path' ], sect ['id' ]]))
136
- .hexdigest ()),
137
- 'project' : project .slug ,
138
- 'version' : version .slug ,
139
- 'path' : page ['path' ],
140
- 'page_id' : sect ['id' ],
141
- 'title' : sect ['title' ],
142
- 'content' : sect ['content' ],
143
- 'weight' : page_scale ,
144
- })
145
- for route in routes :
146
- section_obj .bulk_index (section_index_list , parent = page_id ,
147
- routing = route )
181
+ }
182
+ if index_sections :
183
+ section_indexer (page , project , version , page_scale , routes , page_id )
148
184
149
- for route in routes :
150
- page_obj .bulk_index (index_list , parent = project .slug , routing = route )
185
+
186
+ def index_search_request (version , page_list , commit , project_scale , page_scale ,
187
+ section = True , delete = True ):
188
+ """Update search indexes with build output JSON
189
+
190
+ In order to keep sub-projects all indexed on the same shard, indexes will be
191
+ updated using the parent project's slug as the routing value.
192
+ """
193
+ project = version .project
194
+ log_page_list (project , page_list )
195
+ routes = [project .slug ]
196
+ routes .extend (p .parent .slug for p in project .superprojects .all ())
197
+
198
+ index_project (project , project_scale )
199
+ index_list = list (generate_index_list (
200
+ version , project , page_list , commit , project_scale , page_scale ,
201
+ routes , index_sections = section
202
+ ))
203
+ index_pages (routes , index_list , project )
151
204
152
205
if delete :
153
- log .info ("Deleting files not in commit: %s" , commit )
154
- # TODO: AK Make sure this works
155
- delete_query = {
156
- "query" : {
157
- "bool" : {
158
- "must" : [
159
- {"term" : {"project" : project .slug , }},
160
- {"term" : {"version" : version .slug , }},
161
- ],
162
- "must_not" : {
163
- "term" : {
164
- "commit" : commit
165
- }
166
- }
167
- }
168
- }
169
- }
170
- page_obj .delete_document (body = delete_query )
206
+ delete_pages (version , commit , project )
0 commit comments