1
+ import hashlib
1
2
from fnmatch import fnmatch
2
3
3
4
import structlog
4
5
5
- from readthedocs .builds .constants import BUILD_STATE_FINISHED , INTERNAL
6
+ from readthedocs .builds .constants import BUILD_STATE_FINISHED , INTERNAL , LATEST
6
7
from readthedocs .builds .models import Build , Version
7
- from readthedocs .projects .models import HTMLFile , Project
8
+ from readthedocs .filetreediff import write_manifest
9
+ from readthedocs .projects .models import Feature , HTMLFile , Project
8
10
from readthedocs .projects .signals import files_changed
9
11
from readthedocs .search .documents import PageDocument
10
12
from readthedocs .search .utils import index_objects , remove_indexed_files
@@ -120,7 +122,38 @@ def collect(self, sync_id: int):
120
122
self .version .imported_files .exclude (build = sync_id ).delete ()
121
123
122
124
123
- def _get_indexers (* , version , search_ranking , search_ignore , search_index_name = None ):
125
class FileManifestIndexer(Indexer):
    """
    Indexer that records one content hash per HTML file and writes a manifest.

    While files are processed, the MD5 hex digest of each file's
    ``main_content`` is stored under the file's path. When ``collect`` runs,
    the digests and the build id are assembled into a dict that is passed to
    ``write_manifest`` together with the version.
    """

    def __init__(self, version: Version, build: Build):
        self.version = version
        self.build = build
        # Maps each processed file path to the hex digest of its main content.
        self._hashes = {}

    def process(self, html_file: HTMLFile, sync_id: int):
        """Store the MD5 hex digest of the file's main content under its path."""
        content_bytes = html_file.main_content.encode()
        digest = hashlib.md5(content_bytes).hexdigest()
        self._hashes[html_file.path] = digest

    def collect(self, sync_id: int):
        """Assemble the manifest from the collected digests and write it."""
        build_info = {"id": self.build.id}
        files = {}
        for path, digest in self._hashes.items():
            files[path] = {"hash": digest}
        manifest = {"build": build_info, "files": files}
        write_manifest(self.version, manifest)
149
+
150
+
151
+ def _get_indexers (* , version : Version , build : Build , search_index_name = None ):
152
+ build_config = build .config or {}
153
+ search_config = build_config .get ("search" , {})
154
+ search_ranking = search_config .get ("ranking" , {})
155
+ search_ignore = search_config .get ("ignore" , [])
156
+
124
157
indexers = []
125
158
# NOTE: The search indexer must be before the index file indexer.
126
159
# This is because saving the objects in the DB will give them an id,
@@ -136,6 +169,22 @@ def _get_indexers(*, version, search_ranking, search_ignore, search_index_name=N
136
169
search_index_name = search_index_name ,
137
170
)
138
171
indexers .append (search_indexer )
172
+
173
+ # File tree diff is under a feature flag for now,
174
+ # and we only allow comparing PR previews against the latest version.
175
+ has_feature = version .project .has_feature (
176
+ Feature .GENERATE_MANIFEST_FOR_FILE_TREE_DIFF
177
+ )
178
+ create_manifest = has_feature and (
179
+ version .is_external or version == version .slug == LATEST
180
+ )
181
+ if create_manifest :
182
+ file_manifest_indexer = FileManifestIndexer (
183
+ version = version ,
184
+ build = build ,
185
+ )
186
+ indexers .append (file_manifest_indexer )
187
+
139
188
index_file_indexer = IndexFileIndexer (
140
189
project = version .project ,
141
190
version = version ,
@@ -230,16 +279,10 @@ def index_build(build_id):
230
279
build_id = build .id ,
231
280
)
232
281
233
- build_config = build .config or {}
234
- search_config = build_config .get ("search" , {})
235
- search_ranking = search_config .get ("ranking" , {})
236
- search_ignore = search_config .get ("ignore" , [])
237
-
238
282
try :
239
283
indexers = _get_indexers (
240
284
version = version ,
241
- search_ranking = search_ranking ,
242
- search_ignore = search_ignore ,
285
+ build = build ,
243
286
)
244
287
_process_files (version = version , indexers = indexers )
245
288
except Exception :
@@ -280,17 +323,10 @@ def reindex_version(version_id, search_index_name=None):
280
323
version_slug = version .slug ,
281
324
build_id = latest_successful_build .id ,
282
325
)
283
-
284
- build_config = latest_successful_build .config or {}
285
- search_config = build_config .get ("search" , {})
286
- search_ranking = search_config .get ("ranking" , {})
287
- search_ignore = search_config .get ("ignore" , [])
288
-
289
326
try :
290
327
indexers = _get_indexers (
291
328
version = version ,
292
- search_ranking = search_ranking ,
293
- search_ignore = search_ignore ,
329
+ build = latest_successful_build ,
294
330
search_index_name = search_index_name ,
295
331
)
296
332
_process_files (version = version , indexers = indexers )
0 commit comments