1
+ pub ( crate ) mod encode;
2
+
1
3
use std:: collections:: hash_map:: Entry ;
2
4
use std:: collections:: { BTreeMap , VecDeque } ;
3
5
@@ -17,12 +19,46 @@ use crate::html::format::join_with_double_colon;
17
19
use crate :: html:: markdown:: short_markdown_summary;
18
20
use crate :: html:: render:: { self , IndexItem , IndexItemFunctionType , RenderType , RenderTypeId } ;
19
21
22
+ use encode:: { bitmap_to_string, write_vlqhex_to_string} ;
23
+
24
+ /// The serialized search index, with the item descriptions split out into shards.
25
+ ///
26
+ /// The `index` is a JSON-encoded list of names and other information.
27
+ ///
28
+ /// The `desc` has the non-empty descriptions, newline-separated and split into shards
29
+ /// that are closed once they reach 128KiB, each paired with its description count. For example, `(4, "foo\nbar\nbaz\nquux")`.
30
+ ///
31
+ /// There is no single, optimal size for these shards, because it depends on
32
+ /// configuration values that we can't predict or control, such as the version
33
+ /// of HTTP used (HTTP/1.1 would work better with larger files, while HTTP/2
34
+ /// and 3 are more agnostic), transport compression (gzip, zstd, etc), whether
35
+ /// the search query is going to produce a large number of results or a small
36
+ /// number, the bandwidth delay product of the network...
37
+ ///
38
+ /// Gzipping some standard library descriptions to guess what transport
39
+ /// compression will do, the compressed file sizes can be as small as 4.9KiB
40
+ /// or as large as 18KiB (ignoring the final 1.9KiB shard of leftovers).
41
+ /// A "reasonable" range for files is for them to be bigger than 1KiB,
42
+ /// since that's about the amount of data that can be transferred in a
43
+ /// single TCP packet [1], and smaller than 64KiB, the maximum amount of data
44
+ /// that TCP can transfer in a single round trip [2] without extensions [3].
45
+ ///
46
+ /// [1]: https://en.wikipedia.org/wiki/Maximum_transmission_unit#MTUs_for_common_media
47
+ /// [2]: https://en.wikipedia.org/wiki/Sliding_window_protocol#Basic_concept
48
+ /// [3]: https://learn.microsoft.com/en-us/troubleshoot/windows-server/networking/description-tcp-features
49
+ pub ( crate ) struct SerializedSearchIndex {
50
+ pub ( crate ) index : String ,
51
+ pub ( crate ) desc : Vec < ( usize , String ) > ,
52
+ }
53
+
54
+ const DESC_INDEX_SHARD_LEN : usize = 128 * 1024 ; // Byte length at which a description shard is considered full (128KiB).
55
+
20
56
/// Builds the search index from the collected metadata
21
57
pub ( crate ) fn build_index < ' tcx > (
22
58
krate : & clean:: Crate ,
23
59
cache : & mut Cache ,
24
60
tcx : TyCtxt < ' tcx > ,
25
- ) -> String {
61
+ ) -> SerializedSearchIndex {
26
62
let mut itemid_to_pathid = FxHashMap :: default ( ) ;
27
63
let mut primitives = FxHashMap :: default ( ) ;
28
64
let mut associated_types = FxHashMap :: default ( ) ;
@@ -319,7 +355,6 @@ pub(crate) fn build_index<'tcx>(
319
355
. collect :: < Vec < _ > > ( ) ;
320
356
321
357
struct CrateData < ' a > {
322
- doc : String ,
323
358
items : Vec < & ' a IndexItem > ,
324
359
paths : Vec < ( ItemType , Vec < Symbol > ) > ,
325
360
// The String is the alias name and the Vec is the list of the elements with this alias.
@@ -328,6 +363,11 @@ pub(crate) fn build_index<'tcx>(
328
363
aliases : & ' a BTreeMap < String , Vec < usize > > ,
329
364
// Used when a type has more than one impl with an associated item with the same name.
330
365
associated_item_disambiguators : & ' a Vec < ( usize , String ) > ,
366
+ // A list of shard lengths encoded as vlqhex. See the comment in write_vlqhex_to_string
367
+ // for information on the format.
368
+ desc_index : String ,
369
+ // A list of items with no description. This is eventually turned into a bitmap.
370
+ empty_desc : Vec < u32 > ,
331
371
}
332
372
333
373
struct Paths {
@@ -409,7 +449,6 @@ pub(crate) fn build_index<'tcx>(
409
449
let mut names = Vec :: with_capacity ( self . items . len ( ) ) ;
410
450
let mut types = String :: with_capacity ( self . items . len ( ) ) ;
411
451
let mut full_paths = Vec :: with_capacity ( self . items . len ( ) ) ;
412
- let mut descriptions = Vec :: with_capacity ( self . items . len ( ) ) ;
413
452
let mut parents = Vec :: with_capacity ( self . items . len ( ) ) ;
414
453
let mut functions = String :: with_capacity ( self . items . len ( ) ) ;
415
454
let mut deprecated = Vec :: with_capacity ( self . items . len ( ) ) ;
@@ -432,7 +471,6 @@ pub(crate) fn build_index<'tcx>(
432
471
parents. push ( item. parent_idx . map ( |x| x + 1 ) . unwrap_or ( 0 ) ) ;
433
472
434
473
names. push ( item. name . as_str ( ) ) ;
435
- descriptions. push ( & item. desc ) ;
436
474
437
475
if !item. path . is_empty ( ) {
438
476
full_paths. push ( ( index, & item. path ) ) ;
@@ -444,7 +482,8 @@ pub(crate) fn build_index<'tcx>(
444
482
}
445
483
446
484
if item. deprecation . is_some ( ) {
447
- deprecated. push ( index) ;
485
+ // bitmasks always use 1-indexing for items, with 0 as the crate itself
486
+ deprecated. push ( u32:: try_from ( index + 1 ) . unwrap ( ) ) ;
448
487
}
449
488
}
450
489
@@ -455,42 +494,84 @@ pub(crate) fn build_index<'tcx>(
455
494
let has_aliases = !self . aliases . is_empty ( ) ;
456
495
let mut crate_data =
457
496
serializer. serialize_struct ( "CrateData" , if has_aliases { 9 } else { 8 } ) ?;
458
- crate_data. serialize_field ( "doc" , & self . doc ) ?;
459
497
crate_data. serialize_field ( "t" , & types) ?;
460
498
crate_data. serialize_field ( "n" , & names) ?;
461
- // Serialize as an array of item indices and full paths
462
499
crate_data. serialize_field ( "q" , & full_paths) ?;
463
- crate_data. serialize_field ( "d" , & descriptions) ?;
464
500
crate_data. serialize_field ( "i" , & parents) ?;
465
501
crate_data. serialize_field ( "f" , & functions) ?;
466
- crate_data. serialize_field ( "c " , & deprecated ) ?;
502
+ crate_data. serialize_field ( "D " , & self . desc_index ) ?;
467
503
crate_data. serialize_field ( "p" , & paths) ?;
468
504
crate_data. serialize_field ( "b" , & self . associated_item_disambiguators ) ?;
505
+ crate_data. serialize_field ( "c" , & bitmap_to_string ( & deprecated) ) ?;
506
+ crate_data. serialize_field ( "e" , & bitmap_to_string ( & self . empty_desc ) ) ?;
469
507
if has_aliases {
470
508
crate_data. serialize_field ( "a" , & self . aliases ) ?;
471
509
}
472
510
crate_data. end ( )
473
511
}
474
512
}
475
513
476
- // Collect the index into a string
477
- format ! (
514
+ let ( empty_desc, desc) = {
515
+ let mut empty_desc = Vec :: new ( ) ;
516
+ let mut result = Vec :: new ( ) ;
517
+ let mut set = String :: new ( ) ;
518
+ let mut len: usize = 0 ;
519
+ let mut item_index: u32 = 0 ;
520
+ for desc in std:: iter:: once ( & crate_doc) . chain ( crate_items. iter ( ) . map ( |item| & item. desc ) ) {
521
+ if desc == "" {
522
+ empty_desc. push ( item_index) ;
523
+ item_index += 1 ;
524
+ continue ;
525
+ }
526
+ if set. len ( ) >= DESC_INDEX_SHARD_LEN {
527
+ result. push ( ( len, std:: mem:: replace ( & mut set, String :: new ( ) ) ) ) ;
528
+ len = 0 ;
529
+ } else if len != 0 {
530
+ set. push ( '\n' ) ;
531
+ }
532
+ set. push_str ( & desc) ;
533
+ len += 1 ;
534
+ item_index += 1 ;
535
+ }
536
+ result. push ( ( len, std:: mem:: replace ( & mut set, String :: new ( ) ) ) ) ;
537
+ ( empty_desc, result)
538
+ } ;
539
+
540
+ let desc_index = {
541
+ let mut desc_index = String :: with_capacity ( desc. len ( ) * 4 ) ;
542
+ for & ( len, _) in desc. iter ( ) {
543
+ write_vlqhex_to_string ( len. try_into ( ) . unwrap ( ) , & mut desc_index) ;
544
+ }
545
+ desc_index
546
+ } ;
547
+
548
+ assert_eq ! (
549
+ crate_items. len( ) + 1 ,
550
+ desc. iter( ) . map( |( len, _) | * len) . sum:: <usize >( ) + empty_desc. len( )
551
+ ) ;
552
+
553
+ // The index, which is actually used to search, is JSON.
554
+ // It uses `JSON.parse(..)` to actually load, since JSON
555
+ // parses faster than the full JavaScript syntax.
556
+ let index = format ! (
478
557
r#"["{}",{}]"# ,
479
558
krate. name( tcx) ,
480
559
serde_json:: to_string( & CrateData {
481
- doc: crate_doc,
482
560
items: crate_items,
483
561
paths: crate_paths,
484
562
aliases: & aliases,
485
563
associated_item_disambiguators: & associated_item_disambiguators,
564
+ desc_index,
565
+ empty_desc,
486
566
} )
487
567
. expect( "failed serde conversion" )
488
568
// All these `replace` calls are because we have to go through JS string for JSON content.
489
569
. replace( '\\' , r"\\" )
490
570
. replace( '\'' , r"\'" )
491
571
// We need to escape double quotes for the JSON.
492
572
. replace( "\\ \" " , "\\ \\ \" " )
493
- )
573
+ ) ;
574
+ SerializedSearchIndex { index, desc }
494
575
}
495
576
496
577
pub ( crate ) fn get_function_type_for_search < ' tcx > (
0 commit comments