Skip to content

Commit 029cb1b

Browse files
committed
Auto merge of rust-lang#123372 - GuillaumeGomez:rollup-nwxdzev, r=GuillaumeGomez
Rollup of 4 pull requests Successful merges: - rust-lang#122614 (rustdoc-search: shard the search result descriptions) - rust-lang#123338 (Update to new browser-ui-test version) - rust-lang#123366 (Minor by_move_body impl cleanups) - rust-lang#123371 (Remove dangling `.mir.stderr` and `.thir.stderr` test files) r? `@ghost` `@rustbot` modify labels: rollup
2 parents 36b6f9b + 4468068 commit 029cb1b

File tree

80 files changed

+1104
-808
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

80 files changed

+1104
-808
lines changed

Cargo.lock

+2
Original file line numberDiff line numberDiff line change
@@ -4783,6 +4783,8 @@ version = "0.0.0"
47834783
dependencies = [
47844784
"arrayvec",
47854785
"askama",
4786+
"base64",
4787+
"byteorder",
47864788
"expect-test",
47874789
"indexmap",
47884790
"itertools 0.12.1",

compiler/rustc_mir_transform/src/coroutine/by_move_body.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
//! be a coroutine body that takes all of its upvars by-move, and which we stash
44
//! into the `CoroutineInfo` for all coroutines returned by coroutine-closures.
55
6-
use rustc_data_structures::fx::FxIndexSet;
6+
use rustc_data_structures::unord::UnordSet;
77
use rustc_hir as hir;
88
use rustc_middle::mir::visit::MutVisitor;
99
use rustc_middle::mir::{self, dump_mir, MirPass};
@@ -33,7 +33,7 @@ impl<'tcx> MirPass<'tcx> for ByMoveBody {
3333
return;
3434
}
3535

36-
let mut by_ref_fields = FxIndexSet::default();
36+
let mut by_ref_fields = UnordSet::default();
3737
let by_move_upvars = Ty::new_tup_from_iter(
3838
tcx,
3939
tcx.closure_captures(coroutine_def_id).iter().enumerate().map(|(idx, capture)| {
@@ -73,7 +73,7 @@ impl<'tcx> MirPass<'tcx> for ByMoveBody {
7373

7474
struct MakeByMoveBody<'tcx> {
7575
tcx: TyCtxt<'tcx>,
76-
by_ref_fields: FxIndexSet<FieldIdx>,
76+
by_ref_fields: UnordSet<FieldIdx>,
7777
by_move_coroutine_ty: Ty<'tcx>,
7878
}
7979

@@ -89,11 +89,11 @@ impl<'tcx> MutVisitor<'tcx> for MakeByMoveBody<'tcx> {
8989
location: mir::Location,
9090
) {
9191
if place.local == ty::CAPTURE_STRUCT_LOCAL
92-
&& !place.projection.is_empty()
93-
&& let mir::ProjectionElem::Field(idx, ty) = place.projection[0]
92+
&& let Some((&mir::ProjectionElem::Field(idx, ty), projection)) =
93+
place.projection.split_first()
9494
&& self.by_ref_fields.contains(&idx)
9595
{
96-
let (begin, end) = place.projection[1..].split_first().unwrap();
96+
let (begin, end) = projection.split_first().unwrap();
9797
// FIXME(async_closures): I'm actually a bit surprised to see that we always
9898
// initially deref the by-ref upvars. If this is not actually true, then we
9999
// will at least get an ICE that explains why this isn't true :^)

src/ci/docker/host-x86_64/mingw-check/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ ENV SCRIPT python3 ../x.py --stage 2 test src/tools/expand-yaml-anchors && \
6060
/scripts/validate-error-codes.sh && \
6161
reuse --include-submodules lint && \
6262
# Runs checks to ensure that our JS code only uses syntax from the targeted ECMAScript version.
63-
es-check es6 ../src/librustdoc/html/static/js/*.js && \
63+
es-check es8 ../src/librustdoc/html/static/js/*.js && \
6464
eslint -c ../src/librustdoc/html/static/.eslintrc.js ../src/librustdoc/html/static/js/*.js && \
6565
eslint -c ../src/tools/rustdoc-js/.eslintrc.js ../src/tools/rustdoc-js/tester.js && \
6666
eslint -c ../src/tools/rustdoc-gui/.eslintrc.js ../src/tools/rustdoc-gui/tester.js
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.16.11
1+
0.17.0

src/librustdoc/Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ path = "lib.rs"
99
[dependencies]
1010
arrayvec = { version = "0.7", default-features = false }
1111
askama = { version = "0.12", default-features = false, features = ["config"] }
12+
base64 = "0.21.7"
13+
byteorder = "1.5"
1214
itertools = "0.12"
1315
indexmap = "2"
1416
minifier = "0.3.0"

src/librustdoc/html/render/mod.rs

+4-29
Original file line numberDiff line numberDiff line change
@@ -184,40 +184,15 @@ pub(crate) enum RenderTypeId {
184184

185185
impl RenderTypeId {
186186
pub fn write_to_string(&self, string: &mut String) {
187-
// (sign, value)
188-
let (sign, id): (bool, u32) = match &self {
187+
let id: i32 = match &self {
189188
// 0 is a sentinel, everything else is one-indexed
190189
// concrete type
191-
RenderTypeId::Index(idx) if *idx >= 0 => (false, (idx + 1isize).try_into().unwrap()),
190+
RenderTypeId::Index(idx) if *idx >= 0 => (idx + 1isize).try_into().unwrap(),
192191
// generic type parameter
193-
RenderTypeId::Index(idx) => (true, (-*idx).try_into().unwrap()),
192+
RenderTypeId::Index(idx) => (*idx).try_into().unwrap(),
194193
_ => panic!("must convert render types to indexes before serializing"),
195194
};
196-
// zig-zag encoding
197-
let value: u32 = (id << 1) | (if sign { 1 } else { 0 });
198-
// Self-terminating hex use capital letters for everything but the
199-
// least significant digit, which is lowercase. For example, decimal 17
200-
// would be `` Aa `` if zig-zag encoding weren't used.
201-
//
202-
// Zig-zag encoding, however, stores the sign bit as the last bit.
203-
// This means, in the last hexit, 1 is actually `c`, -1 is `b`
204-
// (`a` is the imaginary -0), and, because all the bits are shifted
205-
// by one, `` A` `` is actually 8 and `` Aa `` is -8.
206-
//
207-
// https://rust-lang.github.io/rustc-dev-guide/rustdoc-internals/search.html
208-
// describes the encoding in more detail.
209-
let mut shift: u32 = 28;
210-
let mut mask: u32 = 0xF0_00_00_00;
211-
while shift < 32 {
212-
let hexit = (value & mask) >> shift;
213-
if hexit != 0 || shift == 0 {
214-
let hex =
215-
char::try_from(if shift == 0 { '`' } else { '@' } as u32 + hexit).unwrap();
216-
string.push(hex);
217-
}
218-
shift = shift.wrapping_sub(4);
219-
mask = mask >> 4;
220-
}
195+
search_index::encode::write_vlqhex_to_string(id, string);
221196
}
222197
}
223198

src/librustdoc/html/render/search_index.rs

+94-13
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
pub(crate) mod encode;
2+
13
use std::collections::hash_map::Entry;
24
use std::collections::{BTreeMap, VecDeque};
35

@@ -17,12 +19,46 @@ use crate::html::format::join_with_double_colon;
1719
use crate::html::markdown::short_markdown_summary;
1820
use crate::html::render::{self, IndexItem, IndexItemFunctionType, RenderType, RenderTypeId};
1921

22+
use encode::{bitmap_to_string, write_vlqhex_to_string};
23+
24+
/// The serialized search index, with its item descriptions split into shards.
25+
///
26+
/// The `index` is a JSON-encoded list of names and other information.
27+
///
28+
/// The `desc` field holds newline-separated descriptions, split up by size into shards of
29+
/// roughly 128KiB, each paired with its description count: `(4, "foo\nbar\nbaz\nquux")`.
30+
///
31+
/// There is no single, optimal size for these shards, because it depends on
32+
/// configuration values that we can't predict or control, such as the version
33+
/// of HTTP used (HTTP/1.1 would work better with larger files, while HTTP/2
34+
/// and 3 are more agnostic), transport compression (gzip, zstd, etc), whether
35+
/// the search query is going to produce a large number of results or a small
36+
/// number, the bandwidth delay product of the network...
37+
///
38+
/// Gzipping some standard library descriptions to guess what transport
39+
/// compression will do, the compressed file sizes can be as small as 4.9KiB
40+
/// or as large as 18KiB (ignoring the final 1.9KiB shard of leftovers).
41+
/// A "reasonable" range for files is for them to be bigger than 1KiB,
42+
/// since that's about the amount of data that can be transferred in a
43+
/// single TCP packet ([1]), and smaller than 64KiB, the maximum amount of data that
44+
/// TCP can transfer in a single round trip without extensions ([2], [3]).
45+
///
46+
/// [1]: https://en.wikipedia.org/wiki/Maximum_transmission_unit#MTUs_for_common_media
47+
/// [2]: https://en.wikipedia.org/wiki/Sliding_window_protocol#Basic_concept
48+
/// [3]: https://learn.microsoft.com/en-us/troubleshoot/windows-server/networking/description-tcp-features
49+
pub(crate) struct SerializedSearchIndex {
50+
pub(crate) index: String,
51+
pub(crate) desc: Vec<(usize, String)>,
52+
}
53+
54+
const DESC_INDEX_SHARD_LEN: usize = 128 * 1024;
55+
2056
/// Builds the search index from the collected metadata
2157
pub(crate) fn build_index<'tcx>(
2258
krate: &clean::Crate,
2359
cache: &mut Cache,
2460
tcx: TyCtxt<'tcx>,
25-
) -> String {
61+
) -> SerializedSearchIndex {
2662
let mut itemid_to_pathid = FxHashMap::default();
2763
let mut primitives = FxHashMap::default();
2864
let mut associated_types = FxHashMap::default();
@@ -319,7 +355,6 @@ pub(crate) fn build_index<'tcx>(
319355
.collect::<Vec<_>>();
320356

321357
struct CrateData<'a> {
322-
doc: String,
323358
items: Vec<&'a IndexItem>,
324359
paths: Vec<(ItemType, Vec<Symbol>)>,
325360
// The String is alias name and the vec is the list of the elements with this alias.
@@ -328,6 +363,11 @@ pub(crate) fn build_index<'tcx>(
328363
aliases: &'a BTreeMap<String, Vec<usize>>,
329364
// Used when a type has more than one impl with an associated item with the same name.
330365
associated_item_disambiguators: &'a Vec<(usize, String)>,
366+
// A list of shard lengths encoded as vlqhex. See the comment in write_vlqhex_to_string
367+
// for information on the format.
368+
desc_index: String,
369+
// A list of items with no description. This is eventually turned into a bitmap.
370+
empty_desc: Vec<u32>,
331371
}
332372

333373
struct Paths {
@@ -409,7 +449,6 @@ pub(crate) fn build_index<'tcx>(
409449
let mut names = Vec::with_capacity(self.items.len());
410450
let mut types = String::with_capacity(self.items.len());
411451
let mut full_paths = Vec::with_capacity(self.items.len());
412-
let mut descriptions = Vec::with_capacity(self.items.len());
413452
let mut parents = Vec::with_capacity(self.items.len());
414453
let mut functions = String::with_capacity(self.items.len());
415454
let mut deprecated = Vec::with_capacity(self.items.len());
@@ -432,7 +471,6 @@ pub(crate) fn build_index<'tcx>(
432471
parents.push(item.parent_idx.map(|x| x + 1).unwrap_or(0));
433472

434473
names.push(item.name.as_str());
435-
descriptions.push(&item.desc);
436474

437475
if !item.path.is_empty() {
438476
full_paths.push((index, &item.path));
@@ -444,7 +482,8 @@ pub(crate) fn build_index<'tcx>(
444482
}
445483

446484
if item.deprecation.is_some() {
447-
deprecated.push(index);
485+
// bitmasks always use 1-indexing for items, with 0 as the crate itself
486+
deprecated.push(u32::try_from(index + 1).unwrap());
448487
}
449488
}
450489

@@ -455,42 +494,84 @@ pub(crate) fn build_index<'tcx>(
455494
let has_aliases = !self.aliases.is_empty();
456495
let mut crate_data =
457496
serializer.serialize_struct("CrateData", if has_aliases { 9 } else { 8 })?;
458-
crate_data.serialize_field("doc", &self.doc)?;
459497
crate_data.serialize_field("t", &types)?;
460498
crate_data.serialize_field("n", &names)?;
461-
// Serialize as an array of item indices and full paths
462499
crate_data.serialize_field("q", &full_paths)?;
463-
crate_data.serialize_field("d", &descriptions)?;
464500
crate_data.serialize_field("i", &parents)?;
465501
crate_data.serialize_field("f", &functions)?;
466-
crate_data.serialize_field("c", &deprecated)?;
502+
crate_data.serialize_field("D", &self.desc_index)?;
467503
crate_data.serialize_field("p", &paths)?;
468504
crate_data.serialize_field("b", &self.associated_item_disambiguators)?;
505+
crate_data.serialize_field("c", &bitmap_to_string(&deprecated))?;
506+
crate_data.serialize_field("e", &bitmap_to_string(&self.empty_desc))?;
469507
if has_aliases {
470508
crate_data.serialize_field("a", &self.aliases)?;
471509
}
472510
crate_data.end()
473511
}
474512
}
475513

476-
// Collect the index into a string
477-
format!(
514+
let (empty_desc, desc) = {
515+
let mut empty_desc = Vec::new();
516+
let mut result = Vec::new();
517+
let mut set = String::new();
518+
let mut len: usize = 0;
519+
let mut item_index: u32 = 0;
520+
for desc in std::iter::once(&crate_doc).chain(crate_items.iter().map(|item| &item.desc)) {
521+
if desc == "" {
522+
empty_desc.push(item_index);
523+
item_index += 1;
524+
continue;
525+
}
526+
if set.len() >= DESC_INDEX_SHARD_LEN {
527+
result.push((len, std::mem::replace(&mut set, String::new())));
528+
len = 0;
529+
} else if len != 0 {
530+
set.push('\n');
531+
}
532+
set.push_str(&desc);
533+
len += 1;
534+
item_index += 1;
535+
}
536+
result.push((len, std::mem::replace(&mut set, String::new())));
537+
(empty_desc, result)
538+
};
539+
540+
let desc_index = {
541+
let mut desc_index = String::with_capacity(desc.len() * 4);
542+
for &(len, _) in desc.iter() {
543+
write_vlqhex_to_string(len.try_into().unwrap(), &mut desc_index);
544+
}
545+
desc_index
546+
};
547+
548+
assert_eq!(
549+
crate_items.len() + 1,
550+
desc.iter().map(|(len, _)| *len).sum::<usize>() + empty_desc.len()
551+
);
552+
553+
// The index, which is actually used to search, is JSON.
554+
// It is loaded with `JSON.parse(..)`, since JSON
555+
// parses faster than the full JavaScript syntax.
556+
let index = format!(
478557
r#"["{}",{}]"#,
479558
krate.name(tcx),
480559
serde_json::to_string(&CrateData {
481-
doc: crate_doc,
482560
items: crate_items,
483561
paths: crate_paths,
484562
aliases: &aliases,
485563
associated_item_disambiguators: &associated_item_disambiguators,
564+
desc_index,
565+
empty_desc,
486566
})
487567
.expect("failed serde conversion")
488568
// All these `replace` calls are because we have to go through JS string for JSON content.
489569
.replace('\\', r"\\")
490570
.replace('\'', r"\'")
491571
// We need to escape double quotes for the JSON.
492572
.replace("\\\"", "\\\\\"")
493-
)
573+
);
574+
SerializedSearchIndex { index, desc }
494575
}
495576

496577
pub(crate) fn get_function_type_for_search<'tcx>(

0 commit comments

Comments
 (0)