Skip to content

Commit 7cb6c31

Browse files
committed
Auto merge of #138504 - bjorn3:string_merging_rust_strings, r=<try>
Nul terminate rust string literals

This allows taking advantage of the C string merging functionality of linkers, reducing code size. Marked as draft to see if this actually has much of an effect. The disadvantage of this is that people may start to rely on string literals getting nul terminated. A potential solution for that would be to put a byte that is not part of a valid UTF-8 character right before the nul terminator. Builds on #138503.
2 parents 8afd710 + 701f892 commit 7cb6c31

File tree

8 files changed

+69
-47
lines changed

8 files changed

+69
-47
lines changed

compiler/rustc_const_eval/src/interpret/intrinsics.rs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,17 @@ use super::{
2424
use crate::fluent_generated as fluent;
2525

2626
/// Directly returns an `Allocation` containing an absolute path representation of the given type.
27-
pub(crate) fn alloc_type_name<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> ConstAllocation<'tcx> {
28-
let path = crate::util::type_name(tcx, ty);
29-
let alloc = Allocation::from_bytes_byte_aligned_immutable(path.into_bytes(), ());
30-
tcx.mk_const_alloc(alloc)
27+
pub(crate) fn alloc_type_name<'tcx>(
28+
tcx: TyCtxt<'tcx>,
29+
ty: Ty<'tcx>,
30+
) -> (ConstAllocation<'tcx>, u64) {
31+
let mut path = crate::util::type_name(tcx, ty).into_bytes();
32+
let path_len = path.len().try_into().unwrap();
33+
if !path.contains(&0) {
34+
path.extend(b"\xff\0");
35+
};
36+
let alloc = Allocation::from_bytes_byte_aligned_immutable(path, ());
37+
(tcx.mk_const_alloc(alloc), path_len)
3138
}
3239

3340
/// The logic for all nullary intrinsics is implemented here. These intrinsics don't get evaluated
@@ -43,8 +50,8 @@ pub(crate) fn eval_nullary_intrinsic<'tcx>(
4350
interp_ok(match name {
4451
sym::type_name => {
4552
ensure_monomorphic_enough(tcx, tp_ty)?;
46-
let alloc = alloc_type_name(tcx, tp_ty);
47-
ConstValue::Slice { data: alloc, meta: alloc.inner().size().bytes() }
53+
let (alloc, path_len) = alloc_type_name(tcx, tp_ty);
54+
ConstValue::Slice { data: alloc, meta: path_len }
4855
}
4956
sym::needs_drop => {
5057
ensure_monomorphic_enough(tcx, tp_ty)?;

compiler/rustc_const_eval/src/interpret/place.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,11 +1019,16 @@ where
10191019
&mut self,
10201020
s: &str,
10211021
) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> {
1022-
let bytes = s.as_bytes();
1023-
let ptr = self.allocate_bytes_dedup(bytes)?;
1022+
let ptr = if !s.contains('\0') {
1023+
let mut bytes = s.as_bytes().to_owned();
1024+
bytes.extend(b"\xff\0");
1025+
self.allocate_bytes_dedup(&bytes)?
1026+
} else {
1027+
self.allocate_bytes_dedup(s.as_bytes())?
1028+
};
10241029

10251030
// Create length metadata for the string.
1026-
let meta = Scalar::from_target_usize(u64::try_from(bytes.len()).unwrap(), self);
1031+
let meta = Scalar::from_target_usize(u64::try_from(s.len()).unwrap(), self);
10271032

10281033
// Get layout for Rust's str type.
10291034
let layout = self.layout_of(self.tcx.types.str_).unwrap();

compiler/rustc_mir_build/src/builder/expr/as_constant.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,9 +121,15 @@ fn lit_to_mir_constant<'tcx>(tcx: TyCtxt<'tcx>, lit_input: LitToConstInput<'tcx>
121121
let value = match (lit, lit_ty.kind()) {
122122
(ast::LitKind::Str(s, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_str() => {
123123
let s = s.as_str();
124-
let allocation = Allocation::from_bytes_byte_aligned_immutable(s.as_bytes(), ());
124+
let allocation = if !s.contains('\0') {
125+
let mut s = s.as_bytes().to_owned();
126+
s.extend(b"\xff\0");
127+
Allocation::from_bytes_byte_aligned_immutable(s, ())
128+
} else {
129+
Allocation::from_bytes_byte_aligned_immutable(s.as_bytes(), ())
130+
};
125131
let allocation = tcx.mk_const_alloc(allocation);
126-
ConstValue::Slice { data: allocation, meta: allocation.inner().size().bytes() }
132+
ConstValue::Slice { data: allocation, meta: s.len().try_into().unwrap() }
127133
}
128134
(ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _))
129135
if matches!(inner_ty.kind(), ty::Slice(_)) =>

tests/codegen/remap_path_prefix/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ mod aux_mod;
1212
include!("aux_mod.rs");
1313

1414
// Here we check that the expansion of the file!() macro is mapped.
15-
// CHECK: @alloc_5761061597a97f66e13ef2ff92712c4b = private unnamed_addr constant [34 x i8] c"/the/src/remap_path_prefix/main.rs"
15+
// CHECK: @alloc_643660180b5bd639a42b5b1707ce11a5 = private unnamed_addr constant [36 x i8] c"/the/src/remap_path_prefix/main.rs\FF\00"
1616
pub static FILE_PATH: &'static str = file!();
1717

1818
fn main() {

tests/mir-opt/const_allocation.main.GVN.after.32bit.mir

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,27 +33,27 @@ ALLOC2 (size: 16, align: 4) {
3333
╾ALLOC4<imm>╼ 03 00 00 00 ╾ALLOC5<imm>╼ 03 00 00 00 │ ╾──╼....╾──╼....
3434
}
3535

36-
ALLOC4 (size: 3, align: 1) {
37-
66 6f 6f │ foo
36+
ALLOC4 (size: 5, align: 1) {
37+
66 6f 6f ff 00 │ foo..
3838
}
3939

40-
ALLOC5 (size: 3, align: 1) {
41-
62 61 72 │ bar
40+
ALLOC5 (size: 5, align: 1) {
41+
62 61 72 ff 00 │ bar..
4242
}
4343

4444
ALLOC3 (size: 24, align: 4) {
4545
0x00 │ ╾ALLOC6<imm>╼ 03 00 00 00 ╾ALLOC7<imm>╼ 03 00 00 00 │ ╾──╼....╾──╼....
4646
0x10 │ ╾ALLOC8<imm>╼ 04 00 00 00 │ ╾──╼....
4747
}
4848

49-
ALLOC6 (size: 3, align: 1) {
50-
6d 65 68 │ meh
49+
ALLOC6 (size: 5, align: 1) {
50+
6d 65 68 ff 00 │ meh..
5151
}
5252

53-
ALLOC7 (size: 3, align: 1) {
54-
6d 6f 70 │ mop
53+
ALLOC7 (size: 5, align: 1) {
54+
6d 6f 70 ff 00 │ mop..
5555
}
5656

57-
ALLOC8 (size: 4, align: 1) {
58-
6d c3 b6 70 │ m..p
57+
ALLOC8 (size: 6, align: 1) {
58+
6d c3 b6 70 ff 00 │ m..p..
5959
}

tests/mir-opt/const_allocation.main.GVN.after.64bit.mir

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,12 @@ ALLOC2 (size: 32, align: 8) {
3636
0x10 │ ╾ALLOC5<imm>╼ 03 00 00 00 00 00 00 00 │ ╾──────╼........
3737
}
3838

39-
ALLOC4 (size: 3, align: 1) {
40-
66 6f 6f │ foo
39+
ALLOC4 (size: 5, align: 1) {
40+
66 6f 6f ff 00 │ foo..
4141
}
4242

43-
ALLOC5 (size: 3, align: 1) {
44-
62 61 72 │ bar
43+
ALLOC5 (size: 5, align: 1) {
44+
62 61 72 ff 00 │ bar..
4545
}
4646

4747
ALLOC3 (size: 48, align: 8) {
@@ -50,14 +50,14 @@ ALLOC3 (size: 48, align: 8) {
5050
0x20 │ ╾ALLOC8<imm>╼ 04 00 00 00 00 00 00 00 │ ╾──────╼........
5151
}
5252

53-
ALLOC6 (size: 3, align: 1) {
54-
6d 65 68 │ meh
53+
ALLOC6 (size: 5, align: 1) {
54+
6d 65 68 ff 00 │ meh..
5555
}
5656

57-
ALLOC7 (size: 3, align: 1) {
58-
6d 6f 70 │ mop
57+
ALLOC7 (size: 5, align: 1) {
58+
6d 6f 70 ff 00 │ mop..
5959
}
6060

61-
ALLOC8 (size: 4, align: 1) {
62-
6d c3 b6 70 │ m..p
61+
ALLOC8 (size: 6, align: 1) {
62+
6d c3 b6 70 ff 00 │ m..p..
6363
}

tests/ui-fulldeps/stable-mir/check_allocation.rs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,6 @@ extern crate rustc_driver;
1919
extern crate rustc_interface;
2020
extern crate stable_mir;
2121

22-
use stable_mir::crate_def::CrateDef;
23-
use stable_mir::mir::alloc::GlobalAlloc;
24-
use stable_mir::mir::mono::{Instance, InstanceKind, StaticDef};
25-
use stable_mir::mir::{Body, TerminatorKind};
26-
use stable_mir::ty::{Allocation, ConstantKind, RigidTy, TyKind};
27-
use stable_mir::{CrateItem, CrateItems, ItemKind};
2822
use std::ascii::Char;
2923
use std::assert_matches::assert_matches;
3024
use std::cmp::{max, min};
@@ -33,6 +27,13 @@ use std::ffi::CStr;
3327
use std::io::Write;
3428
use std::ops::ControlFlow;
3529

30+
use stable_mir::crate_def::CrateDef;
31+
use stable_mir::mir::alloc::GlobalAlloc;
32+
use stable_mir::mir::mono::{Instance, InstanceKind, StaticDef};
33+
use stable_mir::mir::{Body, TerminatorKind};
34+
use stable_mir::ty::{Allocation, ConstantKind, RigidTy, TyKind};
35+
use stable_mir::{CrateItem, CrateItems, ItemKind};
36+
3637
const CRATE_NAME: &str = "input";
3738

3839
/// This function uses the Stable MIR APIs to get information about the test crate.
@@ -77,11 +78,13 @@ fn check_bar(item: CrateItem) {
7778

7879
let alloc_id_0 = alloc.provenance.ptrs[0].1.0;
7980
let GlobalAlloc::Memory(allocation) = GlobalAlloc::from(alloc_id_0) else { unreachable!() };
80-
assert_eq!(allocation.bytes.len(), 3);
81+
assert_eq!(allocation.bytes.len(), 5);
8182
assert_eq!(allocation.bytes[0].unwrap(), Char::CapitalB.to_u8());
8283
assert_eq!(allocation.bytes[1].unwrap(), Char::SmallA.to_u8());
8384
assert_eq!(allocation.bytes[2].unwrap(), Char::SmallR.to_u8());
84-
assert_eq!(std::str::from_utf8(&allocation.raw_bytes().unwrap()), Ok("Bar"));
85+
assert_eq!(allocation.bytes[3].unwrap(), 0xff);
86+
assert_eq!(allocation.bytes[4].unwrap(), Char::Null.to_u8());
87+
assert_eq!(allocation.raw_bytes().unwrap(), b"Bar\xff\0");
8588
}
8689

8790
/// Check the allocation data for static `C_STR`.

tests/ui-fulldeps/stable-mir/check_transform.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@ extern crate rustc_driver;
1717
extern crate rustc_interface;
1818
extern crate stable_mir;
1919

20+
use std::convert::TryFrom;
21+
use std::io::Write;
22+
use std::ops::ControlFlow;
23+
2024
use stable_mir::mir::alloc::GlobalAlloc;
2125
use stable_mir::mir::mono::Instance;
2226
use stable_mir::mir::{Body, ConstOperand, Operand, Rvalue, StatementKind, TerminatorKind};
2327
use stable_mir::ty::{ConstantKind, MirConst};
2428
use stable_mir::{CrateDef, CrateItems, ItemKind};
25-
use std::convert::TryFrom;
26-
use std::io::Write;
27-
use std::ops::ControlFlow;
2829

2930
const CRATE_NAME: &str = "input";
3031

@@ -37,17 +38,17 @@ fn test_transform() -> ControlFlow<()> {
3738
let target_fn = *get_item(&items, (ItemKind::Fn, "dummy")).unwrap();
3839
let instance = Instance::try_from(target_fn).unwrap();
3940
let body = instance.body().unwrap();
40-
check_msg(&body, "oops");
41+
check_msg(&body, b"oops\xff\0");
4142

4243
let new_msg = "new panic message";
4344
let new_body = change_panic_msg(body, new_msg);
44-
check_msg(&new_body, new_msg);
45+
check_msg(&new_body, new_msg.as_bytes());
4546

4647
ControlFlow::Continue(())
4748
}
4849

4950
/// Check that the body panic message matches the given message.
50-
fn check_msg(body: &Body, expected: &str) {
51+
fn check_msg(body: &Body, expected: &[u8]) {
5152
let msg = body
5253
.blocks
5354
.iter()
@@ -80,7 +81,7 @@ fn check_msg(body: &Body, expected: &str) {
8081
unreachable!()
8182
};
8283
let bytes = val.raw_bytes().unwrap();
83-
Some(std::str::from_utf8(&bytes).unwrap().to_string())
84+
Some(bytes.to_owned())
8485
}
8586
_ => None,
8687
})

0 commit comments

Comments
 (0)