Skip to content

Commit 47b41b7

Browse files
committed
Auto merge of #87254 - rusticstuff:rustc_codegen_llvm_dont_emit_zero_sized_padding, r=eddyb
LLVM codegen: Don't emit zero-sized padding for fields Currently padding is emitted before fields of a struct and at the end of the struct regardless of the ABI. Even if no padding is required zero-sized padding fields are emitted. This is not useful and - more importantly - it makes it impossible to generate the exact vector types that LLVM expects for certain ARM SIMD intrinsics. This change should unblock the implementation of many ARM intrinsics using the `unadjusted` ABI, see rust-lang/stdarch#1143 (comment). This is a proof of concept only because the field lookup now takes O(number of fields) time compared to O(1) before since it recalculates the mapping at every lookup. I would like to find out how big the performance impact actually is before implementing caching or restricting this behavior to the `unadjusted` ABI. cc `@SparrowLii` `@bjorn3` ([Discussion on internals](https://internals.rust-lang.org/t/feature-request-add-a-way-in-rustc-for-generating-struct-type-llvm-ir-without-paddings/15007))
2 parents e8e1b32 + 02295f4 commit 47b41b7

File tree

7 files changed

+107
-41
lines changed

7 files changed

+107
-41
lines changed

compiler/rustc_codegen_llvm/src/context.rs

+16-2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use rustc_span::source_map::{Span, DUMMY_SP};
2424
use rustc_span::symbol::Symbol;
2525
use rustc_target::abi::{HasDataLayout, LayoutOf, PointeeInfo, Size, TargetDataLayout, VariantIdx};
2626
use rustc_target::spec::{HasTargetSpec, RelocModel, Target, TlsModel};
27+
use smallvec::SmallVec;
2728

2829
use std::cell::{Cell, RefCell};
2930
use std::ffi::CStr;
@@ -74,8 +75,12 @@ pub struct CodegenCx<'ll, 'tcx> {
7475
/// See <https://llvm.org/docs/LangRef.html#the-llvm-used-global-variable> for details
7576
pub used_statics: RefCell<Vec<&'ll Value>>,
7677

77-
pub lltypes: RefCell<FxHashMap<(Ty<'tcx>, Option<VariantIdx>), &'ll Type>>,
78+
/// Mapping of non-scalar types to llvm types and field remapping if needed.
79+
pub type_lowering: RefCell<FxHashMap<(Ty<'tcx>, Option<VariantIdx>), TypeLowering<'ll>>>,
80+
81+
/// Mapping of scalar types to llvm types.
7882
pub scalar_lltypes: RefCell<FxHashMap<Ty<'tcx>, &'ll Type>>,
83+
7984
pub pointee_infos: RefCell<FxHashMap<(Ty<'tcx>, Size), Option<PointeeInfo>>>,
8085
pub isize_ty: &'ll Type,
8186

@@ -92,6 +97,15 @@ pub struct CodegenCx<'ll, 'tcx> {
9297
local_gen_sym_counter: Cell<usize>,
9398
}
9499

100+
pub struct TypeLowering<'ll> {
101+
/// Associated LLVM type
102+
pub lltype: &'ll Type,
103+
104+
/// If padding is used the slice maps fields from source order
105+
/// to llvm order.
106+
pub field_remapping: Option<SmallVec<[u32; 4]>>,
107+
}
108+
95109
fn to_llvm_tls_model(tls_model: TlsModel) -> llvm::ThreadLocalMode {
96110
match tls_model {
97111
TlsModel::GeneralDynamic => llvm::ThreadLocalMode::GeneralDynamic,
@@ -304,7 +318,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
304318
const_globals: Default::default(),
305319
statics_to_rauw: RefCell::new(Vec::new()),
306320
used_statics: RefCell::new(Vec::new()),
307-
lltypes: Default::default(),
321+
type_lowering: Default::default(),
308322
scalar_lltypes: Default::default(),
309323
pointee_infos: Default::default(),
310324
isize_ty,

compiler/rustc_codegen_llvm/src/type_.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ impl LayoutTypeMethods<'tcx> for CodegenCx<'ll, 'tcx> {
266266
layout.is_llvm_scalar_pair()
267267
}
268268
fn backend_field_index(&self, layout: TyAndLayout<'tcx>, index: usize) -> u64 {
269-
layout.llvm_field_index(index)
269+
layout.llvm_field_index(self, index)
270270
}
271271
fn scalar_pair_element_backend_type(
272272
&self,

compiler/rustc_codegen_llvm/src/type_of.rs

+62-27
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::abi::FnAbi;
22
use crate::common::*;
3+
use crate::context::TypeLowering;
34
use crate::type_::Type;
45
use rustc_codegen_ssa::traits::*;
56
use rustc_middle::bug;
@@ -9,6 +10,7 @@ use rustc_middle::ty::{self, Ty, TypeFoldable};
910
use rustc_target::abi::{Abi, AddressSpace, Align, FieldsShape};
1011
use rustc_target::abi::{Int, Pointer, F32, F64};
1112
use rustc_target::abi::{LayoutOf, PointeeInfo, Scalar, Size, TyAndLayoutMethods, Variants};
13+
use smallvec::{smallvec, SmallVec};
1214
use tracing::debug;
1315

1416
use std::fmt::Write;
@@ -17,6 +19,7 @@ fn uncached_llvm_type<'a, 'tcx>(
1719
cx: &CodegenCx<'a, 'tcx>,
1820
layout: TyAndLayout<'tcx>,
1921
defer: &mut Option<(&'a Type, TyAndLayout<'tcx>)>,
22+
field_remapping: &mut Option<SmallVec<[u32; 4]>>,
2023
) -> &'a Type {
2124
match layout.abi {
2225
Abi::Scalar(_) => bug!("handled elsewhere"),
@@ -75,7 +78,8 @@ fn uncached_llvm_type<'a, 'tcx>(
7578
FieldsShape::Array { count, .. } => cx.type_array(layout.field(cx, 0).llvm_type(cx), count),
7679
FieldsShape::Arbitrary { .. } => match name {
7780
None => {
78-
let (llfields, packed) = struct_llfields(cx, layout);
81+
let (llfields, packed, new_field_remapping) = struct_llfields(cx, layout);
82+
*field_remapping = new_field_remapping;
7983
cx.type_struct(&llfields, packed)
8084
}
8185
Some(ref name) => {
@@ -90,14 +94,15 @@ fn uncached_llvm_type<'a, 'tcx>(
9094
fn struct_llfields<'a, 'tcx>(
9195
cx: &CodegenCx<'a, 'tcx>,
9296
layout: TyAndLayout<'tcx>,
93-
) -> (Vec<&'a Type>, bool) {
97+
) -> (Vec<&'a Type>, bool, Option<SmallVec<[u32; 4]>>) {
9498
debug!("struct_llfields: {:#?}", layout);
9599
let field_count = layout.fields.count();
96100

97101
let mut packed = false;
98102
let mut offset = Size::ZERO;
99103
let mut prev_effective_align = layout.align.abi;
100104
let mut result: Vec<_> = Vec::with_capacity(1 + field_count * 2);
105+
let mut field_remapping = smallvec![0; field_count];
101106
for i in layout.fields.index_by_increasing_offset() {
102107
let target_offset = layout.fields.offset(i as usize);
103108
let field = layout.field(cx, i);
@@ -116,33 +121,37 @@ fn struct_llfields<'a, 'tcx>(
116121
);
117122
assert!(target_offset >= offset);
118123
let padding = target_offset - offset;
119-
let padding_align = prev_effective_align.min(effective_field_align);
120-
assert_eq!(offset.align_to(padding_align) + padding, target_offset);
121-
result.push(cx.type_padding_filler(padding, padding_align));
122-
debug!(" padding before: {:?}", padding);
123-
124+
if padding != Size::ZERO {
125+
let padding_align = prev_effective_align.min(effective_field_align);
126+
assert_eq!(offset.align_to(padding_align) + padding, target_offset);
127+
result.push(cx.type_padding_filler(padding, padding_align));
128+
debug!(" padding before: {:?}", padding);
129+
}
130+
field_remapping[i] = result.len() as u32;
124131
result.push(field.llvm_type(cx));
125132
offset = target_offset + field.size;
126133
prev_effective_align = effective_field_align;
127134
}
135+
let padding_used = result.len() > field_count;
128136
if !layout.is_unsized() && field_count > 0 {
129137
if offset > layout.size {
130138
bug!("layout: {:#?} stride: {:?} offset: {:?}", layout, layout.size, offset);
131139
}
132140
let padding = layout.size - offset;
133-
let padding_align = prev_effective_align;
134-
assert_eq!(offset.align_to(padding_align) + padding, layout.size);
135-
debug!(
136-
"struct_llfields: pad_bytes: {:?} offset: {:?} stride: {:?}",
137-
padding, offset, layout.size
138-
);
139-
result.push(cx.type_padding_filler(padding, padding_align));
140-
assert_eq!(result.len(), 1 + field_count * 2);
141+
if padding != Size::ZERO {
142+
let padding_align = prev_effective_align;
143+
assert_eq!(offset.align_to(padding_align) + padding, layout.size);
144+
debug!(
145+
"struct_llfields: pad_bytes: {:?} offset: {:?} stride: {:?}",
146+
padding, offset, layout.size
147+
);
148+
result.push(cx.type_padding_filler(padding, padding_align));
149+
}
141150
} else {
142151
debug!("struct_llfields: offset: {:?} stride: {:?}", offset, layout.size);
143152
}
144-
145-
(result, packed)
153+
let field_remapping = if padding_used { Some(field_remapping) } else { None };
154+
(result, packed, field_remapping)
146155
}
147156

148157
impl<'a, 'tcx> CodegenCx<'a, 'tcx> {
@@ -177,7 +186,7 @@ pub trait LayoutLlvmExt<'tcx> {
177186
index: usize,
178187
immediate: bool,
179188
) -> &'a Type;
180-
fn llvm_field_index(&self, index: usize) -> u64;
189+
fn llvm_field_index<'a>(&self, cx: &CodegenCx<'a, 'tcx>, index: usize) -> u64;
181190
fn pointee_info_at<'a>(&self, cx: &CodegenCx<'a, 'tcx>, offset: Size) -> Option<PointeeInfo>;
182191
}
183192

@@ -234,8 +243,8 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
234243
Variants::Single { index } => Some(index),
235244
_ => None,
236245
};
237-
if let Some(&llty) = cx.lltypes.borrow().get(&(self.ty, variant_index)) {
238-
return llty;
246+
if let Some(ref llty) = cx.type_lowering.borrow().get(&(self.ty, variant_index)) {
247+
return llty.lltype;
239248
}
240249

241250
debug!("llvm_type({:#?})", self);
@@ -247,24 +256,32 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
247256
let normal_ty = cx.tcx.erase_regions(self.ty);
248257

249258
let mut defer = None;
259+
let mut field_remapping = None;
250260
let llty = if self.ty != normal_ty {
251261
let mut layout = cx.layout_of(normal_ty);
252262
if let Some(v) = variant_index {
253263
layout = layout.for_variant(cx, v);
254264
}
255265
layout.llvm_type(cx)
256266
} else {
257-
uncached_llvm_type(cx, *self, &mut defer)
267+
uncached_llvm_type(cx, *self, &mut defer, &mut field_remapping)
258268
};
259269
debug!("--> mapped {:#?} to llty={:?}", self, llty);
260270

261-
cx.lltypes.borrow_mut().insert((self.ty, variant_index), llty);
271+
cx.type_lowering.borrow_mut().insert(
272+
(self.ty, variant_index),
273+
TypeLowering { lltype: llty, field_remapping: field_remapping },
274+
);
262275

263276
if let Some((llty, layout)) = defer {
264-
let (llfields, packed) = struct_llfields(cx, layout);
265-
cx.set_struct_body(llty, &llfields, packed)
277+
let (llfields, packed, new_field_remapping) = struct_llfields(cx, layout);
278+
cx.set_struct_body(llty, &llfields, packed);
279+
cx.type_lowering
280+
.borrow_mut()
281+
.get_mut(&(self.ty, variant_index))
282+
.unwrap()
283+
.field_remapping = new_field_remapping;
266284
}
267-
268285
llty
269286
}
270287

@@ -340,7 +357,7 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
340357
self.scalar_llvm_type_at(cx, scalar, offset)
341358
}
342359

343-
fn llvm_field_index(&self, index: usize) -> u64 {
360+
fn llvm_field_index<'a>(&self, cx: &CodegenCx<'a, 'tcx>, index: usize) -> u64 {
344361
match self.abi {
345362
Abi::Scalar(_) | Abi::ScalarPair(..) => {
346363
bug!("TyAndLayout::llvm_field_index({:?}): not applicable", self)
@@ -354,7 +371,25 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
354371

355372
FieldsShape::Array { .. } => index as u64,
356373

357-
FieldsShape::Arbitrary { .. } => 1 + (self.fields.memory_index(index) as u64) * 2,
374+
FieldsShape::Arbitrary { .. } => {
375+
let variant_index = match self.variants {
376+
Variants::Single { index } => Some(index),
377+
_ => None,
378+
};
379+
380+
// Look up llvm field if indexes do not match memory order due to padding. If
381+
// `field_remapping` is `None` no padding was used and the llvm field index
382+
// matches the memory index.
383+
match cx.type_lowering.borrow().get(&(self.ty, variant_index)) {
384+
Some(TypeLowering { field_remapping: Some(ref remap), .. }) => {
385+
remap[index] as u64
386+
}
387+
Some(_) => self.fields.memory_index(index) as u64,
388+
None => {
389+
bug!("TyAndLayout::llvm_field_index({:?}): type info not found", self)
390+
}
391+
}
392+
}
358393
}
359394
}
360395

compiler/rustc_codegen_llvm/src/va_arg.rs

+8-5
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ fn emit_aapcs_va_arg(
9898
// Implementation of the AAPCS64 calling convention for va_args see
9999
// https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
100100
let va_list_addr = list.immediate();
101-
let va_list_ty = list.deref(bx.cx).layout.llvm_type(bx);
101+
let va_list_layout = list.deref(bx.cx).layout;
102+
let va_list_ty = va_list_layout.llvm_type(bx);
102103
let layout = bx.cx.layout_of(target_ty);
103104

104105
let mut maybe_reg = bx.build_sibling_block("va_arg.maybe_reg");
@@ -110,13 +111,15 @@ fn emit_aapcs_va_arg(
110111

111112
let gr_type = target_ty.is_any_ptr() || target_ty.is_integral();
112113
let (reg_off, reg_top_index, slot_size) = if gr_type {
113-
let gr_offs = bx.struct_gep(va_list_ty, va_list_addr, 7);
114+
let gr_offs =
115+
bx.struct_gep(va_list_ty, va_list_addr, va_list_layout.llvm_field_index(bx.cx, 3));
114116
let nreg = (layout.size.bytes() + 7) / 8;
115-
(gr_offs, 3, nreg * 8)
117+
(gr_offs, va_list_layout.llvm_field_index(bx.cx, 1), nreg * 8)
116118
} else {
117-
let vr_off = bx.struct_gep(va_list_ty, va_list_addr, 9);
119+
let vr_off =
120+
bx.struct_gep(va_list_ty, va_list_addr, va_list_layout.llvm_field_index(bx.cx, 4));
118121
let nreg = (layout.size.bytes() + 15) / 16;
119-
(vr_off, 5, nreg * 16)
122+
(vr_off, va_list_layout.llvm_field_index(bx.cx, 2), nreg * 16)
120123
};
121124

122125
// if the offset >= 0 then the value will be on the stack

src/test/codegen/align-enum.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ pub enum Align64 {
88
A(u32),
99
B(u32),
1010
}
11-
// CHECK: %Align64 = type { [0 x i32], i32, [15 x i32] }
11+
// CHECK: %Align64 = type { i32, [15 x i32] }
1212

1313
pub struct Nested64 {
1414
a: u8,

src/test/codegen/align-struct.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -5,28 +5,28 @@
55

66
#[repr(align(64))]
77
pub struct Align64(i32);
8-
// CHECK: %Align64 = type { [0 x i32], i32, [15 x i32] }
8+
// CHECK: %Align64 = type { i32, [15 x i32] }
99

1010
pub struct Nested64 {
1111
a: Align64,
1212
b: i32,
1313
c: i32,
1414
d: i8,
1515
}
16-
// CHECK: %Nested64 = type { [0 x i64], %Align64, [0 x i32], i32, [0 x i32], i32, [0 x i8], i8, [55 x i8] }
16+
// CHECK: %Nested64 = type { %Align64, i32, i32, i8, [55 x i8] }
1717

1818
pub enum Enum4 {
1919
A(i32),
2020
B(i32),
2121
}
22-
// CHECK: %"Enum4::A" = type { [1 x i32], i32, [0 x i32] }
22+
// CHECK: %"Enum4::A" = type { [1 x i32], i32 }
2323

2424
pub enum Enum64 {
2525
A(Align64),
2626
B(i32),
2727
}
28-
// CHECK: %Enum64 = type { [0 x i32], i32, [31 x i32] }
29-
// CHECK: %"Enum64::A" = type { [8 x i64], %Align64, [0 x i64] }
28+
// CHECK: %Enum64 = type { i32, [31 x i32] }
29+
// CHECK: %"Enum64::A" = type { [8 x i64], %Align64 }
3030

3131
// CHECK-LABEL: @align64
3232
#[no_mangle]

src/test/codegen/unpadded-simd.rs

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Make sure that no 0-sized padding is inserted in structs and that
2+
// structs are represented as expected by Neon intrinsics in LLVM.
3+
// See #87254.
4+
5+
#![crate_type = "lib"]
6+
#![feature(repr_simd)]
7+
8+
#[derive(Copy, Clone, Debug)]
9+
#[repr(simd)]
10+
pub struct int16x4_t(pub i16, pub i16, pub i16, pub i16);
11+
12+
#[derive(Copy, Clone, Debug)]
13+
pub struct int16x4x2_t(pub int16x4_t, pub int16x4_t);
14+
// CHECK: %int16x4x2_t = type { <4 x i16>, <4 x i16> }

0 commit comments

Comments
 (0)