Skip to content

Commit dd25686

Browse files
authored
Merge pull request #1425 from rust-lang/crypto_intrinsics_inline_asm
Implement AES-NI and SHA256 crypto intrinsics using inline asm
2 parents ede3269 + ca85cc3, commit dd25686

File tree

7 files changed

+404
-174
lines changed

7 files changed

+404
-174
lines changed

src/abi/mod.rs

+1
Original file line number | Diff line number | Diff line change
@@ -383,6 +383,7 @@ pub(crate) fn codegen_terminator_call<'tcx>(
383383
args,
384384
ret_place,
385385
target,
386+
source_info.span,
386387
);
387388
return;
388389
}

src/base.rs

+1 -1
Original file line number | Diff line number | Diff line change
@@ -456,7 +456,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
456456
);
457457
}
458458

459-
crate::inline_asm::codegen_inline_asm(
459+
crate::inline_asm::codegen_inline_asm_terminator(
460460
fx,
461461
source_info.span,
462462
template,

src/constant.rs

+31 -12
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,13 @@
11
//! Handling of `static`s, `const`s and promoted allocations
22
3+
use std::cmp::Ordering;
4+
35
use cranelift_module::*;
46
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
57
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
68
use rustc_middle::mir::interpret::{read_target_uint, AllocId, GlobalAlloc, Scalar};
79
use rustc_middle::mir::ConstValue;
10+
use rustc_middle::ty::ScalarInt;
811

912
use crate::prelude::*;
1013

@@ -430,17 +433,17 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
430433
pub(crate) fn mir_operand_get_const_val<'tcx>(
431434
fx: &FunctionCx<'_, '_, 'tcx>,
432435
operand: &Operand<'tcx>,
433-
) -> Option<ConstValue<'tcx>> {
436+
) -> Option<ScalarInt> {
434437
match operand {
435-
Operand::Constant(const_) => Some(eval_mir_constant(fx, const_).0),
438+
Operand::Constant(const_) => eval_mir_constant(fx, const_).0.try_to_scalar_int(),
436439
// FIXME(rust-lang/rust#85105): Casts like `IMM8 as u32` result in the const being stored
437440
// inside a temporary before being passed to the intrinsic requiring the const argument.
438441
// This code tries to find a single constant defining definition of the referenced local.
439442
Operand::Copy(place) | Operand::Move(place) => {
440443
if !place.projection.is_empty() {
441444
return None;
442445
}
443-
let mut computed_const_val = None;
446+
let mut computed_scalar_int = None;
444447
for bb_data in fx.mir.basic_blocks.iter() {
445448
for stmt in &bb_data.statements {
446449
match &stmt.kind {
@@ -456,22 +459,38 @@ pub(crate) fn mir_operand_get_const_val<'tcx>(
456459
operand,
457460
ty,
458461
) => {
459-
if computed_const_val.is_some() {
462+
if computed_scalar_int.is_some() {
460463
return None; // local assigned twice
461464
}
462465
if !matches!(ty.kind(), ty::Uint(_) | ty::Int(_)) {
463466
return None;
464467
}
465-
let const_val = mir_operand_get_const_val(fx, operand)?;
466-
if fx.layout_of(*ty).size
467-
!= const_val.try_to_scalar_int()?.size()
468+
let scalar_int = mir_operand_get_const_val(fx, operand)?;
469+
let scalar_int = match fx
470+
.layout_of(*ty)
471+
.size
472+
.cmp(&scalar_int.size())
468473
{
469-
return None;
470-
}
471-
computed_const_val = Some(const_val);
474+
Ordering::Equal => scalar_int,
475+
Ordering::Less => match ty.kind() {
476+
ty::Uint(_) => ScalarInt::try_from_uint(
477+
scalar_int.try_to_uint(scalar_int.size()).unwrap(),
478+
fx.layout_of(*ty).size,
479+
)
480+
.unwrap(),
481+
ty::Int(_) => ScalarInt::try_from_int(
482+
scalar_int.try_to_int(scalar_int.size()).unwrap(),
483+
fx.layout_of(*ty).size,
484+
)
485+
.unwrap(),
486+
_ => unreachable!(),
487+
},
488+
Ordering::Greater => return None,
489+
};
490+
computed_scalar_int = Some(scalar_int);
472491
}
473492
Rvalue::Use(operand) => {
474-
computed_const_val = mir_operand_get_const_val(fx, operand)
493+
computed_scalar_int = mir_operand_get_const_val(fx, operand)
475494
}
476495
_ => return None,
477496
}
@@ -522,7 +541,7 @@ pub(crate) fn mir_operand_get_const_val<'tcx>(
522541
TerminatorKind::Call { .. } => {}
523542
}
524543
}
525-
computed_const_val
544+
computed_scalar_int
526545
}
527546
}
528547
}

src/inline_asm.rs

+64 -102
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ use target_lexicon::BinaryFormat;
1010

1111
use crate::prelude::*;
1212

13-
enum CInlineAsmOperand<'tcx> {
13+
pub(crate) enum CInlineAsmOperand<'tcx> {
1414
In {
1515
reg: InlineAsmRegOrRegClass,
1616
value: Value,
@@ -34,16 +34,14 @@ enum CInlineAsmOperand<'tcx> {
3434
},
3535
}
3636

37-
pub(crate) fn codegen_inline_asm<'tcx>(
37+
pub(crate) fn codegen_inline_asm_terminator<'tcx>(
3838
fx: &mut FunctionCx<'_, '_, 'tcx>,
3939
span: Span,
4040
template: &[InlineAsmTemplatePiece],
4141
operands: &[InlineAsmOperand<'tcx>],
4242
options: InlineAsmOptions,
4343
destination: Option<mir::BasicBlock>,
4444
) {
45-
// FIXME add .eh_frame unwind info directives
46-
4745
// Used by panic_abort on Windows, but uses a syntax which only happens to work with
4846
// asm!() by accident and breaks with the GNU assembler as well as global_asm!() for
4947
// the LLVM backend.
@@ -135,15 +133,33 @@ pub(crate) fn codegen_inline_asm<'tcx>(
135133
})
136134
.collect::<Vec<_>>();
137135

138-
let mut inputs = Vec::new();
139-
let mut outputs = Vec::new();
136+
codegen_inline_asm_inner(fx, template, &operands, options);
137+
138+
match destination {
139+
Some(destination) => {
140+
let destination_block = fx.get_block(destination);
141+
fx.bcx.ins().jump(destination_block, &[]);
142+
}
143+
None => {
144+
fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
145+
}
146+
}
147+
}
148+
149+
pub(crate) fn codegen_inline_asm_inner<'tcx>(
150+
fx: &mut FunctionCx<'_, '_, 'tcx>,
151+
template: &[InlineAsmTemplatePiece],
152+
operands: &[CInlineAsmOperand<'tcx>],
153+
options: InlineAsmOptions,
154+
) {
155+
// FIXME add .eh_frame unwind info directives
140156

141157
let mut asm_gen = InlineAssemblyGenerator {
142158
tcx: fx.tcx,
143159
arch: fx.tcx.sess.asm_arch.unwrap(),
144160
enclosing_def_id: fx.instance.def_id(),
145161
template,
146-
operands: &operands,
162+
operands,
147163
options,
148164
registers: Vec::new(),
149165
stack_slots_clobber: Vec::new(),
@@ -165,6 +181,8 @@ pub(crate) fn codegen_inline_asm<'tcx>(
165181
let generated_asm = asm_gen.generate_asm_wrapper(&asm_name);
166182
fx.cx.global_asm.push_str(&generated_asm);
167183

184+
let mut inputs = Vec::new();
185+
let mut outputs = Vec::new();
168186
for (i, operand) in operands.iter().enumerate() {
169187
match operand {
170188
CInlineAsmOperand::In { reg: _, value } => {
@@ -186,16 +204,6 @@ pub(crate) fn codegen_inline_asm<'tcx>(
186204
}
187205

188206
call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs);
189-
190-
match destination {
191-
Some(destination) => {
192-
let destination_block = fx.get_block(destination);
193-
fx.bcx.ins().jump(destination_block, &[]);
194-
}
195-
None => {
196-
fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
197-
}
198-
}
199207
}
200208

201209
struct InlineAssemblyGenerator<'a, 'tcx> {
@@ -637,8 +645,21 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
637645
) {
638646
match arch {
639647
InlineAsmArch::X86_64 => {
640-
write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
641-
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
648+
match reg {
649+
InlineAsmReg::X86(reg)
650+
if reg as u32 >= X86InlineAsmReg::xmm0 as u32
651+
&& reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
652+
{
653+
// rustc emits x0 rather than xmm0
654+
write!(generated_asm, " movups [rbx+0x{:x}], ", offset.bytes()).unwrap();
655+
write!(generated_asm, "xmm{}", reg as u32 - X86InlineAsmReg::xmm0 as u32)
656+
.unwrap();
657+
}
658+
_ => {
659+
write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
660+
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
661+
}
662+
}
642663
generated_asm.push('\n');
643664
}
644665
InlineAsmArch::AArch64 => {
@@ -663,8 +684,24 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
663684
) {
664685
match arch {
665686
InlineAsmArch::X86_64 => {
666-
generated_asm.push_str(" mov ");
667-
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
687+
match reg {
688+
InlineAsmReg::X86(reg)
689+
if reg as u32 >= X86InlineAsmReg::xmm0 as u32
690+
&& reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
691+
{
692+
// rustc emits x0 rather than xmm0
693+
write!(
694+
generated_asm,
695+
" movups xmm{}",
696+
reg as u32 - X86InlineAsmReg::xmm0 as u32
697+
)
698+
.unwrap();
699+
}
700+
_ => {
701+
generated_asm.push_str(" mov ");
702+
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap()
703+
}
704+
}
668705
writeln!(generated_asm, ", [rbx+0x{:x}]", offset.bytes()).unwrap();
669706
}
670707
InlineAsmArch::AArch64 => {
@@ -720,7 +757,12 @@ fn call_inline_asm<'tcx>(
720757
fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]);
721758

722759
for (offset, place) in outputs {
723-
let ty = fx.clif_type(place.layout().ty).unwrap();
760+
let ty = if place.layout().ty.is_simd() {
761+
let (lane_count, lane_type) = place.layout().ty.simd_size_and_type(fx.tcx);
762+
fx.clif_type(lane_type).unwrap().by(lane_count.try_into().unwrap()).unwrap()
763+
} else {
764+
fx.clif_type(place.layout().ty).unwrap()
765+
};
724766
let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load(
725767
fx,
726768
ty,
@@ -729,83 +771,3 @@ fn call_inline_asm<'tcx>(
729771
place.write_cvalue(fx, CValue::by_val(value, place.layout()));
730772
}
731773
}
732-
733-
pub(crate) fn codegen_xgetbv<'tcx>(
734-
fx: &mut FunctionCx<'_, '_, 'tcx>,
735-
xcr_no: Value,
736-
ret: CPlace<'tcx>,
737-
) {
738-
// FIXME add .eh_frame unwind info directives
739-
740-
let operands = vec![
741-
CInlineAsmOperand::In {
742-
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)),
743-
value: xcr_no,
744-
},
745-
CInlineAsmOperand::Out {
746-
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
747-
late: true,
748-
place: Some(ret),
749-
},
750-
CInlineAsmOperand::Out {
751-
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
752-
late: true,
753-
place: None,
754-
},
755-
];
756-
let options = InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM;
757-
758-
let mut inputs = Vec::new();
759-
let mut outputs = Vec::new();
760-
761-
let mut asm_gen = InlineAssemblyGenerator {
762-
tcx: fx.tcx,
763-
arch: fx.tcx.sess.asm_arch.unwrap(),
764-
enclosing_def_id: fx.instance.def_id(),
765-
template: &[InlineAsmTemplatePiece::String(
766-
"
767-
xgetbv
768-
// out = rdx << 32 | rax
769-
shl rdx, 32
770-
or rax, rdx
771-
"
772-
.to_string(),
773-
)],
774-
operands: &operands,
775-
options,
776-
registers: Vec::new(),
777-
stack_slots_clobber: Vec::new(),
778-
stack_slots_input: Vec::new(),
779-
stack_slots_output: Vec::new(),
780-
stack_slot_size: Size::from_bytes(0),
781-
};
782-
asm_gen.allocate_registers();
783-
asm_gen.allocate_stack_slots();
784-
785-
let inline_asm_index = fx.cx.inline_asm_index.get();
786-
fx.cx.inline_asm_index.set(inline_asm_index + 1);
787-
let asm_name = format!(
788-
"__inline_asm_{}_n{}",
789-
fx.cx.cgu_name.as_str().replace('.', "__").replace('-', "_"),
790-
inline_asm_index
791-
);
792-
793-
let generated_asm = asm_gen.generate_asm_wrapper(&asm_name);
794-
fx.cx.global_asm.push_str(&generated_asm);
795-
796-
for (i, operand) in operands.iter().enumerate() {
797-
match operand {
798-
CInlineAsmOperand::In { reg: _, value } => {
799-
inputs.push((asm_gen.stack_slots_input[i].unwrap(), *value));
800-
}
801-
CInlineAsmOperand::Out { reg: _, late: _, place } => {
802-
if let Some(place) = place {
803-
outputs.push((asm_gen.stack_slots_output[i].unwrap(), *place));
804-
}
805-
}
806-
_ => unreachable!(),
807-
}
808-
}
809-
810-
call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs);
811-
}

src/intrinsics/llvm.rs

+2
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
1212
args: &[mir::Operand<'tcx>],
1313
ret: CPlace<'tcx>,
1414
target: Option<BasicBlock>,
15+
span: Span,
1516
) {
1617
if intrinsic.starts_with("llvm.aarch64") {
1718
return llvm_aarch64::codegen_aarch64_llvm_intrinsic_call(
@@ -31,6 +32,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
3132
args,
3233
ret,
3334
target,
35+
span,
3436
);
3537
}
3638

0 commit comments

Comments
 (0)