Implement AES-NI and SHA256 crypto intrinsics using inline asm #1425

Merged · 7 commits · Nov 12, 2023
1 change: 1 addition & 0 deletions src/abi/mod.rs
@@ -383,6 +383,7 @@ pub(crate) fn codegen_terminator_call<'tcx>(
args,
ret_place,
target,
source_info.span,
);
return;
}
2 changes: 1 addition & 1 deletion src/base.rs
@@ -456,7 +456,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
);
}

crate::inline_asm::codegen_inline_asm(
crate::inline_asm::codegen_inline_asm_terminator(
fx,
source_info.span,
template,
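The rename here pairs with the split in src/inline_asm.rs further down: the terminator variant keeps the MIR control-flow handling (jump to the destination block, or trap when the asm diverges), while the new `codegen_inline_asm_inner` does register allocation, wrapper generation, and the call, so intrinsic lowerings can reuse it with hand-built `CInlineAsmOperand`s. A minimal standalone sketch of that split — `Destination` and the `println!` bodies are simplified stand-ins, not rustc's real types:

```rust
// Standalone sketch of the refactoring (stand-in types, not rustc's).
enum Destination {
    Block(usize), // Some(destination) in the real code
    Diverges,     // None: the asm never returns
}

// Shared lowering: register allocation, asm wrapper emission, the call itself.
fn codegen_inline_asm_inner(template: &str) {
    println!("emit wrapper for: {template}");
}

// Terminator variant: delegates, then emits the MIR-level control flow.
fn codegen_inline_asm_terminator(template: &str, destination: Destination) {
    codegen_inline_asm_inner(template);
    match destination {
        Destination::Block(b) => println!("jump block{b}"),    // fx.bcx.ins().jump(...)
        Destination::Diverges => println!("trap unreachable"), // fx.bcx.ins().trap(...)
    }
}

fn main() {
    codegen_inline_asm_terminator("nop", Destination::Block(3));
}
```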
43 changes: 31 additions & 12 deletions src/constant.rs
@@ -1,10 +1,13 @@
//! Handling of `static`s, `const`s and promoted allocations

use std::cmp::Ordering;

use cranelift_module::*;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc_middle::mir::interpret::{read_target_uint, AllocId, GlobalAlloc, Scalar};
use rustc_middle::mir::ConstValue;
use rustc_middle::ty::ScalarInt;

use crate::prelude::*;

@@ -430,17 +433,17 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
pub(crate) fn mir_operand_get_const_val<'tcx>(
fx: &FunctionCx<'_, '_, 'tcx>,
operand: &Operand<'tcx>,
) -> Option<ConstValue<'tcx>> {
) -> Option<ScalarInt> {
match operand {
Operand::Constant(const_) => Some(eval_mir_constant(fx, const_).0),
Operand::Constant(const_) => eval_mir_constant(fx, const_).0.try_to_scalar_int(),
// FIXME(rust-lang/rust#85105): Casts like `IMM8 as u32` result in the const being stored
// inside a temporary before being passed to the intrinsic requiring the const argument.
// This code tries to find a single constant defining definition of the referenced local.
Operand::Copy(place) | Operand::Move(place) => {
if !place.projection.is_empty() {
return None;
}
let mut computed_const_val = None;
let mut computed_scalar_int = None;
for bb_data in fx.mir.basic_blocks.iter() {
for stmt in &bb_data.statements {
match &stmt.kind {
@@ -456,22 +459,38 @@ pub(crate) fn mir_operand_get_const_val<'tcx>(
operand,
ty,
) => {
if computed_const_val.is_some() {
if computed_scalar_int.is_some() {
return None; // local assigned twice
}
if !matches!(ty.kind(), ty::Uint(_) | ty::Int(_)) {
return None;
}
let const_val = mir_operand_get_const_val(fx, operand)?;
if fx.layout_of(*ty).size
!= const_val.try_to_scalar_int()?.size()
let scalar_int = mir_operand_get_const_val(fx, operand)?;
let scalar_int = match fx
.layout_of(*ty)
.size
.cmp(&scalar_int.size())
{
return None;
}
computed_const_val = Some(const_val);
Ordering::Equal => scalar_int,
Ordering::Less => match ty.kind() {
ty::Uint(_) => ScalarInt::try_from_uint(
scalar_int.try_to_uint(scalar_int.size()).unwrap(),
fx.layout_of(*ty).size,
)
.unwrap(),
ty::Int(_) => ScalarInt::try_from_int(
scalar_int.try_to_int(scalar_int.size()).unwrap(),
fx.layout_of(*ty).size,
)
.unwrap(),
_ => unreachable!(),
},
Ordering::Greater => return None,
};
computed_scalar_int = Some(scalar_int);
}
Rvalue::Use(operand) => {
computed_const_val = mir_operand_get_const_val(fx, operand)
computed_scalar_int = mir_operand_get_const_val(fx, operand)
}
_ => return None,
}
@@ -522,7 +541,7 @@ pub(crate) fn mir_operand_get_const_val<'tcx>(
TerminatorKind::Call { .. } => {}
}
}
computed_const_val
computed_scalar_int
}
}
}
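The hunk above is the substantive change in this file: `mir_operand_get_const_val` now returns a `ScalarInt` and, instead of rejecting any size mismatch between the cast target and the constant, re-types the constant when the cast narrows it (equal sizes pass through; widening still bails out). A self-contained analogue of that size handling — `adjust_const_width` is an illustrative name; the real code works on rustc's `ScalarInt` and picks `try_from_uint` or `try_from_int` based on `ty::Uint` vs `ty::Int`:

```rust
use std::cmp::Ordering;

// Analogue of the new size-adjustment logic: a constant of `from_bits` bits
// feeding an integer cast to a `to_bits`-bit type.
fn adjust_const_width(value: u128, from_bits: u32, to_bits: u32) -> Option<u128> {
    match to_bits.cmp(&from_bits) {
        // same width: the constant is usable as-is
        Ordering::Equal => Some(value),
        // narrowing cast such as `IMM as u8`: re-type the constant at the
        // smaller width; the patch unwraps here, assuming the value fits
        Ordering::Less => {
            let mask = (1u128 << to_bits) - 1;
            (value & mask == value).then_some(value)
        }
        // widening: give up, as the old size-equality check did
        Ordering::Greater => None,
    }
}

fn main() {
    assert_eq!(adjust_const_width(13, 32, 32), Some(13)); // same size
    assert_eq!(adjust_const_width(13, 32, 8), Some(13));  // `13u32 as u8`
    assert_eq!(adjust_const_width(13, 8, 32), None);      // widening rejected
}
```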
166 changes: 64 additions & 102 deletions src/inline_asm.rs
@@ -10,7 +10,7 @@ use target_lexicon::BinaryFormat;

use crate::prelude::*;

enum CInlineAsmOperand<'tcx> {
pub(crate) enum CInlineAsmOperand<'tcx> {
In {
reg: InlineAsmRegOrRegClass,
value: Value,
@@ -34,16 +34,14 @@ enum CInlineAsmOperand<'tcx> {
},
}

pub(crate) fn codegen_inline_asm<'tcx>(
pub(crate) fn codegen_inline_asm_terminator<'tcx>(
fx: &mut FunctionCx<'_, '_, 'tcx>,
span: Span,
template: &[InlineAsmTemplatePiece],
operands: &[InlineAsmOperand<'tcx>],
options: InlineAsmOptions,
destination: Option<mir::BasicBlock>,
) {
// FIXME add .eh_frame unwind info directives

// Used by panic_abort on Windows, but uses a syntax which only happens to work with
// asm!() by accident and breaks with the GNU assembler as well as global_asm!() for
// the LLVM backend.
@@ -135,15 +133,33 @@ pub(crate) fn codegen_inline_asm<'tcx>(
})
.collect::<Vec<_>>();

let mut inputs = Vec::new();
let mut outputs = Vec::new();
codegen_inline_asm_inner(fx, template, &operands, options);

match destination {
Some(destination) => {
let destination_block = fx.get_block(destination);
fx.bcx.ins().jump(destination_block, &[]);
}
None => {
fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
}
}
}

pub(crate) fn codegen_inline_asm_inner<'tcx>(
fx: &mut FunctionCx<'_, '_, 'tcx>,
template: &[InlineAsmTemplatePiece],
operands: &[CInlineAsmOperand<'tcx>],
options: InlineAsmOptions,
) {
// FIXME add .eh_frame unwind info directives

let mut asm_gen = InlineAssemblyGenerator {
tcx: fx.tcx,
arch: fx.tcx.sess.asm_arch.unwrap(),
enclosing_def_id: fx.instance.def_id(),
template,
operands: &operands,
operands,
options,
registers: Vec::new(),
stack_slots_clobber: Vec::new(),
@@ -165,6 +181,8 @@
let generated_asm = asm_gen.generate_asm_wrapper(&asm_name);
fx.cx.global_asm.push_str(&generated_asm);

let mut inputs = Vec::new();
let mut outputs = Vec::new();
for (i, operand) in operands.iter().enumerate() {
match operand {
CInlineAsmOperand::In { reg: _, value } => {
@@ -186,16 +204,6 @@
}

call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs);

match destination {
Some(destination) => {
let destination_block = fx.get_block(destination);
fx.bcx.ins().jump(destination_block, &[]);
}
None => {
fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
}
}
}

struct InlineAssemblyGenerator<'a, 'tcx> {
@@ -637,8 +645,21 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
) {
match arch {
InlineAsmArch::X86_64 => {
write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
match reg {
InlineAsmReg::X86(reg)
if reg as u32 >= X86InlineAsmReg::xmm0 as u32
&& reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
{
// rustc emits x0 rather than xmm0
write!(generated_asm, " movups [rbx+0x{:x}], ", offset.bytes()).unwrap();
write!(generated_asm, "xmm{}", reg as u32 - X86InlineAsmReg::xmm0 as u32)
.unwrap();
}
_ => {
write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
}
}
generated_asm.push('\n');
}
InlineAsmArch::AArch64 => {
@@ -663,8 +684,24 @@
) {
match arch {
InlineAsmArch::X86_64 => {
generated_asm.push_str(" mov ");
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
match reg {
InlineAsmReg::X86(reg)
if reg as u32 >= X86InlineAsmReg::xmm0 as u32
&& reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
{
// rustc emits x0 rather than xmm0
write!(
generated_asm,
" movups xmm{}",
reg as u32 - X86InlineAsmReg::xmm0 as u32
)
.unwrap();
}
_ => {
generated_asm.push_str(" mov ");
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap()
}
}
writeln!(generated_asm, ", [rbx+0x{:x}]", offset.bytes()).unwrap();
}
InlineAsmArch::AArch64 => {
@@ -720,7 +757,12 @@ fn call_inline_asm<'tcx>(
fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]);

for (offset, place) in outputs {
let ty = fx.clif_type(place.layout().ty).unwrap();
let ty = if place.layout().ty.is_simd() {
let (lane_count, lane_type) = place.layout().ty.simd_size_and_type(fx.tcx);
fx.clif_type(lane_type).unwrap().by(lane_count.try_into().unwrap()).unwrap()
} else {
fx.clif_type(place.layout().ty).unwrap()
};
let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load(
fx,
ty,
@@ -729,83 +771,3 @@
place.write_cvalue(fx, CValue::by_val(value, place.layout()));
}
}
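Two details in the hunks above are easy to miss. First, the save/restore paths special-case SSE registers: rustc's `reg.emit()` prints them with the modifier-less name `x0` rather than `xmm0`, and a 128-bit register needs `movups` rather than plain `mov`, so the code formats the `xmm{N}` name itself. Second, `call_inline_asm` now builds a Cranelift vector type from the lane type and lane count for SIMD outputs, since `fx.clif_type` alone only handles scalars. A standalone sketch of the instruction/name choice — `save_register` is a hypothetical simplification; the real code matches on `InlineAsmReg::X86` and an xmm register range. (The block that follows below is the old `codegen_xgetbv`, removed by this PR; see the note after it.)

```rust
use std::fmt::Write;

// Sketch: pick the instruction and register spelling when spilling a register
// to the stack slot that the generated asm wrapper addresses through rbx.
fn save_register(asm: &mut String, gp_name: &str, xmm_index: Option<u32>, offset: u64) {
    match xmm_index {
        // SSE register: 128 bits wide, so `movups`, with the `xmm{N}` name
        // that rustc's `reg.emit()` would print as plain `x{N}`
        Some(i) => writeln!(asm, "    movups [rbx+{offset:#x}], xmm{i}").unwrap(),
        // general-purpose register: plain `mov` and the emitted name
        None => writeln!(asm, "    mov [rbx+{offset:#x}], {gp_name}").unwrap(),
    }
}

fn main() {
    let mut asm = String::new();
    save_register(&mut asm, "", Some(0), 0x0);  // -> movups [rbx+0x0], xmm0
    save_register(&mut asm, "rax", None, 0x10); // -> mov [rbx+0x10], rax
    print!("{asm}");
}
```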

pub(crate) fn codegen_xgetbv<'tcx>(
fx: &mut FunctionCx<'_, '_, 'tcx>,
xcr_no: Value,
ret: CPlace<'tcx>,
) {
// FIXME add .eh_frame unwind info directives

let operands = vec![
CInlineAsmOperand::In {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)),
value: xcr_no,
},
CInlineAsmOperand::Out {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
late: true,
place: Some(ret),
},
CInlineAsmOperand::Out {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
late: true,
place: None,
},
];
let options = InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM;

let mut inputs = Vec::new();
let mut outputs = Vec::new();

let mut asm_gen = InlineAssemblyGenerator {
tcx: fx.tcx,
arch: fx.tcx.sess.asm_arch.unwrap(),
enclosing_def_id: fx.instance.def_id(),
template: &[InlineAsmTemplatePiece::String(
"
xgetbv
// out = rdx << 32 | rax
shl rdx, 32
or rax, rdx
"
.to_string(),
)],
operands: &operands,
options,
registers: Vec::new(),
stack_slots_clobber: Vec::new(),
stack_slots_input: Vec::new(),
stack_slots_output: Vec::new(),
stack_slot_size: Size::from_bytes(0),
};
asm_gen.allocate_registers();
asm_gen.allocate_stack_slots();

let inline_asm_index = fx.cx.inline_asm_index.get();
fx.cx.inline_asm_index.set(inline_asm_index + 1);
let asm_name = format!(
"__inline_asm_{}_n{}",
fx.cx.cgu_name.as_str().replace('.', "__").replace('-', "_"),
inline_asm_index
);

let generated_asm = asm_gen.generate_asm_wrapper(&asm_name);
fx.cx.global_asm.push_str(&generated_asm);

for (i, operand) in operands.iter().enumerate() {
match operand {
CInlineAsmOperand::In { reg: _, value } => {
inputs.push((asm_gen.stack_slots_input[i].unwrap(), *value));
}
CInlineAsmOperand::Out { reg: _, late: _, place } => {
if let Some(place) = place {
outputs.push((asm_gen.stack_slots_output[i].unwrap(), *place));
}
}
_ => unreachable!(),
}
}

call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs);
}
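The deleted `codegen_xgetbv` above is the payoff of the refactoring: with `CInlineAsmOperand` now `pub(crate)` and `codegen_inline_asm_inner` callable directly, the xgetbv lowering no longer needs its own copy of the register-allocation and wrapper boilerplate (presumably it moves next to the other x86 intrinsic lowerings, which this diff section does not show). For reference, a user-level Rust equivalent of the exact asm pattern the deleted helper emitted — not backend code, just the same template expressed with `std::arch::asm!`:

```rust
// x86_64-only: read an extended control register, combining edx:eax into a
// u64 exactly as the removed template did (shl rdx, 32; or rax, rdx).
#[cfg(target_arch = "x86_64")]
unsafe fn xgetbv(xcr_no: u32) -> u64 {
    let result: u64;
    std::arch::asm!(
        "xgetbv",      // writes eax (low half) and edx (high half)
        "shl rdx, 32",
        "or rax, rdx", // out = rdx << 32 | rax
        in("ecx") xcr_no,
        out("rax") result,
        out("rdx") _,  // clobbered, like the place-less Out operand above
        options(nostack, pure, nomem), // same options the helper passed
    );
    result
}

fn main() {
    // Reading XCR0 requires XSAVE enabled by the OS (true on modern systems).
    #[cfg(target_arch = "x86_64")]
    println!("xcr0 = {:#x}", unsafe { xgetbv(0) });
}
```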
2 changes: 2 additions & 0 deletions src/intrinsics/llvm.rs
@@ -12,6 +12,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
args: &[mir::Operand<'tcx>],
ret: CPlace<'tcx>,
target: Option<BasicBlock>,
span: Span,
) {
if intrinsic.starts_with("llvm.aarch64") {
return llvm_aarch64::codegen_aarch64_llvm_intrinsic_call(
@@ -31,6 +32,7 @@
args,
ret,
target,
span,
);
}

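The new `span` parameter completes the chain started in src/abi/mod.rs at the top of this diff: `codegen_terminator_call` now forwards `source_info.span`, presumably so intrinsic handlers can report problems (such as an unsupported LLVM intrinsic) at the offending call site rather than with no location. A minimal standalone analogue of the pattern — `Span` here is a stand-in struct, assuming nothing about rustc's diagnostics API:

```rust
// Standalone analogue: thread a source span through so errors point at the call.
#[derive(Clone, Copy, Debug)]
struct Span { lo: usize, hi: usize }

fn codegen_llvm_intrinsic_call(intrinsic: &str, span: Span) {
    match intrinsic {
        "llvm.x86.sse2.pause" => { /* lower it */ }
        _ => panic!("unsupported llvm intrinsic {intrinsic} at {span:?}"),
    }
}

fn main() {
    codegen_llvm_intrinsic_call("llvm.x86.sse2.pause", Span { lo: 10, hi: 42 });
}
```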