Skip to content

Commit 1fc75fb

Browse files
committed
Implement AES-NI intrinsics using inline asm
1 parent 7ad5c32 commit 1fc75fb

File tree

4 files changed

+198
-5
lines changed

4 files changed

+198
-5
lines changed

src/abi/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,7 @@ pub(crate) fn codegen_terminator_call<'tcx>(
383383
args,
384384
ret_place,
385385
target,
386+
source_info.span,
386387
);
387388
return;
388389
}

src/inline_asm.rs

+39-5
Original file line numberDiff line numberDiff line change
@@ -645,8 +645,21 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
645645
) {
646646
match arch {
647647
InlineAsmArch::X86_64 => {
648-
write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
649-
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
648+
match reg {
649+
InlineAsmReg::X86(reg)
650+
if reg as u32 >= X86InlineAsmReg::xmm0 as u32
651+
&& reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
652+
{
653+
// rustc emits x0 rather than xmm0
654+
write!(generated_asm, " movups [rbx+0x{:x}], ", offset.bytes()).unwrap();
655+
write!(generated_asm, "xmm{}", reg as u32 - X86InlineAsmReg::xmm0 as u32)
656+
.unwrap();
657+
}
658+
_ => {
659+
write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
660+
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
661+
}
662+
}
650663
generated_asm.push('\n');
651664
}
652665
InlineAsmArch::AArch64 => {
@@ -671,8 +684,24 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
671684
) {
672685
match arch {
673686
InlineAsmArch::X86_64 => {
674-
generated_asm.push_str(" mov ");
675-
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
687+
match reg {
688+
InlineAsmReg::X86(reg)
689+
if reg as u32 >= X86InlineAsmReg::xmm0 as u32
690+
&& reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
691+
{
692+
// rustc emits x0 rather than xmm0
693+
write!(
694+
generated_asm,
695+
" movups xmm{}",
696+
reg as u32 - X86InlineAsmReg::xmm0 as u32
697+
)
698+
.unwrap();
699+
}
700+
_ => {
701+
generated_asm.push_str(" mov ");
702+
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap()
703+
}
704+
}
676705
writeln!(generated_asm, ", [rbx+0x{:x}]", offset.bytes()).unwrap();
677706
}
678707
InlineAsmArch::AArch64 => {
@@ -728,7 +757,12 @@ fn call_inline_asm<'tcx>(
728757
fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]);
729758

730759
for (offset, place) in outputs {
731-
let ty = fx.clif_type(place.layout().ty).unwrap();
760+
let ty = if place.layout().ty.is_simd() {
761+
let (lane_count, lane_type) = place.layout().ty.simd_size_and_type(fx.tcx);
762+
fx.clif_type(lane_type).unwrap().by(lane_count.try_into().unwrap()).unwrap()
763+
} else {
764+
fx.clif_type(place.layout().ty).unwrap()
765+
};
732766
let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load(
733767
fx,
734768
ty,

src/intrinsics/llvm.rs

+2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
1212
args: &[mir::Operand<'tcx>],
1313
ret: CPlace<'tcx>,
1414
target: Option<BasicBlock>,
15+
span: Span,
1516
) {
1617
if intrinsic.starts_with("llvm.aarch64") {
1718
return llvm_aarch64::codegen_aarch64_llvm_intrinsic_call(
@@ -31,6 +32,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
3132
args,
3233
ret,
3334
target,
35+
span,
3436
);
3537
}
3638

src/intrinsics/llvm_x86.rs

+156
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
1515
args: &[mir::Operand<'tcx>],
1616
ret: CPlace<'tcx>,
1717
target: Option<BasicBlock>,
18+
span: Span,
1819
) {
1920
match intrinsic {
2021
"llvm.x86.sse2.pause" | "llvm.aarch64.isb" => {
@@ -718,6 +719,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
718719
}
719720

720721
"llvm.x86.pclmulqdq" => {
722+
// FIXME use inline asm
721723
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772
722724
intrinsic_args!(fx, args => (a, b, imm8); intrinsic);
723725

@@ -779,6 +781,160 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
779781
ret.place_lane(fx, 1).to_ptr().store(fx, res2, MemFlags::trusted());
780782
}
781783

784+
"llvm.x86.aesni.aeskeygenassist" => {
785+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aeskeygenassist_si128&ig_expand=261
786+
intrinsic_args!(fx, args => (a, _imm8); intrinsic);
787+
788+
let a = a.load_scalar(fx);
789+
790+
let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[1])
791+
{
792+
imm8
793+
} else {
794+
fx.tcx.sess.span_fatal(
795+
span,
796+
"Index argument for `_mm_aeskeygenassist_si128` is not a constant",
797+
);
798+
};
799+
800+
let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8));
801+
802+
codegen_inline_asm_inner(
803+
fx,
804+
&[InlineAsmTemplatePiece::String(format!("aeskeygenassist xmm0, xmm0, {imm8}"))],
805+
&[CInlineAsmOperand::InOut {
806+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
807+
_late: true,
808+
in_value: a,
809+
out_place: Some(ret),
810+
}],
811+
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
812+
);
813+
}
814+
815+
"llvm.x86.aesni.aesimc" => {
816+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesimc_si128&ig_expand=260
817+
intrinsic_args!(fx, args => (a); intrinsic);
818+
819+
let a = a.load_scalar(fx);
820+
821+
codegen_inline_asm_inner(
822+
fx,
823+
&[InlineAsmTemplatePiece::String("aesimc xmm0, xmm0".to_string())],
824+
&[CInlineAsmOperand::InOut {
825+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
826+
_late: true,
827+
in_value: a,
828+
out_place: Some(ret),
829+
}],
830+
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
831+
);
832+
}
833+
834+
"llvm.x86.aesni.aesenc" => {
835+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128&ig_expand=252
836+
intrinsic_args!(fx, args => (a, round_key); intrinsic);
837+
838+
let a = a.load_scalar(fx);
839+
let round_key = round_key.load_scalar(fx);
840+
841+
codegen_inline_asm_inner(
842+
fx,
843+
&[InlineAsmTemplatePiece::String("aesenc xmm0, xmm1".to_string())],
844+
&[
845+
CInlineAsmOperand::InOut {
846+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
847+
_late: true,
848+
in_value: a,
849+
out_place: Some(ret),
850+
},
851+
CInlineAsmOperand::In {
852+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
853+
value: round_key,
854+
},
855+
],
856+
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
857+
);
858+
}
859+
860+
"llvm.x86.aesni.aesenclast" => {
861+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128&ig_expand=257
862+
intrinsic_args!(fx, args => (a, round_key); intrinsic);
863+
864+
let a = a.load_scalar(fx);
865+
let round_key = round_key.load_scalar(fx);
866+
867+
codegen_inline_asm_inner(
868+
fx,
869+
&[InlineAsmTemplatePiece::String("aesenclast xmm0, xmm1".to_string())],
870+
&[
871+
CInlineAsmOperand::InOut {
872+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
873+
_late: true,
874+
in_value: a,
875+
out_place: Some(ret),
876+
},
877+
CInlineAsmOperand::In {
878+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
879+
value: round_key,
880+
},
881+
],
882+
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
883+
);
884+
}
885+
886+
"llvm.x86.aesni.aesdec" => {
887+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128&ig_expand=242
888+
intrinsic_args!(fx, args => (a, round_key); intrinsic);
889+
890+
let a = a.load_scalar(fx);
891+
let round_key = round_key.load_scalar(fx);
892+
893+
codegen_inline_asm_inner(
894+
fx,
895+
&[InlineAsmTemplatePiece::String("aesdec xmm0, xmm1".to_string())],
896+
&[
897+
CInlineAsmOperand::InOut {
898+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
899+
_late: true,
900+
in_value: a,
901+
out_place: Some(ret),
902+
},
903+
CInlineAsmOperand::In {
904+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
905+
value: round_key,
906+
},
907+
],
908+
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
909+
);
910+
}
911+
912+
"llvm.x86.aesni.aesdeclast" => {
913+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128&ig_expand=247
914+
intrinsic_args!(fx, args => (a, round_key); intrinsic);
915+
916+
let a = a.load_scalar(fx);
917+
let round_key = round_key.load_scalar(fx);
918+
919+
codegen_inline_asm_inner(
920+
fx,
921+
&[InlineAsmTemplatePiece::String("aesdeclast xmm0, xmm1".to_string())],
922+
&[
923+
CInlineAsmOperand::InOut {
924+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
925+
_late: true,
926+
in_value: a,
927+
out_place: Some(ret),
928+
},
929+
CInlineAsmOperand::In {
930+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
931+
value: round_key,
932+
},
933+
],
934+
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
935+
);
936+
}
937+
782938
"llvm.x86.avx.ptestz.256" => {
783939
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_si256&ig_expand=6945
784940
intrinsic_args!(fx, args => (a, b); intrinsic);

0 commit comments

Comments
 (0)