Skip to content

Commit 2df34f9

Browse files
authored
Merge pull request rust-lang#1490 from folkertdev/add-llvm-x86-crc32
add all `llvm.x86.sse42.crc32.*.*` intrinsics
2 parents 1a2c489 + 9059a74 commit 2df34f9

File tree

2 files changed

+31
-2
lines changed

2 files changed

+31
-2
lines changed

Diff for: example/std_example.rs

+18
Original file line number | Diff line number | Diff line change
@@ -210,6 +210,21 @@ struct I64X2(i64, i64);
210210
// Empty `extern "C"` function that takes an `I64X2` by value and does nothing with it.
// NOTE(review): the lint is presumably allowed because `I64X2` is not considered FFI-safe — confirm.
#[allow(improper_ctypes_definitions)]
211211
extern "C" fn foo(_a: I64X2) {}
212212

213+
/// Checks the `_mm_crc32_u8`, `_mm_crc32_u16`, `_mm_crc32_u32` and `_mm_crc32_u64`
/// intrinsics against fixed expected values.
///
/// Every call starts from the CRC value `42` and feeds it the data word
/// `0xdeadbeef`, cast with `as` to the operand width of the intrinsic under test.
///
/// Only compiled for `x86_64` and when the `jit` cfg is not set.
/// NOTE(review): presumably excluded under `jit` because these intrinsics are
/// lowered through inline asm — confirm.
///
/// # Safety
///
/// Compiled with the `sse4.2` target feature enabled, so it must only run on a
/// CPU that supports SSE4.2; the assertion at the top of the body checks this
/// at run time.
#[cfg(target_arch = "x86_64")]
214+
#[target_feature(enable = "sse4.2")]
215+
#[cfg(not(jit))]
216+
unsafe fn test_crc32() {
217+
assert!(is_x86_feature_detected!("sse4.2"));
218+
219+
let a = 42u32;
220+
let b = 0xdeadbeefu64;
221+
222+
// The `as u8` / `as u16` / `as u32` casts truncate `b` to the width of each data operand.
assert_eq!(_mm_crc32_u8(a, b as u8), 4135334616);
223+
assert_eq!(_mm_crc32_u16(a, b as u16), 1200687288);
224+
assert_eq!(_mm_crc32_u32(a, b as u32), 2543798776);
225+
// The 64-bit variant is called with `a` widened to `u64`; `b` is already `u64`, so its cast is a no-op.
assert_eq!(_mm_crc32_u64(a as u64, b as u64), 241952147);
226+
}
227+
213228
#[cfg(target_arch = "x86_64")]
214229
#[target_feature(enable = "sse2")]
215230
unsafe fn test_simd() {
@@ -249,6 +264,9 @@ unsafe fn test_simd() {
249264
#[rustfmt::skip]
250265
let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)));
251266
assert_eq!(mask1, 1);
267+
268+
#[cfg(not(jit))]
269+
test_crc32();
252270
}
253271

254272
#[cfg(target_arch = "x86_64")]

Diff for: src/intrinsics/llvm_x86.rs

+13-2
Original file line number | Diff line number | Diff line change
@@ -847,16 +847,27 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
847847
}
848848
}
849849

850-
"llvm.x86.sse42.crc32.32.32" => {
850+
"llvm.x86.sse42.crc32.32.8"
851+
| "llvm.x86.sse42.crc32.32.16"
852+
| "llvm.x86.sse42.crc32.32.32"
853+
| "llvm.x86.sse42.crc32.64.64" => {
851854
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=1419&text=_mm_crc32_u32
852855
intrinsic_args!(fx, args => (crc, v); intrinsic);
853856

854857
let crc = crc.load_scalar(fx);
855858
let v = v.load_scalar(fx);
856859

860+
let asm = match intrinsic {
861+
"llvm.x86.sse42.crc32.32.8" => "crc32 eax, dl",
862+
"llvm.x86.sse42.crc32.32.16" => "crc32 eax, dx",
863+
"llvm.x86.sse42.crc32.32.32" => "crc32 eax, edx",
864+
"llvm.x86.sse42.crc32.64.64" => "crc32 rax, rdx",
865+
_ => unreachable!(),
866+
};
867+
857868
codegen_inline_asm_inner(
858869
fx,
859-
&[InlineAsmTemplatePiece::String("crc32 eax, edx".to_string())],
870+
&[InlineAsmTemplatePiece::String(asm.to_string())],
860871
&[
861872
CInlineAsmOperand::InOut {
862873
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),

0 commit comments

Comments
 (0)