Skip to content

Commit 6e1bf49

Browse files
Give priority to intrinsic translations from LLVM
1 parent 618ba48 commit 6e1bf49

File tree

2 files changed

+45
-28
lines changed

2 files changed

+45
-28
lines changed

src/intrinsic/archs.rs

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3021,19 +3021,19 @@ match name {
30213021
"llvm.x86.avx512.cvtss2usi64" => "__builtin_ia32_cvtss2usi64",
30223022
"llvm.x86.avx512.cvttsd2si" => "__builtin_ia32_vcvttsd2si32",
30233023
"llvm.x86.avx512.cvttsd2si64" => "__builtin_ia32_vcvttsd2si64",
3024-
"llvm.x86.avx512.cvttsd2usi" => "__builtin_ia32_cvttsd2usi",
3025-
// [DUPLICATE]: "llvm.x86.avx512.cvttsd2usi" => "__builtin_ia32_vcvttsd2usi32",
3026-
"llvm.x86.avx512.cvttsd2usi64" => "__builtin_ia32_cvttsd2usi64",
3027-
// [DUPLICATE]: "llvm.x86.avx512.cvttsd2usi64" => "__builtin_ia32_vcvttsd2usi64",
3024+
"llvm.x86.avx512.cvttsd2usi" => "__builtin_ia32_vcvttsd2usi32",
3025+
// [DUPLICATE]: "llvm.x86.avx512.cvttsd2usi" => "__builtin_ia32_cvttsd2usi",
3026+
"llvm.x86.avx512.cvttsd2usi64" => "__builtin_ia32_vcvttsd2usi64",
3027+
// [DUPLICATE]: "llvm.x86.avx512.cvttsd2usi64" => "__builtin_ia32_cvttsd2usi64",
30283028
"llvm.x86.avx512.cvttss2si" => "__builtin_ia32_vcvttss2si32",
30293029
"llvm.x86.avx512.cvttss2si64" => "__builtin_ia32_vcvttss2si64",
3030-
"llvm.x86.avx512.cvttss2usi" => "__builtin_ia32_cvttss2usi",
3031-
// [DUPLICATE]: "llvm.x86.avx512.cvttss2usi" => "__builtin_ia32_vcvttss2usi32",
3032-
"llvm.x86.avx512.cvttss2usi64" => "__builtin_ia32_cvttss2usi64",
3033-
// [DUPLICATE]: "llvm.x86.avx512.cvttss2usi64" => "__builtin_ia32_vcvttss2usi64",
3030+
"llvm.x86.avx512.cvttss2usi" => "__builtin_ia32_vcvttss2usi32",
3031+
// [DUPLICATE]: "llvm.x86.avx512.cvttss2usi" => "__builtin_ia32_cvttss2usi",
3032+
"llvm.x86.avx512.cvttss2usi64" => "__builtin_ia32_vcvttss2usi64",
3033+
// [DUPLICATE]: "llvm.x86.avx512.cvttss2usi64" => "__builtin_ia32_cvttss2usi64",
30343034
"llvm.x86.avx512.cvtusi2sd" => "__builtin_ia32_cvtusi2sd",
3035-
"llvm.x86.avx512.cvtusi2ss" => "__builtin_ia32_cvtusi2ss",
3036-
// [DUPLICATE]: "llvm.x86.avx512.cvtusi2ss" => "__builtin_ia32_cvtusi2ss32",
3035+
"llvm.x86.avx512.cvtusi2ss" => "__builtin_ia32_cvtusi2ss32",
3036+
// [DUPLICATE]: "llvm.x86.avx512.cvtusi2ss" => "__builtin_ia32_cvtusi2ss",
30373037
"llvm.x86.avx512.cvtusi642sd" => "__builtin_ia32_cvtusi2sd64",
30383038
// [DUPLICATE]: "llvm.x86.avx512.cvtusi642sd" => "__builtin_ia32_cvtusi642sd",
30393039
"llvm.x86.avx512.cvtusi642ss" => "__builtin_ia32_cvtusi2ss64",
@@ -3479,10 +3479,10 @@ match name {
34793479
"llvm.x86.avx512.rcp14.ss" => "__builtin_ia32_rcp14ss_mask",
34803480
"llvm.x86.avx512.rcp28.pd" => "__builtin_ia32_rcp28pd_mask",
34813481
"llvm.x86.avx512.rcp28.ps" => "__builtin_ia32_rcp28ps_mask",
3482-
"llvm.x86.avx512.rcp28.sd" => "__builtin_ia32_rcp28sd_mask",
3483-
// [DUPLICATE]: "llvm.x86.avx512.rcp28.sd" => "__builtin_ia32_rcp28sd_round_mask",
3484-
"llvm.x86.avx512.rcp28.ss" => "__builtin_ia32_rcp28ss_mask",
3485-
// [DUPLICATE]: "llvm.x86.avx512.rcp28.ss" => "__builtin_ia32_rcp28ss_round_mask",
3482+
"llvm.x86.avx512.rcp28.sd" => "__builtin_ia32_rcp28sd_round_mask",
3483+
// [DUPLICATE]: "llvm.x86.avx512.rcp28.sd" => "__builtin_ia32_rcp28sd_mask",
3484+
"llvm.x86.avx512.rcp28.ss" => "__builtin_ia32_rcp28ss_round_mask",
3485+
// [DUPLICATE]: "llvm.x86.avx512.rcp28.ss" => "__builtin_ia32_rcp28ss_mask",
34863486
"llvm.x86.avx512.rndscale.sd" => "__builtin_ia32_rndscalesd",
34873487
"llvm.x86.avx512.rndscale.ss" => "__builtin_ia32_rndscaless",
34883488
"llvm.x86.avx512.rsqrt14.pd.128" => "__builtin_ia32_rsqrt14pd128_mask",
@@ -3495,10 +3495,10 @@ match name {
34953495
"llvm.x86.avx512.rsqrt14.ss" => "__builtin_ia32_rsqrt14ss_mask",
34963496
"llvm.x86.avx512.rsqrt28.pd" => "__builtin_ia32_rsqrt28pd_mask",
34973497
"llvm.x86.avx512.rsqrt28.ps" => "__builtin_ia32_rsqrt28ps_mask",
3498-
"llvm.x86.avx512.rsqrt28.sd" => "__builtin_ia32_rsqrt28sd_mask",
3499-
// [DUPLICATE]: "llvm.x86.avx512.rsqrt28.sd" => "__builtin_ia32_rsqrt28sd_round_mask",
3500-
"llvm.x86.avx512.rsqrt28.ss" => "__builtin_ia32_rsqrt28ss_mask",
3501-
// [DUPLICATE]: "llvm.x86.avx512.rsqrt28.ss" => "__builtin_ia32_rsqrt28ss_round_mask",
3498+
"llvm.x86.avx512.rsqrt28.sd" => "__builtin_ia32_rsqrt28sd_round_mask",
3499+
// [DUPLICATE]: "llvm.x86.avx512.rsqrt28.sd" => "__builtin_ia32_rsqrt28sd_mask",
3500+
"llvm.x86.avx512.rsqrt28.ss" => "__builtin_ia32_rsqrt28ss_round_mask",
3501+
// [DUPLICATE]: "llvm.x86.avx512.rsqrt28.ss" => "__builtin_ia32_rsqrt28ss_mask",
35023502
"llvm.x86.avx512.scatter.dpd.512" => "__builtin_ia32_scattersiv8df",
35033503
"llvm.x86.avx512.scatter.dpi.512" => "__builtin_ia32_scattersiv16si",
35043504
"llvm.x86.avx512.scatter.dpq.512" => "__builtin_ia32_scattersiv8di",

tools/generate_intrinsics.py

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -144,12 +144,34 @@ def extract_instrinsics_from_llvmint(llvmint, intrinsics):
144144
append_translation(json_data, p, intrinsics[arch])
145145

146146

147+
def fill_intrinsics(intrinsics, from_intrinsics, all_intrinsics):
    """Merge the per-arch translations of `from_intrinsics` into `intrinsics`.

    Each kept entry is stored as a `(llvm_name, gcc_name, is_duplicate)`
    tuple in `intrinsics[arch]`. `all_intrinsics` maps every LLVM name to the
    first GCC translation recorded for it and is updated in place, so
    translations merged by an earlier call win over those merged later.

    Both `intrinsics` and `all_intrinsics` are mutated; nothing is returned.
    """
    for arch, entries in from_intrinsics.items():
        # Make sure the arch exists in the output even if it ends up empty.
        merged = intrinsics.setdefault(arch, [])
        for entry in entries:
            llvm_name, gcc_name = entry[0], entry[1]
            if llvm_name not in all_intrinsics:
                # First translation seen for this LLVM name: it is the one
                # that will actually be used.
                all_intrinsics[llvm_name] = gcc_name
                merged.append((llvm_name, gcc_name, False))
            elif all_intrinsics[llvm_name] != gcc_name:
                # Same LLVM name but a different GCC translation: keep it,
                # flagged as a duplicate.
                merged.append((llvm_name, gcc_name, True))
            # Otherwise the LLVM name and the GCC translation are both
            # already known (a "full" duplicate): drop the entry.
161+
162+
147163
def update_intrinsics(llvm_path, llvmint):
148-
intrinsics = {}
164+
intrinsics_llvm = {}
165+
intrinsics_llvmint = {}
149166
all_intrinsics = {}
150167

151-
extract_instrinsics_from_llvm(llvm_path, intrinsics)
152-
extract_instrinsics_from_llvmint(llvmint, intrinsics)
168+
extract_instrinsics_from_llvm(llvm_path, intrinsics_llvm)
169+
extract_instrinsics_from_llvmint(llvmint, intrinsics_llvmint)
170+
171+
intrinsics = {}
172+
# We give priority to translations from LLVM over the ones from llvmint.
173+
fill_intrinsics(intrinsics, intrinsics_llvm, all_intrinsics)
174+
fill_intrinsics(intrinsics, intrinsics_llvmint, all_intrinsics)
153175

154176
archs = [arch for arch in intrinsics]
155177
archs.sort()
@@ -166,18 +188,13 @@ def update_intrinsics(llvm_path, llvmint):
166188
for arch in archs:
167189
if len(intrinsics[arch]) == 0:
168190
continue
169-
intrinsics[arch].sort()
191+
intrinsics[arch].sort(key=lambda x: (x[0], x[2]))
170192
out.write(' // {}\n'.format(arch))
171193
for entry in intrinsics[arch]:
172-
if entry[0] in all_intrinsics:
173-
if all_intrinsics[entry[0]] == entry[1]:
174-
# This is a "full" duplicate, both the LLVM instruction and the GCC
175-
# translation are the same.
176-
continue
194+
if entry[2] == True: # if it is a duplicate
177195
out.write(' // [DUPLICATE]: "{}" => "{}",\n'.format(entry[0], entry[1]))
178196
else:
179197
out.write(' "{}" => "{}",\n'.format(entry[0], entry[1]))
180-
all_intrinsics[entry[0]] = entry[1]
181198
out.write(' _ => unimplemented!("***** unsupported LLVM intrinsic {}", name),\n')
182199
out.write("}\n")
183200
print("Done!")

0 commit comments

Comments
 (0)