Skip to content

Commit a0983ed

Browse files
laithsakka authored and WenleiHe committed
Handle exp2 with proper vectorization and lowering to SVML calls
Summary: Add mappings from the exp2 math functions to the corresponding SVML calls. This is a follow-up to, and an extension of, LLVM diff https://reviews.llvm.org/D19544.

Test Plan:
- Update the test case and run `ninja check`.
- Run the tests locally.

Reviewers: wenlei, hoyFB, mmasten, mzolotukhin, spatel
Reviewed By: spatel
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D77114
1 parent 88da019 commit a0983ed

File tree

2 files changed

+167
-1
lines changed

2 files changed

+167
-1
lines changed

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -245,6 +245,29 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", 4)
245245
TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8)
246246
TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
247247

248+
TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
249+
TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
250+
TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)
251+
252+
TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f4", 4)
253+
TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f8", 8)
254+
TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f16", 16)
255+
256+
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp22", 2)
257+
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp24", 4)
258+
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp28", 8)
259+
260+
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f4", 4)
261+
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f8", 8)
262+
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f16", 16)
263+
264+
TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp22", 2)
265+
TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp24", 4)
266+
TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp28", 8)
267+
268+
TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f4", 4)
269+
TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f8", 8)
270+
TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", 16)
248271

249272
#else
250273
#error "Must choose which vector library functions are to be defined."

llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll

Lines changed: 144 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,12 @@ declare float @logf(float) #0
2828
declare double @llvm.log.f64(double) #0
2929
declare float @llvm.log.f32(float) #0
3030

31+
declare double @exp2(double) #0
32+
declare float @exp2f(float) #0
33+
declare double @llvm.exp2.f64(double) #0
34+
declare float @llvm.exp2.f32(float) #0
35+
declare double @__exp2_finite(double) #0
36+
declare float @__exp2f_finite(float) #0
3137

3238
define void @sin_f64(double* nocapture %varray) {
3339
; CHECK-LABEL: @sin_f64(
@@ -497,5 +503,142 @@ for.end:
497503
ret void
498504
}
499505

500-
attributes #0 = { nounwind readnone }
506+
define void @exp2_f64(double* nocapture %varray) {
507+
; CHECK-LABEL: @exp2_f64(
508+
; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
509+
; CHECK: ret void
510+
;
511+
entry:
512+
br label %for.body
513+
514+
for.body:
515+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
516+
%tmp = trunc i64 %iv to i32
517+
%conv = sitofp i32 %tmp to double
518+
%call = tail call double @exp2(double %conv)
519+
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
520+
store double %call, double* %arrayidx, align 4
521+
%iv.next = add nuw nsw i64 %iv, 1
522+
%exitcond = icmp eq i64 %iv.next, 1000
523+
br i1 %exitcond, label %for.end, label %for.body
524+
525+
for.end:
526+
ret void
527+
}
528+
529+
define void @exp2_f32(float* nocapture %varray) {
530+
; CHECK-LABEL: @exp2_f32(
531+
; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]])
532+
; CHECK: ret void
533+
;
534+
entry:
535+
br label %for.body
536+
537+
for.body:
538+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
539+
%tmp = trunc i64 %iv to i32
540+
%conv = sitofp i32 %tmp to float
541+
%call = tail call float @exp2f(float %conv)
542+
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
543+
store float %call, float* %arrayidx, align 4
544+
%iv.next = add nuw nsw i64 %iv, 1
545+
%exitcond = icmp eq i64 %iv.next, 1000
546+
br i1 %exitcond, label %for.end, label %for.body
501547

548+
for.end:
549+
ret void
550+
}
551+
552+
define void @exp2_f64_intrinsic(double* nocapture %varray) {
553+
; CHECK-LABEL: @exp2_f64_intrinsic(
554+
; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
555+
; CHECK: ret void
556+
;
557+
entry:
558+
br label %for.body
559+
560+
for.body:
561+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
562+
%tmp = trunc i64 %iv to i32
563+
%conv = sitofp i32 %tmp to double
564+
%call = tail call double @llvm.exp2.f64(double %conv)
565+
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
566+
store double %call, double* %arrayidx, align 4
567+
%iv.next = add nuw nsw i64 %iv, 1
568+
%exitcond = icmp eq i64 %iv.next, 1000
569+
br i1 %exitcond, label %for.end, label %for.body
570+
571+
for.end:
572+
ret void
573+
}
574+
575+
define void @exp2_f32_intrinsic(float* nocapture %varray) {
576+
; CHECK-LABEL: @exp2_f32_intrinsic(
577+
; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]])
578+
; CHECK: ret void
579+
;
580+
entry:
581+
br label %for.body
582+
583+
for.body:
584+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
585+
%tmp = trunc i64 %iv to i32
586+
%conv = sitofp i32 %tmp to float
587+
%call = tail call float @llvm.exp2.f32(float %conv)
588+
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
589+
store float %call, float* %arrayidx, align 4
590+
%iv.next = add nuw nsw i64 %iv, 1
591+
%exitcond = icmp eq i64 %iv.next, 1000
592+
br i1 %exitcond, label %for.end, label %for.body
593+
594+
for.end:
595+
ret void
596+
}
597+
598+
define void @exp2f_finite(float* nocapture %varray) {
599+
; CHECK-LABEL: @exp2f_finite(
600+
; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_exp2f4(<4 x float> [[TMP4:%.*]])
601+
; CHECK: ret void
602+
;
603+
entry:
604+
br label %for.body
605+
606+
for.body:
607+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
608+
%tmp = trunc i64 %iv to i32
609+
%conv = sitofp i32 %tmp to float
610+
%call = tail call float @__exp2f_finite(float %conv)
611+
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
612+
store float %call, float* %arrayidx, align 4
613+
%iv.next = add nuw nsw i64 %iv, 1
614+
%exitcond = icmp eq i64 %iv.next, 1000
615+
br i1 %exitcond, label %for.end, label %for.body
616+
617+
for.end:
618+
ret void
619+
}
620+
621+
define void @exp2_finite(double* nocapture %varray) {
622+
; CHECK-LABEL: @exp2_finite(
623+
; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
624+
; CHECK: ret void
625+
;
626+
entry:
627+
br label %for.body
628+
629+
for.body:
630+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
631+
%tmp = trunc i64 %iv to i32
632+
%conv = sitofp i32 %tmp to double
633+
%call = tail call double @__exp2_finite(double %conv)
634+
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
635+
store double %call, double* %arrayidx, align 4
636+
%iv.next = add nuw nsw i64 %iv, 1
637+
%exitcond = icmp eq i64 %iv.next, 1000
638+
br i1 %exitcond, label %for.end, label %for.body
639+
640+
for.end:
641+
ret void
642+
}
643+
644+
attributes #0 = { nounwind readnone }

0 commit comments

Comments
 (0)