Skip to content

Commit f334db9

Browse files
authored
[llvm][CodeGen] Intrinsic llvm.powi.* code gen for vector arguments (llvm#118242)
Scalarize vector FPOWI instead of promoting the type. This allows the scalar FPOWIs to be visited and converted to libcalls before the type is promoted. FIXME: This should be done in LegalizeVectorOps/LegalizeDAG, but call lowering needs the unpromoted EVT. Without this patch, on some backends, such as RISCV64 and LoongArch64, the i32 type is illegal and is promoted. This causes the exponent type check to fail when an ISD::FPOWI node generates a libcall. Fixes llvm#118079.
1 parent 2691b96 commit f334db9

File tree

4 files changed: +1488 additions, -0 deletions

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2585,6 +2585,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) {
25852585
: RTLIB::getLDEXP(N->getValueType(0));
25862586

25872587
if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
2588+
// Scalarize vector FPOWI instead of promoting the type. This allows the
2589+
// scalar FPOWIs to be visited and converted to libcalls before promoting
2590+
// the type.
2591+
// FIXME: This should be done in LegalizeVectorOps/LegalizeDAG, but call
2592+
// lowering needs the unpromoted EVT.
2593+
if (IsPowI && N->getValueType(0).isVector())
2594+
return DAG.UnrollVectorOp(N);
25882595
SmallVector<SDValue, 3> NewOps(N->ops());
25892596
NewOps[1 + OpOffset] = SExtPromotedInteger(N->getOperand(1 + OpOffset));
25902597
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
3+
4+
declare <8 x float> @llvm.powi.v8f32.i32(<8 x float>, i32)
5+
6+
define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
7+
; CHECK-LABEL: powi_v8f32:
8+
; CHECK: # %bb.0: # %entry
9+
; CHECK-NEXT: addi.d $sp, $sp, -80
10+
; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
11+
; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
12+
; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill
13+
; CHECK-NEXT: addi.w $fp, $a0, 0
14+
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0
15+
; CHECK-NEXT: movgr2fr.w $fa0, $a0
16+
; CHECK-NEXT: move $a0, $fp
17+
; CHECK-NEXT: bl %plt(__powisf2)
18+
; CHECK-NEXT: movfr2gr.s $a0, $fa0
19+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0
20+
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
21+
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
22+
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
23+
; CHECK-NEXT: movgr2fr.w $fa0, $a0
24+
; CHECK-NEXT: move $a0, $fp
25+
; CHECK-NEXT: bl %plt(__powisf2)
26+
; CHECK-NEXT: movfr2gr.s $a0, $fa0
27+
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
28+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
29+
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
30+
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
31+
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2
32+
; CHECK-NEXT: movgr2fr.w $fa0, $a0
33+
; CHECK-NEXT: move $a0, $fp
34+
; CHECK-NEXT: bl %plt(__powisf2)
35+
; CHECK-NEXT: movfr2gr.s $a0, $fa0
36+
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
37+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 2
38+
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
39+
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
40+
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
41+
; CHECK-NEXT: movgr2fr.w $fa0, $a0
42+
; CHECK-NEXT: move $a0, $fp
43+
; CHECK-NEXT: bl %plt(__powisf2)
44+
; CHECK-NEXT: movfr2gr.s $a0, $fa0
45+
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
46+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 3
47+
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
48+
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
49+
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 4
50+
; CHECK-NEXT: movgr2fr.w $fa0, $a0
51+
; CHECK-NEXT: move $a0, $fp
52+
; CHECK-NEXT: bl %plt(__powisf2)
53+
; CHECK-NEXT: movfr2gr.s $a0, $fa0
54+
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
55+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 4
56+
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
57+
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
58+
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5
59+
; CHECK-NEXT: movgr2fr.w $fa0, $a0
60+
; CHECK-NEXT: move $a0, $fp
61+
; CHECK-NEXT: bl %plt(__powisf2)
62+
; CHECK-NEXT: movfr2gr.s $a0, $fa0
63+
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
64+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5
65+
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
66+
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
67+
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 6
68+
; CHECK-NEXT: movgr2fr.w $fa0, $a0
69+
; CHECK-NEXT: move $a0, $fp
70+
; CHECK-NEXT: bl %plt(__powisf2)
71+
; CHECK-NEXT: movfr2gr.s $a0, $fa0
72+
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
73+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 6
74+
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
75+
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
76+
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
77+
; CHECK-NEXT: movgr2fr.w $fa0, $a0
78+
; CHECK-NEXT: move $a0, $fp
79+
; CHECK-NEXT: bl %plt(__powisf2)
80+
; CHECK-NEXT: movfr2gr.s $a0, $fa0
81+
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
82+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 7
83+
; CHECK-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
84+
; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
85+
; CHECK-NEXT: addi.d $sp, $sp, 80
86+
; CHECK-NEXT: ret
87+
entry:
88+
%res = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> %va, i32 %b)
89+
ret <8 x float> %res
90+
}
91+
92+
declare <4 x double> @llvm.powi.v4f64.i32(<4 x double>, i32)
93+
94+
define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
95+
; CHECK-LABEL: powi_v4f64:
96+
; CHECK: # %bb.0: # %entry
97+
; CHECK-NEXT: addi.d $sp, $sp, -80
98+
; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
99+
; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
100+
; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill
101+
; CHECK-NEXT: addi.w $fp, $a0, 0
102+
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
103+
; CHECK-NEXT: movgr2fr.d $fa0, $a0
104+
; CHECK-NEXT: move $a0, $fp
105+
; CHECK-NEXT: bl %plt(__powidf2)
106+
; CHECK-NEXT: movfr2gr.d $a0, $fa0
107+
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
108+
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
109+
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
110+
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
111+
; CHECK-NEXT: movgr2fr.d $fa0, $a0
112+
; CHECK-NEXT: move $a0, $fp
113+
; CHECK-NEXT: bl %plt(__powidf2)
114+
; CHECK-NEXT: movfr2gr.d $a0, $fa0
115+
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
116+
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1
117+
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
118+
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
119+
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
120+
; CHECK-NEXT: movgr2fr.d $fa0, $a0
121+
; CHECK-NEXT: move $a0, $fp
122+
; CHECK-NEXT: bl %plt(__powidf2)
123+
; CHECK-NEXT: movfr2gr.d $a0, $fa0
124+
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
125+
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2
126+
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
127+
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
128+
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
129+
; CHECK-NEXT: movgr2fr.d $fa0, $a0
130+
; CHECK-NEXT: move $a0, $fp
131+
; CHECK-NEXT: bl %plt(__powidf2)
132+
; CHECK-NEXT: movfr2gr.d $a0, $fa0
133+
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
134+
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3
135+
; CHECK-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
136+
; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
137+
; CHECK-NEXT: addi.d $sp, $sp, 80
138+
; CHECK-NEXT: ret
139+
entry:
140+
%res = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> %va, i32 %b)
141+
ret <4 x double> %res
142+
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
3+
4+
declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32)
5+
6+
define <4 x float> @powi_v4f32(<4 x float> %va, i32 %b) nounwind {
7+
; CHECK-LABEL: powi_v4f32:
8+
; CHECK: # %bb.0: # %entry
9+
; CHECK-NEXT: addi.d $sp, $sp, -48
10+
; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
11+
; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
12+
; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
13+
; CHECK-NEXT: addi.w $fp, $a0, 0
14+
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
15+
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
16+
; CHECK-NEXT: move $a0, $fp
17+
; CHECK-NEXT: bl %plt(__powisf2)
18+
; CHECK-NEXT: movfr2gr.s $a0, $fa0
19+
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
20+
; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
21+
; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
22+
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1
23+
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
24+
; CHECK-NEXT: move $a0, $fp
25+
; CHECK-NEXT: bl %plt(__powisf2)
26+
; CHECK-NEXT: movfr2gr.s $a0, $fa0
27+
; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
28+
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 1
29+
; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
30+
; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
31+
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 2
32+
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
33+
; CHECK-NEXT: move $a0, $fp
34+
; CHECK-NEXT: bl %plt(__powisf2)
35+
; CHECK-NEXT: movfr2gr.s $a0, $fa0
36+
; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
37+
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2
38+
; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
39+
; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
40+
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3
41+
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
42+
; CHECK-NEXT: move $a0, $fp
43+
; CHECK-NEXT: bl %plt(__powisf2)
44+
; CHECK-NEXT: movfr2gr.s $a0, $fa0
45+
; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
46+
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3
47+
; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
48+
; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
49+
; CHECK-NEXT: addi.d $sp, $sp, 48
50+
; CHECK-NEXT: ret
51+
entry:
52+
%res = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> %va, i32 %b)
53+
ret <4 x float> %res
54+
}
55+
56+
declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32)
57+
58+
define <2 x double> @powi_v2f64(<2 x double> %va, i32 %b) nounwind {
59+
; CHECK-LABEL: powi_v2f64:
60+
; CHECK: # %bb.0: # %entry
61+
; CHECK-NEXT: addi.d $sp, $sp, -48
62+
; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
63+
; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
64+
; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
65+
; CHECK-NEXT: addi.w $fp, $a0, 0
66+
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
67+
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
68+
; CHECK-NEXT: move $a0, $fp
69+
; CHECK-NEXT: bl %plt(__powidf2)
70+
; CHECK-NEXT: movfr2gr.d $a0, $fa0
71+
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
72+
; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
73+
; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
74+
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1
75+
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
76+
; CHECK-NEXT: move $a0, $fp
77+
; CHECK-NEXT: bl %plt(__powidf2)
78+
; CHECK-NEXT: movfr2gr.d $a0, $fa0
79+
; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
80+
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1
81+
; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
82+
; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
83+
; CHECK-NEXT: addi.d $sp, $sp, 48
84+
; CHECK-NEXT: ret
85+
entry:
86+
%res = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> %va, i32 %b)
87+
ret <2 x double> %res
88+
}

0 commit comments

Comments (0)