Skip to content

Commit 5482ae6

Browse files
jacquesguan
jacquesguan
authored and
jacquesguan
committed
[LegalizeTypes][VP] Add widen and split support for VP FP/integer casting ops.
This patch adds widening and splitting support in vector type legalization for VP_FPTOSI, VP_FPTOUI, VP_SITOFP and VP_UITOFP.

Differential Revision: https://reviews.llvm.org/D126847
1 parent 9f0869a commit 5482ae6

9 files changed

+438
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+8
Original file line number | Diff line number | Diff line change
@@ -1029,17 +1029,21 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
10291029
case ISD::FP_EXTEND:
10301030
case ISD::FP_ROUND:
10311031
case ISD::FP_TO_SINT:
1032+
case ISD::VP_FPTOSI:
10321033
case ISD::FP_TO_UINT:
1034+
case ISD::VP_FPTOUI:
10331035
case ISD::FRINT:
10341036
case ISD::FROUND:
10351037
case ISD::FROUNDEVEN:
10361038
case ISD::FSIN:
10371039
case ISD::FSQRT:
10381040
case ISD::FTRUNC:
10391041
case ISD::SINT_TO_FP:
1042+
case ISD::VP_SITOFP:
10401043
case ISD::TRUNCATE:
10411044
case ISD::VP_TRUNCATE:
10421045
case ISD::UINT_TO_FP:
1046+
case ISD::VP_UITOFP:
10431047
case ISD::FCANONICALIZE:
10441048
SplitVecRes_UnaryOp(N, Lo, Hi);
10451049
break;
@@ -3793,13 +3797,17 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
37933797
case ISD::FP_EXTEND:
37943798
case ISD::FP_ROUND:
37953799
case ISD::FP_TO_SINT:
3800+
case ISD::VP_FPTOSI:
37963801
case ISD::FP_TO_UINT:
3802+
case ISD::VP_FPTOUI:
37973803
case ISD::SIGN_EXTEND:
37983804
case ISD::VP_SIGN_EXTEND:
37993805
case ISD::SINT_TO_FP:
3806+
case ISD::VP_SITOFP:
38003807
case ISD::VP_TRUNCATE:
38013808
case ISD::TRUNCATE:
38023809
case ISD::UINT_TO_FP:
3810+
case ISD::VP_UITOFP:
38033811
case ISD::ZERO_EXTEND:
38043812
case ISD::VP_ZERO_EXTEND:
38053813
Res = WidenVecRes_Convert(N);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll

+51
Original file line number | Diff line number | Diff line change
@@ -311,3 +311,54 @@ define <4 x i64> @vfptosi_v4i64_v4f64_unmasked(<4 x double> %va, i32 zeroext %ev
311311
ret <4 x i64> %v
312312
}
313313

314+
declare <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double>, <32 x i1>, i32)
315+
316+
define <32 x i64> @vfptosi_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
317+
; CHECK-LABEL: vfptosi_v32i64_v32f64:
318+
; CHECK: # %bb.0:
319+
; CHECK-NEXT: vmv1r.v v24, v0
320+
; CHECK-NEXT: li a1, 0
321+
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
322+
; CHECK-NEXT: addi a2, a0, -16
323+
; CHECK-NEXT: vslidedown.vi v0, v0, 2
324+
; CHECK-NEXT: bltu a0, a2, .LBB25_2
325+
; CHECK-NEXT: # %bb.1:
326+
; CHECK-NEXT: mv a1, a2
327+
; CHECK-NEXT: .LBB25_2:
328+
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
329+
; CHECK-NEXT: li a1, 16
330+
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16, v0.t
331+
; CHECK-NEXT: bltu a0, a1, .LBB25_4
332+
; CHECK-NEXT: # %bb.3:
333+
; CHECK-NEXT: li a0, 16
334+
; CHECK-NEXT: .LBB25_4:
335+
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
336+
; CHECK-NEXT: vmv1r.v v0, v24
337+
; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t
338+
; CHECK-NEXT: ret
339+
%v = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
340+
ret <32 x i64> %v
341+
}
342+
343+
define <32 x i64> @vfptosi_v32i64_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
344+
; CHECK-LABEL: vfptosi_v32i64_v32f64_unmasked:
345+
; CHECK: # %bb.0:
346+
; CHECK-NEXT: addi a1, a0, -16
347+
; CHECK-NEXT: li a2, 0
348+
; CHECK-NEXT: bltu a0, a1, .LBB26_2
349+
; CHECK-NEXT: # %bb.1:
350+
; CHECK-NEXT: mv a2, a1
351+
; CHECK-NEXT: .LBB26_2:
352+
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
353+
; CHECK-NEXT: li a1, 16
354+
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16
355+
; CHECK-NEXT: bltu a0, a1, .LBB26_4
356+
; CHECK-NEXT: # %bb.3:
357+
; CHECK-NEXT: li a0, 16
358+
; CHECK-NEXT: .LBB26_4:
359+
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
360+
; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8
361+
; CHECK-NEXT: ret
362+
%v = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl)
363+
ret <32 x i64> %v
364+
}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll

+51
Original file line number | Diff line number | Diff line change
@@ -311,3 +311,54 @@ define <4 x i64> @vfptoui_v4i64_v4f64_unmasked(<4 x double> %va, i32 zeroext %ev
311311
ret <4 x i64> %v
312312
}
313313

314+
declare <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double>, <32 x i1>, i32)
315+
316+
define <32 x i64> @vfptoui_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
317+
; CHECK-LABEL: vfptoui_v32i64_v32f64:
318+
; CHECK: # %bb.0:
319+
; CHECK-NEXT: vmv1r.v v24, v0
320+
; CHECK-NEXT: li a1, 0
321+
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
322+
; CHECK-NEXT: addi a2, a0, -16
323+
; CHECK-NEXT: vslidedown.vi v0, v0, 2
324+
; CHECK-NEXT: bltu a0, a2, .LBB25_2
325+
; CHECK-NEXT: # %bb.1:
326+
; CHECK-NEXT: mv a1, a2
327+
; CHECK-NEXT: .LBB25_2:
328+
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
329+
; CHECK-NEXT: li a1, 16
330+
; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16, v0.t
331+
; CHECK-NEXT: bltu a0, a1, .LBB25_4
332+
; CHECK-NEXT: # %bb.3:
333+
; CHECK-NEXT: li a0, 16
334+
; CHECK-NEXT: .LBB25_4:
335+
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
336+
; CHECK-NEXT: vmv1r.v v0, v24
337+
; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t
338+
; CHECK-NEXT: ret
339+
%v = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
340+
ret <32 x i64> %v
341+
}
342+
343+
define <32 x i64> @vfptoui_v32i64_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
344+
; CHECK-LABEL: vfptoui_v32i64_v32f64_unmasked:
345+
; CHECK: # %bb.0:
346+
; CHECK-NEXT: addi a1, a0, -16
347+
; CHECK-NEXT: li a2, 0
348+
; CHECK-NEXT: bltu a0, a1, .LBB26_2
349+
; CHECK-NEXT: # %bb.1:
350+
; CHECK-NEXT: mv a2, a1
351+
; CHECK-NEXT: .LBB26_2:
352+
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
353+
; CHECK-NEXT: li a1, 16
354+
; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16
355+
; CHECK-NEXT: bltu a0, a1, .LBB26_4
356+
; CHECK-NEXT: # %bb.3:
357+
; CHECK-NEXT: li a0, 16
358+
; CHECK-NEXT: .LBB26_4:
359+
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
360+
; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8
361+
; CHECK-NEXT: ret
362+
%v = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl)
363+
ret <32 x i64> %v
364+
}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll

+52
Original file line number | Diff line number | Diff line change
@@ -302,3 +302,55 @@ define <4 x double> @vsitofp_v4f64_v4i64_unmasked(<4 x i64> %va, i32 zeroext %ev
302302
%v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
303303
ret <4 x double> %v
304304
}
305+
306+
declare <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64>, <32 x i1>, i32)
307+
308+
define <32 x double> @vsitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
309+
; CHECK-LABEL: vsitofp_v32f64_v32i64:
310+
; CHECK: # %bb.0:
311+
; CHECK-NEXT: vmv1r.v v24, v0
312+
; CHECK-NEXT: li a1, 0
313+
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
314+
; CHECK-NEXT: addi a2, a0, -16
315+
; CHECK-NEXT: vslidedown.vi v0, v0, 2
316+
; CHECK-NEXT: bltu a0, a2, .LBB25_2
317+
; CHECK-NEXT: # %bb.1:
318+
; CHECK-NEXT: mv a1, a2
319+
; CHECK-NEXT: .LBB25_2:
320+
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
321+
; CHECK-NEXT: li a1, 16
322+
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
323+
; CHECK-NEXT: bltu a0, a1, .LBB25_4
324+
; CHECK-NEXT: # %bb.3:
325+
; CHECK-NEXT: li a0, 16
326+
; CHECK-NEXT: .LBB25_4:
327+
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
328+
; CHECK-NEXT: vmv1r.v v0, v24
329+
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
330+
; CHECK-NEXT: ret
331+
%v = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl)
332+
ret <32 x double> %v
333+
}
334+
335+
define <32 x double> @vsitofp_v32f64_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
336+
; CHECK-LABEL: vsitofp_v32f64_v32i64_unmasked:
337+
; CHECK: # %bb.0:
338+
; CHECK-NEXT: addi a1, a0, -16
339+
; CHECK-NEXT: li a2, 0
340+
; CHECK-NEXT: bltu a0, a1, .LBB26_2
341+
; CHECK-NEXT: # %bb.1:
342+
; CHECK-NEXT: mv a2, a1
343+
; CHECK-NEXT: .LBB26_2:
344+
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
345+
; CHECK-NEXT: li a1, 16
346+
; CHECK-NEXT: vfcvt.f.x.v v16, v16
347+
; CHECK-NEXT: bltu a0, a1, .LBB26_4
348+
; CHECK-NEXT: # %bb.3:
349+
; CHECK-NEXT: li a0, 16
350+
; CHECK-NEXT: .LBB26_4:
351+
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
352+
; CHECK-NEXT: vfcvt.f.x.v v8, v8
353+
; CHECK-NEXT: ret
354+
%v = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl)
355+
ret <32 x double> %v
356+
}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll

+52
Original file line number | Diff line number | Diff line change
@@ -302,3 +302,55 @@ define <4 x double> @vuitofp_v4f64_v4i64_unmasked(<4 x i64> %va, i32 zeroext %ev
302302
%v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
303303
ret <4 x double> %v
304304
}
305+
306+
declare <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64>, <32 x i1>, i32)
307+
308+
define <32 x double> @vuitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
309+
; CHECK-LABEL: vuitofp_v32f64_v32i64:
310+
; CHECK: # %bb.0:
311+
; CHECK-NEXT: vmv1r.v v24, v0
312+
; CHECK-NEXT: li a1, 0
313+
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
314+
; CHECK-NEXT: addi a2, a0, -16
315+
; CHECK-NEXT: vslidedown.vi v0, v0, 2
316+
; CHECK-NEXT: bltu a0, a2, .LBB25_2
317+
; CHECK-NEXT: # %bb.1:
318+
; CHECK-NEXT: mv a1, a2
319+
; CHECK-NEXT: .LBB25_2:
320+
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
321+
; CHECK-NEXT: li a1, 16
322+
; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t
323+
; CHECK-NEXT: bltu a0, a1, .LBB25_4
324+
; CHECK-NEXT: # %bb.3:
325+
; CHECK-NEXT: li a0, 16
326+
; CHECK-NEXT: .LBB25_4:
327+
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
328+
; CHECK-NEXT: vmv1r.v v0, v24
329+
; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
330+
; CHECK-NEXT: ret
331+
%v = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl)
332+
ret <32 x double> %v
333+
}
334+
335+
define <32 x double> @vuitofp_v32f64_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
336+
; CHECK-LABEL: vuitofp_v32f64_v32i64_unmasked:
337+
; CHECK: # %bb.0:
338+
; CHECK-NEXT: addi a1, a0, -16
339+
; CHECK-NEXT: li a2, 0
340+
; CHECK-NEXT: bltu a0, a1, .LBB26_2
341+
; CHECK-NEXT: # %bb.1:
342+
; CHECK-NEXT: mv a2, a1
343+
; CHECK-NEXT: .LBB26_2:
344+
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
345+
; CHECK-NEXT: li a1, 16
346+
; CHECK-NEXT: vfcvt.f.xu.v v16, v16
347+
; CHECK-NEXT: bltu a0, a1, .LBB26_4
348+
; CHECK-NEXT: # %bb.3:
349+
; CHECK-NEXT: li a0, 16
350+
; CHECK-NEXT: .LBB26_4:
351+
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
352+
; CHECK-NEXT: vfcvt.f.xu.v v8, v8
353+
; CHECK-NEXT: ret
354+
%v = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl)
355+
ret <32 x double> %v
356+
}

llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll

+56
Original file line number | Diff line number | Diff line change
@@ -308,3 +308,59 @@ define <vscale x 2 x i64> @vfptosi_nxv2i64_nxv2f64_unmasked(<vscale x 2 x double
308308
%v = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %evl)
309309
ret <vscale x 2 x i64> %v
310310
}
311+
312+
declare <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f32(<vscale x 32 x float>, <vscale x 32 x i1>, i32)
313+
314+
define <vscale x 32 x i32> @vfptosi_nxv32i32_nxv32f32(<vscale x 32 x float> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
315+
; CHECK-LABEL: vfptosi_nxv32i32_nxv32f32:
316+
; CHECK: # %bb.0:
317+
; CHECK-NEXT: vmv1r.v v24, v0
318+
; CHECK-NEXT: li a2, 0
319+
; CHECK-NEXT: csrr a1, vlenb
320+
; CHECK-NEXT: srli a4, a1, 2
321+
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, mu
322+
; CHECK-NEXT: slli a1, a1, 1
323+
; CHECK-NEXT: sub a3, a0, a1
324+
; CHECK-NEXT: vslidedown.vx v0, v0, a4
325+
; CHECK-NEXT: bltu a0, a3, .LBB25_2
326+
; CHECK-NEXT: # %bb.1:
327+
; CHECK-NEXT: mv a2, a3
328+
; CHECK-NEXT: .LBB25_2:
329+
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
330+
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16, v0.t
331+
; CHECK-NEXT: bltu a0, a1, .LBB25_4
332+
; CHECK-NEXT: # %bb.3:
333+
; CHECK-NEXT: mv a0, a1
334+
; CHECK-NEXT: .LBB25_4:
335+
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
336+
; CHECK-NEXT: vmv1r.v v0, v24
337+
; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t
338+
; CHECK-NEXT: ret
339+
%v = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f32(<vscale x 32 x float> %va, <vscale x 32 x i1> %m, i32 %evl)
340+
ret <vscale x 32 x i32> %v
341+
}
342+
343+
define <vscale x 32 x i32> @vfptosi_nxv32i32_nxv32f32_unmasked(<vscale x 32 x float> %va, i32 zeroext %evl) {
344+
; CHECK-LABEL: vfptosi_nxv32i32_nxv32f32_unmasked:
345+
; CHECK: # %bb.0:
346+
; CHECK-NEXT: csrr a1, vlenb
347+
; CHECK-NEXT: slli a1, a1, 1
348+
; CHECK-NEXT: mv a2, a0
349+
; CHECK-NEXT: bltu a0, a1, .LBB26_2
350+
; CHECK-NEXT: # %bb.1:
351+
; CHECK-NEXT: mv a2, a1
352+
; CHECK-NEXT: .LBB26_2:
353+
; CHECK-NEXT: li a3, 0
354+
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
355+
; CHECK-NEXT: sub a1, a0, a1
356+
; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8
357+
; CHECK-NEXT: bltu a0, a1, .LBB26_4
358+
; CHECK-NEXT: # %bb.3:
359+
; CHECK-NEXT: mv a3, a1
360+
; CHECK-NEXT: .LBB26_4:
361+
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu
362+
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16
363+
; CHECK-NEXT: ret
364+
%v = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f32(<vscale x 32 x float> %va, <vscale x 32 x i1> shufflevector (<vscale x 32 x i1> insertelement (<vscale x 32 x i1> undef, i1 true, i32 0), <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer), i32 %evl)
365+
ret <vscale x 32 x i32> %v
366+
}

llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll

+56
Original file line number | Diff line number | Diff line change
@@ -308,3 +308,59 @@ define <vscale x 2 x i64> @vfptoui_nxv2i64_nxv2f64_unmasked(<vscale x 2 x double
308308
%v = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %evl)
309309
ret <vscale x 2 x i64> %v
310310
}
311+
312+
declare <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f32(<vscale x 32 x float>, <vscale x 32 x i1>, i32)
313+
314+
define <vscale x 32 x i32> @vfptoui_nxv32i32_nxv32f32(<vscale x 32 x float> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
315+
; CHECK-LABEL: vfptoui_nxv32i32_nxv32f32:
316+
; CHECK: # %bb.0:
317+
; CHECK-NEXT: vmv1r.v v24, v0
318+
; CHECK-NEXT: li a2, 0
319+
; CHECK-NEXT: csrr a1, vlenb
320+
; CHECK-NEXT: srli a4, a1, 2
321+
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, mu
322+
; CHECK-NEXT: slli a1, a1, 1
323+
; CHECK-NEXT: sub a3, a0, a1
324+
; CHECK-NEXT: vslidedown.vx v0, v0, a4
325+
; CHECK-NEXT: bltu a0, a3, .LBB25_2
326+
; CHECK-NEXT: # %bb.1:
327+
; CHECK-NEXT: mv a2, a3
328+
; CHECK-NEXT: .LBB25_2:
329+
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
330+
; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16, v0.t
331+
; CHECK-NEXT: bltu a0, a1, .LBB25_4
332+
; CHECK-NEXT: # %bb.3:
333+
; CHECK-NEXT: mv a0, a1
334+
; CHECK-NEXT: .LBB25_4:
335+
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
336+
; CHECK-NEXT: vmv1r.v v0, v24
337+
; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t
338+
; CHECK-NEXT: ret
339+
%v = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f32(<vscale x 32 x float> %va, <vscale x 32 x i1> %m, i32 %evl)
340+
ret <vscale x 32 x i32> %v
341+
}
342+
343+
define <vscale x 32 x i32> @vfptoui_nxv32i32_nxv32f32_unmasked(<vscale x 32 x float> %va, i32 zeroext %evl) {
344+
; CHECK-LABEL: vfptoui_nxv32i32_nxv32f32_unmasked:
345+
; CHECK: # %bb.0:
346+
; CHECK-NEXT: csrr a1, vlenb
347+
; CHECK-NEXT: slli a1, a1, 1
348+
; CHECK-NEXT: mv a2, a0
349+
; CHECK-NEXT: bltu a0, a1, .LBB26_2
350+
; CHECK-NEXT: # %bb.1:
351+
; CHECK-NEXT: mv a2, a1
352+
; CHECK-NEXT: .LBB26_2:
353+
; CHECK-NEXT: li a3, 0
354+
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
355+
; CHECK-NEXT: sub a1, a0, a1
356+
; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8
357+
; CHECK-NEXT: bltu a0, a1, .LBB26_4
358+
; CHECK-NEXT: # %bb.3:
359+
; CHECK-NEXT: mv a3, a1
360+
; CHECK-NEXT: .LBB26_4:
361+
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu
362+
; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16
363+
; CHECK-NEXT: ret
364+
%v = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f32(<vscale x 32 x float> %va, <vscale x 32 x i1> shufflevector (<vscale x 32 x i1> insertelement (<vscale x 32 x i1> undef, i1 true, i32 0), <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer), i32 %evl)
365+
ret <vscale x 32 x i32> %v
366+
}

0 commit comments

Comments
 (0)