Skip to content

Commit 87519a2

Browse files
authored
[RISCV] Combine (mul (zext, zext)) -> (zext (mul (zext, zext))) (#86465)
Building on #86248, we can also narrow the width of a mul of zexts. This is legal because on RVV we always extend to the next power-of-2 width, and the product of two N-bit unsigned integers always fits in 2*N bits. So as long as we keep an inner zext to 2*N bits, the multiply has enough room and cannot overflow. Alive2 proof: https://alive2.llvm.org/ce/z/XteYyb
1 parent ecfffbf commit 87519a2

File tree

3 files changed

+64
-76
lines changed

3 files changed

+64
-76
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -12935,6 +12935,7 @@ static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
1293512935

1293612936
// add (zext, zext) -> zext (add (zext, zext))
1293712937
// sub (zext, zext) -> sext (sub (zext, zext))
12938+
// mul (zext, zext) -> zext (mul (zext, zext))
1293812939
//
1293912940
// where the sum of the extend widths match, and the range of the bin op
1294012941
// fits inside the width of the narrower bin op. (For profitability on rvv, we
@@ -13380,6 +13381,9 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) {
1338013381
return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
1338113382
}
1338213383

13384+
if (SDValue V = combineBinOpOfZExt(N, DAG))
13385+
return V;
13386+
1338313387
return SDValue();
1338413388
}
1338513389

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -391,12 +391,12 @@ define <32 x i64> @vwmulu_v32i64(ptr %x, ptr %y) {
391391
define <2 x i32> @vwmulu_v2i32_v2i8(ptr %x, ptr %y) {
392392
; CHECK-LABEL: vwmulu_v2i32_v2i8:
393393
; CHECK: # %bb.0:
394-
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
394+
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
395395
; CHECK-NEXT: vle8.v v8, (a0)
396396
; CHECK-NEXT: vle8.v v9, (a1)
397-
; CHECK-NEXT: vzext.vf2 v10, v8
398-
; CHECK-NEXT: vzext.vf2 v11, v9
399-
; CHECK-NEXT: vwmulu.vv v8, v10, v11
397+
; CHECK-NEXT: vwmulu.vv v10, v8, v9
398+
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
399+
; CHECK-NEXT: vzext.vf2 v8, v10
400400
; CHECK-NEXT: ret
401401
%a = load <2 x i8>, ptr %x
402402
%b = load <2 x i8>, ptr %y

llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll

+56-72
Original file line numberDiff line numberDiff line change
@@ -355,10 +355,10 @@ define <vscale x 1 x i64> @vwmul_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vsc
355355
define <vscale x 1 x i64> @vwmulu_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
356356
; CHECK-LABEL: vwmulu_vv_nxv1i64_nxv1i16:
357357
; CHECK: # %bb.0:
358-
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
359-
; CHECK-NEXT: vzext.vf2 v10, v8
360-
; CHECK-NEXT: vzext.vf2 v11, v9
361-
; CHECK-NEXT: vwmulu.vv v8, v10, v11
358+
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
359+
; CHECK-NEXT: vwmulu.vv v10, v8, v9
360+
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
361+
; CHECK-NEXT: vzext.vf2 v8, v10
362362
; CHECK-NEXT: ret
363363
%vc = zext <vscale x 1 x i16> %va to <vscale x 1 x i64>
364364
%vd = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
@@ -402,11 +402,9 @@ define <vscale x 1 x i64> @vwmulu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16
402402
; CHECK-LABEL: vwmulu_vx_nxv1i64_nxv1i16:
403403
; CHECK: # %bb.0:
404404
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
405-
; CHECK-NEXT: vmv.v.x v9, a0
406-
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
407-
; CHECK-NEXT: vzext.vf2 v10, v8
408-
; CHECK-NEXT: vzext.vf2 v11, v9
409-
; CHECK-NEXT: vwmulu.vv v8, v10, v11
405+
; CHECK-NEXT: vwmulu.vx v9, v8, a0
406+
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
407+
; CHECK-NEXT: vzext.vf2 v8, v9
410408
; CHECK-NEXT: ret
411409
%head = insertelement <vscale x 1 x i16> undef, i16 %b, i16 0
412410
%splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
@@ -451,10 +449,10 @@ define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vsc
451449
define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
452450
; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i16:
453451
; CHECK: # %bb.0:
454-
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
455-
; CHECK-NEXT: vzext.vf2 v10, v8
456-
; CHECK-NEXT: vzext.vf2 v11, v9
457-
; CHECK-NEXT: vwmulu.vv v8, v10, v11
452+
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
453+
; CHECK-NEXT: vwmulu.vv v10, v8, v9
454+
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
455+
; CHECK-NEXT: vzext.vf2 v8, v10
458456
; CHECK-NEXT: ret
459457
%vc = zext <vscale x 2 x i16> %va to <vscale x 2 x i64>
460458
%vd = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
@@ -498,11 +496,9 @@ define <vscale x 2 x i64> @vwmulu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
498496
; CHECK-LABEL: vwmulu_vx_nxv2i64_nxv2i16:
499497
; CHECK: # %bb.0:
500498
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
501-
; CHECK-NEXT: vmv.v.x v9, a0
502-
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
503-
; CHECK-NEXT: vzext.vf2 v10, v8
504-
; CHECK-NEXT: vzext.vf2 v11, v9
505-
; CHECK-NEXT: vwmulu.vv v8, v10, v11
499+
; CHECK-NEXT: vwmulu.vx v10, v8, a0
500+
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
501+
; CHECK-NEXT: vzext.vf2 v8, v10
506502
; CHECK-NEXT: ret
507503
%head = insertelement <vscale x 2 x i16> undef, i16 %b, i16 0
508504
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -547,10 +543,10 @@ define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vsc
547543
define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
548544
; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i16:
549545
; CHECK: # %bb.0:
550-
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
551-
; CHECK-NEXT: vzext.vf2 v12, v8
552-
; CHECK-NEXT: vzext.vf2 v14, v9
553-
; CHECK-NEXT: vwmulu.vv v8, v12, v14
546+
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
547+
; CHECK-NEXT: vwmulu.vv v12, v8, v9
548+
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
549+
; CHECK-NEXT: vzext.vf2 v8, v12
554550
; CHECK-NEXT: ret
555551
%vc = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
556552
%vd = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -594,11 +590,9 @@ define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
594590
; CHECK-LABEL: vwmulu_vx_nxv4i64_nxv4i16:
595591
; CHECK: # %bb.0:
596592
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
597-
; CHECK-NEXT: vmv.v.x v9, a0
598-
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
599-
; CHECK-NEXT: vzext.vf2 v12, v8
600-
; CHECK-NEXT: vzext.vf2 v14, v9
601-
; CHECK-NEXT: vwmulu.vv v8, v12, v14
593+
; CHECK-NEXT: vwmulu.vx v12, v8, a0
594+
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
595+
; CHECK-NEXT: vzext.vf2 v8, v12
602596
; CHECK-NEXT: ret
603597
%head = insertelement <vscale x 4 x i16> undef, i16 %b, i16 0
604598
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -643,10 +637,10 @@ define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vsc
643637
define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
644638
; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i16:
645639
; CHECK: # %bb.0:
646-
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
647-
; CHECK-NEXT: vzext.vf2 v16, v8
648-
; CHECK-NEXT: vzext.vf2 v20, v10
649-
; CHECK-NEXT: vwmulu.vv v8, v16, v20
640+
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
641+
; CHECK-NEXT: vwmulu.vv v16, v8, v10
642+
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
643+
; CHECK-NEXT: vzext.vf2 v8, v16
650644
; CHECK-NEXT: ret
651645
%vc = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
652646
%vd = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -690,11 +684,9 @@ define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
690684
; CHECK-LABEL: vwmulu_vx_nxv8i64_nxv8i16:
691685
; CHECK: # %bb.0:
692686
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
693-
; CHECK-NEXT: vmv.v.x v10, a0
694-
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
695-
; CHECK-NEXT: vzext.vf2 v16, v8
696-
; CHECK-NEXT: vzext.vf2 v20, v10
697-
; CHECK-NEXT: vwmulu.vv v8, v16, v20
687+
; CHECK-NEXT: vwmulu.vx v16, v8, a0
688+
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
689+
; CHECK-NEXT: vzext.vf2 v8, v16
698690
; CHECK-NEXT: ret
699691
%head = insertelement <vscale x 8 x i16> undef, i16 %b, i16 0
700692
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -739,10 +731,10 @@ define <vscale x 1 x i64> @vwmul_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscal
739731
define <vscale x 1 x i64> @vwmulu_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
740732
; CHECK-LABEL: vwmulu_vv_nxv1i64_nxv1i8:
741733
; CHECK: # %bb.0:
742-
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
743-
; CHECK-NEXT: vzext.vf4 v10, v8
744-
; CHECK-NEXT: vzext.vf4 v11, v9
745-
; CHECK-NEXT: vwmulu.vv v8, v10, v11
734+
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
735+
; CHECK-NEXT: vwmulu.vv v10, v8, v9
736+
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
737+
; CHECK-NEXT: vzext.vf4 v8, v10
746738
; CHECK-NEXT: ret
747739
%vc = zext <vscale x 1 x i8> %va to <vscale x 1 x i64>
748740
%vd = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
@@ -786,11 +778,9 @@ define <vscale x 1 x i64> @vwmulu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b
786778
; CHECK-LABEL: vwmulu_vx_nxv1i64_nxv1i8:
787779
; CHECK: # %bb.0:
788780
; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
789-
; CHECK-NEXT: vmv.v.x v9, a0
790-
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
791-
; CHECK-NEXT: vzext.vf4 v10, v8
792-
; CHECK-NEXT: vzext.vf4 v11, v9
793-
; CHECK-NEXT: vwmulu.vv v8, v10, v11
781+
; CHECK-NEXT: vwmulu.vx v9, v8, a0
782+
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
783+
; CHECK-NEXT: vzext.vf4 v8, v9
794784
; CHECK-NEXT: ret
795785
%head = insertelement <vscale x 1 x i8> undef, i8 %b, i8 0
796786
%splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
@@ -835,10 +825,10 @@ define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscal
835825
define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
836826
; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i8:
837827
; CHECK: # %bb.0:
838-
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
839-
; CHECK-NEXT: vzext.vf4 v10, v8
840-
; CHECK-NEXT: vzext.vf4 v11, v9
841-
; CHECK-NEXT: vwmulu.vv v8, v10, v11
828+
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
829+
; CHECK-NEXT: vwmulu.vv v10, v8, v9
830+
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
831+
; CHECK-NEXT: vzext.vf4 v8, v10
842832
; CHECK-NEXT: ret
843833
%vc = zext <vscale x 2 x i8> %va to <vscale x 2 x i64>
844834
%vd = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
@@ -882,11 +872,9 @@ define <vscale x 2 x i64> @vwmulu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b
882872
; CHECK-LABEL: vwmulu_vx_nxv2i64_nxv2i8:
883873
; CHECK: # %bb.0:
884874
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
885-
; CHECK-NEXT: vmv.v.x v9, a0
886-
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
887-
; CHECK-NEXT: vzext.vf4 v10, v8
888-
; CHECK-NEXT: vzext.vf4 v11, v9
889-
; CHECK-NEXT: vwmulu.vv v8, v10, v11
875+
; CHECK-NEXT: vwmulu.vx v10, v8, a0
876+
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
877+
; CHECK-NEXT: vzext.vf4 v8, v10
890878
; CHECK-NEXT: ret
891879
%head = insertelement <vscale x 2 x i8> undef, i8 %b, i8 0
892880
%splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
@@ -931,10 +919,10 @@ define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscal
931919
define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
932920
; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i8:
933921
; CHECK: # %bb.0:
934-
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
935-
; CHECK-NEXT: vzext.vf4 v12, v8
936-
; CHECK-NEXT: vzext.vf4 v14, v9
937-
; CHECK-NEXT: vwmulu.vv v8, v12, v14
922+
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
923+
; CHECK-NEXT: vwmulu.vv v12, v8, v9
924+
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
925+
; CHECK-NEXT: vzext.vf4 v8, v12
938926
; CHECK-NEXT: ret
939927
%vc = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
940928
%vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -978,11 +966,9 @@ define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b
978966
; CHECK-LABEL: vwmulu_vx_nxv4i64_nxv4i8:
979967
; CHECK: # %bb.0:
980968
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
981-
; CHECK-NEXT: vmv.v.x v9, a0
982-
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
983-
; CHECK-NEXT: vzext.vf4 v12, v8
984-
; CHECK-NEXT: vzext.vf4 v14, v9
985-
; CHECK-NEXT: vwmulu.vv v8, v12, v14
969+
; CHECK-NEXT: vwmulu.vx v12, v8, a0
970+
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
971+
; CHECK-NEXT: vzext.vf4 v8, v12
986972
; CHECK-NEXT: ret
987973
%head = insertelement <vscale x 4 x i8> undef, i8 %b, i8 0
988974
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -1027,10 +1013,10 @@ define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscal
10271013
define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
10281014
; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i8:
10291015
; CHECK: # %bb.0:
1030-
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
1031-
; CHECK-NEXT: vzext.vf4 v16, v8
1032-
; CHECK-NEXT: vzext.vf4 v20, v9
1033-
; CHECK-NEXT: vwmulu.vv v8, v16, v20
1016+
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
1017+
; CHECK-NEXT: vwmulu.vv v16, v8, v9
1018+
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1019+
; CHECK-NEXT: vzext.vf4 v8, v16
10341020
; CHECK-NEXT: ret
10351021
%vc = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
10361022
%vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1074,11 +1060,9 @@ define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b
10741060
; CHECK-LABEL: vwmulu_vx_nxv8i64_nxv8i8:
10751061
; CHECK: # %bb.0:
10761062
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
1077-
; CHECK-NEXT: vmv.v.x v9, a0
1078-
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1079-
; CHECK-NEXT: vzext.vf4 v16, v8
1080-
; CHECK-NEXT: vzext.vf4 v20, v9
1081-
; CHECK-NEXT: vwmulu.vv v8, v16, v20
1063+
; CHECK-NEXT: vwmulu.vx v16, v8, a0
1064+
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1065+
; CHECK-NEXT: vzext.vf4 v8, v16
10821066
; CHECK-NEXT: ret
10831067
%head = insertelement <vscale x 8 x i8> undef, i8 %b, i8 0
10841068
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer

0 commit comments

Comments
 (0)