; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512DQVL
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BWVL
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQVL
;
; vXi64
@@ -28,19 +28,19 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: retq
;
- ; AVX-LABEL: test_v2i64:
- ; AVX: # %bb.0:
- ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
- ; AVX-NEXT: vpsrlq $32, %xmm0, %xmm2
- ; AVX-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
- ; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
- ; AVX-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
- ; AVX-NEXT: vpaddq %xmm2, %xmm3, %xmm2
- ; AVX-NEXT: vpsllq $32, %xmm2, %xmm2
- ; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
- ; AVX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
- ; AVX-NEXT: vmovq %xmm0, %rax
- ; AVX-NEXT: retq
+ ; AVX1OR2-LABEL: test_v2i64:
+ ; AVX1OR2: # %bb.0:
+ ; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+ ; AVX1OR2-NEXT: vpsrlq $32, %xmm0, %xmm2
+ ; AVX1OR2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+ ; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+ ; AVX1OR2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+ ; AVX1OR2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+ ; AVX1OR2-NEXT: vpsllq $32, %xmm2, %xmm2
+ ; AVX1OR2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+ ; AVX1OR2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+ ; AVX1OR2-NEXT: vmovq %xmm0, %rax
+ ; AVX1OR2-NEXT: retq
;
; AVX512BW-LABEL: test_v2i64:
; AVX512BW: # %bb.0:
@@ -792,13 +792,6 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
- ;
- ; AVX512-LABEL: test_v2i32:
- ; AVX512: # %bb.0:
- ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
- ; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vmovd %xmm0, %eax
- ; AVX512-NEXT: retq
%1 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -832,15 +825,6 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
- ;
- ; AVX512-LABEL: test_v4i32:
- ; AVX512: # %bb.0:
- ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
- ; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
- ; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vmovd %xmm0, %eax
- ; AVX512-NEXT: retq
%1 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -1111,14 +1095,6 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- ;
- ; AVX512-LABEL: test_v2i16:
- ; AVX512: # %bb.0:
- ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
- ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vmovd %xmm0, %eax
- ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
- ; AVX512-NEXT: retq
%1 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -1144,16 +1120,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- ;
- ; AVX512-LABEL: test_v4i16:
- ; AVX512: # %bb.0:
- ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
- ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
- ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vmovd %xmm0, %eax
- ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
- ; AVX512-NEXT: retq
%1 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -1183,18 +1149,6 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- ;
- ; AVX512-LABEL: test_v8i16:
- ; AVX512: # %bb.0:
- ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
- ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
- ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
- ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vmovd %xmm0, %eax
- ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
- ; AVX512-NEXT: retq
%1 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -1547,14 +1501,6 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- ;
- ; AVX512-LABEL: test_v2i8:
- ; AVX512: # %bb.0:
- ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
- ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vmovd %xmm0, %eax
- ; AVX512-NEXT: # kill: def $al killed $al killed $eax
- ; AVX512-NEXT: retq
%1 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -1596,17 +1542,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- ;
- ; AVX512-LABEL: test_v4i8:
- ; AVX512: # %bb.0:
- ; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
- ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
- ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
- ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vmovd %xmm0, %eax
- ; AVX512-NEXT: # kill: def $al killed $al killed $eax
- ; AVX512-NEXT: retq
%1 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -1655,19 +1590,6 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- ;
- ; AVX512-LABEL: test_v8i8:
- ; AVX512: # %bb.0:
- ; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
- ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
- ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
- ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
- ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
- ; AVX512-NEXT: vmovd %xmm0, %eax
- ; AVX512-NEXT: # kill: def $al killed $al killed $eax
- ; AVX512-NEXT: retq
%1 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %a0)
ret i8 %1
}