@@ -5,6 +5,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX256
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=znver4 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512VBMI2
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 
 @a64 = common global [8 x i64] zeroinitializer, align 64
@@ -128,6 +129,13 @@ define void @fshl_v8i64() {
 ; AVX512-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
 ; AVX512-NEXT: store <8 x i64> [[TMP3]], ptr @d64, align 8
 ; AVX512-NEXT: ret void
+;
+; AVX512VBMI2-LABEL: @fshl_v8i64(
+; AVX512VBMI2-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512VBMI2-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
+; AVX512VBMI2-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
+; AVX512VBMI2-NEXT: store <8 x i64> [[TMP3]], ptr @d64, align 8
+; AVX512VBMI2-NEXT: ret void
 ;
 %a0 = load i64, ptr @a64, align 8
 %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
@@ -249,6 +257,13 @@ define void @fshl_v16i32() {
 ; AVX512-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
 ; AVX512-NEXT: store <16 x i32> [[TMP3]], ptr @d32, align 4
 ; AVX512-NEXT: ret void
+;
+; AVX512VBMI2-LABEL: @fshl_v16i32(
+; AVX512VBMI2-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512VBMI2-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
+; AVX512VBMI2-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
+; AVX512VBMI2-NEXT: store <16 x i32> [[TMP3]], ptr @d32, align 4
+; AVX512VBMI2-NEXT: ret void
 ;
 %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
 %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
@@ -335,6 +350,13 @@ define void @fshl_v32i16() {
 ; AVX512-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
 ; AVX512-NEXT: store <32 x i16> [[TMP3]], ptr @d16, align 2
 ; AVX512-NEXT: ret void
+;
+; AVX512VBMI2-LABEL: @fshl_v32i16(
+; AVX512VBMI2-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512VBMI2-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
+; AVX512VBMI2-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
+; AVX512VBMI2-NEXT: store <32 x i16> [[TMP3]], ptr @d16, align 2
+; AVX512VBMI2-NEXT: ret void
 ;
 %a0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
 %a1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
@@ -504,6 +526,13 @@ define void @fshl_v64i8() {
 ; AVX512-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
 ; AVX512-NEXT: store <64 x i8> [[TMP3]], ptr @d8, align 1
 ; AVX512-NEXT: ret void
+;
+; AVX512VBMI2-LABEL: @fshl_v64i8(
+; AVX512VBMI2-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512VBMI2-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
+; AVX512VBMI2-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
+; AVX512VBMI2-NEXT: store <64 x i8> [[TMP3]], ptr @d8, align 1
+; AVX512VBMI2-NEXT: ret void
 ;
 %a0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
 %a1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
@@ -811,6 +840,13 @@ define void @fshl_v2i32() {
 ; AVX512-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
 ; AVX512-NEXT: store <2 x i32> [[TMP3]], ptr @d32, align 4
 ; AVX512-NEXT: ret void
+;
+; AVX512VBMI2-LABEL: @fshl_v2i32(
+; AVX512VBMI2-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4
+; AVX512VBMI2-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @b32, align 4
+; AVX512VBMI2-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
+; AVX512VBMI2-NEXT: store <2 x i32> [[TMP3]], ptr @d32, align 4
+; AVX512VBMI2-NEXT: ret void
 ;
 %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
 %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
@@ -863,6 +899,12 @@ define void @fshl_v2i32_uniformconst() {
 ; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1))
 ; AVX512-NEXT: store <2 x i32> [[TMP2]], ptr @d32, align 4
 ; AVX512-NEXT: ret void
+;
+; AVX512VBMI2-LABEL: @fshl_v2i32_uniformconst(
+; AVX512VBMI2-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4
+; AVX512VBMI2-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1))
+; AVX512VBMI2-NEXT: store <2 x i32> [[TMP2]], ptr @d32, align 4
+; AVX512VBMI2-NEXT: ret void
 ;
 %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
 %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4