Skip to content

Commit aae152f

Browse files
committedMar 5, 2024
Revert "[SLP]Improve minbitwidth analysis."
This reverts commit a730ed7 to fix compile time issue.
1 parent c8b3edc commit aae152f

15 files changed

+295
-451
lines changed
 

‎llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 184 additions & 338 deletions
Large diffs are not rendered by default.

‎llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,12 @@ define void @test1(<4 x i16> %a, <4 x i16> %b, ptr %p) {
1717
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[S0]]
1818
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4
1919
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[SUB0]], <4 x i32> poison, <2 x i32> <i32 1, i32 2>
20-
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
21-
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
20+
; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i32> [[TMP0]] to <2 x i64>
21+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
2222
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP2]]
2323
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4
24-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
25-
; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
26-
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP4]]
24+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
25+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP3]]
2726
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4
2827
; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3
2928
; CHECK-NEXT: [[S3:%.*]] = sext i32 [[E3]] to i64

‎llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
;test_i16_extend NOTE: Assertions have been autogenerated by utils/update_test_checks.py
3-
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer,dce,instcombine -slp-threshold=-5 -pass-remarks-output=%t < %s | FileCheck %s
3+
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer,dce,instcombine -slp-threshold=-7 -pass-remarks-output=%t < %s | FileCheck %s
44
; RUN: cat %t | FileCheck -check-prefix=YAML %s
5-
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-5 -pass-remarks-output=%t < %s | FileCheck %s
5+
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-7 -pass-remarks-output=%t < %s | FileCheck %s
66
; RUN: cat %t | FileCheck -check-prefix=YAML %s
77

88

‎llvm/test/Transforms/SLPVectorizer/AArch64/reduce-add-i64.ll

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,21 @@ entry:
2828
define i64 @red_zext_ld_4xi64(ptr %ptr) {
2929
; CHECK-LABEL: @red_zext_ld_4xi64(
3030
; CHECK-NEXT: entry:
31-
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1
32-
; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16>
33-
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]])
34-
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i64
35-
; CHECK-NEXT: ret i64 [[TMP3]]
31+
; CHECK-NEXT: [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
32+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[LD0]] to i64
33+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
34+
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP]], align 1
35+
; CHECK-NEXT: [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64
36+
; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]]
37+
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
38+
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_1]], align 1
39+
; CHECK-NEXT: [[ZEXT_2:%.*]] = zext i8 [[LD2]] to i64
40+
; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[ZEXT_2]]
41+
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
42+
; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[GEP_2]], align 1
43+
; CHECK-NEXT: [[ZEXT_3:%.*]] = zext i8 [[LD3]] to i64
44+
; CHECK-NEXT: [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[ZEXT_3]]
45+
; CHECK-NEXT: ret i64 [[ADD_3]]
3646
;
3747
entry:
3848
%ld0 = load i8, ptr %ptr

‎llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -802,10 +802,9 @@ define i64 @red_zext_ld_4xi64(ptr %ptr) {
802802
; CHECK-LABEL: @red_zext_ld_4xi64(
803803
; CHECK-NEXT: entry:
804804
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1
805-
; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16>
806-
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]])
807-
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i64
808-
; CHECK-NEXT: ret i64 [[TMP3]]
805+
; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i64>
806+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP1]])
807+
; CHECK-NEXT: ret i64 [[TMP2]]
809808
;
810809
entry:
811810
%ld0 = load i8, ptr %ptr

‎llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,11 @@ define { i64, i64 } @patatino(double %arg) {
1515
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 4), align 16
1616
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
1717
; CHECK-NEXT: [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32>
18-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i32 0
19-
; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
18+
; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
19+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
2020
; CHECK-NEXT: [[T16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP10]], 0
21-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP8]], i32 1
22-
; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
23-
; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP12]], 1
21+
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
22+
; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP11]], 1
2423
; CHECK-NEXT: ret { i64, i64 } [[T17]]
2524
;
2625
bb:

‎llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2-
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-6 < %s | FileCheck %s
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-3 < %s | FileCheck %s
33

44
define void @t(i64 %v) {
55
; CHECK-LABEL: define void @t(

‎llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-multiuse-with-insertelement.ll

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,18 @@ define void @test(i8 %0) {
66
; CHECK-SAME: i8 [[TMP0:%.*]]) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> <i8 0, i8 poison>, i8 [[TMP0]], i32 1
9-
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32>
10-
; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i8> [[TMP1]], zeroinitializer
11-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[TMP3]], i32 0
12-
; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32
13-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP3]], i32 1
14-
; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
15-
; CHECK-NEXT: [[ADD:%.*]] = or i32 [[TMP5]], [[TMP7]]
9+
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16>
10+
; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i32>
11+
; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i16> [[TMP2]], zeroinitializer
12+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
13+
; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP5]] to i32
14+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
15+
; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32
16+
; CHECK-NEXT: [[ADD:%.*]] = or i32 [[TMP6]], [[TMP8]]
1617
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD]], 1
1718
; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SHR]] to i8
1819
; CHECK-NEXT: store i8 [[CONV9]], ptr null, align 1
19-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
20+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
2021
; CHECK-NEXT: ret void
2122
;
2223
entry:

‎llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,15 @@ define void @test(i64 %d.promoted.i) {
66
; CHECK-SAME: i64 [[D_PROMOTED_I:%.*]]) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: [[AND_1_I:%.*]] = and i64 0, [[D_PROMOTED_I]]
9-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i64> <i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[AND_1_I]], i32 1
10-
; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[TMP0]] to <8 x i1>
11-
; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i1> [[TMP1]], zeroinitializer
129
; CHECK-NEXT: [[AND_1_I_1:%.*]] = and i64 0, 0
13-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i64> <i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[AND_1_I_1]], i32 1
14-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[TMP3]] to <8 x i1>
15-
; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i1> [[TMP4]], zeroinitializer
16-
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP5]])
17-
; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i32
18-
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP2]])
19-
; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
20-
; CHECK-NEXT: [[OP_RDX:%.*]] = or i32 [[TMP7]], [[TMP9]]
21-
; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[OP_RDX]], 0
22-
; CHECK-NEXT: store i32 [[TMP10]], ptr null, align 4
10+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i64> <i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[AND_1_I_1]], i32 1
11+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i64> [[TMP0]], i64 [[AND_1_I]], i32 9
12+
; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[TMP1]] to <16 x i1>
13+
; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i1> [[TMP2]], zeroinitializer
14+
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
15+
; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
16+
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 0
17+
; CHECK-NEXT: store i32 [[TMP6]], ptr null, align 4
2318
; CHECK-NEXT: ret void
2419
;
2520
entry:

‎llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,12 @@ target triple = "x86_64-unknown-linux-gnu"
1717
define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
1818
; SSE-LABEL: @PR31243_zext(
1919
; SSE-NEXT: entry:
20-
; SSE-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0
21-
; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1
22-
; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
23-
; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
24-
; SSE-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64
25-
; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
26-
; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1
27-
; SSE-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i64
28-
; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]]
20+
; SSE-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1
21+
; SSE-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1
22+
; SSE-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
23+
; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]]
24+
; SSE-NEXT: [[TMP3:%.*]] = zext i8 [[TMP1]] to i64
25+
; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]]
2926
; SSE-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1
3027
; SSE-NEXT: [[T7:%.*]] = load i8, ptr [[T5]], align 1
3128
; SSE-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]]
@@ -76,15 +73,12 @@ entry:
7673
define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
7774
; SSE-LABEL: @PR31243_sext(
7875
; SSE-NEXT: entry:
79-
; SSE-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0
80-
; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1
81-
; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
82-
; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
83-
; SSE-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i64
84-
; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
85-
; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1
86-
; SSE-NEXT: [[TMP6:%.*]] = sext i8 [[TMP5]] to i64
87-
; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]]
76+
; SSE-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1
77+
; SSE-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1
78+
; SSE-NEXT: [[TMP2:%.*]] = sext i8 [[TMP0]] to i64
79+
; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]]
80+
; SSE-NEXT: [[TMP3:%.*]] = sext i8 [[TMP1]] to i64
81+
; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]]
8882
; SSE-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1
8983
; SSE-NEXT: [[T7:%.*]] = load i8, ptr [[T5]], align 1
9084
; SSE-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]]
@@ -95,12 +89,13 @@ define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
9589
; AVX-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0
9690
; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1
9791
; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
98-
; AVX-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
99-
; AVX-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i64
100-
; AVX-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
101-
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1
102-
; AVX-NEXT: [[TMP6:%.*]] = sext i8 [[TMP5]] to i64
103-
; AVX-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]]
92+
; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i16>
93+
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[TMP3]], i64 0
94+
; AVX-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i64
95+
; AVX-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP5]]
96+
; AVX-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP3]], i64 1
97+
; AVX-NEXT: [[TMP7:%.*]] = sext i16 [[TMP6]] to i64
98+
; AVX-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP7]]
10499
; AVX-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1
105100
; AVX-NEXT: [[T7:%.*]] = load i8, ptr [[T5]], align 1
106101
; AVX-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]]

‎llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ define i32 @phi3UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) {
1515
; CHECK-NEXT: br label [[BB3]]
1616
; CHECK: bb3:
1717
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 undef, i8 undef, i8 undef>, [[ENTRY:%.*]] ]
18-
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
19-
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
18+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
19+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
2020
; CHECK-NEXT: ret i32 [[TMP6]]
2121
;
2222
entry:
@@ -52,8 +52,8 @@ define i32 @phi2UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) {
5252
; CHECK-NEXT: br label [[BB3]]
5353
; CHECK: bb3:
5454
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 undef, i8 undef>, [[ENTRY:%.*]] ]
55-
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
56-
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
55+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
56+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
5757
; CHECK-NEXT: ret i32 [[TMP6]]
5858
;
5959
entry:
@@ -89,8 +89,8 @@ define i32 @phi1UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) {
8989
; CHECK-NEXT: br label [[BB3]]
9090
; CHECK: bb3:
9191
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 0, i8 undef>, [[ENTRY:%.*]] ]
92-
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
93-
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
92+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
93+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
9494
; CHECK-NEXT: ret i32 [[TMP6]]
9595
;
9696
entry:
@@ -127,8 +127,8 @@ define i32 @phi1Undef1PoisonInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %ar
127127
; CHECK-NEXT: br label [[BB3]]
128128
; CHECK: bb3:
129129
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
130-
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
131-
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
130+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
131+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
132132
; CHECK-NEXT: ret i32 [[TMP6]]
133133
;
134134
entry:
@@ -165,8 +165,8 @@ define i32 @phi1Undef2PoisonInputs(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %a
165165
; CHECK-NEXT: br label [[BB3]]
166166
; CHECK: bb3:
167167
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 poison, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
168-
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
169-
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
168+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
169+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
170170
; CHECK-NEXT: ret i32 [[TMP6]]
171171
;
172172
entry:
@@ -202,8 +202,8 @@ define i32 @phi1Undef1PoisonGapInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8
202202
; CHECK-NEXT: br label [[BB3]]
203203
; CHECK: bb3:
204204
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
205-
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
206-
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
205+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
206+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
207207
; CHECK-NEXT: ret i32 [[TMP6]]
208208
;
209209
entry:

‎llvm/test/Transforms/SLPVectorizer/X86/resched.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,26 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv()
1111
; CHECK: if.then22.i:
1212
; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1
1313
; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]]
14-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0
15-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
16-
; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 2, i32 3, i32 4>
14+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0
15+
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
16+
; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[SHUFFLE1]], <i32 1, i32 2, i32 3, i32 4>
1717
; CHECK-NEXT: [[SHR_4_I_I:%.*]] = lshr i32 [[CONV31_I]], 5
1818
; CHECK-NEXT: [[SHR_5_I_I:%.*]] = lshr i32 [[CONV31_I]], 6
1919
; CHECK-NEXT: [[SHR_6_I_I:%.*]] = lshr i32 [[CONV31_I]], 7
2020
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0
21-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> zeroinitializer
22-
; CHECK-NEXT: [[TMP5:%.*]] = lshr <8 x i32> [[TMP4]], <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0
24-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
25-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP7]], <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
26-
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SHR_4_I_I]], i32 5
27-
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_5_I_I]], i32 6
28-
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[SHR_6_I_I]], i32 7
29-
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
30-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
31-
; CHECK-NEXT: [[TMP14:%.*]] = trunc <16 x i32> [[TMP13]] to <16 x i8>
32-
; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i8> [[TMP14]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
33-
; CHECK-NEXT: store <16 x i8> [[TMP15]], ptr undef, align 1
21+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> zeroinitializer
22+
; CHECK-NEXT: [[TMP4:%.*]] = lshr <8 x i32> [[SHUFFLE]], <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0
24+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
25+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
26+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SHR_4_I_I]], i32 5
27+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SHR_5_I_I]], i32 6
28+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_6_I_I]], i32 7
29+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
30+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
31+
; CHECK-NEXT: [[TMP13:%.*]] = trunc <16 x i32> [[TMP12]] to <16 x i8>
32+
; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i8> [[TMP13]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
33+
; CHECK-NEXT: store <16 x i8> [[TMP14]], ptr undef, align 1
3434
; CHECK-NEXT: unreachable
3535
; CHECK: if.end50.i:
3636
; CHECK-NEXT: ret void

‎llvm/test/Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@ define i1 @test(i1 %cmp5.not.31) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i1> <i1 poison, i1 false, i1 false, i1 false>, i1 [[CMP5_NOT_31]], i32 0
99
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
10-
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], <i32 2, i32 1, i32 1, i32 1>
11-
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
12-
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 0
13-
; CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i32 [[TMP4]], 0
10+
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i1>
11+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP2]] to <4 x i32>
12+
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP3]], <i32 2, i32 1, i32 1, i32 1>
13+
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
14+
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 0
15+
; CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i32 [[TMP6]], 0
1416
; CHECK-NEXT: ret i1 [[CMP_NOT_I_I]]
1517
;
1618
entry:

‎llvm/test/Transforms/SLPVectorizer/X86/store-insertelement-minbitwidth.ll

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,17 @@
88
; YAML-NEXT: Function: stores
99
; YAML-NEXT: Args:
1010
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
11-
; YAML-NEXT: - Cost: '-7'
11+
; YAML-NEXT: - Cost: '-3'
1212
; YAML-NEXT: - String: ' and with tree size '
1313
; YAML-NEXT: - TreeSize: '6'
1414
define void @stores(ptr noalias %in, ptr noalias %inn, ptr noalias %out) {
1515
; CHECK-LABEL: @stores(
1616
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[IN:%.*]], align 1
1717
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[INN:%.*]], align 1
18-
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16>
19-
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16>
20-
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]]
21-
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i64>
22-
; CHECK-NEXT: store <4 x i64> [[TMP6]], ptr [[OUT:%.*]], align 4
18+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i64>
19+
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
20+
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]]
21+
; CHECK-NEXT: store <4 x i64> [[TMP5]], ptr [[OUT:%.*]], align 4
2322
; CHECK-NEXT: ret void
2423
;
2524
%load.1 = load i8, ptr %in, align 1
@@ -64,18 +63,17 @@ define void @stores(ptr noalias %in, ptr noalias %inn, ptr noalias %out) {
6463
; YAML-NEXT: Function: insertelems
6564
; YAML-NEXT: Args:
6665
; YAML-NEXT: - String: 'SLP vectorized with cost '
67-
; YAML-NEXT: - Cost: '-9'
66+
; YAML-NEXT: - Cost: '-5'
6867
; YAML-NEXT: - String: ' and with tree size '
6968
; YAML-NEXT: - TreeSize: '6'
7069
define <4 x i64> @insertelems(ptr noalias %in, ptr noalias %inn) {
7170
; CHECK-LABEL: @insertelems(
7271
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[IN:%.*]], align 1
7372
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[INN:%.*]], align 1
74-
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16>
75-
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16>
76-
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]]
77-
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i64>
78-
; CHECK-NEXT: ret <4 x i64> [[TMP6]]
73+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i64>
74+
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
75+
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]]
76+
; CHECK-NEXT: ret <4 x i64> [[TMP5]]
7977
;
8078
%load.1 = load i8, ptr %in, align 1
8179
%gep.1 = getelementptr inbounds i8, ptr %in, i64 1

‎llvm/test/Transforms/SLPVectorizer/alt-cmp-vectorize.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ define i32 @alt_cmp(i16 %call46) {
1010
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i16> [[TMP0]], [[TMP1]]
1111
; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i16> [[TMP0]], [[TMP1]]
1212
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
13-
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
14-
; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i16
13+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i16>
14+
; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP5]])
1515
; CHECK-NEXT: [[OP_RDX:%.*]] = or i16 [[TMP6]], 0
1616
; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[OP_RDX]] to i32
1717
; CHECK-NEXT: ret i32 [[EXT]]

0 commit comments

Comments
 (0)
Please sign in to comment.