Skip to content

Commit aae152f

Browse files
committed
Revert "[SLP]Improve minbitwidth analysis."
This reverts commit a730ed7 to fix a compile-time issue.
1 parent c8b3edc commit aae152f

15 files changed

+295
-451
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 184 additions & 338 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,12 @@ define void @test1(<4 x i16> %a, <4 x i16> %b, ptr %p) {
1717
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[S0]]
1818
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4
1919
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[SUB0]], <4 x i32> poison, <2 x i32> <i32 1, i32 2>
20-
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
21-
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
20+
; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i32> [[TMP0]] to <2 x i64>
21+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
2222
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP2]]
2323
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4
24-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
25-
; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
26-
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP4]]
24+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
25+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP3]]
2726
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4
2827
; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3
2928
; CHECK-NEXT: [[S3:%.*]] = sext i32 [[E3]] to i64

llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
;test_i16_extend NOTE: Assertions have been autogenerated by utils/update_test_checks.py
3-
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer,dce,instcombine -slp-threshold=-5 -pass-remarks-output=%t < %s | FileCheck %s
3+
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer,dce,instcombine -slp-threshold=-7 -pass-remarks-output=%t < %s | FileCheck %s
44
; RUN: cat %t | FileCheck -check-prefix=YAML %s
5-
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-5 -pass-remarks-output=%t < %s | FileCheck %s
5+
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-7 -pass-remarks-output=%t < %s | FileCheck %s
66
; RUN: cat %t | FileCheck -check-prefix=YAML %s
77

88

llvm/test/Transforms/SLPVectorizer/AArch64/reduce-add-i64.ll

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,21 @@ entry:
2828
define i64 @red_zext_ld_4xi64(ptr %ptr) {
2929
; CHECK-LABEL: @red_zext_ld_4xi64(
3030
; CHECK-NEXT: entry:
31-
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1
32-
; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16>
33-
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]])
34-
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i64
35-
; CHECK-NEXT: ret i64 [[TMP3]]
31+
; CHECK-NEXT: [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
32+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[LD0]] to i64
33+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
34+
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP]], align 1
35+
; CHECK-NEXT: [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64
36+
; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]]
37+
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
38+
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_1]], align 1
39+
; CHECK-NEXT: [[ZEXT_2:%.*]] = zext i8 [[LD2]] to i64
40+
; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[ZEXT_2]]
41+
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
42+
; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[GEP_2]], align 1
43+
; CHECK-NEXT: [[ZEXT_3:%.*]] = zext i8 [[LD3]] to i64
44+
; CHECK-NEXT: [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[ZEXT_3]]
45+
; CHECK-NEXT: ret i64 [[ADD_3]]
3646
;
3747
entry:
3848
%ld0 = load i8, ptr %ptr

llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -802,10 +802,9 @@ define i64 @red_zext_ld_4xi64(ptr %ptr) {
802802
; CHECK-LABEL: @red_zext_ld_4xi64(
803803
; CHECK-NEXT: entry:
804804
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1
805-
; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16>
806-
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]])
807-
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i64
808-
; CHECK-NEXT: ret i64 [[TMP3]]
805+
; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i64>
806+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP1]])
807+
; CHECK-NEXT: ret i64 [[TMP2]]
809808
;
810809
entry:
811810
%ld0 = load i8, ptr %ptr

llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,11 @@ define { i64, i64 } @patatino(double %arg) {
1515
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 4), align 16
1616
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
1717
; CHECK-NEXT: [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32>
18-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i32 0
19-
; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
18+
; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
19+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
2020
; CHECK-NEXT: [[T16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP10]], 0
21-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP8]], i32 1
22-
; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
23-
; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP12]], 1
21+
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
22+
; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP11]], 1
2423
; CHECK-NEXT: ret { i64, i64 } [[T17]]
2524
;
2625
bb:

llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2-
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-6 < %s | FileCheck %s
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-3 < %s | FileCheck %s
33

44
define void @t(i64 %v) {
55
; CHECK-LABEL: define void @t(

llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-multiuse-with-insertelement.ll

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,18 @@ define void @test(i8 %0) {
66
; CHECK-SAME: i8 [[TMP0:%.*]]) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> <i8 0, i8 poison>, i8 [[TMP0]], i32 1
9-
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32>
10-
; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i8> [[TMP1]], zeroinitializer
11-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[TMP3]], i32 0
12-
; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32
13-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP3]], i32 1
14-
; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
15-
; CHECK-NEXT: [[ADD:%.*]] = or i32 [[TMP5]], [[TMP7]]
9+
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16>
10+
; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i32>
11+
; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i16> [[TMP2]], zeroinitializer
12+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
13+
; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP5]] to i32
14+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
15+
; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32
16+
; CHECK-NEXT: [[ADD:%.*]] = or i32 [[TMP6]], [[TMP8]]
1617
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD]], 1
1718
; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SHR]] to i8
1819
; CHECK-NEXT: store i8 [[CONV9]], ptr null, align 1
19-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
20+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
2021
; CHECK-NEXT: ret void
2122
;
2223
entry:

llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,15 @@ define void @test(i64 %d.promoted.i) {
66
; CHECK-SAME: i64 [[D_PROMOTED_I:%.*]]) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: [[AND_1_I:%.*]] = and i64 0, [[D_PROMOTED_I]]
9-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i64> <i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[AND_1_I]], i32 1
10-
; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[TMP0]] to <8 x i1>
11-
; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i1> [[TMP1]], zeroinitializer
129
; CHECK-NEXT: [[AND_1_I_1:%.*]] = and i64 0, 0
13-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i64> <i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[AND_1_I_1]], i32 1
14-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[TMP3]] to <8 x i1>
15-
; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i1> [[TMP4]], zeroinitializer
16-
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP5]])
17-
; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i32
18-
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP2]])
19-
; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
20-
; CHECK-NEXT: [[OP_RDX:%.*]] = or i32 [[TMP7]], [[TMP9]]
21-
; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[OP_RDX]], 0
22-
; CHECK-NEXT: store i32 [[TMP10]], ptr null, align 4
10+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i64> <i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[AND_1_I_1]], i32 1
11+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i64> [[TMP0]], i64 [[AND_1_I]], i32 9
12+
; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[TMP1]] to <16 x i1>
13+
; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i1> [[TMP2]], zeroinitializer
14+
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
15+
; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
16+
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 0
17+
; CHECK-NEXT: store i32 [[TMP6]], ptr null, align 4
2318
; CHECK-NEXT: ret void
2419
;
2520
entry:

llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,12 @@ target triple = "x86_64-unknown-linux-gnu"
1717
define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
1818
; SSE-LABEL: @PR31243_zext(
1919
; SSE-NEXT: entry:
20-
; SSE-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0
21-
; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1
22-
; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
23-
; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
24-
; SSE-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64
25-
; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
26-
; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1
27-
; SSE-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i64
28-
; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]]
20+
; SSE-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1
21+
; SSE-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1
22+
; SSE-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
23+
; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]]
24+
; SSE-NEXT: [[TMP3:%.*]] = zext i8 [[TMP1]] to i64
25+
; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]]
2926
; SSE-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1
3027
; SSE-NEXT: [[T7:%.*]] = load i8, ptr [[T5]], align 1
3128
; SSE-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]]
@@ -76,15 +73,12 @@ entry:
7673
define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
7774
; SSE-LABEL: @PR31243_sext(
7875
; SSE-NEXT: entry:
79-
; SSE-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0
80-
; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1
81-
; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
82-
; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
83-
; SSE-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i64
84-
; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
85-
; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1
86-
; SSE-NEXT: [[TMP6:%.*]] = sext i8 [[TMP5]] to i64
87-
; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]]
76+
; SSE-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1
77+
; SSE-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1
78+
; SSE-NEXT: [[TMP2:%.*]] = sext i8 [[TMP0]] to i64
79+
; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]]
80+
; SSE-NEXT: [[TMP3:%.*]] = sext i8 [[TMP1]] to i64
81+
; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]]
8882
; SSE-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1
8983
; SSE-NEXT: [[T7:%.*]] = load i8, ptr [[T5]], align 1
9084
; SSE-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]]
@@ -95,12 +89,13 @@ define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
9589
; AVX-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0
9690
; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1
9791
; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
98-
; AVX-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
99-
; AVX-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i64
100-
; AVX-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
101-
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1
102-
; AVX-NEXT: [[TMP6:%.*]] = sext i8 [[TMP5]] to i64
103-
; AVX-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]]
92+
; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i16>
93+
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[TMP3]], i64 0
94+
; AVX-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i64
95+
; AVX-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP5]]
96+
; AVX-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP3]], i64 1
97+
; AVX-NEXT: [[TMP7:%.*]] = sext i16 [[TMP6]] to i64
98+
; AVX-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP7]]
10499
; AVX-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1
105100
; AVX-NEXT: [[T7:%.*]] = load i8, ptr [[T5]], align 1
106101
; AVX-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]]

llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ define i32 @phi3UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) {
1515
; CHECK-NEXT: br label [[BB3]]
1616
; CHECK: bb3:
1717
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 undef, i8 undef, i8 undef>, [[ENTRY:%.*]] ]
18-
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
19-
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
18+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
19+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
2020
; CHECK-NEXT: ret i32 [[TMP6]]
2121
;
2222
entry:
@@ -52,8 +52,8 @@ define i32 @phi2UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) {
5252
; CHECK-NEXT: br label [[BB3]]
5353
; CHECK: bb3:
5454
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 undef, i8 undef>, [[ENTRY:%.*]] ]
55-
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
56-
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
55+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
56+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
5757
; CHECK-NEXT: ret i32 [[TMP6]]
5858
;
5959
entry:
@@ -89,8 +89,8 @@ define i32 @phi1UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) {
8989
; CHECK-NEXT: br label [[BB3]]
9090
; CHECK: bb3:
9191
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 0, i8 undef>, [[ENTRY:%.*]] ]
92-
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
93-
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
92+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
93+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
9494
; CHECK-NEXT: ret i32 [[TMP6]]
9595
;
9696
entry:
@@ -127,8 +127,8 @@ define i32 @phi1Undef1PoisonInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %ar
127127
; CHECK-NEXT: br label [[BB3]]
128128
; CHECK: bb3:
129129
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
130-
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
131-
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
130+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
131+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
132132
; CHECK-NEXT: ret i32 [[TMP6]]
133133
;
134134
entry:
@@ -165,8 +165,8 @@ define i32 @phi1Undef2PoisonInputs(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %a
165165
; CHECK-NEXT: br label [[BB3]]
166166
; CHECK: bb3:
167167
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 poison, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
168-
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
169-
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
168+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
169+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
170170
; CHECK-NEXT: ret i32 [[TMP6]]
171171
;
172172
entry:
@@ -202,8 +202,8 @@ define i32 @phi1Undef1PoisonGapInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8
202202
; CHECK-NEXT: br label [[BB3]]
203203
; CHECK: bb3:
204204
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
205-
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
206-
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
205+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
206+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
207207
; CHECK-NEXT: ret i32 [[TMP6]]
208208
;
209209
entry:

0 commit comments

Comments
 (0)