Skip to content

Commit a7a74b3

Browse files
[SLP]Improve reordering of the alternate nodes
Better to preserve the original order of the alternate nodes to avoid inter-lane shuffling, select/insert subvector patterns provide better perf. Reviewers: RKSimon, hiraditya Reviewed By: RKSimon Pull Request: llvm#136329
1 parent 5f91649 commit a7a74b3

File tree

7 files changed

+107
-79
lines changed

7 files changed

+107
-79
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6584,6 +6584,12 @@ static bool areTwoInsertFromSameBuildVector(
65846584
return false;
65856585
}
65866586

6587+
/// Checks if the specified instruction \p I is an alternate operation for
6588+
/// the given \p MainOp and \p AltOp instructions.
6589+
static bool isAlternateInstruction(Instruction *I, Instruction *MainOp,
6590+
Instruction *AltOp,
6591+
const TargetLibraryInfo &TLI);
6592+
65876593
std::optional<BoUpSLP::OrdersType>
65886594
BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom,
65896595
bool IgnoreReorder) {
@@ -6726,6 +6732,28 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom,
67266732
"BinaryOperator and CastInst.");
67276733
return TE.ReorderIndices;
67286734
}
6735+
if (!TopToBottom && IgnoreReorder && TE.State == TreeEntry::Vectorize &&
6736+
TE.isAltShuffle()) {
6737+
assert(TE.ReuseShuffleIndices.empty() &&
6738+
"ReuseShuffleIndices should be "
6739+
"empty for alternate instructions.");
6740+
SmallVector<int> Mask;
6741+
TE.buildAltOpShuffleMask(
6742+
[&](Instruction *I) {
6743+
assert(TE.getMatchingMainOpOrAltOp(I) &&
6744+
"Unexpected main/alternate opcode");
6745+
return isAlternateInstruction(I, TE.getMainOp(), TE.getAltOp(), *TLI);
6746+
},
6747+
Mask);
6748+
const int VF = TE.getVectorFactor();
6749+
OrdersType ResOrder(VF, VF);
6750+
for (unsigned I : seq<unsigned>(VF)) {
6751+
if (Mask[I] == PoisonMaskElem)
6752+
continue;
6753+
ResOrder[Mask[I] % VF] = I;
6754+
}
6755+
return std::move(ResOrder);
6756+
}
67296757
if (!TE.ReorderIndices.empty())
67306758
return TE.ReorderIndices;
67316759
if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {
@@ -7796,21 +7824,26 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
77967824
}
77977825
// Reorder operands of the user node and set the ordering for the user
77987826
// node itself.
7827+
auto IsNotProfitableAltCodeNode = [](const TreeEntry &TE) {
7828+
return TE.isAltShuffle() &&
7829+
(!TE.ReuseShuffleIndices.empty() || TE.getVectorFactor() == 2 ||
7830+
TE.ReorderIndices.empty());
7831+
};
77997832
if (Data.first->State != TreeEntry::Vectorize ||
78007833
!isa<ExtractElementInst, ExtractValueInst, LoadInst>(
78017834
Data.first->getMainOp()) ||
7802-
Data.first->isAltShuffle())
7835+
IsNotProfitableAltCodeNode(*Data.first))
78037836
Data.first->reorderOperands(Mask);
78047837
if (!isa<InsertElementInst, StoreInst>(Data.first->getMainOp()) ||
7805-
Data.first->isAltShuffle() ||
7838+
IsNotProfitableAltCodeNode(*Data.first) ||
78067839
Data.first->State == TreeEntry::StridedVectorize ||
78077840
Data.first->State == TreeEntry::CompressVectorize) {
78087841
reorderScalars(Data.first->Scalars, Mask);
78097842
reorderOrder(Data.first->ReorderIndices, MaskOrder,
78107843
/*BottomOrder=*/true);
78117844
if (Data.first->ReuseShuffleIndices.empty() &&
78127845
!Data.first->ReorderIndices.empty() &&
7813-
!Data.first->isAltShuffle()) {
7846+
!IsNotProfitableAltCodeNode(*Data.first)) {
78147847
// Insert user node to the list to try to sink reordering deeper in
78157848
// the graph.
78167849
Queue.push(Data.first);
@@ -8785,12 +8818,6 @@ static std::pair<size_t, size_t> generateKeySubkey(
87858818
static bool isMainInstruction(Instruction *I, Instruction *MainOp,
87868819
Instruction *AltOp, const TargetLibraryInfo &TLI);
87878820

8788-
/// Checks if the specified instruction \p I is an alternate operation for
8789-
/// the given \p MainOp and \p AltOp instructions.
8790-
static bool isAlternateInstruction(Instruction *I, Instruction *MainOp,
8791-
Instruction *AltOp,
8792-
const TargetLibraryInfo &TLI);
8793-
87948821
bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
87958822
ArrayRef<Value *> VL) const {
87968823
Type *ScalarTy = S.getMainOp()->getType();

llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll

Lines changed: 23 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -80,33 +80,29 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32
8080
; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
8181
; CHECK-NEXT: [[TMP48:%.*]] = add nsw <16 x i32> [[TMP45]], [[TMP47]]
8282
; CHECK-NEXT: [[TMP49:%.*]] = sub nsw <16 x i32> [[TMP44]], [[TMP46]]
83-
; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
84-
; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
85-
; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
86-
; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
87-
; CHECK-NEXT: [[TMP54:%.*]] = add nsw <16 x i32> [[TMP51]], [[TMP53]]
88-
; CHECK-NEXT: [[TMP55:%.*]] = sub nsw <16 x i32> [[TMP50]], [[TMP52]]
89-
; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
90-
; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
91-
; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
92-
; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
93-
; CHECK-NEXT: [[TMP60:%.*]] = sub nsw <16 x i32> [[TMP57]], [[TMP59]]
94-
; CHECK-NEXT: [[TMP61:%.*]] = add nsw <16 x i32> [[TMP56]], [[TMP58]]
95-
; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
96-
; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
97-
; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
98-
; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
99-
; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP65]]
100-
; CHECK-NEXT: [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP62]], [[TMP64]]
101-
; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
102-
; CHECK-NEXT: [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], splat (i32 15)
103-
; CHECK-NEXT: [[TMP70:%.*]] = and <16 x i32> [[TMP69]], splat (i32 65537)
104-
; CHECK-NEXT: [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], splat (i32 65535)
105-
; CHECK-NEXT: [[TMP72:%.*]] = add <16 x i32> [[TMP71]], [[TMP68]]
106-
; CHECK-NEXT: [[TMP73:%.*]] = xor <16 x i32> [[TMP72]], [[TMP71]]
107-
; CHECK-NEXT: [[TMP74:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP73]])
108-
; CHECK-NEXT: [[CONV118:%.*]] = and i32 [[TMP74]], 65535
109-
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP74]], 16
83+
; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 3, i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 7>
84+
; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 17, i32 1, i32 16, i32 0, i32 19, i32 3, i32 18, i32 2, i32 21, i32 5, i32 20, i32 4, i32 23, i32 7, i32 22, i32 6>
85+
; CHECK-NEXT: [[TMP52:%.*]] = add nsw <16 x i32> [[TMP50]], [[TMP51]]
86+
; CHECK-NEXT: [[TMP53:%.*]] = sub nsw <16 x i32> [[TMP50]], [[TMP51]]
87+
; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP52]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
88+
; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
89+
; CHECK-NEXT: [[TMP56:%.*]] = sub nsw <16 x i32> [[TMP54]], [[TMP55]]
90+
; CHECK-NEXT: [[TMP57:%.*]] = add nsw <16 x i32> [[TMP54]], [[TMP55]]
91+
; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
92+
; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
93+
; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
94+
; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
95+
; CHECK-NEXT: [[TMP62:%.*]] = add nsw <16 x i32> [[TMP59]], [[TMP61]]
96+
; CHECK-NEXT: [[TMP63:%.*]] = sub nsw <16 x i32> [[TMP58]], [[TMP60]]
97+
; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
98+
; CHECK-NEXT: [[TMP65:%.*]] = lshr <16 x i32> [[TMP64]], splat (i32 15)
99+
; CHECK-NEXT: [[TMP66:%.*]] = and <16 x i32> [[TMP65]], splat (i32 65537)
100+
; CHECK-NEXT: [[TMP67:%.*]] = mul nuw <16 x i32> [[TMP66]], splat (i32 65535)
101+
; CHECK-NEXT: [[TMP68:%.*]] = add <16 x i32> [[TMP67]], [[TMP64]]
102+
; CHECK-NEXT: [[TMP69:%.*]] = xor <16 x i32> [[TMP68]], [[TMP67]]
103+
; CHECK-NEXT: [[TMP70:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]])
104+
; CHECK-NEXT: [[CONV118:%.*]] = and i32 [[TMP70]], 65535
105+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP70]], 16
110106
; CHECK-NEXT: [[RDD119:%.*]] = add nuw nsw i32 [[CONV118]], [[SHR]]
111107
; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[RDD119]], 1
112108
; CHECK-NEXT: ret i32 [[SHR120]]

llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,19 +1259,19 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1,
12591259
; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
12601260
; CHECK-NEXT: [[TMP45:%.*]] = add nsw <16 x i32> [[TMP43]], [[TMP44]]
12611261
; CHECK-NEXT: [[TMP46:%.*]] = sub nsw <16 x i32> [[TMP43]], [[TMP44]]
1262-
; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP45]], <16 x i32> [[TMP46]], <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
1263-
; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1262+
; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP46]], <16 x i32> [[TMP45]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
1263+
; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
12641264
; CHECK-NEXT: [[TMP49:%.*]] = add nsw <16 x i32> [[TMP47]], [[TMP48]]
12651265
; CHECK-NEXT: [[TMP50:%.*]] = sub nsw <16 x i32> [[TMP47]], [[TMP48]]
1266-
; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP49]], <16 x i32> [[TMP50]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
1267-
; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
1266+
; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP50]], <16 x i32> [[TMP49]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
1267+
; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
12681268
; CHECK-NEXT: [[TMP53:%.*]] = sub nsw <16 x i32> [[TMP51]], [[TMP52]]
12691269
; CHECK-NEXT: [[TMP54:%.*]] = add nsw <16 x i32> [[TMP51]], [[TMP52]]
1270-
; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 4, i32 21, i32 22, i32 7, i32 8, i32 25, i32 26, i32 11, i32 12, i32 29, i32 30, i32 15>
1271-
; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
1270+
; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
1271+
; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
12721272
; CHECK-NEXT: [[TMP57:%.*]] = add nsw <16 x i32> [[TMP55]], [[TMP56]]
12731273
; CHECK-NEXT: [[TMP58:%.*]] = sub nsw <16 x i32> [[TMP55]], [[TMP56]]
1274-
; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
1274+
; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP58]], <16 x i32> [[TMP57]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12751275
; CHECK-NEXT: [[TMP60:%.*]] = lshr <16 x i32> [[TMP59]], splat (i32 15)
12761276
; CHECK-NEXT: [[TMP61:%.*]] = and <16 x i32> [[TMP60]], splat (i32 65537)
12771277
; CHECK-NEXT: [[TMP62:%.*]] = mul nuw <16 x i32> [[TMP61]], splat (i32 65535)

llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,11 @@ define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
6262
; CHECK-LABEL: @build_vec_v4i32(
6363
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]]
6464
; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]]
65-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 3, i32 6>
66-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 2, i32 7>
65+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
66+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 7, i32 2>
6767
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]
68-
; CHECK-NEXT: ret <4 x i32> [[TMP5]]
68+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
69+
; CHECK-NEXT: ret <4 x i32> [[TMP6]]
6970
;
7071
%v0.0 = extractelement <4 x i32> %v0, i32 0
7172
%v0.1 = extractelement <4 x i32> %v0, i32 1
@@ -196,8 +197,8 @@ define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
196197
; CHECK-LABEL: @reduction_v4i32(
197198
; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]
198199
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]]
199-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 7, i32 2>
200-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 6, i32 3>
200+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
201+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 3, i32 6>
201202
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]
202203
; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], splat (i32 15)
203204
; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], splat (i32 65537)

0 commit comments

Comments
 (0)