Skip to content

Commit e858b10

Browse files
committed
Revert "[SLP]Reduce number of alternate instruction, where possible"
This caused failures such as:

Instruction does not dominate all uses!
  %29 = insertelement <8 x i64> %28, i64 %xor6.i.5, i64 6
  %17 = shufflevector <8 x i64> %29, <8 x i64> poison, <6 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>

see comment on #123360

> Previous version was reviewed here #123360
> It is mostly the same, adjusted after graph-to-tree transformation
>
> Patch tries to remove wide alternate operations.
> Currently SLP vectorizer emits something like this:
> ```
> %0 = add i32
> %1 = sub i32
> %2 = add i32
> %3 = sub i32
> %4 = add i32
> %5 = sub i32
> %6 = add i32
> %7 = sub i32
>
> transforms to
>
> %v1 = add <8 x i32>
> %v2 = sub <8 x i32>
> %res = shuffle %v1, %v2, <0, 9, 2, 11, 4, 13, 6, 15>
> ```
> i.e. half of the results are just unused. This leads to increased
> register pressure and potentially doubles number of operations.
>
> Patch introduces SplitVectorize mode, where it splits the operations by
> opcodes and produces instead something like this:
> ```
> %v1 = add <4 x i32>
> %v2 = sub <4 x i32>
> %res = shuffle %v1, %v2, <0, 4, 1, 5, 2, 6, 3, 7>
> ```
> It allows to improve the performance by reducing number of ops. Also, it
> turns on some other improvements, like improved graph reordering.
>
> [...]

This reverts commit 9d37e61 as well as the follow-up commit 72bb0a9.
1 parent b9d6b62 commit e858b10

34 files changed

+781
-2731
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1771,10 +1771,6 @@ class TargetTransformInfo {
17711771
/// scalable version of the vectorized loop.
17721772
bool preferFixedOverScalableIfEqualCost() const;
17731773

1774-
/// \returns True if target prefers SLP vectorizer with alternate opcode
1775-
/// vectorization, false - otherwise.
1776-
bool preferAlternateOpcodeVectorization() const;
1777-
17781774
/// \returns True if the target prefers reductions in loop.
17791775
bool preferInLoopReduction(unsigned Opcode, Type *Ty) const;
17801776

@@ -2329,7 +2325,6 @@ class TargetTransformInfo::Concept {
23292325
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty) const = 0;
23302326
virtual bool preferPredicatedReductionSelect(unsigned Opcode,
23312327
Type *Ty) const = 0;
2332-
virtual bool preferAlternateOpcodeVectorization() const = 0;
23332328
virtual bool preferEpilogueVectorization() const = 0;
23342329

23352330
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
@@ -3140,9 +3135,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
31403135
bool preferInLoopReduction(unsigned Opcode, Type *Ty) const override {
31413136
return Impl.preferInLoopReduction(Opcode, Ty);
31423137
}
3143-
bool preferAlternateOpcodeVectorization() const override {
3144-
return Impl.preferAlternateOpcodeVectorization();
3145-
}
31463138
bool preferPredicatedReductionSelect(unsigned Opcode,
31473139
Type *Ty) const override {
31483140
return Impl.preferPredicatedReductionSelect(Opcode, Ty);

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,6 @@ class TargetTransformInfoImplBase {
10071007
bool preferFixedOverScalableIfEqualCost() const { return false; }
10081008

10091009
bool preferInLoopReduction(unsigned Opcode, Type *Ty) const { return false; }
1010-
bool preferAlternateOpcodeVectorization() const { return true; }
10111010

10121011
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty) const {
10131012
return false;

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1384,10 +1384,6 @@ bool TargetTransformInfo::preferInLoopReduction(unsigned Opcode,
13841384
return TTIImpl->preferInLoopReduction(Opcode, Ty);
13851385
}
13861386

1387-
bool TargetTransformInfo::preferAlternateOpcodeVectorization() const {
1388-
return TTIImpl->preferAlternateOpcodeVectorization();
1389-
}
1390-
13911387
bool TargetTransformInfo::preferPredicatedReductionSelect(unsigned Opcode,
13921388
Type *Ty) const {
13931389
return TTIImpl->preferPredicatedReductionSelect(Opcode, Ty);

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
125125

126126
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
127127

128-
bool preferAlternateOpcodeVectorization() const { return false; }
129-
130128
bool preferEpilogueVectorization() const {
131129
// Epilogue vectorization is usually unprofitable - tail folding or
132130
// a smaller VF would have been better. This is a blunt hammer - we

llvm/lib/Target/X86/X86TargetTransformInfo.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,6 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
292292

293293
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
294294
bool IsZeroCmp) const;
295-
bool preferAlternateOpcodeVectorization() const { return false; }
296295
bool prefersVectorizedAddressing() const;
297296
bool supportsEfficientVectorElementLoadStore() const;
298297
bool enableInterleavedAccessVectorization();

0 commit comments

Comments
 (0)