Skip to content

Commit e1d4ca0

Browse files
committed
[InstCombine] matchRotate - add support for matching general funnel shifts with constant shift amounts (PR46896)
First step towards extending the existing rotation support to full funnel shift handling now that the backend legalization support has improved. This enables us to match the shift-by-constant cases, which are pretty trivial to expand again if necessary. D88420 will add non-uniform support for funnel shifts as well once it has been finalized. Differential Revision: https://reviews.llvm.org/D88834
1 parent c1fd430 commit e1d4ca0

File tree

2 files changed

+31
-40
lines changed

2 files changed

+31
-40
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2052,39 +2052,45 @@ Instruction *InstCombinerImpl::matchBSwap(BinaryOperator &Or) {
20522052
return LastInst;
20532053
}
20542054

2055-
/// Transform UB-safe variants of bitwise rotate to the funnel shift intrinsic.
2056-
static Instruction *matchRotate(Instruction &Or) {
2055+
/// Match UB-safe variants of the funnel shift intrinsic.
2056+
static Instruction *matchFunnelShift(Instruction &Or) {
20572057
// TODO: Can we reduce the code duplication between this and the related
20582058
// rotate matching code under visitSelect and visitTrunc?
20592059
unsigned Width = Or.getType()->getScalarSizeInBits();
20602060

2061-
// First, find an or'd pair of opposite shifts with the same shifted operand:
2062-
// or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1)
2061+
// First, find an or'd pair of opposite shifts:
2062+
// or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
20632063
BinaryOperator *Or0, *Or1;
20642064
if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
20652065
!match(Or.getOperand(1), m_BinOp(Or1)))
20662066
return nullptr;
20672067

2068-
Value *ShVal, *ShAmt0, *ShAmt1;
2069-
if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) ||
2070-
!match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1)))))
2068+
Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
2069+
if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
2070+
!match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))))
20712071
return nullptr;
20722072

20732073
BinaryOperator::BinaryOps ShiftOpcode0 = Or0->getOpcode();
20742074
BinaryOperator::BinaryOps ShiftOpcode1 = Or1->getOpcode();
20752075
if (ShiftOpcode0 == ShiftOpcode1)
20762076
return nullptr;
20772077

2078-
// Match the shift amount operands for a rotate pattern. This always matches
2079-
// a subtraction on the R operand.
2080-
auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
2078+
// Match the shift amount operands for a funnel shift pattern. This always
2079+
// matches a subtraction on the R operand.
2080+
auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
20812081
// Check for constant shift amounts that sum to the bitwidth.
20822082
// TODO: Support non-uniform shift amounts.
20832083
const APInt *LC, *RC;
20842084
if (match(L, m_APIntAllowUndef(LC)) && match(R, m_APIntAllowUndef(RC)))
20852085
if (LC->ult(Width) && RC->ult(Width) && (*LC + *RC) == Width)
20862086
return ConstantInt::get(L->getType(), *LC);
20872087

2088+
// For non-constant cases, the following patterns currently only work for
2089+
// rotation patterns.
2090+
// TODO: Add general funnel-shift compatible patterns.
2091+
if (ShVal0 != ShVal1)
2092+
return nullptr;
2093+
20882094
// For non-constant cases we don't support non-pow2 shift masks.
20892095
// TODO: Is it worth matching urem as well?
20902096
if (!isPowerOf2_32(Width))
@@ -2121,7 +2127,8 @@ static Instruction *matchRotate(Instruction &Or) {
21212127
(SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl);
21222128
Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
21232129
Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
2124-
return IntrinsicInst::Create(F, { ShVal, ShVal, ShAmt });
2130+
return IntrinsicInst::Create(
2131+
F, {IsFshl ? ShVal0 : ShVal1, IsFshl ? ShVal1 : ShVal0, ShAmt});
21252132
}
21262133

21272134
/// Attempt to combine or(zext(x),shl(zext(y),bw/2)) concat packing patterns.
@@ -2574,8 +2581,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
25742581
if (Instruction *BSwap = matchBSwap(I))
25752582
return BSwap;
25762583

2577-
if (Instruction *Rotate = matchRotate(I))
2578-
return Rotate;
2584+
if (Instruction *Funnel = matchFunnelShift(I))
2585+
return Funnel;
25792586

25802587
if (Instruction *Concat = matchOrConcat(I, Builder))
25812588
return replaceInstUsesWith(I, Concat);

llvm/test/Transforms/InstCombine/funnel.ll

Lines changed: 11 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,14 @@
33

44
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
55

6-
; TODO: Canonicalize or(shl,lshr) by constant to funnel shift intrinsics.
6+
; Canonicalize or(shl,lshr) by constant to funnel shift intrinsics.
77
; This should help cost modeling for vectorization, inlining, etc.
88
; If a target does not have a fshl instruction, the expansion will
99
; be exactly these same 3 basic ops (shl/lshr/or).
1010

1111
define i32 @fshl_i32_constant(i32 %x, i32 %y) {
1212
; CHECK-LABEL: @fshl_i32_constant(
13-
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 11
14-
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[Y:%.*]], 21
15-
; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]]
13+
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 11)
1614
; CHECK-NEXT: ret i32 [[R]]
1715
;
1816
%shl = shl i32 %x, 11
@@ -23,9 +21,7 @@ define i32 @fshl_i32_constant(i32 %x, i32 %y) {
2321

2422
define i42 @fshr_i42_constant(i42 %x, i42 %y) {
2523
; CHECK-LABEL: @fshr_i42_constant(
26-
; CHECK-NEXT: [[SHR:%.*]] = lshr i42 [[X:%.*]], 31
27-
; CHECK-NEXT: [[SHL:%.*]] = shl i42 [[Y:%.*]], 11
28-
; CHECK-NEXT: [[R:%.*]] = or i42 [[SHR]], [[SHL]]
24+
; CHECK-NEXT: [[R:%.*]] = call i42 @llvm.fshl.i42(i42 [[Y:%.*]], i42 [[X:%.*]], i42 11)
2925
; CHECK-NEXT: ret i42 [[R]]
3026
;
3127
%shr = lshr i42 %x, 31
@@ -34,13 +30,11 @@ define i42 @fshr_i42_constant(i42 %x, i42 %y) {
3430
ret i42 %r
3531
}
3632

37-
; TODO: Vector types are allowed.
33+
; Vector types are allowed.
3834

3935
define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) {
4036
; CHECK-LABEL: @fshl_v2i16_constant_splat(
41-
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], <i16 1, i16 1>
42-
; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], <i16 15, i16 15>
43-
; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]]
37+
; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
4438
; CHECK-NEXT: ret <2 x i16> [[R]]
4539
;
4640
%shl = shl <2 x i16> %x, <i16 1, i16 1>
@@ -51,9 +45,7 @@ define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) {
5145

5246
define <2 x i16> @fshl_v2i16_constant_splat_undef0(<2 x i16> %x, <2 x i16> %y) {
5347
; CHECK-LABEL: @fshl_v2i16_constant_splat_undef0(
54-
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], <i16 undef, i16 1>
55-
; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], <i16 15, i16 15>
56-
; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]]
48+
; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
5749
; CHECK-NEXT: ret <2 x i16> [[R]]
5850
;
5951
%shl = shl <2 x i16> %x, <i16 undef, i16 1>
@@ -64,9 +56,7 @@ define <2 x i16> @fshl_v2i16_constant_splat_undef0(<2 x i16> %x, <2 x i16> %y) {
6456

6557
define <2 x i16> @fshl_v2i16_constant_splat_undef1(<2 x i16> %x, <2 x i16> %y) {
6658
; CHECK-LABEL: @fshl_v2i16_constant_splat_undef1(
67-
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], <i16 1, i16 1>
68-
; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], <i16 15, i16 undef>
69-
; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]]
59+
; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
7060
; CHECK-NEXT: ret <2 x i16> [[R]]
7161
;
7262
%shl = shl <2 x i16> %x, <i16 1, i16 1>
@@ -75,13 +65,11 @@ define <2 x i16> @fshl_v2i16_constant_splat_undef1(<2 x i16> %x, <2 x i16> %y) {
7565
ret <2 x i16> %r
7666
}
7767

78-
; TODO: Non-power-of-2 vector types are allowed.
68+
; Non-power-of-2 vector types are allowed.
7969

8070
define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) {
8171
; CHECK-LABEL: @fshr_v2i17_constant_splat(
82-
; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], <i17 12, i17 12>
83-
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], <i17 5, i17 5>
84-
; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
72+
; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> <i17 5, i17 5>)
8573
; CHECK-NEXT: ret <2 x i17> [[R]]
8674
;
8775
%shr = lshr <2 x i17> %x, <i17 12, i17 12>
@@ -92,9 +80,7 @@ define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) {
9280

9381
define <2 x i17> @fshr_v2i17_constant_splat_undef0(<2 x i17> %x, <2 x i17> %y) {
9482
; CHECK-LABEL: @fshr_v2i17_constant_splat_undef0(
95-
; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], <i17 12, i17 undef>
96-
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], <i17 undef, i17 5>
97-
; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
83+
; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> <i17 5, i17 5>)
9884
; CHECK-NEXT: ret <2 x i17> [[R]]
9985
;
10086
%shr = lshr <2 x i17> %x, <i17 12, i17 undef>
@@ -105,9 +91,7 @@ define <2 x i17> @fshr_v2i17_constant_splat_undef0(<2 x i17> %x, <2 x i17> %y) {
10591

10692
define <2 x i17> @fshr_v2i17_constant_splat_undef1(<2 x i17> %x, <2 x i17> %y) {
10793
; CHECK-LABEL: @fshr_v2i17_constant_splat_undef1(
108-
; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], <i17 12, i17 undef>
109-
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], <i17 5, i17 undef>
110-
; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
94+
; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> <i17 5, i17 5>)
11195
; CHECK-NEXT: ret <2 x i17> [[R]]
11296
;
11397
%shr = lshr <2 x i17> %x, <i17 12, i17 undef>

0 commit comments

Comments
 (0)