Skip to content

Commit 748d080

Browse files
committed
[RISCV] Eliminate unnecessary masking of promoted shift amounts
SelectionDAGBuilder::visitShift will always zero-extend a shift amount when it is promoted to the ShiftAmountTy. This results in zero-extension (masking) which is unnecessary for RISC-V as the shift operations only read the lower 5 or 6 bits (RV32 or RV64). I initially proposed adding a getExtendForShiftAmount hook so the shift amount can be any-extended (D52975). @efriedma explained this was unsafe, so I have instead eliminate the unnecessary and operations at instruction selection time in a manner similar to X86InstrCompiler.td. Differential Revision: https://reviews.llvm.org/D53224 llvm-svn: 344432
1 parent 71f484c commit 748d080

File tree

4 files changed

+90
-17
lines changed

4 files changed

+90
-17
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.td

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,12 @@ def ixlenimm : Operand<XLenVT> {
205205
// Standalone (codegen-only) immleaf patterns.
206206
def simm32 : ImmLeaf<XLenVT, [{return isInt<32>(Imm);}]>;
207207
def simm32hi20 : ImmLeaf<XLenVT, [{return isShiftedInt<20, 12>(Imm);}]>;
208+
// A mask value that won't affect significant shift bits.
209+
def immshiftxlen : ImmLeaf<XLenVT, [{
210+
if (Subtarget->is64Bit())
211+
return countTrailingOnes<uint64_t>(Imm) >= 6;
212+
return countTrailingOnes<uint64_t>(Imm) >= 5;
213+
}]>;
208214

209215
// Addressing modes.
210216
// Necessary because a frameindex can't be matched directly in a pattern.
@@ -646,13 +652,24 @@ def : PatGprGpr<and, AND>;
646652
def : PatGprSimm12<and, ANDI>;
647653
def : PatGprGpr<xor, XOR>;
648654
def : PatGprSimm12<xor, XORI>;
649-
def : PatGprGpr<shl, SLL>;
650655
def : PatGprUimmLog2XLen<shl, SLLI>;
651-
def : PatGprGpr<srl, SRL>;
652656
def : PatGprUimmLog2XLen<srl, SRLI>;
653-
def : PatGprGpr<sra, SRA>;
654657
def : PatGprUimmLog2XLen<sra, SRAI>;
655658

659+
// Match both a plain shift and one where the shift amount is masked (this is
660+
// typically introduced when the legalizer promotes the shift amount and
661+
// zero-extends it). For RISC-V, the mask is unnecessary as shifts in the base
662+
// ISA only read the least significant 5 bits (RV32I) or 6 bits (RV64I).
663+
multiclass VarShiftXLenPat<PatFrag ShiftOp, RVInst Inst> {
664+
def : Pat<(ShiftOp GPR:$rs1, GPR:$rs2), (Inst GPR:$rs1, GPR:$rs2)>;
665+
def : Pat<(ShiftOp GPR:$rs1, (and GPR:$rs2, immshiftxlen)),
666+
(Inst GPR:$rs1, GPR:$rs2)>;
667+
}
668+
669+
defm : VarShiftXLenPat<shl, SLL>;
670+
defm : VarShiftXLenPat<srl, SRL>;
671+
defm : VarShiftXLenPat<sra, SRA>;
672+
656673
/// FrameIndex calculations
657674

658675
def : Pat<(add (i32 AddrFI:$Rs), simm12:$imm12),

llvm/test/CodeGen/RISCV/alu16.ll

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
; that legalisation of these non-native types doesn't introduce unnecessary
77
; inefficiencies.
88

9-
; TODO: it's unnecessary to mask (zero-extend) the shift amount.
10-
119
define i16 @addi(i16 %a) nounwind {
1210
; RV32I-LABEL: addi:
1311
; RV32I: # %bb.0:
@@ -122,9 +120,6 @@ define i16 @sub(i16 %a, i16 %b) nounwind {
122120
define i16 @sll(i16 %a, i16 %b) nounwind {
123121
; RV32I-LABEL: sll:
124122
; RV32I: # %bb.0:
125-
; RV32I-NEXT: lui a2, 16
126-
; RV32I-NEXT: addi a2, a2, -1
127-
; RV32I-NEXT: and a1, a1, a2
128123
; RV32I-NEXT: sll a0, a0, a1
129124
; RV32I-NEXT: ret
130125
%1 = shl i16 %a, %b
@@ -173,7 +168,6 @@ define i16 @srl(i16 %a, i16 %b) nounwind {
173168
; RV32I: # %bb.0:
174169
; RV32I-NEXT: lui a2, 16
175170
; RV32I-NEXT: addi a2, a2, -1
176-
; RV32I-NEXT: and a1, a1, a2
177171
; RV32I-NEXT: and a0, a0, a2
178172
; RV32I-NEXT: srl a0, a0, a1
179173
; RV32I-NEXT: ret
@@ -184,9 +178,6 @@ define i16 @srl(i16 %a, i16 %b) nounwind {
184178
define i16 @sra(i16 %a, i16 %b) nounwind {
185179
; RV32I-LABEL: sra:
186180
; RV32I: # %bb.0:
187-
; RV32I-NEXT: lui a2, 16
188-
; RV32I-NEXT: addi a2, a2, -1
189-
; RV32I-NEXT: and a1, a1, a2
190181
; RV32I-NEXT: slli a0, a0, 16
191182
; RV32I-NEXT: srai a0, a0, 16
192183
; RV32I-NEXT: sra a0, a0, a1

llvm/test/CodeGen/RISCV/alu8.ll

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
; that legalisation of these non-native types doesn't introduce unnecessary
77
; inefficiencies.
88

9-
; TODO: it's unnecessary to mask (zero-extend) the shift amount.
10-
119
define i8 @addi(i8 %a) nounwind {
1210
; RV32I-LABEL: addi:
1311
; RV32I: # %bb.0:
@@ -118,7 +116,6 @@ define i8 @sub(i8 %a, i8 %b) nounwind {
118116
define i8 @sll(i8 %a, i8 %b) nounwind {
119117
; RV32I-LABEL: sll:
120118
; RV32I: # %bb.0:
121-
; RV32I-NEXT: andi a1, a1, 255
122119
; RV32I-NEXT: sll a0, a0, a1
123120
; RV32I-NEXT: ret
124121
%1 = shl i8 %a, %b
@@ -163,7 +160,6 @@ define i8 @xor(i8 %a, i8 %b) nounwind {
163160
define i8 @srl(i8 %a, i8 %b) nounwind {
164161
; RV32I-LABEL: srl:
165162
; RV32I: # %bb.0:
166-
; RV32I-NEXT: andi a1, a1, 255
167163
; RV32I-NEXT: andi a0, a0, 255
168164
; RV32I-NEXT: srl a0, a0, a1
169165
; RV32I-NEXT: ret
@@ -174,7 +170,6 @@ define i8 @srl(i8 %a, i8 %b) nounwind {
174170
define i8 @sra(i8 %a, i8 %b) nounwind {
175171
; RV32I-LABEL: sra:
176172
; RV32I: # %bb.0:
177-
; RV32I-NEXT: andi a1, a1, 255
178173
; RV32I-NEXT: slli a0, a0, 24
179174
; RV32I-NEXT: srai a0, a0, 24
180175
; RV32I-NEXT: sra a0, a0, a1
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3+
; RUN: | FileCheck %s -check-prefix=RV32I
4+
5+
; This test checks that unnecessary masking of shift amount operands is
6+
; eliminated during instruction selection. The test needs to ensure that the
7+
; masking is not removed if it may affect the shift amount.
8+
9+
define i32 @sll_redundant_mask(i32 %a, i32 %b) nounwind {
10+
; RV32I-LABEL: sll_redundant_mask:
11+
; RV32I: # %bb.0:
12+
; RV32I-NEXT: sll a0, a0, a1
13+
; RV32I-NEXT: ret
14+
%1 = and i32 %b, 31
15+
%2 = shl i32 %a, %1
16+
ret i32 %2
17+
}
18+
19+
define i32 @sll_non_redundant_mask(i32 %a, i32 %b) nounwind {
20+
; RV32I-LABEL: sll_non_redundant_mask:
21+
; RV32I: # %bb.0:
22+
; RV32I-NEXT: andi a1, a1, 15
23+
; RV32I-NEXT: sll a0, a0, a1
24+
; RV32I-NEXT: ret
25+
%1 = and i32 %b, 15
26+
%2 = shl i32 %a, %1
27+
ret i32 %2
28+
}
29+
30+
define i32 @srl_redundant_mask(i32 %a, i32 %b) nounwind {
31+
; RV32I-LABEL: srl_redundant_mask:
32+
; RV32I: # %bb.0:
33+
; RV32I-NEXT: srl a0, a0, a1
34+
; RV32I-NEXT: ret
35+
%1 = and i32 %b, 4095
36+
%2 = lshr i32 %a, %1
37+
ret i32 %2
38+
}
39+
40+
define i32 @srl_non_redundant_mask(i32 %a, i32 %b) nounwind {
41+
; RV32I-LABEL: srl_non_redundant_mask:
42+
; RV32I: # %bb.0:
43+
; RV32I-NEXT: andi a1, a1, 7
44+
; RV32I-NEXT: srl a0, a0, a1
45+
; RV32I-NEXT: ret
46+
%1 = and i32 %b, 7
47+
%2 = lshr i32 %a, %1
48+
ret i32 %2
49+
}
50+
51+
define i32 @sra_redundant_mask(i32 %a, i32 %b) nounwind {
52+
; RV32I-LABEL: sra_redundant_mask:
53+
; RV32I: # %bb.0:
54+
; RV32I-NEXT: sra a0, a0, a1
55+
; RV32I-NEXT: ret
56+
%1 = and i32 %b, 65535
57+
%2 = ashr i32 %a, %1
58+
ret i32 %2
59+
}
60+
61+
define i32 @sra_non_redundant_mask(i32 %a, i32 %b) nounwind {
62+
; RV32I-LABEL: sra_non_redundant_mask:
63+
; RV32I: # %bb.0:
64+
; RV32I-NEXT: andi a1, a1, 32
65+
; RV32I-NEXT: sra a0, a0, a1
66+
; RV32I-NEXT: ret
67+
%1 = and i32 %b, 32
68+
%2 = ashr i32 %a, %1
69+
ret i32 %2
70+
}

0 commit comments

Comments
 (0)