Skip to content

Commit b577d2d

Browse files
committed
[RISCV] Add a pass to remove duplicate VSETVLI instructions in a basic block.
Add simple pass for removing redundant vsetvli instructions within a basic block. This handles the case where the AVL register and VTYPE immediate are the same and no other instructions that change VTYPE or VL are between them. There are going to be more opportunities for improvement in this space as we develop more complex tests. Differential Revision: https://reviews.llvm.org/D92679
1 parent 709112b commit b577d2d

File tree

10 files changed

+180
-71
lines changed

10 files changed

+180
-71
lines changed

llvm/lib/Target/RISCV/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ add_public_tablegen_target(RISCVCommonTableGen)
2121
add_llvm_target(RISCVCodeGen
2222
RISCVAsmPrinter.cpp
2323
RISCVCallLowering.cpp
24+
RISCVCleanupVSETVLI.cpp
2425
RISCVExpandAtomicPseudoInsts.cpp
2526
RISCVExpandPseudoInsts.cpp
2627
RISCVFrameLowering.cpp

llvm/lib/Target/RISCV/RISCV.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ void initializeRISCVExpandPseudoPass(PassRegistry &);
4646
FunctionPass *createRISCVExpandAtomicPseudoPass();
4747
void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
4848

49+
FunctionPass *createRISCVCleanupVSETVLIPass();
50+
void initializeRISCVCleanupVSETVLIPass(PassRegistry &);
51+
4952
InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
5053
RISCVSubtarget &,
5154
RISCVRegisterBankInfo &);
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
//===- RISCVCleanupVSETVLI.cpp - Cleanup unneeded VSETVLI instructions ----===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements a function pass that removes duplicate vsetvli
10+
// instructions within a basic block.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "RISCV.h"
15+
#include "RISCVSubtarget.h"
16+
#include "llvm/CodeGen/MachineFunctionPass.h"
17+
using namespace llvm;
18+
19+
#define DEBUG_TYPE "riscv-cleanup-vsetvli"
20+
#define RISCV_CLEANUP_VSETVLI_NAME "RISCV Cleanup VSETVLI pass"
21+
22+
namespace {
23+
24+
// Machine function pass that erases PseudoVSETVLI instructions which repeat
// an earlier PseudoVSETVLI in the same basic block (same AVL register and
// VTYPE immediate, with nothing in between that overwrites VL or VTYPE).
class RISCVCleanupVSETVLI : public MachineFunctionPass {
25+
public:
26+
// Pass identifier handed to the MachineFunctionPass base class.
static char ID;
27+
28+
// Constructing the pass also runs its initializer against the global
// PassRegistry.
RISCVCleanupVSETVLI() : MachineFunctionPass(ID) {
29+
initializeRISCVCleanupVSETVLIPass(*PassRegistry::getPassRegistry());
30+
}
31+
// Entry point: processes every basic block of MF; returns true if any
// instruction was erased.
bool runOnMachineFunction(MachineFunction &MF) override;
32+
// Per-block worker; returns true if it erased at least one PseudoVSETVLI.
bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
33+
34+
// The pass declares that it needs the function to still be in SSA form.
MachineFunctionProperties getRequiredProperties() const override {
35+
return MachineFunctionProperties().set(
36+
MachineFunctionProperties::Property::IsSSA);
37+
}
38+
39+
// This pass modifies the program, but does not modify the CFG
40+
void getAnalysisUsage(AnalysisUsage &AU) const override {
41+
AU.setPreservesCFG();
42+
MachineFunctionPass::getAnalysisUsage(AU);
43+
}
44+
45+
// Human-readable pass name (the RISCV_CLEANUP_VSETVLI_NAME string).
StringRef getPassName() const override { return RISCV_CLEANUP_VSETVLI_NAME; }
46+
};
47+
48+
} // end anonymous namespace
49+
50+
// Definition of the static pass identifier declared in the class.
char RISCVCleanupVSETVLI::ID = 0;
51+
52+
// Registers the pass under the DEBUG_TYPE argument string with
// RISCV_CLEANUP_VSETVLI_NAME as its description. The two trailing `false`
// arguments are presumably the "CFG only" and "is analysis" flags — confirm
// against the INITIALIZE_PASS macro definition.
INITIALIZE_PASS(RISCVCleanupVSETVLI, DEBUG_TYPE,
53+
RISCV_CLEANUP_VSETVLI_NAME, false, false)
54+
55+
// Removes redundant PseudoVSETVLI instructions from a single basic block.
//
// Walks MBB front to back while remembering the most recent PseudoVSETVLI
// whose effect is still current. A later PseudoVSETVLI is erased when its
// operand 0 (the VL output) is dead and its operand 1 (AVL register) and
// operand 2 (VTYPE immediate) equal those of the remembered instruction,
// subject to the extra X0 check below.
//
// Returns true if at least one instruction was erased.
bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
56+
bool Changed = false;
57+
// Most recent PseudoVSETVLI that later instructions are compared against;
// nullptr when there is none or its VL/VTYPE may have been overwritten.
MachineInstr *PrevVSETVLI = nullptr;
58+
59+
for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) {
60+
// Step the iterator past MI up front because MI may be erased below.
MachineInstr &MI = *MII++;
61+
62+
if (MI.getOpcode() != RISCV::PseudoVSETVLI) {
63+
// NOTE(review): only calls and instructions that report modifying VL or
// VTYPE reset the tracking; inline asm without such a clobber would not —
// confirm whether that case needs handling.
if (PrevVSETVLI &&
64+
(MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
65+
MI.modifiesRegister(RISCV::VTYPE))) {
66+
// Old VL/VTYPE is overwritten.
67+
PrevVSETVLI = nullptr;
68+
}
69+
continue;
70+
}
71+
72+
// If we don't have a previous VSETVLI or the VL output isn't dead, we
73+
// can't remove this VSETVLI.
74+
if (!PrevVSETVLI || !MI.getOperand(0).isDead()) {
75+
// This instruction becomes the new reference point instead.
PrevVSETVLI = &MI;
76+
continue;
77+
}
78+
79+
Register PrevAVLReg = PrevVSETVLI->getOperand(1).getReg();
80+
Register AVLReg = MI.getOperand(1).getReg();
81+
int64_t PrevVTYPEImm = PrevVSETVLI->getOperand(2).getImm();
82+
int64_t VTYPEImm = MI.getOperand(2).getImm();
83+
84+
// Does this VSETVLI use the same AVL register and VTYPE immediate?
85+
if (PrevAVLReg != AVLReg || PrevVTYPEImm != VTYPEImm) {
86+
PrevVSETVLI = &MI;
87+
continue;
88+
}
89+
90+
// If the AVLReg is X0 we need to look at the output VL of both VSETVLIs.
91+
if (AVLReg == RISCV::X0) {
92+
Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();
93+
Register OutVL = MI.getOperand(0).getReg();
94+
// We can't remove if the previous VSETVLI left VL unchanged and the
95+
// current instruction is setting it to VLMAX. Without knowing the VL
96+
// before the previous instruction we don't know if this is a change.
97+
if (PrevOutVL == RISCV::X0 && OutVL != RISCV::X0) {
98+
PrevVSETVLI = &MI;
99+
continue;
100+
}
101+
}
102+
103+
// This VSETVLI is redundant, remove it.
104+
// PrevVSETVLI is deliberately left pointing at the earlier instruction.
MI.eraseFromParent();
105+
Changed = true;
106+
}
107+
108+
return Changed;
109+
}
110+
111+
// Pass entry point. Does nothing (returns false) when skipFunction() says the
// function should be skipped or when the subtarget lacks the vector
// extension; otherwise runs the per-block cleanup on every basic block and
// returns true if any block was changed.
bool RISCVCleanupVSETVLI::runOnMachineFunction(MachineFunction &MF) {
112+
if (skipFunction(MF.getFunction()))
113+
return false;
114+
115+
// Skip if the vector extension is not enabled.
116+
const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
117+
if (!ST.hasStdExtV())
118+
return false;
119+
120+
bool Changed = false;
121+
122+
// Each block is handled independently; no state is carried across blocks.
for (MachineBasicBlock &MBB : MF)
123+
Changed |= runOnMachineBasicBlock(MBB);
124+
125+
return Changed;
126+
}
127+
128+
/// Returns an instance of the Cleanup VSETVLI pass.
/// The instance is allocated with `new` and is not freed here; presumably the
/// pass manager it is added to takes ownership — confirm against the caller.
129+
FunctionPass *llvm::createRISCVCleanupVSETVLIPass() {
130+
return new RISCVCleanupVSETVLI();
131+
}

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
3939
initializeGlobalISel(*PR);
4040
initializeRISCVMergeBaseOffsetOptPass(*PR);
4141
initializeRISCVExpandPseudoPass(*PR);
42+
initializeRISCVCleanupVSETVLIPass(*PR);
4243
}
4344

4445
static StringRef computeDataLayout(const Triple &TT) {
@@ -183,6 +184,8 @@ void RISCVPassConfig::addPreEmitPass2() {
183184
}
184185

185186
void RISCVPassConfig::addPreRegAlloc() {
186-
if (TM->getOptLevel() != CodeGenOpt::None)
187+
if (TM->getOptLevel() != CodeGenOpt::None) {
187188
addPass(createRISCVMergeBaseOffsetOptPass());
189+
addPass(createRISCVCleanupVSETVLIPass());
190+
}
188191
}

llvm/test/CodeGen/RISCV/rvv/add-vsetvli-gpr.mir

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,9 @@ body: |
4949
# POST-INSERTER: dead %12:gpr = PseudoVSETVLI %0, 76, implicit-def $vl, implicit-def $vtype
5050
# POST-INSERTER: PseudoVSE64_V_M1 killed %8, %3, $noreg, $noreg, -1, implicit $vl, implicit $vtype :: (store unknown-size into %ir.pc, align 8)
5151

52-
# CODEGEN: vsetvli a4, a3, e64,m1,ta,mu
52+
# CODEGEN: vsetvli a3, a3, e64,m1,ta,mu
5353
# CODEGEN-NEXT: vle64.v v25, (a1)
54-
# CODEGEN-NEXT: vsetvli a1, a3, e64,m1,ta,mu
5554
# CODEGEN-NEXT: vle64.v v26, (a2)
56-
# CODEGEN-NEXT: vsetvli a1, a3, e64,m1,ta,mu
5755
# CODEGEN-NEXT: vadd.vv v25, v25, v26
58-
# CODEGEN-NEXT: vsetvli a1, a3, e64,m1,ta,mu
5956
# CODEGEN-NEXT: vse64.v v25, (a0)
6057
# CODEGEN-NEXT: ret
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s
3+
4+
# Make sure we don't combine these two VSETVLIs in the cleanup pass. The first
5+
# keeps the previous value of VL, the second time sets it to VLMAX. We can't
6+
# remove the first since we can't tell if this is a change to VL.
7+
8+
--- |
9+
; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll'
10+
source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll"
11+
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
12+
target triple = "riscv64"
13+
14+
define void @cleanup_vsetvli() #0 {
15+
ret void
16+
}
17+
18+
attributes #0 = { "target-features"="+experimental-v" }
19+
20+
...
21+
---
22+
name: cleanup_vsetvli
23+
alignment: 4
24+
tracksRegLiveness: true
25+
registers:
26+
- { id: 0, class: gpr }
27+
frameInfo:
28+
maxAlignment: 1
29+
machineFunctionInfo: {}
30+
body: |
31+
bb.0 (%ir-block.0):
32+
; CHECK-LABEL: name: cleanup_vsetvli
33+
; CHECK: dead $x0 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
34+
; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
35+
; CHECK: PseudoRET
36+
dead $x0 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
37+
dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
38+
PseudoRET
39+
40+
...

llvm/test/CodeGen/RISCV/rvv/load-add-store-16.ll

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,8 @@ define void @vadd_vint16m1(<vscale x 4 x i16> *%pc, <vscale x 4 x i16> *%pa, <vs
99
; CHECK: # %bb.0:
1010
; CHECK-NEXT: vsetvli a3, zero, e16,m1,ta,mu
1111
; CHECK-NEXT: vle16.v v25, (a1)
12-
; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
1312
; CHECK-NEXT: vle16.v v26, (a2)
14-
; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
1513
; CHECK-NEXT: vadd.vv v25, v25, v26
16-
; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
1714
; CHECK-NEXT: vse16.v v25, (a0)
1815
; CHECK-NEXT: ret
1916
%va = load <vscale x 4 x i16>, <vscale x 4 x i16>* %pa
@@ -28,11 +25,8 @@ define void @vadd_vint16m2(<vscale x 8 x i16> *%pc, <vscale x 8 x i16> *%pa, <vs
2825
; CHECK: # %bb.0:
2926
; CHECK-NEXT: vsetvli a3, zero, e16,m2,ta,mu
3027
; CHECK-NEXT: vle16.v v26, (a1)
31-
; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
3228
; CHECK-NEXT: vle16.v v28, (a2)
33-
; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
3429
; CHECK-NEXT: vadd.vv v26, v26, v28
35-
; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
3630
; CHECK-NEXT: vse16.v v26, (a0)
3731
; CHECK-NEXT: ret
3832
%va = load <vscale x 8 x i16>, <vscale x 8 x i16>* %pa
@@ -47,11 +41,8 @@ define void @vadd_vint16m4(<vscale x 16 x i16> *%pc, <vscale x 16 x i16> *%pa, <
4741
; CHECK: # %bb.0:
4842
; CHECK-NEXT: vsetvli a3, zero, e16,m4,ta,mu
4943
; CHECK-NEXT: vle16.v v28, (a1)
50-
; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu
5144
; CHECK-NEXT: vle16.v v8, (a2)
52-
; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu
5345
; CHECK-NEXT: vadd.vv v28, v28, v8
54-
; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu
5546
; CHECK-NEXT: vse16.v v28, (a0)
5647
; CHECK-NEXT: ret
5748
%va = load <vscale x 16 x i16>, <vscale x 16 x i16>* %pa
@@ -66,11 +57,8 @@ define void @vadd_vint16m8(<vscale x 32 x i16> *%pc, <vscale x 32 x i16> *%pa, <
6657
; CHECK: # %bb.0:
6758
; CHECK-NEXT: vsetvli a3, zero, e16,m8,ta,mu
6859
; CHECK-NEXT: vle16.v v8, (a1)
69-
; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu
7060
; CHECK-NEXT: vle16.v v16, (a2)
71-
; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu
7261
; CHECK-NEXT: vadd.vv v8, v8, v16
73-
; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu
7462
; CHECK-NEXT: vse16.v v8, (a0)
7563
; CHECK-NEXT: ret
7664
%va = load <vscale x 32 x i16>, <vscale x 32 x i16>* %pa
@@ -85,11 +73,8 @@ define void @vadd_vint16mf2(<vscale x 2 x i16> *%pc, <vscale x 2 x i16> *%pa, <v
8573
; CHECK: # %bb.0:
8674
; CHECK-NEXT: vsetvli a3, zero, e16,mf2,ta,mu
8775
; CHECK-NEXT: vle16.v v25, (a1)
88-
; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
8976
; CHECK-NEXT: vle16.v v26, (a2)
90-
; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
9177
; CHECK-NEXT: vadd.vv v25, v25, v26
92-
; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
9378
; CHECK-NEXT: vse16.v v25, (a0)
9479
; CHECK-NEXT: ret
9580
%va = load <vscale x 2 x i16>, <vscale x 2 x i16>* %pa
@@ -104,11 +89,8 @@ define void @vadd_vint16mf4(<vscale x 1 x i16> *%pc, <vscale x 1 x i16> *%pa, <v
10489
; CHECK: # %bb.0:
10590
; CHECK-NEXT: vsetvli a3, zero, e16,mf4,ta,mu
10691
; CHECK-NEXT: vle16.v v25, (a1)
107-
; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
10892
; CHECK-NEXT: vle16.v v26, (a2)
109-
; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
11093
; CHECK-NEXT: vadd.vv v25, v25, v26
111-
; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
11294
; CHECK-NEXT: vse16.v v25, (a0)
11395
; CHECK-NEXT: ret
11496
%va = load <vscale x 1 x i16>, <vscale x 1 x i16>* %pa

llvm/test/CodeGen/RISCV/rvv/load-add-store-32.ll

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,8 @@ define void @vadd_vint32m1(<vscale x 2 x i32> *%pc, <vscale x 2 x i32> *%pa, <vs
99
; CHECK: # %bb.0:
1010
; CHECK-NEXT: vsetvli a3, zero, e32,m1,ta,mu
1111
; CHECK-NEXT: vle32.v v25, (a1)
12-
; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
1312
; CHECK-NEXT: vle32.v v26, (a2)
14-
; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
1513
; CHECK-NEXT: vadd.vv v25, v25, v26
16-
; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
1714
; CHECK-NEXT: vse32.v v25, (a0)
1815
; CHECK-NEXT: ret
1916
%va = load <vscale x 2 x i32>, <vscale x 2 x i32>* %pa
@@ -28,11 +25,8 @@ define void @vadd_vint32m2(<vscale x 4 x i32> *%pc, <vscale x 4 x i32> *%pa, <vs
2825
; CHECK: # %bb.0:
2926
; CHECK-NEXT: vsetvli a3, zero, e32,m2,ta,mu
3027
; CHECK-NEXT: vle32.v v26, (a1)
31-
; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
3228
; CHECK-NEXT: vle32.v v28, (a2)
33-
; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
3429
; CHECK-NEXT: vadd.vv v26, v26, v28
35-
; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
3630
; CHECK-NEXT: vse32.v v26, (a0)
3731
; CHECK-NEXT: ret
3832
%va = load <vscale x 4 x i32>, <vscale x 4 x i32>* %pa
@@ -47,11 +41,8 @@ define void @vadd_vint32m4(<vscale x 8 x i32> *%pc, <vscale x 8 x i32> *%pa, <vs
4741
; CHECK: # %bb.0:
4842
; CHECK-NEXT: vsetvli a3, zero, e32,m4,ta,mu
4943
; CHECK-NEXT: vle32.v v28, (a1)
50-
; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
5144
; CHECK-NEXT: vle32.v v8, (a2)
52-
; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
5345
; CHECK-NEXT: vadd.vv v28, v28, v8
54-
; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
5546
; CHECK-NEXT: vse32.v v28, (a0)
5647
; CHECK-NEXT: ret
5748
%va = load <vscale x 8 x i32>, <vscale x 8 x i32>* %pa
@@ -66,11 +57,8 @@ define void @vadd_vint32m8(<vscale x 16 x i32> *%pc, <vscale x 16 x i32> *%pa, <
6657
; CHECK: # %bb.0:
6758
; CHECK-NEXT: vsetvli a3, zero, e32,m8,ta,mu
6859
; CHECK-NEXT: vle32.v v8, (a1)
69-
; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu
7060
; CHECK-NEXT: vle32.v v16, (a2)
71-
; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu
7261
; CHECK-NEXT: vadd.vv v8, v8, v16
73-
; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu
7462
; CHECK-NEXT: vse32.v v8, (a0)
7563
; CHECK-NEXT: ret
7664
%va = load <vscale x 16 x i32>, <vscale x 16 x i32>* %pa
@@ -85,11 +73,8 @@ define void @vadd_vint32mf2(<vscale x 1 x i32> *%pc, <vscale x 1 x i32> *%pa, <v
8573
; CHECK: # %bb.0:
8674
; CHECK-NEXT: vsetvli a3, zero, e32,mf2,ta,mu
8775
; CHECK-NEXT: vle32.v v25, (a1)
88-
; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
8976
; CHECK-NEXT: vle32.v v26, (a2)
90-
; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
9177
; CHECK-NEXT: vadd.vv v25, v25, v26
92-
; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
9378
; CHECK-NEXT: vse32.v v25, (a0)
9479
; CHECK-NEXT: ret
9580
%va = load <vscale x 1 x i32>, <vscale x 1 x i32>* %pa

0 commit comments

Comments
 (0)