Skip to content

Commit 85c9c16

Browse files
authored
[RISCV] Support load clustering in the MachineScheduler (off by default) (#73754)
This adds minimal support for load clustering, but disables it by default. The intent is to iterate on the precise heuristic, and on the question of turning this on by default, in a separate PR. Although previous discussion indicates hope that the MachineScheduler would replace most uses of the SelectionDAG scheduler, it does seem that most targets aren't using MachineScheduler load clustering right now: PPC and AArch64 seem to use it just to help with paired load/store formation, and although AMDGPU uses it for general clustering, it also implements ShouldScheduleLoadsNear for the SelectionDAG scheduler's clustering.
1 parent 4b3ea33 commit 85c9c16

File tree

4 files changed

+115
-3
lines changed

4 files changed

+115
-3
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/STLExtras.h"
2020
#include "llvm/ADT/SmallVector.h"
2121
#include "llvm/Analysis/MemoryLocation.h"
22+
#include "llvm/Analysis/ValueTracking.h"
2223
#include "llvm/CodeGen/LiveIntervals.h"
2324
#include "llvm/CodeGen/LiveVariables.h"
2425
#include "llvm/CodeGen/MachineCombinerPattern.h"
@@ -2231,6 +2232,60 @@ bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
22312232
return true;
22322233
}
22332234

2235+
// TODO: This was copied from SIInstrInfo. Could it be lifted to a common
2236+
// helper?
2237+
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
2238+
ArrayRef<const MachineOperand *> BaseOps1,
2239+
const MachineInstr &MI2,
2240+
ArrayRef<const MachineOperand *> BaseOps2) {
2241+
// Only examine the first "base" operand of each instruction, on the
2242+
// assumption that it represents the real base address of the memory access.
2243+
// Other operands are typically offsets or indices from this base address.
2244+
if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
2245+
return true;
2246+
2247+
if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
2248+
return false;
2249+
2250+
auto MO1 = *MI1.memoperands_begin();
2251+
auto MO2 = *MI2.memoperands_begin();
2252+
if (MO1->getAddrSpace() != MO2->getAddrSpace())
2253+
return false;
2254+
2255+
auto Base1 = MO1->getValue();
2256+
auto Base2 = MO2->getValue();
2257+
if (!Base1 || !Base2)
2258+
return false;
2259+
Base1 = getUnderlyingObject(Base1);
2260+
Base2 = getUnderlyingObject(Base2);
2261+
2262+
if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
2263+
return false;
2264+
2265+
return Base1 == Base2;
2266+
}
2267+
2268+
/// Decide whether two memory operations, described by their base operands,
/// should be clustered by the scheduler.
///
/// Clustering is refused when only one of the two operations has base
/// operands, or when both have them but memOpsHaveSameBasePtr() reports
/// different bases. Otherwise the answer depends solely on \p ClusterSize;
/// \p NumBytes is not consulted by the current heuristic.
bool RISCVInstrInfo::shouldClusterMemOps(
    ArrayRef<const MachineOperand *> BaseOps1,
    ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
    unsigned NumBytes) const {
  const bool HasBase1 = !BaseOps1.empty();
  const bool HasBase2 = !BaseOps2.empty();

  // If only one side has base operands, the two cannot share a base pointer.
  if (HasBase1 != HasBase2)
    return false;

  // Mem ops whose base pointers differ should not be clustered.
  if (HasBase1) {
    const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
    const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
    if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
      return false;
  }

  // TODO: Use a more carefully chosen heuristic, e.g. only cluster if offsets
  // indicate they likely share a cache line.
  return ClusterSize <= 4;
}
2288+
22342289
// Set BaseReg (the base register operand), Offset (the byte offset being
22352290
// accessed) and the access Width of the passed instruction that reads/writes
22362291
// memory. Returns false if the instruction does not read/write memory or the

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,11 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
157157
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
158158
const TargetRegisterInfo *TRI) const override;
159159

160+
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
161+
ArrayRef<const MachineOperand *> BaseOps2,
162+
unsigned ClusterSize,
163+
unsigned NumBytes) const override;
164+
160165
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
161166
const MachineOperand *&BaseOp,
162167
int64_t &Offset, unsigned &Width,

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,11 @@ static cl::opt<bool>
9595
cl::desc("Enable Split RegisterAlloc for RVV"),
9696
cl::init(false));
9797

98+
// Hidden command-line switch (-riscv-misched-load-clustering) that opts in to
// load clustering in the machine scheduler; it defaults to off.
static cl::opt<bool> EnableMISchedLoadClustering(
    "riscv-misched-load-clustering", cl::init(false),
    cl::desc("Enable load clustering in the machine scheduler"), cl::Hidden);
102+
98103
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
99104
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
100105
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -345,12 +350,16 @@ class RISCVPassConfig : public TargetPassConfig {
345350
// Diff rendering of createMachineScheduler: a code line preceded by an "NNN-"
// marker is a removed (pre-commit) line, one preceded by an "NNN+" marker is an
// added line, and one preceded by a plain number pair is unchanged context.
//
// With the added lines applied, the function starts from a null DAG, creates a
// generic live scheduler and attaches the load-cluster mutation when
// EnableMISchedLoadClustering is set, then (creating the scheduler only if it
// does not exist yet) attaches the RISC-V macro-fusion mutation when the
// subtarget reports macro fusion. The DAG is returned as-is, so the result is
// still nullptr when neither feature applies.
ScheduleDAGInstrs *
346351
createMachineScheduler(MachineSchedContext *C) const override {
347352
const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
353+
ScheduleDAGMILive *DAG = nullptr;
354+
if (EnableMISchedLoadClustering) {
355+
DAG = createGenericSchedLive(C);
356+
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
357+
}
348358
if (ST.hasMacroFusion()) {
349-
ScheduleDAGMILive *DAG = createGenericSchedLive(C);
359+
DAG = DAG ? DAG : createGenericSchedLive(C);
350360
DAG->addMutation(createRISCVMacroFusionDAGMutation());
351-
return DAG;
352361
}
353-
return nullptr;
362+
return DAG;
354363
}
355364

356365
ScheduleDAGInstrs *
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
; REQUIRES: asserts
2+
; RUN: llc -mtriple=riscv32 -verify-misched -debug-only=machine-scheduler -o - 2>&1 < %s \
3+
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
4+
; RUN: llc -mtriple=riscv64 -verify-misched -debug-only=machine-scheduler -o - 2>&1 < %s \
5+
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
6+
; RUN: llc -mtriple=riscv32 -riscv-misched-load-clustering -verify-misched \
7+
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
8+
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
9+
; RUN: llc -mtriple=riscv64 -riscv-misched-load-clustering -verify-misched \
10+
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
11+
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
12+
13+
14+
; Four i32 loads from %p at element indices 3, 2, 1 and 4 (byte offsets 12, 8,
; 4 and 16) feed a chain of adds. The checks below pin the final schedule of
; the LW instructions: in the default configuration the offset-16 load is
; expected after the other three, while with load clustering enabled it is
; expected ahead of them, so that all four loads sit together.
define i32 @load_clustering_1(ptr nocapture %p) {
; NOCLUSTER: ********** MI Scheduling **********
; NOCLUSTER-LABEL: load_clustering_1:%bb.0
; NOCLUSTER: *** Final schedule for %bb.0 ***
; NOCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
; NOCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
; NOCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
; NOCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
;
; LDCLUSTER: ********** MI Scheduling **********
; LDCLUSTER-LABEL: load_clustering_1:%bb.0
; LDCLUSTER: *** Final schedule for %bb.0 ***
; LDCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
; LDCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
; LDCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
; LDCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3
  %val0 = load i32, ptr %arrayidx0
  %arrayidx1 = getelementptr inbounds i32, ptr %p, i32 2
  %val1 = load i32, ptr %arrayidx1
  %tmp0 = add i32 %val0, %val1
  %arrayidx2 = getelementptr inbounds i32, ptr %p, i32 1
  %val2 = load i32, ptr %arrayidx2
  %tmp1 = add i32 %tmp0, %val2
  %arrayidx3 = getelementptr inbounds i32, ptr %p, i32 4
  %val3 = load i32, ptr %arrayidx3
  %tmp2 = add i32 %tmp1, %val3
  ret i32 %tmp2
}

0 commit comments

Comments
 (0)