Skip to content

Commit abaa531

Browse files
authored
[RISCV] Implement RISCVTTIImpl::shouldConsiderAddressTypePromotion for RISCV (llvm#102560)
This optimization reduces repeated base-address computations by promoting sign extensions out of the address arithmetic when the same base address is accessed several times with constant offsets.
1 parent 0df9189 commit abaa531

File tree

4 files changed

+131
-1
lines changed

4 files changed

+131
-1
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1428,6 +1428,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
14281428

14291429
// Disable strict node mutation.
14301430
IsStrictFPEnabled = true;
1431+
EnableExtLdPromotion = true;
14311432

14321433
// Let the subtarget decide if a predictable select is more expensive than the
14331434
// corresponding branch. This information is used in CGP/SelectOpt to decide

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2000,3 +2000,35 @@ bool RISCVTTIImpl::areInlineCompatible(const Function *Caller,
20002000
// target-features.
20012001
return (CallerBits & CalleeBits) == CalleeBits;
20022002
}
2003+
2004+
/// See if \p I should be considered for address type promotion. We check if \p
2005+
/// I is a sext with the right type (i64) and is used by at least one
2006+
/// getelementptr. If it is used in a "complex" getelementptr, we allow it to be
2007+
/// promoted without finding other sext instructions that sign extended the same
2008+
/// initial value. A getelementptr is considered as "complex" if it has more
/// than 2 operands.
///
/// \param I the instruction to examine.
/// \param [out] AllowPromotionWithoutCommonHeader always written: set to true
/// when a getelementptr user of \p I has more than 2 operands, false otherwise.
/// \returns true if \p I is a sext producing i64 that has at least one
/// getelementptr user, false otherwise.
2009+
bool RISCVTTIImpl::shouldConsiderAddressTypePromotion(
2010+
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
2011+
bool Considerable = false;
2012+
// Reset the out-parameter up front so every early return leaves it false.
AllowPromotionWithoutCommonHeader = false;
2013+
// Only sign extensions are candidates.
if (!isa<SExtInst>(&I))
2014+
return false;
2015+
// Only sign extensions whose result type is i64 are considered.
Type *ConsideredSExtType =
2016+
Type::getInt64Ty(I.getParent()->getParent()->getContext());
2017+
if (I.getType() != ConsideredSExtType)
2018+
return false;
2019+
// See if the sext is the one with the right type and used in at least one
2020+
// GetElementPtrInst.
2021+
for (const User *U : I.users()) {
2022+
if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
2023+
Considerable = true;
2024+
// A getelementptr is considered as "complex" if it has more than 2
2025+
// operands. We will promote a SExt used in such complex GEP as we
2026+
// expect some computation to be merged if they are done on 64 bits.
2027+
if (GEPInst->getNumOperands() > 2) {
2028+
AllowPromotionWithoutCommonHeader = true;
2029+
// One complex GEP user is enough; no need to look at the remaining users.
break;
2030+
}
2031+
}
2032+
}
2033+
return Considerable;
2034+
}

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
397397
/// Unconditionally answers true for this target.
bool shouldFoldTerminatingConditionAfterLSR() const {
398398
return true;
399399
}
400-
400+
/// Returns whether \p I should be considered for address type promotion.
/// \p AllowPromotionWithoutCommonHeader is an output flag written by the
/// call. Defined in RISCVTargetTransformInfo.cpp.
bool
401+
shouldConsiderAddressTypePromotion(const Instruction &I,
402+
bool &AllowPromotionWithoutCommonHeader);
401403
/// Reports 4096 as the minimum page size.
std::optional<unsigned> getMinPageSize() const { return 4096; }
402404
};
403405

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -S | FileCheck %s
3+
4+
target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
5+
target triple = "riscv64"
6+
7+
%struct.match_state = type { i64, i64 }
8+
9+
; %add is also promoted by forking an extra sext: in the expected output the
; i32 add becomes an i64 add whose %i operand reuses the existing sext (%s2)
; and whose %j operand is a newly created sext.
10+
define void @promoteTwoOne(i32 %i, i32 %j, ptr %P1, ptr %P2 ) {
11+
; CHECK-LABEL: define void @promoteTwoOne(
12+
; CHECK-SAME: i32 [[I:%.*]], i32 [[J:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
13+
; CHECK-NEXT: entry:
14+
; CHECK-NEXT: [[S2:%.*]] = sext i32 [[I]] to i64
15+
; CHECK-NEXT: [[PROMOTED2:%.*]] = sext i32 [[J]] to i64
16+
; CHECK-NEXT: [[S:%.*]] = add nsw i64 [[S2]], [[PROMOTED2]]
17+
; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[S]]
18+
; CHECK-NEXT: store i64 [[S]], ptr [[ADDR1]], align 8
19+
; CHECK-NEXT: [[ADDR2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[S2]]
20+
; CHECK-NEXT: store i64 [[S2]], ptr [[ADDR2]], align 8
21+
; CHECK-NEXT: ret void
22+
;
23+
entry:
24+
%add = add nsw i32 %i, %j
25+
%s = sext i32 %add to i64
26+
%addr1 = getelementptr inbounds i64, ptr %P1, i64 %s
27+
store i64 %s, ptr %addr1
28+
%s2 = sext i32 %i to i64
29+
%addr2 = getelementptr inbounds i64, ptr %P2, i64 %s2
30+
store i64 %s2, ptr %addr2
31+
ret void
32+
}
33+
34+
; Both %add1 and %add2 are promoted by forking extra sexts. In the expected
; output the sext of the shared operand %j appears once and feeds both i64
; adds, while %i and %k each get their own sext.
35+
define void @promoteTwoTwo(i32 %i, i32 %j, i32 %k, ptr %P1, ptr %P2) {
36+
; CHECK-LABEL: define void @promoteTwoTwo(
37+
; CHECK-SAME: i32 [[I:%.*]], i32 [[J:%.*]], i32 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
38+
; CHECK-NEXT: entry:
39+
; CHECK-NEXT: [[PROMOTED3:%.*]] = sext i32 [[J]] to i64
40+
; CHECK-NEXT: [[PROMOTED4:%.*]] = sext i32 [[I]] to i64
41+
; CHECK-NEXT: [[S:%.*]] = add nsw i64 [[PROMOTED3]], [[PROMOTED4]]
42+
; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[S]]
43+
; CHECK-NEXT: store i64 [[S]], ptr [[ADDR1]], align 8
44+
; CHECK-NEXT: [[PROMOTED2:%.*]] = sext i32 [[K]] to i64
45+
; CHECK-NEXT: [[S2:%.*]] = add nsw i64 [[PROMOTED3]], [[PROMOTED2]]
46+
; CHECK-NEXT: [[ADDR2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[S2]]
47+
; CHECK-NEXT: store i64 [[S2]], ptr [[ADDR2]], align 8
48+
; CHECK-NEXT: ret void
49+
;
50+
entry:
51+
%add1 = add nsw i32 %j, %i
52+
%s = sext i32 %add1 to i64
53+
%addr1 = getelementptr inbounds i64, ptr %P1, i64 %s
54+
store i64 %s, ptr %addr1
55+
%add2 = add nsw i32 %j, %k
56+
%s2 = sext i32 %add2 to i64
57+
%addr2 = getelementptr inbounds i64, ptr %P2, i64 %s2
58+
store i64 %s2, ptr %addr2
59+
ret void
60+
}
61+
62+
; %add and %add2 are both promoted to i64 adds (with constants 1 and 2) that
; share a single sext of %i. In the expected output both getelementptrs stay
; in the entry block and the loads in if.then use them directly.
define i64 @promoteGEPSunk(i1 %cond, ptr %base, i32 %i) {
63+
; CHECK-LABEL: define i64 @promoteGEPSunk(
64+
; CHECK-SAME: i1 [[COND:%.*]], ptr [[BASE:%.*]], i32 [[I:%.*]]) {
65+
; CHECK-NEXT: entry:
66+
; CHECK-NEXT: [[PROMOTED1:%.*]] = sext i32 [[I]] to i64
67+
; CHECK-NEXT: [[S:%.*]] = add nsw i64 [[PROMOTED1]], 1
68+
; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i64, ptr [[BASE]], i64 [[S]]
69+
; CHECK-NEXT: [[S2:%.*]] = add nsw i64 [[PROMOTED1]], 2
70+
; CHECK-NEXT: [[ADDR2:%.*]] = getelementptr inbounds i64, ptr [[BASE]], i64 [[S2]]
71+
; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_THEN2:%.*]]
72+
; CHECK: if.then:
73+
; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[ADDR]], align 8
74+
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[ADDR2]], align 8
75+
; CHECK-NEXT: [[R:%.*]] = add i64 [[V]], [[V2]]
76+
; CHECK-NEXT: ret i64 [[R]]
77+
; CHECK: if.then2:
78+
; CHECK-NEXT: ret i64 0
79+
;
80+
entry:
81+
%add = add nsw i32 %i, 1
82+
%s = sext i32 %add to i64
83+
%addr = getelementptr inbounds i64, ptr %base, i64 %s
84+
%add2 = add nsw i32 %i, 2
85+
%s2 = sext i32 %add2 to i64
86+
%addr2 = getelementptr inbounds i64, ptr %base, i64 %s2
87+
br i1 %cond, label %if.then, label %if.then2
88+
if.then:
89+
%v = load i64, ptr %addr
90+
%v2 = load i64, ptr %addr2
91+
%r = add i64 %v, %v2
92+
ret i64 %r
93+
if.then2:
94+
ret i64 0;
95+
}

0 commit comments

Comments
 (0)