Skip to content

Commit a9dccb0

Browse files
[TargetTransformInfo] Added an opt/llc option for cache line size
In some passes we need a valid cache line size to do analysis or transformation, e.g., loop cache analysis and loop data prefetch. However, for some backend targets, `TTIImpl->getCacheLineSize()` is not implemented and hence `TTI.getCacheLineSize()` would just return 0, which eventually might produce invalid results. In this patch we add a user-specified opt/llc option for cache line size. If the option is specified by the user we use the value supplied; otherwise we fall back to the default value obtained from `TTIImpl->getCacheLineSize()`. The PowerPC target already has such an option; this patch generalizes this option to TargetTransformInfo.cpp. Reviewed By: bmahjour, #loopoptwg Differential Revision: https://reviews.llvm.org/D127342
1 parent eea1531 commit a9dccb0

File tree

4 files changed

+170
-13
lines changed

4 files changed

+170
-13
lines changed

Diff for: llvm/lib/Analysis/TargetTransformInfo.cpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
3131
cl::Hidden,
3232
cl::desc("Recognize reduction patterns."));
3333

34+
static cl::opt<unsigned> CacheLineSize(
35+
"cache-line-size", cl::init(0), cl::Hidden,
36+
cl::desc("Use this to override the target cache line size when "
37+
"specified by the user."));
38+
3439
namespace {
3540
/// No-op implementation of the TTI interface using the utility base
3641
/// classes.
@@ -654,7 +659,8 @@ bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
654659
}
655660

656661
unsigned TargetTransformInfo::getCacheLineSize() const {
657-
return TTIImpl->getCacheLineSize();
662+
return CacheLineSize.getNumOccurrences() > 0 ? CacheLineSize
663+
: TTIImpl->getCacheLineSize();
658664
}
659665

660666
llvm::Optional<unsigned>

Diff for: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

-9
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,6 @@ using namespace llvm;
2828
static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
2929
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
3030

31-
// This is currently only used for the data prefetch pass
32-
static cl::opt<unsigned>
33-
CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
34-
cl::desc("The loop prefetch cache line size"));
35-
3631
static cl::opt<bool>
3732
EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
3833
cl::desc("Enable using coldcc calling conv for cold "
@@ -901,10 +896,6 @@ PPCTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
901896
}
902897

903898
unsigned PPCTTIImpl::getCacheLineSize() const {
904-
// Check first if the user specified a custom line size.
905-
if (CacheLineSize.getNumOccurrences() > 0)
906-
return CacheLineSize;
907-
908899
// Starting with P7 we have a cache line size of 128.
909900
unsigned Directive = ST->getCPUDirective();
910901
// Assume that Future CPU has the same cache line size as the others.

Diff for: llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll

+160
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
; RUN: opt < %s -opaque-pointers -cache-line-size=32 -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck -check-prefix=SMALLER-CACHELINE %s
2+
; RUN: opt < %s -opaque-pointers -cache-line-size=256 -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck -check-prefix=LARGER-CACHELINE %s
3+
4+
;; This test is similar to test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll,
5+
;; with differences that it tests the scenarios where an option for cache line size is
6+
;; specified with different values.
7+
8+
; Check IndexedReference::computeRefCost can handle type differences between
9+
; Stride and TripCount
10+
11+
; SMALLER-CACHELINE: Loop 'for.cond' has cost = 256
12+
; LARGER-CACHELINE: Loop 'for.cond' has cost = 32
13+
%struct._Handleitem = type { %struct._Handleitem* }
14+
15+
define void @handle_to_ptr(%struct._Handleitem** %blocks) {
16+
; Preheader:
17+
entry:
18+
br label %for.cond
19+
20+
; Loop:
21+
for.cond: ; preds = %for.body, %entry
22+
%i.0 = phi i32 [ 1, %entry ], [ %inc, %for.body ]
23+
%cmp = icmp ult i32 %i.0, 1024
24+
br i1 %cmp, label %for.body, label %for.end
25+
26+
for.body: ; preds = %for.cond
27+
%idxprom = zext i32 %i.0 to i64
28+
%arrayidx = getelementptr inbounds %struct._Handleitem*, %struct._Handleitem** %blocks, i64 %idxprom
29+
store %struct._Handleitem* null, %struct._Handleitem** %arrayidx, align 8
30+
%inc = add nuw nsw i32 %i.0, 1
31+
br label %for.cond
32+
33+
; Exit blocks
34+
for.end: ; preds = %for.cond
35+
ret void
36+
}
37+
38+
39+
40+
; Check IndexedReference::computeRefCost can handle negative stride
41+
42+
; SMALLER-CACHELINE: Loop 'for.neg.cond' has cost = 256
43+
; LARGER-CACHELINE: Loop 'for.neg.cond' has cost = 32
44+
define void @handle_to_ptr_neg_stride(%struct._Handleitem** %blocks) {
45+
; Preheader:
46+
entry:
47+
br label %for.neg.cond
48+
49+
; Loop:
50+
for.neg.cond: ; preds = %for.neg.body, %entry
51+
%i.0 = phi i32 [ 1023, %entry ], [ %dec, %for.neg.body ]
52+
%cmp = icmp sgt i32 %i.0, 0
53+
br i1 %cmp, label %for.neg.body, label %for.neg.end
54+
55+
for.neg.body: ; preds = %for.neg.cond
56+
%idxprom = zext i32 %i.0 to i64
57+
%arrayidx = getelementptr inbounds %struct._Handleitem*, %struct._Handleitem** %blocks, i64 %idxprom
58+
store %struct._Handleitem* null, %struct._Handleitem** %arrayidx, align 8
59+
%dec = add nsw i32 %i.0, -1
60+
br label %for.neg.cond
61+
62+
; Exit blocks
63+
for.neg.end: ; preds = %for.neg.cond
64+
ret void
65+
}
66+
67+
68+
69+
; for (int i = 40960; i > 0; i--)
70+
; B[i] = B[40960 - i];
71+
72+
; FIXME: Currently negative access functions are treated the same as positive
73+
; access functions. When this is fixed this testcase should have a cost
74+
; approximately 2x higher.
75+
76+
; SMALLER-CACHELINE: Loop 'for.cond2' has cost = 10240
77+
; LARGER-CACHELINE: Loop 'for.cond2' has cost = 1280
78+
define void @Test2(double* %B) {
79+
entry:
80+
br label %for.cond2
81+
82+
for.cond2: ; preds = %for.body, %entry
83+
%i.0 = phi i32 [ 40960, %entry ], [ %dec, %for.body ]
84+
%cmp = icmp sgt i32 %i.0, 0
85+
br i1 %cmp, label %for.body, label %for.end
86+
87+
for.body: ; preds = %for.cond
88+
%sub = sub nsw i32 40960, %i.0
89+
%idxprom = sext i32 %sub to i64
90+
%arrayidx = getelementptr inbounds double, double* %B, i64 %idxprom
91+
%0 = load double, double* %arrayidx, align 8
92+
%idxprom1 = sext i32 %i.0 to i64
93+
%arrayidx2 = getelementptr inbounds double, double* %B, i64 %idxprom1
94+
store double %0, double* %arrayidx2, align 8
95+
%dec = add nsw i32 %i.0, -1
96+
br label %for.cond2
97+
98+
for.end: ; preds = %for.cond
99+
ret void
100+
}
101+
102+
103+
104+
; for (i = 40960; i > 0; i--)
105+
; C[i] = C[i];
106+
107+
; SMALLER-CACHELINE: Loop 'for.cond3' has cost = 10240
108+
; LARGER-CACHELINE: Loop 'for.cond3' has cost = 1280
109+
define void @Test3(double** %C) {
110+
entry:
111+
br label %for.cond3
112+
113+
for.cond3: ; preds = %for.body, %entry
114+
%i.0 = phi i32 [ 40960, %entry ], [ %dec, %for.body ]
115+
%cmp = icmp sgt i32 %i.0, 0
116+
br i1 %cmp, label %for.body, label %for.end
117+
118+
for.body: ; preds = %for.cond
119+
%idxprom = sext i32 %i.0 to i64
120+
%arrayidx = getelementptr inbounds double*, double** %C, i64 %idxprom
121+
%0 = load double*, double** %arrayidx, align 8
122+
%idxprom1 = sext i32 %i.0 to i64
123+
%arrayidx2 = getelementptr inbounds double*, double** %C, i64 %idxprom1
124+
store double* %0, double** %arrayidx2, align 8
125+
%dec = add nsw i32 %i.0, -1
126+
br label %for.cond3
127+
128+
for.end: ; preds = %for.cond
129+
ret void
130+
}
131+
132+
133+
134+
; for (i = 0; i < 40960; i++)
135+
; D[i] = D[i];
136+
137+
; SMALLER-CACHELINE: Loop 'for.cond4' has cost = 10240
138+
; LARGER-CACHELINE: Loop 'for.cond4' has cost = 1280
139+
define void @Test4(double** %D) {
140+
entry:
141+
br label %for.cond4
142+
143+
for.cond4: ; preds = %for.body, %entry
144+
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
145+
%cmp = icmp slt i32 %i.0, 40960
146+
br i1 %cmp, label %for.body, label %for.end
147+
148+
for.body: ; preds = %for.cond
149+
%idxprom = sext i32 %i.0 to i64
150+
%arrayidx = getelementptr inbounds double*, double** %D, i64 %idxprom
151+
%0 = load double*, double** %arrayidx, align 8
152+
%idxprom1 = sext i32 %i.0 to i64
153+
%arrayidx2 = getelementptr inbounds double*, double** %D, i64 %idxprom1
154+
store double* %0, double** %arrayidx2, align 8
155+
%inc = add nsw i32 %i.0, 1
156+
br label %for.cond4
157+
158+
for.end: ; preds = %for.cond
159+
ret void
160+
}

Diff for: llvm/test/CodeGen/PowerPC/ppc64-get-cache-line-size.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-ppc-prefetching=true | FileCheck %s
2-
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-ppc-prefetching=true -ppc-loop-prefetch-cache-line=64 | FileCheck %s -check-prefix=CHECK-DCBT
2+
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-ppc-prefetching=true -cache-line-size=64 | FileCheck %s -check-prefix=CHECK-DCBT
33
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -enable-ppc-prefetching=true | FileCheck %s
4-
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -enable-ppc-prefetching=true -ppc-loop-prefetch-cache-line=64 | FileCheck %s -check-prefix=CHECK-DCBT
4+
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -enable-ppc-prefetching=true -cache-line-size=64 | FileCheck %s -check-prefix=CHECK-DCBT
55
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -enable-ppc-prefetching=true | FileCheck %s
6-
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -enable-ppc-prefetching=true -ppc-loop-prefetch-cache-line=64 | FileCheck %s -check-prefix=CHECK-DCBT
6+
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -enable-ppc-prefetching=true -cache-line-size=64 | FileCheck %s -check-prefix=CHECK-DCBT
77
; RUN: llc < %s -mtriple=ppc64-- -mcpu=a2 -enable-ppc-prefetching=true | FileCheck %s -check-prefix=CHECK-DCBT
88

99
; Function Attrs: nounwind

0 commit comments

Comments
 (0)