Skip to content

Commit 8ba1c38

Browse files
committed
[AArch64][GlobalISel] Add heuristics for G_FCONSTANT localization.
Now that an earlier commit adopted SDAG's heuristics for expanding 32/64-bit fpimms into either GPR materializations or constant-pool (CP) loads, we can teach the localizer the same heuristics. This avoids localizing expensive immediates, since doing so increases code size. The combination of these two changes results in minor improvements in CTMark -Os, and bigger improvements in some other cases.
1 parent 49d5bb4 commit 8ba1c38

File tree

2 files changed

+148
-10
lines changed

2 files changed

+148
-10
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
#include "llvm/CodeGen/SelectionDAGNodes.h"
5151
#include "llvm/CodeGen/TargetCallingConv.h"
5252
#include "llvm/CodeGen/TargetInstrInfo.h"
53+
#include "llvm/CodeGen/TargetOpcodes.h"
5354
#include "llvm/CodeGen/ValueTypes.h"
5455
#include "llvm/IR/Attributes.h"
5556
#include "llvm/IR/Constants.h"
@@ -24764,7 +24765,8 @@ bool AArch64TargetLowering::shouldLocalize(
2476424765
llvm_unreachable("Unexpected remat cost");
2476524766
};
2476624767

24767-
switch (MI.getOpcode()) {
24768+
unsigned Opc = MI.getOpcode();
24769+
switch (Opc) {
2476824770
case TargetOpcode::G_GLOBAL_VALUE: {
2476924771
// On Darwin, TLS global vars get selected into function calls, which
2477024772
// we don't want localized, as they can get moved into the middle of a
@@ -24774,14 +24776,37 @@ bool AArch64TargetLowering::shouldLocalize(
2477424776
return false;
2477524777
return true; // Always localize G_GLOBAL_VALUE to avoid high reg pressure.
2477624778
}
24779+
case TargetOpcode::G_FCONSTANT:
2477724780
case TargetOpcode::G_CONSTANT: {
24778-
auto *CI = MI.getOperand(1).getCImm();
24781+
const ConstantInt *CI;
24782+
unsigned AdditionalCost = 0;
24783+
24784+
if (Opc == TargetOpcode::G_CONSTANT)
24785+
CI = MI.getOperand(1).getCImm();
24786+
else {
24787+
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
24788+
// We try to estimate cost of 32/64b fpimms, as they'll likely be
24789+
// materialized as integers.
24790+
if (Ty.getScalarSizeInBits() != 32 && Ty.getScalarSizeInBits() != 64)
24791+
break;
24792+
auto APF = MI.getOperand(1).getFPImm()->getValueAPF();
24793+
bool OptForSize =
24794+
MF.getFunction().hasOptSize() || MF.getFunction().hasMinSize();
24795+
if (isFPImmLegal(APF, EVT::getFloatingPointVT(Ty.getScalarSizeInBits()),
24796+
OptForSize))
24797+
return true; // Constant should be cheap.
24798+
CI =
24799+
ConstantInt::get(MF.getFunction().getContext(), APF.bitcastToAPInt());
24800+
// FP materialization also costs an extra move, from gpr to fpr.
24801+
AdditionalCost = 1;
24802+
}
2477924803
APInt Imm = CI->getValue();
2478024804
InstructionCost Cost = TTI->getIntImmCost(
2478124805
Imm, CI->getType(), TargetTransformInfo::TCK_CodeSize);
2478224806
assert(Cost.isValid() && "Expected a valid imm cost");
2478324807

2478424808
unsigned RematCost = *Cost.getValue();
24809+
RematCost += AdditionalCost;
2478524810
Register Reg = MI.getOperand(0).getReg();
2478624811
unsigned MaxUses = maxUses(RematCost);
2478724812
// Don't pass UINT_MAX sentinel value to hasAtMostUserInstrs().

llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll

Lines changed: 121 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ define i32 @imm_cost_too_large_cost_of_2() {
124124
; CHECK-NEXT: [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1
125125
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1)
126126
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2228259
127-
; CHECK-NEXT: [[OPAQUE:%[0-9]+]]:_(s32) = G_CONSTANT_FOLD_BARRIER [[C1]]
127+
; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s32) = G_CONSTANT_FOLD_BARRIER [[C1]]
128128
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
129129
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
130130
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
@@ -134,19 +134,19 @@ define i32 @imm_cost_too_large_cost_of_2() {
134134
; CHECK-NEXT: successors: %bb.3(0x80000000)
135135
; CHECK-NEXT: {{ $}}
136136
; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2
137-
; CHECK-NEXT: G_STORE [[OPAQUE]](s32), [[GV3]](p0) :: (store (s32) into @var2)
137+
; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s32), [[GV3]](p0) :: (store (s32) into @var2)
138138
; CHECK-NEXT: G_BR %bb.3
139139
; CHECK-NEXT: {{ $}}
140140
; CHECK-NEXT: bb.3.if.then2:
141141
; CHECK-NEXT: successors: %bb.4(0x80000000)
142142
; CHECK-NEXT: {{ $}}
143143
; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1
144-
; CHECK-NEXT: G_STORE [[OPAQUE]](s32), [[GV4]](p0) :: (store (s32) into @var1)
144+
; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s32), [[GV4]](p0) :: (store (s32) into @var1)
145145
; CHECK-NEXT: G_BR %bb.4
146146
; CHECK-NEXT: {{ $}}
147147
; CHECK-NEXT: bb.4.if.end:
148148
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
149-
; CHECK-NEXT: G_STORE [[OPAQUE]](s32), [[GV5]](p0) :: (store (s32) into @var3)
149+
; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s32), [[GV5]](p0) :: (store (s32) into @var3)
150150
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
151151
; CHECK-NEXT: $w0 = COPY [[C3]](s32)
152152
; CHECK-NEXT: RET_ReallyLR implicit $w0
@@ -180,7 +180,7 @@ define i64 @imm_cost_too_large_cost_of_4() {
180180
; CHECK-NEXT: [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64
181181
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
182182
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -2228259
183-
; CHECK-NEXT: [[OPAQUE:%[0-9]+]]:_(s64) = G_CONSTANT_FOLD_BARRIER [[C1]]
183+
; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s64) = G_CONSTANT_FOLD_BARRIER [[C1]]
184184
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
185185
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
186186
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
@@ -190,19 +190,19 @@ define i64 @imm_cost_too_large_cost_of_4() {
190190
; CHECK-NEXT: successors: %bb.3(0x80000000)
191191
; CHECK-NEXT: {{ $}}
192192
; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64
193-
; CHECK-NEXT: G_STORE [[OPAQUE]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
193+
; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
194194
; CHECK-NEXT: G_BR %bb.3
195195
; CHECK-NEXT: {{ $}}
196196
; CHECK-NEXT: bb.3.if.then2:
197197
; CHECK-NEXT: successors: %bb.4(0x80000000)
198198
; CHECK-NEXT: {{ $}}
199199
; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64
200-
; CHECK-NEXT: G_STORE [[OPAQUE]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
200+
; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
201201
; CHECK-NEXT: G_BR %bb.4
202202
; CHECK-NEXT: {{ $}}
203203
; CHECK-NEXT: bb.4.if.end:
204204
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
205-
; CHECK-NEXT: G_STORE [[OPAQUE]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
205+
; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
206206
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
207207
; CHECK-NEXT: $x0 = COPY [[C3]](s64)
208208
; CHECK-NEXT: RET_ReallyLR implicit $x0
@@ -225,6 +225,119 @@ if.end:
225225
ret i64 0
226226
}
227227

228+
define i64 @f64_imm_cost_too_high(double %a) {
229+
; CHECK-LABEL: name: f64_imm_cost_too_high
230+
; CHECK: bb.1.entry:
231+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
232+
; CHECK-NEXT: liveins: $d0
233+
; CHECK-NEXT: {{ $}}
234+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e-02
235+
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64
236+
; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
237+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
238+
; CHECK-NEXT: [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64
239+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
240+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
241+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
242+
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
243+
; CHECK-NEXT: G_BR %bb.2
244+
; CHECK-NEXT: {{ $}}
245+
; CHECK-NEXT: bb.2.if.then:
246+
; CHECK-NEXT: successors: %bb.3(0x80000000)
247+
; CHECK-NEXT: {{ $}}
248+
; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64
249+
; CHECK-NEXT: G_STORE [[C]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
250+
; CHECK-NEXT: G_BR %bb.3
251+
; CHECK-NEXT: {{ $}}
252+
; CHECK-NEXT: bb.3.if.then2:
253+
; CHECK-NEXT: successors: %bb.4(0x80000000)
254+
; CHECK-NEXT: {{ $}}
255+
; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64
256+
; CHECK-NEXT: G_STORE [[C]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
257+
; CHECK-NEXT: G_BR %bb.4
258+
; CHECK-NEXT: {{ $}}
259+
; CHECK-NEXT: bb.4.if.end:
260+
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
261+
; CHECK-NEXT: G_STORE [[C]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
262+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
263+
; CHECK-NEXT: $x0 = COPY [[C3]](s64)
264+
; CHECK-NEXT: RET_ReallyLR implicit $x0
265+
entry:
266+
%0 = load i64, ptr @var1_64, align 4
267+
%cmp = icmp eq i64 %0, 1
268+
br i1 %cmp, label %if.then, label %if.end
269+
270+
if.then:
271+
store double 1.000000e-02, ptr @var2_64
272+
br label %if.then2
273+
274+
if.then2:
275+
store double 1.000000e-02, ptr @var1_64
276+
br label %if.end
277+
278+
if.end:
279+
store double 1.000000e-02, ptr @var3_64
280+
ret i64 0
281+
}
282+
283+
define i64 @f64_imm_cheap(double %a) {
284+
; CHECK-LABEL: name: f64_imm_cheap
285+
; CHECK: bb.1.entry:
286+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
287+
; CHECK-NEXT: liveins: $d0
288+
; CHECK-NEXT: {{ $}}
289+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
290+
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64
291+
; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
292+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
293+
; CHECK-NEXT: [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64
294+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
295+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
296+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
297+
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
298+
; CHECK-NEXT: G_BR %bb.2
299+
; CHECK-NEXT: {{ $}}
300+
; CHECK-NEXT: bb.2.if.then:
301+
; CHECK-NEXT: successors: %bb.3(0x80000000)
302+
; CHECK-NEXT: {{ $}}
303+
; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64
304+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
305+
; CHECK-NEXT: G_STORE [[C3]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
306+
; CHECK-NEXT: G_BR %bb.3
307+
; CHECK-NEXT: {{ $}}
308+
; CHECK-NEXT: bb.3.if.then2:
309+
; CHECK-NEXT: successors: %bb.4(0x80000000)
310+
; CHECK-NEXT: {{ $}}
311+
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
312+
; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64
313+
; CHECK-NEXT: G_STORE [[C4]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
314+
; CHECK-NEXT: G_BR %bb.4
315+
; CHECK-NEXT: {{ $}}
316+
; CHECK-NEXT: bb.4.if.end:
317+
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
318+
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
319+
; CHECK-NEXT: G_STORE [[C5]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
320+
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
321+
; CHECK-NEXT: $x0 = COPY [[C6]](s64)
322+
; CHECK-NEXT: RET_ReallyLR implicit $x0
323+
entry:
324+
%0 = load i64, ptr @var1_64, align 4
325+
%cmp = icmp eq i64 %0, 1
326+
br i1 %cmp, label %if.then, label %if.end
327+
328+
if.then:
329+
store double 0.0, ptr @var2_64
330+
br label %if.then2
331+
332+
if.then2:
333+
store double 0.0, ptr @var1_64
334+
br label %if.end
335+
336+
if.end:
337+
store double 0.0, ptr @var3_64
338+
ret i64 0
339+
}
340+
228341
@var1_64 = common global i64 0, align 4
229342
@var2_64 = common global i64 0, align 4
230343
@var3_64 = common global i64 0, align 4

0 commit comments

Comments
 (0)