Skip to content

Commit 4101aa1

Browse files
committed
[BOLT] Support PC-relative relocations with addends
A PC-relative memory operand can reference a different object from the one located at the target address, e.g. when a negative offset is used. Check the relocations to find the object that is actually referenced. Reviewed By: rafauler Differential Revision: https://reviews.llvm.org/D120379
1 parent 8b62f19 commit 4101aa1

File tree

6 files changed

+132
-60
lines changed

6 files changed

+132
-60
lines changed

bolt/include/bolt/Core/BinaryFunction.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -1299,10 +1299,11 @@ class BinaryFunction {
12991299
case ELF::R_X86_64_32:
13001300
case ELF::R_X86_64_32S:
13011301
case ELF::R_X86_64_64:
1302+
case ELF::R_X86_64_PC8:
1303+
case ELF::R_X86_64_PC32:
1304+
case ELF::R_X86_64_PC64:
13021305
Relocations[Offset] = Relocation{Offset, Symbol, RelType, Addend, Value};
13031306
return;
1304-
case ELF::R_X86_64_PC32:
1305-
case ELF::R_X86_64_PC8:
13061307
case ELF::R_X86_64_PLT32:
13071308
case ELF::R_X86_64_GOTPCRELX:
13081309
case ELF::R_X86_64_REX_GOTPCRELX:

bolt/include/bolt/Core/Relocation.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ struct Relocation {
4949
/// Used to validate relocation correctness.
5050
uint64_t Value;
5151

52-
/// Return size of the given relocation \p Type.
52+
/// Return size in bytes of the given relocation \p Type.
5353
static size_t getSizeForType(uint64_t Type);
5454

5555
/// Return size of this relocation.

bolt/lib/Core/BinaryFunction.cpp

+71-47
Original file line numberDiff line numberDiff line change
@@ -1265,51 +1265,6 @@ bool BinaryFunction::disassemble() {
12651265
}
12661266
}
12671267

1268-
// Check if there's a relocation associated with this instruction.
1269-
bool UsedReloc = false;
1270-
for (auto Itr = Relocations.lower_bound(Offset),
1271-
ItrE = Relocations.lower_bound(Offset + Size);
1272-
Itr != ItrE; ++Itr) {
1273-
const Relocation &Relocation = Itr->second;
1274-
1275-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: replacing immediate 0x"
1276-
<< Twine::utohexstr(Relocation.Value)
1277-
<< " with relocation"
1278-
" against "
1279-
<< Relocation.Symbol << "+" << Relocation.Addend
1280-
<< " in function " << *this
1281-
<< " for instruction at offset 0x"
1282-
<< Twine::utohexstr(Offset) << '\n');
1283-
1284-
// Process reference to the primary symbol.
1285-
if (!Relocation.isPCRelative())
1286-
BC.handleAddressRef(Relocation.Value - Relocation.Addend, *this,
1287-
/*IsPCRel*/ false);
1288-
1289-
int64_t Value = Relocation.Value;
1290-
const bool Result = BC.MIB->replaceImmWithSymbolRef(
1291-
Instruction, Relocation.Symbol, Relocation.Addend, Ctx.get(), Value,
1292-
Relocation.Type);
1293-
(void)Result;
1294-
assert(Result && "cannot replace immediate with relocation");
1295-
1296-
// For aarch, if we replaced an immediate with a symbol from a
1297-
// relocation, we mark it so we do not try to further process a
1298-
// pc-relative operand. All we need is the symbol.
1299-
if (BC.isAArch64())
1300-
UsedReloc = true;
1301-
1302-
// Make sure we replaced the correct immediate (instruction
1303-
// can have multiple immediate operands).
1304-
if (BC.isX86()) {
1305-
assert(truncateToSize(static_cast<uint64_t>(Value),
1306-
Relocation::getSizeForType(Relocation.Type)) ==
1307-
truncateToSize(Relocation.Value, Relocation::getSizeForType(
1308-
Relocation.Type)) &&
1309-
"immediate value mismatch in function");
1310-
}
1311-
}
1312-
13131268
if (MIB->isBranch(Instruction) || MIB->isCall(Instruction)) {
13141269
uint64_t TargetAddress = 0;
13151270
if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
@@ -1394,8 +1349,75 @@ bool BinaryFunction::disassemble() {
13941349
if (BC.isAArch64())
13951350
handleAArch64IndirectCall(Instruction, Offset);
13961351
}
1397-
} else if (MIB->hasPCRelOperand(Instruction) && !UsedReloc) {
1398-
handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size);
1352+
} else {
1353+
// Check if there's a relocation associated with this instruction.
1354+
bool UsedReloc = false;
1355+
for (auto Itr = Relocations.lower_bound(Offset),
1356+
ItrE = Relocations.lower_bound(Offset + Size);
1357+
Itr != ItrE; ++Itr) {
1358+
const Relocation &Relocation = Itr->second;
1359+
uint64_t SymbolValue = Relocation.Value - Relocation.Addend;
1360+
if (Relocation.isPCRelative())
1361+
SymbolValue += getAddress() + Relocation.Offset;
1362+
1363+
// Process reference to the symbol.
1364+
if (BC.isX86())
1365+
BC.handleAddressRef(SymbolValue, *this, Relocation.isPCRelative());
1366+
1367+
if (BC.isAArch64() || !Relocation.isPCRelative()) {
1368+
int64_t Value = Relocation.Value;
1369+
const bool Result = BC.MIB->replaceImmWithSymbolRef(
1370+
Instruction, Relocation.Symbol, Relocation.Addend, Ctx.get(),
1371+
Value, Relocation.Type);
1372+
(void)Result;
1373+
assert(Result && "cannot replace immediate with relocation");
1374+
1375+
if (BC.isX86()) {
1376+
// Make sure we replaced the correct immediate (instruction
1377+
// can have multiple immediate operands).
1378+
assert(
1379+
truncateToSize(static_cast<uint64_t>(Value),
1380+
Relocation::getSizeForType(Relocation.Type)) ==
1381+
truncateToSize(Relocation.Value, Relocation::getSizeForType(
1382+
Relocation.Type)) &&
1383+
"immediate value mismatch in function");
1384+
} else if (BC.isAArch64()) {
1385+
// For aarch, if we replaced an immediate with a symbol from a
1386+
// relocation, we mark it so we do not try to further process a
1387+
// pc-relative operand. All we need is the symbol.
1388+
UsedReloc = true;
1389+
}
1390+
} else {
1391+
// Check if the relocation matches memop's Disp.
1392+
uint64_t TargetAddress;
1393+
if (!BC.MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
1394+
AbsoluteInstrAddr, Size)) {
1395+
errs() << "BOLT-ERROR: PC-relative operand can't be evaluated\n";
1396+
exit(1);
1397+
}
1398+
assert(TargetAddress == Relocation.Value + AbsoluteInstrAddr + Size &&
1399+
"Immediate value mismatch detected.");
1400+
1401+
const MCExpr *Expr = MCSymbolRefExpr::create(
1402+
Relocation.Symbol, MCSymbolRefExpr::VK_None, *BC.Ctx);
1403+
// Real addend for pc-relative targets is adjusted with a delta
1404+
// from relocation placement to the next instruction.
1405+
const uint64_t TargetAddend =
1406+
Relocation.Addend + Offset + Size - Relocation.Offset;
1407+
if (TargetAddend) {
1408+
const MCConstantExpr *Offset =
1409+
MCConstantExpr::create(TargetAddend, *BC.Ctx);
1410+
Expr = MCBinaryExpr::createAdd(Expr, Offset, *BC.Ctx);
1411+
}
1412+
BC.MIB->replaceMemOperandDisp(
1413+
Instruction, MCOperand::createExpr(BC.MIB->getTargetExprFor(
1414+
Instruction, Expr, *BC.Ctx, 0)));
1415+
UsedReloc = true;
1416+
}
1417+
}
1418+
1419+
if (MIB->hasPCRelOperand(Instruction) && !UsedReloc)
1420+
handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size);
13991421
}
14001422

14011423
add_instruction:
@@ -1565,6 +1587,8 @@ bool BinaryFunction::scanExternalRefs() {
15651587
ItrE = Relocations.lower_bound(Offset + Size);
15661588
Itr != ItrE; ++Itr) {
15671589
Relocation &Relocation = Itr->second;
1590+
if (Relocation.isPCRelative() && BC.isX86())
1591+
continue;
15681592
if (ignoreReference(Relocation.Symbol))
15691593
continue;
15701594

bolt/lib/Core/Relocation.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,9 @@ uint64_t adjustValueAArch64(uint64_t Type, uint64_t Value, uint64_t PC) {
254254

255255
uint64_t extractValueX86(uint64_t Type, uint64_t Contents, uint64_t PC) {
256256
if (Type == ELF::R_X86_64_32S)
257-
return SignExtend64<32>(Contents & 0xffffffff);
257+
return SignExtend64<32>(Contents);
258+
if (Relocation::isPCRelative(Type))
259+
return SignExtend64(Contents, 8 * Relocation::getSizeForType(Type));
258260
return Contents;
259261
}
260262

@@ -442,6 +444,8 @@ bool isPCRelativeX86(uint64_t Type) {
442444
case ELF::R_X86_64_PC64:
443445
case ELF::R_X86_64_GOTPCREL:
444446
case ELF::R_X86_64_PLT32:
447+
case ELF::R_X86_64_GOTOFF64:
448+
case ELF::R_X86_64_GOTPC32:
445449
case ELF::R_X86_64_GOTTPOFF:
446450
case ELF::R_X86_64_GOTPCRELX:
447451
case ELF::R_X86_64_REX_GOTPCRELX:

bolt/lib/Rewrite/RewriteInstance.cpp

+17-9
Original file line numberDiff line numberDiff line change
@@ -2302,6 +2302,12 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
23022302
}
23032303
}
23042304

2305+
MCSymbol *ReferencedSymbol = nullptr;
2306+
if (!IsSectionRelocation) {
2307+
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
2308+
ReferencedSymbol = BD->getSymbol();
2309+
}
2310+
23052311
// PC-relative relocations from data to code are tricky since the original
23062312
// information is typically lost after linking even with '--emit-relocs'.
23072313
// They are normally used by PIC-style jump tables and reference both
@@ -2310,16 +2316,19 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
23102316
// that it references an arbitrary location in the code, possibly even
23112317
// in a different function from that containing the jump table.
23122318
if (!IsAArch64 && Relocation::isPCRelative(RType)) {
2313-
// Just register the fact that we have PC-relative relocation at a given
2314-
// address. The actual referenced label/address cannot be determined
2315-
// from linker data alone.
2319+
// For relocations located in non-code sections, just register the fact that
2320+
// we have a PC-relative relocation at a given address. The actual
2321+
// referenced label/address cannot be determined from linker data alone.
23162322
if (!IsFromCode)
23172323
BC->addPCRelativeDataRelocation(Rel.getOffset());
2318-
2319-
LLVM_DEBUG(
2320-
dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at 0x"
2321-
<< Twine::utohexstr(Rel.getOffset()) << " for " << SymbolName
2322-
<< "\n");
2324+
else if (!IsSectionRelocation && ReferencedSymbol)
2325+
ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType,
2326+
Addend, ExtractedValue);
2327+
else
2328+
LLVM_DEBUG(
2329+
dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at 0x"
2330+
<< Twine::utohexstr(Rel.getOffset()) << " for " << SymbolName
2331+
<< "\n");
23232332
continue;
23242333
}
23252334

@@ -2399,7 +2408,6 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
23992408
}
24002409
}
24012410

2402-
MCSymbol *ReferencedSymbol = nullptr;
24032411
if (ForceRelocation) {
24042412
std::string Name = Relocation::isGOT(RType) ? "Zero" : SymbolName;
24052413
ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);

bolt/test/X86/fptr-addend-pcrel.s

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
## Check that BOLT correctly recognizes pc-relative function pointer
2+
## reference with an addend.
3+
4+
# REQUIRES: system-linux
5+
6+
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
7+
# RUN: llvm-strip --strip-unneeded %t.o
8+
# RUN: ld.lld %t.o -o %t.exe -q
9+
# RUN: llvm-bolt %t.exe -relocs -o /dev/null -print-only=foo -print-disasm \
10+
# RUN: | FileCheck %s
11+
12+
.text
13+
.globl _start
14+
.type _start,@function
15+
_start:
16+
.cfi_startproc
17+
call foo
18+
retq
19+
.size main, .-main
20+
.cfi_endproc
21+
22+
.globl foo
23+
.type foo,@function
24+
foo:
25+
.cfi_startproc
26+
27+
leaq foo-1(%rip), %rax
28+
## Check that the instruction references foo with a negative addend,
29+
## not the previous function with a positive addend (_start+X).
30+
#
31+
# CHECK: leaq foo-1(%rip), %rax
32+
33+
retq
34+
.size foo, .-foo
35+
.cfi_endproc

0 commit comments

Comments
 (0)