Skip to content

Commit 1ba91eb

Browse files
thixotropist authored and ghidra1 committed
GP-3707 ELF RISCV relocation processing improvements
1 parent 6f5ded5 commit 1ba91eb

File tree

1 file changed

+166
-71
lines changed

1 file changed

+166
-71
lines changed

Ghidra/Processors/RISCV/src/main/java/ghidra/app/util/bin/format/elf/relocation/RISCV_ElfRelocationHandler.java

Lines changed: 166 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -13,25 +13,123 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*/
16-
1716
package ghidra.app.util.bin.format.elf.relocation;
1817

18+
import java.util.Iterator;
19+
1920
import ghidra.app.util.bin.format.elf.*;
20-
import ghidra.program.model.address.Address;
21+
import ghidra.program.model.address.*;
2122
import ghidra.program.model.listing.Program;
2223
import ghidra.program.model.mem.Memory;
2324
import ghidra.program.model.mem.MemoryAccessException;
25+
import ghidra.program.model.reloc.*;
2426
import ghidra.program.model.reloc.Relocation.Status;
25-
import ghidra.program.model.reloc.RelocationResult;
2627
import ghidra.util.exception.NotFoundException;
2728

29+
/**
30+
* See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc for information on the different riscv elf
31+
* relocation types. Different relocation types are found in different contexts - not all of which are currently handled here.
32+
* The contexts we *attempt* to handle include:
33+
*
34+
* 1. fully linked Elf executables like the Linux `cp` utility which rely on dynamic linking to libraries like libc.so.6
35+
* 2. object files compiled with or without Position Independent code (`-fpic`) support
36+
* 3. shared object libraries like `libc.so.6`
37+
* 4. kernel load modules compiled with position independent code (`-fpic`) support
38+
*
39+
* Keep in mind:
40+
*
41+
* 1. You may find multiple relocations at any single address.
42+
* 2. Many relocations and relocation variants are there to support linker/loader optimizations unneeded by Ghidra.
43+
* 3. Some relocations can only name their target indirectly. R_RISCV_PCREL_LO12_I references a R_RISCV_PCREL_HI20 relocation,
44+
* but needs the symbol referenced by that R_RISCV_PCREL_HI20 in order to compute a PC relative offset.
45+
* 4. Many discrete symbols can share the same symbol name, e.g. `.L0`. These symbol names can include non-printing characters like `".L0^B2"`
46+
*
47+
*/
2848
public class RISCV_ElfRelocationHandler extends ElfRelocationHandler {
2949

3050
@Override
3151
public boolean canRelocate(ElfHeader elf) {
3252
return elf.e_machine() == ElfConstants.EM_RISCV;
3353
}
3454

55+
/**
56+
* Get the adjusted 20 high bits of a 32 bit target. The lower 12 bits will
57+
* be found in a later instruction, using a sign-extended signed addition. If those
58+
* 12 bits will be seen as negative, we need to increment the higher bits by one
59+
* @param target a 32 bit offset
60+
* @return the higher 20 bits possibly incremented by 0x1000
61+
*/
62+
static int getHi20(int target) {
63+
int target_h = target & 0xfffff000;
64+
// the low order 12 bits are sign-extended before adding, so we may need to adjust the high order 20 bits
65+
if ((target & 0x00000800) == 0x800) {
66+
target_h = target_h + 0x1000;
67+
}
68+
return target_h;
69+
}
70+
71+
/**
72+
* get the lower 12 bits of a 32 bit target. These will typically be added - not or'ed - to
73+
* a register holding the higher 20 bits.
74+
* @param target a 32 bit offset
75+
* @return the lower 12 bits of target
76+
*/
77+
static int getLo12(int target) {
78+
return (target & 0x00000fff);
79+
}
80+
81+
/**
82+
* PC relative relocations like R_RISCV_PCREL_LO12_I find their target indirectly,
83+
* using their symbolValue to locate the address of the matching R_RISCV_PCREL_HI20
84+
* or R_RISCV_GOT_HI20.
85+
* That *HI20 relocation's symbol value points to the actual target.
86+
* This function attempts to locate that actual target by querying Ghidra's Relocation
87+
* and Symbol tables. There can be more than one relocation assigned to a given address,
88+
* so we need to search.
89+
*
90+
* Note that this function probably belongs within ElfRelocationContext, but we have no
91+
* published strategy for integration testing that class.
92+
*
93+
* @return the relocation symbol value associated with the linked relocation type.
94+
*/
95+
static int getSymbolValueIndirect(ElfRelocationContext elfRelocationContext, int hi20Addr) {
96+
97+
int target;
98+
// Get the relevant Ghidra tables
99+
Program program = elfRelocationContext.getProgram();
100+
AddressSpace space = program.getAddressFactory().getDefaultAddressSpace();
101+
RelocationTable relocationTable = program.getRelocationTable();
102+
103+
// get the possible address of R_RISCV_PCREL_HI20 relocation holding the target address
104+
Address hi20Address = program.getAddressFactory().getAddress(space.getSpaceID(), hi20Addr);
105+
106+
// we need an address set with one holding a single address to retrieve multiple relocations
107+
AddressSet relocationAddressSet = new AddressSet(hi20Address);
108+
// get any relocations at this address - hopefully R_RISCV_PCREL_HI20 and likely R_RISCV_RELAX
109+
Iterator<Relocation> relocs = relocationTable.getRelocations(relocationAddressSet);
110+
// search the relocations for a R_RISCV_PCREL_HI20 relocation
111+
while (relocs.hasNext()) {
112+
Relocation rel = relocs.next();
113+
// there may be other valid relocation types to process
114+
if ((rel.getType() == RISCV_ElfRelocationConstants.R_RISCV_PCREL_HI20) ||
115+
(rel.getType() == RISCV_ElfRelocationConstants.R_RISCV_GOT_HI20)) {
116+
int refSymbolIndex = (int) rel.getValues()[0];
117+
// System.out.println("Matching PCREL_HI20 value is 0x" + Long.toHexString(refSymbolIndex));
118+
// Note that an elf symbol index is not the same thing as a regular symbol index.
119+
ElfSymbol elfSym = elfRelocationContext.getSymbol(refSymbolIndex);
120+
int targetOffset = (int) elfRelocationContext.getSymbolValue(elfSym);
121+
// System.out.println(
122+
// "PCREL_HI20 Symbol \"" + elfRelocationContext.getSymbolName(refSymbolIndex) +
123+
// "\" found with offset 0x" + Long.toHexString(targetOffset));
124+
// compute the target offset from the referred auipc instruction
125+
target = targetOffset - hi20Addr;
126+
// System.out.println("PCREL_HI20 Symbol location is 0x" + Long.toHexString(target));
127+
return target;
128+
}
129+
}
130+
return 0;
131+
}
132+
35133
@Override
36134
public RelocationResult relocate(ElfRelocationContext elfRelocationContext,
37135
ElfRelocation relocation,
@@ -49,7 +147,8 @@ public RelocationResult relocate(ElfRelocationContext elfRelocationContext,
49147
return RelocationResult.SKIPPED;
50148
}
51149

52-
long addend = relocation.hasAddend() ? relocation.getAddend() : is32 ? memory.getInt(relocationAddress) : memory.getLong(relocationAddress);
150+
long addend = relocation.hasAddend() ? relocation.getAddend()
151+
: is32 ? memory.getInt(relocationAddress) : memory.getLong(relocationAddress);
53152
long offset = relocationAddress.getOffset();
54153
long base = elfRelocationContext.getImageBaseWordAdjustmentOffset();
55154

@@ -59,27 +158,11 @@ public RelocationResult relocate(ElfRelocationContext elfRelocationContext,
59158
long symbolValue = elfRelocationContext.getSymbolValue(sym);
60159
String symbolName = elfRelocationContext.getSymbolName(symbolIndex);
61160

62-
//TODO remove debug
63-
switch(type) {
64-
case 2:
65-
case 3:
66-
case 5:
67-
break;
68-
default:
69-
// System.out.println("DEBUG RISCV: " +
70-
// type + " " + relocationAddress + " " +
71-
// String.format("%x", symbolValue) + " " +
72-
// String.format("%x", addend) + " " +
73-
// String.format("%x", offset) + " " +
74-
// String.format("%x", base));// + " " +
75-
//String.format("%x", memory.getInt(relocationAddress)));
76-
break;
77-
}
78-
79161
long value64 = 0;
80162
int value32 = 0;
81163
short value16 = 0;
82164
byte value8 = 0;
165+
int target = 0;
83166

84167
int byteLength = 4; // most relocations affect 4-bytes (change if different)
85168

@@ -151,7 +234,7 @@ public RelocationResult relocate(ElfRelocationContext elfRelocationContext,
151234

152235
case RISCV_ElfRelocationConstants.R_RISCV_TLS_DTPMOD64:
153236
// TLS relocation word64 = S->TLSINDEX
154-
markAsWarning(program, relocationAddress, "R_RISCV_TLS_DTPMOD32", symbolName,
237+
markAsWarning(program, relocationAddress, "R_RISCV_TLS_DTPMOD64", symbolName,
155238
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
156239
return RelocationResult.UNSUPPORTED;
157240

@@ -169,7 +252,7 @@ public RelocationResult relocate(ElfRelocationContext elfRelocationContext,
169252

170253
case RISCV_ElfRelocationConstants.R_RISCV_TLS_TPREL32:
171254
// TLS relocation word32 = TLS + S + A + S_TLS_OFFSET - TLS_DTV_OFFSET
172-
markAsWarning(program, relocationAddress, "R_RISCV_TLS_DTREL32", symbolName,
255+
markAsWarning(program, relocationAddress, "R_RISCV_TLS_TPREL32", symbolName,
173256
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
174257
return RelocationResult.UNSUPPORTED;
175258

@@ -181,7 +264,7 @@ public RelocationResult relocate(ElfRelocationContext elfRelocationContext,
181264

182265
case RISCV_ElfRelocationConstants.R_RISCV_BRANCH:
183266
// PC-relative branch (B-Type)
184-
int target = (int) (addend + symbolValue - offset);
267+
target = (int) (addend + symbolValue - offset);
185268
value32 =
186269
((target & 0x01e) << 7) | ((target & 0x0800) >> 4) | ((target & 0x03e0) << 20) |
187270
((target & 0x1000) << 19) | memory.getInt(relocationAddress);
@@ -198,28 +281,19 @@ public RelocationResult relocate(ElfRelocationContext elfRelocationContext,
198281
break;
199282

200283
case RISCV_ElfRelocationConstants.R_RISCV_CALL:
284+
// PC-relative call (PLT) MACRO call,tail (auipc+jalr pair) PIC
285+
// Identical processing in Ghidra as the following
286+
287+
case RISCV_ElfRelocationConstants.R_RISCV_CALL_PLT:
201288
// PC-relative call MACRO call,tail (auipc+jalr pair)
202289
target = (int) (addend + symbolValue - offset);
203-
int target_l = target << 20 >> 20;
204-
int target_h = target - target_l;
205-
value32 = (target_h & 0xfffff000) | memory.getInt(relocationAddress);
206-
memory.setInt(relocationAddress, value32);
207-
value32 = ((target_l & 0x00000fff) << 20) | memory.getInt(relocationAddress.add(4));
208-
memory.setInt(relocationAddress.add(4), value32);
290+
memory.setInt(relocationAddress,
291+
getHi20(target) | memory.getInt(relocationAddress));
292+
memory.setInt(relocationAddress.add(4),
293+
(getLo12(target) << 20) | memory.getInt(relocationAddress.add(4)));
294+
byteLength = 8;
209295
break;
210296

211-
case RISCV_ElfRelocationConstants.R_RISCV_CALL_PLT:
212-
// PC-relative call (PLT) MACRO call,tail (auipc+jalr pair) PIC
213-
markAsWarning(program, relocationAddress, "R_RISCV_CALL_PLT", symbolName,
214-
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
215-
return RelocationResult.UNSUPPORTED;
216-
217-
case RISCV_ElfRelocationConstants.R_RISCV_GOT_HI20:
218-
// PC-relative GOT reference MACRO la
219-
markAsWarning(program, relocationAddress, "R_RISCV_GOT_HI20", symbolName,
220-
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
221-
return RelocationResult.UNSUPPORTED;
222-
223297
case RISCV_ElfRelocationConstants.R_RISCV_TLS_GOT_HI20:
224298
// PC-relative TLS IE GOT offset MACRO la.tls.ie
225299
markAsWarning(program, relocationAddress, "R_RISCV_TLS_GOT_HI20", symbolName,
@@ -232,23 +306,42 @@ public RelocationResult relocate(ElfRelocationContext elfRelocationContext,
232306
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
233307
return RelocationResult.UNSUPPORTED;
234308

309+
case RISCV_ElfRelocationConstants.R_RISCV_GOT_HI20:
310+
// PC-relative GOT reference MACRO la
235311
case RISCV_ElfRelocationConstants.R_RISCV_PCREL_HI20:
236-
// PC-relative reference %pcrel_hi(symbol) (U-Type)
237-
markAsWarning(program, relocationAddress, "R_RISCV_PCREL_HI20", symbolName,
238-
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
239-
return RelocationResult.UNSUPPORTED;
312+
// PC-relative, not tested on 32 bit objects
313+
target = (int) (addend + symbolValue - offset);
314+
memory.setInt(relocationAddress,
315+
getHi20(target) | memory.getInt(relocationAddress));
316+
break;
240317

241318
case RISCV_ElfRelocationConstants.R_RISCV_PCREL_LO12_I:
242-
// PC-relative reference %pcrel_lo(symbol) (I-Type)
243-
markAsWarning(program, relocationAddress, "R_RISCV_PCREL_LO12_I", symbolName,
244-
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
245-
return RelocationResult.UNSUPPORTED;
319+
// PC-relative reference %pcrel_lo(symbol) (I-Type), relative to the cited pc_rel_hi20
320+
target = getSymbolValueIndirect(elfRelocationContext, (int) symbolValue);
321+
if (target == 0) {
322+
markAsWarning(program, relocationAddress, "R_RISCV_PCREL_LO12_I", symbolName,
323+
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
324+
return RelocationResult.UNSUPPORTED;
325+
}
326+
value32 = ((target & 0x00000fff) << 20) | memory.getInt(relocationAddress);
327+
memory.setInt(relocationAddress, value32);
328+
break;
246329

247330
case RISCV_ElfRelocationConstants.R_RISCV_PCREL_LO12_S:
248331
// PC-relative reference %pcrel_lo(symbol) (S-Type)
249-
markAsWarning(program, relocationAddress, "R_RISCV_PCREL_LO12_S", symbolName,
250-
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
251-
return RelocationResult.UNSUPPORTED;
332+
// S-type immediates split the 12 bit value into separate 7 bit and 5 bit fields.
333+
// Warning: untested!
334+
target = getSymbolValueIndirect(elfRelocationContext, (int) symbolValue);
335+
if (target == 0) {
336+
markAsWarning(program, relocationAddress, "R_RISCV_PCREL_LO12_I", symbolName,
337+
symbolIndex,
338+
"TODO, needs support ", elfRelocationContext.getLog());
339+
return RelocationResult.UNSUPPORTED;
340+
}
341+
value32 = ((target & 0x000007f) << 25) | (target & 0x00000f80) |
342+
memory.getInt(relocationAddress);
343+
memory.setInt(relocationAddress, value32);
344+
break;
252345

253346
case RISCV_ElfRelocationConstants.R_RISCV_HI20:
254347
// Absolute address %hi(symbol) (U-Type)
@@ -298,6 +391,8 @@ public RelocationResult relocate(ElfRelocationContext elfRelocationContext,
298391

299392
case RISCV_ElfRelocationConstants.R_RISCV_ADD8:
300393
// 8-bit label addition word8 = old + S + A
394+
markAsWarning(program, relocationAddress, "R_RISCV_ADD8", symbolName, symbolIndex,
395+
"TODO, needs support ", elfRelocationContext.getLog());
301396
value8 = memory.getByte(relocationAddress);
302397
value8 += (byte) symbolValue;
303398
value8 += (byte) addend;
@@ -384,34 +479,36 @@ public RelocationResult relocate(ElfRelocationContext elfRelocationContext,
384479
"TODO, needs support ", elfRelocationContext.getLog());
385480
return RelocationResult.UNSUPPORTED;
386481

387-
case RISCV_ElfRelocationConstants.R_RISCV_RVC_BRANCH:
482+
case RISCV_ElfRelocationConstants.R_RISCV_RVC_BRANCH: {
388483
// PC-relative branch offset (CB-Type)
389-
target = (short) (addend + symbolValue - offset);
484+
short target_s = (short) (addend + symbolValue - offset);
390485
// 15 13 | 12 11 10|9 7| 6 5 4 3 2|1 0
391486
// C.BEQZ offset[8|4:3] src offset[7:6|2:1|5] C1
392-
value16 = (short) (((target & 0x100) << 4) | ((target & 0x18) << 7) |
393-
((target & 0xc0) >> 1) |
394-
((target & 0x06) << 2) | ((target & 0x20) >> 3) |
487+
value16 = (short) (((target_s & 0x100) << 4) | ((target_s & 0x18) << 7) |
488+
((target_s & 0xc0) >> 1) |
489+
((target_s & 0x06) << 2) | ((target_s & 0x20) >> 3) |
395490
(memory.getShort(relocationAddress) & 0xe383));
396-
byteLength = 2;
397491
memory.setShort(relocationAddress, value16);
492+
byteLength = 2;
398493
break;
494+
}
399495

400-
case RISCV_ElfRelocationConstants.R_RISCV_RVC_JUMP:
401-
// PC-relative jump offset (CJ-Type)
402-
target = (short) (addend + symbolValue - offset);
496+
case RISCV_ElfRelocationConstants.R_RISCV_RVC_JUMP: {
497+
short target_s = (short) (addend + symbolValue - offset);
403498
// Complicated swizzling going on here.
404499
// For details, see The RISC-V Instruction Set Manual Volume I: Unprivileged ISA
405500
// 15 13 | 12 11 10 9 8 7 6 5 3 2|1 0
406501
// C.J offset[11| 4|9:8|10|6|7|3:1|5] C1
407-
value16 = (short) (((target & 0x800) << 1) | ((target & 0x10) << 7) |
408-
((target & 0x300) << 1) |
409-
((target & 0x400) >> 2) | ((target & 0x40) << 1) | ((target & 0x80) >> 1) |
410-
((target & 0x0e) << 2) | ((target & 0x20) >> 3) |
502+
value16 = (short) (((target_s & 0x800) << 1) | ((target_s & 0x10) << 7) |
503+
((target_s & 0x300) << 1) |
504+
((target_s & 0x400) >> 2) | ((target_s & 0x40) << 1) |
505+
((target_s & 0x80) >> 1) |
506+
((target_s & 0x0e) << 2) | ((target_s & 0x20) >> 3) |
411507
(memory.getShort(relocationAddress) & 0xe003));
412-
byteLength = 2;
413508
memory.setShort(relocationAddress, value16);
509+
byteLength = 2;
414510
break;
511+
}
415512

416513
case RISCV_ElfRelocationConstants.R_RISCV_RVC_LUI:
417514
// Absolute address (CI-Type)
@@ -444,10 +541,8 @@ public RelocationResult relocate(ElfRelocationContext elfRelocationContext,
444541
return RelocationResult.UNSUPPORTED;
445542

446543
case RISCV_ElfRelocationConstants.R_RISCV_RELAX:
447-
// Instruction pair can be relaxed
448-
markAsWarning(program, relocationAddress, "R_RISCV_RELAX", symbolName, symbolIndex,
449-
"TODO, needs support ", elfRelocationContext.getLog());
450-
return RelocationResult.UNSUPPORTED;
544+
// Instruction pair can be relaxed by the linker/loader- ignore
545+
return RelocationResult.SKIPPED;
451546

452547
case RISCV_ElfRelocationConstants.R_RISCV_SUB6:
453548
// Local label subtraction

0 commit comments

Comments
 (0)