Skip to content

Commit f21704e

Browse files
[LLD][PowerPC] Fix bug in PC-Relative initial exec
There is a bug when initial exec is relaxed to local exec. In the following situation: InitExec.c ``` extern __thread unsigned TGlobal; unsigned getConst(unsigned*); unsigned addVal(unsigned, unsigned*); unsigned GetAddrT() { return addVal(getConst(&TGlobal), &TGlobal); } ``` Def.c ``` __thread unsigned TGlobal; unsigned getConst(unsigned* A) { return *A + 3; } unsigned addVal(unsigned A, unsigned* B) { return A + *B; } ``` The problem is in InitExec.c but Def.c is required if you want to link the example and see the problem. To compile everything: ``` clang -O3 -mcpu=pwr10 -c InitExec.c clang -O3 -mcpu=pwr10 -c Def.c ld.lld InitExec.o Def.o -o IeToLe ``` If you objdump the problem object file: ``` $ llvm-objdump -dr --mcpu=pwr10 InitExec.o ``` you will get the following assembly: ``` 0000000000000000 <GetAddrT>: 0: a6 02 08 7c mflr 0 4: f0 ff c1 fb std 30, -16(1) 8: 10 00 01 f8 std 0, 16(1) c: d1 ff 21 f8 stdu 1, -48(1) 10: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1 0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal 18: 14 6a c3 7f add 30, 3, 13 0000000000000019: R_PPC64_TLS TGlobal 1c: 78 f3 c3 7f mr 3, 30 20: 01 00 00 48 bl 0x20 0000000000000020: R_PPC64_REL24_NOTOC getConst 24: 78 f3 c4 7f mr 4, 30 28: 30 00 21 38 addi 1, 1, 48 2c: 10 00 01 e8 ld 0, 16(1) 30: f0 ff c1 eb ld 30, -16(1) 34: a6 03 08 7c mtlr 0 38: 00 00 00 48 b 0x38 0000000000000038: R_PPC64_REL24_NOTOC addVal ``` The lines of interest are: ``` 10: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1 0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal 18: 14 6a c3 7f add 30, 3, 13 0000000000000019: R_PPC64_TLS TGlobal 1c: 78 f3 c3 7f mr 3, 30 ``` Which once linked gets turned into: ``` 10010210: ff ff 03 06 00 90 6d 38 paddi 3, 13, -28672, 0 10010218: 00 00 00 60 nop 1001021c: 78 f3 c3 7f mr 3, 30 ``` The problem is that register 30 is never set after the optimization. Therefore it is not correct to relax the above instructions by replacing the add instruction with a nop. 
Instead, the add instruction should be replaced with a copy (mr) instruction. If the add uses the same register as input and as output, then it is safe to continue to replace the add with a nop. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D95262
1 parent cec2443 commit f21704e

File tree

3 files changed

+107
-4
lines changed

3 files changed

+107
-4
lines changed

lld/ELF/Arch/PPC64.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -920,7 +920,15 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
920920
// that comes before it will already have computed the address of the
921921
// symbol.
922922
if (secondaryOp == 266) {
923-
write32(loc - 1, NOP);
923+
// Check if the add uses the same result register as the input register.
924+
uint32_t rt = (tlsInstr & 0x03E00000) >> 21; // bits 6-10
925+
uint32_t ra = (tlsInstr & 0x001F0000) >> 16; // bits 11-15
926+
if (ra == rt) {
927+
write32(loc - 1, NOP);
928+
} else {
929+
// mr rt, ra
930+
write32(loc - 1, 0x7C000378 | (rt << 16) | (ra << 21) | (ra << 11));
931+
}
924932
} else {
925933
uint32_t dFormOp = getPPCDFormOp(secondaryOp);
926934
if (dFormOp == 0)

lld/test/ELF/ppc64-tls-le-relax.s

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# REQUIRES: ppc
2+
# RUN: split-file %s %t
3+
# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/initexec -o %t/initexec.o
4+
# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/defs -o %t/defs.o
5+
# RUN: ld.lld %t/initexec.o %t/defs.o -o %t/out
6+
# RUN: llvm-objdump -d --mcpu=pwr10 --no-show-raw-insn %t/out | FileCheck %s
7+
8+
# CHECK-LABEL: <GetAddrT>:
9+
# CHECK: mflr 0
10+
# CHECK-NEXT: std 30, -16(1)
11+
# CHECK-NEXT: std 0, 16(1)
12+
# CHECK-NEXT: stdu 1, -48(1)
13+
# CHECK-NEXT: paddi 3, 13, -28672, 0
14+
# CHECK-NEXT: mr 30, 3
15+
# CHECK-NEXT: mr 3, 30
16+
# CHECK-NEXT: bl
17+
# CHECK-NEXT: mr 4, 30
18+
# CHECK-NEXT: addi 1, 1, 48
19+
# CHECK-NEXT: ld 0, 16(1)
20+
# CHECK-NEXT: ld 30, -16(1)
21+
# CHECK-NEXT: mtlr 0
22+
# CHECK-NEXT: b
23+
24+
## Generated From:
25+
## extern __thread unsigned TGlobal;
26+
## unsigned getConst(unsigned*);
27+
## unsigned addVal(unsigned, unsigned*);
28+
##
29+
## unsigned GetAddrT() {
30+
## return addVal(getConst(&TGlobal), &TGlobal);
31+
## }
32+
33+
//--- initexec
34+
GetAddrT:
35+
mflr 0
36+
std 30, -16(1)
37+
std 0, 16(1)
38+
stdu 1, -48(1)
39+
pld 3, TGlobal@got@tprel@pcrel(0), 1
40+
add 30, 3, TGlobal@tls@pcrel
41+
mr 3, 30
42+
bl getConst@notoc
43+
mr 4, 30
44+
addi 1, 1, 48
45+
ld 0, 16(1)
46+
ld 30, -16(1)
47+
mtlr 0
48+
b addVal@notoc
49+
50+
## Generated From:
51+
## __thread unsigned TGlobal;
52+
##
53+
## unsigned getConst(unsigned* A) {
54+
## return *A + 3;
55+
## }
56+
##
57+
## unsigned addVal(unsigned A, unsigned* B) {
58+
## return A + *B;
59+
## }
60+
61+
//--- defs
62+
.globl getConst
63+
getConst:
64+
lwz 3, 0(3)
65+
addi 3, 3, 3
66+
clrldi 3, 3, 32
67+
blr
68+
69+
.globl addVal
70+
addVal:
71+
lwz 4, 0(4)
72+
add 3, 4, 3
73+
clrldi 3, 3, 32
74+
blr
75+
76+
.section .tbss,"awT",@nobits
77+
.globl TGlobal
78+
.p2align 2
79+
TGlobal:
80+
.long 0
81+
.size TGlobal, 4

lld/test/ELF/ppc64-tls-pcrel-ie.s

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@ y:
5454

5555
# LE-RELOC: There are no relocations in this file.
5656

57-
# LE-SYM: Symbol table '.symtab' contains 7 entries:
58-
# LE-SYM: 5: 0000000000000000 0 TLS GLOBAL DEFAULT 6 x
59-
# LE-SYM: 6: 0000000000000004 0 TLS GLOBAL DEFAULT 6 y
57+
# LE-SYM: Symbol table '.symtab' contains 8 entries:
58+
# LE-SYM: 6: 0000000000000000 0 TLS GLOBAL DEFAULT 6 x
59+
# LE-SYM: 7: 0000000000000004 0 TLS GLOBAL DEFAULT 6 y
6060

6161
# LE-GOT: could not find section '.got'
6262

@@ -74,6 +74,20 @@ IEAddr:
7474
add 3, 3, x@tls@pcrel
7575
blr
7676

77+
# IE-LABEL: <IEAddrCopy>:
78+
# IE-NEXT: pld 3, 12488(0), 1
79+
# IE-NEXT: add 4, 3, 13
80+
# IE-NEXT: blr
81+
# LE-LABEL: <IEAddrCopy>:
82+
# LE-NEXT: paddi 3, 13, -28672, 0
83+
# LE-NEXT: mr 4, 3
84+
# LE-NEXT: blr
85+
.section .text_addr, "ax", %progbits
86+
IEAddrCopy:
87+
pld 3, x@got@tprel@pcrel(0), 1
88+
add 4, 3, x@tls@pcrel
89+
blr
90+
7791
# IE-LABEL: <IEVal>:
7892
# IE-NEXT: pld 3, 8408(0), 1
7993
# IE-NEXT: lwzx 3, 3, 13

0 commit comments

Comments
 (0)