Skip to content

Commit c3decdf

Browse files
authored
cranelift: Implement TLS on aarch64 Mach-O (Apple Silicon) (#5434)
* Implement TLS on Aarch64 Mach-O
* Add aarch64 macho TLS filetest
* Address review comments
  - `Aarch64` instead of `AArch64` in comments
  - Remove unnecessary guard in tls_value lowering
  - Remove unnecessary regalloc metadata in emission
* Use x1 as temporary register in emission
  - Instead of passing in a temporary register to use when emitting the TLS code, just use `x1`, as it's already in the clobber set. This also keeps the size of `aarch64::inst::Inst` at 32 bytes.
  - Update filetest accordingly
* Update aarch64 mach-o TLS filetest
1 parent d0570a7 commit c3decdf

File tree

7 files changed

+173
-4
lines changed

7 files changed

+173
-4
lines changed

cranelift/codegen/src/binemit/mod.rs

+12-2
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,20 @@ pub enum Reloc {
5656
/// Mach-O x86_64 32 bit signed PC relative offset to a `__thread_vars` entry.
5757
MachOX86_64Tlv,
5858

59-
/// AArch64 TLS GD
59+
/// Mach-O Aarch64 TLS
60+
/// PC-relative distance to the page of the TLVP slot.
61+
MachOAarch64TlsAdrPage21,
62+
63+
/// Mach-O Aarch64 TLS
64+
/// Offset within page of TLVP slot.
65+
MachOAarch64TlsAdrPageOff12,
66+
67+
/// Aarch64 TLS GD
6068
/// Set an ADRP immediate field to the top 21 bits of the final address. Checks for overflow.
6169
/// This is equivalent to `R_AARCH64_TLSGD_ADR_PAGE21` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#relocations-for-thread-local-storage)
6270
Aarch64TlsGdAdrPage21,
6371

64-
/// AArch64 TLS GD
72+
/// Aarch64 TLS GD
6573
/// Set the add immediate field to the low 12 bits of the final address. Does not check for overflow.
6674
/// This is equivalent to `R_AARCH64_TLSGD_ADD_LO12_NC` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#relocations-for-thread-local-storage)
6775
Aarch64TlsGdAddLo12Nc,
@@ -109,6 +117,8 @@ impl fmt::Display for Reloc {
109117

110118
Self::ElfX86_64TlsGd => write!(f, "ElfX86_64TlsGd"),
111119
Self::MachOX86_64Tlv => write!(f, "MachOX86_64Tlv"),
120+
Self::MachOAarch64TlsAdrPage21 => write!(f, "MachOAarch64TlsAdrPage21"),
121+
Self::MachOAarch64TlsAdrPageOff12 => write!(f, "MachOAarch64TlsAdrPageOff12"),
112122
Self::Aarch64TlsGdAdrPage21 => write!(f, "Aarch64TlsGdAdrPage21"),
113123
Self::Aarch64TlsGdAddLo12Nc => write!(f, "Aarch64TlsGdAddLo12Nc"),
114124
Self::Aarch64AdrGotPage21 => write!(f, "Aarch64AdrGotPage21"),

cranelift/codegen/src/isa/aarch64/inst.isle

+10
Original file line numberDiff line numberDiff line change
@@ -927,6 +927,10 @@
927927
(symbol ExternalName)
928928
(rd WritableReg))
929929

930+
(MachOTlsGetAddr
931+
(symbol ExternalName)
932+
(rd WritableReg))
933+
930934
;; An unwind pseudo-instruction.
931935
(Unwind
932936
(inst UnwindInst))
@@ -3591,6 +3595,12 @@
35913595
(_ Unit (emit (MInst.ElfTlsGetAddr name dst))))
35923596
dst))
35933597

3598+
(decl macho_tls_get_addr (ExternalName) Reg)
3599+
(rule (macho_tls_get_addr name)
3600+
(let ((dst WritableReg (temp_writable_reg $I64))
3601+
(_ Unit (emit (MInst.MachOTlsGetAddr name dst))))
3602+
dst))
3603+
35943604
;; A tuple of `ProducesFlags` and `IntCC`.
35953605
(type FlagsAndCC (enum (FlagsAndCC (flags ProducesFlags)
35963606
(cc IntCC))))

cranelift/codegen/src/isa/aarch64/inst/emit.rs

+48
Original file line numberDiff line numberDiff line change
@@ -3458,6 +3458,54 @@ impl MachInstEmit for Inst {
34583458
sink.put4(0xd503201f);
34593459
}
34603460

3461+
&Inst::MachOTlsGetAddr { ref symbol, rd } => {
3462+
// Each thread local variable gets a descriptor, where the first xword of the descriptor is a pointer
3463+
// to a function that takes the descriptor address in x0, and after the function returns x0
3464+
// contains the address for the thread local variable
3465+
//
3466+
// what we want to emit is basically:
3467+
//
3468+
// adrp x0, <label>@TLVPPAGE ; Load the address of the page of the thread local variable pointer (TLVP)
3469+
// ldr x0, [x0, <label>@TLVPPAGEOFF] ; Load the descriptor's address into x0
3470+
// ldr x1, [x0] ; Load the function pointer (the first part of the descriptor)
3471+
// blr x1 ; Call the function pointer with the descriptor address in x0
3472+
// ; x0 now contains the TLV address
3473+
3474+
let rd = allocs.next_writable(rd);
3475+
assert_eq!(xreg(0), rd.to_reg());
3476+
let rtmp = writable_xreg(1);
3477+
3478+
// adrp x0, <label>@TLVPPAGE
3479+
sink.add_reloc(Reloc::MachOAarch64TlsAdrPage21, symbol, 0);
3480+
sink.put4(0x90000000);
3481+
3482+
// ldr x0, [x0, <label>@TLVPPAGEOFF]
3483+
sink.add_reloc(Reloc::MachOAarch64TlsAdrPageOff12, symbol, 0);
3484+
sink.put4(0xf9400000);
3485+
3486+
// load [x0] into temp register
3487+
Inst::ULoad64 {
3488+
rd: rtmp,
3489+
mem: AMode::reg(rd.to_reg()),
3490+
flags: MemFlags::trusted(),
3491+
}
3492+
.emit(&[], sink, emit_info, state);
3493+
3494+
// call function pointer in temp register
3495+
Inst::CallInd {
3496+
info: crate::isa::Box::new(CallIndInfo {
3497+
rn: rtmp.to_reg(),
3498+
uses: smallvec![],
3499+
defs: smallvec![],
3500+
clobbers: PRegSet::empty(),
3501+
opcode: Opcode::CallIndirect,
3502+
caller_callconv: CallConv::AppleAarch64,
3503+
callee_callconv: CallConv::AppleAarch64,
3504+
}),
3505+
}
3506+
.emit(&[], sink, emit_info, state);
3507+
}
3508+
34613509
&Inst::Unwind { ref inst } => {
34623510
sink.add_unwind(inst.clone());
34633511
}

cranelift/codegen/src/isa/aarch64/inst/mod.rs

+11
Original file line numberDiff line numberDiff line change
@@ -907,6 +907,13 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
907907
clobbers.remove(regs::xreg_preg(0));
908908
collector.reg_clobbers(clobbers);
909909
}
910+
&Inst::MachOTlsGetAddr { rd, .. } => {
911+
collector.reg_fixed_def(rd, regs::xreg(0));
912+
let mut clobbers =
913+
AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::AppleAarch64);
914+
clobbers.remove(regs::xreg_preg(0));
915+
collector.reg_clobbers(clobbers);
916+
}
910917
&Inst::Unwind { .. } => {}
911918
&Inst::EmitIsland { .. } => {}
912919
&Inst::DummyUse { reg } => {
@@ -2701,6 +2708,10 @@ impl Inst {
27012708
let rd = pretty_print_reg(rd.to_reg(), allocs);
27022709
format!("elf_tls_get_addr {}, {}", rd, symbol.display(None))
27032710
}
2711+
&Inst::MachOTlsGetAddr { ref symbol, rd } => {
2712+
let rd = pretty_print_reg(rd.to_reg(), allocs);
2713+
format!("macho_tls_get_addr {}, {}", rd, symbol.display(None))
2714+
}
27042715
&Inst::Unwind { ref inst } => {
27052716
format!("unwind {:?}", inst)
27062717
}

cranelift/codegen/src/isa/aarch64/lower.isle

+4-2
Original file line numberDiff line numberDiff line change
@@ -2574,10 +2574,12 @@
25742574

25752575
;;; Rules for `tls_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
25762576

2577-
(rule (lower (tls_value (symbol_value_data name _ _)))
2578-
(if (tls_model_is_elf_gd))
2577+
(rule (lower (has_type (tls_model (TlsModel.ElfGd)) (tls_value (symbol_value_data name _ _))))
25792578
(elf_tls_get_addr name))
25802579

2580+
(rule (lower (has_type (tls_model (TlsModel.Macho)) (tls_value (symbol_value_data name _ _))))
2581+
(macho_tls_get_addr name))
2582+
25812583
;;; Rules for `fcvt_low_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
25822584

25832585
(rule (lower (has_type $F64X2 (fcvt_low_from_sint val)))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
test compile precise-output
2+
set tls_model=macho
3+
target aarch64
4+
5+
function u0:0(i32) -> i32, i64 {
6+
gv0 = symbol colocated tls u1:0
7+
8+
block0(v0: i32):
9+
v1 = global_value.i64 gv0
10+
return v0, v1
11+
}
12+
13+
; VCode:
14+
; stp fp, lr, [sp, #-16]!
15+
; mov fp, sp
16+
; str x24, [sp, #-16]!
17+
; stp d14, d15, [sp, #-16]!
18+
; stp d12, d13, [sp, #-16]!
19+
; stp d10, d11, [sp, #-16]!
20+
; stp d8, d9, [sp, #-16]!
21+
; block0:
22+
; mov x24, x0
23+
; macho_tls_get_addr x0, userextname0
24+
; mov x1, x0
25+
; mov x0, x24
26+
; ldp d8, d9, [sp], #16
27+
; ldp d10, d11, [sp], #16
28+
; ldp d12, d13, [sp], #16
29+
; ldp d14, d15, [sp], #16
30+
; ldr x24, [sp], #16
31+
; ldp fp, lr, [sp], #16
32+
; ret
33+
;
34+
; Disassembled:
35+
; block0: ; offset 0x0
36+
; stp x29, x30, [sp, #-0x10]!
37+
; mov x29, sp
38+
; str x24, [sp, #-0x10]!
39+
; stp d14, d15, [sp, #-0x10]!
40+
; stp d12, d13, [sp, #-0x10]!
41+
; stp d10, d11, [sp, #-0x10]!
42+
; stp d8, d9, [sp, #-0x10]!
43+
; block1: ; offset 0x1c
44+
; mov x24, x0
45+
; adrp x0, #0 ; reloc_external MachOAarch64TlsAdrPage21 u1:0 0
46+
; ldr x0, [x0] ; reloc_external MachOAarch64TlsAdrPageOff12 u1:0 0
47+
; ldr x1, [x0]
48+
; blr x1
49+
; mov x1, x0
50+
; mov x0, x24
51+
; ldp d8, d9, [sp], #0x10
52+
; ldp d10, d11, [sp], #0x10
53+
; ldp d12, d13, [sp], #0x10
54+
; ldp d14, d15, [sp], #0x10
55+
; ldr x24, [sp], #0x10
56+
; ldp x29, x30, [sp], #0x10
57+
; ret
58+

cranelift/object/src/backend.rs

+30
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,36 @@ impl ObjectModule {
654654
32,
655655
)
656656
}
657+
Reloc::MachOAarch64TlsAdrPage21 => {
658+
assert_eq!(
659+
self.object.format(),
660+
object::BinaryFormat::MachO,
661+
"MachOAarch64TlsAdrPage21 is not supported for this file format"
662+
);
663+
(
664+
RelocationKind::MachO {
665+
value: object::macho::ARM64_RELOC_TLVP_LOAD_PAGE21,
666+
relative: true,
667+
},
668+
RelocationEncoding::Generic,
669+
21,
670+
)
671+
}
672+
Reloc::MachOAarch64TlsAdrPageOff12 => {
673+
assert_eq!(
674+
self.object.format(),
675+
object::BinaryFormat::MachO,
676+
"MachOAarch64TlsAdrPageOff12 is not supported for this file format"
677+
);
678+
(
679+
RelocationKind::MachO {
680+
value: object::macho::ARM64_RELOC_TLVP_LOAD_PAGEOFF12,
681+
relative: false,
682+
},
683+
RelocationEncoding::Generic,
684+
12,
685+
)
686+
}
657687
Reloc::Aarch64TlsGdAdrPage21 => {
658688
assert_eq!(
659689
self.object.format(),

0 commit comments

Comments
 (0)