Skip to content

Commit aecdf15

Browse files
committed
[ARM] Do not emit ldrexd/strexd on Cortex-M chips
The ldrexd/strexd instructions are not supported on M-class chips, see for example
https://developer.arm.com/documentation/dui0489/e/arm-and-thumb-instructions/memory-access-instructions/ldrex-and-strex
which says:

> All these 32-bit Thumb instructions are available in ARMv6T2 and
> above, except that LDREXD and STREXD are not available in the ARMv7-M
> architecture.

Looking at the ARMv8-M architecture, it appears that these instructions aren't supported either. The Architecture Reference Manual lists ldrex/strex but not ldrexd/strexd:
https://developer.arm.com/documentation/ddi0553/bn/

Godbolt example on LLVM 11.0.0, which incorrectly emits ldrexd/strexd instructions:
https://llvm.godbolt.org/z/5qqPnE

Differential Revision: https://reviews.llvm.org/D95891
1 parent cd2f65b commit aecdf15

File tree

2 files changed

+38
-3
lines changed

2 files changed

+38
-3
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -18752,16 +18752,20 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1875218752
: AtomicExpansionKind::None;
1875318753
}
1875418754

18755+
// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
18756+
// bits, and up to 64 bits on the non-M profiles.
1875518757
TargetLowering::AtomicExpansionKind
1875618758
ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
1875718759
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
1875818760
// implement cmpxchg without spilling. If the address being exchanged is also
1875918761
// on the stack and close enough to the spill slot, this can lead to a
1876018762
// situation where the monitor always gets cleared and the atomic operation
1876118763
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
18764+
unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
1876218765
bool HasAtomicCmpXchg =
1876318766
!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
18764-
if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
18767+
if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
18768+
Size <= (Subtarget->isMClass() ? 32U : 64U))
1876518769
return AtomicExpansionKind::LLSC;
1876618770
return AtomicExpansionKind::None;
1876718771
}

llvm/test/CodeGen/ARM/atomic-64bit.ll

+33-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-LE
33
; RUN: llc < %s -mtriple=armebv7 -target-abi apcs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
44
; RUN: llc < %s -mtriple=thumbebv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-BE
5+
; RUN: llc < %s -mtriple=armv7m--none-eabi | FileCheck %s --check-prefix=CHECK-M
6+
; RUN: llc < %s -mtriple=armv8m--none-eabi | FileCheck %s --check-prefix=CHECK-M
57

68
define i64 @test1(i64* %ptr, i64 %val) {
79
; CHECK-LABEL: test1:
@@ -28,6 +30,8 @@ define i64 @test1(i64* %ptr, i64 %val) {
2830
; CHECK-THUMB: bne
2931
; CHECK-THUMB: dmb {{ish$}}
3032

33+
; CHECK-M: __sync_fetch_and_add_8
34+
3135
%r = atomicrmw add i64* %ptr, i64 %val seq_cst
3236
ret i64 %r
3337
}
@@ -57,6 +61,8 @@ define i64 @test2(i64* %ptr, i64 %val) {
5761
; CHECK-THUMB: bne
5862
; CHECK-THUMB: dmb {{ish$}}
5963

64+
; CHECK-M: __sync_fetch_and_sub_8
65+
6066
%r = atomicrmw sub i64* %ptr, i64 %val seq_cst
6167
ret i64 %r
6268
}
@@ -86,6 +92,8 @@ define i64 @test3(i64* %ptr, i64 %val) {
8692
; CHECK-THUMB: bne
8793
; CHECK-THUMB: dmb {{ish$}}
8894

95+
; CHECK-M: __sync_fetch_and_and_8
96+
8997
%r = atomicrmw and i64* %ptr, i64 %val seq_cst
9098
ret i64 %r
9199
}
@@ -115,6 +123,8 @@ define i64 @test4(i64* %ptr, i64 %val) {
115123
; CHECK-THUMB: bne
116124
; CHECK-THUMB: dmb {{ish$}}
117125

126+
; CHECK-M: __sync_fetch_and_or_8
127+
118128
%r = atomicrmw or i64* %ptr, i64 %val seq_cst
119129
ret i64 %r
120130
}
@@ -144,6 +154,8 @@ define i64 @test5(i64* %ptr, i64 %val) {
144154
; CHECK-THUMB: bne
145155
; CHECK-THUMB: dmb {{ish$}}
146156

157+
; CHECK-M: __sync_fetch_and_xor_8
158+
147159
%r = atomicrmw xor i64* %ptr, i64 %val seq_cst
148160
ret i64 %r
149161
}
@@ -165,6 +177,8 @@ define i64 @test6(i64* %ptr, i64 %val) {
165177
; CHECK-THUMB: bne
166178
; CHECK-THUMB: dmb {{ish$}}
167179

180+
; CHECK-M: __sync_lock_test_and_set_8
181+
168182
%r = atomicrmw xchg i64* %ptr, i64 %val seq_cst
169183
ret i64 %r
170184
}
@@ -199,12 +213,15 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
199213
; CHECK-THUMB: beq
200214
; CHECK-THUMB: dmb {{ish$}}
201215

216+
; CHECK-M: __sync_val_compare_and_swap_8
217+
202218
%pair = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst
203219
%r = extractvalue { i64, i1 } %pair, 0
204220
ret i64 %r
205221
}
206222

207-
; Compiles down to a single ldrexd
223+
; Compiles down to a single ldrexd, except on M class devices where ldrexd
224+
; isn't supported.
208225
define i64 @test8(i64* %ptr) {
209226
; CHECK-LABEL: test8:
210227
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
@@ -220,12 +237,15 @@ define i64 @test8(i64* %ptr) {
220237
; CHECK-THUMB-NOT: strexd
221238
; CHECK-THUMB: dmb {{ish$}}
222239

240+
; CHECK-M: __sync_val_compare_and_swap_8
241+
223242
%r = load atomic i64, i64* %ptr seq_cst, align 8
224243
ret i64 %r
225244
}
226245

227246
; Compiles down to atomicrmw xchg; there really isn't any more efficient
228-
; way to write it.
247+
; way to write it. Except on M class devices, where ldrexd/strexd aren't
248+
; supported.
229249
define void @test9(i64* %ptr, i64 %val) {
230250
; CHECK-LABEL: test9:
231251
; CHECK: dmb {{ish$}}
@@ -243,6 +263,8 @@ define void @test9(i64* %ptr, i64 %val) {
243263
; CHECK-THUMB: bne
244264
; CHECK-THUMB: dmb {{ish$}}
245265

266+
; CHECK-M: __sync_lock_test_and_set_8
267+
246268
store atomic i64 %val, i64* %ptr seq_cst, align 8
247269
ret void
248270
}
@@ -286,6 +308,8 @@ define i64 @test10(i64* %ptr, i64 %val) {
286308
; CHECK-THUMB: bne
287309
; CHECK-THUMB: dmb {{ish$}}
288310

311+
; CHECK-M: __sync_fetch_and_min_8
312+
289313
%r = atomicrmw min i64* %ptr, i64 %val seq_cst
290314
ret i64 %r
291315
}
@@ -329,6 +353,8 @@ define i64 @test11(i64* %ptr, i64 %val) {
329353
; CHECK-THUMB: bne
330354
; CHECK-THUMB: dmb {{ish$}}
331355

356+
; CHECK-M: __sync_fetch_and_umin_8
357+
332358
%r = atomicrmw umin i64* %ptr, i64 %val seq_cst
333359
ret i64 %r
334360
}
@@ -372,6 +398,8 @@ define i64 @test12(i64* %ptr, i64 %val) {
372398
; CHECK-THUMB: bne
373399
; CHECK-THUMB: dmb {{ish$}}
374400

401+
; CHECK-M: __sync_fetch_and_max_8
402+
375403
%r = atomicrmw max i64* %ptr, i64 %val seq_cst
376404
ret i64 %r
377405
}
@@ -414,6 +442,9 @@ define i64 @test13(i64* %ptr, i64 %val) {
414442
; CHECK-THUMB: cmp
415443
; CHECK-THUMB: bne
416444
; CHECK-THUMB: dmb {{ish$}}
445+
446+
; CHECK-M: __sync_fetch_and_umax_8
447+
417448
%r = atomicrmw umax i64* %ptr, i64 %val seq_cst
418449
ret i64 %r
419450
}

0 commit comments

Comments
 (0)