Skip to content

Commit 402f2ca

Browse files
committed
[ARM] Use ldrsb for more thumb1 loads.
Given a sextload i16, we can usually generate "ldrsh [rn, rm]". If we don't naturally have an [rn, rm] addressing mode, we can either generate "ldrh [rn, #0]; sxth" or "mov rm, #0; ldrsh [rn, rm]". We currently generate the first, always creating a sxth. They are both the same number of instructions, but if we generate the second then the mov #0 will likely be CSE'd or pulled out of a loop, etc. This adjusts the ISel patterns to do that, creating a mov instead of a sxth. Differential Revision: https://reviews.llvm.org/D98693
1 parent f596394 commit 402f2ca

File tree

4 files changed

+18
-22
lines changed

4 files changed

+18
-22
lines changed

llvm/lib/Target/ARM/ARMInstrThumb.td

+8-11
Original file line numberDiff line numberDiff line change
@@ -1659,19 +1659,16 @@ def : T1Pat<(post_store tGPR:$Rt, tGPR:$Rn, 4),
16591659
(tSTMIA_UPD tGPR:$Rn, tGPR:$Rt)>;
16601660

16611661
// If it's impossible to use [r,r] address mode for sextload, select to
1662-
// ldr{b|h} + sxt{b|h} instead.
1663-
def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
1664-
(tSXTB (tLDRBi t_addrmode_is1:$addr))>,
1665-
Requires<[IsThumb, IsThumb1Only, HasV6]>;
1666-
def : T1Pat<(sextloadi8 t_addrmode_rr:$addr),
1667-
(tSXTB (tLDRBr t_addrmode_rr:$addr))>,
1662+
// ldrs{b|h} r, 0 instead, in the hope that the mov 0 will be more likely to be
1663+
// commoned out than a sxth.
1664+
let AddedComplexity = 10 in {
1665+
def : T1Pat<(sextloadi8 tGPR:$Rn),
1666+
(tLDRSB tGPR:$Rn, (tMOVi8 0))>,
16681667
Requires<[IsThumb, IsThumb1Only, HasV6]>;
1669-
def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
1670-
(tSXTH (tLDRHi t_addrmode_is2:$addr))>,
1671-
Requires<[IsThumb, IsThumb1Only, HasV6]>;
1672-
def : T1Pat<(sextloadi16 t_addrmode_rr:$addr),
1673-
(tSXTH (tLDRHr t_addrmode_rr:$addr))>,
1668+
def : T1Pat<(sextloadi16 tGPR:$Rn),
1669+
(tLDRSH tGPR:$Rn, (tMOVi8 0))>,
16741670
Requires<[IsThumb, IsThumb1Only, HasV6]>;
1671+
}
16751672

16761673
def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
16771674
(tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>;

llvm/test/CodeGen/ARM/load.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,8 @@ entry:
9696
; Immediate offset of zero
9797

9898
; CHECK-LABEL: ldrsb_ri_zero
99-
; CHECK-T1: ldrb r0, [r0]
100-
; CHECK-T1: sxtb r0, r0
99+
; CHECK-T1: movs r1, #0
100+
; CHECK-T1: ldrsb r0, [r0, r1]
101101
; CHECK-T2: ldrsb.w r0, [r0]
102102
define i32 @ldrsb_ri_zero(i8* %p) {
103103
entry:
@@ -107,8 +107,8 @@ entry:
107107
}
108108

109109
; CHECK-LABEL: ldrsh_ri_zero
110-
; CHECK-T1: ldrh r0, [r0]
111-
; CHECK-T1: sxth r0, r0
110+
; CHECK-T1: movs r1, #0
111+
; CHECK-T1: ldrsh r0, [r0, r1]
112112
; CHECK-T2: ldrsh.w r0, [r0]
113113
define i32 @ldrsh_ri_zero(i16* %p) {
114114
entry:

llvm/test/CodeGen/ARM/select-imm.ll

+2-3
Original file line numberDiff line numberDiff line change
@@ -230,10 +230,9 @@ entry:
230230

231231
; THUMB1-LABEL: t9:
232232
; THUMB1: bl f
233-
; THUMB1: sxtb r1, r4
234-
; THUMB1: uxtb r0, r1
233+
; THUMB1: uxtb r0, r4
235234
; THUMB1: cmp r0, r0
236-
; THUMB1: adds r1, r1, #1
235+
; THUMB1: adds r1, r4, #1
237236
; THUMB1: mov r2, r0
238237
; THUMB1: adds r1, r1, #1
239238
; THUMB1: adds r2, r2, #1

llvm/test/CodeGen/Thumb/ldr_ext.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ define i32 @test3(i8* %t0) nounwind {
2626
; V5: lsls
2727
; V5: asrs
2828

29-
; V6: ldrb
30-
; V6: sxtb
29+
; V6: mov
30+
; V6: ldrsb
3131
%tmp.s = load i8, i8* %t0
3232
%tmp1.s = sext i8 %tmp.s to i32
3333
ret i32 %tmp1.s
@@ -38,8 +38,8 @@ define i32 @test4(i16* %t0) nounwind {
3838
; V5: lsls
3939
; V5: asrs
4040

41-
; V6: ldrh
42-
; V6: sxth
41+
; V6: mov
42+
; V6: ldrsh
4343
%tmp.s = load i16, i16* %t0
4444
%tmp1.s = sext i16 %tmp.s to i32
4545
ret i32 %tmp1.s

0 commit comments

Comments
 (0)