Skip to content

Commit 7a78756

Browse files
committed
[LSR] Regenerate test checks (NFC)
1 parent 5041442 commit 7a78756

File tree

5 files changed

+554
-150
lines changed

5 files changed

+554
-150
lines changed

llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll

+187 -44
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,38 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
12
; RUN: llc -opaque-pointers=0 -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=A9
23

34
; @simple is the most basic chain of address induction variables. Chaining
45
; saves at least one register and avoids complex addressing and setup
56
; code.
67
;
7-
; A9: @simple
88
; no expensive address computation in the preheader
9-
; A9: lsl
10-
; A9-NOT: lsl
11-
; A9: %loop
129
; no complex address modes
13-
; A9-NOT: lsl
1410
define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
11+
; A9-LABEL: simple:
12+
; A9: @ %bb.0: @ %entry
13+
; A9-NEXT: .save {r4, r5, r6, lr}
14+
; A9-NEXT: push {r4, r5, r6, lr}
15+
; A9-NEXT: mov r3, r0
16+
; A9-NEXT: lsls r2, r2, #2
17+
; A9-NEXT: movs r0, #0
18+
; A9-NEXT: .LBB0_1: @ %loop
19+
; A9-NEXT: @ =>This Inner Loop Header: Depth=1
20+
; A9-NEXT: add.w lr, r3, r2
21+
; A9-NEXT: ldr.w r12, [r3, r2]
22+
; A9-NEXT: ldr r3, [r3]
23+
; A9-NEXT: add.w r4, lr, r2
24+
; A9-NEXT: ldr.w r6, [lr, r2]
25+
; A9-NEXT: add r0, r3
26+
; A9-NEXT: adds r3, r4, r2
27+
; A9-NEXT: add r0, r12
28+
; A9-NEXT: ldr r5, [r4, r2]
29+
; A9-NEXT: add r0, r6
30+
; A9-NEXT: add r3, r2
31+
; A9-NEXT: add r0, r5
32+
; A9-NEXT: cmp r3, r1
33+
; A9-NEXT: bne .LBB0_1
34+
; A9-NEXT: @ %bb.2: @ %exit
35+
; A9-NEXT: pop {r4, r5, r6, pc}
1536
entry:
1637
br label %loop
1738
loop:
@@ -37,15 +58,34 @@ exit:
3758

3859
; @user is not currently chained because the IV is live across memory ops.
3960
;
40-
; A9: @user
4161
; stride multiples computed in the preheader
42-
; A9: lsl
43-
; A9: lsl
44-
; A9: %loop
4562
; complex address modes
46-
; A9: lsl
47-
; A9: lsl
4863
define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
64+
; A9-LABEL: user:
65+
; A9: @ %bb.0: @ %entry
66+
; A9-NEXT: .save {r4, r5, r6, r7, lr}
67+
; A9-NEXT: push {r4, r5, r6, r7, lr}
68+
; A9-NEXT: add.w r3, r2, r2, lsl #1
69+
; A9-NEXT: lsl.w r12, r2, #4
70+
; A9-NEXT: lsl.w lr, r3, #2
71+
; A9-NEXT: movs r3, #0
72+
; A9-NEXT: .LBB1_1: @ %loop
73+
; A9-NEXT: @ =>This Inner Loop Header: Depth=1
74+
; A9-NEXT: ldr r4, [r0]
75+
; A9-NEXT: ldr.w r5, [r0, r2, lsl #3]
76+
; A9-NEXT: ldr.w r6, [r0, r2, lsl #2]
77+
; A9-NEXT: add r3, r4
78+
; A9-NEXT: ldr.w r7, [r0, lr]
79+
; A9-NEXT: add r3, r6
80+
; A9-NEXT: add r3, r5
81+
; A9-NEXT: add r3, r7
82+
; A9-NEXT: str r3, [r0]
83+
; A9-NEXT: add r0, r12
84+
; A9-NEXT: cmp r0, r1
85+
; A9-NEXT: bne .LBB1_1
86+
; A9-NEXT: @ %bb.2: @ %exit
87+
; A9-NEXT: mov r0, r3
88+
; A9-NEXT: pop {r4, r5, r6, r7, pc}
4989
entry:
5090
br label %loop
5191
loop:
@@ -75,16 +115,43 @@ exit:
75115
; used to do, and exactly what we don't want to do. LSR's new IV
76116
; chaining feature should now undo the damage.
77117
;
78-
; A9: extrastride:
79118
; no spills
80-
; A9-NOT: str
81119
; only one stride multiple in the preheader
82-
; A9: lsl
83-
; A9-NOT: {{str r|lsl}}
84-
; A9: %for.body{{$}}
85120
; no complex address modes or reloads
86-
; A9-NOT: {{ldr .*[sp]|lsl}}
87121
define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
122+
; A9-LABEL: extrastride:
123+
; A9: @ %bb.0: @ %entry
124+
; A9-NEXT: .save {r4, r5, r6, r7, lr}
125+
; A9-NEXT: push {r4, r5, r6, r7, lr}
126+
; A9-NEXT: ldr.w r12, [sp, #24]
127+
; A9-NEXT: cmp.w r12, #0
128+
; A9-NEXT: beq .LBB2_3
129+
; A9-NEXT: @ %bb.1: @ %for.body.lr.ph
130+
; A9-NEXT: ldr r4, [sp, #20]
131+
; A9-NEXT: add.w lr, r3, r1
132+
; A9-NEXT: lsls r3, r4, #2
133+
; A9-NEXT: .LBB2_2: @ %for.body
134+
; A9-NEXT: @ =>This Inner Loop Header: Depth=1
135+
; A9-NEXT: adds r5, r0, r1
136+
; A9-NEXT: ldr r4, [r0, r1]
137+
; A9-NEXT: ldr r0, [r0]
138+
; A9-NEXT: subs.w r12, r12, #1
139+
; A9-NEXT: ldr r6, [r5, r1]
140+
; A9-NEXT: add r5, r1
141+
; A9-NEXT: add r0, r4
142+
; A9-NEXT: ldr r7, [r5, r1]
143+
; A9-NEXT: add r5, r1
144+
; A9-NEXT: add r0, r6
145+
; A9-NEXT: ldr r4, [r5, r1]
146+
; A9-NEXT: add r0, r7
147+
; A9-NEXT: add r0, r4
148+
; A9-NEXT: str r0, [r2]
149+
; A9-NEXT: add.w r0, r5, r1
150+
; A9-NEXT: add r2, r3
151+
; A9-NEXT: add r0, lr
152+
; A9-NEXT: bne .LBB2_2
153+
; A9-NEXT: .LBB2_3: @ %for.end
154+
; A9-NEXT: pop {r4, r5, r6, r7, pc}
88155
entry:
89156
%cmp8 = icmp eq i32 %z, 0
90157
br i1 %cmp8, label %for.end, label %for.body.lr.ph
@@ -136,10 +203,38 @@ for.end: ; preds = %for.body, %entry
136203
; }
137204
; where 's' can be folded into the addressing mode.
138205
; Consequently, we should *not* form any chains.
139-
;
140-
; A9: foldedidx:
141-
; A9: ldrb{{(.w)?}} {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3]
142206
define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
207+
; A9-LABEL: foldedidx:
208+
; A9: @ %bb.0: @ %entry
209+
; A9-NEXT: .save {r4, r5, r6, lr}
210+
; A9-NEXT: push {r4, r5, r6, lr}
211+
; A9-NEXT: mov.w lr, #0
212+
; A9-NEXT: .LBB3_1: @ %for.body
213+
; A9-NEXT: @ =>This Inner Loop Header: Depth=1
214+
; A9-NEXT: ldrb.w r12, [r0, lr]
215+
; A9-NEXT: add.w r4, r1, lr
216+
; A9-NEXT: ldrb.w r3, [r1, lr]
217+
; A9-NEXT: add r3, r12
218+
; A9-NEXT: strb.w r3, [r2, lr]
219+
; A9-NEXT: add.w r3, r0, lr
220+
; A9-NEXT: ldrb.w r12, [r3, #1]
221+
; A9-NEXT: ldrb r5, [r4, #1]
222+
; A9-NEXT: add r12, r5
223+
; A9-NEXT: add.w r5, r2, lr
224+
; A9-NEXT: strb.w r12, [r5, #1]
225+
; A9-NEXT: add.w lr, lr, #4
226+
; A9-NEXT: cmp.w lr, #400
227+
; A9-NEXT: ldrb.w r12, [r3, #2]
228+
; A9-NEXT: ldrb r6, [r4, #2]
229+
; A9-NEXT: add r6, r12
230+
; A9-NEXT: strb r6, [r5, #2]
231+
; A9-NEXT: ldrb r3, [r3, #3]
232+
; A9-NEXT: ldrb r6, [r4, #3]
233+
; A9-NEXT: add r3, r6
234+
; A9-NEXT: strb r3, [r5, #3]
235+
; A9-NEXT: bne .LBB3_1
236+
; A9-NEXT: @ %bb.2: @ %for.end
237+
; A9-NEXT: pop {r4, r5, r6, pc}
143238
entry:
144239
br label %for.body
145240

@@ -200,14 +295,45 @@ for.end: ; preds = %for.body
200295
;
201296
; Loads and stores should use post-increment addressing, no add's or add.w's.
202297
; Most importantly, there should be no spills or reloads!
203-
;
204-
; A9: testNeon:
205-
; A9: %.lr.ph
206-
; A9-NOT: lsl.w
207-
; A9-NOT: {{ldr|str|adds|add r}}
208-
; A9-NOT: add.w r
209-
; A9: bne
210298
define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i8>* nocapture %data) nounwind optsize {
299+
; A9-LABEL: testNeon:
300+
; A9: @ %bb.0:
301+
; A9-NEXT: .save {r4, r5, r7, lr}
302+
; A9-NEXT: push {r4, r5, r7, lr}
303+
; A9-NEXT: vmov.i32 q8, #0x0
304+
; A9-NEXT: cmp r2, #1
305+
; A9-NEXT: blt .LBB4_4
306+
; A9-NEXT: @ %bb.1: @ %.lr.ph
307+
; A9-NEXT: movs r5, #0
308+
; A9-NEXT: movw r4, #64464
309+
; A9-NEXT: sub.w r12, r5, r2, lsl #6
310+
; A9-NEXT: sub.w lr, r1, r1, lsl #4
311+
; A9-NEXT: movt r4, #65535
312+
; A9-NEXT: mov r5, r3
313+
; A9-NEXT: .LBB4_2: @ =>This Inner Loop Header: Depth=1
314+
; A9-NEXT: vld1.64 {d18}, [r0], r1
315+
; A9-NEXT: subs r2, #1
316+
; A9-NEXT: vld1.64 {d19}, [r0], r1
317+
; A9-NEXT: vst1.8 {d18, d19}, [r5]!
318+
; A9-NEXT: vld1.64 {d20}, [r0], r1
319+
; A9-NEXT: vld1.64 {d21}, [r0], r1
320+
; A9-NEXT: vst1.8 {d20, d21}, [r5]!
321+
; A9-NEXT: vld1.64 {d22}, [r0], r1
322+
; A9-NEXT: vadd.i8 q9, q9, q10
323+
; A9-NEXT: vld1.64 {d23}, [r0], r1
324+
; A9-NEXT: vst1.8 {d22, d23}, [r5]!
325+
; A9-NEXT: vld1.64 {d20}, [r0], r1
326+
; A9-NEXT: vadd.i8 q9, q9, q11
327+
; A9-NEXT: vld1.64 {d21}, [r0], lr
328+
; A9-NEXT: vadd.i8 q9, q9, q10
329+
; A9-NEXT: vadd.i8 q8, q8, q9
330+
; A9-NEXT: vst1.8 {d20, d21}, [r5], r4
331+
; A9-NEXT: bne .LBB4_2
332+
; A9-NEXT: @ %bb.3: @ %._crit_edge
333+
; A9-NEXT: add.w r3, r3, r12, lsl #4
334+
; A9-NEXT: .LBB4_4:
335+
; A9-NEXT: vst1.32 {d16, d17}, [r3]
336+
; A9-NEXT: pop {r4, r5, r7, pc}
211337
%1 = icmp sgt i32 %limit, 0
212338
br i1 %1, label %.lr.ph, label %45
213339

@@ -284,24 +410,41 @@ declare <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8*, i32) nounwind readonly
284410
; Handle chains in which the same offset is used for both loads and
285411
; stores to the same array.
286412
; rdar://11410078.
287-
;
288-
; A9: @testReuse
289-
; A9: %for.body
290-
; A9: vld1.8 {d{{[0-9]+}}}, [[BASE:[r[0-9]+]]], [[INC:r[0-9]]]
291-
; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
292-
; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
293-
; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
294-
; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
295-
; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
296-
; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
297-
; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
298-
; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
299-
; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
300-
; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
301-
; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
302-
; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]]
303-
; A9: bne
304413
define void @testReuse(i8* %src, i32 %stride) nounwind ssp {
414+
; A9-LABEL: testReuse:
415+
; A9: @ %bb.0: @ %entry
416+
; A9-NEXT: sub.w r12, r0, r1, lsl #2
417+
; A9-NEXT: sub.w r0, r1, r1, lsl #2
418+
; A9-NEXT: lsls r2, r0, #1
419+
; A9-NEXT: movs r3, #0
420+
; A9-NEXT: .LBB5_1: @ %for.body
421+
; A9-NEXT: @ =>This Inner Loop Header: Depth=1
422+
; A9-NEXT: add.w r0, r12, r3
423+
; A9-NEXT: adds r3, #8
424+
; A9-NEXT: vld1.8 {d16}, [r0], r1
425+
; A9-NEXT: cmp r3, #32
426+
; A9-NEXT: vld1.8 {d17}, [r0], r1
427+
; A9-NEXT: vhadd.u8 d16, d16, d17
428+
; A9-NEXT: vld1.8 {d18}, [r0], r1
429+
; A9-NEXT: vhadd.u8 d17, d17, d18
430+
; A9-NEXT: vld1.8 {d19}, [r0], r1
431+
; A9-NEXT: vhadd.u8 d18, d18, d19
432+
; A9-NEXT: vld1.8 {d20}, [r0], r1
433+
; A9-NEXT: vhadd.u8 d19, d19, d20
434+
; A9-NEXT: vld1.8 {d21}, [r0], r1
435+
; A9-NEXT: vhadd.u8 d20, d20, d21
436+
; A9-NEXT: vld1.8 {d22}, [r0], r1
437+
; A9-NEXT: vhadd.u8 d21, d21, d22
438+
; A9-NEXT: vld1.8 {d23}, [r0], r2
439+
; A9-NEXT: vst1.8 {d16}, [r0], r1
440+
; A9-NEXT: vst1.8 {d17}, [r0], r1
441+
; A9-NEXT: vst1.8 {d18}, [r0], r1
442+
; A9-NEXT: vst1.8 {d19}, [r0], r1
443+
; A9-NEXT: vst1.8 {d20}, [r0], r1
444+
; A9-NEXT: vst1.8 {d21}, [r0]
445+
; A9-NEXT: bne .LBB5_1
446+
; A9-NEXT: @ %bb.2: @ %for.end
447+
; A9-NEXT: bx lr
305448
entry:
306449
%mul = shl nsw i32 %stride, 2
307450
%idx.neg = sub i32 0, %mul

0 commit comments

Comments
 (0)