1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1
2
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2
3
3
- ; GCN-LABEL: {{^}}lsh8_or_and:
4
- ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050400
5
- ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
6
4
define amdgpu_kernel void @lsh8_or_and (i32 addrspace (1 )* nocapture %arg , i32 %arg1 ) {
5
+ ; GCN-LABEL: lsh8_or_and:
6
+ ; GCN: ; %bb.0: ; %bb
7
+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
8
+ ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
9
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
10
+ ; GCN-NEXT: v_mov_b32_e32 v3, 0x6050400
11
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
12
+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
13
+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
14
+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
15
+ ; GCN-NEXT: flat_load_dword v2, v[0:1]
16
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
17
+ ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
18
+ ; GCN-NEXT: flat_store_dword v[0:1], v2
19
+ ; GCN-NEXT: s_endpgm
7
20
bb:
8
21
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
9
22
%gep = getelementptr i32 , i32 addrspace (1 )* %arg , i32 %id
15
28
ret void
16
29
}
17
30
18
- ; GCN-LABEL: {{^}}lsr24_or_and:
19
- ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
20
- ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
21
31
define amdgpu_kernel void @lsr24_or_and (i32 addrspace (1 )* nocapture %arg , i32 %arg1 ) {
32
+ ; GCN-LABEL: lsr24_or_and:
33
+ ; GCN: ; %bb.0: ; %bb
34
+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
35
+ ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
36
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
37
+ ; GCN-NEXT: v_mov_b32_e32 v3, 0x7060503
38
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
39
+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
40
+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
41
+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
42
+ ; GCN-NEXT: flat_load_dword v2, v[0:1]
43
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
44
+ ; GCN-NEXT: v_perm_b32 v2, s0, v2, v3
45
+ ; GCN-NEXT: flat_store_dword v[0:1], v2
46
+ ; GCN-NEXT: s_endpgm
22
47
bb:
23
48
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
24
49
%gep = getelementptr i32 , i32 addrspace (1 )* %arg , i32 %id
30
55
ret void
31
56
}
32
57
33
- ; GCN-LABEL: {{^}}and_or_lsr24:
34
- ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
35
- ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
36
58
define amdgpu_kernel void @and_or_lsr24 (i32 addrspace (1 )* nocapture %arg , i32 %arg1 ) {
59
+ ; GCN-LABEL: and_or_lsr24:
60
+ ; GCN: ; %bb.0: ; %bb
61
+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
62
+ ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
63
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
64
+ ; GCN-NEXT: v_mov_b32_e32 v3, 0x7060503
65
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
66
+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
67
+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
68
+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
69
+ ; GCN-NEXT: flat_load_dword v2, v[0:1]
70
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
71
+ ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
72
+ ; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
73
+ ; GCN-NEXT: flat_store_dword v[0:1], v2
74
+ ; GCN-NEXT: s_endpgm
37
75
bb:
38
76
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
39
77
%gep = getelementptr i32 , i32 addrspace (1 )* %arg , i32 %id
46
84
ret void
47
85
}
48
86
49
- ; GCN-LABEL: {{^}}and_or_and:
50
- ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020500
51
- ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
52
87
define amdgpu_kernel void @and_or_and (i32 addrspace (1 )* nocapture %arg , i32 %arg1 ) {
88
+ ; GCN-LABEL: and_or_and:
89
+ ; GCN: ; %bb.0: ; %bb
90
+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
91
+ ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
92
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
93
+ ; GCN-NEXT: v_mov_b32_e32 v3, 0x7020500
94
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
95
+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
96
+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
97
+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
98
+ ; GCN-NEXT: flat_load_dword v2, v[0:1]
99
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
100
+ ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
101
+ ; GCN-NEXT: flat_store_dword v[0:1], v2
102
+ ; GCN-NEXT: s_endpgm
53
103
bb:
54
104
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
55
105
%gep = getelementptr i32 , i32 addrspace (1 )* %arg , i32 %id
61
111
ret void
62
112
}
63
113
64
- ; GCN-LABEL: {{^}}lsh8_or_lsr24:
65
- ; GCN: v_alignbit_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, 24
66
114
define amdgpu_kernel void @lsh8_or_lsr24 (i32 addrspace (1 )* nocapture %arg , i32 %arg1 ) {
115
+ ; GCN-LABEL: lsh8_or_lsr24:
116
+ ; GCN: ; %bb.0: ; %bb
117
+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
118
+ ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
119
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
120
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
121
+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
122
+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
123
+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
124
+ ; GCN-NEXT: flat_load_dword v2, v[0:1]
125
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
126
+ ; GCN-NEXT: v_alignbit_b32 v2, v2, s0, 24
127
+ ; GCN-NEXT: flat_store_dword v[0:1], v2
128
+ ; GCN-NEXT: s_endpgm
67
129
bb:
68
130
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
69
131
%gep = getelementptr i32 , i32 addrspace (1 )* %arg , i32 %id
75
137
ret void
76
138
}
77
139
78
- ; GCN-LABEL: {{^}}lsh16_or_lsr24:
79
- ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x5040c03
80
- ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
81
140
define amdgpu_kernel void @lsh16_or_lsr24 (i32 addrspace (1 )* nocapture %arg , i32 %arg1 ) {
141
+ ; GCN-LABEL: lsh16_or_lsr24:
142
+ ; GCN: ; %bb.0: ; %bb
143
+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
144
+ ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
145
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
146
+ ; GCN-NEXT: v_mov_b32_e32 v3, 0x5040c03
147
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
148
+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
149
+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
150
+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
151
+ ; GCN-NEXT: flat_load_dword v2, v[0:1]
152
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
153
+ ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
154
+ ; GCN-NEXT: flat_store_dword v[0:1], v2
155
+ ; GCN-NEXT: s_endpgm
82
156
bb:
83
157
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
84
158
%gep = getelementptr i32 , i32 addrspace (1 )* %arg , i32 %id
90
164
ret void
91
165
}
92
166
93
- ; GCN-LABEL: {{^}}and_xor_and:
94
- ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
95
- ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
96
167
define amdgpu_kernel void @and_xor_and (i32 addrspace (1 )* nocapture %arg , i32 %arg1 ) {
168
+ ; GCN-LABEL: and_xor_and:
169
+ ; GCN: ; %bb.0: ; %bb
170
+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
171
+ ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
172
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
173
+ ; GCN-NEXT: v_mov_b32_e32 v3, 0x7020104
174
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
175
+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
176
+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
177
+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
178
+ ; GCN-NEXT: flat_load_dword v2, v[0:1]
179
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
180
+ ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
181
+ ; GCN-NEXT: flat_store_dword v[0:1], v2
182
+ ; GCN-NEXT: s_endpgm
97
183
bb:
98
184
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
99
185
%gep = getelementptr i32 , i32 addrspace (1 )* %arg , i32 %id
@@ -105,13 +191,25 @@ bb:
105
191
ret void
106
192
}
107
193
108
- ; GCN-LABEL: {{^}}and_or_or_and:
109
- ; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0xff00
110
- ; GCN: s_or_b32 [[SREG:s[0-9]+]], s{{[0-9]+}}, 0xffff0000
111
- ; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, v{{[0-9]+}}
112
- ; GCN: v_or_b32_e32 v{{[0-9]+}}, [[SREG]], [[VREG]]
113
194
; FIXME: the code generated here should have been a "v_perm_b32" with a 0xffff0500 mask.
114
195
define amdgpu_kernel void @and_or_or_and (i32 addrspace (1 )* nocapture %arg , i32 %arg1 ) {
196
+ ; GCN-LABEL: and_or_or_and:
197
+ ; GCN: ; %bb.0: ; %bb
198
+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
199
+ ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
200
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
201
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
202
+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
203
+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
204
+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
205
+ ; GCN-NEXT: flat_load_dword v2, v[0:1]
206
+ ; GCN-NEXT: s_and_b32 s0, s0, 0xff00
207
+ ; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000
208
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
209
+ ; GCN-NEXT: v_and_b32_e32 v2, 0xff00ff, v2
210
+ ; GCN-NEXT: v_or_b32_e32 v2, s0, v2
211
+ ; GCN-NEXT: flat_store_dword v[0:1], v2
212
+ ; GCN-NEXT: s_endpgm
115
213
bb:
116
214
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
117
215
%gep = getelementptr i32 , i32 addrspace (1 )* %arg , i32 %id
@@ -124,10 +222,22 @@ bb:
124
222
ret void
125
223
}
126
224
127
- ; GCN-LABEL: {{^}}and_or_and_shl:
128
- ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
129
- ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
130
225
define amdgpu_kernel void @and_or_and_shl (i32 addrspace (1 )* nocapture %arg , i32 %arg1 ) {
226
+ ; GCN-LABEL: and_or_and_shl:
227
+ ; GCN: ; %bb.0: ; %bb
228
+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
229
+ ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
230
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
231
+ ; GCN-NEXT: v_mov_b32_e32 v3, 0x50c0c00
232
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
233
+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
234
+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
235
+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
236
+ ; GCN-NEXT: flat_load_dword v2, v[0:1]
237
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
238
+ ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
239
+ ; GCN-NEXT: flat_store_dword v[0:1], v2
240
+ ; GCN-NEXT: s_endpgm
131
241
bb:
132
242
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
133
243
%gep = getelementptr i32 , i32 addrspace (1 )* %arg , i32 %id
@@ -140,10 +250,22 @@ bb:
140
250
ret void
141
251
}
142
252
143
- ; GCN-LABEL: {{^}}or_and_or:
144
- ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
145
- ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
146
253
define amdgpu_kernel void @or_and_or (i32 addrspace (1 )* nocapture %arg , i32 %arg1 ) {
254
+ ; GCN-LABEL: or_and_or:
255
+ ; GCN: ; %bb.0: ; %bb
256
+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
257
+ ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
258
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
259
+ ; GCN-NEXT: v_mov_b32_e32 v3, 0x7020104
260
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
261
+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
262
+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
263
+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
264
+ ; GCN-NEXT: flat_load_dword v2, v[0:1]
265
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
266
+ ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
267
+ ; GCN-NEXT: flat_store_dword v[0:1], v2
268
+ ; GCN-NEXT: s_endpgm
147
269
bb:
148
270
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
149
271
%gep = getelementptr i32 , i32 addrspace (1 )* %arg , i32 %id
@@ -155,16 +277,31 @@ bb:
155
277
ret void
156
278
}
157
279
158
- ; GCN-LABEL: {{^}}known_ffff0500:
159
- ; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
160
- ; GCN: s_and_b32 [[SREG:s[0-9]+]], [[SREG]], 0xff00
161
- ; GCN: s_or_b32 [[SREG]], [[SREG]], 0xffff0000
162
- ; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, [[VREG]]
163
- ; GCN: v_or_b32_e32 [[VREG]], [[SREG]], [[VREG]]
164
- ; GCN: store_dword v[{{[0-9:]+}}], [[VREG]]{{$}}
165
- ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
166
280
; FIXME: the code generated here should have been a "v_perm_b32" with a 0xffff0500 mask.
167
281
define amdgpu_kernel void @known_ffff0500 (i32 addrspace (1 )* nocapture %arg , i32 %arg1 ) {
282
+ ; GCN-LABEL: known_ffff0500:
283
+ ; GCN: ; %bb.0: ; %bb
284
+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
285
+ ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
286
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
287
+ ; GCN-NEXT: v_mov_b32_e32 v5, 0xffff8004
288
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
289
+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
290
+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
291
+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
292
+ ; GCN-NEXT: flat_load_dword v4, v[0:1]
293
+ ; GCN-NEXT: s_bitset1_b32 s0, 15
294
+ ; GCN-NEXT: s_and_b32 s0, s0, 0xff00
295
+ ; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000
296
+ ; GCN-NEXT: v_mov_b32_e32 v2, s2
297
+ ; GCN-NEXT: v_mov_b32_e32 v3, s3
298
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
299
+ ; GCN-NEXT: v_or_b32_e32 v4, 4, v4
300
+ ; GCN-NEXT: v_and_b32_e32 v4, 0xff00ff, v4
301
+ ; GCN-NEXT: v_or_b32_e32 v4, s0, v4
302
+ ; GCN-NEXT: flat_store_dword v[0:1], v4
303
+ ; GCN-NEXT: flat_store_dword v[2:3], v5
304
+ ; GCN-NEXT: s_endpgm
168
305
bb:
169
306
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
170
307
%gep = getelementptr i32 , i32 addrspace (1 )* %arg , i32 %id
@@ -181,12 +318,27 @@ bb:
181
318
ret void
182
319
}
183
320
184
- ; GCN-LABEL: {{^}}known_050c0c00:
185
- ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
186
- ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 4{{$}}
187
- ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
188
- ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
189
321
define amdgpu_kernel void @known_050c0c00 (i32 addrspace (1 )* nocapture %arg , i32 %arg1 ) {
322
+ ; GCN-LABEL: known_050c0c00:
323
+ ; GCN: ; %bb.0: ; %bb
324
+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
325
+ ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
326
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
327
+ ; GCN-NEXT: v_mov_b32_e32 v5, 0x50c0c00
328
+ ; GCN-NEXT: v_mov_b32_e32 v6, 4
329
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
330
+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
331
+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
332
+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
333
+ ; GCN-NEXT: flat_load_dword v4, v[0:1]
334
+ ; GCN-NEXT: s_or_b32 s0, s0, 4
335
+ ; GCN-NEXT: v_mov_b32_e32 v2, s2
336
+ ; GCN-NEXT: v_mov_b32_e32 v3, s3
337
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
338
+ ; GCN-NEXT: v_perm_b32 v4, v4, s0, v5
339
+ ; GCN-NEXT: flat_store_dword v[0:1], v4
340
+ ; GCN-NEXT: flat_store_dword v[2:3], v6
341
+ ; GCN-NEXT: s_endpgm
190
342
bb:
191
343
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
192
344
%gep = getelementptr i32 , i32 addrspace (1 )* %arg , i32 %id
@@ -202,12 +354,28 @@ bb:
202
354
ret void
203
355
}
204
356
205
- ; GCN-LABEL: {{^}}known_ffff8004:
206
- ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
207
- ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
208
- ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
209
- ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
210
357
define amdgpu_kernel void @known_ffff8004 (i32 addrspace (1 )* nocapture %arg , i32 %arg1 ) {
358
+ ; GCN-LABEL: known_ffff8004:
359
+ ; GCN: ; %bb.0: ; %bb
360
+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
361
+ ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
362
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
363
+ ; GCN-NEXT: v_mov_b32_e32 v5, 0xffff0500
364
+ ; GCN-NEXT: v_mov_b32_e32 v6, 0xffff8004
365
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
366
+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
367
+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
368
+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
369
+ ; GCN-NEXT: flat_load_dword v4, v[0:1]
370
+ ; GCN-NEXT: s_or_b32 s0, s0, 4
371
+ ; GCN-NEXT: v_mov_b32_e32 v2, s2
372
+ ; GCN-NEXT: v_mov_b32_e32 v3, s3
373
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
374
+ ; GCN-NEXT: v_or_b32_e32 v4, 0x8000, v4
375
+ ; GCN-NEXT: v_perm_b32 v4, v4, s0, v5
376
+ ; GCN-NEXT: flat_store_dword v[0:1], v4
377
+ ; GCN-NEXT: flat_store_dword v[2:3], v6
378
+ ; GCN-NEXT: s_endpgm
211
379
bb:
212
380
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
213
381
%gep = getelementptr i32 , i32 addrspace (1 )* %arg , i32 %id
0 commit comments