Skip to content

Commit f09a50c

Browse files
committed
[AMDGPU] Regenerate permute.ll test checks for future patch
1 parent 6f0ca6f commit f09a50c

File tree

1 file changed

+217
-49
lines changed

1 file changed

+217
-49
lines changed

llvm/test/CodeGen/AMDGPU/permute.ll

Lines changed: 217 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,22 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
12
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
23

3-
; GCN-LABEL: {{^}}lsh8_or_and:
4-
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050400
5-
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
64
define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
5+
; GCN-LABEL: lsh8_or_and:
6+
; GCN: ; %bb.0: ; %bb
7+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
8+
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
9+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
10+
; GCN-NEXT: v_mov_b32_e32 v3, 0x6050400
11+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
12+
; GCN-NEXT: v_mov_b32_e32 v1, s3
13+
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
14+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
15+
; GCN-NEXT: flat_load_dword v2, v[0:1]
16+
; GCN-NEXT: s_waitcnt vmcnt(0)
17+
; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
18+
; GCN-NEXT: flat_store_dword v[0:1], v2
19+
; GCN-NEXT: s_endpgm
720
bb:
821
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
922
%gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
@@ -15,10 +28,22 @@ bb:
1528
ret void
1629
}
1730

18-
; GCN-LABEL: {{^}}lsr24_or_and:
19-
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
20-
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
2131
define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
32+
; GCN-LABEL: lsr24_or_and:
33+
; GCN: ; %bb.0: ; %bb
34+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
35+
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
36+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
37+
; GCN-NEXT: v_mov_b32_e32 v3, 0x7060503
38+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
39+
; GCN-NEXT: v_mov_b32_e32 v1, s3
40+
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
41+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
42+
; GCN-NEXT: flat_load_dword v2, v[0:1]
43+
; GCN-NEXT: s_waitcnt vmcnt(0)
44+
; GCN-NEXT: v_perm_b32 v2, s0, v2, v3
45+
; GCN-NEXT: flat_store_dword v[0:1], v2
46+
; GCN-NEXT: s_endpgm
2247
bb:
2348
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
2449
%gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
@@ -30,10 +55,23 @@ bb:
3055
ret void
3156
}
3257

33-
; GCN-LABEL: {{^}}and_or_lsr24:
34-
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
35-
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
3658
define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
59+
; GCN-LABEL: and_or_lsr24:
60+
; GCN: ; %bb.0: ; %bb
61+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
62+
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
63+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
64+
; GCN-NEXT: v_mov_b32_e32 v3, 0x7060503
65+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
66+
; GCN-NEXT: v_mov_b32_e32 v1, s3
67+
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
68+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
69+
; GCN-NEXT: flat_load_dword v2, v[0:1]
70+
; GCN-NEXT: s_waitcnt vmcnt(0)
71+
; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
72+
; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
73+
; GCN-NEXT: flat_store_dword v[0:1], v2
74+
; GCN-NEXT: s_endpgm
3775
bb:
3876
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
3977
%gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
@@ -46,10 +84,22 @@ bb:
4684
ret void
4785
}
4886

49-
; GCN-LABEL: {{^}}and_or_and:
50-
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020500
51-
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
5287
define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
88+
; GCN-LABEL: and_or_and:
89+
; GCN: ; %bb.0: ; %bb
90+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
91+
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
92+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
93+
; GCN-NEXT: v_mov_b32_e32 v3, 0x7020500
94+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
95+
; GCN-NEXT: v_mov_b32_e32 v1, s3
96+
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
97+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
98+
; GCN-NEXT: flat_load_dword v2, v[0:1]
99+
; GCN-NEXT: s_waitcnt vmcnt(0)
100+
; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
101+
; GCN-NEXT: flat_store_dword v[0:1], v2
102+
; GCN-NEXT: s_endpgm
53103
bb:
54104
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
55105
%gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
@@ -61,9 +111,21 @@ bb:
61111
ret void
62112
}
63113

64-
; GCN-LABEL: {{^}}lsh8_or_lsr24:
65-
; GCN: v_alignbit_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, 24
66114
define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
115+
; GCN-LABEL: lsh8_or_lsr24:
116+
; GCN: ; %bb.0: ; %bb
117+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
118+
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
119+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
120+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
121+
; GCN-NEXT: v_mov_b32_e32 v1, s3
122+
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
123+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
124+
; GCN-NEXT: flat_load_dword v2, v[0:1]
125+
; GCN-NEXT: s_waitcnt vmcnt(0)
126+
; GCN-NEXT: v_alignbit_b32 v2, v2, s0, 24
127+
; GCN-NEXT: flat_store_dword v[0:1], v2
128+
; GCN-NEXT: s_endpgm
67129
bb:
68130
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
69131
%gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
@@ -75,10 +137,22 @@ bb:
75137
ret void
76138
}
77139

78-
; GCN-LABEL: {{^}}lsh16_or_lsr24:
79-
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x5040c03
80-
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
81140
define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
141+
; GCN-LABEL: lsh16_or_lsr24:
142+
; GCN: ; %bb.0: ; %bb
143+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
144+
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
145+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
146+
; GCN-NEXT: v_mov_b32_e32 v3, 0x5040c03
147+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
148+
; GCN-NEXT: v_mov_b32_e32 v1, s3
149+
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
150+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
151+
; GCN-NEXT: flat_load_dword v2, v[0:1]
152+
; GCN-NEXT: s_waitcnt vmcnt(0)
153+
; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
154+
; GCN-NEXT: flat_store_dword v[0:1], v2
155+
; GCN-NEXT: s_endpgm
82156
bb:
83157
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
84158
%gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
@@ -90,10 +164,22 @@ bb:
90164
ret void
91165
}
92166

93-
; GCN-LABEL: {{^}}and_xor_and:
94-
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
95-
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
96167
define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
168+
; GCN-LABEL: and_xor_and:
169+
; GCN: ; %bb.0: ; %bb
170+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
171+
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
172+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
173+
; GCN-NEXT: v_mov_b32_e32 v3, 0x7020104
174+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
175+
; GCN-NEXT: v_mov_b32_e32 v1, s3
176+
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
177+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
178+
; GCN-NEXT: flat_load_dword v2, v[0:1]
179+
; GCN-NEXT: s_waitcnt vmcnt(0)
180+
; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
181+
; GCN-NEXT: flat_store_dword v[0:1], v2
182+
; GCN-NEXT: s_endpgm
97183
bb:
98184
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
99185
%gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
@@ -105,13 +191,25 @@ bb:
105191
ret void
106192
}
107193

108-
; GCN-LABEL: {{^}}and_or_or_and:
109-
; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0xff00
110-
; GCN: s_or_b32 [[SREG:s[0-9]+]], s{{[0-9]+}}, 0xffff0000
111-
; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, v{{[0-9]+}}
112-
; GCN: v_or_b32_e32 v{{[0-9]+}}, [[SREG]], [[VREG]]
113194
; FIXME: there should have been a "v_perm_b32" with a 0xffff0500 mask here.
114195
define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
196+
; GCN-LABEL: and_or_or_and:
197+
; GCN: ; %bb.0: ; %bb
198+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
199+
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
200+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
201+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
202+
; GCN-NEXT: v_mov_b32_e32 v1, s3
203+
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
204+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
205+
; GCN-NEXT: flat_load_dword v2, v[0:1]
206+
; GCN-NEXT: s_and_b32 s0, s0, 0xff00
207+
; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000
208+
; GCN-NEXT: s_waitcnt vmcnt(0)
209+
; GCN-NEXT: v_and_b32_e32 v2, 0xff00ff, v2
210+
; GCN-NEXT: v_or_b32_e32 v2, s0, v2
211+
; GCN-NEXT: flat_store_dword v[0:1], v2
212+
; GCN-NEXT: s_endpgm
115213
bb:
116214
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
117215
%gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
@@ -124,10 +222,22 @@ bb:
124222
ret void
125223
}
126224

127-
; GCN-LABEL: {{^}}and_or_and_shl:
128-
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
129-
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
130225
define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
226+
; GCN-LABEL: and_or_and_shl:
227+
; GCN: ; %bb.0: ; %bb
228+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
229+
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
230+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
231+
; GCN-NEXT: v_mov_b32_e32 v3, 0x50c0c00
232+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
233+
; GCN-NEXT: v_mov_b32_e32 v1, s3
234+
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
235+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
236+
; GCN-NEXT: flat_load_dword v2, v[0:1]
237+
; GCN-NEXT: s_waitcnt vmcnt(0)
238+
; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
239+
; GCN-NEXT: flat_store_dword v[0:1], v2
240+
; GCN-NEXT: s_endpgm
131241
bb:
132242
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
133243
%gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
@@ -140,10 +250,22 @@ bb:
140250
ret void
141251
}
142252

143-
; GCN-LABEL: {{^}}or_and_or:
144-
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
145-
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
146253
define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
254+
; GCN-LABEL: or_and_or:
255+
; GCN: ; %bb.0: ; %bb
256+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
257+
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
258+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
259+
; GCN-NEXT: v_mov_b32_e32 v3, 0x7020104
260+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
261+
; GCN-NEXT: v_mov_b32_e32 v1, s3
262+
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
263+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
264+
; GCN-NEXT: flat_load_dword v2, v[0:1]
265+
; GCN-NEXT: s_waitcnt vmcnt(0)
266+
; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
267+
; GCN-NEXT: flat_store_dword v[0:1], v2
268+
; GCN-NEXT: s_endpgm
147269
bb:
148270
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
149271
%gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
@@ -155,16 +277,31 @@ bb:
155277
ret void
156278
}
157279

158-
; GCN-LABEL: {{^}}known_ffff0500:
159-
; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
160-
; GCN: s_and_b32 [[SREG:s[0-9]+]], [[SREG]], 0xff00
161-
; GCN: s_or_b32 [[SREG]], [[SREG]], 0xffff0000
162-
; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, [[VREG]]
163-
; GCN: v_or_b32_e32 [[VREG]], [[SREG]], [[VREG]]
164-
; GCN: store_dword v[{{[0-9:]+}}], [[VREG]]{{$}}
165-
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
166280
; FIXME: there should have been a "v_perm_b32" with a 0xffff0500 mask here.
167281
define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
282+
; GCN-LABEL: known_ffff0500:
283+
; GCN: ; %bb.0: ; %bb
284+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
285+
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
286+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
287+
; GCN-NEXT: v_mov_b32_e32 v5, 0xffff8004
288+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
289+
; GCN-NEXT: v_mov_b32_e32 v1, s3
290+
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
291+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
292+
; GCN-NEXT: flat_load_dword v4, v[0:1]
293+
; GCN-NEXT: s_bitset1_b32 s0, 15
294+
; GCN-NEXT: s_and_b32 s0, s0, 0xff00
295+
; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000
296+
; GCN-NEXT: v_mov_b32_e32 v2, s2
297+
; GCN-NEXT: v_mov_b32_e32 v3, s3
298+
; GCN-NEXT: s_waitcnt vmcnt(0)
299+
; GCN-NEXT: v_or_b32_e32 v4, 4, v4
300+
; GCN-NEXT: v_and_b32_e32 v4, 0xff00ff, v4
301+
; GCN-NEXT: v_or_b32_e32 v4, s0, v4
302+
; GCN-NEXT: flat_store_dword v[0:1], v4
303+
; GCN-NEXT: flat_store_dword v[2:3], v5
304+
; GCN-NEXT: s_endpgm
168305
bb:
169306
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
170307
%gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
@@ -181,12 +318,27 @@ bb:
181318
ret void
182319
}
183320

184-
; GCN-LABEL: {{^}}known_050c0c00:
185-
; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
186-
; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 4{{$}}
187-
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
188-
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
189321
define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
322+
; GCN-LABEL: known_050c0c00:
323+
; GCN: ; %bb.0: ; %bb
324+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
325+
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
326+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
327+
; GCN-NEXT: v_mov_b32_e32 v5, 0x50c0c00
328+
; GCN-NEXT: v_mov_b32_e32 v6, 4
329+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
330+
; GCN-NEXT: v_mov_b32_e32 v1, s3
331+
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
332+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
333+
; GCN-NEXT: flat_load_dword v4, v[0:1]
334+
; GCN-NEXT: s_or_b32 s0, s0, 4
335+
; GCN-NEXT: v_mov_b32_e32 v2, s2
336+
; GCN-NEXT: v_mov_b32_e32 v3, s3
337+
; GCN-NEXT: s_waitcnt vmcnt(0)
338+
; GCN-NEXT: v_perm_b32 v4, v4, s0, v5
339+
; GCN-NEXT: flat_store_dword v[0:1], v4
340+
; GCN-NEXT: flat_store_dword v[2:3], v6
341+
; GCN-NEXT: s_endpgm
190342
bb:
191343
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
192344
%gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
@@ -202,12 +354,28 @@ bb:
202354
ret void
203355
}
204356

205-
; GCN-LABEL: {{^}}known_ffff8004:
206-
; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
207-
; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
208-
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
209-
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
210357
define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
358+
; GCN-LABEL: known_ffff8004:
359+
; GCN: ; %bb.0: ; %bb
360+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
361+
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
362+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
363+
; GCN-NEXT: v_mov_b32_e32 v5, 0xffff0500
364+
; GCN-NEXT: v_mov_b32_e32 v6, 0xffff8004
365+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
366+
; GCN-NEXT: v_mov_b32_e32 v1, s3
367+
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
368+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
369+
; GCN-NEXT: flat_load_dword v4, v[0:1]
370+
; GCN-NEXT: s_or_b32 s0, s0, 4
371+
; GCN-NEXT: v_mov_b32_e32 v2, s2
372+
; GCN-NEXT: v_mov_b32_e32 v3, s3
373+
; GCN-NEXT: s_waitcnt vmcnt(0)
374+
; GCN-NEXT: v_or_b32_e32 v4, 0x8000, v4
375+
; GCN-NEXT: v_perm_b32 v4, v4, s0, v5
376+
; GCN-NEXT: flat_store_dword v[0:1], v4
377+
; GCN-NEXT: flat_store_dword v[2:3], v6
378+
; GCN-NEXT: s_endpgm
211379
bb:
212380
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
213381
%gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id

0 commit comments

Comments
 (0)