Skip to content

Commit e1730cf

Browse files
Konstantin Pyzhov authored and Konstantin Pyzhov committed
[AMDGPU] Disable 'Skip Uniform Regions' optimization by default for AMDGPU.
Reviewers: sameerds, dstuttard
Differential Revision: https://reviews.llvm.org/D77228
1 parent ec69bac commit e1730cf

32 files changed

+927
-869
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -866,7 +866,7 @@ bool GCNPassConfig::addPreISel() {
866866
if (EnableStructurizerWorkarounds) {
867867
addPass(createUnifyLoopExitsPass());
868868
}
869-
addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
869+
addPass(createStructurizeCFGPass(false)); // true -> SkipUniformRegions
870870
}
871871
addPass(createSinkingPass());
872872
addPass(createAMDGPUAnnotateUniformValues());

llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll

+2-1
Original file line number | Diff line number | Diff line change
@@ -136,9 +136,10 @@ define void @constrained_if_register_class() {
136136
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
137137
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
138138
; CHECK-NEXT: s_cselect_b32 s4, 1, 0
139+
; CHECK-NEXT: s_xor_b32 s4, s4, 1
139140
; CHECK-NEXT: s_and_b32 s4, s4, 1
140141
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
141-
; CHECK-NEXT: s_cbranch_scc1 BB4_6
142+
; CHECK-NEXT: s_cbranch_scc0 BB4_6
142143
; CHECK-NEXT: ; %bb.1: ; %bb2
143144
; CHECK-NEXT: s_getpc_b64 s[6:7]
144145
; CHECK-NEXT: s_add_u32 s6, s6, const.ptr@gotpcrel32@lo+4

llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll

+55-32
Original file line number | Diff line number | Diff line change
@@ -7,38 +7,45 @@
77
define amdgpu_kernel void @localize_constants(i1 %cond) {
88
; GFX9-LABEL: localize_constants:
99
; GFX9: ; %bb.0: ; %entry
10-
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0
10+
; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0
11+
; GFX9-NEXT: s_mov_b32 s0, 1
1112
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
12-
; GFX9-NEXT: s_and_b32 s0, s0, 1
13-
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
13+
; GFX9-NEXT: s_xor_b32 s1, s1, 1
14+
; GFX9-NEXT: s_and_b32 s1, s1, 1
15+
; GFX9-NEXT: s_cmp_lg_u32 s1, 0
1416
; GFX9-NEXT: s_cbranch_scc0 BB0_2
15-
; GFX9-NEXT: ; %bb.1: ; %bb0
16-
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
17-
; GFX9-NEXT: global_store_dword v[0:1], v0, off
18-
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8
17+
; GFX9-NEXT: ; %bb.1: ; %bb1
18+
; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
1919
; GFX9-NEXT: global_store_dword v[0:1], v0, off
20-
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7
20+
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7
2121
; GFX9-NEXT: global_store_dword v[0:1], v0, off
2222
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8
2323
; GFX9-NEXT: global_store_dword v[0:1], v0, off
24-
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7
25-
; GFX9-NEXT: global_store_dword v[0:1], v0, off
26-
; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
24+
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8
2725
; GFX9-NEXT: global_store_dword v[0:1], v0, off
28-
; GFX9-NEXT: s_endpgm
29-
; GFX9-NEXT: BB0_2: ; %bb1
30-
; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
26+
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7
3127
; GFX9-NEXT: global_store_dword v[0:1], v0, off
32-
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7
28+
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
29+
; GFX9-NEXT: s_mov_b32 s0, 0
3330
; GFX9-NEXT: global_store_dword v[0:1], v0, off
34-
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8
31+
; GFX9-NEXT: BB0_2: ; %Flow
32+
; GFX9-NEXT: s_and_b32 s0, s0, 1
33+
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
34+
; GFX9-NEXT: s_cbranch_scc0 BB0_4
35+
; GFX9-NEXT: ; %bb.3: ; %bb0
36+
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
3537
; GFX9-NEXT: global_store_dword v[0:1], v0, off
3638
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8
3739
; GFX9-NEXT: global_store_dword v[0:1], v0, off
3840
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7
3941
; GFX9-NEXT: global_store_dword v[0:1], v0, off
40-
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
42+
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8
4143
; GFX9-NEXT: global_store_dword v[0:1], v0, off
44+
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7
45+
; GFX9-NEXT: global_store_dword v[0:1], v0, off
46+
; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
47+
; GFX9-NEXT: global_store_dword v[0:1], v0, off
48+
; GFX9-NEXT: BB0_4: ; %bb2
4249
; GFX9-NEXT: s_endpgm
4350
entry:
4451
br i1 %cond, label %bb0, label %bb1
@@ -75,31 +82,46 @@ bb2:
7582
define amdgpu_kernel void @localize_globals(i1 %cond) {
7683
; GFX9-LABEL: localize_globals:
7784
; GFX9: ; %bb.0: ; %entry
78-
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0
85+
; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0
86+
; GFX9-NEXT: s_mov_b32 s0, 1
87+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
88+
; GFX9-NEXT: s_xor_b32 s1, s1, 1
89+
; GFX9-NEXT: s_and_b32 s1, s1, 1
90+
; GFX9-NEXT: s_cmp_lg_u32 s1, 0
91+
; GFX9-NEXT: s_cbranch_scc0 BB1_2
92+
; GFX9-NEXT: ; %bb.1: ; %bb1
93+
; GFX9-NEXT: s_getpc_b64 s[2:3]
94+
; GFX9-NEXT: s_add_u32 s2, s2, gv2@gotpcrel32@lo+4
95+
; GFX9-NEXT: s_addc_u32 s3, s3, gv2@gotpcrel32@hi+4
96+
; GFX9-NEXT: s_getpc_b64 s[4:5]
97+
; GFX9-NEXT: s_add_u32 s4, s4, gv3@gotpcrel32@lo+4
98+
; GFX9-NEXT: s_addc_u32 s5, s5, gv3@gotpcrel32@hi+4
99+
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
100+
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
101+
; GFX9-NEXT: v_mov_b32_e32 v2, 0
102+
; GFX9-NEXT: s_mov_b32 s0, 0
79103
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
104+
; GFX9-NEXT: v_mov_b32_e32 v0, s2
105+
; GFX9-NEXT: v_mov_b32_e32 v1, s3
106+
; GFX9-NEXT: global_store_dword v[0:1], v2, off
107+
; GFX9-NEXT: v_mov_b32_e32 v0, s4
108+
; GFX9-NEXT: v_mov_b32_e32 v2, 1
109+
; GFX9-NEXT: v_mov_b32_e32 v1, s5
110+
; GFX9-NEXT: global_store_dword v[0:1], v2, off
111+
; GFX9-NEXT: BB1_2: ; %Flow
80112
; GFX9-NEXT: s_and_b32 s0, s0, 1
81113
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
82-
; GFX9-NEXT: s_cbranch_scc0 BB1_2
83-
; GFX9-NEXT: ; %bb.1: ; %bb0
114+
; GFX9-NEXT: s_cbranch_scc0 BB1_4
115+
; GFX9-NEXT: ; %bb.3: ; %bb0
84116
; GFX9-NEXT: s_getpc_b64 s[0:1]
85117
; GFX9-NEXT: s_add_u32 s0, s0, gv0@gotpcrel32@lo+4
86118
; GFX9-NEXT: s_addc_u32 s1, s1, gv0@gotpcrel32@hi+4
87-
; GFX9-NEXT: v_mov_b32_e32 v2, 0
119+
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
88120
; GFX9-NEXT: s_getpc_b64 s[2:3]
89121
; GFX9-NEXT: s_add_u32 s2, s2, gv1@gotpcrel32@lo+4
90122
; GFX9-NEXT: s_addc_u32 s3, s3, gv1@gotpcrel32@hi+4
91-
; GFX9-NEXT: s_branch BB1_3
92-
; GFX9-NEXT: BB1_2: ; %bb1
93-
; GFX9-NEXT: s_getpc_b64 s[0:1]
94-
; GFX9-NEXT: s_add_u32 s0, s0, gv2@gotpcrel32@lo+4
95-
; GFX9-NEXT: s_addc_u32 s1, s1, gv2@gotpcrel32@hi+4
96-
; GFX9-NEXT: v_mov_b32_e32 v2, 0
97-
; GFX9-NEXT: s_getpc_b64 s[2:3]
98-
; GFX9-NEXT: s_add_u32 s2, s2, gv3@gotpcrel32@lo+4
99-
; GFX9-NEXT: s_addc_u32 s3, s3, gv3@gotpcrel32@hi+4
100-
; GFX9-NEXT: BB1_3: ; %bb2
101-
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
102123
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
124+
; GFX9-NEXT: v_mov_b32_e32 v2, 0
103125
; GFX9-NEXT: v_mov_b32_e32 v3, 1
104126
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
105127
; GFX9-NEXT: v_mov_b32_e32 v0, s0
@@ -108,6 +130,7 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
108130
; GFX9-NEXT: v_mov_b32_e32 v0, s2
109131
; GFX9-NEXT: v_mov_b32_e32 v1, s3
110132
; GFX9-NEXT: global_store_dword v[0:1], v3, off
133+
; GFX9-NEXT: BB1_4: ; %bb2
111134
; GFX9-NEXT: s_endpgm
112135
entry:
113136
br i1 %cond, label %bb0, label %bb1

llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.ll

+2-2
Original file line number | Diff line number | Diff line change
@@ -7,11 +7,11 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) #0
77

88
define amdgpu_kernel void @long_branch_dbg_value(float addrspace(1)* nocapture %arg, float %arg1) #1 !dbg !5 {
99
; GCN-LABEL: long_branch_dbg_value:
10-
; GCN: BB0_4: ; %bb
10+
; GCN: BB0_5: ; %bb
1111
; GCN-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- [DW_OP_plus_uconst 12, DW_OP_stack_value]
1212
; GCN-NEXT: .loc 1 0 42 is_stmt 0 ; /tmp/test_debug_value.cl:0:42
1313
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
14-
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], BB0_3-(BB0_4+4)
14+
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], BB0_4-(BB0_5+4)
1515
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], 0
1616
; GCN-NEXT: s_setpc_b64
1717
bb:

llvm/test/CodeGen/AMDGPU/branch-relaxation.ll

+19-34
Original file line number | Diff line number | Diff line change
@@ -224,32 +224,25 @@ bb3:
224224

225225
; GCN-LABEL: {{^}}uniform_unconditional_min_long_forward_branch:
226226
; GCN: s_cmp_eq_u32
227-
; GCN-NEXT: s_cbranch_scc0 [[BB2:BB[0-9]+_[0-9]+]]
227+
; GCN: s_cbranch_scc{{[0-1]}} [[BB2:BB[0-9]+_[0-9]+]]
228228

229229
; GCN-NEXT: [[LONG_JUMP0:BB[0-9]+_[0-9]+]]: ; %bb0
230230
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
231231
; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], [[BB3:BB[0-9]_[0-9]+]]-([[LONG_JUMP0]]+4)
232232
; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], 0{{$}}
233233
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}}
234234

235-
; GCN-NEXT: [[BB2]]: ; %bb2
236-
; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17
237-
; GCN: buffer_store_dword [[BB2_K]]
238-
239-
; GCN-NEXT: [[LONG_JUMP1:BB[0-9]+_[0-9]+]]: ; %bb2
240-
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}}
241-
; GCN-NEXT: s_add_u32 s[[PC1_LO]], s[[PC1_LO]], [[BB4:BB[0-9]_[0-9]+]]-([[LONG_JUMP1]]+4)
242-
; GCN-NEXT: s_addc_u32 s[[PC1_HI]], s[[PC1_HI]], 0{{$}}
243-
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC1_LO]]:[[PC1_HI]]{{\]}}
244-
245-
; GCN: [[BB3]]: ; %bb3
235+
; GCN: [[BB2]]: ; %bb3
246236
; GCN: v_nop_e64
247237
; GCN: v_nop_e64
248238
; GCN: v_nop_e64
249239
; GCN: v_nop_e64
250240
; GCN: ;;#ASMEND
251241

252-
; GCN-NEXT: [[BB4]]: ; %bb4
242+
; GCN: [[BB3]]:
243+
; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17
244+
; GCN: buffer_store_dword [[BB2_K]]
245+
253246
; GCN: v_mov_b32_e32 [[BB4_K:v[0-9]+]], 63
254247
; GCN: buffer_store_dword [[BB4_K]]
255248
; GCN-NEXT: s_endpgm
@@ -317,23 +310,15 @@ loop:
317310
; GCN-LABEL: {{^}}expand_requires_expand:
318311
; GCN-NEXT: ; %bb.0: ; %bb0
319312
; GCN: s_load_dword
320-
; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 0{{$}}
321-
; GCN-NEXT: s_cbranch_scc0 [[BB1:BB[0-9]+_[0-9]+]]
322-
323-
; GCN-NEXT: [[LONGBB0:BB[0-9]+_[0-9]+]]: ; %bb0
313+
; GCN: {{s|v}}_cmp_lt_i32
314+
; GCN: s_cbranch
324315

325-
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
326-
; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], [[BB2:BB[0-9]_[0-9]+]]-([[LONGBB0]]+4)
327-
; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], 0{{$}}
328-
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}}
329-
330-
; GCN-NEXT: [[BB1]]: ; %bb1
331-
; GCN-NEXT: s_load_dword
316+
; GCN: s_load_dword
332317
; GCN-NEXT: s_waitcnt lgkmcnt(0)
333-
; GCN-NEXT: s_cmp_eq_u32 s{{[0-9]+}}, 3{{$}}
334-
; GCN-NEXT: s_cbranch_scc0 [[BB2:BB[0-9]_[0-9]+]]
318+
; GCN-NEXT: v_cmp_{{eq|ne}}_u32_e64
319+
; GCN: s_cbranch_vccz [[BB2:BB[0-9]_[0-9]+]]
335320

336-
; GCN-NEXT: [[LONGBB1:BB[0-9]+_[0-9]+]]: ; %bb1
321+
; GCN-NEXT: [[LONGBB1:BB[0-9]+_[0-9]+]]:
337322
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}}
338323
; GCN-NEXT: s_add_u32 s[[PC1_LO]], s[[PC1_LO]], [[BB3:BB[0-9]+_[0-9]+]]-([[LONGBB1]]+4)
339324
; GCN-NEXT: s_addc_u32 s[[PC1_HI]], s[[PC1_HI]], 0{{$}}
@@ -451,7 +436,7 @@ endif:
451436
; GCN: v_nop_e64
452437
; GCN: v_nop_e64
453438
; GCN: ;;#ASMEND
454-
; GCN: s_cbranch_vccz [[RET:BB[0-9]+_[0-9]+]]
439+
; GCN: s_cbranch_{{vccz|vccnz}} [[RET:BB[0-9]+_[0-9]+]]
455440

456441
; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop
457442
; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1
@@ -491,22 +476,22 @@ ret:
491476

492477
; GCN-LABEL: {{^}}long_branch_hang:
493478
; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6
494-
; GCN: s_cbranch_scc0 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
479+
; GCN: s_cbranch_scc{{[0-1]}} [[LONG_BR_0:BB[0-9]+_[0-9]+]]
495480
; GCN-NEXT: BB{{[0-9]+_[0-9]+}}:
496481

497482
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-(
498483
; GCN-NEXT: s_addc_u32
499484
; GCN-NEXT: s_setpc_b64
500485

501486
; GCN-NEXT: [[LONG_BR_0]]:
502-
; GCN-DAG: v_cmp_lt_i32
503-
; GCN-DAG: v_cmp_gt_i32
504-
; GCN: s_cbranch_vccnz
505-
506-
; GCN: s_setpc_b64
507487
; GCN: s_setpc_b64
508488

509489
; GCN: [[LONG_BR_DEST0]]
490+
491+
; GCN: s_cbranch_vccnz
492+
; GCN-DAG: v_cmp_lt_i32
493+
; GCN-DAG: v_cmp_ge_i32
494+
510495
; GCN: s_cbranch_vccz
511496
; GCN: s_setpc_b64
512497

llvm/test/CodeGen/AMDGPU/branch-uniformity.ll

+2-2
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@
88
;
99
; CHECK-LABEL: {{^}}main:
1010
; CHECK: ; %LOOP49
11-
; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
12-
; CHECK: s_cbranch_scc1
11+
; CHECK: s_cmp_{{lg|eq}}_u32 s{{[0-9]+}}, 0
12+
; CHECK: s_cbranch_scc{{[0-1]}}
1313
; CHECK: ; %ENDIF53
1414
define amdgpu_vs float @main(i32 %in) {
1515
main_body:

llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll

+1-1
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ for.body:
102102
; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80
103103
; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, 4
104104

105-
; GCN: s_cbranch_vccnz [[LOOPBB]]
105+
; GCN: s_cbranch_{{vccz|vccnz}} [[LOOPBB]]
106106
; GCN-NEXT: ; %bb.2
107107
; GCN-NEXT: s_endpgm
108108
define amdgpu_kernel void @loop_arg_0(float addrspace(3)* %ptr, i32 %n) nounwind {

llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll

+16-21
Original file line number | Diff line number | Diff line change
@@ -27,13 +27,12 @@
2727

2828
; GCN-LABEL: {{^}}sink_ubfe_i32:
2929
; GCN-NOT: lshr
30-
; GCN: s_cbranch_scc1
30+
; GCN: s_cbranch_scc{{[0-1]}}
3131

32-
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008
33-
; GCN: BB0_2:
3432
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008
35-
3633
; GCN: BB0_3:
34+
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008
35+
3736
; GCN: buffer_store_dword
3837
; GCN: s_endpgm
3938
define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
@@ -122,16 +121,15 @@ ret:
122121
; GCN-NOT: lshr
123122
; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
124123
; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
125-
; GCN: s_cbranch_scc1
124+
; GCN: s_cbranch_scc{{[0-1]}}
126125

127-
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
128-
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0xff
129-
130-
; GCN: BB2_2:
131126
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
132127
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0x7f
133128

134129
; GCN: BB2_3:
130+
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
131+
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0xff
132+
135133
; GCN: buffer_store_short
136134
; GCN: s_endpgm
137135
define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {
@@ -177,14 +175,13 @@ ret:
177175

178176
; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:
179177

178+
; GCN: s_cbranch_scc{{[0-1]}} BB3_2
180179
; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
181-
; GCN: s_cbranch_scc1 BB3_2
182-
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]
183-
184-
; GCN: BB3_2:
185180
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]
186181

187182
; GCN: BB3_3:
183+
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]
184+
188185
; GCN: buffer_store_dwordx2
189186
define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {
190187
entry:
@@ -226,14 +223,13 @@ ret:
226223

227224
; GCN-LABEL: {{^}}sink_ubfe_i64_low32:
228225

229-
; GCN: s_cbranch_scc1 BB4_2
226+
; GCN: s_cbranch_scc{{[0-1]}} BB4_2
230227

231-
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f
232-
233-
; GCN: BB4_2:
234228
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f
235229

236230
; GCN: BB4_3:
231+
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f
232+
237233
; GCN: buffer_store_dwordx2
238234
define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {
239235
entry:
@@ -274,13 +270,12 @@ ret:
274270
; OPT: ret
275271

276272
; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
277-
; GCN: s_cbranch_scc1 BB5_2
278-
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003
279-
280-
; GCN: BB5_2:
273+
; GCN: s_cbranch_scc{{[0-1]}} BB5_2
281274
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003
282275

283276
; GCN: BB5_3:
277+
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003
278+
284279
; GCN: buffer_store_dwordx2
285280
define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {
286281
entry:

0 commit comments

Comments
 (0)