Skip to content

Commit c8047c6

Browse files
authored
[AMDGPU][True16][CodeGen] update test for buildbot failure (llvm#131028)
This is an NFC patch. llvm#103366 hit a buildbot failure with i1-to-bf16.ll; update the test to fix the build. Also remove the duplicated comments added in llvm#103366.
1 parent 15e6bb6 commit c8047c6

File tree

2 files changed

+12
-7
lines changed

2 files changed

+12
-7
lines changed

llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,6 @@
2525
/// This pass also adds register allocation hints to COPY.
2626
/// The hints will be post-processed by SIRegisterInfo::getRegAllocationHints.
2727
/// When using True16, we often see COPY moving a 16-bit value between a VGPR_32
28-
/// This pass also adds register allocation hints to COPY.
29-
/// The hints will be post-processed by SIRegisterInfo::getRegAllocationHints.
30-
/// When using True16, we often see COPY moving a 16-bit value between a VGPR_32
3128
/// and a VGPR_16. If we use the VGPR_16 that corresponds to the lo16 bits of
3229
/// the VGPR_32, the COPY can be completely eliminated.
3330
///

llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,13 +108,14 @@ define amdgpu_ps i32 @s_uitofp_i1_to_bf16(i1 inreg %num) {
108108
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0
109109
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
110110
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
111+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
111112
; GFX11-NEXT: s_bfe_u32 s1, s0, 0x10010
112-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
113113
; GFX11-NEXT: s_add_i32 s1, s1, s0
114114
; GFX11-NEXT: s_bitset1_b32 s0, 22
115115
; GFX11-NEXT: s_addk_i32 s1, 0x7fff
116116
; GFX11-NEXT: s_and_b32 s2, vcc_lo, exec_lo
117117
; GFX11-NEXT: s_cselect_b32 s0, s0, s1
118+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
118119
; GFX11-NEXT: s_lshr_b32 s0, s0, 16
119120
; GFX11-NEXT: ; return to shader part epilog
120121
;
@@ -125,6 +126,7 @@ define amdgpu_ps i32 @s_uitofp_i1_to_bf16(i1 inreg %num) {
125126
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
126127
; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0
127128
; GFX12-NEXT: v_readfirstlane_b32 s0, v0
129+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
128130
; GFX12-NEXT: s_bfe_u32 s1, s0, 0x10010
129131
; GFX12-NEXT: s_or_b32 s2, s0, 0x400000
130132
; GFX12-NEXT: s_wait_alu 0xfffe
@@ -305,10 +307,11 @@ define amdgpu_ps <2 x i32> @s_uitofp_v2i1_to_v2bf16(<2 x i1> inreg %num) {
305307
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1.0, s0
306308
; GFX11-NEXT: v_readfirstlane_b32 s2, v0
307309
; GFX11-NEXT: v_cmp_u_f32_e64 s1, v0, v0
308-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
310+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
309311
; GFX11-NEXT: v_readfirstlane_b32 s0, v1
310312
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
311313
; GFX11-NEXT: s_bfe_u32 s3, s0, 0x10010
314+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
312315
; GFX11-NEXT: s_add_i32 s3, s3, s0
313316
; GFX11-NEXT: s_bitset1_b32 s0, 22
314317
; GFX11-NEXT: s_addk_i32 s3, 0x7fff
@@ -338,6 +341,7 @@ define amdgpu_ps <2 x i32> @s_uitofp_v2i1_to_v2bf16(<2 x i1> inreg %num) {
338341
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
339342
; GFX12-NEXT: v_readfirstlane_b32 s2, v0
340343
; GFX12-NEXT: v_readfirstlane_b32 s0, v1
344+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
341345
; GFX12-NEXT: s_bfe_u32 s1, s0, 0x10010
342346
; GFX12-NEXT: s_or_b32 s3, s0, 0x400000
343347
; GFX12-NEXT: s_wait_alu 0xfffe
@@ -1161,13 +1165,14 @@ define amdgpu_ps i32 @s_sitofp_i1_to_bf16(i1 inreg %num) {
11611165
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, s0
11621166
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
11631167
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
1168+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
11641169
; GFX11-NEXT: s_bfe_u32 s1, s0, 0x10010
1165-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
11661170
; GFX11-NEXT: s_add_i32 s1, s1, s0
11671171
; GFX11-NEXT: s_bitset1_b32 s0, 22
11681172
; GFX11-NEXT: s_addk_i32 s1, 0x7fff
11691173
; GFX11-NEXT: s_and_b32 s2, vcc_lo, exec_lo
11701174
; GFX11-NEXT: s_cselect_b32 s0, s0, s1
1175+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
11711176
; GFX11-NEXT: s_ashr_i32 s0, s0, 16
11721177
; GFX11-NEXT: ; return to shader part epilog
11731178
;
@@ -1178,6 +1183,7 @@ define amdgpu_ps i32 @s_sitofp_i1_to_bf16(i1 inreg %num) {
11781183
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
11791184
; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, s0
11801185
; GFX12-NEXT: v_readfirstlane_b32 s0, v0
1186+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
11811187
; GFX12-NEXT: s_bfe_u32 s1, s0, 0x10010
11821188
; GFX12-NEXT: s_or_b32 s2, s0, 0x400000
11831189
; GFX12-NEXT: s_wait_alu 0xfffe
@@ -1358,10 +1364,11 @@ define amdgpu_ps <2 x i32> @s_sitofp_v2i1_to_v2bf16(<2 x i1> inreg %num) {
13581364
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1.0, s0
13591365
; GFX11-NEXT: v_readfirstlane_b32 s2, v0
13601366
; GFX11-NEXT: v_cmp_u_f32_e64 s0, v0, v0
1361-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
1367+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
13621368
; GFX11-NEXT: v_readfirstlane_b32 s1, v1
13631369
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
13641370
; GFX11-NEXT: s_bfe_u32 s3, s1, 0x10010
1371+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
13651372
; GFX11-NEXT: s_add_i32 s3, s3, s1
13661373
; GFX11-NEXT: s_bitset1_b32 s1, 22
13671374
; GFX11-NEXT: s_addk_i32 s3, 0x7fff
@@ -1391,6 +1398,7 @@ define amdgpu_ps <2 x i32> @s_sitofp_v2i1_to_v2bf16(<2 x i1> inreg %num) {
13911398
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
13921399
; GFX12-NEXT: v_readfirstlane_b32 s2, v0
13931400
; GFX12-NEXT: v_readfirstlane_b32 s0, v1
1401+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
13941402
; GFX12-NEXT: s_bfe_u32 s1, s0, 0x10010
13951403
; GFX12-NEXT: s_or_b32 s3, s0, 0x400000
13961404
; GFX12-NEXT: s_wait_alu 0xfffe

0 commit comments

Comments
 (0)